From af453548ee60f060c49902a9fe83ea94566e1d70 Mon Sep 17 00:00:00 2001 From: Greg Heartsfield Date: Tue, 25 Jan 2022 18:21:43 -0600 Subject: [PATCH] feat: allow author and event id prefix search This is an experimental non-NIP feature that allows a subscription filter to include a prefix for authors and events. --- src/db.rs | 229 +++++++++++++++++++++++++++++++++++++++----- src/event.rs | 28 +++--- src/subscription.rs | 67 ++++++++++--- 3 files changed, 274 insertions(+), 50 deletions(-) diff --git a/src/db.rs b/src/db.rs index aca3bef..0d07843 100644 --- a/src/db.rs +++ b/src/db.rs @@ -383,6 +383,80 @@ fn is_hex(s: &str) -> bool { s.chars().all(|x| char::is_ascii_hexdigit(&x)) } +/// Check if a string contains only f chars +fn is_all_fs(s: &str) -> bool { + s.chars().all(|x| x == 'f' || x == 'F') +} + +#[derive(PartialEq, Debug, Clone)] +enum HexSearch { + // when no range is needed, exact 32-byte + Exact(Vec<u8>), + // lower (inclusive) and upper range (exclusive) + Range(Vec<u8>, Vec<u8>), + // lower bound only, upper bound is MAX inclusive + LowerOnly(Vec<u8>), +} + +/// Find the next hex sequence greater than the argument. +fn hex_range(s: &str) -> Option<HexSearch> { + // handle special cases + if !is_hex(s) || s.len() > 64 { + return None; + } + if s.len() == 64 { + return Some(HexSearch::Exact(hex::decode(s).ok()?)); + } + // if s is odd, add a zero + let mut hash_base = s.to_owned(); + let mut odd = hash_base.len() % 2 != 0; + if odd { + // extend the string to make it even + hash_base.push('0'); + } + let base = hex::decode(hash_base).ok()?; + // check for all ff's + if is_all_fs(s) { + // there is no higher bound, we only want to search for blobs greater than this. + return Some(HexSearch::LowerOnly(base)); + } + + // return a range + let mut upper = base.clone(); + let mut byte_len = upper.len(); + + // for odd strings, we made them longer, but we want to increment the upper char (+16). 
+ // we know we can do this without overflowing because we explicitly set the bottom half to 0's. + while byte_len > 0 { + byte_len -= 1; + // check if byte can be incremented, or if we need to carry. + let b = upper[byte_len]; + if b == u8::MAX { + // reset and carry + upper[byte_len] = 0; + } else if odd { + // check if first char in this byte is NOT 'f' + if b < 240 { + upper[byte_len] = b + 16; // bump up the first character in this byte + // increment done, stop iterating through the vec + break; + } else { + // if it is 'f', reset the byte to 0 and do a carry + // reset and carry + upper[byte_len] = 0; + } + // done with odd logic, so don't repeat this + odd = false; + } else { + // bump up the first character in this byte + upper[byte_len] = b + 1; + // increment done, stop iterating + break; + } + } + Some(HexSearch::Range(base, upper)) +} + fn repeat_vars(count: usize) -> String { if count == 0 { return "".to_owned(); @@ -409,17 +483,33 @@ fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) { for f in sub.filters.iter() { // individual filter components let mut filter_components: Vec<String> = Vec::new(); - // Query for "authors" - if f.authors.is_some() { - let authors_escaped: Vec<String> = f - .authors - .as_ref() - .unwrap() - .iter() - .filter(|&x| is_hex(x)) - .map(|x| format!("x'{}'", x)) - .collect(); - let authors_clause = format!("author IN ({})", authors_escaped.join(", ")); + // Query for "authors", allowing prefix matches + if let Some(authvec) = &f.authors { + // take each author and convert to a hexsearch + let mut auth_searches: Vec<String> = vec![]; + for auth in authvec { + match hex_range(auth) { + Some(HexSearch::Exact(ex)) => { + info!("Exact match for author"); + auth_searches.push("author=?".to_owned()); + params.push(Box::new(ex)); + } + Some(HexSearch::Range(lower, upper)) => { + auth_searches.push("(author>? 
AND author<?)".to_owned()); + params.push(Box::new(lower)); + params.push(Box::new(upper)); + } + Some(HexSearch::LowerOnly(lower)) => { + // info!("{:?} => lower; {:?} ", auth, hex::encode(lower)); + auth_searches.push("author>?".to_owned()); + params.push(Box::new(lower)); + } + None => { + info!("Could not parse hex range from {:?}", auth); + } + } + } + let authors_clause = format!("({})", auth_searches.join(" OR ")); filter_components.push(authors_clause); } // Query for Kind @@ -429,17 +519,31 @@ fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) { let kind_clause = format!("kind IN ({})", str_kinds.join(", ")); filter_components.push(kind_clause); } - // Query for event - if f.ids.is_some() { - let ids_escaped: Vec<String> = f - .ids - .as_ref() - .unwrap() - .iter() - .filter(|&x| is_hex(x)) - .map(|x| format!("x'{}'", x)) - .collect(); - let id_clause = format!("event_hash IN ({})", ids_escaped.join(", ")); + // Query for event, allowing prefix matches + if let Some(idvec) = &f.ids { + // take each author and convert to a hexsearch + let mut id_searches: Vec<String> = vec![]; + for id in idvec { + match hex_range(id) { + Some(HexSearch::Exact(ex)) => { + id_searches.push("event_hash=?".to_owned()); + params.push(Box::new(ex)); + } + Some(HexSearch::Range(lower, upper)) => { + id_searches.push("(event_hash>? 
AND event_hash<?)".to_owned()); + params.push(Box::new(lower)); + params.push(Box::new(upper)); + } + Some(HexSearch::LowerOnly(lower)) => { + id_searches.push("event_hash>?".to_owned()); + params.push(Box::new(lower)); + } + None => { + info!("Could not parse hex range from {:?}", id); + } + } + } + let id_clause = format!("({})", id_searches.join(" OR ")); filter_components.push(id_clause); } // Query for tags @@ -553,3 +657,84 @@ pub async fn db_query( ok }); } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hex_range_exact() -> Result<()> { + let hex = "abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00"; + let r = hex_range(hex); + assert_eq!( + r, + Some(HexSearch::Exact(hex::decode(hex).expect("invalid hex"))) + ); + Ok(()) + } + #[test] + fn hex_full_range() -> Result<()> { + //let hex = "abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00"; + let hex = "aaaa"; + let hex_upper = "aaab"; + let r = hex_range(hex); + assert_eq!( + r, + Some(HexSearch::Range( + hex::decode(hex).expect("invalid hex"), + hex::decode(hex_upper).expect("invalid hex") + )) + ); + Ok(()) + } + + #[test] + fn hex_full_range_odd() -> Result<()> { + let r = hex_range("abc"); + assert_eq!( + r, + Some(HexSearch::Range( + hex::decode("abc0").expect("invalid hex"), + hex::decode("abd0").expect("invalid hex") + )) + ); + Ok(()) + } + + #[test] + fn hex_full_range_odd_end_f() -> Result<()> { + let r = hex_range("abf"); + assert_eq!( + r, + Some(HexSearch::Range( + hex::decode("abf0").expect("invalid hex"), + hex::decode("ac00").expect("invalid hex") + )) + ); + Ok(()) + } + + #[test] + fn hex_no_upper() -> Result<()> { + let r = hex_range("ffff"); + assert_eq!( + r, + Some(HexSearch::LowerOnly( + hex::decode("ffff").expect("invalid hex") + )) + ); + Ok(()) + } + + #[test] + fn hex_no_upper_odd() -> Result<()> { + let r = hex_range("fff"); + assert_eq!( + r, + Some(HexSearch::LowerOnly( + hex::decode("fff0").expect("invalid hex") + )) + ); + Ok(()) + } +} diff 
--git a/src/event.rs b/src/event.rs index 99b2453..a9a8261 100644 --- a/src/event.rs +++ b/src/event.rs @@ -145,6 
+145,7 @@ impl Event { return false; } // * validate the message digest (sig) using the pubkey & computed sha256 message hash. + let sig = schnorr::Signature::from_str(&self.sig).unwrap(); if let Ok(msg) = secp256k1::Message::from_slice(digest.as_ref()) { let pubkey = XOnlyPublicKey::from_str(&self.pubkey).unwrap(); @@ -193,21 +194,6 @@ impl Event { serde_json::Value::Array(tags) } - /// Generic tag match - // TODO: is this used anywhere? - pub fn generic_tag_match(&self, tagname: &str, tagvalue: &str) -> bool { - match &self.tagidx { - Some(idx) => { - // get the set of values for this tag - match idx.get(tagname) { - Some(valset) => valset.contains(tagvalue), - None => false, - } - } - None => false, - } - } - /// Determine if the given tag and value set intersect with tags in this event. pub fn generic_tag_val_intersect(&self, tagname: &str, check: &HashSet<String>) -> bool { match &self.tagidx { @@ -258,7 +244,8 @@ mod tests { #[test] fn empty_event_tag_match() -> Result<()> { let event = simple_event(); - assert!(!event.event_tag_match("foo")); + assert!(!event + .generic_tag_val_intersect("e", &HashSet::from(["foo".to_owned(), "bar".to_owned()]))); Ok(()) } @@ -266,7 +253,14 @@ mod tests { fn single_event_tag_match() -> Result<()> { let mut event = simple_event(); event.tags = vec![vec!["e".to_owned(), "foo".to_owned()]]; - assert!(event.event_tag_match("foo")); + event.build_index(); + assert_eq!( + event.generic_tag_val_intersect( + "e", + &HashSet::from(["foo".to_owned(), "bar".to_owned()]) + ), + true + ); Ok(()) } diff --git a/src/subscription.rs b/src/subscription.rs index 75337e1..20767b4 100644 --- a/src/subscription.rs +++ b/src/subscription.rs @@ -160,19 +160,28 @@ impl Subscription { } } +fn prefix_match(prefixes: &[String], target: &str) -> bool { + for prefix in prefixes { + if target.starts_with(prefix) { + return true; + } + } + // none matched + false +} + impl ReqFilter { - /// Check for a match within the authors list. 
fn ids_match(&self, event: &Event) -> bool { self.ids .as_ref() - .map(|vs| vs.contains(&event.id.to_owned())) + .map(|vs| prefix_match(vs, &event.id)) .unwrap_or(true) } fn authors_match(&self, event: &Event) -> bool { self.authors .as_ref() - .map(|vs| vs.contains(&event.pubkey.to_owned())) + .map(|vs| prefix_match(vs, &event.pubkey)) .unwrap_or(true) } @@ -259,9 +268,45 @@ mod tests { } #[test] - fn interest_id_nomatch() -> Result<()> { + fn interest_author_prefix_match() -> Result<()> { + // subscription with a filter for ID + let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"authors": ["abc"]}]"#)?; + let e = Event { + id: "foo".to_owned(), + pubkey: "abcd".to_owned(), + created_at: 0, + kind: 0, + tags: Vec::new(), + content: "".to_owned(), + sig: "".to_owned(), + tagidx: None, + }; + assert!(s.interested_in_event(&e)); + Ok(()) + } + + #[test] + fn interest_id_prefix_match() -> Result<()> { // subscription with a filter for ID let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"ids": ["abc"]}]"#)?; + let e = Event { + id: "abcd".to_owned(), + pubkey: "".to_owned(), + created_at: 0, + kind: 0, + tags: Vec::new(), + content: "".to_owned(), + sig: "".to_owned(), + tagidx: None, + }; + assert!(s.interested_in_event(&e)); + Ok(()) + } + + #[test] + fn interest_id_nomatch() -> Result<()> { + // subscription with a filter for ID + let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"ids": ["xyz"]}]"#)?; let e = Event { id: "abcde".to_owned(), pubkey: "".to_owned(), @@ -272,7 +317,7 @@ mod tests { sig: "".to_owned(), tagidx: None, }; - assert_eq!(s.interested_in_event(&e), false); + assert!(!s.interested_in_event(&e)); Ok(()) } @@ -291,7 +336,7 @@ mod tests { sig: "".to_owned(), tagidx: None, }; - assert_eq!(s.interested_in_event(&e), false); + assert!(!s.interested_in_event(&e)); Ok(()) } @@ -309,7 +354,7 @@ mod tests { sig: "".to_owned(), tagidx: None, }; - assert_eq!(s.interested_in_event(&e), true); + 
assert!(s.interested_in_event(&e)); Ok(()) } @@ -327,7 +372,7 @@ mod tests { sig: "".to_owned(), tagidx: None, }; - assert_eq!(s.interested_in_event(&e), true); + assert!(s.interested_in_event(&e)); Ok(()) } @@ -345,7 +390,7 @@ mod tests { sig: "".to_owned(), tagidx: None, }; - assert_eq!(s.interested_in_event(&e), true); + assert!(s.interested_in_event(&e)); Ok(()) } #[test] @@ -363,7 +408,7 @@ mod tests { sig: "".to_owned(), tagidx: None, }; - assert_eq!(s.interested_in_event(&e), true); + assert!(s.interested_in_event(&e)); Ok(()) } @@ -381,7 +426,7 @@ mod tests { sig: "".to_owned(), tagidx: None, }; - assert_eq!(s.interested_in_event(&e), false); + assert!(!s.interested_in_event(&e)); Ok(()) } }