feat: allow author and event id prefix search

This is an experimental non-NIP feature that allows a subscription
filter to include a prefix for authors and events.
This commit is contained in:
Greg Heartsfield 2022-01-25 18:21:43 -06:00
parent df251c821c
commit af453548ee
3 changed files with 274 additions and 50 deletions

229
src/db.rs
View File

@ -383,6 +383,80 @@ fn is_hex(s: &str) -> bool {
s.chars().all(|x| char::is_ascii_hexdigit(&x))
}
/// Report whether `s` is made up solely of the hex digit `f`, in either case.
///
/// The empty string contains no other character, so it also yields `true`.
fn is_all_fs(s: &str) -> bool {
    // Any non-ASCII character encodes to bytes >= 0x80, so a byte-wise check
    // rejects it just like a char-wise comparison would.
    s.bytes().all(|digit| matches!(digit, b'f' | b'F'))
}
/// The kind of lookup that `hex_range` derives from a hex string.
#[derive(PartialEq, Debug, Clone)]
enum HexSearch {
/// No range is needed: the input was a full 64-character (32-byte) value.
Exact(Vec<u8>),
/// Lower bound (inclusive) and upper bound (exclusive).
// NOTE(review): the SQL built from this variant in this file compares with strict
// `>` and `<`; confirm that matches the inclusive lower bound described here.
Range(Vec<u8>, Vec<u8>),
/// Lower bound only; there is no upper bound (everything up to MAX, inclusive).
LowerOnly(Vec<u8>),
}
/// Turn a hex prefix into the search that finds values beginning with it.
///
/// Returns:
/// * `None` when `s` contains a non-hex character or is longer than 64 characters.
/// * `Exact` when `s` is exactly 64 characters long.
/// * `LowerOnly` when `s` consists only of `f` digits, because no larger
///   prefix of the same length exists (the empty string lands here too).
/// * `Range(lower, upper)` otherwise, where `upper` is the prefix with its
///   last hex digit incremented, carrying leftwards as needed.
///
/// Odd-length prefixes are padded with a trailing `0` digit so that they
/// decode into whole bytes.
fn hex_range(s: &str) -> Option<HexSearch> {
    let digits = s.len();
    if digits > 64 || !is_hex(s) {
        return None;
    }
    if digits == 64 {
        return hex::decode(s).ok().map(HexSearch::Exact);
    }

    // Pad to an even number of digits; remember whether we did so.
    let half_byte = digits % 2 == 1;
    let padded = if half_byte {
        format!("{}0", s)
    } else {
        s.to_owned()
    };
    let lower = hex::decode(padded).ok()?;

    if is_all_fs(s) {
        // Nothing sorts above an all-f prefix, so only a lower bound applies.
        return Some(HexSearch::LowerOnly(lower));
    }

    // Increment the last *digit* of the prefix. For a padded prefix that digit
    // is the high nibble of the final byte (+0x10); otherwise it is the low
    // nibble (+0x01). An overflowing byte is zeroed and the carry moves one
    // byte to the left as a plain +1.
    let mut upper = lower.clone();
    let mut step: u8 = if half_byte { 0x10 } else { 0x01 };
    for byte in upper.iter_mut().rev() {
        match byte.checked_add(step) {
            Some(bumped) => {
                *byte = bumped;
                break;
            }
            None => {
                *byte = 0;
                step = 0x01;
            }
        }
    }
    Some(HexSearch::Range(lower, upper))
}
fn repeat_vars(count: usize) -> String {
if count == 0 {
return "".to_owned();
@ -409,17 +483,33 @@ fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) {
for f in sub.filters.iter() {
// individual filter components
let mut filter_components: Vec<String> = Vec::new();
// Query for "authors"
if f.authors.is_some() {
let authors_escaped: Vec<String> = f
.authors
.as_ref()
.unwrap()
.iter()
.filter(|&x| is_hex(x))
.map(|x| format!("x'{}'", x))
.collect();
let authors_clause = format!("author IN ({})", authors_escaped.join(", "));
// Query for "authors", allowing prefix matches
if let Some(authvec) = &f.authors {
// take each author and convert to a hexsearch
let mut auth_searches: Vec<String> = vec![];
for auth in authvec {
match hex_range(auth) {
Some(HexSearch::Exact(ex)) => {
info!("Exact match for author");
auth_searches.push("author=?".to_owned());
params.push(Box::new(ex));
}
Some(HexSearch::Range(lower, upper)) => {
auth_searches.push("(author>? AND author<?)".to_owned());
params.push(Box::new(lower));
params.push(Box::new(upper));
}
Some(HexSearch::LowerOnly(lower)) => {
// info!("{:?} => lower; {:?} ", auth, hex::encode(lower));
auth_searches.push("author>?".to_owned());
params.push(Box::new(lower));
}
None => {
info!("Could not parse hex range from {:?}", auth);
}
}
}
let authors_clause = format!("({})", auth_searches.join(" OR "));
filter_components.push(authors_clause);
}
// Query for Kind
@ -429,17 +519,31 @@ fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) {
let kind_clause = format!("kind IN ({})", str_kinds.join(", "));
filter_components.push(kind_clause);
}
// Query for event
if f.ids.is_some() {
let ids_escaped: Vec<String> = f
.ids
.as_ref()
.unwrap()
.iter()
.filter(|&x| is_hex(x))
.map(|x| format!("x'{}'", x))
.collect();
let id_clause = format!("event_hash IN ({})", ids_escaped.join(", "));
// Query for event, allowing prefix matches
if let Some(idvec) = &f.ids {
// take each id and convert to a hexsearch
let mut id_searches: Vec<String> = vec![];
for id in idvec {
match hex_range(id) {
Some(HexSearch::Exact(ex)) => {
id_searches.push("event_hash=?".to_owned());
params.push(Box::new(ex));
}
Some(HexSearch::Range(lower, upper)) => {
id_searches.push("(event_hash>? AND event_hash<?)".to_owned());
params.push(Box::new(lower));
params.push(Box::new(upper));
}
Some(HexSearch::LowerOnly(lower)) => {
id_searches.push("event_hash>?".to_owned());
params.push(Box::new(lower));
}
None => {
info!("Could not parse hex range from {:?}", id);
}
}
}
let id_clause = format!("({})", id_searches.join(" OR "));
filter_components.push(id_clause);
}
// Query for tags
@ -553,3 +657,84 @@ pub async fn db_query(
ok
});
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decode a hex literal used as an expected bound in the assertions below.
    fn bytes(h: &str) -> Vec<u8> {
        hex::decode(h).expect("invalid hex")
    }

    /// A full 64-character value is searched for exactly.
    #[test]
    fn hex_range_exact() -> Result<()> {
        let hex = "abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00";
        assert_eq!(hex_range(hex), Some(HexSearch::Exact(bytes(hex))));
        Ok(())
    }

    /// An even-length prefix yields a range ending at the next prefix.
    #[test]
    fn hex_full_range() -> Result<()> {
        assert_eq!(
            hex_range("aaaa"),
            Some(HexSearch::Range(bytes("aaaa"), bytes("aaab")))
        );
        Ok(())
    }

    /// An odd-length prefix is zero-padded and its last digit is incremented.
    #[test]
    fn hex_full_range_odd() -> Result<()> {
        assert_eq!(
            hex_range("abc"),
            Some(HexSearch::Range(bytes("abc0"), bytes("abd0")))
        );
        Ok(())
    }

    /// An odd-length prefix ending in `f` carries into the previous digit.
    #[test]
    fn hex_full_range_odd_end_f() -> Result<()> {
        assert_eq!(
            hex_range("abf"),
            Some(HexSearch::Range(bytes("abf0"), bytes("ac00")))
        );
        Ok(())
    }

    /// An even-length prefix ending in `ff` carries across a byte boundary.
    #[test]
    fn hex_full_range_carry() -> Result<()> {
        assert_eq!(
            hex_range("a0ff"),
            Some(HexSearch::Range(bytes("a0ff"), bytes("a100")))
        );
        Ok(())
    }

    /// An odd-length prefix whose carry continues into the next byte.
    #[test]
    fn hex_full_range_odd_carry() -> Result<()> {
        assert_eq!(
            hex_range("aff"),
            Some(HexSearch::Range(bytes("aff0"), bytes("b000")))
        );
        Ok(())
    }

    /// An all-f prefix has no upper bound.
    #[test]
    fn hex_no_upper() -> Result<()> {
        assert_eq!(hex_range("ffff"), Some(HexSearch::LowerOnly(bytes("ffff"))));
        Ok(())
    }

    /// An odd-length all-f prefix is padded and still has no upper bound.
    #[test]
    fn hex_no_upper_odd() -> Result<()> {
        assert_eq!(hex_range("fff"), Some(HexSearch::LowerOnly(bytes("fff0"))));
        Ok(())
    }

    /// Input containing non-hex characters is rejected.
    #[test]
    fn hex_range_rejects_non_hex() -> Result<()> {
        assert_eq!(hex_range("xyz"), None);
        Ok(())
    }

    /// Input longer than 64 characters is rejected.
    #[test]
    fn hex_range_rejects_too_long() -> Result<()> {
        assert_eq!(hex_range(&"a".repeat(65)), None);
        Ok(())
    }
}

View File

@ -145,6 +145,7 @@ impl Event {
return false;
}
// * validate the message digest (sig) using the pubkey & computed sha256 message hash.
let sig = schnorr::Signature::from_str(&self.sig).unwrap();
if let Ok(msg) = secp256k1::Message::from_slice(digest.as_ref()) {
let pubkey = XOnlyPublicKey::from_str(&self.pubkey).unwrap();
@ -193,21 +194,6 @@ impl Event {
serde_json::Value::Array(tags)
}
/// Generic tag match: report whether the tag index records `tagvalue`
/// among the values stored under `tagname`.
///
/// Returns `false` when there is no tag index or the tag name is absent from it.
// TODO: is this used anywhere?
pub fn generic_tag_match(&self, tagname: &str, tagvalue: &str) -> bool {
    self.tagidx
        .as_ref()
        .and_then(|index| index.get(tagname))
        .map_or(false, |values| values.contains(tagvalue))
}
/// Determine if the given tag and value set intersect with tags in this event.
pub fn generic_tag_val_intersect(&self, tagname: &str, check: &HashSet<String>) -> bool {
match &self.tagidx {
@ -258,7 +244,8 @@ mod tests {
/// The event returned by `simple_event()` must not report any `e` tag match.
#[test]
fn empty_event_tag_match() -> Result<()> {
let event = simple_event();
assert!(!event.event_tag_match("foo"));
// neither "foo" nor "bar" may intersect the values recorded for tag `e`
assert!(!event
.generic_tag_val_intersect("e", &HashSet::from(["foo".to_owned(), "bar".to_owned()])));
Ok(())
}
@ -266,7 +253,14 @@ mod tests {
// An event carrying a single `e` tag with value "foo" must intersect the set {"foo", "bar"}.
fn single_event_tag_match() -> Result<()> {
let mut event = simple_event();
event.tags = vec![vec!["e".to_owned(), "foo".to_owned()]];
assert!(event.event_tag_match("foo"));
// tags were assigned directly above; presumably this refreshes `tagidx` — confirm
event.build_index();
// NOTE(review): comparing against `true` is redundant; a plain `assert!(..)` would do
assert_eq!(
event.generic_tag_val_intersect(
"e",
&HashSet::from(["foo".to_owned(), "bar".to_owned()])
),
true
);
Ok(())
}

View File

@ -160,19 +160,28 @@ impl Subscription {
}
}
/// Return `true` when at least one entry of `prefixes` is a leading
/// substring of `target`. An empty `prefixes` slice never matches.
fn prefix_match(prefixes: &[String], target: &str) -> bool {
    prefixes
        .iter()
        .any(|candidate| target.starts_with(candidate.as_str()))
}
impl ReqFilter {
/// Check for a match within the ids list.
///
/// A filter with no `ids` list places no constraint and matches every event.
fn ids_match(&self, event: &Event) -> bool {
self.ids
.as_ref()
.map(|vs| vs.contains(&event.id.to_owned()))
// prefix comparison: an entry matches when the event id starts with it
.map(|vs| prefix_match(vs, &event.id))
.unwrap_or(true)
}
/// Check for a match within the authors list.
///
/// A filter with no `authors` list places no constraint and matches every event.
fn authors_match(&self, event: &Event) -> bool {
self.authors
.as_ref()
.map(|vs| vs.contains(&event.pubkey.to_owned()))
// prefix comparison: an entry matches when the event pubkey starts with it
.map(|vs| prefix_match(vs, &event.pubkey))
.unwrap_or(true)
}
@ -259,9 +268,45 @@ mod tests {
}
// An `authors` entry that is only a prefix of the event pubkey still matches.
#[test]
fn interest_id_nomatch() -> Result<()> {
fn interest_author_prefix_match() -> Result<()> {
// subscription with a filter for the author prefix "abc"
let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"authors": ["abc"]}]"#)?;
// the event pubkey "abcd" starts with the requested prefix
let e = Event {
id: "foo".to_owned(),
pubkey: "abcd".to_owned(),
created_at: 0,
kind: 0,
tags: Vec::new(),
content: "".to_owned(),
sig: "".to_owned(),
tagidx: None,
};
assert!(s.interested_in_event(&e));
Ok(())
}
/// An `ids` entry that is only a prefix of the event id still matches.
#[test]
fn interest_id_prefix_match() -> Result<()> {
    // event whose id begins with the prefix requested by the filter below
    let event = Event {
        id: String::from("abcd"),
        pubkey: String::from(""),
        created_at: 0,
        kind: 0,
        tags: vec![],
        content: String::from(""),
        sig: String::from(""),
        tagidx: None,
    };
    // subscription with a filter for the id prefix "abc"
    let sub: Subscription = serde_json::from_str(r#"["REQ","xyz",{"ids": ["abc"]}]"#)?;
    assert!(sub.interested_in_event(&event));
    Ok(())
}
#[test]
fn interest_id_nomatch() -> Result<()> {
// subscription with a filter for ID
let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"ids": ["xyz"]}]"#)?;
let e = Event {
id: "abcde".to_owned(),
pubkey: "".to_owned(),
@ -272,7 +317,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
assert_eq!(s.interested_in_event(&e), false);
assert!(!s.interested_in_event(&e));
Ok(())
}
@ -291,7 +336,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
assert_eq!(s.interested_in_event(&e), false);
assert!(!s.interested_in_event(&e));
Ok(())
}
@ -309,7 +354,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
assert_eq!(s.interested_in_event(&e), true);
assert!(s.interested_in_event(&e));
Ok(())
}
@ -327,7 +372,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
assert_eq!(s.interested_in_event(&e), true);
assert!(s.interested_in_event(&e));
Ok(())
}
@ -345,7 +390,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
assert_eq!(s.interested_in_event(&e), true);
assert!(s.interested_in_event(&e));
Ok(())
}
#[test]
@ -363,7 +408,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
assert_eq!(s.interested_in_event(&e), true);
assert!(s.interested_in_event(&e));
Ok(())
}
@ -381,7 +426,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
assert_eq!(s.interested_in_event(&e), false);
assert!(!s.interested_in_event(&e));
Ok(())
}
}