improvement: add indexed tag queries

This commit is contained in:
Greg Heartsfield 2022-01-05 17:33:53 -05:00
parent 19ed990c57
commit 7037555516
2 changed files with 90 additions and 20 deletions

View File

@ -9,6 +9,8 @@ use secp256k1::{schnorr, Secp256k1, VerifyOnly, XOnlyPublicKey};
use serde::{Deserialize, Deserializer, Serialize};
use serde_json::value::Value;
use serde_json::Number;
use std::collections::HashMap;
use std::collections::HashSet;
use std::str::FromStr;
use std::time::SystemTime;
@ -35,6 +37,9 @@ pub struct Event {
pub(crate) tags: Vec<Vec<String>>,
pub(crate) content: String,
pub(crate) sig: String,
// Optimization for tag search, built on demand
#[serde(skip)]
pub(crate) tagidx: Option<HashMap<String, HashSet<String>>>,
}
/// Simple tag type for array of array of strings.
@ -56,7 +61,9 @@ impl From<EventCmd> for Result<Event> {
if ec.cmd != "EVENT" {
Err(CommandUnknownError)
} else if ec.event.is_valid() {
Ok(ec.event)
let mut e = ec.event;
e.build_index();
Ok(e)
} else {
Err(EventInvalid)
}
@ -72,6 +79,30 @@ fn unix_time() -> u64 {
}
impl Event {
/// Build an event tag index
fn build_index(&mut self) {
// if there are no tags; just leave the index as None
if self.tags.is_empty() {
return;
}
// otherwise, build an index
let mut idx: HashMap<String, HashSet<String>> = HashMap::new();
// iterate over tags that have at least 2 elements
for t in self.tags.iter().filter(|x| x.len() > 1) {
let tagname = t.get(0).unwrap();
let tagval = t.get(1).unwrap();
// ensure a vector exists for this tag
if !idx.contains_key(tagname) {
idx.insert(tagname.clone(), HashSet::new());
}
// get the tag vec and insert entry
let tidx = idx.get_mut(tagname).expect("could not get tag vector");
tidx.insert(tagval.clone());
}
// save the tag structure
self.tagidx = Some(idx);
}
/// Create a short event identifier, suitable for logging.
pub fn get_event_id_prefix(&self) -> String {
self.id.chars().take(8).collect()
@ -193,6 +224,34 @@ impl Event {
pub fn pubkey_tag_match(&self, pubkey: &str) -> bool {
self.get_pubkey_tags().contains(&pubkey)
}
/// Generic tag match
pub fn generic_tag_match(&self, tagname: &str, tagvalue: &str) -> bool {
match &self.tagidx {
Some(idx) => {
// get the set of values for this tag
match idx.get(tagname) {
Some(valset) => valset.contains(tagvalue),
None => false,
}
}
None => false,
}
}
/// Determine if the given tag and value set intersect with tags in this event.
pub fn generic_tag_val_intersect(&self, tagname: &str, check: &HashSet<String>) -> bool {
match &self.tagidx {
Some(idx) => match idx.get(tagname) {
Some(valset) => {
let common = valset.intersection(check);
common.count() > 0
}
None => false,
},
None => false,
}
}
}
#[cfg(test)]

View File

@ -34,6 +34,12 @@ pub struct ReqFilter {
pub until: Option<u64>,
/// List of author public keys
pub authors: Option<Vec<String>>,
/// Set of event tags, for quick indexing
#[serde(skip)]
event_tag_set: Option<HashSet<String>>,
/// Set of pubkey tags, for quick indexing
#[serde(skip)]
pubkey_tag_set: Option<HashSet<String>>,
}
impl<'de> Deserialize<'de> for Subscription {
@ -76,8 +82,10 @@ impl<'de> Deserialize<'de> for Subscription {
let mut filters = vec![];
for fv in i {
let f: ReqFilter = serde_json::from_value(fv.take())
let mut f: ReqFilter = serde_json::from_value(fv.take())
.map_err(|_| serde::de::Error::custom("could not parse filter"))?;
// create indexes
f.update_tag_indexes();
filters.push(f);
}
Ok(Subscription {
@ -105,6 +113,15 @@ impl Subscription {
}
impl ReqFilter {
/// Update pubkey and event indexes
fn update_tag_indexes(&mut self) {
if let Some(event_vec) = &self.events {
self.event_tag_set = Some(HashSet::from_iter(event_vec.iter().cloned()));
}
if let Some(pubkey_vec) = &self.pubkeys {
self.pubkey_tag_set = Some(HashSet::from_iter(pubkey_vec.iter().cloned()));
}
}
/// Check for a match within the authors list.
fn ids_match(&self, event: &Event) -> bool {
self.ids
@ -119,33 +136,27 @@ impl ReqFilter {
.map(|vs| vs.contains(&event.pubkey.to_owned()))
.unwrap_or(true)
}
/// Check if this filter either matches, or does not care about the event tags.
fn event_match(&self, event: &Event) -> bool {
// This needs to be analyzed for performance; building these
// hash sets for each active subscription isn't great.
if let Some(es) = &self.events {
let event_refs =
HashSet::<_>::from_iter(event.get_event_tags().iter().map(|x| x.to_owned()));
let filter_refs = HashSet::<_>::from_iter(es.iter().map(|x| &x[..]));
let cardinality = event_refs.intersection(&filter_refs).count();
cardinality > 0
// an event match is performed by looking at the ReqFilter events field, and sending a hashset to the event to intersect with.
if let Some(es) = &self.event_tag_set {
// if there exists event tags in this filter, find if any intersect.
event.generic_tag_val_intersect("e", es)
} else {
// if no event tags were requested in a filter, we do match
true
}
}
/// Check if this filter either matches, or does not care about
/// the pubkey/petname tags.
/// Check if this filter either matches, or does not care about the event tags.
fn pubkey_match(&self, event: &Event) -> bool {
// This needs to be analyzed for performance; building these
// hash sets for each active subscription isn't great.
if let Some(ps) = &self.pubkeys {
let pubkey_refs =
HashSet::<_>::from_iter(event.get_pubkey_tags().iter().map(|x| x.to_owned()));
let filter_refs = HashSet::<_>::from_iter(ps.iter().map(|x| &x[..]));
let cardinality = pubkey_refs.intersection(&filter_refs).count();
cardinality > 0
// an event match is performed by looking at the ReqFilter events field, and sending a hashset to the event to intersect with.
if let Some(ps) = &self.pubkey_tag_set {
// if there exists event tags in this filter, find if any intersect.
event.generic_tag_val_intersect("p", ps)
} else {
// if no event tags were requested in a filter, we do match
true
}
}