fix(NIP-01): allow limits on a per-filter basis

The original implementation of subscription limit applied to the
entire query, instead of the specific filter.  Now, each filter gets
its own query limit.  When a limit is applied, the most recent N
events will be returned, otherwise the default is to return the
earliest events (in order), for all matching events.
This commit is contained in:
Greg Heartsfield 2022-07-04 17:25:32 -05:00
parent e894a86566
commit 1c14adc766
2 changed files with 132 additions and 127 deletions

257
src/db.rs
View File

@ -7,6 +7,7 @@ use crate::hexrange::hex_range;
use crate::hexrange::HexSearch;
use crate::nip05;
use crate::schema::{upgrade_db, STARTUP_SQL};
use crate::subscription::ReqFilter;
use crate::subscription::Subscription;
use crate::utils::is_hex;
use governor::clock::Clock;
@ -377,152 +378,156 @@ fn repeat_vars(count: usize) -> String {
s
}
/// Create a dynamic SQL query string and params from a subscription.
fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) {
/// Create a dynamic SQL subquery and params from a subscription filter.
fn query_from_filter(f: &ReqFilter) -> (String, Vec<Box<dyn ToSql>>) {
// build a dynamic SQL query. all user-input is either an integer
// (sqli-safe), or a string that is filtered to only contain
// hexadecimal characters. Strings that require escaping (tag
// names/values) use parameters.
let mut limit: Option<u32> = None;
let mut query =
"SELECT DISTINCT(e.content) FROM event e LEFT JOIN tag t ON e.id=t.event_id ".to_owned();
// parameters
"SELECT DISTINCT(e.content), e.created_at FROM event e LEFT JOIN tag t ON e.id=t.event_id "
.to_owned();
// query parameters for SQLite
let mut params: Vec<Box<dyn ToSql>> = vec![];
// for every filter in the subscription, generate a where clause
let mut filter_clauses: Vec<String> = Vec::new();
for f in sub.filters.iter() {
// individual filter components
let mut filter_components: Vec<String> = Vec::new();
// Query for "authors", allowing prefix matches
if let Some(authvec) = &f.authors {
// take each author and convert to a hexsearch
let mut auth_searches: Vec<String> = vec![];
for auth in authvec {
match hex_range(auth) {
Some(HexSearch::Exact(ex)) => {
auth_searches.push("author=?".to_owned());
params.push(Box::new(ex));
}
Some(HexSearch::Range(lower, upper)) => {
auth_searches.push("(author>? AND author<?)".to_owned());
params.push(Box::new(lower));
params.push(Box::new(upper));
}
Some(HexSearch::LowerOnly(lower)) => {
auth_searches.push("author>?".to_owned());
params.push(Box::new(lower));
}
None => {
info!("Could not parse hex range from author {:?}", auth);
}
// individual filter components (single conditions such as an author or event ID)
let mut filter_components: Vec<String> = Vec::new();
// Query for "authors", allowing prefix matches
if let Some(authvec) = &f.authors {
// take each author and convert to a hexsearch
let mut auth_searches: Vec<String> = vec![];
for auth in authvec {
match hex_range(auth) {
Some(HexSearch::Exact(ex)) => {
auth_searches.push("author=?".to_owned());
params.push(Box::new(ex));
}
Some(HexSearch::Range(lower, upper)) => {
auth_searches.push("(author>? AND author<?)".to_owned());
params.push(Box::new(lower));
params.push(Box::new(upper));
}
Some(HexSearch::LowerOnly(lower)) => {
auth_searches.push("author>?".to_owned());
params.push(Box::new(lower));
}
None => {
info!("Could not parse hex range from author {:?}", auth);
}
}
let authors_clause = format!("({})", auth_searches.join(" OR "));
filter_components.push(authors_clause);
}
if let Some(lim) = f.limit {
limit = Some(lim)
}
// Query for Kind
if let Some(ks) = &f.kinds {
// kind is number, no escaping needed
let str_kinds: Vec<String> = ks.iter().map(|x| x.to_string()).collect();
let kind_clause = format!("kind IN ({})", str_kinds.join(", "));
filter_components.push(kind_clause);
}
// Query for event, allowing prefix matches
if let Some(idvec) = &f.ids {
// take each author and convert to a hexsearch
let mut id_searches: Vec<String> = vec![];
for id in idvec {
match hex_range(id) {
Some(HexSearch::Exact(ex)) => {
id_searches.push("event_hash=?".to_owned());
params.push(Box::new(ex));
}
Some(HexSearch::Range(lower, upper)) => {
id_searches.push("(event_hash>? AND event_hash<?)".to_owned());
params.push(Box::new(lower));
params.push(Box::new(upper));
}
Some(HexSearch::LowerOnly(lower)) => {
id_searches.push("event_hash>?".to_owned());
params.push(Box::new(lower));
}
None => {
info!("Could not parse hex range from id {:?}", id);
}
let authors_clause = format!("({})", auth_searches.join(" OR "));
filter_components.push(authors_clause);
}
// Query for Kind
if let Some(ks) = &f.kinds {
// kind is number, no escaping needed
let str_kinds: Vec<String> = ks.iter().map(|x| x.to_string()).collect();
let kind_clause = format!("kind IN ({})", str_kinds.join(", "));
filter_components.push(kind_clause);
}
// Query for event, allowing prefix matches
if let Some(idvec) = &f.ids {
// take each author and convert to a hexsearch
let mut id_searches: Vec<String> = vec![];
for id in idvec {
match hex_range(id) {
Some(HexSearch::Exact(ex)) => {
id_searches.push("event_hash=?".to_owned());
params.push(Box::new(ex));
}
Some(HexSearch::Range(lower, upper)) => {
id_searches.push("(event_hash>? AND event_hash<?)".to_owned());
params.push(Box::new(lower));
params.push(Box::new(upper));
}
Some(HexSearch::LowerOnly(lower)) => {
id_searches.push("event_hash>?".to_owned());
params.push(Box::new(lower));
}
None => {
info!("Could not parse hex range from id {:?}", id);
}
}
let id_clause = format!("({})", id_searches.join(" OR "));
filter_components.push(id_clause);
}
// Query for tags
if let Some(map) = &f.tags {
for (key, val) in map.iter() {
let mut str_vals: Vec<Box<dyn ToSql>> = vec![];
let mut blob_vals: Vec<Box<dyn ToSql>> = vec![];
for v in val {
if is_hex(v) {
if let Ok(h) = hex::decode(&v) {
blob_vals.push(Box::new(h));
}
} else {
str_vals.push(Box::new(v.to_owned()));
let id_clause = format!("({})", id_searches.join(" OR "));
filter_components.push(id_clause);
}
// Query for tags
if let Some(map) = &f.tags {
for (key, val) in map.iter() {
let mut str_vals: Vec<Box<dyn ToSql>> = vec![];
let mut blob_vals: Vec<Box<dyn ToSql>> = vec![];
for v in val {
if is_hex(v) {
if let Ok(h) = hex::decode(&v) {
blob_vals.push(Box::new(h));
}
} else {
str_vals.push(Box::new(v.to_owned()));
}
// create clauses with "?" params for each tag value being searched
let str_clause = format!("value IN ({})", repeat_vars(str_vals.len()));
let blob_clause = format!("value_hex IN ({})", repeat_vars(blob_vals.len()));
let tag_clause = format!("(name=? AND ({} OR {}))", str_clause, blob_clause);
// add the tag name as the first parameter
params.push(Box::new(key.to_owned()));
// add all tag values that are plain strings as params
params.append(&mut str_vals);
// add all tag values that are blobs as params
params.append(&mut blob_vals);
filter_components.push(tag_clause);
}
}
// Query for timestamp
if f.since.is_some() {
let created_clause = format!("created_at > {}", f.since.unwrap());
filter_components.push(created_clause);
}
// Query for timestamp
if f.until.is_some() {
let until_clause = format!("created_at < {}", f.until.unwrap());
filter_components.push(until_clause);
}
// combine all clauses, and add to filter_clauses
if !filter_components.is_empty() {
let mut fc = "( ".to_owned();
fc.push_str(&filter_components.join(" AND "));
fc.push_str(" )");
filter_clauses.push(fc);
// create clauses with "?" params for each tag value being searched
let str_clause = format!("value IN ({})", repeat_vars(str_vals.len()));
let blob_clause = format!("value_hex IN ({})", repeat_vars(blob_vals.len()));
let tag_clause = format!("(name=? AND ({} OR {}))", str_clause, blob_clause);
// add the tag name as the first parameter
params.push(Box::new(key.to_owned()));
// add all tag values that are plain strings as params
params.append(&mut str_vals);
// add all tag values that are blobs as params
params.append(&mut blob_vals);
filter_components.push(tag_clause);
}
}
// Query for timestamp
if f.since.is_some() {
let created_clause = format!("created_at > {}", f.since.unwrap());
filter_components.push(created_clause);
}
// Query for timestamp
if f.until.is_some() {
let until_clause = format!("created_at < {}", f.until.unwrap());
filter_components.push(until_clause);
}
// never display hidden events
query.push_str(" WHERE hidden!=TRUE ");
query.push_str(" WHERE hidden!=TRUE");
// build filter component conditions
if !filter_components.is_empty() {
query.push_str(" AND ");
query.push_str(&filter_components.join(" AND "));
}
// Apply per-filter limit to this subquery.
// The use of a LIMIT implies a DESC order, to capture only the most recent events.
if let Some(lim) = f.limit {
query.push_str(&format!(" ORDER BY e.created_at DESC LIMIT {}", lim))
} else {
query.push_str(" ORDER BY e.created_at ASC")
}
(query, params)
}
// combine all filters with OR clauses, if any exist
if !filter_clauses.is_empty() {
query.push_str(" AND (");
query.push_str(&filter_clauses.join(" OR "));
query.push_str(") ");
/// Create a dynamic SQL query string and params from a subscription.
fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) {
// build a dynamic SQL query for an entire subscription, based on
// SQL subqueries for filters.
let mut subqueries: Vec<String> = Vec::new();
// subquery params
let mut params: Vec<Box<dyn ToSql>> = vec![];
// for every filter in the subscription, generate a subquery
for f in sub.filters.iter() {
let (f_subquery, mut f_params) = query_from_filter(&f);
subqueries.push(f_subquery);
params.append(&mut f_params);
}
// add order clause
query.push_str(&format!(
" ORDER BY created_at {}",
limit.map_or("ASC", |_| "DESC")
));
if let Some(lim) = limit {
query.push_str(&format!(" LIMIT {}", lim))
}
debug!("query string: {}", query);
// encapsulate subqueries into select statements
let subqueries_selects: Vec<String> = subqueries
.iter()
.map(|s| {
return format!("SELECT content, created_at FROM ({})", s);
})
.collect();
let query: String = subqueries_selects.join(" UNION ");
info!("final query string: {}", query);
(query, params)
}

View File

@ -32,7 +32,7 @@ pub struct ReqFilter {
/// List of author public keys
pub authors: Option<Vec<String>>,
/// Limit number of results
pub limit: Option<u32>,
pub limit: Option<u64>,
/// Set of tags
#[serde(skip)]
pub tags: Option<HashMap<String, HashSet<String>>>,