From 388eadf8800a6e05f822bcac9c55112da0c8bf05 Mon Sep 17 00:00:00 2001 From: Kieran Date: Wed, 22 Nov 2023 22:45:30 +0000 Subject: [PATCH] feat: limit_scrapers Signed-off-by: Greg Heartsfield --- config.toml | 5 +++++ src/config.rs | 2 ++ src/server.rs | 10 +++++++--- src/subscription.rs | 45 +++++++++++++++++++++++++++++++++++++++------ 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/config.toml b/config.toml index 56973cf..b5ae8c4 100644 --- a/config.toml +++ b/config.toml @@ -155,6 +155,11 @@ reject_future_seconds = 1800 # 0, 1, 2, 3, 7, 40, 41, 42, 43, 44, 30023, #] +# Rejects imprecise requests (kind-only and author-only requests, etc.) +# This is a temporary measure to improve the adoption of the outbox model +# It's recommended to have this enabled +limit_scrapers = true + [authorization] # Pubkey addresses in this array are whitelisted for event publishing. # Only valid events by these authors will be accepted, if the variable diff --git a/src/config.rs b/src/config.rs index 399b831..aa8bfb5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -74,6 +74,7 @@ pub struct Limits { pub event_persist_buffer: usize, // events to buffer for database commits (block senders if database writes are too slow) pub event_kind_blacklist: Option>, pub event_kind_allowlist: Option>, + pub limit_scrapers: bool, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -308,6 +309,7 @@ impl Default for Settings { event_persist_buffer: 4096, event_kind_blacklist: None, event_kind_allowlist: None, + limit_scrapers: false }, authorization: Authorization { pubkey_whitelist: None, // Allow any address to publish diff --git a/src/server.rs b/src/server.rs index 3c2b9f2..eef1e7b 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1261,7 +1261,6 @@ async fn nostr_server( // handle each type of message let evid = ec.event_id().to_owned(); let parsed : Result = Result::::from(ec); - metrics.cmd_event.inc(); match parsed { Ok(WrappedEvent(e)) => { metrics.cmd_event.inc(); @@ -1342,10 +1341,15 @@ 
async fn nostr_server( if conn.has_subscription(&s) { info!("client sent duplicate subscription, ignoring (cid: {}, sub: {:?})", cid, s.id); } else { - metrics.cmd_req.inc(); + metrics.cmd_req.inc(); if let Some(ref lim) = sub_lim_opt { lim.until_ready_with_jitter(jitter).await; } + if settings.limits.limit_scrapers && s.is_scraper() { + info!("subscription was scraper, ignoring (cid: {}, sub: {:?})", cid, s.id); + ws_stream.send(Message::Text(format!("[\"EOSE\",\"{}\"]", s.id))).await.ok(); + continue + } let (abandon_query_tx, abandon_query_rx) = oneshot::channel::<()>(); match conn.subscribe(s.clone()) { Ok(()) => { @@ -1369,7 +1373,7 @@ async fn nostr_server( // closing a request simply removes the subscription. let parsed : Result = Result::::from(cc); if let Ok(c) = parsed { - metrics.cmd_close.inc(); + metrics.cmd_close.inc(); // check if a query is currently // running, and remove it if so. let stop_tx = running_queries.remove(&c.id); diff --git a/src/subscription.rs b/src/subscription.rs index 17aaceb..4ceca64 100644 --- a/src/subscription.rs +++ b/src/subscription.rs @@ -45,8 +45,8 @@ pub struct ReqFilter { impl Serialize for ReqFilter { fn serialize(&self, serializer: S) -> Result - where - S: Serializer, + where + S: Serializer, { let mut map = serializer.serialize_map(None)?; if let Some(ids) = &self.ids { @@ -80,8 +80,8 @@ impl Serialize for ReqFilter { impl<'de> Deserialize<'de> for ReqFilter { fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, + where + D: Deserializer<'de>, { let received: Value = Deserialize::deserialize(deserializer)?; let filter = received.as_object().ok_or_else(|| { @@ -184,8 +184,8 @@ impl<'de> Deserialize<'de> for Subscription { /// Custom deserializer for subscriptions, which have a more /// complex structure than the other message types. 
fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, + where + D: Deserializer<'de>, { let mut v: Value = Deserialize::deserialize(deserializer)?; // this should be a 3-or-more element array. @@ -258,6 +258,29 @@ impl Subscription { } false } + + /// Is this subscription defined as a scraper query + pub fn is_scraper(&self) -> bool { + for f in &self.filters { + let mut precision = 0; + if f.ids.is_some() { + precision += 2; + } + if f.authors.is_some() { + precision += 1; + } + if f.kinds.is_some() { + precision += 1; + } + if f.tags.is_some() { + precision += 1; + } + if precision < 2 { + return true; + } + } + false + } } fn prefix_match(prefixes: &[String], target: &str) -> bool { @@ -647,4 +670,14 @@ mod tests { } Ok(()) } + + #[test] + fn is_scraper() -> Result<()> { + assert_eq!(true, serde_json::from_str::(r#"["REQ","some-id",{"kinds": [1984],"since": 123,"limit":1}]"#)?.is_scraper()); + assert_eq!(true, serde_json::from_str::(r#"["REQ","some-id",{"kinds": [1984]},{"kinds": [1984],"authors":["aaaa"]}]"#)?.is_scraper()); + assert_eq!(false, serde_json::from_str::(r#"["REQ","some-id",{"kinds": [1984],"authors":["aaaa"]}]"#)?.is_scraper()); + assert_eq!(false, serde_json::from_str::(r#"["REQ","some-id",{"ids": ["aaaa"]}]"#)?.is_scraper()); + assert_eq!(false, serde_json::from_str::(r##"["REQ","some-id",{"#p": ["aaaa"],"kinds":[1,4]}]"##)?.is_scraper()); + Ok(()) + } }