From 50daab8a6fda5fffbd6ffb04553798e3009abf9c Mon Sep 17 00:00:00 2001
From: Greg Heartsfield
Date: Fri, 6 Jan 2023 06:57:56 -0600
Subject: [PATCH] refactor: make a standalone re-tagging function

---
 src/event.rs  |  4 +--
 src/schema.rs | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/src/event.rs b/src/event.rs
index 0fc938d..daf99f1 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -176,11 +176,11 @@ impl Event {
     }
 
     /// Update delegation status
-    fn update_delegation(&mut self) {
+    pub fn update_delegation(&mut self) {
         self.delegated_by = self.delegated_author();
     }
     /// Build an event tag index
-    fn build_index(&mut self) {
+    pub fn build_index(&mut self) {
         // if there are no tags; just leave the index as None
         if self.tags.is_empty() {
             return;
diff --git a/src/schema.rs b/src/schema.rs
index 88abf1b..b5ab8ac 100644
--- a/src/schema.rs
+++ b/src/schema.rs
@@ -95,6 +95,20 @@ pub fn curr_db_version(conn: &mut Connection) -> Result {
     Ok(curr_version)
 }
 
+/// Determine event count
+pub fn db_event_count(conn: &mut Connection) -> Result {
+    let query = "SELECT count(*) FROM event;";
+    let count = conn.query_row(query, [], |row| row.get(0))?;
+    Ok(count)
+}
+
+/// Determine tag count
+pub fn db_tag_count(conn: &mut Connection) -> Result {
+    let query = "SELECT count(*) FROM tag;";
+    let count = conn.query_row(query, [], |row| row.get(0))?;
+    Ok(count)
+}
+
 fn mig_init(conn: &mut PooledConnection) -> Result {
     match conn.execute_batch(INIT_SQL) {
         Ok(()) => {
@@ -206,6 +220,62 @@ pub fn upgrade_db(conn: &mut PooledConnection) -> Result<()> {
     Ok(())
 }
 
+pub fn rebuild_tags(conn: &mut PooledConnection) -> Result<()> {
+    // Check how many events we have to process
+    let count = db_event_count(conn)?;
+    let update_each_percent = 0.05;
+    let mut percent_done = 0.0;
+    let mut events_processed = 0;
+    let start = Instant::now();
+    let tx = conn.transaction()?;
+    {
+        // Clear out table
+        tx.execute("DELETE FROM tag;", [])?;
+        let mut stmt = tx.prepare("select id, content from event order by id;")?;
+        let mut tag_rows = stmt.query([])?;
+        while let Some(row) = tag_rows.next()? {
+            if (events_processed as f32)/(count as f32) > percent_done {
+                info!("Tag update {}% complete...", (100.0*percent_done).round());
+                percent_done += update_each_percent;
+            }
+            // we want to capture the event_id that had the tag, the tag name, and the tag hex value.
+            let event_id: u64 = row.get(0)?;
+            let event_json: String = row.get(1)?;
+            let event: Event = serde_json::from_str(&event_json)?;
+            // look at each event, and each tag, creating new tag entries if appropriate.
+            for t in event.tags.iter().filter(|x| x.len() > 1) {
+                let tagname = t.get(0).unwrap();
+                let tagnamechar_opt = single_char_tagname(tagname);
+                if tagnamechar_opt.is_none() {
+                    continue;
+                }
+                // safe because len was > 1
+                let tagval = t.get(1).unwrap();
+                // insert as BLOB if we can restore it losslessly.
+                // this means it needs to be even length and lowercase.
+                if (tagval.len() % 2 == 0) && is_lower_hex(tagval) {
+                    tx.execute(
+                        "INSERT INTO tag (event_id, name, value_hex) VALUES (?1, ?2, ?3);",
+                        params![event_id, tagname, hex::decode(tagval).ok()],
+                    )?;
+                } else {
+                    // otherwise, insert as text
+                    tx.execute(
+                        "INSERT INTO tag (event_id, name, value) VALUES (?1, ?2, ?3);",
+                        params![event_id, tagname, &tagval],
+                    )?;
+                }
+            }
+            events_processed += 1;
+        }
+    }
+    tx.commit()?;
+    info!("rebuilt tags in {:?}", start.elapsed());
+    Ok(())
+}
+
+
+
 //// Migration Scripts
 
 fn mig_1_to_2(conn: &mut PooledConnection) -> Result {
@@ -337,7 +407,6 @@ fn mig_5_to_6(conn: &mut PooledConnection) -> Result {
         let mut stmt = tx.prepare("select id, content from event order by id;")?;
         let mut tag_rows = stmt.query([])?;
         while let Some(row) = tag_rows.next()? {
-            // we want to capture the event_id that had the tag, the tag name, and the tag hex value.
             let event_id: u64 = row.get(0)?;
             let event_json: String = row.get(1)?;
             let event: Event = serde_json::from_str(&event_json)?;