mirror of https://github.com/scsibug/nostr-rs-relay.git
synced 2024-11-09 21:29:06 -05:00

feat: bulk loading script for importing events

parent 50daab8a6f
commit 76c77c3e56

src/bin/bulkloader.rs (new file, 176 lines)

@@ -0,0 +1,176 @@
use std::io;
use std::path::Path;
use std::sync::mpsc;
use std::thread;

use rusqlite::{params, OpenFlags, Transaction};
use tracing::*;

use nostr_rs_relay::config;
use nostr_rs_relay::db::{build_pool, PooledConnection};
use nostr_rs_relay::error::{Error, Result};
use nostr_rs_relay::event::{Event, single_char_tagname};
use nostr_rs_relay::schema::{curr_db_version, DB_VERSION};
use nostr_rs_relay::utils::is_lower_hex;

/// Bulk load JSONL data from STDIN into the database specified in config.toml (or ./nostr.db by default).
/// The database must already exist; this will not create a new one.
/// Tested against schema v13.
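///
/// Typical invocation (the binary name follows the file name under src/bin/):
///
///   cat events.jsonl | ./target/release/bulkloader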
pub fn main() -> Result<()> {
    let _trace_sub = tracing_subscriber::fmt::try_init();
    println!("Nostr-rs-relay Bulk Loader");
    // load settings and confirm the database directory exists.
    let settings = config::Settings::new();
    if !Path::new(&settings.database.data_directory).is_dir() {
        info!("Database directory does not exist");
        return Err(Error::DatabaseDirError);
    }
    // Get a database pool
    let pool = build_pool("bulk-loader", &settings, OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_CREATE, 1, 4, false);
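    // NOTE (assumption): the trailing arguments are taken to be min/max pool
    // size and a wait-for-db flag, matching build_pool in the relay's db module.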
    {
        // check the database schema version
        let mut conn: PooledConnection = pool.get()?;
        let version = curr_db_version(&mut conn)?;
        info!("current version is: {:?}", version);
        // ensure the schema version is current.
        if version != DB_VERSION {
            info!("version is not current, exiting");
            panic!("cannot write to schema other than v{}", DB_VERSION);
        }
    }
    // this channel will contain parsed events ready to be inserted
    let (event_tx, event_rx) = mpsc::sync_channel(100_000);
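    // the channel is bounded, so it also provides backpressure: if the writer
    // falls behind, the reader thread blocks once 100_000 parsed events are
    // queued, keeping memory use bounded.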
    // Thread for reading events
    let _stdin_reader_handler = thread::spawn(move || {
        let stdin = io::stdin();
        for readline in stdin.lines() {
            if let Ok(line) = readline {
                // try to parse a nostr event
                let eres: Result<Event, serde_json::Error> = serde_json::from_str(&line);
                if let Ok(mut e) = eres {
                    if let Ok(()) = e.validate() {
                        e.build_index();
                        //debug!("Event: {:?}", e);
                        event_tx.send(Some(e)).ok();
                    } else {
                        info!("could not validate event");
                    }
                } else {
                    info!("error parsing event: {:?}", eres);
                }
            } else {
                // error reading from stdin
                info!("error reading: {:?}", readline);
            }
        }
        info!("finished parsing events");
        // send a sentinel so the writer knows no more events are coming
        event_tx.send(None).ok();
        let ok: Result<()> = Ok(());
        ok
    });
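    // note: the reader handle is never joined; the None sentinel received by
    // the writer loop below is what signals that input is exhausted.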
    // get a connection for writing events
    let mut conn: PooledConnection = pool.get()?;
    let mut events_read = 0;
    let event_batch_size = 50_000;
    let mut new_events = 0;
    let mut has_more_events = true;
    while has_more_events {
        // begin a transaction
        let tx = conn.transaction()?;
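        // committing in large batches amortizes SQLite's per-transaction
        // fsync cost; committing once per event would be far slower.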
        // read in batch_size events and commit
        for _ in 0..event_batch_size {
            match event_rx.recv() {
                Ok(Some(e)) => {
                    events_read += 1;
                    // ignore ephemeral events
                    if !(e.kind >= 20000 && e.kind < 30000) {
                        match write_event(&tx, e) {
                            Ok(c) => {
                                new_events += c;
                            }
                            Err(e) => {
                                info!("error inserting event: {:?}", e);
                            }
                        }
                    }
                }
                Ok(None) => {
                    // the sender will never produce more events
                    has_more_events = false;
                    break;
                }
                Err(_) => {
                    // sender closed without a sentinel; stop rather than
                    // spinning on a disconnected channel.
                    info!("sender is closed");
                    has_more_events = false;
                    break;
                }
            }
        }
        tx.commit()?;
        info!("committed {} events...", new_events);
        conn.execute_batch("pragma wal_checkpoint(truncate)")?;
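        // truncating the WAL after each batch keeps the -wal file from
        // growing without bound during a large import.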
    }
    info!("finished reading input");
    info!("processed {} events", events_read);
    info!("stored {} new events", new_events);
    Ok(())
}

/// Write an event and update the tag table.
/// Assumes the event has its index built.
fn write_event(tx: &Transaction, e: Event) -> Result<usize> {
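    // returns the number of events newly stored: 1, or 0 for a duplicate.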
    let id_blob = hex::decode(&e.id).ok();
    let pubkey_blob: Option<Vec<u8>> = hex::decode(&e.pubkey).ok();
    let delegator_blob: Option<Vec<u8>> = e.delegated_by.as_ref().and_then(|d| hex::decode(d).ok());
    let event_str = serde_json::to_string(&e).ok();
    // ignore if the event hash is a duplicate.
    let ins_count = tx.execute(
        "INSERT OR IGNORE INTO event (event_hash, created_at, kind, author, delegated_by, content, first_seen, hidden) VALUES (?1, ?2, ?3, ?4, ?5, ?6, strftime('%s','now'), FALSE);",
        params![id_blob, e.created_at, e.kind, pubkey_blob, delegator_blob, event_str],
    )?;
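    // OR IGNORE depends on event_hash being unique (implied by the
    // duplicate-handling comment above); a duplicate reports zero changes.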
    if ins_count == 0 {
        return Ok(0);
    }
    // the tag table stores the event's row id, the tag name, and the tag value.
    let event_id = tx.last_insert_rowid();
    // look at each tag, creating new tag entries if appropriate.
    for t in e.tags.iter().filter(|x| x.len() > 1) {
        let tagname = t.get(0).unwrap();
        let tagnamechar_opt = single_char_tagname(tagname);
        if tagnamechar_opt.is_none() {
            // only single-char tag names are indexed
            continue;
        }
        // safe because len was > 1
        let tagval = t.get(1).unwrap();
        // insert as a BLOB if we can restore it losslessly;
        // this means it needs to be even length and lowercase.
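        // (e.g. "e" and "p" tag values, 64-char lowercase hex ids, round-trip
        // losslessly and take half the space as 32-byte blobs.)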
        if (tagval.len() % 2 == 0) && is_lower_hex(tagval) {
            tx.execute(
                "INSERT INTO tag (event_id, name, value_hex) VALUES (?1, ?2, ?3);",
                params![event_id, tagname, hex::decode(tagval).ok()],
            )?;
        } else {
            // otherwise, insert as text
            tx.execute(
                "INSERT INTO tag (event_id, name, value) VALUES (?1, ?2, ?3);",
                params![event_id, tagname, &tagval],
            )?;
        }
    }
    if e.kind == 0 || e.kind == 3 || e.kind == 41 || (e.kind >= 10000 && e.kind < 20000) {
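        // kind 0 (metadata), 3 (contacts), 41 (channel metadata), and kinds
        // 10000-19999 are replaceable: keep only the newest per (kind, author).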
        //let query = "SELECT id FROM event WHERE kind=? AND author=? ORDER BY created_at DESC LIMIT 1;";
        //let count: usize = tx.query_row(query, params![e.kind, pubkey_blob], |row| row.get(0))?;
        //info!("found {} rows that /would/ be preserved", count);
        match tx.execute(
            "DELETE FROM event WHERE kind=? AND author=? AND id NOT IN (SELECT id FROM event WHERE kind=? AND author=? ORDER BY created_at DESC LIMIT 1);",
            params![e.kind, pubkey_blob, e.kind, pubkey_blob],
        ) {
            Ok(_) => {}
            Err(x) => {
                info!("error deleting replaceable event: {:?}", x);
            }
        }
    }
    Ok(ins_count)
}