feat: increase wal_checkpoint time when WAL is large

Greg Heartsfield 2022-12-26 10:03:51 -06:00
parent 0b51675b38
commit 65fd0ed08b
2 changed files with 46 additions and 13 deletions


@@ -39,9 +39,8 @@ pub struct SubmittedEvent {
/// Database file
pub const DB_FILE: &str = "nostr.db";
/// How frequently to run maintenance
/// How many persisted events before DB maintenance is triggered.
pub const EVENT_MAINTENANCE_FREQ_SEC: u64 = 60;
/// How frequently to attempt checkpointing
pub const CHECKPOINT_FREQ_SEC: u64 = 60;
/// How many persisted events before we pause for backups.
/// It isn't clear this is enough to make the online backup API work yet.
@@ -119,8 +118,8 @@ enum SqliteStatus {
Other(u64),
}
/// Checkpoint/Truncate WAL
pub fn checkpoint_db(conn: &mut PooledConnection) -> Result<()> {
/// Checkpoint/Truncate WAL. Returns the number of WAL pages remaining.
pub fn checkpoint_db(conn: &mut PooledConnection) -> Result<usize> {
let query = "PRAGMA wal_checkpoint(TRUNCATE);";
let start = Instant::now();
let (cp_result, wal_size, _frames_checkpointed) = conn.query_row(query, [], |row| {
@@ -141,7 +140,7 @@ pub fn checkpoint_db(conn: &mut PooledConnection) -> Result<()> {
result,
wal_size
);
Ok(())
Ok(wal_size as usize)
}
/// Spawn a database writer that persists events to the SQLite store.
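For reference, PRAGMA wal_checkpoint(TRUNCATE) returns a single row with three columns: a busy flag, the number of frames currently in the WAL, and the number of frames checkpointed. The second column is what checkpoint_db now returns to its caller. A minimal standalone sketch of reading that row with rusqlite; the function name read_checkpoint_row is illustrative and not part of this commit:

use rusqlite::Connection;

// Illustrative only: read the row produced by wal_checkpoint(TRUNCATE).
// Column 0: 1 if the checkpoint was blocked from completing, else 0.
// Column 1: frames (pages) currently in the WAL file.
// Column 2: frames successfully moved back into the main database.
fn read_checkpoint_row(conn: &Connection) -> rusqlite::Result<(i64, i64, i64)> {
    conn.query_row("PRAGMA wal_checkpoint(TRUNCATE);", [], |row| {
        Ok((row.get(0)?, row.get(1)?, row.get(2)?))
    })
}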
@@ -690,21 +689,54 @@ fn log_pool_stats(name: &str, pool: &SqlitePool) {
);
}
/// Perform database maintenance on a regular basis
pub async fn db_maintenance(pool: SqlitePool) {
pub async fn db_optimize(pool: SqlitePool) {
tokio::task::spawn(async move {
loop {
tokio::select! {
_ = tokio::time::sleep(Duration::from_secs(EVENT_MAINTENANCE_FREQ_SEC)) => {
_ = tokio::time::sleep(Duration::from_secs(60*60)) => {
if let Ok(mut conn) = pool.get() {
// the busy timer will block writers, so don't set
// this any higher than you want max latency for event
// writes.
conn.busy_timeout(Duration::from_secs(1)).ok();
debug!("running database optimizer");
info!("running database optimizer");
optimize_db(&mut conn).ok();
}
}
};
}
});
}
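For context on the busy-timeout tuning used here and in db_checkpoint below: rusqlite's busy_timeout wraps sqlite3_busy_timeout, which bounds how long a statement on this connection waits for a lock before failing with SQLITE_BUSY; during a checkpoint the connection can hold locks while it retries, which is why a longer timeout translates directly into worst-case event-write latency. A minimal illustrative sketch, not part of this commit:

use std::time::Duration;
use rusqlite::Connection;

// Illustrative only: cap how long maintenance statements wait on locks.
fn set_maintenance_timeout(conn: &Connection, secs: u64) -> rusqlite::Result<()> {
    conn.busy_timeout(Duration::from_secs(secs))
}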
/// Perform database WAL checkpoint on a regular basis
pub async fn db_checkpoint(pool: SqlitePool) {
tokio::task::spawn(async move {
// WAL size in pages.
let mut current_wal_size = 0;
// WAL threshold for more aggressive checkpointing (10,000 pages, or about 40MB)
let wal_threshold = 1000*10;
// default threshold for the busy timer
let busy_wait_default = Duration::from_secs(1);
// if the WAL file is getting too big, switch to this
let busy_wait_default_long = Duration::from_secs(5);
loop {
tokio::select! {
_ = tokio::time::sleep(Duration::from_secs(CHECKPOINT_FREQ_SEC)) => {
if let Ok(mut conn) = pool.get() {
// the busy timer will block writers, so don't set
// this any higher than you want max latency for event
// writes.
if current_wal_size <= wal_threshold {
conn.busy_timeout(busy_wait_default).ok();
} else {
// if the wal size has exceeded a threshold, increase the busy timeout.
conn.busy_timeout(busy_wait_default_long).ok();
}
debug!("running wal_checkpoint(TRUNCATE)");
checkpoint_db(&mut conn).ok();
if let Ok(new_size) = checkpoint_db(&mut conn) {
current_wal_size = new_size;
}
}
}
};
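The effect of the loop above is adaptive: every checkpoint reports the remaining WAL size, and the next iteration picks its busy timeout from that number. With SQLite's default 4096-byte page size, the 10,000-page threshold corresponds to roughly 40 MB of WAL. A hedged sketch of the selection logic in isolation; the helper name is illustrative and the constants mirror the values above:

use std::time::Duration;

// Illustrative helper mirroring the threshold logic in db_checkpoint.
fn busy_timeout_for(wal_pages: usize) -> Duration {
    const WAL_THRESHOLD_PAGES: usize = 10_000; // ~40 MB at the default 4 KiB page size
    if wal_pages <= WAL_THRESHOLD_PAGES {
        Duration::from_secs(1) // normal case: keep added write latency low
    } else {
        Duration::from_secs(5) // large WAL: wait longer so TRUNCATE can complete
    }
}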


@@ -325,10 +325,11 @@ pub fn start_server(settings: Settings, shutdown_rx: MpscReceiver<()>) -> Result
&settings,
OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_CREATE,
1,
1,
2,
false,
);
db::db_maintenance(maintenance_pool).await;
db::db_optimize(maintenance_pool.clone()).await;
db::db_checkpoint(maintenance_pool).await;
// listen for (external to tokio) shutdown request
let controlled_shutdown = invoke_shutdown.clone();
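Since both entry points call tokio::task::spawn internally, awaiting them at startup only registers the background loops and returns immediately; the pool-size change from 1 to 2 presumably lets the optimizer and the checkpointer each hold a maintenance connection at the same time. A hedged wiring sketch under those assumptions; the SqlitePool path and the helper name are illustrative:

// Hedged sketch: register both maintenance loops at startup.
// Assumes `SqlitePool` is the pool alias exported by the db module.
async fn start_db_maintenance(maintenance_pool: db::SqlitePool) {
    // spawns its hourly optimizer loop internally, then returns
    db::db_optimize(maintenance_pool.clone()).await;
    // spawns the CHECKPOINT_FREQ_SEC checkpoint loop internally, then returns
    db::db_checkpoint(maintenance_pool).await;
}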