improvement: block new readers when WAL is large

This commit is contained in:
Greg Heartsfield 2022-12-27 09:48:07 -06:00
parent 5f9fe1ce59
commit b23b3ce8ec
2 changed files with 27 additions and 5 deletions

View File

@@ -19,8 +19,10 @@ use r2d2_sqlite::SqliteConnectionManager;
use rusqlite::params; use rusqlite::params;
use rusqlite::types::ToSql; use rusqlite::types::ToSql;
use rusqlite::OpenFlags; use rusqlite::OpenFlags;
use tokio::sync::{Mutex, MutexGuard};
use std::fmt::Write as _; use std::fmt::Write as _;
use std::path::Path; use std::path::Path;
use std::sync::Arc;
use std::thread; use std::thread;
use std::time::Duration; use std::time::Duration;
use std::time::Instant; use std::time::Instant;
@@ -691,7 +693,7 @@ fn log_pool_stats(name: &str, pool: &SqlitePool) {
/// Perform database maintenance on a regular basis /// Perform database maintenance on a regular basis
pub async fn db_optimize(pool: SqlitePool) { pub async fn db_optimize_task(pool: SqlitePool) {
tokio::task::spawn(async move { tokio::task::spawn(async move {
loop { loop {
tokio::select! { tokio::select! {
@@ -710,7 +712,7 @@ pub async fn db_optimize(pool: SqlitePool) {
} }
/// Perform database WAL checkpoint on a regular basis /// Perform database WAL checkpoint on a regular basis
pub async fn db_checkpoint(pool: SqlitePool) { pub async fn db_checkpoint_task(pool: SqlitePool, safe_to_read: Arc<Mutex<u64>>) {
tokio::task::spawn(async move { tokio::task::spawn(async move {
// WAL size in pages. // WAL size in pages.
let mut current_wal_size = 0; let mut current_wal_size = 0;
@@ -724,6 +726,7 @@ pub async fn db_checkpoint(pool: SqlitePool) {
tokio::select! { tokio::select! {
_ = tokio::time::sleep(Duration::from_secs(CHECKPOINT_FREQ_SEC)) => { _ = tokio::time::sleep(Duration::from_secs(CHECKPOINT_FREQ_SEC)) => {
if let Ok(mut conn) = pool.get() { if let Ok(mut conn) = pool.get() {
let mut _guard:Option<MutexGuard<u64>> = None;
// the busy timer will block writers, so don't set // the busy timer will block writers, so don't set
// this any higher than you want max latency for event // this any higher than you want max latency for event
// writes. // writes.
@@ -732,6 +735,9 @@ pub async fn db_checkpoint(pool: SqlitePool) {
} else { } else {
// if the wal size has exceeded a threshold, increase the busy timeout. // if the wal size has exceeded a threshold, increase the busy timeout.
conn.busy_timeout(busy_wait_default_long).ok(); conn.busy_timeout(busy_wait_default_long).ok();
// take a lock that will prevent new readers.
info!("blocking new readers to perform wal_checkpoint");
_guard = Some(safe_to_read.lock().await);
} }
debug!("running wal_checkpoint(TRUNCATE)"); debug!("running wal_checkpoint(TRUNCATE)");
if let Ok(new_size) = checkpoint_db(&mut conn) { if let Ok(new_size) = checkpoint_db(&mut conn) {

View File

@@ -25,10 +25,12 @@ use hyper::{
use rusqlite::OpenFlags; use rusqlite::OpenFlags;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use tokio::sync::Mutex;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::Infallible; use std::convert::Infallible;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::Path; use std::path::Path;
use std::sync::Arc;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use std::sync::mpsc::Receiver as MpscReceiver; use std::sync::mpsc::Receiver as MpscReceiver;
use std::time::Duration; use std::time::Duration;
@@ -54,6 +56,7 @@ async fn handle_web_request(
broadcast: Sender<Event>, broadcast: Sender<Event>,
event_tx: tokio::sync::mpsc::Sender<SubmittedEvent>, event_tx: tokio::sync::mpsc::Sender<SubmittedEvent>,
shutdown: Receiver<()>, shutdown: Receiver<()>,
safe_to_read: Arc<Mutex<u64>>,
) -> Result<Response<Body>, Infallible> { ) -> Result<Response<Body>, Infallible> {
match ( match (
request.uri().path(), request.uri().path(),
@@ -114,6 +117,7 @@ async fn handle_web_request(
broadcast, broadcast,
event_tx, event_tx,
shutdown, shutdown,
safe_to_read,
)); ));
} }
// todo: trace, don't print... // todo: trace, don't print...
@@ -328,8 +332,13 @@ pub fn start_server(settings: Settings, shutdown_rx: MpscReceiver<()>) -> Result
2, 2,
false, false,
); );
db::db_optimize(maintenance_pool.clone()).await;
db::db_checkpoint(maintenance_pool).await; // Create a mutex that will block readers, so that a
// checkpoint can be performed quickly.
let safe_to_read = Arc::new(Mutex::new(0));
db::db_optimize_task(maintenance_pool.clone()).await;
db::db_checkpoint_task(maintenance_pool, safe_to_read.clone()).await;
// listen for (external to tokio) shutdown request // listen for (external to tokio) shutdown request
let controlled_shutdown = invoke_shutdown.clone(); let controlled_shutdown = invoke_shutdown.clone();
@@ -378,6 +387,7 @@ pub fn start_server(settings: Settings, shutdown_rx: MpscReceiver<()>) -> Result
let event = event_tx.clone(); let event = event_tx.clone();
let stop = invoke_shutdown.clone(); let stop = invoke_shutdown.clone();
let settings = settings.clone(); let settings = settings.clone();
let safe_to_read = safe_to_read.clone();
async move { async move {
// service_fn converts our function into a `Service` // service_fn converts our function into a `Service`
Ok::<_, Infallible>(service_fn(move |request: Request<Body>| { Ok::<_, Infallible>(service_fn(move |request: Request<Body>| {
@@ -389,6 +399,7 @@ pub fn start_server(settings: Settings, shutdown_rx: MpscReceiver<()>) -> Result
bcast.clone(), bcast.clone(),
event.clone(), event.clone(),
stop.subscribe(), stop.subscribe(),
safe_to_read.clone(),
) )
})) }))
} }
@@ -465,6 +476,7 @@ async fn nostr_server(
broadcast: Sender<Event>, broadcast: Sender<Event>,
event_tx: mpsc::Sender<SubmittedEvent>, event_tx: mpsc::Sender<SubmittedEvent>,
mut shutdown: Receiver<()>, mut shutdown: Receiver<()>,
safe_to_read: Arc<Mutex<u64>>,
) { ) {
// the time this websocket nostr server started // the time this websocket nostr server started
let orig_start = Instant::now(); let orig_start = Instant::now();
@@ -675,6 +687,10 @@ async fn nostr_server(
previous_query.send(()).ok(); previous_query.send(()).ok();
} }
if s.needs_historical_events() { if s.needs_historical_events() {
{
// acquire and immediately release lock; this ensures we do not start new queries during a wal checkpoint.
let _ = safe_to_read.lock().await;
}
// start a database query. this spawns a blocking database query on a worker thread. // start a database query. this spawns a blocking database query on a worker thread.
db::db_query(s, cid.to_owned(), pool.clone(), query_tx.clone(), abandon_query_rx).await; db::db_query(s, cid.to_owned(), pool.clone(), query_tx.clone(), abandon_query_rx).await;
} }