improvement: block new readers when WAL is large

Greg Heartsfield 2022-12-27 09:48:07 -06:00
parent 5f9fe1ce59
commit b23b3ce8ec
2 changed files with 27 additions and 5 deletions

View File

@@ -19,8 +19,10 @@ use r2d2_sqlite::SqliteConnectionManager;
 use rusqlite::params;
 use rusqlite::types::ToSql;
 use rusqlite::OpenFlags;
+use tokio::sync::{Mutex, MutexGuard};
 use std::fmt::Write as _;
 use std::path::Path;
+use std::sync::Arc;
 use std::thread;
 use std::time::Duration;
 use std::time::Instant;
@@ -691,7 +693,7 @@ fn log_pool_stats(name: &str, pool: &SqlitePool) {
 /// Perform database maintenance on a regular basis
-pub async fn db_optimize(pool: SqlitePool) {
+pub async fn db_optimize_task(pool: SqlitePool) {
     tokio::task::spawn(async move {
         loop {
             tokio::select! {
@@ -710,7 +712,7 @@ pub async fn db_optimize(pool: SqlitePool) {
 }
 /// Perform database WAL checkpoint on a regular basis
-pub async fn db_checkpoint(pool: SqlitePool) {
+pub async fn db_checkpoint_task(pool: SqlitePool, safe_to_read: Arc<Mutex<u64>>) {
     tokio::task::spawn(async move {
         // WAL size in pages.
         let mut current_wal_size = 0;
@@ -724,6 +726,7 @@ pub async fn db_checkpoint(pool: SqlitePool) {
             tokio::select! {
                 _ = tokio::time::sleep(Duration::from_secs(CHECKPOINT_FREQ_SEC)) => {
                     if let Ok(mut conn) = pool.get() {
+                        let mut _guard:Option<MutexGuard<u64>> = None;
                         // the busy timer will block writers, so don't set
                         // this any higher than you want max latency for event
                         // writes.
@@ -732,6 +735,9 @@ pub async fn db_checkpoint(pool: SqlitePool) {
                         } else {
                             // if the wal size has exceeded a threshold, increase the busy timeout.
                             conn.busy_timeout(busy_wait_default_long).ok();
+                            // take a lock that will prevent new readers.
+                            info!("blocking new readers to perform wal_checkpoint");
+                            _guard = Some(safe_to_read.lock().await);
                         }
                         debug!("running wal_checkpoint(TRUNCATE)");
                         if let Ok(new_size) = checkpoint_db(&mut conn) {

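The hunks above are the checkpoint side of the change: the task pre-declares an empty Option<MutexGuard> slot and only fills it, taking the safe_to_read lock and thereby blocking new readers, when the WAL has already grown past a threshold; a routine checkpoint never touches the mutex. A minimal sketch of that pattern, assuming tokio and using hypothetical constants and stand-in functions (wal_size_pages, run_checkpoint) in place of the relay's real pool and PRAGMA calls:

    use std::sync::Arc;
    use std::time::Duration;
    use tokio::sync::{Mutex, MutexGuard};

    // Assumed values; the relay's actual frequency and threshold may differ.
    const CHECKPOINT_FREQ_SEC: u64 = 60;
    const WAL_THRESHOLD_PAGES: u64 = 10_000;

    async fn checkpoint_task(safe_to_read: Arc<Mutex<u64>>) {
        loop {
            tokio::time::sleep(Duration::from_secs(CHECKPOINT_FREQ_SEC)).await;
            // Declared up front so that, if taken, the guard lives until the end
            // of this loop iteration and is released after the checkpoint runs.
            let mut _guard: Option<MutexGuard<u64>> = None;
            if wal_size_pages() > WAL_THRESHOLD_PAGES {
                // Large WAL: stop new readers from starting until the guard drops.
                _guard = Some(safe_to_read.lock().await);
            }
            run_checkpoint(); // stand-in for PRAGMA wal_checkpoint(TRUNCATE)
        }
    }

    // Hypothetical stand-ins so the sketch is self-contained.
    fn wal_size_pages() -> u64 { 0 }
    fn run_checkpoint() {}

In the common case (small WAL) the gate is never locked, so readers only pay for this when a checkpoint is actually at risk of being starved.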
View File

@@ -25,10 +25,12 @@ use hyper::{
 use rusqlite::OpenFlags;
 use serde::{Deserialize, Serialize};
 use serde_json::json;
+use tokio::sync::Mutex;
 use std::collections::HashMap;
 use std::convert::Infallible;
 use std::net::SocketAddr;
 use std::path::Path;
+use std::sync::Arc;
 use std::sync::atomic::Ordering;
 use std::sync::mpsc::Receiver as MpscReceiver;
 use std::time::Duration;
@@ -54,6 +56,7 @@ async fn handle_web_request(
     broadcast: Sender<Event>,
     event_tx: tokio::sync::mpsc::Sender<SubmittedEvent>,
     shutdown: Receiver<()>,
+    safe_to_read: Arc<Mutex<u64>>,
 ) -> Result<Response<Body>, Infallible> {
     match (
         request.uri().path(),
@@ -114,6 +117,7 @@ async fn handle_web_request(
                         broadcast,
                         event_tx,
                         shutdown,
+                        safe_to_read,
                     ));
                 }
                 // todo: trace, don't print...
@@ -328,8 +332,13 @@ pub fn start_server(settings: Settings, shutdown_rx: MpscReceiver<()>) -> Result
             2,
             false,
         );
-        db::db_optimize(maintenance_pool.clone()).await;
-        db::db_checkpoint(maintenance_pool).await;
+        // Create a mutex that will block readers, so that a
+        // checkpoint can be performed quickly.
+        let safe_to_read = Arc::new(Mutex::new(0));
+        db::db_optimize_task(maintenance_pool.clone()).await;
+        db::db_checkpoint_task(maintenance_pool, safe_to_read.clone()).await;
         // listen for (external to tokio) shutdown request
         let controlled_shutdown = invoke_shutdown.clone();
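The mutex is created once here and then travels by Arc clone: one clone goes to the checkpoint task, and every connection handler receives its own. A rough sketch of that wiring, with simplified placeholder functions rather than the relay's real db::db_checkpoint_task and nostr_server signatures:

    use std::sync::Arc;
    use tokio::sync::Mutex;

    #[tokio::main]
    async fn main() {
        // The u64 payload is never read; the mutex is used purely as a gate.
        let safe_to_read = Arc::new(Mutex::new(0u64));

        // One clone for the background checkpoint task...
        tokio::spawn(checkpoint_task(safe_to_read.clone()));

        // ...and a clone for each connection handler.
        for _ in 0..3 {
            tokio::spawn(handle_connection(safe_to_read.clone()));
        }
    }

    // Placeholders for the tasks sketched before and after this hunk.
    async fn checkpoint_task(_gate: Arc<Mutex<u64>>) {}
    async fn handle_connection(_gate: Arc<Mutex<u64>>) {}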
@@ -378,6 +387,7 @@ pub fn start_server(settings: Settings, shutdown_rx: MpscReceiver<()>) -> Result
             let event = event_tx.clone();
             let stop = invoke_shutdown.clone();
             let settings = settings.clone();
+            let safe_to_read = safe_to_read.clone();
             async move {
                 // service_fn converts our function into a `Service`
                 Ok::<_, Infallible>(service_fn(move |request: Request<Body>| {
@@ -389,6 +399,7 @@ pub fn start_server(settings: Settings, shutdown_rx: MpscReceiver<()>) -> Result
                         bcast.clone(),
                         event.clone(),
                         stop.subscribe(),
+                        safe_to_read.clone(),
                     )
                 }))
             }
@@ -465,6 +476,7 @@ async fn nostr_server(
     broadcast: Sender<Event>,
     event_tx: mpsc::Sender<SubmittedEvent>,
     mut shutdown: Receiver<()>,
+    safe_to_read: Arc<Mutex<u64>>,
 ) {
     // the time this websocket nostr server started
     let orig_start = Instant::now();
@@ -675,6 +687,10 @@ async fn nostr_server(
                         previous_query.send(()).ok();
                     }
                     if s.needs_historical_events() {
+                        {
+                            // acquire and immediately release lock; this ensures we do not start new queries during a wal checkpoint.
+                            let _ = safe_to_read.lock().await;
+                        }
                         // start a database query. this spawns a blocking database query on a worker thread.
                         db::db_query(s, cid.to_owned(), pool.clone(), query_tx.clone(), abandon_query_rx).await;
                     }
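On the reader side the gate is locked and immediately dropped just before a historical query is dispatched: if a checkpoint currently holds safe_to_read the reader waits here, but because the guard is never retained, readers do not delay one another or the query itself. A small sketch of that lock-then-drop step (spawn_query is a placeholder, not the relay's db::db_query signature):

    use std::sync::Arc;
    use tokio::sync::Mutex;

    async fn run_historical_query(safe_to_read: Arc<Mutex<u64>>) {
        {
            // Binding to `_` drops the guard as soon as this statement ends, so
            // this only waits out an in-progress checkpoint; it does not hold
            // the gate while the query runs.
            let _ = safe_to_read.lock().await;
        }
        // Placeholder for spawning the blocking database query on a worker thread.
        spawn_query().await;
    }

    async fn spawn_query() {}

Queries that were already running when the checkpoint began are not affected by this gate; the longer busy timeout set in db.rs is what covers those.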