about summary refs log tree commit diff
diff options
context:
space:
mode:
authorgennyble <gen@nyble.dev>2025-08-19 20:24:21 -0500
committergennyble <gen@nyble.dev>2025-08-19 20:24:21 -0500
commit645e9496b8e0d6c6b7e7e6254760e816b9fc3382 (patch)
tree7f7753dc276f4e25d4ddf814dba431c2e1934b5e
parent323a50d29815fc1b15bbacac69442f23f4bee156 (diff)
downloadcorgi-645e9496b8e0d6c6b7e7e6254760e816b9fc3382.tar.gz
corgi-645e9496b8e0d6c6b7e7e6254760e816b9fc3382.zip
rework database; 1000x uptick in corgi-stats performance
-rw-r--r--.gitignore2
-rw-r--r--Cargo.lock13
-rw-r--r--corgi/Cargo.toml5
-rw-r--r--corgi/changelog.md16
-rw-r--r--corgi/src/main.rs29
-rw-r--r--corgi/src/stats.rs134
-rw-r--r--stats/Cargo.toml2
-rw-r--r--stats/src/main.rs29
8 files changed, 198 insertions, 32 deletions
diff --git a/.gitignore b/.gitignore
index cee48f1..76a8f24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
 /target
-*.sqlite
+*.sqlite*
 *.db
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index f3bd8c4..b8d94f6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -82,7 +82,7 @@ checksum = "ea618ded77af626818bde0f0802da7c20d47e38e23e37be40f6f807a76079e82"
 
 [[package]]
 name = "corgi"
-version = "1.0.1"
+version = "1.1.0"
 dependencies = [
  "base64",
  "confindent",
@@ -92,12 +92,13 @@ dependencies = [
  "regex-lite",
  "rusqlite",
  "sha2",
+ "time",
  "tokio",
 ]
 
 [[package]]
 name = "corgi-stats"
-version = "0.1.0"
+version = "1.1.0"
 dependencies = [
  "rusqlite",
  "time",
@@ -524,9 +525,9 @@ dependencies = [
 
 [[package]]
 name = "time"
-version = "0.3.40"
+version = "0.3.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d9c75b47bdff86fa3334a3db91356b8d7d86a9b839dab7d0bdc5c3d3a077618"
+checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
 dependencies = [
  "deranged",
  "itoa",
@@ -545,9 +546,9 @@ checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c"
 
 [[package]]
 name = "time-macros"
-version = "0.2.21"
+version = "0.2.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29aa485584182073ed57fd5004aa09c371f021325014694e432313345865fd04"
+checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49"
 dependencies = [
  "num-conv",
  "time-core",
diff --git a/corgi/Cargo.toml b/corgi/Cargo.toml
index df2a8ef..b2e56ba 100644
--- a/corgi/Cargo.toml
+++ b/corgi/Cargo.toml
@@ -6,7 +6,7 @@ license = "ISC"
 repository = "https://git.nyble.dev/corgi/about"
 readme = "../README.md"
 
-version = "1.0.1"
+version = "1.1.0"
 edition = "2024"
 
 [dependencies]
@@ -14,8 +14,9 @@ base64 = "0.22.1"
 http-body-util = "0.1.3"
 hyper-util = { version = "0.1.10", features = ["tokio"] }
 regex-lite = "0.1.6"
-rusqlite = { version = "0.34.0", features = ["bundled"] }
+rusqlite = { version = "0.34.0", features = ["bundled", "time"] }
 sha2 = "0.10.8"
+time = { version = "0.3.41", features = ["formatting"] }
 
 [dependencies.confindent]
 version = "2.2.1"
diff --git a/corgi/changelog.md b/corgi/changelog.md
index 8967e3e..1011e46 100644
--- a/corgi/changelog.md
+++ b/corgi/changelog.md
@@ -1,3 +1,19 @@
+# v1.1.0
+
+this release makes changes to the sqlite database aimed to allow applications
+ingesting the data to run more efficiently. in doing this, corgi-stats received
+a 1000x uplift when querying against my production table with 11 million rows.
+
+corgi will automatically perform the database migrations necessary and fill in
+initial values for the new column/table using the existing data. If you have
+a lot of rows in the database, this might take a few seconds.
+
+- switch sqlite db to use write-ahead logging  
+- add a `request_count` column to the `agents` table  
+- create an `ephemeral_requests` table, managed by corgi, to store the last 24 hours requests separate from the main `requests` table  
+- create a trigger to insert a row in the `ephemeral_requests` table when `requests` receives and insert  
+- create a trigger to increment `request_count` in the `agents` table when a row is inserted on `requests`  
+
 # v1.0.1
 
 - have corgi set the stats database to WAL journal mode.
\ No newline at end of file
diff --git a/corgi/src/main.rs b/corgi/src/main.rs
index 1772d68..1192c4c 100644
--- a/corgi/src/main.rs
+++ b/corgi/src/main.rs
@@ -1,5 +1,10 @@
 use core::fmt;
-use std::{net::SocketAddr, pin::Pin, sync::Arc};
+use std::{
+	net::SocketAddr,
+	pin::Pin,
+	sync::Arc,
+	time::{Duration, Instant},
+};
 
 use caller::HttpRequest;
 use http_body_util::{BodyExt, Full};
@@ -40,11 +45,33 @@ async fn run(settings: Settings, stats: Stats) {
 		client_addr: addr,
 	};
 
+	let mut last_clean = None;
+
 	loop {
+		// Clean at the top so we do it once on boot, but keep out of the
+		// flow of the request to keep it speedy. This will delay accepting
+		// a new connection when the clean actually runs, but that is fine.
+		match last_clean {
+			None => {
+				let count = svc.stats.cleanup_ephemeral_requests();
+				println!("cleaned {count} requests from the ephemeral table");
+				last_clean = Some(Instant::now());
+			}
+			Some(inst) if inst.elapsed() >= Duration::from_secs(60 * 60) => {
+				let count = svc.stats.cleanup_ephemeral_requests();
+				println!("cleaned {count} requests from the ephemeral table");
+				last_clean = Some(Instant::now());
+			}
+			_ => (),
+		}
+
+		// Now we accept the connection and spawn a handler
 		let (stream, caddr) = listen.accept().await.unwrap();
 		let io = TokioIo::new(stream);
+
 		let mut svc_clone = svc.clone();
 		svc_clone.client_addr = caddr;
+
 		tokio::task::spawn(
 			async move { http1::Builder::new().serve_connection(io, svc_clone).await },
 		);
diff --git a/corgi/src/stats.rs b/corgi/src/stats.rs
index 23e85a0..c407cd9 100644
--- a/corgi/src/stats.rs
+++ b/corgi/src/stats.rs
@@ -3,6 +3,7 @@ use std::{net::IpAddr, path::Path, sync::Mutex};
 use base64::{Engine, prelude::BASE64_STANDARD_NO_PAD};
 use rusqlite::{Connection, OptionalExtension, params};
 use sha2::{Digest, Sha256};
+use time::{Duration, OffsetDateTime, PrimitiveDateTime};
 
 #[derive(Debug)]
 pub struct Stats {
@@ -21,8 +22,30 @@ impl Stats {
 
 		Self::set_wal(&conn);
 
-		conn.execute(CREATE_TABLE_AGENT, ()).unwrap();
-		conn.execute(CREATE_TABLE_REQUESTS, ()).unwrap();
+		// "agents" exists and trigger does not; we need to alter and prime
+		if Self::table_exists(&conn, "agents") && !Self::trigger_exists(&conn, "agent_count") {
+			println!("agents table exists, but needs request_count column. Altering and priming");
+			conn.execute(MIGRATE_AGENTS_ADD_REQUEST_COUNT, ()).unwrap();
+
+			Self::prime_agents_request_count(&conn);
+		} else {
+			conn.execute(CREATE_TABLE_AGENT, ()).unwrap();
+			conn.execute(CREATE_TABLE_REQUESTS, ()).unwrap();
+		}
+
+		conn.execute(CREATE_TRIGGER_COUNT_AGENT, ()).unwrap();
+
+		// Instead of just an IF NOT EXISTS here, we're checking it exists
+		// so we can copy an initial amount of requests from the main table
+		// to the ephemeral table.
+		if !Self::table_exists(&conn, "ephemeral_requests") {
+			println!("ephemeral_requests does not exist. Creating and priming");
+			conn.execute(CREATE_TRIGGER_EPHEMERAL, ()).unwrap();
+			conn.execute(CREATE_TABLE_EPHEMERAL_REQUESTS, ()).unwrap();
+
+			let count = Self::prime_ephemeral_table(&conn);
+			println!("Primed with {count} rows");
+		}
 	}
 
 	fn set_wal(conn: &Connection) {
@@ -38,6 +61,32 @@ impl Stats {
 		}
 	}
 
+	fn table_exists(conn: &Connection, name: &str) -> bool {
+		let exist: Option<String> = conn
+			.query_row(
+				"SELECT name FROM sqlite_schema WHERE type='table' AND name=?1;",
+				params![name],
+				|r| r.get(0),
+			)
+			.optional()
+			.unwrap();
+
+		exist.is_some()
+	}
+
+	fn trigger_exists(conn: &Connection, name: &str) -> bool {
+		let exist: Option<String> = conn
+			.query_row(
+				"SELECT name FROM sqlite_schema WHERE type='trigger' AND name=?1;",
+				params![name],
+				|r| r.get(0),
+			)
+			.optional()
+			.unwrap();
+
+		exist.is_some()
+	}
+
 	pub fn log_request(&self, request: Request) {
 		let Request {
 			agent,
@@ -83,6 +132,61 @@ impl Stats {
 		)
 		.unwrap();
 	}
+
+	/// Small, single line function to return the lower-bound date of ephemeral
+	/// requests.
+	fn ephemeral_lifetime() -> OffsetDateTime {
+		OffsetDateTime::now_utc() - Duration::days(1)
+	}
+
+	pub fn cleanup_ephemeral_requests(&self) -> usize {
+		let lower = Self::ephemeral_lifetime();
+
+		let sql = "DELETE FROM ephemeral_requests WHERE timestamp < ?1;";
+
+		let conn = self.conn.lock().unwrap();
+
+		match conn.execute(sql, params![lower]) {
+			Err(e) => {
+				eprintln!("ERROR failed to run ephemeral clean: {e}");
+				panic!();
+			}
+			Ok(count) => count,
+		}
+	}
+
+	fn prime_ephemeral_table(conn: &Connection) -> usize {
+		let lower = Self::ephemeral_lifetime();
+
+		let sql = "INSERT INTO ephemeral_requests SELECT id, timestamp FROM requests WHERE timestamp > ?1;";
+		match conn.execute(sql, params![lower]) {
+			Err(e) => {
+				eprintln!("ERROR failed to prime ephemeral: {e}");
+				panic!();
+			}
+			Ok(count) => count,
+		}
+	}
+
+	fn prime_agents_request_count(conn: &Connection) {
+		let sql = "SELECT  agent_id, count(id) as count FROM requests GROUP BY agent_id";
+		let mut prepared = conn.prepare(sql).unwrap();
+
+		let counts: Vec<(i64, i64)> = prepared
+			.query_map((), |row| Ok((row.get(0)?, row.get(1)?)))
+			.optional()
+			.unwrap()
+			.map(|iter| iter.map(|e| e.unwrap()).collect())
+			.unwrap();
+
+		for (agent, count) in counts {
+			conn.execute(
+				"UPDATE agents SET request_count = ?1 WHERE id = ?2;",
+				params![count, agent],
+			)
+			.unwrap();
+		}
+	}
 }
 
 pub struct Request<'r> {
@@ -96,9 +200,13 @@ const CREATE_TABLE_AGENT: &'static str = "\
 	CREATE TABLE IF NOT EXISTS agents(
 		id INTEGER PRIMARY KEY AUTOINCREMENT,
 		hash TEXT NOT NULL,
-		agent TEXT NOT NULL
+		agent TEXT NOT NULL,
+		request_count INTEGER NOT NULL DEFAULT 0
 	);";
 
+const MIGRATE_AGENTS_ADD_REQUEST_COUNT: &'static str =
+	"ALTER TABLE agents ADD COLUMN request_count INTEGER NOT NULL DEFAULT 0";
+
 const CREATE_TABLE_REQUESTS: &'static str = "\
 	CREATE TABLE IF NOT EXISTS requests(
 		id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -110,3 +218,23 @@ const CREATE_TABLE_REQUESTS: &'static str = "\
 		FOREIGN KEY (agent_id)
 			REFERENCES agents(id)
 	);";
+
+const CREATE_TRIGGER_EPHEMERAL: &'static str = "\
+	CREATE TRIGGER IF NOT EXISTS requests_copy_ephemeral AFTER INSERT ON requests
+	BEGIN
+		INSERT INTO ephemeral_requests(request_id, timestamp) VALUES(new.id, new.timestamp);
+	END;";
+
+const CREATE_TRIGGER_COUNT_AGENT: &'static str = "\
+	CREATE TRIGGER IF NOT EXISTS agent_count AFTER INSERT ON requests
+	BEGIN
+		UPDATE agents SET request_count = request_count + 1 WHERE agents.id = new.agent_id;
+	END;";
+
+const CREATE_TABLE_EPHEMERAL_REQUESTS: &'static str = "\
+	CREATE TABLE IF NOT EXISTS ephemeral_requests(
+		request_id INTEGER PRIMARY KEY AUTOINCREMENT,
+		timestamp TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
+		FOREIGN KEY (request_id)
+			REFERENCES requests(id)
+	);";
diff --git a/stats/Cargo.toml b/stats/Cargo.toml
index a012b94..c7e36b1 100644
--- a/stats/Cargo.toml
+++ b/stats/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "corgi-stats"
-version = "0.1.0"
+version = "1.1.0"
 edition = "2024"
 
 [dependencies]
diff --git a/stats/src/main.rs b/stats/src/main.rs
index 7e3f922..5b83bdd 100644
--- a/stats/src/main.rs
+++ b/stats/src/main.rs
@@ -3,18 +3,16 @@ use std::{io::Write, time::Instant};
 use rusqlite::{Connection, params};
 use time::{Duration, OffsetDateTime};
 
-// Thank you, cat, for optimizing my query
-const TOP_TEN_ALL_TIME: &str = "\
-	SELECT reqs.cnt, agents.agent 
-		FROM agents 
-		JOIN (
-			SELECT count(id) as cnt, agent_id 
-			FROM requests
-			GROUP BY agent_id
-		) reqs 
-		ON reqs.agent_id=agents.id
-		ORDER BY reqs.cnt DESC LIMIT 10;
-";
+const TOP_TEN_ALL_TIME: &str =
+	"SELECT request_count, agent FROM agents ORDER BY request_count DESC LIMIT 10;";
+
+const LAST_FIFTEEN_MINUTES: &str = "\
+	SELECT count(ephemeral_requests.request_id) as request_count, agents.agent
+	FROM ephemeral_requests
+	INNER JOIN requests ON ephemeral_requests.request_id = requests.id
+	INNER JOIN agents ON requests.agent_id = agents.id
+	WHERE ephemeral_requests.timestamp > ?1
+	GROUP BY requests.agent_id;";
 
 const STYLE: &'static str = include_str!("style.css");
 const FAVICON: &'static [u8] = include_bytes!("favicon.gif");
@@ -48,13 +46,8 @@ fn main() {
 	let now = OffsetDateTime::now_utc();
 	let fifteen_ago = now - Duration::minutes(15);
 
-	let query = "SELECT count(requests.id) AS request_count, agents.agent FROM requests \
-		INNER JOIN agents ON requests.agent_id = agents.id \
-		WHERE requests.timestamp > ?1 \
-		GROUP BY requests.agent_id;";
-
 	let start = Instant::now();
-	let mut prepared = db.prepare(query).unwrap();
+	let mut prepared = db.prepare(LAST_FIFTEEN_MINUTES).unwrap();
 	let mut agents: Vec<(usize, String)> = prepared
 		.query_map(params![fifteen_ago], |row| Ok((row.get(0)?, row.get(1)?)))
 		.unwrap()