about summary refs log tree commit diff
path: root/corgi/src
diff options
context:
space:
mode:
Diffstat (limited to 'corgi/src')
-rw-r--r-- corgi/src/caller.rs 365
-rw-r--r-- corgi/src/main.rs 278
-rw-r--r-- corgi/src/settings.rs 82
-rw-r--r-- corgi/src/stats.rs 158
-rw-r--r-- corgi/src/util.rs 66
5 files changed, 659 insertions, 290 deletions
diff --git a/corgi/src/caller.rs b/corgi/src/caller.rs
index a8b04c0..385177f 100644
--- a/corgi/src/caller.rs
+++ b/corgi/src/caller.rs
@@ -1,19 +1,79 @@
 use std::{
-	ffi::{self, CString},
-	io::Write,
 	net::IpAddr,
+	pin::Pin,
 	process::Stdio,
-	ptr,
-	str::FromStr,
+	sync::{
+		OnceLock,
+		mpsc::{Sender, TryRecvError},
+	},
+	task::Poll,
+	time::{Duration, Instant},
 };
 
+use futures_util::stream::StreamExt;
+use hyper::body::{Body, Bytes, Frame};
 use tokio::{
-	io::AsyncWriteExt,
-	process::Command,
-	sync::oneshot::{self, Receiver, Sender},
+	io::{AsyncRead, AsyncReadExt, AsyncWriteExt},
+	process::{Child, ChildStdout, Command},
 };
+use tokio_util::io::ReaderStream;
 
-use crate::{Script, ScriptKind};
+use crate::Script;
+
+/// Sending half of the channel that feeds the reaper task. It is set once by
+/// `start_graveyard` and used to hand spawned children over for reaping.
+static GRAVEYARD: OnceLock<Sender<GraveyardEvent>> = OnceLock::new();
+/// Target length, in milliseconds, of one reaper pass including its sleep.
+const GRAVEYARD_WAIT_MS: u128 = 1000;
+
+/// Messages accepted by the reaper task.
+enum GraveyardEvent {
+	/// Hand over a spawned child so it can be polled until it has exited.
+	Kill { child: Child },
+}
+
+/// Start the background task that reaps finished CGI child processes.
+///
+/// Installs the sending half of a channel in `GRAVEYARD` and spawns a tokio
+/// task. Roughly once per `GRAVEYARD_WAIT_MS` the task drains newly submitted
+/// children from the channel and drops every child whose `try_wait` reports
+/// that it has exited. A child whose `try_wait` fails is sent a kill and
+/// dropped as well.
+///
+/// If every sender goes away the task keeps running until the children it
+/// already holds have exited, then stops.
+///
+/// # Panics
+///
+/// Panics if `GRAVEYARD` was already set, i.e. when called a second time.
+pub fn start_graveyard() {
+	let (tx, rx) = std::sync::mpsc::channel();
+	GRAVEYARD.set(tx).unwrap();
+
+	tokio::task::spawn(async move {
+		let mut children = vec![];
+		// Set once every sender is gone; nothing new can arrive after that.
+		let mut disconnected = false;
+
+		loop {
+			let start = Instant::now();
+
+			// Drain everything queued since the last pass without blocking.
+			while !disconnected {
+				match rx.try_recv() {
+					Ok(GraveyardEvent::Kill { child }) => children.push(child),
+					Err(TryRecvError::Empty) => break,
+					Err(TryRecvError::Disconnected) => {
+						// Retrying would yield the same error forever and
+						// spin without ever awaiting, so record the closure
+						// and stop polling the channel.
+						eprintln!("graveyard channel closed");
+						disconnected = true;
+					}
+				}
+			}
+
+			let start_len = children.len();
+			children.retain_mut(|child| match child.try_wait() {
+				Err(e) => {
+					eprintln!("[graveyard] error waiting on child. {e}");
+					let _ = child.start_kill();
+					false
+				}
+				Ok(None) => true,
+				Ok(Some(_code)) => false,
+			});
+			let end_len = children.len();
+			println!("[graveyard] reaped {} children", start_len - end_len);
+
+			if disconnected && children.is_empty() {
+				// No senders left and nothing more to reap: the task is done.
+				return;
+			}
+
+			let elapsed = start.elapsed();
+
+			// Sleep for the rest of the interval, but never less than 100ms.
+			tokio::time::sleep(Duration::from_millis(
+				GRAVEYARD_WAIT_MS
+					.saturating_sub(elapsed.as_millis())
+					.max(100) as u64,
+			))
+			.await;
+		}
+	});
+}
 
 pub struct HttpRequest {
 	pub content_type: String,
@@ -62,12 +122,6 @@ impl HttpRequest {
 }
 
 pub async fn call_and_parse_cgi(script: Script, http: HttpRequest) -> CgiResponse {
-	if script.kind != ScriptKind::Executable {
-		eprintln!("Somehow made it to executable path with module script");
-		eprintln!("Script: {}", script.name);
-		panic!("TODO: recover")
-	}
-
 	let mut cmd = Command::new(&script.filename);
 
 	// Set env specified in the conf. Be sure we do this after we
@@ -81,7 +135,7 @@ pub async fn call_and_parse_cgi(script: Script, http: HttpRequest) -> CgiRespons
 		});
 
 	let cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
-	let output = if let Some(bytes) = http.body {
+	let child = if let Some(bytes) = http.body {
 		cmd.env("CONTENT_LENGTH", bytes.len().to_string());
 		let mut child = cmd.stdin(Stdio::piped()).spawn().unwrap();
 
@@ -95,162 +149,229 @@ pub async fn call_and_parse_cgi(script: Script, http: HttpRequest) -> CgiRespons
 		cmd_stdin.flush().await.unwrap();
 		drop(cmd_stdin);
 
-		child.wait_with_output().await.unwrap()
+		child
 	} else {
-		cmd.spawn().unwrap().wait_with_output().await.unwrap()
+		cmd.spawn().unwrap()
 	};
 
-	parse_cgi_response(&output.stdout)
+	parse_cgi_response(child).await
 }
 
-fn parse_cgi_response(stdout: &[u8]) -> CgiResponse {
-	let mut response = CgiResponse {
-		// Default status code is 200 per RFC
-		status: 200,
-		headers: vec![],
-		body: None,
-	};
+async fn parse_cgi_response(mut child: Child) -> CgiResponse {
+	let mut status = 200;
+	let mut headers = vec![];
+
+	let mut stdout = child.stdout.take().unwrap();
+	let mut weird = WeirdBuffer::new();
 
-	let mut curr = stdout;
 	loop {
-		// Find the newline to know where this header ends
-		let nl = curr.iter().position(|b| *b == b'\n').expect("no nl in header");
-		let line = &curr[..nl];
+		let newline_position = match weird.data().iter().position(|b| *b == b'\n') {
+			None => {
+				weird.reclaim();
 
-		// Find the colon to separate the key from the value
-		let colon = line.iter().position(|b| *b == b':').expect("no colon in header");
-		let key = &line[..colon];
-		let mut value = &line[colon + 1..];
+				let count = stdout.read(weird.free_mut()).await.unwrap();
+				if count == 0 {
+					panic!("data malformed. body not found")
+				}
 
+				weird.consume_free(count);
+				continue;
+			}
+			Some(nl) => nl,
+		};
+
+		// take_data consuming from the end of garbage is weird, but if we have
+		// the function also run consume_data for use, we need a mut reference
+		// which will block us from taking out immutable references later, like
+		// when forming the body
+		weird.consume_data(newline_position + 1);
+		let line = &weird.take_data(newline_position + 1)[..newline_position];
+
+		// zero-length line, we've hit the body!
+		if newline_position == 0 || (newline_position == 1 && line[0] == b'\r') {
+			let buffer = if weird.data_len() > 0 {
+				Some(weird.data().to_vec())
+			} else {
+				None
+			};
+
+			let body = StreamedBody {
+				buffer,
+				stream: ReaderStream::new(stdout),
+			};
+
+			GRAVEYARD
+				.get()
+				.unwrap()
+				.send(GraveyardEvent::Kill { child })
+				.unwrap();
+
+			return CgiResponse {
+				status,
+				headers,
+				body,
+			};
+		}
+
+		let colon_position = match line.iter().position(|b| *b == b':') {
+			None => {
+				panic!("malformed header: no colon in header")
+			}
+			Some(cpos) => cpos,
+		};
+
+		let key = &line[..colon_position];
+		let mut value = &line[colon_position + 1..];
 		if value[0] == b' ' {
 			value = &value[1..];
 		}
-		if value[value.len().saturating_sub(1)] == b'\r' {
-			value = &value[..value.len().saturating_sub(1)];
+		if value[value.len() - 1] == b'\r' {
+			value = &value[..value.len() - 1];
 		}
 
-		response.headers.push((key.to_vec(), value.to_vec()));
+		headers.push((key.to_vec(), value.to_vec()));
 
-		// Is this header a status line?
+		// Look for and extract status line
 		let key_string = String::from_utf8_lossy(key);
 		if key_string == "Status" {
 			let value_string = String::from_utf8_lossy(value);
 			if let Some((raw_code, _raw_msg)) = value_string.trim().split_once(' ') {
 				let code: u16 = raw_code.parse().unwrap();
-				response.status = code;
+				status = code;
 			}
 		}
-
-		// Body next?
-		let next_nl = curr[nl + 1] == b'\n';
-		let next_crlf = curr[nl + 1] == b'\r' && curr[nl + 2] == b'\n';
-		if next_nl || next_crlf {
-			let offset = if next_nl { 2 } else { 3 };
-			let body = &curr[nl + offset..];
-			if body.len() > 0 {
-				response.body = Some(body.to_vec());
-			}
-
-			return response;
-		}
-
-		// Move past the newline
-		curr = &curr[nl + 1..];
 	}
 }
 
-#[derive(Debug)]
 pub struct CgiResponse {
 	/// The Status header of the CGI response
 	pub status: u16,
 	/// Headers except "Status"
 	pub headers: Vec<(Vec<u8>, Vec<u8>)>,
 	/// CGI response body
-	pub body: Option<Vec<u8>>,
+	pub body: StreamedBody<ChildStdout>,
+}
+
+pub type StreamedChild = StreamedBody<ChildStdout>;
+
+pub struct StreamedBody<R: AsyncRead + Unpin> {
+	buffer: Option<Vec<u8>>,
+	stream: ReaderStream<R>,
 }
 
-#[repr(C)]
-struct ModuleRequest {
-	headers_len: ffi::c_ulong,
-	headers: *const [[*const ffi::c_char; 2]],
-	body_len: ffi::c_ulong,
-	body: *const u8,
+/// Emits the buffered bytes, if any, as the first frame and afterwards
+/// forwards whatever the wrapped reader stream yields.
+impl<R: AsyncRead + Unpin> Body for StreamedBody<R> {
+	type Data = Bytes;
+	type Error = std::io::Error;
+
+	fn poll_frame(
+		self: Pin<&mut Self>,
+		cx: &mut std::task::Context<'_>,
+	) -> Poll<Option<Result<Frame<Self::Data>, Self::Error>>> {
+		let body = self.get_mut();
+
+		// `take` leaves `None` behind, so the buffer is emitted at most once.
+		if let Some(leftover) = body.buffer.take() {
+			return Poll::Ready(Some(Ok(Frame::data(Bytes::from(leftover)))));
+		}
+
+		// Wrap successful chunks in a data frame; errors, end of stream and
+		// `Pending` pass through untouched.
+		body.stream.poll_next_unpin(cx).map_ok(Frame::data)
+	}
 }
 
-#[repr(C)]
-struct ModuleResponse {
-	status: ffi::c_ushort,
-	headers_len: ffi::c_ulong,
-	headers: &'static [[*const ffi::c_char; 2]],
-	body_len: ffi::c_ulong,
-	body: *const u8,
+struct WeirdBuffer {
+	buffer: [u8; 8096],
+	data_start: usize,
+	free_start: usize,
 }
 
-type HandleFn = unsafe extern "C" fn(*const ModuleRequest) -> *const ModuleResponse;
-type CleanupFn = unsafe extern "C" fn(*const ModuleResponse);
+impl WeirdBuffer {
+	pub fn new() -> Self {
+		Self {
+			buffer: [0; 8096],
+			data_start: 0,
+			free_start: 0,
+		}
+	}
 
-pub async fn call_and_parse_module(script: Script, req: HttpRequest) -> CgiResponse {
-	let (tx, rx) = oneshot::channel();
-	std::thread::spawn(move || unsafe { module_thread(script, req, tx) });
+	pub fn data_len(&self) -> usize {
+		self.free_start - self.data_start
+	}
 
-	rx.await.unwrap()
-}
+	pub fn data(&self) -> &[u8] {
+		&self.buffer[self.data_start..self.free_start]
+	}
 
-unsafe fn module_thread(script: Script, req: HttpRequest, tx: Sender<CgiResponse>) {
-	let env: Vec<(String, String)> = req
-		.build_kv()
-		.into_iter()
-		.chain(req.http_headers.into_iter())
-		.chain(script.env.into_iter())
-		.collect();
-
-	let mut headers_owned = vec![];
-	for (k, v) in env {
-		headers_owned.push([
-			CString::from_str(k.as_str()).unwrap(),
-			CString::from_str(v.as_str()).unwrap(),
-		]);
+	pub fn data_mut(&mut self) -> &mut [u8] {
+		&mut self.buffer[self.data_start..self.free_start]
 	}
 
-	let headers: Vec<[*const ffi::c_char; 2]> =
-		headers_owned.iter().map(|kvarr| [kvarr[0].as_ptr(), kvarr[1].as_ptr()]).collect();
+	pub fn take_data(&self, count: usize) -> &[u8] {
+		let data = &self.buffer[self.data_start - count..self.data_start];
+		data
+	}
 
-	let modreq = ModuleRequest {
-		headers_len: headers.len() as u64,
-		headers: &headers[..] as *const [[*const ffi::c_char; 2]],
-		body_len: req.body.as_ref().map(|v| v.len()).unwrap_or(0) as u64,
-		body: req.body.as_ref().map(|v| v.as_ptr()).unwrap_or(ptr::null()),
-	};
+	pub fn consume_data(&mut self, count: usize) {
+		if self.data_start + count > self.free_start {
+			panic!(
+				"tried to consume more data than there was. tried to consume {} from data section of len {}",
+				count,
+				self.data_len()
+			)
+		}
 
-	let mut cgi = CgiResponse {
-		status: 200,
-		headers: vec![],
-		body: None,
-	};
+		self.data_start += count
+	}
 
-	unsafe {
-		let lib = libloading::Library::new(script.filename).unwrap();
-		let handle: libloading::Symbol<HandleFn> = lib.get(b"cgi_handle").unwrap();
-		let free: libloading::Symbol<CleanupFn> = lib.get(b"cgi_cleanup").unwrap();
+	pub fn consume_free(&mut self, count: usize) {
+		if self.free_start + count > self.buffer.len() {
+			panic!(
+				"tried to consume more free space than there was. tried to consume {} from free section of {}",
+				count,
+				self.free_len()
+			)
+		}
+
+		self.free_start += count;
+	}
+
+	pub fn garbage_len(&self) -> usize {
+		self.data_start
+	}
+
+	pub fn garbage(&self) -> &[u8] {
+		&self.buffer[..self.data_start]
+	}
 
-		let response = handle((&modreq) as *const ModuleRequest);
-		let response_ref = response.as_ref().unwrap();
+	pub fn reclaim(&mut self) {
+		let src = self.buffer[self.data_start..self.free_start].as_ptr();
+		let len = self.data_len();
+		let dst = self.buffer[..len].as_mut_ptr();
 
-		for idx in 0..response_ref.headers_len {
-			let kvarr = response_ref.headers[idx as usize];
-			let k = ffi::CStr::from_ptr(kvarr[0]).to_string_lossy();
-			let v = ffi::CStr::from_ptr(kvarr[1]).to_string_lossy();
-			cgi.headers.push((k.as_bytes().to_vec(), v.as_bytes().to_vec()));
+		if self.data_start == 0 {
+			return;
 		}
 
-		let maybe_body: Option<Vec<u8>> = response_ref
-			.body
-			.as_ref()
-			.map(|b| std::slice::from_raw_parts(b, response_ref.body_len as usize).to_vec());
-		cgi.body = maybe_body;
+		unsafe {
+			std::ptr::copy(src, dst, self.data_len());
+		}
 
-		free(response);
-	};
+		self.data_start = 0;
+		self.free_start = len;
+	}
+
+	pub fn free_len(&self) -> usize {
+		self.buffer.len() - self.free_start
+	}
 
-	tx.send(cgi).unwrap()
+	pub fn free(&self) -> &[u8] {
+		&self.buffer[self.free_start..]
+	}
+
+	pub fn free_mut(&mut self) -> &mut [u8] {
+		&mut self.buffer[self.free_start..]
+	}
 }
diff --git a/corgi/src/main.rs b/corgi/src/main.rs
index 6a3c528..bb44f05 100644
--- a/corgi/src/main.rs
+++ b/corgi/src/main.rs
@@ -1,119 +1,41 @@
+use core::fmt;
 use std::{
-	net::{IpAddr, SocketAddr},
-	path::PathBuf,
+	net::SocketAddr,
 	pin::Pin,
-	process::Stdio,
 	sync::Arc,
-	time::Instant,
+	time::{Duration, Instant},
 };
 
 use caller::HttpRequest;
-use confindent::{Confindent, Value, ValueParseError};
-use http_body_util::{BodyExt, Full};
+use http_body_util::{BodyExt, Either, Full};
 use hyper::{
 	HeaderMap, Request, Response, StatusCode,
 	body::{Bytes, Incoming},
-	header::HeaderValue,
 	server::conn::http1,
 	service::Service,
 };
 use hyper_util::rt::TokioIo;
-use regex_lite::Regex;
+use settings::{Script, Settings};
 use stats::Stats;
-use tokio::{io::AsyncWriteExt, net::TcpListener, process::Command, runtime::Runtime};
+use tokio::{net::TcpListener, runtime::Runtime};
+use util::owned_header;
+
+use crate::caller::StreamedChild;
 
 mod caller;
+mod settings;
 mod stats;
-
-#[derive(Clone, Debug)]
-pub struct Settings {
-	port: u16,
-	scripts: Vec<Script>,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-pub enum ScriptKind {
-	Executable,
-	Object,
-}
-
-#[derive(Clone, Debug)]
-pub struct Script {
-	name: String,
-	kind: ScriptKind,
-	regex: Option<Regex>,
-	filename: String,
-	env: Vec<(String, String)>,
-}
-
-const CONF_DEFAULT: &str = "/etc/corgi.conf";
+mod util;
 
 fn main() {
-	let conf_path = std::env::args().nth(1).unwrap_or(String::from(CONF_DEFAULT));
-	let conf = Confindent::from_file(conf_path).expect("failed to open conf");
-
-	let mut settings = Settings {
-		port: 26744,
-		scripts: conf.children("Script").into_iter().map(parse_script_conf).collect(),
-	};
-
-	if let Some(server) = conf.child("Server") {
-		match server.child_parse("Port") {
-			Err(ValueParseError::NoValue) => (),
-			Err(err) => {
-				eprintln!("Server.Port is malformed: {err}");
-				std::process::exit(1);
-			}
-			Ok(port) => settings.port = port,
-		}
-	}
-
-	let stats = Stats::new(PathBuf::from(
-		conf.get("Server/StatsDb").unwrap().to_owned(),
-	));
+	let settings = Settings::get();
+	let stats = Stats::new(&settings.stats_path);
 	stats.create_tables();
 
 	let rt = Runtime::new().unwrap();
 	rt.block_on(async { run(settings, stats).await });
 }
 
-fn parse_script_conf(conf: &Value) -> Script {
-	let name = conf.value_owned().expect("Missing value for 'Script' key");
-	let filename = conf.child_owned("Path").expect("Missing 'Path' key");
-	let environment = conf.child("Environment");
-	let env = environment
-		.map(|e| e.values().map(|v| (v.key_owned(), v.value_owned().unwrap())).collect());
-
-	let regex = match conf.get("Match/Regex") {
-		None => None,
-		Some(restr) => match Regex::new(restr) {
-			Err(err) => {
-				eprintln!("Failed to compile regex: {restr}\nerror: {err}");
-				std::process::exit(1);
-			}
-			Ok(re) => Some(re),
-		},
-	};
-
-	let kind = match conf.get("Type") {
-		None => ScriptKind::Executable,
-		Some("executable") => ScriptKind::Executable,
-		Some("object") => ScriptKind::Object,
-		Some(kind) => {
-			eprintln!("'{kind}' is not a valid script type");
-			std::process::exit(1)
-		}
-	};
-
-	Script {
-		name,
-		kind,
-		regex,
-		filename,
-		env: env.unwrap_or_default(),
-	}
-}
-
 // We have tokio::main at home :)
 async fn run(settings: Settings, stats: Stats) {
 	let addr = SocketAddr::from(([0, 0, 0, 0], settings.port));
@@ -125,11 +47,36 @@ async fn run(settings: Settings, stats: Stats) {
 		client_addr: addr,
 	};
 
+	let mut last_clean = None;
+
+	caller::start_graveyard();
+	println!("started graveyard!");
+
 	loop {
+		// Clean at the top so we do it once on boot, but keep out of the
+		// flow of the request to keep it speedy. This will delay accepting
+		// a new connection when the clean actually runs, but that is fine.
+		match last_clean {
+			None => {
+				let count = svc.stats.cleanup_ephemeral_requests();
+				println!("cleaned {count} requests from the ephemeral table");
+				last_clean = Some(Instant::now());
+			}
+			Some(inst) if inst.elapsed() >= Duration::from_secs(60 * 60) => {
+				let count = svc.stats.cleanup_ephemeral_requests();
+				println!("cleaned {count} requests from the ephemeral table");
+				last_clean = Some(Instant::now());
+			}
+			_ => (),
+		}
+
+		// Now we accept the connection and spawn a handler
 		let (stream, caddr) = listen.accept().await.unwrap();
 		let io = TokioIo::new(stream);
+
 		let mut svc_clone = svc.clone();
 		svc_clone.client_addr = caddr;
+
 		tokio::task::spawn(
 			async move { http1::Builder::new().serve_connection(io, svc_clone).await },
 		);
@@ -144,7 +91,7 @@ struct Svc {
 }
 
 impl Service<Request<Incoming>> for Svc {
-	type Response = Response<Full<Bytes>>;
+	type Response = Response<Either<Full<Bytes>, StreamedChild>>;
 	type Error = hyper::Error;
 	type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
 
@@ -163,62 +110,49 @@ impl Svc {
 		stats: Arc<Stats>,
 		caddr: SocketAddr,
 		req: Request<Incoming>,
-	) -> Response<Full<Bytes>> {
-		let start = Instant::now();
+	) -> Response<Either<Full<Bytes>, StreamedChild>> {
+		match Self::handle_fallible(settings, stats, caddr, req).await {
+			Err(re) => re.into_response(),
+			Ok(response) => response,
+		}
+	}
 
+	async fn handle_fallible(
+		settings: Settings,
+		stats: Arc<Stats>,
+		caddr: SocketAddr,
+		req: Request<Incoming>,
+	) -> Result<Response<Either<Full<Bytes>, StreamedChild>>, RuntimeError> {
 		// Collect things we need from the request before we eat it's body
 		let method = req.method().as_str().to_ascii_uppercase();
 		let version = req.version();
-		let path = req.uri().path().to_owned();
-		let query = req.uri().query().unwrap_or_default().to_owned();
-		let headers = req.headers().clone();
-
-		let body = req.into_body().collect().await.unwrap().to_bytes().to_vec();
-		let content_length = body.len();
-
-		let mut maybe_script = None;
-		for set_script in settings.scripts {
-			if let Some(regex) = set_script.regex.as_ref() {
-				if regex.is_match(&path) {
-					maybe_script = Some(set_script);
-					break;
-				}
-			} else {
-				maybe_script = Some(set_script);
-				break;
-			}
-		}
-
-		let script = match maybe_script {
-			Some(script) => script,
-			None => {
-				eprintln!("path didn't match any script");
-				panic!("TODO recover?");
-			}
-		};
+		let path = util::url_decode(req.uri().path(), false)?;
+		let query = req
+			.uri()
+			.query()
+			.map(|s| util::url_decode(s, false))
+			.transpose()?
+			.unwrap_or_default();
 
-		let content_type = headers
-			.get("content-type")
-			.map(|s| s.to_str().ok())
-			.flatten()
-			.unwrap_or_default()
-			.to_owned();
+		let script = Self::select_script(&settings, &path).ok_or(RuntimeError::NoScript)?;
 
-		let uagent = headers
-			.get("user-agent")
-			.map(|s| s.to_str().ok())
-			.flatten()
-			.unwrap_or_default()
-			.to_owned();
+		// Clone the headers and extract what we need
+		let headers = req.headers().clone();
+		let content_type = owned_header(headers.get("content-type")).unwrap_or_default();
+		let uagent = owned_header(headers.get("user-agent")).unwrap_or_default();
 
 		// Find the client address
 		let client_addr = {
-			let x_forward = Self::parse_addr_from_header(headers.get("x-forwarded-for"));
-			let forward = Self::parse_addr_from_header(headers.get("forwarded-for"));
+			let x_forward = util::parse_from_header(headers.get("x-forwarded-for"));
+			let forward = util::parse_from_header(headers.get("forwarded-for"));
 
 			forward.unwrap_or(x_forward.unwrap_or(caddr.ip()))
 		};
 
+		// Finally, get the body which consumes the request
+		let body = req.into_body().collect().await.unwrap().to_bytes().to_vec();
+		let content_length = body.len();
+
 		let server_name = headers
 			.get("Host")
 			.expect("no http host header set")
@@ -239,16 +173,9 @@ impl Svc {
 			body: if content_length > 0 { Some(body) } else { None },
 		};
 
-		let start_cgi = Instant::now();
-		let cgi_response = match script.kind {
-			ScriptKind::Executable => {
-				caller::call_and_parse_cgi(script.clone(), http_request).await
-			}
-			ScriptKind::Object => caller::call_and_parse_module(script.clone(), http_request).await,
-		};
-		let cgi_time = start_cgi.elapsed();
-
+		let cgi_response = caller::call_and_parse_cgi(script.clone(), http_request).await;
 		let status = StatusCode::from_u16(cgi_response.status).unwrap();
+
 		let mut response = Response::builder().status(status);
 
 		for (key, value) in cgi_response.headers {
@@ -263,20 +190,28 @@ impl Svc {
 		};
 
 		println!(
-			"served to [{client_addr}]\n\tscript: {}\n\tpath: {path}\n\tcgi took {}ms. total time {}ms\n\tUA: {uagent}",
-			&script.name,
-			cgi_time.as_millis(),
-			start.elapsed().as_millis()
+			"served to [{client_addr}]\n\tscript: {}\n\tpath: {path}\n\tUA: {uagent}",
+			&script.name
 		);
 
 		stats.log_request(db_req);
 
-		let response_body = cgi_response.body.map(|v| Bytes::from(v)).unwrap_or(Bytes::new());
-		response.body(Full::new(response_body)).unwrap()
+		let response_body = cgi_response.body;
+		Ok(response.body(Either::Right(response_body)).unwrap())
 	}
 
-	fn parse_addr_from_header(maybe_hval: Option<&HeaderValue>) -> Option<IpAddr> {
-		maybe_hval.map(|h| h.to_str().ok()).flatten().map(|s| s.parse().ok()).flatten()
+	/// Return the first configured script that should handle `path`: either
+	/// one whose regex matches it or one that has no regex at all.
+	fn select_script<'s>(settings: &'s Settings, path: &str) -> Option<&'s Script> {
+		settings.scripts.iter().find(|candidate| {
+			candidate
+				.regex
+				.as_ref()
+				.map_or(true, |pattern| pattern.is_match(path))
+		})
 	}
 
 	fn build_http_vec(headers: HeaderMap) -> Vec<(String, String)> {
@@ -312,13 +247,38 @@ impl Svc {
 	}
 }
 
-fn path_to_name(path: &str) -> String {
-	let mut ret = String::with_capacity(path.len());
-	for ch in path.chars() {
-		match ch {
-			'/' => ret.push('-'),
-			ch => ret.push(ch),
+/// Build a minimal HTML page for `status`, using the status code as title and
+/// heading and `msg` as the paragraph text.
+///
+/// The response carries a `text/html` content type and always uses the
+/// `Either::Left` (fully buffered) body variant. `msg` is interpolated as-is,
+/// without any HTML escaping.
+fn status_page<D: fmt::Display>(
+	status: u16,
+	msg: D,
+) -> Response<Either<Full<Bytes>, StreamedChild>> {
+	let body_str = format!(
+		"<html>\n\
+			\t<head><title>{status}</title></head>\n\
+			\t<body style='width: 20rem; padding: 0px; margin: 2rem;'>\n\
+			\t\t<h1>{status}</h1>\n\
+			\t\t<hr/>\n\
+			\t\t<p>{msg}</p>\n\
+			\t</body>\n\
+		</html>"
+	);
+
+	Response::builder()
+		.status(status)
+		.header("Content-Type", "text/html")
+		.body(Either::Left(Full::new(body_str.into())))
+		.unwrap()
+}
+
+/// Failures that are answered with an HTTP error page instead of a CGI
+/// response.
+enum RuntimeError {
+	/// Rendered as a 400 "bad request" page.
+	MalformedRequest,
+	/// Rendered as a 404 "failed to route request" page.
+	NoScript,
+}
+
+impl RuntimeError {
+	pub fn into_response(&self) -> Response<Either<Full<Bytes>, StreamedChild>> {
+		match self {
+			Self::MalformedRequest => status_page(400, "bad request"),
+			Self::NoScript => status_page(404, "failed to route request"),
 		}
 	}
-	ret
 }
diff --git a/corgi/src/settings.rs b/corgi/src/settings.rs
new file mode 100644
index 0000000..ee701b0
--- /dev/null
+++ b/corgi/src/settings.rs
@@ -0,0 +1,82 @@
+use std::path::PathBuf;
+
+use confindent::{Confindent, Value, ValueParseError};
+use regex_lite::Regex;
+
+const CONF_DEFAULT: &str = "/etc/corgi.conf";
+
+/// One `Script` section of the configuration file.
+#[derive(Clone, Debug)]
+pub struct Script {
+	/// Value of the `Script` key itself.
+	pub name: String,
+	/// Compiled `Match/Regex`, when the section has one.
+	pub regex: Option<Regex>,
+	/// Value of the `Path` child.
+	pub filename: String,
+	/// Key/value pairs listed under the `Environment` child.
+	pub env: Vec<(String, String)>,
+}
+
+/// Everything read from the configuration file.
+#[derive(Clone, Debug)]
+pub struct Settings {
+	/// `Server/Port`; 26744 unless the configuration overrides it.
+	pub port: u16,
+	/// One entry per `Script` section.
+	pub scripts: Vec<Script>,
+	/// `Server/StatsDb`, parsed as a path.
+	pub stats_path: PathBuf,
+}
+
+impl Settings {
+	/// Load the settings from the configuration file.
+	///
+	/// The path is the first command line argument, falling back to
+	/// `CONF_DEFAULT`. `Server/StatsDb` is required; `Server/Port` is optional
+	/// and defaults to 26744.
+	///
+	/// An unreadable file, a missing or malformed `Server/StatsDb`, or a
+	/// malformed `Server/Port` is reported on stderr and ends the process
+	/// with exit code 1. Problems inside a `Script` section are handled by
+	/// `parse_script_conf`.
+	pub fn get() -> Self {
+		let conf_path = std::env::args()
+			.nth(1)
+			.unwrap_or_else(|| String::from(CONF_DEFAULT));
+		let conf = Confindent::from_file(&conf_path).unwrap_or_else(|err| {
+			eprintln!("failed to open conf '{conf_path}': {err:?}");
+			std::process::exit(1);
+		});
+
+		// Scripts are parsed before StatsDb so that configuration errors are
+		// reported in the same order as before.
+		let scripts: Vec<Script> = conf
+			.children("Script")
+			.into_iter()
+			.map(parse_script_conf)
+			.collect();
+
+		let stats_path: PathBuf = conf.get_parse("Server/StatsDb").unwrap_or_else(|_| {
+			eprintln!("Server.StatsDb is missing or malformed");
+			std::process::exit(1);
+		});
+
+		let mut settings = Settings {
+			port: 26744,
+			scripts,
+			stats_path,
+		};
+
+		if let Some(server) = conf.child("Server") {
+			match server.child_parse("Port") {
+				// No port configured: keep the default.
+				Err(ValueParseError::NoValue) => (),
+				Err(err) => {
+					eprintln!("Server.Port is malformed: {err}");
+					std::process::exit(1);
+				}
+				Ok(port) => settings.port = port,
+			}
+		}
+
+		settings
+	}
+}
+
+/// Build a `Script` from one `Script` section of the configuration.
+///
+/// Panics when the section has no value, has no `Path` child, or lists an
+/// `Environment` entry without a value. An invalid `Match/Regex` is reported
+/// on stderr and ends the process with exit code 1.
+fn parse_script_conf(conf: &Value) -> Script {
+	let name = conf.value_owned().expect("Missing value for 'Script' key");
+	let filename = conf.child_owned("Path").expect("Missing 'Path' key");
+
+	let env: Vec<(String, String)> = match conf.child("Environment") {
+		Some(section) => section
+			.values()
+			.map(|entry| (entry.key_owned(), entry.value_owned().unwrap()))
+			.collect(),
+		None => Vec::new(),
+	};
+
+	let regex = conf.get("Match/Regex").map(|source| {
+		Regex::new(source).unwrap_or_else(|err| {
+			eprintln!("Failed to compile regex: {source}\nerror: {err}");
+			std::process::exit(1);
+		})
+	});
+
+	Script {
+		name,
+		regex,
+		filename,
+		env,
+	}
+}
diff --git a/corgi/src/stats.rs b/corgi/src/stats.rs
index 0e3b99a..9e9d15c 100644
--- a/corgi/src/stats.rs
+++ b/corgi/src/stats.rs
@@ -1,12 +1,9 @@
-use std::{
-	net::{IpAddr, SocketAddr},
-	path::PathBuf,
-	sync::Mutex,
-};
+use std::{net::IpAddr, path::Path, sync::Mutex};
 
 use base64::{Engine, prelude::BASE64_STANDARD_NO_PAD};
 use rusqlite::{Connection, OptionalExtension, params};
 use sha2::{Digest, Sha256};
+use time::{Duration, OffsetDateTime};
 
 #[derive(Debug)]
 pub struct Stats {
@@ -14,7 +11,7 @@ pub struct Stats {
 }
 
 impl Stats {
-	pub fn new(db_path: PathBuf) -> Self {
+	pub fn new(db_path: &Path) -> Self {
 		Self {
 			conn: Mutex::new(Connection::open(db_path).unwrap()),
 		}
@@ -22,8 +19,72 @@ impl Stats {
 
 	pub fn create_tables(&self) {
 		let conn = self.conn.lock().unwrap();
-		conn.execute(CREATE_TABLE_AGENT, ()).unwrap();
-		conn.execute(CREATE_TABLE_REQUESTS, ()).unwrap();
+
+		Self::set_wal(&conn);
+
+		// "agents" exists and trigger does not; we need to alter and prime
+		if Self::table_exists(&conn, "agents") && !Self::trigger_exists(&conn, "agent_count") {
+			println!("agents table exists, but needs request_count column. Altering and priming");
+			conn.execute(MIGRATE_AGENTS_ADD_REQUEST_COUNT, ()).unwrap();
+
+			Self::prime_agents_request_count(&conn);
+		} else {
+			conn.execute(CREATE_TABLE_AGENT, ()).unwrap();
+			conn.execute(CREATE_TABLE_REQUESTS, ()).unwrap();
+		}
+
+		conn.execute(CREATE_TRIGGER_COUNT_AGENT, ()).unwrap();
+
+		// Instead of just an IF NOT EXISTS here, we're checking it exists
+		// so we can copy an initial amount of requests from the main table
+		// to the ephemeral table.
+		if !Self::table_exists(&conn, "ephemeral_requests") {
+			println!("ephemeral_requests does not exist. Creating and priming");
+			conn.execute(CREATE_TRIGGER_EPHEMERAL, ()).unwrap();
+			conn.execute(CREATE_TABLE_EPHEMERAL_REQUESTS, ()).unwrap();
+
+			let count = Self::prime_ephemeral_table(&conn);
+			println!("Primed with {count} rows");
+		}
+	}
+
+	/// Ask SQLite to switch to the WAL journal mode and warn on stderr when
+	/// the mode it reports back is anything else.
+	fn set_wal(conn: &Connection) {
+		let reported: String = conn
+			.pragma_update_and_check(None, "journal_mode", "WAL", |row| row.get(0))
+			.unwrap();
+
+		if !reported.eq_ignore_ascii_case("wal") {
+			eprintln!("WARN sqlitedb did not successfully enter the WAL journal mode");
+		}
+	}
+
+	/// Whether a table called `name` exists in the database schema.
+	fn table_exists(conn: &Connection, name: &str) -> bool {
+		Self::schema_object_exists(conn, "table", name)
+	}
+
+	/// Whether a trigger called `name` exists in the database schema.
+	fn trigger_exists(conn: &Connection, name: &str) -> bool {
+		Self::schema_object_exists(conn, "trigger", name)
+	}
+
+	/// Look up `name` among the `sqlite_schema` entries of type `kind`.
+	///
+	/// Panics if the query itself fails; finding no row is not an error.
+	fn schema_object_exists(conn: &Connection, kind: &str, name: &str) -> bool {
+		let found: Option<String> = conn
+			.query_row(
+				"SELECT name FROM sqlite_schema WHERE type=?1 AND name=?2;",
+				params![kind, name],
+				|row| row.get(0),
+			)
+			.optional()
+			.unwrap();
+
+		found.is_some()
 	}
 
 	pub fn log_request(&self, request: Request) {
@@ -71,6 +132,61 @@ impl Stats {
 		)
 		.unwrap();
 	}
+
+	/// Lower-bound timestamp for ephemeral requests: the current UTC time
+	/// minus one day.
+	fn ephemeral_lifetime() -> OffsetDateTime {
+		OffsetDateTime::now_utc() - Duration::days(1)
+	}
+
+	/// Delete every `ephemeral_requests` row whose timestamp is older than
+	/// `ephemeral_lifetime()` and return how many rows were removed.
+	///
+	/// Logs to stderr and panics if the statement fails.
+	pub fn cleanup_ephemeral_requests(&self) -> usize {
+		let cutoff = Self::ephemeral_lifetime();
+		let conn = self.conn.lock().unwrap();
+
+		conn.execute(
+			"DELETE FROM ephemeral_requests WHERE timestamp < ?1;",
+			params![cutoff],
+		)
+		.unwrap_or_else(|e| {
+			eprintln!("ERROR failed to run ephemeral clean: {e}");
+			panic!();
+		})
+	}
+
+	/// Copy the id and timestamp of every `requests` row newer than
+	/// `ephemeral_lifetime()` into `ephemeral_requests` and return how many
+	/// rows were inserted.
+	///
+	/// Logs to stderr and panics if the statement fails.
+	fn prime_ephemeral_table(conn: &Connection) -> usize {
+		let cutoff = Self::ephemeral_lifetime();
+
+		conn.execute(
+			"INSERT INTO ephemeral_requests SELECT id, timestamp FROM requests WHERE timestamp > ?1;",
+			params![cutoff],
+		)
+		.unwrap_or_else(|e| {
+			eprintln!("ERROR failed to prime ephemeral: {e}");
+			panic!();
+		})
+	}
+
+	/// Fill `agents.request_count` from the rows already present in
+	/// `requests`.
+	///
+	/// This runs as one statement rather than one UPDATE per agent, so the
+	/// counts are written together. Agents without any request get a count
+	/// of 0.
+	///
+	/// Panics if the statement fails.
+	fn prime_agents_request_count(conn: &Connection) {
+		let sql = "UPDATE agents SET request_count = (SELECT count(id) FROM requests WHERE requests.agent_id = agents.id);";
+		conn.execute(sql, ()).unwrap();
+	}
 }
 
 pub struct Request<'r> {
@@ -84,9 +200,13 @@ const CREATE_TABLE_AGENT: &'static str = "\
 	CREATE TABLE IF NOT EXISTS agents(
 		id INTEGER PRIMARY KEY AUTOINCREMENT,
 		hash TEXT NOT NULL,
-		agent TEXT NOT NULL
+		agent TEXT NOT NULL,
+		request_count INTEGER NOT NULL DEFAULT 0
 	);";
 
+/// Adds the `request_count` column to an `agents` table that was created
+/// before the column existed.
+const MIGRATE_AGENTS_ADD_REQUEST_COUNT: &'static str =
+	"ALTER TABLE agents ADD COLUMN request_count INTEGER NOT NULL DEFAULT 0";
+
 const CREATE_TABLE_REQUESTS: &'static str = "\
 	CREATE TABLE IF NOT EXISTS requests(
 		id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -98,3 +218,23 @@ const CREATE_TABLE_REQUESTS: &'static str = "\
 		FOREIGN KEY (agent_id)
 			REFERENCES agents(id)
 	);";
+
+/// Trigger: after every insert into `requests`, copy the new row's id and
+/// timestamp into `ephemeral_requests`.
+const CREATE_TRIGGER_EPHEMERAL: &'static str = "\
+	CREATE TRIGGER IF NOT EXISTS requests_copy_ephemeral AFTER INSERT ON requests
+	BEGIN
+		INSERT INTO ephemeral_requests(request_id, timestamp) VALUES(new.id, new.timestamp);
+	END;";
+
+/// Trigger: after every insert into `requests`, increment `request_count` of
+/// the agent the new row refers to.
+const CREATE_TRIGGER_COUNT_AGENT: &'static str = "\
+	CREATE TRIGGER IF NOT EXISTS agent_count AFTER INSERT ON requests
+	BEGIN
+		UPDATE agents SET request_count = request_count + 1 WHERE agents.id = new.agent_id;
+	END;";
+
+/// Table of request ids and timestamps; each row references `requests(id)`.
+const CREATE_TABLE_EPHEMERAL_REQUESTS: &'static str = "\
+	CREATE TABLE IF NOT EXISTS ephemeral_requests(
+		request_id INTEGER PRIMARY KEY AUTOINCREMENT,
+		timestamp TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
+		FOREIGN KEY (request_id)
+			REFERENCES requests(id)
+	);";
diff --git a/corgi/src/util.rs b/corgi/src/util.rs
new file mode 100644
index 0000000..727c8c7
--- /dev/null
+++ b/corgi/src/util.rs
@@ -0,0 +1,66 @@
+use std::str::FromStr;
+
+use hyper::header::HeaderValue;
+
+use crate::RuntimeError;
+
+// Ripped and modified from gennyble/mavourings query.rs
+/// Percent-decode `urlencoded`.
+///
+/// `%XX` with two hex digits becomes the byte it names. A `%` that is not
+/// followed by two hex digits is copied through unchanged. A `+` becomes a
+/// space only when `plus_as_space` is set.
+///
+/// Returns `RuntimeError::MalformedRequest` if the decoded bytes are not
+/// valid UTF-8.
+pub fn url_decode(urlencoded: &str, plus_as_space: bool) -> Result<String, RuntimeError> {
+	// '+', '%' and the hex digits are ASCII, so the UTF-8 bytes can be
+	// scanned directly: bytes of multi-byte characters never collide with
+	// them.
+	let raw = urlencoded.as_bytes();
+	let mut decoded: Vec<u8> = Vec::with_capacity(raw.len());
+
+	// Value of the hex digit at `at`, if there is one.
+	let hex_at = |at: usize| raw.get(at).and_then(|b| char::from(*b).to_digit(16));
+
+	let mut pos = 0;
+	while pos < raw.len() {
+		match raw[pos] {
+			b'+' if plus_as_space => decoded.push(b' '),
+			b'%' => {
+				if let (Some(high), Some(low)) = (hex_at(pos + 1), hex_at(pos + 2)) {
+					decoded.push(high as u8 * 16 + low as u8);
+					pos += 3;
+					continue;
+				}
+
+				// Not a complete escape: keep the '%' and let whatever
+				// follows be handled on its own.
+				decoded.push(b'%');
+			}
+			byte => decoded.push(byte),
+		}
+
+		pos += 1;
+	}
+
+	decoded.shrink_to_fit();
+	String::from_utf8(decoded).map_err(|_| RuntimeError::MalformedRequest)
+}
+
+/// Parse a header value into `T`.
+///
+/// Returns `None` when the header is absent, cannot be read as a string, or
+/// does not parse as `T`.
+pub fn parse_from_header<T: FromStr>(maybe_hval: Option<&HeaderValue>) -> Option<T> {
+	let text = maybe_hval?.to_str().ok()?;
+	text.parse().ok()
+}
+
+/// Copy a header value into an owned `String`.
+///
+/// Returns `None` when the header is absent or cannot be read as a string.
+pub fn owned_header(maybe_hval: Option<&HeaderValue>) -> Option<String> {
+	let text = maybe_hval?.to_str().ok()?;
+	Some(text.to_owned())
+}