diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | .rustfmt.toml | 1 | ||||
-rw-r--r-- | Cargo.lock | 47 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | DEVELOPEMENT.md | 8 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | TODO.md | 32 | ||||
-rw-r--r-- | corgi.conf | 16 | ||||
-rw-r--r-- | corgi/Cargo.toml | 6 | ||||
-rw-r--r-- | corgi/changelog.md | 19 | ||||
-rw-r--r-- | corgi/src/caller.rs | 109 | ||||
-rw-r--r-- | corgi/src/main.rs | 268 | ||||
-rw-r--r-- | corgi/src/settings.rs | 82 | ||||
-rw-r--r-- | corgi/src/stats.rs | 158 | ||||
-rw-r--r-- | corgi/src/util.rs | 66 | ||||
-rw-r--r-- | parrot_module/Cargo.toml | 10 | ||||
-rw-r--r-- | parrot_module/src/lib.rs | 67 | ||||
-rw-r--r-- | smalldog/Cargo.toml | 6 | ||||
-rw-r--r-- | smalldog/src/lib.rs | 133 | ||||
-rw-r--r-- | stats/Cargo.toml (renamed from stats_module/Cargo.toml) | 9 | ||||
-rw-r--r-- | stats/src/favicon.gif | bin | 0 -> 138 bytes | |||
-rw-r--r-- | stats/src/main.rs | 145 | ||||
-rw-r--r-- | stats/src/style.css | 37 | ||||
-rw-r--r-- | stats_module/src/lib.rs | 59 |
24 files changed, 681 insertions, 607 deletions
diff --git a/.gitignore b/.gitignore index cee48f1..76a8f24 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ /target -*.sqlite +*.sqlite* *.db \ No newline at end of file diff --git a/.rustfmt.toml b/.rustfmt.toml index 4639247..218e203 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,2 +1 @@ hard_tabs = true -chain_width = 100 diff --git a/Cargo.lock b/Cargo.lock index 6795892..b8d94f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,21 +82,29 @@ checksum = "ea618ded77af626818bde0f0802da7c20d47e38e23e37be40f6f807a76079e82" [[package]] name = "corgi" -version = "1.0.0" +version = "1.1.0" dependencies = [ "base64", "confindent", "http-body-util", "hyper", "hyper-util", - "libloading", "regex-lite", "rusqlite", "sha2", + "time", "tokio", ] [[package]] +name = "corgi-stats" +version = "1.1.0" +dependencies = [ + "rusqlite", + "time", +] + +[[package]] name = "cpufeatures" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -318,16 +326,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] -name = "libloading" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" -dependencies = [ - "cfg-if", - "windows-targets", -] - -[[package]] name = "libsqlite3-sys" version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -384,10 +382,6 @@ name = "parrot" version = "0.1.0" [[package]] -name = "parrot_module" -version = "0.1.0" - -[[package]] name = "pin-project-lite" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -503,10 +497,6 @@ dependencies = [ ] [[package]] -name = "smalldog" -version = "0.1.0" - -[[package]] name = "smallvec" version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -523,15 +513,6 @@ dependencies = [ ] 
[[package]] -name = "stats_module" -version = "0.1.0" -dependencies = [ - "rusqlite", - "smalldog", - "time", -] - -[[package]] name = "syn" version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -544,9 +525,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.40" +version = "0.3.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d9c75b47bdff86fa3334a3db91356b8d7d86a9b839dab7d0bdc5c3d3a077618" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" dependencies = [ "deranged", "itoa", @@ -565,9 +546,9 @@ checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" [[package]] name = "time-macros" -version = "0.2.21" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29aa485584182073ed57fd5004aa09c371f021325014694e432313345865fd04" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" dependencies = [ "num-conv", "time-core", diff --git a/Cargo.toml b/Cargo.toml index 47bbc10..9568d9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["corgi", "parrot", "parrot_module", "smalldog", "stats_module"] +members = ["corgi", "parrot", "stats"] resolver = "3" # use this profile like this: diff --git a/DEVELOPEMENT.md b/DEVELOPEMENT.md new file mode 100644 index 0000000..2e1c4c8 --- /dev/null +++ b/DEVELOPEMENT.md @@ -0,0 +1,8 @@ +`corgi.conf` has the `skip-worktree` bit set. if you want to commit this file, remove the bit, add it, and re-set the bit. +``` +# set skip-worktree +git update-index --skip-worktree corgi.conf + +# unset skip-worktree +git update-index --no-skip-worktree corgi.conf +``` diff --git a/README.md b/README.md index c312ee6..ffa270b 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ Script <path-to-cgi-script> See [corgi.conf](corgi.conf) for the configuration I use with my cgit instance. 
-Scripts are tried in order, looking for one that matches. If none match, -the first script is ran. +Scripts are tried in order, looking for one that has a matching regex. If a +script does not have a regex, it is selected. If none match, 404 is returned. Sets the following environmental variables for the CGI script, many following [RFC 3875][rfc]: - **`GATEWAY_INTERFACE`** to the fixed value `CGI/1.1` @@ -44,4 +44,4 @@ They will be uppercased and hyphens replaced with underscores. Any environmental variable may be overridden if it is set in the configuration file, except the `CONTENT_LENGTH` envar. -[rfc]: https://datatracker.ietf.org/doc/html/rfc3875 \ No newline at end of file +[rfc]: https://datatracker.ietf.org/doc/html/rfc3875 diff --git a/TODO.md b/TODO.md index a439640..198fddf 100644 --- a/TODO.md +++ b/TODO.md @@ -10,10 +10,23 @@ a `catch_unwind`? there are complications with the first one, but perhaps we can make it work? -(5) Crate For The Module System - so we don't have to copy the weird structs. and also so maybe we - can make it safer? A C header, too, maybe? that just define the - struct. and really some kind of documentation, probably. +(6) CGI Bin To Man-in-the-middle Requests + Is this just parrot? Maybe this is just parrot? I want something, for + ease of debugging, to sit between corgi and the binary so we can + see both ends of the conversation. I had a bug in corgi-stats that had + corgi erroring with "no nl in header" and that was confusing. I was + able to fix it kind of blindly, but that was not ideal. It could've + been bad! Being able to see what corgi-stats was returning would've + been very helpful. + +(7) Investigate Compressing the SQLite Stats DB + Looking at the below project. It's in Rust and easily integrated, + I think, so we should use it to explore how things work. I think it + could be good; I imagine the data we store compresses great. The IP + addresses, user agents, paths, script names. 
Like, in the 11 million + row production database we store the script name of almost certainly + mostly "cgit", so that could get dramatically reduced. + https://github.com/phiresky/sqlite-zstd DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) ======================================================================= @@ -25,4 +38,13 @@ DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) (2) Support Matching CGI Based On Path The other requirement for git-http-backend, which needs to - trigger on the regex `/.+/(info/refs|git-upload-pack)` \ No newline at end of file + trigger on the regex `/.+/(info/refs|git-upload-pack)` + +(5) Crate For The Module System + so we don't have to copy the weird structs. and also so maybe we + can make it safer? A C header, too, maybe? that just define the + struct. and really some kind of documentation, probably. + + EWONTFIX + lol. module system was ripped out because it turns out there is no + good way to handle more than one request concurrently, it seems. 
diff --git a/corgi.conf b/corgi.conf index ff1a416..8f644f0 100644 --- a/corgi.conf +++ b/corgi.conf @@ -1,11 +1,13 @@ Server Port 26744 + StatsDb /var/corgi/stats.sqlite -Script module-test - Path target/release/libparrot.dylib - Type object +Script stats + Path /opt/corgi/corgi-stats Match - Regex /object + Regex ^/stats + Environment + CORGI_STATS_DB /var/corgi/stats.sqlite Script git-backend Path /usr/lib/git-core/git-http-backend @@ -15,9 +17,9 @@ Script git-backend GIT_HTTP_EXPORT_ALL 1 GIT_PROJECT_ROOT /srv/git HOME /srv/git - HTTP_HOST git.nyble.dev + HTTP_HOST git.dreamy.place Script cgit - Path /usr/lib/cgit/cgit.cgi + Path /opt/cgit/cgit Environment - HTTP_HOST git.nyble.dev \ No newline at end of file + HTTP_HOST git.dreamy.place \ No newline at end of file diff --git a/corgi/Cargo.toml b/corgi/Cargo.toml index ea5fe74..b2e56ba 100644 --- a/corgi/Cargo.toml +++ b/corgi/Cargo.toml @@ -6,17 +6,17 @@ license = "ISC" repository = "https://git.nyble.dev/corgi/about" readme = "../README.md" -version = "1.0.0" +version = "1.1.0" edition = "2024" [dependencies] base64 = "0.22.1" http-body-util = "0.1.3" hyper-util = { version = "0.1.10", features = ["tokio"] } -libloading = "0.8.6" regex-lite = "0.1.6" -rusqlite = { version = "0.34.0", features = ["bundled"] } +rusqlite = { version = "0.34.0", features = ["bundled", "time"] } sha2 = "0.10.8" +time = { version = "0.3.41", features = ["formatting"] } [dependencies.confindent] version = "2.2.1" diff --git a/corgi/changelog.md b/corgi/changelog.md new file mode 100644 index 0000000..1011e46 --- /dev/null +++ b/corgi/changelog.md @@ -0,0 +1,19 @@ +# v1.1.0 + +this release makes changes to the sqlite database aimed to allow applications +ingesting the data to run more efficiently. in doing this, corgi-stats received +a 1000x uplift when querying against my production table with 11 million rows. 
+ +corgi will automatically perform the database migrations necessary and fill in +initial values for the new column/table using the existing data. If you have +a lot of rows in the database, this might take a few seconds. + +- switch sqlite db to use write-ahead logging +- add a `request_count` column to the `agents` table +- create an `ephemeral_requests` table, managed by corgi, to store the last 24 hours of requests separate from the main `requests` table +- create a trigger to insert a row in the `ephemeral_requests` table when `requests` receives an insert +- create a trigger to increment `request_count` in the `agents` table when a row is inserted on `requests` + +# v1.0.1 + +- have corgi set the stats database to WAL journal mode. \ No newline at end of file diff --git a/corgi/src/caller.rs b/corgi/src/caller.rs index a8b04c0..29be5ca 100644 --- a/corgi/src/caller.rs +++ b/corgi/src/caller.rs @@ -1,19 +1,8 @@ -use std::{ - ffi::{self, CString}, - io::Write, - net::IpAddr, - process::Stdio, - ptr, - str::FromStr, -}; +use std::{net::IpAddr, process::Stdio}; -use tokio::{ - io::AsyncWriteExt, - process::Command, - sync::oneshot::{self, Receiver, Sender}, -}; +use tokio::{io::AsyncWriteExt, process::Command}; -use crate::{Script, ScriptKind}; +use crate::Script; pub struct HttpRequest { pub content_type: String, @@ -62,12 +51,6 @@ impl HttpRequest { } pub async fn call_and_parse_cgi(script: Script, http: HttpRequest) -> CgiResponse { - if script.kind != ScriptKind::Executable { - eprintln!("Somehow made it to executable path with module script"); - eprintln!("Script: {}", script.name); - panic!("TODO: recover") - } - let mut cmd = Command::new(&script.filename); // Set env specified in the conf. 
Be sure we do this after we @@ -168,89 +151,3 @@ pub struct CgiResponse { /// CGI response body pub body: Option<Vec<u8>>, } - -#[repr(C)] -struct ModuleRequest { - headers_len: ffi::c_ulong, - headers: *const [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -#[repr(C)] -struct ModuleResponse { - status: ffi::c_ushort, - headers_len: ffi::c_ulong, - headers: &'static [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -type HandleFn = unsafe extern "C" fn(*const ModuleRequest) -> *const ModuleResponse; -type CleanupFn = unsafe extern "C" fn(*const ModuleResponse); - -pub async fn call_and_parse_module(script: Script, req: HttpRequest) -> CgiResponse { - let (tx, rx) = oneshot::channel(); - std::thread::spawn(move || unsafe { module_thread(script, req, tx) }); - - rx.await.unwrap() -} - -unsafe fn module_thread(script: Script, req: HttpRequest, tx: Sender<CgiResponse>) { - let env: Vec<(String, String)> = req - .build_kv() - .into_iter() - .chain(req.http_headers.into_iter()) - .chain(script.env.into_iter()) - .collect(); - - let mut headers_owned = vec![]; - for (k, v) in env { - headers_owned.push([ - CString::from_str(k.as_str()).unwrap(), - CString::from_str(v.as_str()).unwrap(), - ]); - } - - let headers: Vec<[*const ffi::c_char; 2]> = - headers_owned.iter().map(|kvarr| [kvarr[0].as_ptr(), kvarr[1].as_ptr()]).collect(); - - let modreq = ModuleRequest { - headers_len: headers.len() as u64, - headers: &headers[..] 
as *const [[*const ffi::c_char; 2]], - body_len: req.body.as_ref().map(|v| v.len()).unwrap_or(0) as u64, - body: req.body.as_ref().map(|v| v.as_ptr()).unwrap_or(ptr::null()), - }; - - let mut cgi = CgiResponse { - status: 200, - headers: vec![], - body: None, - }; - - unsafe { - let lib = libloading::Library::new(script.filename).unwrap(); - let handle: libloading::Symbol<HandleFn> = lib.get(b"cgi_handle").unwrap(); - let free: libloading::Symbol<CleanupFn> = lib.get(b"cgi_cleanup").unwrap(); - - let response = handle((&modreq) as *const ModuleRequest); - let response_ref = response.as_ref().unwrap(); - - for idx in 0..response_ref.headers_len { - let kvarr = response_ref.headers[idx as usize]; - let k = ffi::CStr::from_ptr(kvarr[0]).to_string_lossy(); - let v = ffi::CStr::from_ptr(kvarr[1]).to_string_lossy(); - cgi.headers.push((k.as_bytes().to_vec(), v.as_bytes().to_vec())); - } - - let maybe_body: Option<Vec<u8>> = response_ref - .body - .as_ref() - .map(|b| std::slice::from_raw_parts(b, response_ref.body_len as usize).to_vec()); - cgi.body = maybe_body; - - free(response); - }; - - tx.send(cgi).unwrap() -} diff --git a/corgi/src/main.rs b/corgi/src/main.rs index 6a3c528..1192c4c 100644 --- a/corgi/src/main.rs +++ b/corgi/src/main.rs @@ -1,119 +1,39 @@ +use core::fmt; use std::{ - net::{IpAddr, SocketAddr}, - path::PathBuf, + net::SocketAddr, pin::Pin, - process::Stdio, sync::Arc, - time::Instant, + time::{Duration, Instant}, }; use caller::HttpRequest; -use confindent::{Confindent, Value, ValueParseError}; use http_body_util::{BodyExt, Full}; use hyper::{ HeaderMap, Request, Response, StatusCode, body::{Bytes, Incoming}, - header::HeaderValue, server::conn::http1, service::Service, }; use hyper_util::rt::TokioIo; -use regex_lite::Regex; +use settings::{Script, Settings}; use stats::Stats; -use tokio::{io::AsyncWriteExt, net::TcpListener, process::Command, runtime::Runtime}; +use tokio::{net::TcpListener, runtime::Runtime}; +use util::owned_header; mod caller; 
+mod settings; mod stats; - -#[derive(Clone, Debug)] -pub struct Settings { - port: u16, - scripts: Vec<Script>, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum ScriptKind { - Executable, - Object, -} - -#[derive(Clone, Debug)] -pub struct Script { - name: String, - kind: ScriptKind, - regex: Option<Regex>, - filename: String, - env: Vec<(String, String)>, -} - -const CONF_DEFAULT: &str = "/etc/corgi.conf"; +mod util; fn main() { - let conf_path = std::env::args().nth(1).unwrap_or(String::from(CONF_DEFAULT)); - let conf = Confindent::from_file(conf_path).expect("failed to open conf"); - - let mut settings = Settings { - port: 26744, - scripts: conf.children("Script").into_iter().map(parse_script_conf).collect(), - }; - - if let Some(server) = conf.child("Server") { - match server.child_parse("Port") { - Err(ValueParseError::NoValue) => (), - Err(err) => { - eprintln!("Server.Port is malformed: {err}"); - std::process::exit(1); - } - Ok(port) => settings.port = port, - } - } - - let stats = Stats::new(PathBuf::from( - conf.get("Server/StatsDb").unwrap().to_owned(), - )); + let settings = Settings::get(); + let stats = Stats::new(&settings.stats_path); stats.create_tables(); let rt = Runtime::new().unwrap(); rt.block_on(async { run(settings, stats).await }); } -fn parse_script_conf(conf: &Value) -> Script { - let name = conf.value_owned().expect("Missing value for 'Script' key"); - let filename = conf.child_owned("Path").expect("Missing 'Path' key"); - let environment = conf.child("Environment"); - let env = environment - .map(|e| e.values().map(|v| (v.key_owned(), v.value_owned().unwrap())).collect()); - - let regex = match conf.get("Match/Regex") { - None => None, - Some(restr) => match Regex::new(restr) { - Err(err) => { - eprintln!("Failed to compile regex: {restr}\nerror: {err}"); - std::process::exit(1); - } - Ok(re) => Some(re), - }, - }; - - let kind = match conf.get("Type") { - None => ScriptKind::Executable, - Some("executable") => 
ScriptKind::Executable, - Some("object") => ScriptKind::Object, - Some(kind) => { - eprintln!("'{kind}' is not a valid script type"); - std::process::exit(1) - } - }; - - Script { - name, - kind, - regex, - filename, - env: env.unwrap_or_default(), - } -} - // We have tokio::main at home :) async fn run(settings: Settings, stats: Stats) { let addr = SocketAddr::from(([0, 0, 0, 0], settings.port)); @@ -125,11 +45,33 @@ async fn run(settings: Settings, stats: Stats) { client_addr: addr, }; + let mut last_clean = None; + loop { + // Clean at the top so we do it once on boot, but keep out of the + // flow of the request to keep it speedy. This will delay accepting + // a new connection when the clean actually runs, but that is fine. + match last_clean { + None => { + let count = svc.stats.cleanup_ephemeral_requests(); + println!("cleaned {count} requests from the ephemeral table"); + last_clean = Some(Instant::now()); + } + Some(inst) if inst.elapsed() >= Duration::from_secs(60 * 60) => { + let count = svc.stats.cleanup_ephemeral_requests(); + println!("cleaned {count} requests from the ephemeral table"); + last_clean = Some(Instant::now()); + } + _ => (), + } + + // Now we accept the connection and spawn a handler let (stream, caddr) = listen.accept().await.unwrap(); let io = TokioIo::new(stream); + let mut svc_clone = svc.clone(); svc_clone.client_addr = caddr; + tokio::task::spawn( async move { http1::Builder::new().serve_connection(io, svc_clone).await }, ); @@ -164,61 +106,48 @@ impl Svc { caddr: SocketAddr, req: Request<Incoming>, ) -> Response<Full<Bytes>> { - let start = Instant::now(); + match Self::handle_fallible(settings, stats, caddr, req).await { + Err(re) => re.into_response(), + Ok(response) => response, + } + } + async fn handle_fallible( + settings: Settings, + stats: Arc<Stats>, + caddr: SocketAddr, + req: Request<Incoming>, + ) -> Result<Response<Full<Bytes>>, RuntimeError> { // Collect things we need from the request before we eat it's body let 
method = req.method().as_str().to_ascii_uppercase(); let version = req.version(); - let path = req.uri().path().to_owned(); - let query = req.uri().query().unwrap_or_default().to_owned(); - let headers = req.headers().clone(); - - let body = req.into_body().collect().await.unwrap().to_bytes().to_vec(); - let content_length = body.len(); - - let mut maybe_script = None; - for set_script in settings.scripts { - if let Some(regex) = set_script.regex.as_ref() { - if regex.is_match(&path) { - maybe_script = Some(set_script); - break; - } - } else { - maybe_script = Some(set_script); - break; - } - } + let path = util::url_decode(req.uri().path(), false)?; + let query = req + .uri() + .query() + .map(|s| util::url_decode(s, false)) + .transpose()? + .unwrap_or_default(); - let script = match maybe_script { - Some(script) => script, - None => { - eprintln!("path didn't match any script"); - panic!("TODO recover?"); - } - }; + let script = Self::select_script(&settings, &path).ok_or(RuntimeError::NoScript)?; - let content_type = headers - .get("content-type") - .map(|s| s.to_str().ok()) - .flatten() - .unwrap_or_default() - .to_owned(); - - let uagent = headers - .get("user-agent") - .map(|s| s.to_str().ok()) - .flatten() - .unwrap_or_default() - .to_owned(); + // Clone the headers and extract what we need + let headers = req.headers().clone(); + let content_type = owned_header(headers.get("content-type")).unwrap_or_default(); + let uagent = owned_header(headers.get("user-agent")).unwrap_or_default(); // Find the client address let client_addr = { - let x_forward = Self::parse_addr_from_header(headers.get("x-forwarded-for")); - let forward = Self::parse_addr_from_header(headers.get("forwarded-for")); + let x_forward = util::parse_from_header(headers.get("x-forwarded-for")); + let forward = util::parse_from_header(headers.get("forwarded-for")); forward.unwrap_or(x_forward.unwrap_or(caddr.ip())) }; + // Finally, get the body which consumes the request + let body = 
req.into_body().collect().await.unwrap().to_bytes().to_vec(); + let content_length = body.len(); + let server_name = headers .get("Host") .expect("no http host header set") @@ -239,16 +168,9 @@ impl Svc { body: if content_length > 0 { Some(body) } else { None }, }; - let start_cgi = Instant::now(); - let cgi_response = match script.kind { - ScriptKind::Executable => { - caller::call_and_parse_cgi(script.clone(), http_request).await - } - ScriptKind::Object => caller::call_and_parse_module(script.clone(), http_request).await, - }; - let cgi_time = start_cgi.elapsed(); - + let cgi_response = caller::call_and_parse_cgi(script.clone(), http_request).await; let status = StatusCode::from_u16(cgi_response.status).unwrap(); + let mut response = Response::builder().status(status); for (key, value) in cgi_response.headers { @@ -263,20 +185,32 @@ impl Svc { }; println!( - "served to [{client_addr}]\n\tscript: {}\n\tpath: {path}\n\tcgi took {}ms. total time {}ms\n\tUA: {uagent}", - &script.name, - cgi_time.as_millis(), - start.elapsed().as_millis() + "served to [{client_addr}]\n\tscript: {}\n\tpath: {path}\n\tUA: {uagent}", + &script.name ); stats.log_request(db_req); - let response_body = cgi_response.body.map(|v| Bytes::from(v)).unwrap_or(Bytes::new()); - response.body(Full::new(response_body)).unwrap() + let response_body = cgi_response + .body + .map(|v| Bytes::from(v)) + .unwrap_or(Bytes::new()); + + Ok(response.body(Full::new(response_body)).unwrap()) } - fn parse_addr_from_header(maybe_hval: Option<&HeaderValue>) -> Option<IpAddr> { - maybe_hval.map(|h| h.to_str().ok()).flatten().map(|s| s.parse().ok()).flatten() + fn select_script<'s>(settings: &'s Settings, path: &str) -> Option<&'s Script> { + for script in &settings.scripts { + if let Some(regex) = script.regex.as_ref() { + if regex.is_match(path) { + return Some(script); + } + } else { + return Some(script); + } + } + + None } fn build_http_vec(headers: HeaderMap) -> Vec<(String, String)> { @@ -312,13 +246,35 @@ 
impl Svc { } } -fn path_to_name(path: &str) -> String { - let mut ret = String::with_capacity(path.len()); - for ch in path.chars() { - match ch { - '/' => ret.push('-'), - ch => ret.push(ch), +fn status_page<D: fmt::Display>(status: u16, msg: D) -> Response<Full<Bytes>> { + let body_str = format!( + "<html>\n\ + \t<head><title>{status}</title></head>\n\ + \t<body style='width: 20rem; padding: 0px; margin: 2rem;'>\n\ + \t\t<h1>{status}</h1>\n\ + \t\t<hr/>\n\ + \t\t<p>{msg}</p>\n\ + \t</body>\n\ + </html>" + ); + + Response::builder() + .status(status) + .header("Content-Type", "text/html") + .body(Full::new(body_str.into())) + .unwrap() +} + +enum RuntimeError { + MalformedRequest, + NoScript, +} + +impl RuntimeError { + pub fn into_response(&self) -> Response<Full<Bytes>> { + match self { + Self::MalformedRequest => status_page(400, "bad request"), + Self::NoScript => status_page(404, "failed to route request"), } } - ret } diff --git a/corgi/src/settings.rs b/corgi/src/settings.rs new file mode 100644 index 0000000..ee701b0 --- /dev/null +++ b/corgi/src/settings.rs @@ -0,0 +1,82 @@ +use std::path::PathBuf; + +use confindent::{Confindent, Value, ValueParseError}; +use regex_lite::Regex; + +const CONF_DEFAULT: &str = "/etc/corgi.conf"; + +#[derive(Clone, Debug)] +pub struct Script { + pub name: String, + pub regex: Option<Regex>, + pub filename: String, + pub env: Vec<(String, String)>, +} + +#[derive(Clone, Debug)] +pub struct Settings { + pub port: u16, + pub scripts: Vec<Script>, + pub stats_path: PathBuf, +} + +impl Settings { + pub fn get() -> Self { + let conf_path = std::env::args() + .nth(1) + .unwrap_or(String::from(CONF_DEFAULT)); + let conf = Confindent::from_file(conf_path).expect("failed to open conf"); + + let mut settings = Settings { + port: 26744, + scripts: conf + .children("Script") + .into_iter() + .map(parse_script_conf) + .collect(), + stats_path: conf.get_parse("Server/StatsDb").unwrap(), + }; + + if let Some(server) = conf.child("Server") { 
+ match server.child_parse("Port") { + Err(ValueParseError::NoValue) => (), + Err(err) => { + eprintln!("Server.Port is malformed: {err}"); + std::process::exit(1); + } + Ok(port) => settings.port = port, + } + } + + settings + } +} + +fn parse_script_conf(conf: &Value) -> Script { + let name = conf.value_owned().expect("Missing value for 'Script' key"); + let filename = conf.child_owned("Path").expect("Missing 'Path' key"); + let environment = conf.child("Environment"); + let env = environment.map(|e| { + e.values() + .map(|v| (v.key_owned(), v.value_owned().unwrap())) + .collect() + }); + + let regex = match conf.get("Match/Regex") { + None => None, + Some(restr) => match Regex::new(restr) { + Err(err) => { + eprintln!("Failed to compile regex: {restr}\nerror: {err}"); + std::process::exit(1); + } + Ok(re) => Some(re), + }, + }; + + Script { + name, + regex, + filename, + env: env.unwrap_or_default(), + } +} diff --git a/corgi/src/stats.rs b/corgi/src/stats.rs index 0e3b99a..9e9d15c 100644 --- a/corgi/src/stats.rs +++ b/corgi/src/stats.rs @@ -1,12 +1,9 @@ -use std::{ - net::{IpAddr, SocketAddr}, - path::PathBuf, - sync::Mutex, -}; +use std::{net::IpAddr, path::Path, sync::Mutex}; use base64::{Engine, prelude::BASE64_STANDARD_NO_PAD}; use rusqlite::{Connection, OptionalExtension, params}; use sha2::{Digest, Sha256}; +use time::{Duration, OffsetDateTime}; #[derive(Debug)] pub struct Stats { @@ -14,7 +11,7 @@ pub struct Stats { } impl Stats { - pub fn new(db_path: PathBuf) -> Self { + pub fn new(db_path: &Path) -> Self { Self { conn: Mutex::new(Connection::open(db_path).unwrap()), } @@ -22,8 +19,72 @@ impl Stats { pub fn create_tables(&self) { let conn = self.conn.lock().unwrap(); - conn.execute(CREATE_TABLE_AGENT, ()).unwrap(); - conn.execute(CREATE_TABLE_REQUESTS, ()).unwrap(); + + Self::set_wal(&conn); + + // "agents" exists and trigger does not; we need to alter and prime + if Self::table_exists(&conn, "agents") && !Self::trigger_exists(&conn, "agent_count") { + 
println!("agents table exists, but needs request_count column. Altering and priming"); + conn.execute(MIGRATE_AGENTS_ADD_REQUEST_COUNT, ()).unwrap(); + + Self::prime_agents_request_count(&conn); + } else { + conn.execute(CREATE_TABLE_AGENT, ()).unwrap(); + conn.execute(CREATE_TABLE_REQUESTS, ()).unwrap(); + } + + conn.execute(CREATE_TRIGGER_COUNT_AGENT, ()).unwrap(); + + // Instead of just an IF NOT EXISTS here, we're checking it exists + // so we can copy an initial amount of requests from the main table + // to the ephemeral table. + if !Self::table_exists(&conn, "ephemeral_requests") { + println!("ephemeral_requests does not exist. Creating and priming"); + conn.execute(CREATE_TRIGGER_EPHEMERAL, ()).unwrap(); + conn.execute(CREATE_TABLE_EPHEMERAL_REQUESTS, ()).unwrap(); + + let count = Self::prime_ephemeral_table(&conn); + println!("Primed with {count} rows"); + } + } + + fn set_wal(conn: &Connection) { + let journal_mode: String = conn + .pragma_update_and_check(None, "journal_mode", "WAL", |row| row.get(0)) + .unwrap(); + + match journal_mode.to_ascii_lowercase().as_str() { + "wal" => (), + _ => { + eprintln!("WARN sqlitedb did not successfully enter the WAL journal mode"); + } + } + } + + fn table_exists(conn: &Connection, name: &str) -> bool { + let exist: Option<String> = conn + .query_row( + "SELECT name FROM sqlite_schema WHERE type='table' AND name=?1;", + params![name], + |r| r.get(0), + ) + .optional() + .unwrap(); + + exist.is_some() + } + + fn trigger_exists(conn: &Connection, name: &str) -> bool { + let exist: Option<String> = conn + .query_row( + "SELECT name FROM sqlite_schema WHERE type='trigger' AND name=?1;", + params![name], + |r| r.get(0), + ) + .optional() + .unwrap(); + + exist.is_some() } pub fn log_request(&self, request: Request) { @@ -71,6 +132,61 @@ impl Stats { ) .unwrap(); } + + /// Small, single line function to return the lower-bound date of ephemeral + /// requests. 
+ fn ephemeral_lifetime() -> OffsetDateTime { + OffsetDateTime::now_utc() - Duration::days(1) + } + + pub fn cleanup_ephemeral_requests(&self) -> usize { + let lower = Self::ephemeral_lifetime(); + + let sql = "DELETE FROM ephemeral_requests WHERE timestamp < ?1;"; + + let conn = self.conn.lock().unwrap(); + + match conn.execute(sql, params![lower]) { + Err(e) => { + eprintln!("ERROR failed to run ephemeral clean: {e}"); + panic!(); + } + Ok(count) => count, + } + } + + fn prime_ephemeral_table(conn: &Connection) -> usize { + let lower = Self::ephemeral_lifetime(); + + let sql = "INSERT INTO ephemeral_requests SELECT id, timestamp FROM requests WHERE timestamp > ?1;"; + match conn.execute(sql, params![lower]) { + Err(e) => { + eprintln!("ERROR failed to prime ephemeral: {e}"); + panic!(); + } + Ok(count) => count, + } + } + + fn prime_agents_request_count(conn: &Connection) { + let sql = "SELECT agent_id, count(id) as count FROM requests GROUP BY agent_id"; + let mut prepared = conn.prepare(sql).unwrap(); + + let counts: Vec<(i64, i64)> = prepared + .query_map((), |row| Ok((row.get(0)?, row.get(1)?))) + .optional() + .unwrap() + .map(|iter| iter.map(|e| e.unwrap()).collect()) + .unwrap(); + + for (agent, count) in counts { + conn.execute( + "UPDATE agents SET request_count = ?1 WHERE id = ?2;", + params![count, agent], + ) + .unwrap(); + } + } } pub struct Request<'r> { @@ -84,9 +200,13 @@ const CREATE_TABLE_AGENT: &'static str = "\ CREATE TABLE IF NOT EXISTS agents( id INTEGER PRIMARY KEY AUTOINCREMENT, hash TEXT NOT NULL, - agent TEXT NOT NULL + agent TEXT NOT NULL, + request_count INTEGER NOT NULL DEFAULT 0 );"; +const MIGRATE_AGENTS_ADD_REQUEST_COUNT: &'static str = + "ALTER TABLE agents ADD COLUMN request_count INTEGER NOT NULL DEFAULT 0"; + const CREATE_TABLE_REQUESTS: &'static str = "\ CREATE TABLE IF NOT EXISTS requests( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -98,3 +218,23 @@ const CREATE_TABLE_REQUESTS: &'static str = "\ FOREIGN KEY (agent_id) REFERENCES 
agents(id) );"; + +const CREATE_TRIGGER_EPHEMERAL: &'static str = "\ + CREATE TRIGGER IF NOT EXISTS requests_copy_ephemeral AFTER INSERT ON requests + BEGIN + INSERT INTO ephemeral_requests(request_id, timestamp) VALUES(new.id, new.timestamp); + END;"; + +const CREATE_TRIGGER_COUNT_AGENT: &'static str = "\ + CREATE TRIGGER IF NOT EXISTS agent_count AFTER INSERT ON requests + BEGIN + UPDATE agents SET request_count = request_count + 1 WHERE agents.id = new.agent_id; + END;"; + +const CREATE_TABLE_EPHEMERAL_REQUESTS: &'static str = "\ + CREATE TABLE IF NOT EXISTS ephemeral_requests( + request_id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (request_id) + REFERENCES requests(id) + );"; diff --git a/corgi/src/util.rs b/corgi/src/util.rs new file mode 100644 index 0000000..727c8c7 --- /dev/null +++ b/corgi/src/util.rs @@ -0,0 +1,66 @@ +use std::str::FromStr; + +use hyper::header::HeaderValue; + +use crate::RuntimeError; + +// Ripped and modified from gennyble/mavourings query.rs +/// Decode a URL encoded string, optionally treating a plus, '+', as a space. 
If
+/// the final string is not UTF8, RuntimeError::MalformedRequest is returned
+///
+/// A '%' that is not followed by two ASCII hex digits is copied to the output
+/// unchanged rather than rejected.
+pub fn url_decode(urlencoded: &str, plus_as_space: bool) -> Result<String, RuntimeError> {
+	// Hex value of the byte at `at`, if there is one and it is an ASCII hex digit.
+	fn hex_at(raw: &[u8], at: usize) -> Option<u8> {
+		raw.get(at)
+			.and_then(|b| (*b as char).to_digit(16))
+			.map(|digit| digit as u8)
+	}
+
+	let raw = urlencoded.as_bytes();
+	let mut decoded: Vec<u8> = Vec::with_capacity(raw.len());
+
+	let mut at = 0;
+	while at < raw.len() {
+		match raw[at] {
+			b'+' if plus_as_space => decoded.push(b' '),
+			b'%' => {
+				if let (Some(high), Some(low)) = (hex_at(raw, at + 1), hex_at(raw, at + 2)) {
+					decoded.push(high * 16 + low);
+					// Skip the '%' and both hex digits.
+					at += 3;
+					continue;
+				}
+
+				// Incomplete escape: keep the '%'; the bytes after it are
+				// handled by the following iterations.
+				decoded.push(b'%');
+			}
+			other => decoded.push(other),
+		}
+
+		at += 1;
+	}
+
+	decoded.shrink_to_fit();
+	String::from_utf8(decoded).map_err(|_| RuntimeError::MalformedRequest)
+}
+
+/// Parse an optional header value into `T`.
+///
+/// Returns `None` when the header is absent, when `to_str` fails on it, or
+/// when the text does not parse as `T`.
+pub fn parse_from_header<T: FromStr>(maybe_hval: Option<&HeaderValue>) -> Option<T> {
+	let text = maybe_hval.and_then(|hval| hval.to_str().ok())?;
+	text.parse().ok()
+}
+
+/// Copy an optional header value into an owned `String`.
+///
+/// Returns `None` when the header is absent or `to_str` fails on it.
+pub fn owned_header(maybe_hval: Option<&HeaderValue>) -> Option<String> {
+	match maybe_hval {
+		Some(hval) => hval.to_str().ok().map(<_>::to_owned),
+		None => None,
+	}
+}
diff --git a/parrot_module/Cargo.toml b/parrot_module/Cargo.toml
deleted file mode 100644
index f4ced8e..0000000
--- a/parrot_module/Cargo.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-[package]
-name = "parrot_module"
-version = "0.1.0"
-edition = "2024"
-
-[lib]
-name = "parrot"
-crate-type = ["cdylib"]
-
-[dependencies]
diff --git a/parrot_module/src/lib.rs b/parrot_module/src/lib.rs
deleted file mode 100644
index 38148ae..0000000
--- a/parrot_module/src/lib.rs
+++
/dev/null @@ -1,67 +0,0 @@ -use std::ffi::{self, CStr, CString}; - -#[repr(C)] -struct ModuleRequest<'req> { - headers_len: ffi::c_ulong, - headers: &'req [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -#[repr(C)] -struct ModuleResponse { - status: ffi::c_ushort, - headers_len: ffi::c_ulong, - headers: &'static [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -const HEADERS: &'static [[*const ffi::c_char; 2]] = - &[[c"Content-Type".as_ptr(), c"text/plain".as_ptr()]]; - -#[unsafe(no_mangle)] -extern "C" fn cgi_handle(req: *const ModuleRequest) -> *const ModuleResponse { - let mut ret = String::new(); - - // unwrap is bad here - let reqref = unsafe { req.as_ref() }.unwrap(); - - for idx in 0..reqref.headers_len { - let kvarr = reqref.headers[idx as usize]; - let k = unsafe { CStr::from_ptr(kvarr[0]) }.to_string_lossy(); - let v = unsafe { CStr::from_ptr(kvarr[1]) }.to_string_lossy(); - - // While debugging I removed the format!() because it was SIGSEGVing - // as the String it allocated freed, and now that this is here it can stay - ret.push_str(&k); - ret.push(':'); - ret.push(' '); - ret.push_str(&v); - ret.push('\n') - } - - let body = CString::new(ret).unwrap(); - - let resp = ModuleResponse { - status: 200, - headers_len: 1, - headers: HEADERS, - body_len: body.as_bytes().len() as u64, - body: body.into_raw() as *const u8, - }; - - let boxed = Box::new(resp); - Box::<ModuleResponse>::into_raw(boxed) -} - -#[unsafe(no_mangle)] -extern "C" fn cgi_cleanup(req: *const ModuleResponse) { - // from_raw what we need to here so that these get dropped - let boxed = unsafe { Box::from_raw(req as *mut ModuleResponse) }; - let body = unsafe { CString::from_raw(boxed.body as *mut i8) }; - - // Explicitly calling drop here to feel good about myself - drop(body); - drop(boxed); -} diff --git a/smalldog/Cargo.toml b/smalldog/Cargo.toml deleted file mode 100644 index 6ae5cc4..0000000 --- a/smalldog/Cargo.toml +++ /dev/null 
@@ -1,6 +0,0 @@ -[package] -name = "smalldog" -version = "0.1.0" -edition = "2024" - -[dependencies] diff --git a/smalldog/src/lib.rs b/smalldog/src/lib.rs deleted file mode 100644 index e139d30..0000000 --- a/smalldog/src/lib.rs +++ /dev/null @@ -1,133 +0,0 @@ -use core::ffi; -use std::{borrow::Cow, ffi::CStr, ptr, sync::Mutex}; - -#[repr(C)] -pub struct ModuleRequest<'req> { - pub headers_len: ffi::c_ulong, - pub headers: &'req [[*const ffi::c_char; 2]], - pub body_len: ffi::c_ulong, - pub body: *const u8, -} - -pub struct Request<'req> { - headers: Vec<(Cow<'req, str>, Cow<'req, str>)>, - body: Option<&'req [u8]>, -} - -impl<'req> Request<'req> { - pub fn from_mod_request(request: *const ModuleRequest<'req>) -> Self { - // SAFTEY: corgi will never give us a null pointer - let reqref = unsafe { request.as_ref() }.unwrap(); - - let mut headers = vec![]; - for idx in 0..reqref.headers_len as usize { - let kvarr = reqref.headers[idx as usize]; - let k = unsafe { CStr::from_ptr(kvarr[0]) }.to_string_lossy(); - let v = unsafe { CStr::from_ptr(kvarr[1]) }.to_string_lossy(); - headers.push((k, v)); - } - - let body = if reqref.body.is_null() { - None - } else { - Some(unsafe { std::slice::from_raw_parts(reqref.body, reqref.body_len as usize) }) - }; - - Self { headers, body } - } - - pub fn header(&self, key: &str) -> Option<&str> { - for (hkey, hval) in &self.headers { - if hkey == key { - return Some(hval); - } - } - - None - } - - pub fn headers(&self) -> &[(Cow<str>, Cow<str>)] { - &self.headers - } - - pub fn body(&self) -> Option<&[u8]> { - self.body - } -} - -#[repr(C)] -pub struct ModuleResponse { - pub status: ffi::c_ushort, - pub headers_len: ffi::c_ulong, - pub headers: &'static [[*const ffi::c_char; 2]], - pub body_len: ffi::c_ulong, - pub body: *const u8, -} - -const HEADERS_LEN: usize = 64; -static mut HEADERS: [[*const ffi::c_char; 2]; HEADERS_LEN] = [[ptr::null(), ptr::null()]; 64]; -static RESPONSE: Mutex<Option<Response>> = Mutex::new(None); - -pub 
struct Response { - headers: Vec<(Cow<'static, CStr>, Cow<'static, CStr>)>, - body: Vec<u8>, -} - -impl Response { - pub fn new() -> Self { - Self { - headers: vec![], - body: vec![], - } - } - - pub fn into_mod_response(self, status: u16) -> *const ModuleResponse { - let mut lock = RESPONSE.lock().unwrap(); - *lock = Some(self); - - let this = lock.as_mut().unwrap(); - - for (idx, (key, value)) in this.headers.iter().enumerate().take(HEADERS_LEN) { - unsafe { - HEADERS[idx][0] = key.as_ptr(); - HEADERS[idx][1] = value.as_ptr(); - } - } - - let headers_len = this.headers.len().min(HEADERS_LEN) as u64; - let boxed = Box::new(ModuleResponse { - status, - headers_len, - headers: unsafe { &HEADERS[..headers_len as usize] }, - body_len: this.body.len() as u64, - body: this.body.as_ptr(), - }); - - Box::<ModuleResponse>::into_raw(boxed) - } - - pub fn header<K: Into<Cow<'static, CStr>>, V: Into<Cow<'static, CStr>>>( - &mut self, - key: K, - value: V, - ) -> &mut Self { - self.headers.push((key.into(), value.into())); - self - } - - pub fn body(&mut self, vec: Vec<u8>) -> &mut Self { - self.body = vec; - self - } - - pub fn cleanup(response: *const ModuleResponse) { - let mut lock = RESPONSE.lock().unwrap(); - match lock.take() { - Some(response) => drop(response), - None => (), - } - - let boxed = unsafe { Box::from_raw(response as *mut ModuleResponse) }; - drop(boxed); - } -} diff --git a/stats_module/Cargo.toml b/stats/Cargo.toml index 95b6d41..c7e36b1 100644 --- a/stats_module/Cargo.toml +++ b/stats/Cargo.toml @@ -1,13 +1,8 @@ [package] -name = "stats_module" -version = "0.1.0" +name = "corgi-stats" +version = "1.1.0" edition = "2024" -[lib] -name = "stats" -crate-type = ["cdylib"] - [dependencies] rusqlite = { version = "0.34.0", features = ["bundled", "time"] } time = "0.3.40" -smalldog = { path = "../smalldog" } diff --git a/stats/src/favicon.gif b/stats/src/favicon.gif new file mode 100644 index 0000000..371343b --- /dev/null +++ b/stats/src/favicon.gif Binary 
files differ
diff --git a/stats/src/main.rs b/stats/src/main.rs
new file mode 100644
index 0000000..5b83bdd
--- /dev/null
+++ b/stats/src/main.rs
@@ -0,0 +1,145 @@
+use std::{io::Write, time::Instant};
+
+use rusqlite::{Connection, params};
+use time::{Duration, OffsetDateTime};
+
+/// The ten agents with the highest `request_count`, highest first.
+const TOP_TEN_ALL_TIME: &str =
+	"SELECT request_count, agent FROM agents ORDER BY request_count DESC LIMIT 10;";
+
+/// Per-agent count of `ephemeral_requests` rows newer than the timestamp
+/// bound to `?1`.
+const LAST_FIFTEEN_MINUTES: &str = "\
+	SELECT count(ephemeral_requests.request_id) as request_count, agents.agent
+	FROM ephemeral_requests
+	INNER JOIN requests ON ephemeral_requests.request_id = requests.id
+	INNER JOIN agents ON requests.agent_id = agents.id
+	WHERE ephemeral_requests.timestamp > ?1
+	GROUP BY requests.agent_id;";
+
+/// Stylesheet inlined into the page's `<style>` element.
+const STYLE: &str = include_str!("style.css");
+/// Icon bytes served at `/stats/favicon.gif`.
+const FAVICON: &[u8] = include_bytes!("favicon.gif");
+
+/// CGI entry point.
+///
+/// Routes on `PATH_INFO`: `/stats/favicon.gif` returns the embedded icon,
+/// `/stats` and `/stats/` render the statistics page from the database named
+/// by `CORGI_STATS_DB`, and anything else is a 404. Failures that happen
+/// before any output is written are reported through [`error_and_die`].
+fn main() {
+	let Ok(path) = std::env::var("PATH_INFO") else {
+		error_and_die(500, "no path provided");
+	};
+
+	match path.as_str() {
+		"/stats/favicon.gif" => {
+			// The embedded icon is a GIF, matching the <link type='image/gif'>
+			// emitted in the page head.
+			println!("Content-Type: image/gif\n");
+			let mut stdout = std::io::stdout();
+			stdout.write_all(FAVICON).unwrap();
+			stdout.flush().unwrap();
+			// The icon was served successfully, so exit with status 0.
+			std::process::exit(0);
+		}
+		"/stats" | "/stats/" => (),
+		_ => error_and_die(404, "not found"),
+	}
+
+	let Ok(db_path) = std::env::var("CORGI_STATS_DB") else {
+		error_and_die(500, "database key not set");
+	};
+	let Ok(db) = Connection::open(db_path) else {
+		error_and_die(500, "failed to open database");
+	};
+
+	let fifteen_ago = OffsetDateTime::now_utc() - Duration::minutes(15);
+
+	let start = Instant::now();
+	let recent = query_counts(&db, LAST_FIFTEEN_MINUTES, params![fifteen_ago]);
+	let all_time = query_counts(&db, TOP_TEN_ALL_TIME, ());
+	let elapsed = start.elapsed();
+
+	// No output has been written yet, so a failed query can still be turned
+	// into a proper error page.
+	let (Ok(mut agents), Ok(top_ten)) = (recent, all_time) else {
+		error_and_die(500, "failed to query database");
+	};
+
+	// Busiest agents first.
+	agents.sort_by(|a, b| b.0.cmp(&a.0));
+	let top_ten_total: usize = top_ten.iter().map(|(count, _)| *count).sum();
+
+	println!("Content-Type: text/html\n");
+	println!("<html>");
+	#[rustfmt::skip]
+	println!("<head>\n\
+		<title>corgi stats</title>\n\
+		<style>\n{STYLE}\n</style>\n\
+		<link rel='icon' type='image/gif' href='/stats/favicon.gif' />\n\
+	</head>");
+
+	println!("<body>");
+	println!("<h1>Corgi Stats :)</h1>");
+	println!("<p>generated in {}ms</p>", elapsed.as_millis());
+
+	print_table_head("Requests for the last 15 minutes", "Req/min");
+	for (count, agent) in &agents {
+		print_row(*count, *count as f32 / 15.0, agent);
+	}
+	println!("</tbody>\n</table>");
+
+	print_table_head("Top 10 User Agents All Time", "% of 10");
+	for (count, agent) in &top_ten {
+		// Every listed agent may have a count of zero; avoid printing NaN.
+		let percent = if top_ten_total == 0 {
+			0.0
+		} else {
+			(*count as f32 / top_ten_total as f32) * 100.0
+		};
+		print_row(*count, percent, agent);
+	}
+	println!("</tbody>\n</table>");
+
+	// Close the document only after every table has been written.
+	println!("</body>\n</html>");
+}
+
+/// Run `sql` and collect its `(count, agent)` rows.
+fn query_counts<P: rusqlite::Params>(
+	db: &Connection,
+	sql: &str,
+	params: P,
+) -> rusqlite::Result<Vec<(usize, String)>> {
+	let mut prepared = db.prepare(sql)?;
+	let rows = prepared.query_map(params, |row| {
+		Ok((row.get::<_, usize>(0)?, row.get::<_, String>(1)?))
+	})?;
+	let counts = rows.collect();
+	counts
+}
+
+/// Print the opening of a three-column table: a title row, the column
+/// headings, and the opening `<tbody>`.
+fn print_table_head(title: &str, middle_column: &str) {
+	#[rustfmt::skip]
+	println!("<table>\n\
+		<thead>\n\
+		\t<tr>\n\
+		\t\t<th scope='row' colspan='3' class='ttitle'>{title}</th>\n\
+		\t</tr>\n\
+		\t<tr>\n\
+		\t\t<th># Req.</th>\n\
+		\t\t<th>{middle_column}</th>\n\
+		\t\t<th>Agent</th>\n\
+		\t</tr>\n\
+		</thead>\n<tbody>");
+}
+
+/// Print one table row. `agent` is HTML-escaped because it is text supplied
+/// by whoever sent the request.
+fn print_row(count: usize, derived: f32, agent: &str) {
+	let agent = escape_html(agent);
+	#[rustfmt::skip]
+	println!("<tr>\n\
+		\t<td>{count}</td>\n\
+		\t<td>{derived:.1}</td>\n\
+		\t<td>{agent}</td>\n\
+		</tr>");
+}
+
+/// Replace the characters that are special in HTML text and attribute values
+/// (`&`, `<`, `>`, `"`, `'`) with character references.
+fn escape_html(raw: &str) -> String {
+	let mut escaped = String::with_capacity(raw.len());
+	for c in raw.chars() {
+		match c {
+			'&' => escaped.push_str("&amp;"),
+			'<' => escaped.push_str("&lt;"),
+			'>' => escaped.push_str("&gt;"),
+			'"' => escaped.push_str("&quot;"),
+			'\'' => escaped.push_str("&#39;"),
+			other => escaped.push(other),
+		}
+	}
+	escaped
+}
+
+/// Write a minimal HTML error page with the given CGI `Status` and message,
+/// then exit the process. The message is HTML-escaped.
+///
+/// The process exits with status 0: the error is reported through the
+/// `Status` header rather than through the exit code.
+fn error_and_die<S: Into<String>>(status: u16, msg: S) -> ! {
+	let msg = escape_html(&msg.into());
+
+	println!("Status: {status}");
+	println!("Content-Type: text/html\n");
+	println!("<html>");
+	println!("\t<head><title>{status}</title></head>");
+	println!("\t<body style='width: 20rem; padding: 0px; margin: 2rem;'>");
+	println!("\t\t<h1>{status}</h1>");
+	println!("\t\t<hr/>");
+	println!("\t\t<p>{msg}</p>");
+	println!("\t</body>\n</html>");
+
+	std::process::exit(0)
+}
diff --git a/stats/src/style.css b/stats/src/style.css
new file mode 100644
index 0000000..5b3995d
--- /dev/null
+++ b/stats/src/style.css
@@ -0,0 +1,37 @@
+h1 {
+	font-family: sans-serif;
+}
+
+table {
+	border-collapse: collapse;
+	border: 2px solid gray;
+	margin: 1rem 0px;
+}
+
+tr {
+	background-color: white;
+}
+
+tbody>tr:nth-of-type(odd) {
+	background-color: cornsilk;
+}
+
+th,
+td {
+	border: 1px solid darkslateblue;
+	padding: 2px 3px;
+}
+
+thead th {
+	background-color: darksalmon;
+}
+
+th {
+	text-align: left;
+	padding: 4px 6px;
+	white-space: nowrap;
+}
+
+th.ttitle {
+	text-align: center;
+}
\ No newline at end of file
diff --git a/stats_module/src/lib.rs b/stats_module/src/lib.rs
deleted file mode 100644
index a56d22d..0000000
--- a/stats_module/src/lib.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-use rusqlite::{Connection, params};
-use smalldog::{ModuleRequest, ModuleResponse, Request, Response};
-use time::{Duration, OffsetDateTime};
-
-#[unsafe(no_mangle)]
-extern "C" fn cgi_handle(req: *const ModuleRequest) -> *const ModuleResponse {
-	let mut response = Response::new();
-	let mut body = String::new();
-
-	let request = Request::from_mod_request(req);
-	let db = if let Some(path) = request.header("CORGI_STATS_DB") {
-		Connection::open(path).unwrap()
-	} else {
-		return make_error(500, "could not open stats database");
-	};
-
-	let now = OffsetDateTime::now_utc();
-	let fifteen_ago = now - Duration::minutes(15);
-
-	let query = "SELECT count(requests.id) AS request_count, agents.agent FROM requests \
-		INNER JOIN agents ON requests.agent_id = agents.id \
-		WHERE
requests.timestamp > ?1 \ - GROUP BY requests.agent_id;"; - - let mut prepared = db.prepare(query).unwrap(); - let mut agents: Vec<(usize, String)> = prepared - .query_map(params![fifteen_ago], |row| Ok((row.get(0)?, row.get(1)?))) - .unwrap() - .map(|r| r.unwrap()) - .collect(); - - agents.sort_by(|a, b| a.0.cmp(&b.0).reverse()); - - body.push_str("<p>In the last fifteen minutes:<br/><code><pre>"); - body.push_str("total | req/m | agent\n"); - for (count, agent) in &agents { - body.push_str(&format!( - "{count:<5} | {:<5.1} | {agent}\n", - *count as f32 / 15.0 - )); - } - body.push_str("</pre></code></p>"); - response.body(body.into_bytes()); - - response.into_mod_response(200) -} - -fn make_error<S: AsRef<str>>(code: u16, msg: S) -> *const ModuleResponse { - let mut response = Response::new(); - response.header(c"Content-Length", c"text/html"); - response.body(msg.as_ref().as_bytes().to_vec()); - - response.into_mod_response(code) -} - -#[unsafe(no_mangle)] -extern "C" fn cgi_cleanup(response: *const ModuleResponse) { - Response::cleanup(response); -} |