diff options
-rw-r--r-- | .rustfmt.toml | 1 | ||||
-rw-r--r-- | Cargo.lock | 36 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | TODO.md | 16 | ||||
-rw-r--r-- | corgi/Cargo.toml | 1 | ||||
-rw-r--r-- | corgi/src/caller.rs | 109 | ||||
-rw-r--r-- | corgi/src/main.rs | 249 | ||||
-rw-r--r-- | corgi/src/settings.rs | 82 | ||||
-rw-r--r-- | corgi/src/stats.rs | 8 | ||||
-rw-r--r-- | corgi/src/util.rs | 66 | ||||
-rw-r--r-- | parrot_module/Cargo.toml | 10 | ||||
-rw-r--r-- | parrot_module/src/lib.rs | 67 | ||||
-rw-r--r-- | smalldog/Cargo.toml | 6 | ||||
-rw-r--r-- | smalldog/src/lib.rs | 133 | ||||
-rw-r--r-- | stats/Cargo.toml (renamed from stats_module/Cargo.toml) | 7 | ||||
-rw-r--r-- | stats/src/favicon.gif | bin | 0 -> 138 bytes | |||
-rw-r--r-- | stats/src/main.rs | 152 | ||||
-rw-r--r-- | stats/src/style.css | 37 | ||||
-rw-r--r-- | stats_module/src/lib.rs | 59 |
20 files changed, 454 insertions, 593 deletions
diff --git a/.rustfmt.toml b/.rustfmt.toml index 4639247..218e203 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -1,2 +1 @@ hard_tabs = true -chain_width = 100 diff --git a/Cargo.lock b/Cargo.lock index 6795892..eefd0c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,7 +89,6 @@ dependencies = [ "http-body-util", "hyper", "hyper-util", - "libloading", "regex-lite", "rusqlite", "sha2", @@ -97,6 +96,14 @@ dependencies = [ ] [[package]] +name = "corgi-stats" +version = "0.1.0" +dependencies = [ + "rusqlite", + "time", +] + +[[package]] name = "cpufeatures" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -318,16 +325,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] -name = "libloading" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" -dependencies = [ - "cfg-if", - "windows-targets", -] - -[[package]] name = "libsqlite3-sys" version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -384,10 +381,6 @@ name = "parrot" version = "0.1.0" [[package]] -name = "parrot_module" -version = "0.1.0" - -[[package]] name = "pin-project-lite" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -503,10 +496,6 @@ dependencies = [ ] [[package]] -name = "smalldog" -version = "0.1.0" - -[[package]] name = "smallvec" version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -523,15 +512,6 @@ dependencies = [ ] [[package]] -name = "stats_module" -version = "0.1.0" -dependencies = [ - "rusqlite", - "smalldog", - "time", -] - -[[package]] name = "syn" version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/Cargo.toml b/Cargo.toml index 47bbc10..9568d9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["corgi", "parrot", "parrot_module", "smalldog", "stats_module"] +members = ["corgi", "parrot", "stats"] resolver = "3" # use this profile like this: diff --git a/README.md b/README.md index c312ee6..ffa270b 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ Script <path-to-cgi-script> See [corgi.conf](corgi.conf) for the configuration I use with my cgit instance. -Scripts are tried in order, looking for one that matches. If none match, -the first script is ran. +Scripts are tried in order, looking for one that has a matching regex. If a +script does not have a regex, it is selected. If none match, 404 is returned. Sets the following environmental variables for the CGI script, many following [RFC 3875][rfc]: - **`GATEWAY_INTERFACE`** to the fixed value `CGI/1.1` @@ -44,4 +44,4 @@ They will be uppercased and hyphens replaced with underscores. Any environmental variable may be overridden if it is set in the configuration file, except the `CONTENT_LENGTH` envar. -[rfc]: https://datatracker.ietf.org/doc/html/rfc3875 \ No newline at end of file +[rfc]: https://datatracker.ietf.org/doc/html/rfc3875 diff --git a/TODO.md b/TODO.md index a439640..d7c4aa2 100644 --- a/TODO.md +++ b/TODO.md @@ -10,11 +10,6 @@ a `catch_unwind`? there are complications with the first one, but perhaps we can make it work? -(5) Crate For The Module System - so we don't have to copy the weird structs. and also so maybe we - can make it safer? A C header, too, maybe? that just define the - struct. and really some kind of documentation, probably. - DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) ======================================================================= @@ -25,4 +20,13 @@ DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) DONE :) (2) Support Matching CGI Based On Path The other requirement for git-http-backend, which needs to - trigger on the regex `/.+/(info/refs|git-upload-pack)` \ No newline at end of file + trigger on the regex `/.+/(info/refs|git-upload-pack)` + +(5) Crate For The Module System + so we don't have to copy the weird structs. and also so maybe we + can make it safer? A C header, too, maybe? that just define the + struct. and really some kind of documentation, probably. + + EWONTFIX + lol. module system was ripped out because it turns out there is no + good way to handle more than one request concurrently, it seems. diff --git a/corgi/Cargo.toml b/corgi/Cargo.toml index ea5fe74..f3f6bda 100644 --- a/corgi/Cargo.toml +++ b/corgi/Cargo.toml @@ -13,7 +13,6 @@ edition = "2024" base64 = "0.22.1" http-body-util = "0.1.3" hyper-util = { version = "0.1.10", features = ["tokio"] } -libloading = "0.8.6" regex-lite = "0.1.6" rusqlite = { version = "0.34.0", features = ["bundled"] } sha2 = "0.10.8" diff --git a/corgi/src/caller.rs b/corgi/src/caller.rs index a8b04c0..29be5ca 100644 --- a/corgi/src/caller.rs +++ b/corgi/src/caller.rs @@ -1,19 +1,8 @@ -use std::{ - ffi::{self, CString}, - io::Write, - net::IpAddr, - process::Stdio, - ptr, - str::FromStr, -}; +use std::{net::IpAddr, process::Stdio}; -use tokio::{ - io::AsyncWriteExt, - process::Command, - sync::oneshot::{self, Receiver, Sender}, -}; +use tokio::{io::AsyncWriteExt, process::Command}; -use crate::{Script, ScriptKind}; +use crate::Script; pub struct HttpRequest { pub content_type: String, @@ -62,12 +51,6 @@ impl HttpRequest { } pub async fn call_and_parse_cgi(script: Script, http: HttpRequest) -> CgiResponse { - if script.kind != ScriptKind::Executable { - eprintln!("Somehow made it to executable path with module script"); - eprintln!("Script: {}", script.name); - panic!("TODO: recover") - } - let mut cmd = Command::new(&script.filename); // Set env specified in the conf. Be sure we do this after we @@ -168,89 +151,3 @@ pub struct CgiResponse { /// CGI response body pub body: Option<Vec<u8>>, } - -#[repr(C)] -struct ModuleRequest { - headers_len: ffi::c_ulong, - headers: *const [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -#[repr(C)] -struct ModuleResponse { - status: ffi::c_ushort, - headers_len: ffi::c_ulong, - headers: &'static [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -type HandleFn = unsafe extern "C" fn(*const ModuleRequest) -> *const ModuleResponse; -type CleanupFn = unsafe extern "C" fn(*const ModuleResponse); - -pub async fn call_and_parse_module(script: Script, req: HttpRequest) -> CgiResponse { - let (tx, rx) = oneshot::channel(); - std::thread::spawn(move || unsafe { module_thread(script, req, tx) }); - - rx.await.unwrap() -} - -unsafe fn module_thread(script: Script, req: HttpRequest, tx: Sender<CgiResponse>) { - let env: Vec<(String, String)> = req - .build_kv() - .into_iter() - .chain(req.http_headers.into_iter()) - .chain(script.env.into_iter()) - .collect(); - - let mut headers_owned = vec![]; - for (k, v) in env { - headers_owned.push([ - CString::from_str(k.as_str()).unwrap(), - CString::from_str(v.as_str()).unwrap(), - ]); - } - - let headers: Vec<[*const ffi::c_char; 2]> = - headers_owned.iter().map(|kvarr| [kvarr[0].as_ptr(), kvarr[1].as_ptr()]).collect(); - - let modreq = ModuleRequest { - headers_len: headers.len() as u64, - headers: &headers[..] as *const [[*const ffi::c_char; 2]], - body_len: req.body.as_ref().map(|v| v.len()).unwrap_or(0) as u64, - body: req.body.as_ref().map(|v| v.as_ptr()).unwrap_or(ptr::null()), - }; - - let mut cgi = CgiResponse { - status: 200, - headers: vec![], - body: None, - }; - - unsafe { - let lib = libloading::Library::new(script.filename).unwrap(); - let handle: libloading::Symbol<HandleFn> = lib.get(b"cgi_handle").unwrap(); - let free: libloading::Symbol<CleanupFn> = lib.get(b"cgi_cleanup").unwrap(); - - let response = handle((&modreq) as *const ModuleRequest); - let response_ref = response.as_ref().unwrap(); - - for idx in 0..response_ref.headers_len { - let kvarr = response_ref.headers[idx as usize]; - let k = ffi::CStr::from_ptr(kvarr[0]).to_string_lossy(); - let v = ffi::CStr::from_ptr(kvarr[1]).to_string_lossy(); - cgi.headers.push((k.as_bytes().to_vec(), v.as_bytes().to_vec())); - } - - let maybe_body: Option<Vec<u8>> = response_ref - .body - .as_ref() - .map(|b| std::slice::from_raw_parts(b, response_ref.body_len as usize).to_vec()); - cgi.body = maybe_body; - - free(response); - }; - - tx.send(cgi).unwrap() -} diff --git a/corgi/src/main.rs b/corgi/src/main.rs index 6a3c528..1772d68 100644 --- a/corgi/src/main.rs +++ b/corgi/src/main.rs @@ -1,119 +1,34 @@ -use std::{ - net::{IpAddr, SocketAddr}, - path::PathBuf, - pin::Pin, - process::Stdio, - sync::Arc, - time::Instant, -}; +use core::fmt; +use std::{net::SocketAddr, pin::Pin, sync::Arc}; use caller::HttpRequest; -use confindent::{Confindent, Value, ValueParseError}; use http_body_util::{BodyExt, Full}; use hyper::{ HeaderMap, Request, Response, StatusCode, body::{Bytes, Incoming}, - header::HeaderValue, server::conn::http1, service::Service, }; use hyper_util::rt::TokioIo; -use regex_lite::Regex; +use settings::{Script, Settings}; use stats::Stats; -use tokio::{io::AsyncWriteExt, net::TcpListener, process::Command, runtime::Runtime}; +use tokio::{net::TcpListener, runtime::Runtime}; +use util::owned_header; mod caller; +mod settings; mod stats; - -#[derive(Clone, Debug)] -pub struct Settings { - port: u16, - scripts: Vec<Script>, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum ScriptKind { - Executable, - Object, -} - -#[derive(Clone, Debug)] -pub struct Script { - name: String, - kind: ScriptKind, - regex: Option<Regex>, - filename: String, - env: Vec<(String, String)>, -} - -const CONF_DEFAULT: &str = "/etc/corgi.conf"; +mod util; fn main() { - let conf_path = std::env::args().nth(1).unwrap_or(String::from(CONF_DEFAULT)); - let conf = Confindent::from_file(conf_path).expect("failed to open conf"); - - let mut settings = Settings { - port: 26744, - scripts: conf.children("Script").into_iter().map(parse_script_conf).collect(), - }; - - if let Some(server) = conf.child("Server") { - match server.child_parse("Port") { - Err(ValueParseError::NoValue) => (), - Err(err) => { - eprintln!("Server.Port is malformed: {err}"); - std::process::exit(1); - } - Ok(port) => settings.port = port, - } - } - - let stats = Stats::new(PathBuf::from( - conf.get("Server/StatsDb").unwrap().to_owned(), - )); + let settings = Settings::get(); + let stats = Stats::new(&settings.stats_path); stats.create_tables(); let rt = Runtime::new().unwrap(); rt.block_on(async { run(settings, stats).await }); } -fn parse_script_conf(conf: &Value) -> Script { - let name = conf.value_owned().expect("Missing value for 'Script' key"); - let filename = conf.child_owned("Path").expect("Missing 'Path' key"); - let environment = conf.child("Environment"); - let env = environment - .map(|e| e.values().map(|v| (v.key_owned(), v.value_owned().unwrap())).collect()); - - let regex = match conf.get("Match/Regex") { - None => None, - Some(restr) => match Regex::new(restr) { - Err(err) => { - eprintln!("Failed to compile regex: {restr}\nerror: {err}"); - std::process::exit(1); - } - Ok(re) => Some(re), - }, - }; - - let kind = match conf.get("Type") { - None => ScriptKind::Executable, - Some("executable") => ScriptKind::Executable, - Some("object") => ScriptKind::Object, - Some(kind) => { - eprintln!("'{kind}' is not a valid script type"); - std::process::exit(1) - } - }; - - Script { - name, - kind, - regex, - filename, - env: env.unwrap_or_default(), - } -} - // We have tokio::main at home :) async fn run(settings: Settings, stats: Stats) { let addr = SocketAddr::from(([0, 0, 0, 0], settings.port)); @@ -164,61 +79,48 @@ impl Svc { caddr: SocketAddr, req: Request<Incoming>, ) -> Response<Full<Bytes>> { - let start = Instant::now(); + match Self::handle_fallible(settings, stats, caddr, req).await { + Err(re) => re.into_response(), + Ok(response) => response, + } + } + async fn handle_fallible( + settings: Settings, + stats: Arc<Stats>, + caddr: SocketAddr, + req: Request<Incoming>, + ) -> Result<Response<Full<Bytes>>, RuntimeError> { // Collect things we need from the request before we eat it's body let method = req.method().as_str().to_ascii_uppercase(); let version = req.version(); - let path = req.uri().path().to_owned(); - let query = req.uri().query().unwrap_or_default().to_owned(); - let headers = req.headers().clone(); - - let body = req.into_body().collect().await.unwrap().to_bytes().to_vec(); - let content_length = body.len(); - - let mut maybe_script = None; - for set_script in settings.scripts { - if let Some(regex) = set_script.regex.as_ref() { - if regex.is_match(&path) { - maybe_script = Some(set_script); - break; - } - } else { - maybe_script = Some(set_script); - break; - } - } - - let script = match maybe_script { - Some(script) => script, - None => { - eprintln!("path didn't match any script"); - panic!("TODO recover?"); - } - }; + let path = util::url_decode(req.uri().path(), false)?; + let query = req + .uri() + .query() + .map(|s| util::url_decode(s, false)) + .transpose()? + .unwrap_or_default(); - let content_type = headers - .get("content-type") - .map(|s| s.to_str().ok()) - .flatten() - .unwrap_or_default() - .to_owned(); + let script = Self::select_script(&settings, &path).ok_or(RuntimeError::NoScript)?; - let uagent = headers - .get("user-agent") - .map(|s| s.to_str().ok()) - .flatten() - .unwrap_or_default() - .to_owned(); + // Clone the headers and extract what we need + let headers = req.headers().clone(); + let content_type = owned_header(headers.get("content-type")).unwrap_or_default(); + let uagent = owned_header(headers.get("user-agent")).unwrap_or_default(); // Find the client address let client_addr = { - let x_forward = Self::parse_addr_from_header(headers.get("x-forwarded-for")); - let forward = Self::parse_addr_from_header(headers.get("forwarded-for")); + let x_forward = util::parse_from_header(headers.get("x-forwarded-for")); + let forward = util::parse_from_header(headers.get("forwarded-for")); forward.unwrap_or(x_forward.unwrap_or(caddr.ip())) }; + // Finally, get the body which consumes the request + let body = req.into_body().collect().await.unwrap().to_bytes().to_vec(); + let content_length = body.len(); + let server_name = headers .get("Host") .expect("no http host header set") @@ -239,16 +141,9 @@ impl Svc { body: if content_length > 0 { Some(body) } else { None }, }; - let start_cgi = Instant::now(); - let cgi_response = match script.kind { - ScriptKind::Executable => { - caller::call_and_parse_cgi(script.clone(), http_request).await - } - ScriptKind::Object => caller::call_and_parse_module(script.clone(), http_request).await, - }; - let cgi_time = start_cgi.elapsed(); - + let cgi_response = caller::call_and_parse_cgi(script.clone(), http_request).await; let status = StatusCode::from_u16(cgi_response.status).unwrap(); + let mut response = Response::builder().status(status); for (key, value) in cgi_response.headers { @@ -263,20 +158,32 @@ impl Svc { }; println!( - "served to [{client_addr}]\n\tscript: {}\n\tpath: {path}\n\tcgi took {}ms. total time {}ms\n\tUA: {uagent}", - &script.name, - cgi_time.as_millis(), - start.elapsed().as_millis() + "served to [{client_addr}]\n\tscript: {}\n\tpath: {path}\n\tUA: {uagent}", + &script.name ); stats.log_request(db_req); - let response_body = cgi_response.body.map(|v| Bytes::from(v)).unwrap_or(Bytes::new()); - response.body(Full::new(response_body)).unwrap() + let response_body = cgi_response + .body + .map(|v| Bytes::from(v)) + .unwrap_or(Bytes::new()); + + Ok(response.body(Full::new(response_body)).unwrap()) } - fn parse_addr_from_header(maybe_hval: Option<&HeaderValue>) -> Option<IpAddr> { - maybe_hval.map(|h| h.to_str().ok()).flatten().map(|s| s.parse().ok()).flatten() + fn select_script<'s>(settings: &'s Settings, path: &str) -> Option<&'s Script> { + for script in &settings.scripts { + if let Some(regex) = script.regex.as_ref() { + if regex.is_match(path) { + return Some(script); + } + } else { + return Some(script); + } + } + + None } fn build_http_vec(headers: HeaderMap) -> Vec<(String, String)> { @@ -312,13 +219,35 @@ impl Svc { } } -fn path_to_name(path: &str) -> String { - let mut ret = String::with_capacity(path.len()); - for ch in path.chars() { - match ch { - '/' => ret.push('-'), - ch => ret.push(ch), +fn status_page<D: fmt::Display>(status: u16, msg: D) -> Response<Full<Bytes>> { + let body_str = format!( + "<html>\n\ + \t<head><title>{status}</title></head>\n\ + \t<body style='width: 20rem; padding: 0px; margin: 2rem;'>\n\ + \t\t<h1>{status}</h1>\n\ + \t\t<hr/>\n\ + \t\t<p>{msg}</p>\n\ + \t</body>\n\ + </html>" + ); + + Response::builder() + .status(status) + .header("Content-Type", "text/html") + .body(Full::new(body_str.into())) + .unwrap() +} + +enum RuntimeError { + MalformedRequest, + NoScript, +} + +impl RuntimeError { + pub fn into_response(&self) -> Response<Full<Bytes>> { + match self { + Self::MalformedRequest => status_page(400, "bad request"), + Self::NoScript => status_page(404, "failed to route request"), } } - ret } diff --git a/corgi/src/settings.rs b/corgi/src/settings.rs new file mode 100644 index 0000000..ee701b0 --- /dev/null +++ b/corgi/src/settings.rs @@ -0,0 +1,82 @@ +use std::path::PathBuf; + +use confindent::{Confindent, Value, ValueParseError}; +use regex_lite::Regex; + +const CONF_DEFAULT: &str = "/etc/corgi.conf"; + +#[derive(Clone, Debug)] +pub struct Script { + pub name: String, + pub regex: Option<Regex>, + pub filename: String, + pub env: Vec<(String, String)>, +} + +#[derive(Clone, Debug)] +pub struct Settings { + pub port: u16, + pub scripts: Vec<Script>, + pub stats_path: PathBuf, +} + +impl Settings { + pub fn get() -> Self { + let conf_path = std::env::args() + .nth(1) + .unwrap_or(String::from(CONF_DEFAULT)); + let conf = Confindent::from_file(conf_path).expect("failed to open conf"); + + let mut settings = Settings { + port: 26744, + scripts: conf + .children("Script") + .into_iter() + .map(parse_script_conf) + .collect(), + stats_path: conf.get_parse("Server/StatsDb").unwrap(), + }; + + if let Some(server) = conf.child("Server") { + match server.child_parse("Port") { + Err(ValueParseError::NoValue) => (), + Err(err) => { + eprintln!("Server.Port is malformed: {err}"); + std::process::exit(1); + } + Ok(port) => settings.port = port, + } + } + + settings + } +} + +fn parse_script_conf(conf: &Value) -> Script { + let name = conf.value_owned().expect("Missing value for 'Script' key"); + let filename = conf.child_owned("Path").expect("Missing 'Path' key"); + let environment = conf.child("Environment"); + let env = environment.map(|e| { + e.values() + .map(|v| (v.key_owned(), v.value_owned().unwrap())) + .collect() + }); + + let regex = match conf.get("Match/Regex") { + None => None, + Some(restr) => match Regex::new(restr) { + Err(err) => { + eprintln!("Failed to compile regex: {restr}\nerror: {err}"); + std::process::exit(1); + } + Ok(re) => Some(re), + }, + }; + + Script { + name, + regex, + filename, + env: env.unwrap_or_default(), + } +} diff --git a/corgi/src/stats.rs b/corgi/src/stats.rs index 0e3b99a..bf9c1ec 100644 --- a/corgi/src/stats.rs +++ b/corgi/src/stats.rs @@ -1,8 +1,4 @@ -use std::{ - net::{IpAddr, SocketAddr}, - path::PathBuf, - sync::Mutex, -}; +use std::{net::IpAddr, path::Path, sync::Mutex}; use base64::{Engine, prelude::BASE64_STANDARD_NO_PAD}; use rusqlite::{Connection, OptionalExtension, params}; @@ -14,7 +10,7 @@ pub struct Stats { } impl Stats { - pub fn new(db_path: PathBuf) -> Self { + pub fn new(db_path: &Path) -> Self { Self { conn: Mutex::new(Connection::open(db_path).unwrap()), } diff --git a/corgi/src/util.rs b/corgi/src/util.rs new file mode 100644 index 0000000..727c8c7 --- /dev/null +++ b/corgi/src/util.rs @@ -0,0 +1,66 @@ +use std::str::FromStr; + +use hyper::header::HeaderValue; + +use crate::RuntimeError; + +// Ripped and modified from gennyble/mavourings query.rs +/// Decode a URL encoded string, optionally treating a plus, '+', as a space. If +/// the final string is not UTF8, RuntimeError::MalformedRequest is returned +pub fn url_decode(urlencoded: &str, plus_as_space: bool) -> Result<String, RuntimeError> { + let mut uncoded: Vec<u8> = Vec::with_capacity(urlencoded.len()); + + let mut chars = urlencoded.chars().peekable(); + loop { + let mut utf8_bytes = [0; 4]; + match chars.next() { + Some('+') => match plus_as_space { + true => uncoded.push(b' '), + false => uncoded.push(b'+'), + }, + Some('%') => match chars.peek() { + Some(c) if c.is_ascii_hexdigit() => { + let upper = chars.next().unwrap(); + + if let Some(lower) = chars.peek() { + if lower.is_ascii_hexdigit() { + let upper = upper.to_digit(16).unwrap(); + let lower = chars.next().unwrap().to_digit(16).unwrap(); + + uncoded.push(upper as u8 * 16 + lower as u8); + continue; + } + } + + uncoded.push(b'%'); + uncoded.extend_from_slice(upper.encode_utf8(&mut utf8_bytes).as_bytes()); + } + _ => { + uncoded.push(b'%'); + } + }, + Some(c) => { + uncoded.extend_from_slice(c.encode_utf8(&mut utf8_bytes).as_bytes()); + } + None => { + uncoded.shrink_to_fit(); + return String::from_utf8(uncoded).map_err(|_| RuntimeError::MalformedRequest); + } + } + } +} + +pub fn parse_from_header<T: FromStr>(maybe_hval: Option<&HeaderValue>) -> Option<T> { + maybe_hval + .map(|h| h.to_str().ok()) + .flatten() + .map(|s| s.parse().ok()) + .flatten() +} + +pub fn owned_header(maybe_hval: Option<&HeaderValue>) -> Option<String> { + maybe_hval + .map(|h| h.to_str().ok()) + .flatten() + .map(<_>::to_owned) +} diff --git a/parrot_module/Cargo.toml b/parrot_module/Cargo.toml deleted file mode 100644 index f4ced8e..0000000 --- a/parrot_module/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "parrot_module" -version = "0.1.0" -edition = "2024" - -[lib] -name = "parrot" -crate-type = ["cdylib"] - -[dependencies] diff --git a/parrot_module/src/lib.rs b/parrot_module/src/lib.rs deleted file mode 100644 index 38148ae..0000000 --- a/parrot_module/src/lib.rs +++ /dev/null @@ -1,67 +0,0 @@ -use std::ffi::{self, CStr, CString}; - -#[repr(C)] -struct ModuleRequest<'req> { - headers_len: ffi::c_ulong, - headers: &'req [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -#[repr(C)] -struct ModuleResponse { - status: ffi::c_ushort, - headers_len: ffi::c_ulong, - headers: &'static [[*const ffi::c_char; 2]], - body_len: ffi::c_ulong, - body: *const u8, -} - -const HEADERS: &'static [[*const ffi::c_char; 2]] = - &[[c"Content-Type".as_ptr(), c"text/plain".as_ptr()]]; - -#[unsafe(no_mangle)] -extern "C" fn cgi_handle(req: *const ModuleRequest) -> *const ModuleResponse { - let mut ret = String::new(); - - // unwrap is bad here - let reqref = unsafe { req.as_ref() }.unwrap(); - - for idx in 0..reqref.headers_len { - let kvarr = reqref.headers[idx as usize]; - let k = unsafe { CStr::from_ptr(kvarr[0]) }.to_string_lossy(); - let v = unsafe { CStr::from_ptr(kvarr[1]) }.to_string_lossy(); - - // While debugging I removed the format!() because it was SIGSEGVing - // as the String it allocated freed, and now that this is here it can stay - ret.push_str(&k); - ret.push(':'); - ret.push(' '); - ret.push_str(&v); - ret.push('\n') - } - - let body = CString::new(ret).unwrap(); - - let resp = ModuleResponse { - status: 200, - headers_len: 1, - headers: HEADERS, - body_len: body.as_bytes().len() as u64, - body: body.into_raw() as *const u8, - }; - - let boxed = Box::new(resp); - Box::<ModuleResponse>::into_raw(boxed) -} - -#[unsafe(no_mangle)] -extern "C" fn cgi_cleanup(req: *const ModuleResponse) { - // from_raw what we need to here so that these get dropped - let boxed = unsafe { Box::from_raw(req as *mut ModuleResponse) }; - let body = unsafe { CString::from_raw(boxed.body as *mut i8) }; - - // Explicitly calling drop here to feel good about myself - drop(body); - drop(boxed); -} diff --git a/smalldog/Cargo.toml b/smalldog/Cargo.toml deleted file mode 100644 index 6ae5cc4..0000000 --- a/smalldog/Cargo.toml +++ /dev/null @@ -1,6 +0,0 @@ -[package] -name = "smalldog" -version = "0.1.0" -edition = "2024" - -[dependencies] diff --git a/smalldog/src/lib.rs b/smalldog/src/lib.rs deleted file mode 100644 index e139d30..0000000 --- a/smalldog/src/lib.rs +++ /dev/null @@ -1,133 +0,0 @@ -use core::ffi; -use std::{borrow::Cow, ffi::CStr, ptr, sync::Mutex}; - -#[repr(C)] -pub struct ModuleRequest<'req> { - pub headers_len: ffi::c_ulong, - pub headers: &'req [[*const ffi::c_char; 2]], - pub body_len: ffi::c_ulong, - pub body: *const u8, -} - -pub struct Request<'req> { - headers: Vec<(Cow<'req, str>, Cow<'req, str>)>, - body: Option<&'req [u8]>, -} - -impl<'req> Request<'req> { - pub fn from_mod_request(request: *const ModuleRequest<'req>) -> Self { - // SAFTEY: corgi will never give us a null pointer - let reqref = unsafe { request.as_ref() }.unwrap(); - - let mut headers = vec![]; - for idx in 0..reqref.headers_len as usize { - let kvarr = reqref.headers[idx as usize]; - let k = unsafe { CStr::from_ptr(kvarr[0]) }.to_string_lossy(); - let v = unsafe { CStr::from_ptr(kvarr[1]) }.to_string_lossy(); - headers.push((k, v)); - } - - let body = if reqref.body.is_null() { - None - } else { - Some(unsafe { std::slice::from_raw_parts(reqref.body, reqref.body_len as usize) }) - }; - - Self { headers, body } - } - - pub fn header(&self, key: &str) -> Option<&str> { - for (hkey, hval) in &self.headers { - if hkey == key { - return Some(hval); - } - } - - None - } - - pub fn headers(&self) -> &[(Cow<str>, Cow<str>)] { - &self.headers - } - - pub fn body(&self) -> Option<&[u8]> { - self.body - } -} - -#[repr(C)] -pub struct ModuleResponse { - pub status: ffi::c_ushort, - pub headers_len: ffi::c_ulong, - pub headers: &'static [[*const ffi::c_char; 2]], - pub body_len: ffi::c_ulong, - pub body: *const u8, -} - -const HEADERS_LEN: usize = 64; -static mut HEADERS: [[*const ffi::c_char; 2]; HEADERS_LEN] = [[ptr::null(), ptr::null()]; 64]; -static RESPONSE: Mutex<Option<Response>> = Mutex::new(None); - -pub struct Response { - headers: Vec<(Cow<'static, CStr>, Cow<'static, CStr>)>, - body: Vec<u8>, -} - -impl Response { - pub fn new() -> Self { - Self { - headers: vec![], - body: vec![], - } - } - - pub fn into_mod_response(self, status: u16) -> *const ModuleResponse { - let mut lock = RESPONSE.lock().unwrap(); - *lock = Some(self); - - let this = lock.as_mut().unwrap(); - - for (idx, (key, value)) in this.headers.iter().enumerate().take(HEADERS_LEN) { - unsafe { - HEADERS[idx][0] = key.as_ptr(); - HEADERS[idx][1] = value.as_ptr(); - } - } - - let headers_len = this.headers.len().min(HEADERS_LEN) as u64; - let boxed = Box::new(ModuleResponse { - status, - headers_len, - headers: unsafe { &HEADERS[..headers_len as usize] }, - body_len: this.body.len() as u64, - body: this.body.as_ptr(), - }); - - Box::<ModuleResponse>::into_raw(boxed) - } - - pub fn header<K: Into<Cow<'static, CStr>>, V: Into<Cow<'static, CStr>>>( - &mut self, - key: K, - value: V, - ) -> &mut Self { - self.headers.push((key.into(), value.into())); - self - } - - pub fn body(&mut self, vec: Vec<u8>) -> &mut Self { - self.body = vec; - self - } - - pub fn cleanup(response: *const ModuleResponse) { - let mut lock = RESPONSE.lock().unwrap(); - match lock.take() { - Some(response) => drop(response), - None => (), - } - - let boxed = unsafe { Box::from_raw(response as *mut ModuleResponse) }; - drop(boxed); - } -} diff --git a/stats_module/Cargo.toml b/stats/Cargo.toml index 95b6d41..a012b94 100644 --- a/stats_module/Cargo.toml +++ b/stats/Cargo.toml @@ -1,13 +1,8 @@ [package] -name = "stats_module" +name = "corgi-stats" version = "0.1.0" edition = "2024" -[lib] -name = "stats" -crate-type = ["cdylib"] - [dependencies] rusqlite = { version = "0.34.0", features = ["bundled", "time"] } time = "0.3.40" -smalldog = { path = "../smalldog" } diff --git a/stats/src/favicon.gif b/stats/src/favicon.gif new file mode 100644 index 0000000..371343b --- /dev/null +++ b/stats/src/favicon.gif Binary files differdiff --git a/stats/src/main.rs b/stats/src/main.rs new file mode 100644 index 0000000..4b57834 --- /dev/null +++ b/stats/src/main.rs @@ -0,0 +1,152 @@ +use std::{io::Write, time::Instant}; + +use rusqlite::{Connection, params}; +use time::{Duration, OffsetDateTime}; + +// Thank you, cat, for optimizing my query +const TOP_TEN_ALL_TIME: &str = "\ + SELECT reqs.cnt, agents.agent + FROM agents + JOIN ( + SELECT count(id) as cnt, agent_id + FROM requests + GROUP BY agent_id + ) reqs + ON reqs.agent_id=agents.id + ORDER BY reqs.cnt DESC LIMIT 10; +"; + +const STYLE: &'static str = include_str!("style.css"); +const FAVICON: &'static [u8] = include_bytes!("favicon.gif"); + +fn main() { + let Some(path) = std::env::var("PATH_INFO").ok() else { + error_and_die(500, "no path provided"); + }; + + match path.as_ref() { + "/stats/favicon.gif" => { + println!("Content-Type: image/png\n"); + std::io::stdout().write_all(FAVICON).unwrap(); + std::process::exit(1); + } + "/stats" | "/stats/" => (), + _ => error_and_die(404, "not found"), + } + + let db_path = std::env::var("CORGI_STATS_DB").ok(); + let db = if let Some(path) = db_path { + if let Ok(db) = Connection::open(path) { + db + } else { + error_and_die(500, "failed to open database"); + } + } else { + error_and_die(500, "database key not set"); + }; + + let now = OffsetDateTime::now_utc(); + let fifteen_ago = now - Duration::minutes(15); + + let query = "SELECT count(requests.id) AS request_count, agents.agent FROM requests \ + INNER JOIN agents ON requests.agent_id = agents.id \ + WHERE requests.timestamp > ?1 \ + GROUP BY requests.agent_id;"; + + let start = Instant::now(); + let mut prepared = db.prepare(query).unwrap(); + let mut agents: Vec<(usize, String)> = prepared + .query_map(params![fifteen_ago], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + agents.sort_by(|a, b| a.0.cmp(&b.0).reverse()); + + let mut prepared = db.prepare(TOP_TEN_ALL_TIME).unwrap(); + let highest_five: Vec<(usize, String)> = prepared + .query_map(params![], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + let sum_highest_five = highest_five.iter().fold(0, |acc, (count, _)| acc + count); + let elapsed = start.elapsed(); + + println!("Content-Type: text/html\n"); + println!("<html>"); + #[rustfmt::skip] + println!("<head>\n\ + <title>corgi stats</title>\n\ + <style>\n{STYLE}\n</style>\n\ + <link rel='icon' type='image/gif' href='/stats/favicon.gif' />\n\ + </head>"); + + println!("<body>"); + println!("<h1>Corgi Stats :)</h1>"); + println!("<p>generated in {}ms</p>", elapsed.as_millis()); + + #[rustfmt::skip] + println!("<table>\n\ + <thead>\n\ + \t<tr>\n\ + \t\t<th scope='row' colspan='3' class='ttitle'>Requests for the last 15 minutes</th>\n\ + \t</tr>\n\ + \t<tr>\n\ + \t\t<th># Req.</th>\n\ + \t\t<th>Req/min</th>\n\ + \t\t<th>Agent</th>\n\ + \t</tr>\n\ + </thead>\n<tbody>"); + + for (count, agent) in &agents { + #[rustfmt::skip] + println!("<tr>\n\ + \t<td>{count}</td>\n\ + \t<td>{:.1}</td>\n\ + \t<td>{agent}</td>\n\ + </tr>", + *count as f32 / 15.0); + } + + println!("</tbody>\n</table>"); + + #[rustfmt::skip] + println!("<table>\n\ + <thead>\n\ + \t<tr>\n\ + \t\t<th scope='row' colspan='3' class='ttitle'>Top 10 User Agents All Time</th>\n\ + \t</tr>\n\ + \t<tr>\n\ + \t\t<th># Req.</th>\n\ + \t\t<th>Req/min</th>\n\ + \t\t<th>Agent</th>\n\ + \t</tr>\n\ + </thead>\n<tbody>"); + + // Finish what we started + println!("</body>\n</html>"); + + for (count, agent) in highest_five { + #[rustfmt::skip] + println!("<tr>\n\ + \t<td>{count}</td>\n\ + \t<td>{:.1}</td>\n\ + \t<td>{agent}</td>\n\ + </tr>", + (count as f32 / sum_highest_five as f32) * 100.0); + } +} + +fn error_and_die<S: Into<String>>(status: u16, msg: S) -> ! { + println!("Status: {status}"); + println!("Content-Type: text/html\n"); + println!("<html>"); + println!("\t<head><title>{status}</title></head>"); + println!("\t<body style='width: 20rem; padding: 0px; margin: 2rem;'>"); + println!("\t\t<h1>{status}</h1>"); + println!("\t\t<hr/>"); + println!("\t\t<p>{}</p>", msg.into()); + println!("\t</body>\n</html>"); + + std::process::exit(0); +} diff --git a/stats/src/style.css b/stats/src/style.css new file mode 100644 index 0000000..5b3995d --- /dev/null +++ b/stats/src/style.css @@ -0,0 +1,37 @@ +h1 { + font-family: sans-serif; +} + +table { + border-collapse: collapse; + border: 2px solid gray; + margin: 1rem 0px; +} + +tr { + background-color: white; +} + +tbody>tr:nth-of-type(odd) { + background-color: cornsilk; +} + +th, +td { + border: 1px solid darkslateblue; + padding: 2px 3px; +} + +thead th { + background-color: darksalmon; +} + +th { + text-align: left; + padding: 4px 6px; + white-space: nowrap; +} + +th.ttitle { + text-align: center; +} \ No newline at end of file diff --git a/stats_module/src/lib.rs b/stats_module/src/lib.rs deleted file mode 100644 index a56d22d..0000000 --- a/stats_module/src/lib.rs +++ /dev/null @@ -1,59 +0,0 @@ -use rusqlite::{Connection, params}; -use smalldog::{ModuleRequest, ModuleResponse, Request, Response}; -use time::{Duration, OffsetDateTime}; - -#[unsafe(no_mangle)] -extern "C" fn cgi_handle(req: *const ModuleRequest) -> *const ModuleResponse { - let mut response = Response::new(); - let mut body = String::new(); - - let request = Request::from_mod_request(req); - let db = if let Some(path) = request.header("CORGI_STATS_DB") { - Connection::open(path).unwrap() - } else { - return make_error(500, "could not open stats database"); - }; - - let now = OffsetDateTime::now_utc(); - let fifteen_ago = now - Duration::minutes(15); - - let query = "SELECT count(requests.id) AS request_count, agents.agent FROM requests \ - INNER JOIN agents ON requests.agent_id = agents.id \ - WHERE requests.timestamp > ?1 \ - GROUP BY requests.agent_id;"; - - let mut prepared = db.prepare(query).unwrap(); - let mut agents: Vec<(usize, String)> = prepared - .query_map(params![fifteen_ago], |row| Ok((row.get(0)?, row.get(1)?))) - .unwrap() - .map(|r| r.unwrap()) - .collect(); - - agents.sort_by(|a, b| a.0.cmp(&b.0).reverse()); - - body.push_str("<p>In the last fifteen minutes:<br/><code><pre>"); - body.push_str("total | req/m | agent\n"); - for (count, agent) in &agents { - body.push_str(&format!( - "{count:<5} | {:<5.1} | {agent}\n", - *count as f32 / 15.0 - )); - } - body.push_str("</pre></code></p>"); - response.body(body.into_bytes()); - - response.into_mod_response(200) -} - -fn make_error<S: AsRef<str>>(code: u16, msg: S) -> *const ModuleResponse { - let mut response = Response::new(); - response.header(c"Content-Length", c"text/html"); - response.body(msg.as_ref().as_bytes().to_vec()); - - response.into_mod_response(code) -} - -#[unsafe(no_mangle)] -extern "C" fn cgi_cleanup(response: *const ModuleResponse) { - Response::cleanup(response); -} |