diff options
author | gennyble <gen@nyble.dev> | 2025-03-16 13:49:12 -0500 |
---|---|---|
committer | gennyble <gen@nyble.dev> | 2025-03-16 13:49:12 -0500 |
commit | aad1583d8b5ae737bb424c461925bc69119c36e9 (patch) | |
tree | 8fb341a019fe167270d086f4fa075965c727f506 | |
parent | 17ca7fb2fbb9b6202dcdb99ed296dfb97822cc19 (diff) | |
download | corgi-aad1583d8b5ae737bb424c461925bc69119c36e9.tar.gz corgi-aad1583d8b5ae737bb424c461925bc69119c36e9.zip |
Refactor and prepare for object loading
-rw-r--r-- | corgi/Cargo.toml | 9 | ||||
-rw-r--r-- | corgi/src/caller.rs | 145 | ||||
-rw-r--r-- | corgi/src/main.rs | 178 |
3 files changed, 204 insertions, 128 deletions
diff --git a/corgi/Cargo.toml b/corgi/Cargo.toml index 5847211..0abd9df 100644 --- a/corgi/Cargo.toml +++ b/corgi/Cargo.toml @@ -1,17 +1,22 @@ [package] name = "corgi" +description = "a simple CGI server" +authors = ["gennyble <gen@nyble.dev>"] +license = "ISC" +repository = "https://git.nyble.dev/corgi/about" +readme = "../README.md" + version = "1.0.0" edition = "2024" [dependencies] http-body-util = "0.1.3" hyper-util = { version = "0.1.10", features = ["tokio"] } +libloading = "0.8.6" regex-lite = "0.1.6" [dependencies.confindent] version = "2.2.1" -git = "https://github.com/gennyble/confindent" -branch = "v2" [dependencies.tokio] version = "1.44.0" diff --git a/corgi/src/caller.rs b/corgi/src/caller.rs new file mode 100644 index 0000000..1803ccf --- /dev/null +++ b/corgi/src/caller.rs @@ -0,0 +1,145 @@ +use std::{net::IpAddr, process::Stdio}; + +use tokio::{io::AsyncWriteExt, process::Command}; + +use crate::{Script, ScriptKind}; + +pub struct HttpRequest { + pub content_type: String, + // gateway_interface = "CGI/1.1" + pub path_info: String, + pub query_string: String, + pub remote_addr: IpAddr, + pub request_method: String, + pub script_name: String, + pub server_name: String, + pub server_port: u16, + pub server_protocol: String, + // server_version = Self::SERVER_VERSION + pub http_headers: Vec<(String, String)>, + pub body: Option<Vec<u8>>, +} + +impl HttpRequest { + pub const GATEWAY_INTERFACE: &str = "CGI/1.1"; + const SERVER_SOFTWARE: &'static str = + concat!(env!("CARGO_PKG_NAME"), '/', env!("CARGO_PKG_VERSION")); +} + +pub async fn call_and_parse_cgi(script: Script, http: HttpRequest) -> CgiResponse { + if script.kind != ScriptKind::Executable { + eprintln!("Somehow made it to executable path with module script"); + eprintln!("Script: {}", script.name); + panic!("TODO: recover") + } + + let mut cmd = Command::new(&script.filename); + cmd.env("CONTENT_TYPE", http.content_type) + .env("GATEWAY_INTERFACE", HttpRequest::GATEWAY_INTERFACE) + .env("PATH_INFO", http.path_info) + .env("QUERY_STRING", http.query_string) + .env("REMOTE_ADDR", http.remote_addr.to_string()) + .env("REQUEST_METHOD", http.request_method) + .env("SCRIPT_NAME", http.script_name) + .env("SERVER_NAME", http.server_name) + .env("SERVER_PORT", http.server_port.to_string()) + .env("SERVER_PROTOCOL", http.server_protocol) + .env("SERVER_SOFTWARE", HttpRequest::SERVER_SOFTWARE); + + http.http_headers.into_iter().for_each(|(key, val)| { + cmd.env(key, val); + }); + + // Set env specified in the conf. Be sure we do this after we + // set the HTTP headers as to overwrite any we might want + script.env.iter().for_each(|(key, val)| { + cmd.env(key.to_ascii_uppercase(), val); + }); + + let cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); + let output = if let Some(bytes) = http.body { + cmd.env("CONTENT_LENGTH", bytes.len().to_string()); + let mut child = cmd.stdin(Stdio::piped()).spawn().unwrap(); + + let mut cmd_stdin = child.stdin.take().unwrap(); + cmd_stdin.write_all(&bytes).await.unwrap(); + + // we might not need the explicit flush here, stdin doesn't seem + // to require it, but there used to be a BufWriter here instead + // and if you drop without a flush the buffered contents are lost, + // so it stays because i am traumatized or something. + cmd_stdin.flush().await.unwrap(); + drop(cmd_stdin); + + child.wait_with_output().await.unwrap() + } else { + cmd.spawn().unwrap().wait_with_output().await.unwrap() + }; + + parse_cgi_response(&output.stdout) +} + +fn parse_cgi_response(stdout: &[u8]) -> CgiResponse { + let mut response = CgiResponse { + // Default status code is 200 per RFC + status: 200, + headers: vec![], + body: None, + }; + + let mut curr = stdout; + loop { + // Find the newline to know where this header ends + let nl = curr.iter().position(|b| *b == b'\n').expect("no nl in header"); + let line = &curr[..nl]; + + // Find the colon to separate the key from the value + let colon = line.iter().position(|b| *b == b':').expect("no colon in header"); + let key = &line[..colon]; + let mut value = &line[colon + 1..]; + + if value[0] == b' ' { + value = &value[1..]; + } + if value[value.len().saturating_sub(1)] == b'\r' { + value = &value[..value.len().saturating_sub(1)]; + } + + response.headers.push((key.to_vec(), value.to_vec())); + + // Is this header a status line? + let key_string = String::from_utf8_lossy(key); + if key_string == "Status" { + let value_string = String::from_utf8_lossy(value); + if let Some((raw_code, _raw_msg)) = value_string.trim().split_once(' ') { + let code: u16 = raw_code.parse().unwrap(); + response.status = code; + } + } + + // Body next? + let next_nl = curr[nl + 1] == b'\n'; + let next_crlf = curr[nl + 1] == b'\r' && curr[nl + 2] == b'\n'; + if next_nl || next_crlf { + let offset = if next_nl { 2 } else { 3 }; + let body = &curr[nl + offset..]; + if body.len() > 0 { + response.body = Some(body.to_vec()); + } + + return response; + } + + // Move past the newline + curr = &curr[nl + 1..]; + } +} + +pub struct CgiResponse { + /// The Status header of the CGI response + pub status: u16, + /// Headers except "Status" + pub headers: Vec<(Vec<u8>, Vec<u8>)>, + /// CGI response body + pub body: Option<Vec<u8>>, +} diff --git a/corgi/src/main.rs b/corgi/src/main.rs index aa7bf4a..0338d0e 100644 --- a/corgi/src/main.rs +++ b/corgi/src/main.rs @@ -5,6 +5,7 @@ use std::{ time::Instant, }; +use caller::HttpRequest; use confindent::{Confindent, Value, ValueParseError}; use http_body_util::{BodyExt, Full}; use hyper::{ @@ -18,15 +19,24 @@ use hyper_util::rt::TokioIo; use regex_lite::Regex; use tokio::{io::AsyncWriteExt, net::TcpListener, process::Command, runtime::Runtime}; +mod caller; + #[derive(Clone, Debug)] pub struct Settings { port: u16, scripts: Vec<Script>, } +#[derive(Clone, Debug, PartialEq)] +pub enum ScriptKind { + Executable, + Object, +} + #[derive(Clone, Debug)] pub struct Script { name: String, + kind: ScriptKind, regex: Option<Regex>, filename: String, env: Vec<(String, String)>, @@ -76,8 +86,19 @@ fn parse_script_conf(conf: &Value) -> Script { }, }; + let kind = match conf.get("Type") { + None => ScriptKind::Executable, + Some("executable") => ScriptKind::Executable, + Some("object") => ScriptKind::Object, + Some(kind) => { + eprintln!("'{kind}' is not a valid script type"); + std::process::exit(1) + } + }; + Script { name, + kind, regex, filename, env: env.unwrap_or_default(), @@ -138,22 +159,31 @@ impl Svc { let path = req.uri().path().to_owned(); let query = req.uri().query().unwrap_or_default().to_owned(); let headers = req.headers().clone(); - let body = req.into_body().collect().await.unwrap().to_bytes(); - let content_length = body.len(); - let mut script = settings.scripts[0].clone(); + let body = req.into_body().collect().await.unwrap().to_bytes().to_vec(); + let content_length = body.len(); + let mut maybe_script = None; for set_script in settings.scripts { if let Some(regex) = set_script.regex.as_ref() { if regex.is_match(&path) { - script = set_script; + maybe_script = Some(set_script); break; } } else { - script = set_script; + maybe_script = Some(set_script); + break; } } + let script = match maybe_script { + Some(script) => script, + None => { + eprintln!("path didn't match any script"); + panic!("TODO recover?"); + } + }; + let content_type = headers .get("content-type") .map(|s| s.to_str().ok()) @@ -182,42 +212,22 @@ impl Svc { .to_str() .expect("failed to decode http host as string"); - let mut cmd = Command::new(&script.filename); - cmd.env("CONTENT_TYPE", content_type) - .env("GATEWAY_INTERFACE", "CGI/1.1") - .env("PATH_INFO", &path) - .env("QUERY_STRING", query) - .env("REMOTE_ADDR", client_addr.to_string()) - .env("REQUEST_METHOD", method) - .env("SCRIPT_NAME", script.filename) - .env("SERVER_NAME", server_name) - .env("SERVER_PORT", settings.port.to_string()) - .env("SERVER_PROTOCOL", format!("{:?}", version)) - .env("SERVER_SOFTWARE", Self::SERVER_SOFTWARE); - - if content_length > 0 { - cmd.env("CONTENT_LENGTH", content_length.to_string()); - } - - // Set env associated with the HTTP request headers - Self::set_http_env(headers, &mut cmd); - - // Set env specified in the conf. Be sure we do this after we - // set the HTTP headers as to overwrite any we might want - for (key, value) in &script.env { - cmd.env(key.to_ascii_uppercase(), value); - } - - let debugcgi = script.name == "git-backend"; - - let cgibody = if content_length > 0 { - Some(&body) - } else { - None + let http_request = HttpRequest { + content_type, + path_info: path.clone(), + query_string: query, + remote_addr: client_addr, + request_method: method, + script_name: script.filename.to_owned(), + server_name: server_name.to_owned(), + server_port: settings.port, + server_protocol: format!("{:?}", version), + http_headers: Self::build_http_vec(headers), + body: if content_length > 0 { Some(body) } else { None }, }; let start_cgi = Instant::now(); - let cgi_response = Self::call_and_parse_cgi(cmd, cgibody).await; + let cgi_response = caller::call_and_parse_cgi(script.clone(), http_request).await; let cgi_time = start_cgi.elapsed(); let status = StatusCode::from_u16(cgi_response.status).unwrap(); @@ -242,10 +252,9 @@ impl Svc { maybe_hval.map(|h| h.to_str().ok()).flatten().map(|s| s.parse().ok()).flatten() } - const SERVER_SOFTWARE: &'static str = - concat!(env!("CARGO_PKG_NAME"), '/', env!("CARGO_PKG_VERSION")); + fn build_http_vec(headers: HeaderMap) -> Vec<(String, String)> { + let mut ret = vec![]; - fn set_http_env(headers: HeaderMap, cmd: &mut Command) { for (key, value) in headers.iter() { let key_str = key.as_str(); @@ -264,101 +273,18 @@ impl Svc { match value.to_str() { Ok(val_str) => { - cmd.env(key_upper, val_str); + ret.push((key_upper, val_str.to_owned())); } Err(err) => { eprintln!("value for header {key_str} is not a string: {err}") } } } - } - - async fn call_and_parse_cgi(mut cmd: Command, body: Option<&Bytes>) -> CgiResponse { - let mut response = CgiResponse { - // Default status code is 200 per RFC - status: 200, - headers: vec![], - body: None, - }; - - let cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - let output = if let Some(bytes) = body { - let mut child = cmd.stdin(Stdio::piped()).spawn().unwrap(); - let mut cmd_stdin = child.stdin.take().unwrap(); - cmd_stdin.write_all(bytes).await.unwrap(); - - // we might not need the explicit flush here, stdin doesn't seem - // to require it, but there used to be a BufWriter here instead - // and if you drop without a flush the buffered contents are lost, - // so it stays because i am traumatized or something. - cmd_stdin.flush().await.unwrap(); - drop(cmd_stdin); - - child.wait_with_output().await.unwrap() - } else { - cmd.spawn().unwrap().wait_with_output().await.unwrap() - }; - - let response_raw = output.stdout; - let mut curr = response_raw.as_slice(); - loop { - // Find the newline to know where this header ends - let nl = curr.iter().position(|b| *b == b'\n').expect("no nl in header"); - let line = &curr[..nl]; - - // Find the colon to separate the key from the value - let colon = line.iter().position(|b| *b == b':').expect("no colon in header"); - let key = &line[..colon]; - let mut value = &line[colon + 1..]; - - if value[0] == b' ' { - value = &value[1..]; - } - if value[value.len().saturating_sub(1)] == b'\r' { - value = &value[..value.len().saturating_sub(1)]; - } - - response.headers.push((key.to_vec(), value.to_vec())); - - // Is this header a status line? - let key_string = String::from_utf8_lossy(key); - if key_string == "Status" { - let value_string = String::from_utf8_lossy(value); - if let Some((raw_code, _raw_msg)) = value_string.trim().split_once(' ') { - let code: u16 = raw_code.parse().unwrap(); - response.status = code; - } - } - - // Body next? - let next_nl = curr[nl + 1] == b'\n'; - let next_crlf = curr[nl + 1] == b'\r' && curr[nl + 2] == b'\n'; - if next_nl || next_crlf { - let offset = if next_nl { 2 } else { 3 }; - let body = &curr[nl + offset..]; - if body.len() > 0 { - response.body = Some(body.to_vec()); - } - - return response; - } - - // Move past the newline - curr = &curr[nl + 1..]; - } + ret } } -struct CgiResponse { - /// The Status header of the CGI response - status: u16, - /// Headers except "Status" - headers: Vec<(Vec<u8>, Vec<u8>)>, - /// CGI response body - body: Option<Vec<u8>>, -} - fn path_to_name(path: &str) -> String { let mut ret = String::with_capacity(path.len()); for ch in path.chars() { |