about summary refs log tree commit diff
diff options
context:
space:
mode:
author gennyble <gen@nyble.dev> 2025-03-16 13:49:12 -0500
committer gennyble <gen@nyble.dev> 2025-03-16 13:49:12 -0500
commit aad1583d8b5ae737bb424c461925bc69119c36e9 (patch)
tree 8fb341a019fe167270d086f4fa075965c727f506
parent 17ca7fb2fbb9b6202dcdb99ed296dfb97822cc19 (diff)
download corgi-aad1583d8b5ae737bb424c461925bc69119c36e9.tar.gz
corgi-aad1583d8b5ae737bb424c461925bc69119c36e9.zip
Refactor and prepare for object loading
-rw-r--r-- corgi/Cargo.toml 9
-rw-r--r-- corgi/src/caller.rs 145
-rw-r--r-- corgi/src/main.rs 178
3 files changed, 204 insertions, 128 deletions
diff --git a/corgi/Cargo.toml b/corgi/Cargo.toml
index 5847211..0abd9df 100644
--- a/corgi/Cargo.toml
+++ b/corgi/Cargo.toml
@@ -1,17 +1,22 @@
 [package]
 name = "corgi"
+description = "a simple CGI server"
+authors = ["gennyble <gen@nyble.dev>"]
+license = "ISC"
+repository = "https://git.nyble.dev/corgi/about"
+readme = "../README.md"
+
 version = "1.0.0"
 edition = "2024"
 
 [dependencies]
 http-body-util = "0.1.3"
 hyper-util = { version = "0.1.10", features = ["tokio"] }
+libloading = "0.8.6"
 regex-lite = "0.1.6"
 
 [dependencies.confindent]
 version = "2.2.1"
-git = "https://github.com/gennyble/confindent"
-branch = "v2"
 
 [dependencies.tokio]
 version = "1.44.0"
diff --git a/corgi/src/caller.rs b/corgi/src/caller.rs
new file mode 100644
index 0000000..1803ccf
--- /dev/null
+++ b/corgi/src/caller.rs
@@ -0,0 +1,145 @@
+use std::{net::IpAddr, process::Stdio};
+
+use tokio::{io::AsyncWriteExt, process::Command};
+
+use crate::{Script, ScriptKind};
+
+pub struct HttpRequest {
+	pub content_type: String,
+	// gateway_interface = "CGI/1.1"
+	pub path_info: String,
+	pub query_string: String,
+	pub remote_addr: IpAddr,
+	pub request_method: String,
+	pub script_name: String,
+	pub server_name: String,
+	pub server_port: u16,
+	pub server_protocol: String,
+	// server_version = Self::SERVER_VERSION
+	pub http_headers: Vec<(String, String)>,
+	pub body: Option<Vec<u8>>,
+}
+
+impl HttpRequest {
+	pub const GATEWAY_INTERFACE: &str = "CGI/1.1";
+	const SERVER_SOFTWARE: &'static str =
+		concat!(env!("CARGO_PKG_NAME"), '/', env!("CARGO_PKG_VERSION"));
+}
+
+pub async fn call_and_parse_cgi(script: Script, http: HttpRequest) -> CgiResponse {
+	if script.kind != ScriptKind::Executable {
+		eprintln!("Somehow made it to executable path with module script");
+		eprintln!("Script: {}", script.name);
+		panic!("TODO: recover")
+	}
+
+	let mut cmd = Command::new(&script.filename);
+	cmd.env("CONTENT_TYPE", http.content_type)
+		.env("GATEWAY_INTERFACE", HttpRequest::GATEWAY_INTERFACE)
+		.env("PATH_INFO", http.path_info)
+		.env("QUERY_STRING", http.query_string)
+		.env("REMOTE_ADDR", http.remote_addr.to_string())
+		.env("REQUEST_METHOD", http.request_method)
+		.env("SCRIPT_NAME", http.script_name)
+		.env("SERVER_NAME", http.server_name)
+		.env("SERVER_PORT", http.server_port.to_string())
+		.env("SERVER_PROTOCOL", http.server_protocol)
+		.env("SERVER_SOFTWARE", HttpRequest::SERVER_SOFTWARE);
+
+	http.http_headers.into_iter().for_each(|(key, val)| {
+		cmd.env(key, val);
+	});
+
+	// Set env specified in the conf. Be sure we do this after we
+	// set the HTTP headers as to overwrite any we might want
+	script.env.iter().for_each(|(key, val)| {
+		cmd.env(key.to_ascii_uppercase(), val);
+	});
+
+	let cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+	let output = if let Some(bytes) = http.body {
+		cmd.env("CONTENT_LENGTH", bytes.len().to_string());
+		let mut child = cmd.stdin(Stdio::piped()).spawn().unwrap();
+
+		let mut cmd_stdin = child.stdin.take().unwrap();
+		cmd_stdin.write_all(&bytes).await.unwrap();
+
+		// we might not need the explicit flush here, stdin doesn't seem
+		// to require it, but there used to be a BufWriter here instead
+		// and if you drop without a flush the buffered contents are lost,
+		// so it stays because i am traumatized or something.
+		cmd_stdin.flush().await.unwrap();
+		drop(cmd_stdin);
+
+		child.wait_with_output().await.unwrap()
+	} else {
+		cmd.spawn().unwrap().wait_with_output().await.unwrap()
+	};
+
+	parse_cgi_response(&output.stdout)
+}
+
+fn parse_cgi_response(stdout: &[u8]) -> CgiResponse {
+	let mut response = CgiResponse {
+		// Default status code is 200 per RFC
+		status: 200,
+		headers: vec![],
+		body: None,
+	};
+
+	let mut curr = stdout;
+	loop {
+		// Find the newline to know where this header ends
+		let nl = curr.iter().position(|b| *b == b'\n').expect("no nl in header");
+		let line = &curr[..nl];
+
+		// Find the colon to separate the key from the value
+		let colon = line.iter().position(|b| *b == b':').expect("no colon in header");
+		let key = &line[..colon];
+		let mut value = &line[colon + 1..];
+
+		if value[0] == b' ' {
+			value = &value[1..];
+		}
+		if value[value.len().saturating_sub(1)] == b'\r' {
+			value = &value[..value.len().saturating_sub(1)];
+		}
+
+		response.headers.push((key.to_vec(), value.to_vec()));
+
+		// Is this header a status line?
+		let key_string = String::from_utf8_lossy(key);
+		if key_string == "Status" {
+			let value_string = String::from_utf8_lossy(value);
+			if let Some((raw_code, _raw_msg)) = value_string.trim().split_once(' ') {
+				let code: u16 = raw_code.parse().unwrap();
+				response.status = code;
+			}
+		}
+
+		// Body next?
+		let next_nl = curr[nl + 1] == b'\n';
+		let next_crlf = curr[nl + 1] == b'\r' && curr[nl + 2] == b'\n';
+		if next_nl || next_crlf {
+			let offset = if next_nl { 2 } else { 3 };
+			let body = &curr[nl + offset..];
+			if body.len() > 0 {
+				response.body = Some(body.to_vec());
+			}
+
+			return response;
+		}
+
+		// Move past the newline
+		curr = &curr[nl + 1..];
+	}
+}
+
+pub struct CgiResponse {
+	/// The Status header of the CGI response
+	pub status: u16,
+	/// Headers except "Status"
+	pub headers: Vec<(Vec<u8>, Vec<u8>)>,
+	/// CGI response body
+	pub body: Option<Vec<u8>>,
+}
diff --git a/corgi/src/main.rs b/corgi/src/main.rs
index aa7bf4a..0338d0e 100644
--- a/corgi/src/main.rs
+++ b/corgi/src/main.rs
@@ -5,6 +5,7 @@ use std::{
 	time::Instant,
 };
 
+use caller::HttpRequest;
 use confindent::{Confindent, Value, ValueParseError};
 use http_body_util::{BodyExt, Full};
 use hyper::{
@@ -18,15 +19,24 @@ use hyper_util::rt::TokioIo;
 use regex_lite::Regex;
 use tokio::{io::AsyncWriteExt, net::TcpListener, process::Command, runtime::Runtime};
 
+mod caller;
+
 #[derive(Clone, Debug)]
 pub struct Settings {
 	port: u16,
 	scripts: Vec<Script>,
 }
 
+#[derive(Clone, Debug, PartialEq)]
+pub enum ScriptKind {
+	Executable,
+	Object,
+}
+
 #[derive(Clone, Debug)]
 pub struct Script {
 	name: String,
+	kind: ScriptKind,
 	regex: Option<Regex>,
 	filename: String,
 	env: Vec<(String, String)>,
@@ -76,8 +86,19 @@ fn parse_script_conf(conf: &Value) -> Script {
 		},
 	};
 
+	let kind = match conf.get("Type") {
+		None => ScriptKind::Executable,
+		Some("executable") => ScriptKind::Executable,
+		Some("object") => ScriptKind::Object,
+		Some(kind) => {
+			eprintln!("'{kind}' is not a valid script type");
+			std::process::exit(1)
+		}
+	};
+
 	Script {
 		name,
+		kind,
 		regex,
 		filename,
 		env: env.unwrap_or_default(),
@@ -138,22 +159,31 @@ impl Svc {
 		let path = req.uri().path().to_owned();
 		let query = req.uri().query().unwrap_or_default().to_owned();
 		let headers = req.headers().clone();
-		let body = req.into_body().collect().await.unwrap().to_bytes();
-		let content_length = body.len();
 
-		let mut script = settings.scripts[0].clone();
+		let body = req.into_body().collect().await.unwrap().to_bytes().to_vec();
+		let content_length = body.len();
 
+		let mut maybe_script = None;
 		for set_script in settings.scripts {
 			if let Some(regex) = set_script.regex.as_ref() {
 				if regex.is_match(&path) {
-					script = set_script;
+					maybe_script = Some(set_script);
 					break;
 				}
 			} else {
-				script = set_script;
+				maybe_script = Some(set_script);
+				break;
 			}
 		}
 
+		let script = match maybe_script {
+			Some(script) => script,
+			None => {
+				eprintln!("path didn't match any script");
+				panic!("TODO recover?");
+			}
+		};
+
 		let content_type = headers
 			.get("content-type")
 			.map(|s| s.to_str().ok())
@@ -182,42 +212,22 @@ impl Svc {
 			.to_str()
 			.expect("failed to decode http host as string");
 
-		let mut cmd = Command::new(&script.filename);
-		cmd.env("CONTENT_TYPE", content_type)
-			.env("GATEWAY_INTERFACE", "CGI/1.1")
-			.env("PATH_INFO", &path)
-			.env("QUERY_STRING", query)
-			.env("REMOTE_ADDR", client_addr.to_string())
-			.env("REQUEST_METHOD", method)
-			.env("SCRIPT_NAME", script.filename)
-			.env("SERVER_NAME", server_name)
-			.env("SERVER_PORT", settings.port.to_string())
-			.env("SERVER_PROTOCOL", format!("{:?}", version))
-			.env("SERVER_SOFTWARE", Self::SERVER_SOFTWARE);
-
-		if content_length > 0 {
-			cmd.env("CONTENT_LENGTH", content_length.to_string());
-		}
-
-		// Set env associated with the HTTP request headers
-		Self::set_http_env(headers, &mut cmd);
-
-		// Set env specified in the conf. Be sure we do this after we
-		// set the HTTP headers as to overwrite any we might want
-		for (key, value) in &script.env {
-			cmd.env(key.to_ascii_uppercase(), value);
-		}
-
-		let debugcgi = script.name == "git-backend";
-
-		let cgibody = if content_length > 0 {
-			Some(&body)
-		} else {
-			None
+		let http_request = HttpRequest {
+			content_type,
+			path_info: path.clone(),
+			query_string: query,
+			remote_addr: client_addr,
+			request_method: method,
+			script_name: script.filename.to_owned(),
+			server_name: server_name.to_owned(),
+			server_port: settings.port,
+			server_protocol: format!("{:?}", version),
+			http_headers: Self::build_http_vec(headers),
+			body: if content_length > 0 { Some(body) } else { None },
 		};
 
 		let start_cgi = Instant::now();
-		let cgi_response = Self::call_and_parse_cgi(cmd, cgibody).await;
+		let cgi_response = caller::call_and_parse_cgi(script.clone(), http_request).await;
 		let cgi_time = start_cgi.elapsed();
 
 		let status = StatusCode::from_u16(cgi_response.status).unwrap();
@@ -242,10 +252,9 @@ impl Svc {
 		maybe_hval.map(|h| h.to_str().ok()).flatten().map(|s| s.parse().ok()).flatten()
 	}
 
-	const SERVER_SOFTWARE: &'static str =
-		concat!(env!("CARGO_PKG_NAME"), '/', env!("CARGO_PKG_VERSION"));
+	fn build_http_vec(headers: HeaderMap) -> Vec<(String, String)> {
+		let mut ret = vec![];
 
-	fn set_http_env(headers: HeaderMap, cmd: &mut Command) {
 		for (key, value) in headers.iter() {
 			let key_str = key.as_str();
 
@@ -264,101 +273,18 @@ impl Svc {
 
 			match value.to_str() {
 				Ok(val_str) => {
-					cmd.env(key_upper, val_str);
+					ret.push((key_upper, val_str.to_owned()));
 				}
 				Err(err) => {
 					eprintln!("value for header {key_str} is not a string: {err}")
 				}
 			}
 		}
-	}
-
-	async fn call_and_parse_cgi(mut cmd: Command, body: Option<&Bytes>) -> CgiResponse {
-		let mut response = CgiResponse {
-			// Default status code is 200 per RFC
-			status: 200,
-			headers: vec![],
-			body: None,
-		};
-
-		let cmd = cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
-		let output = if let Some(bytes) = body {
-			let mut child = cmd.stdin(Stdio::piped()).spawn().unwrap();
 
-			let mut cmd_stdin = child.stdin.take().unwrap();
-			cmd_stdin.write_all(bytes).await.unwrap();
-
-			// we might not need the explicit flush here, stdin doesn't seem
-			// to require it, but there used to be a BufWriter here instead
-			// and if you drop without a flush the buffered contents are lost,
-			// so it stays because i am traumatized or something.
-			cmd_stdin.flush().await.unwrap();
-			drop(cmd_stdin);
-
-			child.wait_with_output().await.unwrap()
-		} else {
-			cmd.spawn().unwrap().wait_with_output().await.unwrap()
-		};
-
-		let response_raw = output.stdout;
-		let mut curr = response_raw.as_slice();
-		loop {
-			// Find the newline to know where this header ends
-			let nl = curr.iter().position(|b| *b == b'\n').expect("no nl in header");
-			let line = &curr[..nl];
-
-			// Find the colon to separate the key from the value
-			let colon = line.iter().position(|b| *b == b':').expect("no colon in header");
-			let key = &line[..colon];
-			let mut value = &line[colon + 1..];
-
-			if value[0] == b' ' {
-				value = &value[1..];
-			}
-			if value[value.len().saturating_sub(1)] == b'\r' {
-				value = &value[..value.len().saturating_sub(1)];
-			}
-
-			response.headers.push((key.to_vec(), value.to_vec()));
-
-			// Is this header a status line?
-			let key_string = String::from_utf8_lossy(key);
-			if key_string == "Status" {
-				let value_string = String::from_utf8_lossy(value);
-				if let Some((raw_code, _raw_msg)) = value_string.trim().split_once(' ') {
-					let code: u16 = raw_code.parse().unwrap();
-					response.status = code;
-				}
-			}
-
-			// Body next?
-			let next_nl = curr[nl + 1] == b'\n';
-			let next_crlf = curr[nl + 1] == b'\r' && curr[nl + 2] == b'\n';
-			if next_nl || next_crlf {
-				let offset = if next_nl { 2 } else { 3 };
-				let body = &curr[nl + offset..];
-				if body.len() > 0 {
-					response.body = Some(body.to_vec());
-				}
-
-				return response;
-			}
-
-			// Move past the newline
-			curr = &curr[nl + 1..];
-		}
+		ret
 	}
 }
 
-struct CgiResponse {
-	/// The Status header of the CGI response
-	status: u16,
-	/// Headers except "Status"
-	headers: Vec<(Vec<u8>, Vec<u8>)>,
-	/// CGI response body
-	body: Option<Vec<u8>>,
-}
-
 fn path_to_name(path: &str) -> String {
 	let mut ret = String::with_capacity(path.len());
 	for ch in path.chars() {