diff --git a/Cargo.lock b/Cargo.lock index cfa40fa..4320739 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,16 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -62,9 +72,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" [[package]] name = "arrayref" @@ -90,6 +100,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64ct" version = "1.6.0" @@ -155,6 +171,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clap" version = "4.5.20" @@ -179,9 +206,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.35" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07a13ab5b8cb13dbe35e68b83f6c12f9293b2f601797b71bc9f23befdb329feb" +checksum = "d9647a559c112175f17cf724dc72d3645680a883c58481332779192b0d8e7a01" dependencies = [ "clap", ] @@ -195,7 +222,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -248,9 +275,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] +[[package]] +name = "crypto_box" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16182b4f39a82ec8a6851155cc4c0cda3065bb1db33651726a29e1951de0f009" +dependencies = [ + "aead", + "crypto_secretbox", + "curve25519-dalek", + "salsa20", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto_secretbox" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d6cf87adf719ddf43a805e92c6870a531aedda35ff640442cbaf8674e141e1" +dependencies = [ + "aead", + "cipher", + "generic-array", + "poly1305", + "salsa20", + "subtle", + "zeroize", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -275,7 +332,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -409,12 +466,12 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -463,12 +520,13 @@ checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2" [[package]] name = "generic-array" -version = "0.14.4" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", + "zeroize", ] [[package]] @@ -525,6 +583,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -533,9 +600,21 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "json_dotpath" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "dbdcfef3cf5591f0cef62da413ae795e3d1f5a00936ccec0b2071499a32efd1a" +dependencies = [ + "serde", + "serde_derive", + "serde_json", + "thiserror", +] [[package]] name = "ketos" @@ -570,9 +649,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.159" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "linefeed" @@ -732,6 +811,12 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "option-ext" version = "0.2.0" @@ -786,6 +871,17 @@ dependencies = [ "spki", ] +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "ppv-lite86" version = "0.2.10" @@ -794,9 +890,9 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "proc-macro2" -version = "1.0.87" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -941,9 +1037,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -974,7 +1070,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" dependencies = [ - "base64", + "base64 0.13.0", "blake2b_simd", "constant_time_eq", "crossbeam-utils", @@ -991,22 +1087,31 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags 2.6.0", - "errno 0.3.9", + "errno 0.3.10", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "ryu" -version = "1.0.5" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] [[package]] name = "semver" @@ -1016,29 +1121,29 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -1062,17 +1167,21 @@ name = "shadowenv" version = "3.0.3" dependencies = [ "anyhow", + "base64 0.22.1", "blake2", "clap", "clap_complete", + "crypto_box", "dirs 5.0.1", "ed25519", "ed25519-dalek", "exec", "hex", + "json_dotpath", "ketos", "ketos_derive", "libc", + "nom", "quickcheck", "quickcheck_macros", "rand 0.8.5", @@ -1166,9 +1275,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -1177,9 +1286,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if 1.0.0", "fastrand", @@ -1218,7 +1327,7 @@ checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -1269,6 +1378,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "utf8parse" version = "0.2.2" @@ -1324,15 +1443,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index b52801a..33c0aea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,32 +1,34 @@ [package] name = "shadowenv" version = "3.0.3" -authors = [ - "Shopify Engineering ", -] +authors = ["Shopify Engineering "] edition = "2021" [dependencies] blake2 = "0.10.6" clap = { version = "4.5.20", features = ["cargo", "derive"] } -clap_complete = "4.5.35" +clap_complete = "4.5.38" dirs = "5.0.1" exec = "0.3.1" -anyhow = "1.0.89" +anyhow = "1.0.94" thiserror = "1.0.64" hex = "0.4.3" ketos = "0.12" ketos_derive = "0.12" -libc = "0.2.48" -regex = "1.11.0" -serde = "1.0.210" -serde_derive = "1.0.210" -serde_json = "1.0.128" +libc = "0.2.168" +regex = "1.11.1" +serde = { version = "1.0.216", features = ["derive"] } +serde_derive = "1.0.216" +serde_json = "1.0.133" +json_dotpath = "1.1.0" shell-escape = "0.1.4" shellexpand = "3.1.0" ed25519-dalek = { version = "2.1.1", features = ["rand_core"] } ed25519 = "2.2.3" rand = "0.8.5" +nom = "7" +crypto_box = "0.9.1" +base64 = "0.22.1" [build-dependencies] clap = { version = "4.5.20", features = ["cargo", "derive"] } @@ -35,4 +37,4 @@ clap_complete = "4.5.35" [dev-dependencies] quickcheck = "1.0.3" quickcheck_macros = "1.0.0" -tempfile = "3.13.0" +tempfile = "3.14.0" diff --git a/src/ejson.rs b/src/ejson.rs new file mode 100644 index 0000000..2f122b1 --- /dev/null +++ b/src/ejson.rs @@ -0,0 +1,198 @@ +//! File TODO: Too many `map_err`, can be beautified. +//! EJSON TODO: Potential improvement: Cache decoded files for +//! multiple `env/ejson` on the same file. +use base64::Engine; +use crypto_box::{aead::Aead, Nonce, PublicKey, SalsaBox, SecretKey}; +use nom::{ + bytes::complete::{tag, take_till}, + character::complete::digit1, + combinator::{map, map_res}, + IResult, +}; +use serde::Deserialize; +use serde_json::{Map, Value}; +use std::{ + fs::{read, read_to_string}, + io, + path::Path, + str::FromStr, +}; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum EJsonError { + #[error("Invalid EJSON: {}", .0)] + InvalidJson(#[from] serde_json::Error), + + #[error(transparent)] + IoErrorr(#[from] io::Error), + + /// Generic parsing error. + #[error("{}", .0)] + BoxParseError(String), +} + +#[derive(Deserialize, Debug)] +struct EJsonFile { + /// An EJSON file must have a public key associated, otherwise it's invalid. + #[serde(rename = "_public_key")] + pub public_key: String, + + /// All other key-value pairs contained in the file. + #[serde(flatten)] + pub other: Map, +} + +/// Attempts to load an ejson file from the given path. Decodes all values in the +/// file using the public key specified in the file. Keys stay unchanged (no `_` removal). +/// +/// Returns the entire parsed & decoded JSON file, minus the `_public_key` root field. +pub fn load_ejson_file(path: &Path) -> Result, EJsonError> { + let bytes = read(path)?; + let mut parsed_file: EJsonFile = serde_json::from_slice(&bytes)?; + + let priv_key = find_private_key(&parsed_file.public_key)?; + decode_map(&mut parsed_file.other, &priv_key)?; + + Ok(parsed_file.other) +} + +fn decode_value(key: &str, value: &mut Value, private_key: &SecretKey) -> Result<(), EJsonError> { + match value { + Value::Object(obj) => decode_map(obj, private_key)?, + Value::String(s) if !key.starts_with("_") => { + if let Some(s) = decode_ejson_string(s, private_key)? { + *value = Value::String(s); + } + } + Value::Array(array) => { + for elem in array { + decode_value(key, elem, private_key)?; + } + } + _ => (), + }; + + Ok(()) +} + +fn decode_map(map: &mut Map, private_key: &SecretKey) -> Result<(), EJsonError> { + for (key, value) in map.iter_mut() { + decode_value(key, value, private_key)?; + } + + Ok(()) +} + +fn decode_ejson_string(s: &str, private_key: &SecretKey) -> Result, EJsonError> { + let parsed = match parse_ejson_box(&s) { + Ok((_, parsed)) => parsed, + + // Ignore value if we can't parse the box, for now. + // TODO: Do we want to assume that all non-underscore strings should be decodable? Then we can bubble the parse error. + Err(_) => return Ok(None), + }; + + let keybox = SalsaBox::new(&parsed.encrypter_public_key()?, &private_key); + let nonce = Nonce::from(parsed.nonce()?); + let decrypted_plaintext = keybox + .decrypt(&nonce, parsed.boxed_message()?.as_slice()) + .map_err(|err| { + EJsonError::BoxParseError(format!("Unable to decrypt secret box `{s}`: {}", err)) + })?; + + String::from_utf8(decrypted_plaintext) + .map(Some) + .map_err(|err| { + EJsonError::BoxParseError(format!( + "Decrypted message value for secret box `{s}` contains invalid UTF-8: {err}." + )) + }) +} + +#[derive(Debug)] +struct EJsonMessageBox<'input> { + _schema_version: u32, + /// Base64-encoded key used for encryption, + encrypter_key_b64: &'input str, + /// Base64-encoded nonce used for encryption, + nonce_b64: &'input str, + /// The encrypted message. + boxed_message_b64: &'input str, +} + +impl<'input> EJsonMessageBox<'input> { + fn encrypter_public_key(&self) -> Result { + let pk_bytes = base64::engine::general_purpose::STANDARD + .decode(self.encrypter_key_b64) + .map_err(|_err| { + EJsonError::BoxParseError("Encrypter public key is invalid base64".to_owned()) + })?; + + let pk_bytes: [u8; 32] = pk_bytes.try_into().map_err(|pk_bytes: Vec| { + EJsonError::BoxParseError(format!( + "Invalid nonce length: Found {}, must be 24", + pk_bytes.len() + )) + })?; + + Ok(PublicKey::from_bytes(pk_bytes)) + } + + fn nonce(&self) -> Result<[u8; 24], EJsonError> { + let nonce_bytes = base64::engine::general_purpose::STANDARD + .decode(self.nonce_b64) + .map_err(|_err| EJsonError::BoxParseError("Nonce is invalid base64".to_owned()))?; + + nonce_bytes.try_into().map_err(|nonce_bytes: Vec| { + EJsonError::BoxParseError(format!( + "Invalid nonce length: Found {}, must be 24", + nonce_bytes.len() + )) + }) + } + + fn boxed_message(&self) -> Result, EJsonError> { + base64::engine::general_purpose::STANDARD + .decode(self.boxed_message_b64) + .map_err(|_err| EJsonError::BoxParseError("Boxed message is invalid base64".to_owned())) + } +} + +fn parse_ejson_box<'input>(input: &'input str) -> IResult<&str, EJsonMessageBox<'input>> { + let (input, _) = tag("EJ[")(input)?; + let (input, schema_version) = + map(take_till(|c| c == ':'), map_res(digit1, u32::from_str))(input)?; + let (_, schema_version) = schema_version?; + + let (input, _) = tag(":")(input)?; + let (input, encrypter_key_b64) = take_till(|c| c == ':')(input)?; + + let (input, _) = tag(":")(input)?; + let (input, nonce_b64) = take_till(|c| c == ':')(input)?; + + let (input, _) = tag(":")(input)?; + let (_input, boxed_message_b64) = take_till(|c| c == ']')(input)?; + + Ok(( + input, + EJsonMessageBox { + _schema_version: schema_version, + encrypter_key_b64, + nonce_b64, + boxed_message_b64, + }, + )) +} + +fn find_private_key(hexed_key: &str) -> Result { + let hexed_private_key_bytes = read_to_string(format!("/opt/ejson/keys/{hexed_key}"))?; + let decoded_bytes = hex::decode(hexed_private_key_bytes.trim_end_matches("\n")) + .map_err(|_err| EJsonError::BoxParseError("Key is invalid hex".to_owned()))?; + + let key_bytes: [u8; 32] = decoded_bytes[..32].try_into().map_err(|_err| { + EJsonError::BoxParseError("Invalid key length, must be 32 bytes".to_owned()) + })?; + + Ok(SecretKey::from_bytes(key_bytes)) +} diff --git a/src/hash.rs b/src/hash.rs index b6993ff..51cb9bd 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -5,25 +5,38 @@ use blake2::{ }; use std::{ cmp::{Ord, Ordering}, - collections::VecDeque, + collections::{HashSet, VecDeque}, fmt::Display, - path::PathBuf, + fs::{self, OpenOptions}, + io::Write, + path::{Path, PathBuf}, result::Result, str::FromStr, + time::UNIX_EPOCH, }; +use crate::loader::{SHADOWENV_DIR_NAME, SHADOWENV_LINKED_EJSON_FILES_NAME}; + const FILE_SEPARATOR: &str = "\x1C"; const GROUP_SEPARATOR: &str = "\x1D"; #[derive(Debug, Clone)] pub struct SourceList { - sources: VecDeque, + pub sources: VecDeque, } #[derive(Debug, Clone)] pub struct Source { pub dir: String, pub files: Vec, + + /// Any EJSON files linked to the source files. + /// Retrieved from the ejson files marker in .shadowenv.d. + pub ejson_file_paths: Vec, + + /// Added during execution. Any ejson files that were actually used + /// during evaluation. Used to maintain the marker file. + pub used_ejson_files: HashSet, } #[derive(Debug, Clone, Eq)] @@ -61,13 +74,33 @@ struct WrongInputSize; impl Source { pub fn new(dir: String) -> Self { - Source { dir, files: vec![] } + Source { + dir, + files: vec![], + ejson_file_paths: vec![], + used_ejson_files: HashSet::default(), + } } pub fn add_file(&mut self, name: String, contents: String) { self.files.push(SourceFile { name, contents }) } + pub fn add_ejson_links(&mut self, from_file: &PathBuf) -> Result<(), std::io::Error> { + for line in fs::read_to_string(&from_file)?.lines() { + let path = Path::new(line); + if let Ok(path) = path.canonicalize() { + self.ejson_file_paths.push(path); + } + } + + Ok(()) + } + + pub fn set_used_ejson_paths(&mut self, files: HashSet) { + self.used_ejson_files = files; + } + pub fn hash(&self) -> Option { if self.files.is_empty() { return None; @@ -84,11 +117,78 @@ impl Source { hasher.update(FILE_SEPARATOR.as_bytes()); } + for fingerprint in self.ejson_fingerprints() { + hasher.update(fingerprint.as_bytes()); + hasher.update(GROUP_SEPARATOR.as_bytes()); + } + let mut buf = [0u8; 8]; hasher.finalize_variable(&mut buf).unwrap(); Some(u64::from_ne_bytes(buf)) } + + /// Returns the fingerprints to use, in order the files are listed in the marker file, + /// for the ejson files linked to this source. + /// + /// If a file fails to return its metadata for any reason (eg. the file was deleted), + /// the fingerprint is skipped. + fn ejson_fingerprints(&self) -> Vec { + self.ejson_file_paths + .iter() + .filter_map(|path| { + let metadata = std::fs::metadata(path).ok()?; + let modified = metadata.modified().ok()?; + let modified = modified + .duration_since(UNIX_EPOCH) + .expect("System time to be > UNIX_EPOCH"); + + Some(format!( + "{}:{}.{}", + path.to_str()?, + modified.as_secs(), + modified.subsec_millis() + )) + }) + .collect() + } + + // TODO: rough impl, unwraps like a child on christmas. + fn update_ejson_marker(&mut self) { + // Check if the marker needs to be updated. + let unchanged = self.used_ejson_files.len() == self.ejson_file_paths.len() + && self.used_ejson_files + == HashSet::from_iter(self.ejson_file_paths.clone().into_iter()); + + if !unchanged { + // Just write the used ones normalized into the marker. + let marker_file_path = PathBuf::from(&self.dir) + .join(SHADOWENV_DIR_NAME) + .join(SHADOWENV_LINKED_EJSON_FILES_NAME); + + let mut marker_file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(marker_file_path) + .unwrap(); + + let mut lines: Vec<_> = self + .used_ejson_files + .clone() + .into_iter() + .map(|path| path.to_str().unwrap().to_owned()) + .collect(); + + // Ordering matters for the hashing! + lines.sort(); + marker_file + .write_all(&lines.join("\n").into_bytes()) + .unwrap(); + + marker_file.flush().unwrap(); + } + } } impl FromStr for Hash { @@ -155,10 +255,6 @@ impl SourceList { Some(u64::from_ne_bytes(buf)) } - pub fn consume(self) -> Vec { - self.sources.into() - } - pub fn shortened_dirs(&self) -> Vec { let dirs: Vec = self .sources @@ -182,6 +278,12 @@ impl SourceList { }) .collect() } + + pub fn update_ejson_markers(&mut self) { + for source in self.sources.iter_mut() { + source.update_ejson_marker(); + } + } } #[cfg(test)] @@ -205,6 +307,8 @@ mod tests { Source { dir: Arbitrary::arbitrary(g), files: Arbitrary::arbitrary(g), + ejson_file_paths: Arbitrary::arbitrary(g), + used_ejson_files: Arbitrary::arbitrary(g), } } } diff --git a/src/hook.rs b/src/hook.rs index 52edd29..da408f4 100644 --- a/src/hook.rs +++ b/src/hook.rs @@ -84,49 +84,58 @@ pub fn load_env( let prev_hash = parts.next(); let json_data = parts.next().unwrap_or("{}"); - let active: Option = match prev_hash { + let active_sources_hash: Option = match prev_hash { None => None, Some("") => None, Some("0000000000000000") => None, Some(x) => Some(Hash::from_str(x)?), }; - // "targets" are sources of shadowenv lisp files - let targets = load_trusted_sources(pathbuf, false)?; + // Found sources of shadowenv lisp files + let sources = load_trusted_sources(pathbuf, false)?; + let found_sources_hash = sources + .as_ref() + .and_then(|source_lists| source_lists.hash()); - let targets_hash = targets.as_ref().and_then(|targets| targets.hash()); - - // before we had multiple targets, this ensured we only act if we needed to - match (&active, &targets) { - // if there is no active shadowenv and we've got no targets, then we have nothing to compute + // Check if we need to run the shaowenv programs or recompute + match (active_sources_hash, found_sources_hash) { + // If there is no active shadowenv and we found no sources, then we have nothing to compute. (None, None) => { return Ok(None); } - // if there is an active shadowenv and some action we've taken leads us to still be in the same one, we do nothing - // unless the force flag was specified - // probably need to update whatever sets prev_hash to be a hash of all the targets' hashes (?) - (Some(a), Some(_)) if a.hash == targets_hash.unwrap() && !force => { + + // If there is an active shadowenv and some action we've taken leads us to still be in the same one, + // we do nothing unless the force flag was specified. + // TODO: Probably need to update whatever sets prev_hash to be a hash of all the source lists' hashes (?) + (Some(ref active), Some(found)) if active.hash == found && !force => { return Ok(None); } + + // Run shadowenv programs in all other cases. (_, _) => (), } // "data" is used to undo changes made when activating a shadowenv // we will only have "data" if already inside a shadowenv let data = undo::Data::from_str(json_data)?; - let shadowenv = Shadowenv::new(env::vars().collect(), data, targets_hash.unwrap_or(0)); + let shadowenv = Shadowenv::new(env::vars().collect(), data, found_sources_hash.unwrap_or(0)); - match targets { - Some(targets) => { + match sources { + Some(mut source_lists) => { // run_program takes in the shadowenv, evaluates the code we found on it, and returns it - match ShadowLang::run_programs(shadowenv, targets) { + match ShadowLang::run_programs(shadowenv, &mut source_lists) { // no need to return anything descriptive here since we already // had ketos print it to stderr. Err(_) => Err(lang::ShadowlispError {}.into()), // note the "true" since we ran code to activate/modify the shadowenv - Ok(shadowenv) => Ok(Some(shadowenv)), + Ok(shadowenv) => { + // todo maintain marker files + source_lists.update_ejson_markers(); + Ok(Some(shadowenv)) + } } } + // note the "false" since we didn't have anything to run None => Ok(Some(shadowenv)), } @@ -288,7 +297,7 @@ mod tests { .unwrap() .unwrap(); - let sources = result.consume(); + let sources = result.sources; assert_eq!(sources.len(), 2); // Assert that sources are returned in the correct order diff --git a/src/lang.rs b/src/lang.rs index 685e875..d424840 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -1,12 +1,17 @@ use crate::{ + ejson, hash::{Source, SourceList}, shadowenv::Shadowenv, }; +use anyhow::anyhow; +use json_dotpath::DotPaths; use ketos::{Context, Error, FromValueRef, Name, Value}; use ketos_derive::{ForeignValue, FromValueRef}; use std::{ cell::{Ref, RefCell}, + collections::HashSet, env, fs, + ops::DerefMut, path::{Path, PathBuf}, rc::Rc, }; @@ -46,28 +51,35 @@ macro_rules! assert_args { // Sharing a value with Ketos means we can only access it through `&self`. // Mutation of values is possible through internally mutable containers, // such as `Cell` and `RefCell`. -struct ShadowenvWrapper(RefCell); - -impl ShadowenvWrapper { - fn new(shadowenv: Shadowenv) -> Self { - Self(RefCell::new(shadowenv)) +struct RefCellWrapper(RefCell) +where + T: std::fmt::Debug + 'static; + +impl RefCellWrapper +where + T: std::fmt::Debug + 'static, +{ + fn new(inner: T) -> Self { + Self(RefCell::new(inner)) } - fn borrow_mut_env(&self) -> std::cell::RefMut { + + fn borrow_mut_env(&self) -> std::cell::RefMut { self.0.borrow_mut() } - fn borrow_env(&self) -> Ref<'_, Shadowenv> { + + fn borrow_env(&self) -> Ref<'_, T> { self.0.borrow() } - fn into_inner(self) -> Shadowenv { + fn into_inner(self) -> T { self.0.into_inner() } } -fn get_value(ctx: &Context, shadowenv_name: Name) -> Value { +fn get_value(ctx: &Context, value_name: Name) -> Value { ctx.scope() - .get_constant(shadowenv_name) - .expect("bug: shadowenv not defined") + .get_constant(value_name) + .expect("bug: value not defined") } fn path_concat(vals: &mut [Value]) -> Result { @@ -80,18 +92,31 @@ fn path_concat(vals: &mut [Value]) -> Result { } impl ShadowLang { - pub fn run_programs(shadowenv: Shadowenv, sources: SourceList) -> Result { - let wrapper = Rc::new(ShadowenvWrapper::new(shadowenv)); + pub fn run_programs( + shadowenv: Shadowenv, + sources: &mut SourceList, + ) -> Result { + let wrapper = Rc::new(RefCellWrapper::new(shadowenv)); + let dirs = sources.shortened_dirs(); - for source in sources.consume() { - Self::run(&wrapper, source)?; + for source in sources.sources.iter_mut() { + let ejson_tracker_wrapper = Rc::new(RefCellWrapper::new(HashSet::default())); + Self::run(&wrapper, &ejson_tracker_wrapper, source)?; + source + .set_used_ejson_paths(Rc::try_unwrap(ejson_tracker_wrapper).unwrap().into_inner()); } + let mut result = Rc::try_unwrap(wrapper).unwrap().into_inner(); result.add_dirs(dirs); + Ok(result) } - fn run(rc_wrapper: &Rc, source: Source) -> Result<(), Error> { + fn run( + rc_wrapper: &Rc>, + ejson_tracker_wrapper: &Rc>>, + source: &mut Source, + ) -> Result<(), Error> { let mut restrictions = ketos::RestrictConfig::strict(); // "Maximum size of value stack, in values" // This also puts a cap on the size of string literals in a single function invocation. @@ -107,11 +132,17 @@ impl ShadowLang { .finish(); let shadowenv_name = interp.scope().add_name("shadowenv"); + let ejson_tracker_name = interp.scope().add_name("used_ejson"); interp .scope() .add_constant(shadowenv_name, Value::Foreign(rc_wrapper.clone())); + interp.scope().add_constant( + ejson_tracker_name, + Value::Foreign(ejson_tracker_wrapper.clone()), + ); + ketos_fn2! { interp.scope() => "path-concat" => fn path_concat(...) -> String } @@ -120,7 +151,7 @@ impl ShadowLang { assert_args!(args, 1, name); let value = get_value(ctx, shadowenv_name); - let wrapper: &ShadowenvWrapper = FromValueRef::from_value_ref(&value)?; + let wrapper: &RefCellWrapper = FromValueRef::from_value_ref(&value)?; let name = <&str as FromValueRef>::from_value_ref(&args[0])?; let result = wrapper @@ -137,7 +168,8 @@ impl ShadowLang { assert_args!(args, 2, name); let value = get_value(ctx, shadowenv_name); - let shadowenv = <&ShadowenvWrapper as FromValueRef>::from_value_ref(&value)?; + let shadowenv = + <&RefCellWrapper as FromValueRef>::from_value_ref(&value)?; let name = <&str as FromValueRef>::from_value_ref(&args[0])?; let value = <&str as FromValueRef>::from_value_ref(&args[1]).ok(); @@ -153,7 +185,7 @@ impl ShadowLang { assert_args!(args, 2, name); let value = get_value(ctx, shadowenv_name); - let wrapper: &ShadowenvWrapper = FromValueRef::from_value_ref(&value)?; + let wrapper: &RefCellWrapper = FromValueRef::from_value_ref(&value)?; let name = <&str as FromValueRef>::from_value_ref(&args[0])?; let value = <&str as FromValueRef>::from_value_ref(&args[1])?; @@ -169,7 +201,7 @@ impl ShadowLang { assert_args!(args, 2, name); let value = get_value(ctx, shadowenv_name); - let wrapper: &ShadowenvWrapper = FromValueRef::from_value_ref(&value)?; + let wrapper: &RefCellWrapper = FromValueRef::from_value_ref(&value)?; let name = <&str as FromValueRef>::from_value_ref(&args[0])?; let value = <&str as FromValueRef>::from_value_ref(&args[1])?; @@ -185,7 +217,7 @@ impl ShadowLang { assert_args!(args, 2, name); let value = get_value(ctx, shadowenv_name); - let wrapper: &ShadowenvWrapper = FromValueRef::from_value_ref(&value)?; + let wrapper: &RefCellWrapper = FromValueRef::from_value_ref(&value)?; let name = <&str as FromValueRef>::from_value_ref(&args[0])?; let value = <&str as FromValueRef>::from_value_ref(&args[1])?; @@ -201,7 +233,7 @@ impl ShadowLang { assert_args!(args, 2, name); let value = get_value(ctx, shadowenv_name); - let wrapper: &ShadowenvWrapper = FromValueRef::from_value_ref(&value)?; + let wrapper: &RefCellWrapper = FromValueRef::from_value_ref(&value)?; let name = <&str as FromValueRef>::from_value_ref(&args[0])?; let value = <&str as FromValueRef>::from_value_ref(&args[1])?; @@ -215,7 +247,7 @@ impl ShadowLang { interp.scope().add_value_with_name("provide", |name| { Value::new_foreign_fn(name, move |ctx, args| { let value = get_value(ctx, shadowenv_name); - let wrapper: &ShadowenvWrapper = FromValueRef::from_value_ref(&value)?; + let wrapper: &RefCellWrapper = FromValueRef::from_value_ref(&value)?; let version = match args.len() { 1 => None, @@ -256,6 +288,102 @@ impl ShadowLang { }) }); + // TODO: Should an eval error here stop the entire env injection? Right now, it just logs an error to stderr. + interp.scope().add_value_with_name("env/ejson", |name| { + Value::new_foreign_fn(name, move |ctx, args| { + if args.len() < 1 { + return Err(From::from(ketos::exec::ExecError::ArityError { + name: Some(name), + expected: ketos::function::Arity::Min(1), + found: 0, + })); + } + + let path = <&str as FromValueRef>::from_value_ref(&args[0])?; + let expanded = shellexpand::tilde(path); + let canonicalized = match fs::canonicalize(expanded.to_string()) { + Ok(p) => p, + Err(e) => { + return Err(From::from(ketos::io::IoError { + err: e, + path: PathBuf::from(path), + mode: ketos::io::IoMode::Read, + })); + } + }; + + let subpaths = args.get(1).and_then(|second_arg| match second_arg { + Value::List(elements) => { + // TODO: Handle invalid inputs. + Some( + elements + .iter() + .filter_map(|elem| match elem { + Value::Char(c) => Some(c.to_string()), + Value::String(s) => Some(s.to_string()), + _ => None, + }) + .collect(), + ) + } + Value::String(s) => Some(vec![s.to_string()]), + Value::Unit => None, + _ => None, + }); + + let shadowenv_value = get_value(ctx, shadowenv_name); + let shadowenv = + <&RefCellWrapper as FromValueRef>::from_value_ref(&shadowenv_value)?; + let mut shadowenv_ref = shadowenv.borrow_mut_env(); + + let esjon_tracker_value = get_value(ctx, ejson_tracker_name); + let esjon_tracker = + <&RefCellWrapper> as FromValueRef>::from_value_ref( + &esjon_tracker_value, + )?; + let mut esjon_tracker_ref = esjon_tracker.borrow_mut_env(); + esjon_tracker_ref.insert(canonicalized.clone()); + + // TODO: Technically we shouldn't decode the entire file, only the queried subtree. + // This may matter on large secret files where we only pick a small subset. + // TODO: This code needs some cleanup. + match ejson::load_ejson_file(&canonicalized) { + Ok(ejson) => { + if let Some(subpaths) = subpaths { + for subpath in subpaths { + let _ = identify_ejson_subtree(&subpath, &ejson) + .and_then(|subtree| { + inject_ejson_contents( + subpath.split(".").last().unwrap(), + &subtree, + shadowenv_ref.deref_mut(), + ) + }) + .inspect_err(|err| eprintln!("{err}")); + } + } else { + // Load entire file. + let _ = inject_ejson_contents( + "", + &serde_json::Value::Object(ejson), + shadowenv_ref.deref_mut(), + ) + .inspect_err(|err| eprintln!("{err}")); + } + } + + Err(err) => { + // TODO: How to error handle correctly here? Should we repurpose `output::format_hook_error`? + // Note: Any print to stdout seems to be treated as input to the interpreter, must use stderr. + eprintln!("Error evalutating ejson: {err}"); + return Ok(Value::Unit); + } + }; + + Ok(Value::Unit) + }) + }); + let prelude = r#" ;; Better when/if/let macros (macro (when pred :rest body) `(if ,pred (do ,@body) ())) @@ -304,13 +432,74 @@ impl ShadowLang { return Err(err); }; } + if let Ok(dir) = original_path { let _ = env::set_current_dir(dir); } + Ok(()) } } +fn identify_ejson_subtree( + at_path: &str, + ejson: &serde_json::Map, +) -> Result { + // TODO: It is unclear how the traversal & injection should actually work, eg.: + // - How to handle arrays? Ignore them, use indexed keys (key_0=elem0, key_1=elem1, ...) or use compound values (key="elem1,elem2,...") + // - Compounding has problems with deeper nesting (`a: [{b: "b"}, ...]`). Indexing seems to be more universal. + // - How to handle nulls? + // - How should nested object keys compose? `{ a: { b: "c" }}` -> `A_B=c` or `B=c`? + // - How to compose the name of the env var? `(env/ejson "..." "path.to.obj")` -> `OBJ_KEY1=VAL1`` or `KEY1=VAL1` or ...? + // TODO: Unfortunately this copies the data, we should write our own simple dotpath traversal. + match ejson.dot_get::(at_path)? { + Some(value) => Ok(value), + None => { + return Err(anyhow!("Json path {at_path} does not exist or is null.")); + } + } +} + +fn inject_ejson_contents( + key: &str, + value: &serde_json::Value, + shadowenv: &mut Shadowenv, +) -> Result<(), anyhow::Error> { + let key = key.replace(".", "_").to_ascii_uppercase(); + let prefix = if key.is_empty() { + "".to_owned() + } else { + format!("{key}_") + }; + + match value { + serde_json::Value::Null => return Ok(()), // TODO: Invalid? Unset value? Ignore? Ignoring for now. + serde_json::Value::String(s) => shadowenv.set(&key, Some(s)), + + bool @ serde_json::Value::Bool(_) => { + shadowenv.set(&key, serde_json::to_string(bool).ok().as_deref()) + } + + num @ serde_json::Value::Number(_) => { + shadowenv.set(&key, serde_json::to_string(num).ok().as_deref()) + } + + serde_json::Value::Array(array) => { + for (index, elem) in array.iter().enumerate() { + inject_ejson_contents(&format!("{prefix}{index}"), elem, shadowenv)?; + } + } + + serde_json::Value::Object(map) => { + for (k, v) in map { + inject_ejson_contents(&format!("{prefix}{k}"), v, shadowenv)?; + } + } + }; + + Ok(()) +} + #[cfg(test)] mod tests { use super::*; @@ -326,6 +515,8 @@ mod tests { name: "file.lisp".to_string(), contents: content.to_string(), }], + ejson_file_paths: vec![], + used_ejson_files: HashSet::default(), } } @@ -348,7 +539,7 @@ mod tests { ); let result = - ShadowLang::run_programs(shadowenv, SourceList::new_with_sources(vec![source])); + ShadowLang::run_programs(shadowenv, &mut SourceList::new_with_sources(vec![source])); let env = result.unwrap().exports().unwrap(); assert_eq!(env["VAL_A"].as_ref().unwrap(), "42"); @@ -372,7 +563,7 @@ mod tests { ); let result = - ShadowLang::run_programs(shadowenv, SourceList::new_with_sources(vec![source])); + ShadowLang::run_programs(shadowenv, &mut SourceList::new_with_sources(vec![source])); let env = result.unwrap().exports().unwrap(); assert_eq!(env["PATH_A"].as_ref().unwrap(), "/path3:/path1:/path2"); @@ -398,7 +589,7 @@ mod tests { ); let result = - ShadowLang::run_programs(shadowenv, SourceList::new_with_sources(vec![source])); + ShadowLang::run_programs(shadowenv, &mut SourceList::new_with_sources(vec![source])); let env = result.unwrap().exports().unwrap(); assert_eq!(env["PATH"].as_ref().unwrap(), "/something_else"); @@ -415,7 +606,7 @@ mod tests { ); let shadowenv = - ShadowLang::run_programs(shadowenv, SourceList::new_with_sources(vec![source])) + ShadowLang::run_programs(shadowenv, &mut SourceList::new_with_sources(vec![source])) .unwrap(); let expected = HashSet::from([Feature::new("ruby".to_string(), Some("3.1.2".to_string()))]); assert_eq!(shadowenv.features(), expected); @@ -432,7 +623,7 @@ mod tests { ); let home = dirs::home_dir().map(|p| p.into_os_string().into_string().unwrap()); let shadowenv = - ShadowLang::run_programs(shadowenv, SourceList::new_with_sources(vec![source])) + ShadowLang::run_programs(shadowenv, &mut SourceList::new_with_sources(vec![source])) .unwrap(); assert_eq!(shadowenv.get("EXPANDED"), home); } @@ -456,7 +647,7 @@ mod tests { // The source that comes last in the input list should be executed last let shadowenv = ShadowLang::run_programs( shadowenv, - SourceList::new_with_sources(vec![outer_source, inner_source]), + &mut SourceList::new_with_sources(vec![outer_source, inner_source]), ) .unwrap(); assert_eq!(shadowenv.get("TEST"), Some("TWO".to_string())); diff --git a/src/loader.rs b/src/loader.rs index 5e370cb..3f12dba 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -7,6 +7,7 @@ use std::{ pub const SHADOWENV_DIR_NAME: &str = ".shadowenv.d"; pub const SHADOWENV_PARENT_LINK_NAME: &str = "parent"; +pub const SHADOWENV_LINKED_EJSON_FILES_NAME: &str = ".ejson-files"; #[derive(thiserror::Error, Debug)] pub enum TraversalError { @@ -161,11 +162,15 @@ pub fn load(dirpath: PathBuf) -> Result, Error> { if path.is_file() { // TODO: there HAS to be a better way to do this. let basename = path.file_name().unwrap().to_string_lossy().to_string(); - if !basename.ends_with(".lisp") { - continue; + + if basename.ends_with(".lisp") { + let contents = fs::read_to_string(&path)?; + source.add_file(basename, contents); + } else if basename == SHADOWENV_LINKED_EJSON_FILES_NAME { + source.add_ejson_links(&path)?; } - let contents = fs::read_to_string(&path)?; - source.add_file(basename, contents); + + continue; } } diff --git a/src/main.rs b/src/main.rs index 26267a2..04d0d25 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ mod cli; mod diff; +mod ejson; mod exec_cmd; mod features; mod hash;