From f9dec92f740fcd15e4113fde3fa26eac46d61d42 Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 14 Jan 2026 17:37:02 +0000 Subject: [PATCH 01/11] Add crypto for AES-GCM --- Cargo.lock | 358 ++++++++++++------ Cargo.toml | 2 + crates/iceberg/Cargo.toml | 2 + crates/iceberg/src/encryption/crypto.rs | 481 ++++++++++++++++++++++++ crates/iceberg/src/encryption/mod.rs | 25 ++ crates/iceberg/src/lib.rs | 1 + 6 files changed, 745 insertions(+), 124 deletions(-) create mode 100644 crates/iceberg/src/encryption/crypto.rs create mode 100644 crates/iceberg/src/encryption/mod.rs diff --git a/Cargo.lock b/Cargo.lock index ebb51dfb7a..28fe47aefe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + [[package]] name = "aes" version = "0.8.4" @@ -19,6 +29,20 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "ahash" version = "0.8.12" @@ -80,7 +104,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", - "anstyle-parse", + "anstyle-parse 0.2.7", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse 1.0.0", "anstyle-query", "anstyle-wincon", "colorchoice", @@ -90,9 +129,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" @@ -103,13 +142,22 @@ dependencies = [ "utf8parse", ] +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + [[package]] name = "anstyle-query" version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -120,7 +168,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -504,9 +552,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.14" +version = "1.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2" +checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" dependencies = [ "aws-credential-types", "aws-runtime", @@ -524,7 +572,7 @@ dependencies = [ "fastrand", "hex", "http 1.4.0", - "ring", + "sha1", "time", "tokio", "tracing", @@ -534,9 +582,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.13" +version = 
"1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -568,9 +616,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" +checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -593,9 +641,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.139.0" +version = "1.141.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af3da2f5cf74983a60a7d5a182d76db1609ee4401057c98732ed8be973cb30ee" +checksum = "ebe2debf51404736e52333ea5933e70ff8ea94e4b303e38f2280f7e48ba3a928" dependencies = [ "aws-credential-types", "aws-runtime", @@ -617,9 +665,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.51.0" +version = "1.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c7f1b4eb404522622f5489fc649ba193c1e3ce4416bfcfbbcb008ad0cbfe4f" +checksum = "db46a06ba33a21a4c4bb3f13a27e892614ee1221a5c486fc28de35a60268eb98" dependencies = [ "aws-credential-types", "aws-runtime", @@ -641,9 +689,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.95.0" +version = "1.96.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" +checksum = "f64a6eded248c6b453966e915d32aeddb48ea63ad17932682774eb026fbef5b1" dependencies = [ "aws-credential-types", "aws-runtime", @@ -665,9 +713,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.97.0" +version = "1.98.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" +checksum = "db96d720d3c622fcbe08bae1c4b04a72ce6257d8b0584cb5418da00ae20a344f" dependencies = [ "aws-credential-types", "aws-runtime", @@ -689,9 +737,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.99.0" +version = "1.100.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" +checksum = "fafbdda43b93f57f699c5dfe8328db590b967b8a820a13ccdd6687355dfcc7ca" dependencies = [ "aws-credential-types", "aws-runtime", @@ -714,9 +762,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" +checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -903,9 +951,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.13" +version = "1.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" +checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1030,9 +1078,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" +checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" dependencies = [ "bon-macros", "rustversion", @@ -1040,11 +1088,11 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.0" +version = "3.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" +checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" dependencies = [ - "darling 0.20.11", + "darling 0.23.0", "ident_case", "prettyplease", "proc-macro2", @@ -1131,9 +1179,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.56" +version = "1.2.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" dependencies = [ "find-msvc-tools", "jobserver", @@ -1189,9 +1237,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", "clap_derive", @@ -1199,11 +1247,11 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.60" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "anstream", + "anstream 1.0.0", "anstyle", "clap_lex", "strsim", @@ -1211,9 +1259,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.55" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" dependencies = [ "heck", "proc-macro2", @@ -1223,9 +1271,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "clipboard-win" @@ -1247,9 +1295,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "colored" @@ -1257,7 +1305,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -1302,13 +1350,12 @@ dependencies = [ [[package]] name = "console" -version = "0.16.2" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" dependencies = [ "encode_unicode", "libc", - "once_cell", "unicode-width 0.2.2", "windows-sys 0.61.2", ] @@ -1449,6 +1496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] @@ -1473,6 +1521,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + [[package]] name = "darling" version = "0.20.11" @@ -1662,9 +1719,9 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a8af8e5117e7bcac98fdbf4eb981562986f578b9d8a3c7eb91192dc955d450ee" +checksum = "d6cc57c2a8889e722be7913bb3c053c554f23abafa2e99005ad6fe84c765f7ce" dependencies = [ "arrow", "async-trait", @@ -2245,9 +2302,9 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d28510abfc85709578fcf9065325d43ee3303012c0ccec2dce351bdc577d00" +checksum = "25f2e5519037772210eee5bb87a95dc953e1bd94bc2f9c9d6bb14b0c7fb9ab0a" dependencies = [ "arrow", "bigdecimal", @@ -2286,9 +2343,9 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb859e97759dcbff66b484bdf4f251f9a76784d3dd7883c124de57510b1e1c2" +checksum = "74e697441492ce35353b07842181f0f92765c5d6ac1daaead4974ecf20058247" dependencies = [ "arrow", "async-trait", @@ -2312,9 +2369,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.2.0" +version = "52.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199790fd96e852997b30da4ff11109378c944841757d93875ea85fc69587ec91" +checksum = "fe00df31ca03a167d3e40054120930fe5fb689e66bc625b602fac7153b222aea" dependencies = [ "async-recursion", "async-trait", @@ -2418,7 +2475,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2434,9 +2491,9 @@ dependencies = [ [[package]] name = "dissimilar" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e" [[package]] name = "dlv-list" @@ -2540,7 +2597,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ - "anstream", + "anstream 0.6.21", "anstyle", "env_filter", "jiff", @@ -2571,7 +2628,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -2898,24 +2955,34 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "glob" version = "0.3.3" @@ -3248,7 +3315,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -3282,6 +3349,7 @@ dependencies = [ name = "iceberg" version = "0.9.0" dependencies = [ + "aes-gcm", "anyhow", "apache-avro", "array-init", @@ -3333,6 +3401,7 @@ dependencies = [ "typetag", "url", "uuid", + "zeroize", "zstd", ] @@ -3800,9 +3869,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819b44bc7c87d9117eb522f14d46e918add69ff12713c475946b0a29363ed1c2" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ 
"jiff-static", "jiff-tzdb-platform", @@ -3810,14 +3879,14 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] name = "jiff-static" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "470252db18ecc35fd766c0891b1e3ec6cbbcd62507e85276c01bf75d8e94d4a1" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", @@ -3826,9 +3895,9 @@ dependencies = [ [[package]] name = "jiff-tzdb" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" [[package]] name = "jiff-tzdb-platform" @@ -3954,9 +4023,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "liblzma" @@ -4023,7 +4092,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" dependencies = [ - "anstream", + "anstream 0.6.21", "anstyle", "clap", "escape8259", @@ -4081,9 +4150,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" dependencies = [ 
"twox-hash", ] @@ -4104,6 +4173,12 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memo-map" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b" + [[package]] name = "memoffset" version = "0.9.1" @@ -4137,10 +4212,11 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.17.1" +version = "2.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea5ea1e90055f200af6b8e52a4a34e05e77e7fee953a9fb40c631efdc43cab1" +checksum = "328251e58ad8e415be6198888fc207502727dc77945806421ab34f35bf012e7d" dependencies = [ + "memo-map", "serde", ] @@ -4337,7 +4413,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4414,9 +4490,9 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +checksum = "5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" dependencies = [ "num_enum_derive", "rustversion", @@ -4424,9 +4500,9 @@ dependencies = [ [[package]] name = "num_enum_derive" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ "proc-macro-crate", "proc-macro2", @@ -4482,9 +4558,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -4492,6 +4568,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "opendal" version = "0.55.0" @@ -4854,6 +4936,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.1" @@ -4862,9 +4956,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" dependencies = [ "portable-atomic", ] @@ -4941,11 +5035,11 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" 
dependencies = [ - "toml_edit 0.23.10+spec-1.0.0", + "toml_edit 0.25.4+spec-1.1.0", ] [[package]] @@ -4974,7 +5068,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -4993,7 +5087,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn", @@ -5077,7 +5171,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.37", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -5114,16 +5208,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -5134,6 +5228,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "radix_trie" version = "0.2.1" @@ -5556,7 +5656,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -5693,9 +5793,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -6101,12 +6201,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6542,10 +6642,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -6666,9 +6766,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -6691,7 +6791,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -6774,9 +6874,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.5+spec-1.1.0" +version = "1.0.0+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" dependencies = [ "serde_core", ] @@ -6797,12 +6897,12 @@ dependencies = [ [[package]] name = 
"toml_edit" -version = "0.23.10+spec-1.0.0" +version = "0.25.4+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" +checksum = "7193cbd0ce53dc966037f54351dbbcf0d5a642c7f0038c382ef9e677ce8c13f2" dependencies = [ "indexmap 2.13.0", - "toml_datetime 0.7.5+spec-1.1.0", + "toml_datetime 1.0.0+spec-1.1.0", "toml_parser", "winnow", ] @@ -6913,9 +7013,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -7097,6 +7197,16 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ -7145,7 +7255,7 @@ version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ - "getrandom 0.4.1", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -7440,7 +7550,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -7735,9 +7845,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" 
-version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" dependencies = [ "memchr", ] @@ -7873,18 +7983,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.40" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.40" +version = "0.8.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 6a361ecbd8..6adef1c268 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ repository = "https://github.com/apache/iceberg-rust" rust-version = "1.92" [workspace.dependencies] +aes-gcm = "0.10" anyhow = "1.0.72" apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" @@ -133,5 +134,6 @@ typetag = "0.2" url = "2.5.7" uuid = { version = "1.18", features = ["v7"] } volo = "0.10.6" +zeroize = "1.7" volo-thrift = "0.10.8" zstd = "0.13.3" diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 41ee771617..aa1d0cd4a5 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -33,6 +33,7 @@ default = [] [dependencies] +aes-gcm = { workspace = true } anyhow = { workspace = true } apache-avro = { workspace = true } array-init = { workspace = true } @@ -78,6 +79,7 @@ typed-builder = { workspace = true } typetag = { workspace = true } url = { workspace = true } uuid = { workspace = true } 
+zeroize = { workspace = true } zstd = { workspace = true } [dev-dependencies] diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs new file mode 100644 index 0000000000..d179b0097d --- /dev/null +++ b/crates/iceberg/src/encryption/crypto.rs @@ -0,0 +1,481 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Core cryptographic operations for Iceberg encryption. + +use std::str::FromStr; + +use aes_gcm::aead::{Aead, AeadCore, KeyInit, OsRng, Payload}; +use aes_gcm::{Aes128Gcm, Aes256Gcm, Key, Nonce}; +use zeroize::Zeroizing; + +use crate::{Error, ErrorKind, Result}; + +/// Supported encryption algorithms. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EncryptionAlgorithm { + /// AES-128 in GCM mode + Aes128Gcm, + /// AES-256 in GCM mode + Aes256Gcm, +} + +impl EncryptionAlgorithm { + /// Returns the key length in bytes for this algorithm. + pub fn key_length(&self) -> usize { + match self { + Self::Aes128Gcm => 16, + Self::Aes256Gcm => 32, + } + } + + /// Returns the nonce/IV length in bytes for this algorithm. + pub fn nonce_length(&self) -> usize { + 12 // GCM uses 96-bit nonces + } + + /// Returns the string identifier for this algorithm. 
+ pub fn as_str(&self) -> &'static str { + match self { + Self::Aes128Gcm => "AES_GCM_128", + Self::Aes256Gcm => "AES_GCM_256", + } + } +} + +impl FromStr for EncryptionAlgorithm { + type Err = Error; + + fn from_str(s: &str) -> Result { + match s { + "AES_GCM_128" | "AES128_GCM" => Ok(Self::Aes128Gcm), + "AES_GCM_256" | "AES256_GCM" => Ok(Self::Aes256Gcm), + _ => Err(Error::new( + ErrorKind::DataInvalid, + format!("Unsupported encryption algorithm: {s}"), + )), + } + } +} + +/// A secure encryption key that zeroes its memory on drop. +#[derive(Debug)] +pub struct SecureKey { + key: Zeroizing>, + algorithm: EncryptionAlgorithm, +} + +impl SecureKey { + /// Creates a new secure key with the specified algorithm. + /// + /// # Errors + /// Returns an error if the key length doesn't match the algorithm requirements. + pub fn new(key: Vec, algorithm: EncryptionAlgorithm) -> Result { + if key.len() != algorithm.key_length() { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid key length for {:?}: expected {} bytes, got {}", + algorithm, + algorithm.key_length(), + key.len() + ), + )); + } + Ok(Self { + key: Zeroizing::new(key), + algorithm, + }) + } + + /// Generates a new random key for the specified algorithm. + pub fn generate(algorithm: EncryptionAlgorithm) -> Self { + let mut key = vec![0u8; algorithm.key_length()]; + use rand::RngCore; + OsRng.fill_bytes(&mut key); + Self { + key: Zeroizing::new(key), + algorithm, + } + } + + /// Returns the encryption algorithm for this key. + pub fn algorithm(&self) -> EncryptionAlgorithm { + self.algorithm + } + + /// Returns the key bytes. + pub fn as_bytes(&self) -> &[u8] { + &self.key + } +} + +/// AES-GCM encryptor for encrypting and decrypting data. +pub struct AesGcmEncryptor { + key: SecureKey, +} + +impl AesGcmEncryptor { + /// Creates a new encryptor with the specified key. + pub fn new(key: SecureKey) -> Self { + Self { key } + } + + /// Encrypts data using AES-GCM. 
+ /// + /// # Arguments + /// * `plaintext` - The data to encrypt + /// * `aad` - Additional authenticated data (optional) + /// + /// # Returns + /// The encrypted data in the format: [12-byte nonce][ciphertext][16-byte auth tag] + /// This matches the Java implementation format for compatibility. + pub fn encrypt(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { + match self.key.algorithm() { + EncryptionAlgorithm::Aes128Gcm => self.encrypt_aes128_gcm(plaintext, aad), + EncryptionAlgorithm::Aes256Gcm => self.encrypt_aes256_gcm(plaintext, aad), + } + } + + /// Decrypts data using AES-GCM. + /// + /// # Arguments + /// * `ciphertext` - The encrypted data with format: [12-byte nonce][encrypted data][16-byte auth tag] + /// * `aad` - Additional authenticated data (must match encryption) + /// + /// # Returns + /// The decrypted plaintext. + pub fn decrypt(&self, ciphertext: &[u8], aad: Option<&[u8]>) -> Result> { + const NONCE_LEN: usize = 12; + const TAG_LEN: usize = 16; + + if ciphertext.len() < NONCE_LEN + TAG_LEN { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Ciphertext too short: expected at least {} bytes, got {}", + NONCE_LEN + TAG_LEN, + ciphertext.len() + ), + )); + } + + let nonce = &ciphertext[..NONCE_LEN]; + let encrypted_data = &ciphertext[NONCE_LEN..]; + match self.key.algorithm() { + EncryptionAlgorithm::Aes128Gcm => self.decrypt_aes128_gcm(nonce, encrypted_data, aad), + EncryptionAlgorithm::Aes256Gcm => self.decrypt_aes256_gcm(nonce, encrypted_data, aad), + } + } + + fn encrypt_aes128_gcm(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { + let key = Key::::from_slice(self.key.as_bytes()); + let cipher = Aes128Gcm::new(key); + let nonce = Aes128Gcm::generate_nonce(&mut OsRng); + + let ciphertext = if let Some(aad) = aad { + let payload = Payload { + msg: plaintext, + aad, + }; + cipher.encrypt(&nonce, payload).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-128-GCM encryption failed") + 
.with_source(anyhow::anyhow!(e)) + })? + } else { + cipher.encrypt(&nonce, plaintext).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-128-GCM encryption failed") + .with_source(anyhow::anyhow!(e)) + })? + }; + + // Prepend nonce to ciphertext (Java compatible format) + let mut result = Vec::with_capacity(nonce.len() + ciphertext.len()); + result.extend_from_slice(&nonce); + result.extend_from_slice(&ciphertext); + Ok(result) + } + + fn decrypt_aes128_gcm( + &self, + nonce: &[u8], + ciphertext: &[u8], + aad: Option<&[u8]>, + ) -> Result> { + let key = Key::::from_slice(self.key.as_bytes()); + let cipher = Aes128Gcm::new(key); + let nonce = Nonce::from_slice(nonce); + + let plaintext = if let Some(aad) = aad { + let payload = Payload { + msg: ciphertext, + aad, + }; + cipher.decrypt(nonce, payload).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-128-GCM decryption failed") + .with_source(anyhow::anyhow!(e)) + })? + } else { + cipher.decrypt(nonce, ciphertext).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-128-GCM decryption failed") + .with_source(anyhow::anyhow!(e)) + })? + }; + + Ok(plaintext) + } + + fn encrypt_aes256_gcm(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { + let key = Key::::from_slice(self.key.as_bytes()); + let cipher = Aes256Gcm::new(key); + let nonce = Aes256Gcm::generate_nonce(&mut OsRng); + + let ciphertext = if let Some(aad) = aad { + let payload = Payload { + msg: plaintext, + aad, + }; + cipher.encrypt(&nonce, payload).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-256-GCM encryption failed") + .with_source(anyhow::anyhow!(e)) + })? + } else { + cipher.encrypt(&nonce, plaintext).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-256-GCM encryption failed") + .with_source(anyhow::anyhow!(e)) + })? 
+ }; + + // Prepend nonce to ciphertext (Java compatible format) + let mut result = Vec::with_capacity(nonce.len() + ciphertext.len()); + result.extend_from_slice(&nonce); + result.extend_from_slice(&ciphertext); + Ok(result) + } + + fn decrypt_aes256_gcm( + &self, + nonce: &[u8], + ciphertext: &[u8], + aad: Option<&[u8]>, + ) -> Result> { + let key = Key::::from_slice(self.key.as_bytes()); + let cipher = Aes256Gcm::new(key); + let nonce = Nonce::from_slice(nonce); + + let plaintext = if let Some(aad) = aad { + let payload = Payload { + msg: ciphertext, + aad, + }; + cipher.decrypt(nonce, payload).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-256-GCM decryption failed") + .with_source(anyhow::anyhow!(e)) + })? + } else { + cipher.decrypt(nonce, ciphertext).map_err(|e| { + Error::new(ErrorKind::Unexpected, "AES-256-GCM decryption failed") + .with_source(anyhow::anyhow!(e)) + })? + }; + + Ok(plaintext) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_encryption_algorithm() { + assert_eq!(EncryptionAlgorithm::Aes128Gcm.key_length(), 16); + assert_eq!(EncryptionAlgorithm::Aes256Gcm.key_length(), 32); + assert_eq!(EncryptionAlgorithm::Aes128Gcm.nonce_length(), 12); + assert_eq!(EncryptionAlgorithm::Aes256Gcm.nonce_length(), 12); + + assert_eq!( + EncryptionAlgorithm::from_str("AES_GCM_128").unwrap(), + EncryptionAlgorithm::Aes128Gcm + ); + assert_eq!( + EncryptionAlgorithm::from_str("AES128_GCM").unwrap(), + EncryptionAlgorithm::Aes128Gcm + ); + assert_eq!( + EncryptionAlgorithm::from_str("AES_GCM_256").unwrap(), + EncryptionAlgorithm::Aes256Gcm + ); + assert_eq!( + EncryptionAlgorithm::from_str("AES256_GCM").unwrap(), + EncryptionAlgorithm::Aes256Gcm + ); + + assert!(EncryptionAlgorithm::from_str("INVALID").is_err()); + + assert_eq!(EncryptionAlgorithm::Aes128Gcm.as_str(), "AES_GCM_128"); + assert_eq!(EncryptionAlgorithm::Aes256Gcm.as_str(), "AES_GCM_256"); + } + + #[test] + fn test_secure_key() { + // Test key generation + let key1 = 
SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); + assert_eq!(key1.as_bytes().len(), 16); + assert_eq!(key1.algorithm(), EncryptionAlgorithm::Aes128Gcm); + + let key2 = SecureKey::generate(EncryptionAlgorithm::Aes256Gcm); + assert_eq!(key2.as_bytes().len(), 32); + assert_eq!(key2.algorithm(), EncryptionAlgorithm::Aes256Gcm); + + // Test key creation with validation + let valid_key = vec![0u8; 16]; + assert!(SecureKey::new(valid_key, EncryptionAlgorithm::Aes128Gcm).is_ok()); + + let invalid_key = vec![0u8; 32]; + assert!(SecureKey::new(invalid_key, EncryptionAlgorithm::Aes128Gcm).is_err()); + } + + #[test] + fn test_aes128_gcm_encryption_roundtrip() { + let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); + let encryptor = AesGcmEncryptor::new(key); + + let plaintext = b"Hello, Iceberg encryption!"; + let aad = b"additional authenticated data"; + + // Test without AAD + let ciphertext = encryptor.encrypt(plaintext, None).unwrap(); + assert!(ciphertext.len() > plaintext.len() + 12); // nonce + tag + assert_ne!(&ciphertext[12..], plaintext); // encrypted portion differs + + let decrypted = encryptor.decrypt(&ciphertext, None).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with AAD + let ciphertext = encryptor.encrypt(plaintext, Some(aad)).unwrap(); + let decrypted = encryptor.decrypt(&ciphertext, Some(aad)).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with wrong AAD fails + assert!(encryptor.decrypt(&ciphertext, Some(b"wrong aad")).is_err()); + } + + #[test] + fn test_aes256_gcm_encryption_roundtrip() { + let key = SecureKey::generate(EncryptionAlgorithm::Aes256Gcm); + let encryptor = AesGcmEncryptor::new(key); + + let plaintext = b"Testing AES-256-GCM encryption"; + let aad = b"metadata"; + + // Test without AAD + let ciphertext = encryptor.encrypt(plaintext, None).unwrap(); + assert!(ciphertext.len() > plaintext.len() + 12); // nonce + tag + assert_ne!(&ciphertext[12..], plaintext); // encrypted portion differs + + let decrypted 
= encryptor.decrypt(&ciphertext, None).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with AAD + let ciphertext = encryptor.encrypt(plaintext, Some(aad)).unwrap(); + let decrypted = encryptor.decrypt(&ciphertext, Some(aad)).unwrap(); + assert_eq!(decrypted, plaintext); + + // Test with wrong AAD fails + assert!(encryptor.decrypt(&ciphertext, None).is_err()); + } + + #[test] + fn test_encryption_with_empty_plaintext() { + let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); + let encryptor = AesGcmEncryptor::new(key); + + let plaintext = b""; + let ciphertext = encryptor.encrypt(plaintext, None).unwrap(); + + // Even empty plaintext produces nonce + tag + assert_eq!(ciphertext.len(), 12 + 16); // 12-byte nonce + 16-byte tag + + let decrypted = encryptor.decrypt(&ciphertext, None).unwrap(); + assert_eq!(decrypted, plaintext); + } + + #[test] + fn test_decryption_with_tampered_ciphertext() { + let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); + let encryptor = AesGcmEncryptor::new(key); + + let plaintext = b"Sensitive data"; + let mut ciphertext = encryptor.encrypt(plaintext, None).unwrap(); + + // Tamper with the encrypted portion (after the nonce) + if ciphertext.len() > 12 { + ciphertext[12] ^= 0xFF; + } + + // Decryption should fail due to authentication tag mismatch + assert!(encryptor.decrypt(&ciphertext, None).is_err()); + } + + #[test] + fn test_different_keys_produce_different_ciphertexts() { + let key1 = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); + let key2 = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); + + let encryptor1 = AesGcmEncryptor::new(key1); + let encryptor2 = AesGcmEncryptor::new(key2); + + let plaintext = b"Same plaintext"; + + let ciphertext1 = encryptor1.encrypt(plaintext, None).unwrap(); + let ciphertext2 = encryptor2.encrypt(plaintext, None).unwrap(); + + // Different keys should produce different ciphertexts (comparing the encrypted portion) + // Note: The nonces will also be different, but 
we're mainly interested in the encrypted data + assert_ne!(&ciphertext1[12..], &ciphertext2[12..]); + } + + #[test] + fn test_ciphertext_format_java_compatible() { + // Test that our ciphertext format matches Java's: [12-byte nonce][ciphertext][16-byte tag] + let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); + let encryptor = AesGcmEncryptor::new(key); + + let plaintext = b"Test data"; + let ciphertext = encryptor.encrypt(plaintext, None).unwrap(); + + // Format should be: [12-byte nonce][encrypted_data + 16-byte GCM tag] + assert_eq!( + ciphertext.len(), + 12 + plaintext.len() + 16, + "Ciphertext should be nonce + plaintext + tag length" + ); + + // Verify we can decrypt by extracting nonce from the beginning + let nonce = &ciphertext[..12]; + assert_eq!(nonce.len(), 12, "Nonce should be 12 bytes"); + + // The rest is encrypted data + tag + let encrypted_with_tag = &ciphertext[12..]; + assert_eq!( + encrypted_with_tag.len(), + plaintext.len() + 16, + "Encrypted portion should be plaintext length + 16-byte tag" + ); + } +} diff --git a/crates/iceberg/src/encryption/mod.rs b/crates/iceberg/src/encryption/mod.rs new file mode 100644 index 0000000000..496209d591 --- /dev/null +++ b/crates/iceberg/src/encryption/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Encryption module for Apache Iceberg. +//! +//! This module provides core cryptographic primitives for encrypting +//! and decrypting data in Iceberg tables. + +mod crypto; + +pub use crypto::{AesGcmEncryptor, EncryptionAlgorithm, SecureKey}; diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 8b345deb6e..0b138d2818 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -92,6 +92,7 @@ mod runtime; pub mod arrow; pub(crate) mod delete_file_index; +pub mod encryption; pub mod test_utils; mod utils; pub mod writer; From a5d429afffcce10a8948e159c51ca437c40d689a Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 14 Jan 2026 21:15:03 +0000 Subject: [PATCH 02/11] remove 256 --- crates/iceberg/src/encryption/crypto.rs | 112 ++---------------------- 1 file changed, 6 insertions(+), 106 deletions(-) diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index d179b0097d..9480d4d984 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -20,18 +20,18 @@ use std::str::FromStr; use aes_gcm::aead::{Aead, AeadCore, KeyInit, OsRng, Payload}; -use aes_gcm::{Aes128Gcm, Aes256Gcm, Key, Nonce}; +use aes_gcm::{Aes128Gcm, Key, Nonce}; use zeroize::Zeroizing; use crate::{Error, ErrorKind, Result}; -/// Supported encryption algorithms. +/// Supported encryption algorithm. +/// Currently only AES-128-GCM is supported as it's the only algorithm +/// compatible with arrow-rs Parquet encryption. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum EncryptionAlgorithm { /// AES-128 in GCM mode Aes128Gcm, - /// AES-256 in GCM mode - Aes256Gcm, } impl EncryptionAlgorithm { @@ -39,7 +39,6 @@ impl EncryptionAlgorithm { pub fn key_length(&self) -> usize { match self { Self::Aes128Gcm => 16, - Self::Aes256Gcm => 32, } } @@ -52,7 +51,6 @@ impl EncryptionAlgorithm { pub fn as_str(&self) -> &'static str { match self { Self::Aes128Gcm => "AES_GCM_128", - Self::Aes256Gcm => "AES_GCM_256", } } } @@ -63,7 +61,6 @@ impl FromStr for EncryptionAlgorithm { fn from_str(s: &str) -> Result { match s { "AES_GCM_128" | "AES128_GCM" => Ok(Self::Aes128Gcm), - "AES_GCM_256" | "AES256_GCM" => Ok(Self::Aes256Gcm), _ => Err(Error::new( ErrorKind::DataInvalid, format!("Unsupported encryption algorithm: {s}"), @@ -147,7 +144,6 @@ impl AesGcmEncryptor { pub fn encrypt(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { match self.key.algorithm() { EncryptionAlgorithm::Aes128Gcm => self.encrypt_aes128_gcm(plaintext, aad), - EncryptionAlgorithm::Aes256Gcm => self.encrypt_aes256_gcm(plaintext, aad), } } @@ -178,7 +174,6 @@ impl AesGcmEncryptor { let encrypted_data = &ciphertext[NONCE_LEN..]; match self.key.algorithm() { EncryptionAlgorithm::Aes128Gcm => self.decrypt_aes128_gcm(nonce, encrypted_data, aad), - EncryptionAlgorithm::Aes256Gcm => self.decrypt_aes256_gcm(nonce, encrypted_data, aad), } } @@ -238,63 +233,6 @@ impl AesGcmEncryptor { Ok(plaintext) } - - fn encrypt_aes256_gcm(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { - let key = Key::::from_slice(self.key.as_bytes()); - let cipher = Aes256Gcm::new(key); - let nonce = Aes256Gcm::generate_nonce(&mut OsRng); - - let ciphertext = if let Some(aad) = aad { - let payload = Payload { - msg: plaintext, - aad, - }; - cipher.encrypt(&nonce, payload).map_err(|e| { - Error::new(ErrorKind::Unexpected, "AES-256-GCM encryption failed") - .with_source(anyhow::anyhow!(e)) - })? 
- } else { - cipher.encrypt(&nonce, plaintext).map_err(|e| { - Error::new(ErrorKind::Unexpected, "AES-256-GCM encryption failed") - .with_source(anyhow::anyhow!(e)) - })? - }; - - // Prepend nonce to ciphertext (Java compatible format) - let mut result = Vec::with_capacity(nonce.len() + ciphertext.len()); - result.extend_from_slice(&nonce); - result.extend_from_slice(&ciphertext); - Ok(result) - } - - fn decrypt_aes256_gcm( - &self, - nonce: &[u8], - ciphertext: &[u8], - aad: Option<&[u8]>, - ) -> Result> { - let key = Key::::from_slice(self.key.as_bytes()); - let cipher = Aes256Gcm::new(key); - let nonce = Nonce::from_slice(nonce); - - let plaintext = if let Some(aad) = aad { - let payload = Payload { - msg: ciphertext, - aad, - }; - cipher.decrypt(nonce, payload).map_err(|e| { - Error::new(ErrorKind::Unexpected, "AES-256-GCM decryption failed") - .with_source(anyhow::anyhow!(e)) - })? - } else { - cipher.decrypt(nonce, ciphertext).map_err(|e| { - Error::new(ErrorKind::Unexpected, "AES-256-GCM decryption failed") - .with_source(anyhow::anyhow!(e)) - })? 
- }; - - Ok(plaintext) - } } #[cfg(test)] @@ -304,9 +242,7 @@ mod tests { #[test] fn test_encryption_algorithm() { assert_eq!(EncryptionAlgorithm::Aes128Gcm.key_length(), 16); - assert_eq!(EncryptionAlgorithm::Aes256Gcm.key_length(), 32); assert_eq!(EncryptionAlgorithm::Aes128Gcm.nonce_length(), 12); - assert_eq!(EncryptionAlgorithm::Aes256Gcm.nonce_length(), 12); assert_eq!( EncryptionAlgorithm::from_str("AES_GCM_128").unwrap(), @@ -316,19 +252,12 @@ mod tests { EncryptionAlgorithm::from_str("AES128_GCM").unwrap(), EncryptionAlgorithm::Aes128Gcm ); - assert_eq!( - EncryptionAlgorithm::from_str("AES_GCM_256").unwrap(), - EncryptionAlgorithm::Aes256Gcm - ); - assert_eq!( - EncryptionAlgorithm::from_str("AES256_GCM").unwrap(), - EncryptionAlgorithm::Aes256Gcm - ); assert!(EncryptionAlgorithm::from_str("INVALID").is_err()); + assert!(EncryptionAlgorithm::from_str("AES_GCM_256").is_err()); + assert!(EncryptionAlgorithm::from_str("AES256_GCM").is_err()); assert_eq!(EncryptionAlgorithm::Aes128Gcm.as_str(), "AES_GCM_128"); - assert_eq!(EncryptionAlgorithm::Aes256Gcm.as_str(), "AES_GCM_256"); } #[test] @@ -338,10 +267,6 @@ mod tests { assert_eq!(key1.as_bytes().len(), 16); assert_eq!(key1.algorithm(), EncryptionAlgorithm::Aes128Gcm); - let key2 = SecureKey::generate(EncryptionAlgorithm::Aes256Gcm); - assert_eq!(key2.as_bytes().len(), 32); - assert_eq!(key2.algorithm(), EncryptionAlgorithm::Aes256Gcm); - // Test key creation with validation let valid_key = vec![0u8; 16]; assert!(SecureKey::new(valid_key, EncryptionAlgorithm::Aes128Gcm).is_ok()); @@ -375,31 +300,6 @@ mod tests { assert!(encryptor.decrypt(&ciphertext, Some(b"wrong aad")).is_err()); } - #[test] - fn test_aes256_gcm_encryption_roundtrip() { - let key = SecureKey::generate(EncryptionAlgorithm::Aes256Gcm); - let encryptor = AesGcmEncryptor::new(key); - - let plaintext = b"Testing AES-256-GCM encryption"; - let aad = b"metadata"; - - // Test without AAD - let ciphertext = encryptor.encrypt(plaintext, 
None).unwrap(); - assert!(ciphertext.len() > plaintext.len() + 12); // nonce + tag - assert_ne!(&ciphertext[12..], plaintext); // encrypted portion differs - - let decrypted = encryptor.decrypt(&ciphertext, None).unwrap(); - assert_eq!(decrypted, plaintext); - - // Test with AAD - let ciphertext = encryptor.encrypt(plaintext, Some(aad)).unwrap(); - let decrypted = encryptor.decrypt(&ciphertext, Some(aad)).unwrap(); - assert_eq!(decrypted, plaintext); - - // Test with wrong AAD fails - assert!(encryptor.decrypt(&ciphertext, None).is_err()); - } - #[test] fn test_encryption_with_empty_plaintext() { let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); From 8d40dd8cfada8eac235b23f47408d2427dd4f026 Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 17 Feb 2026 09:32:19 +0000 Subject: [PATCH 03/11] remove debug trat from key --- crates/iceberg/src/encryption/crypto.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index 9480d4d984..5e2bcb7cf3 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -70,7 +70,6 @@ impl FromStr for EncryptionAlgorithm { } /// A secure encryption key that zeroes its memory on drop. 
-#[derive(Debug)] pub struct SecureKey { key: Zeroizing>, algorithm: EncryptionAlgorithm, From 71d22c9b77371d80dc412d408f251fbdd4345c5d Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 17 Feb 2026 09:35:45 +0000 Subject: [PATCH 04/11] use at top of module --- Cargo.toml | 2 +- crates/iceberg/src/encryption/crypto.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6adef1c268..458b73ae1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,6 +134,6 @@ typetag = "0.2" url = "2.5.7" uuid = { version = "1.18", features = ["v7"] } volo = "0.10.6" -zeroize = "1.7" volo-thrift = "0.10.8" +zeroize = "1.7" zstd = "0.13.3" diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index 5e2bcb7cf3..5cd4ab609a 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -21,6 +21,7 @@ use std::str::FromStr; use aes_gcm::aead::{Aead, AeadCore, KeyInit, OsRng, Payload}; use aes_gcm::{Aes128Gcm, Key, Nonce}; +use rand::RngCore; use zeroize::Zeroizing; use crate::{Error, ErrorKind, Result}; @@ -101,7 +102,6 @@ impl SecureKey { /// Generates a new random key for the specified algorithm. 
pub fn generate(algorithm: EncryptionAlgorithm) -> Self { let mut key = vec![0u8; algorithm.key_length()]; - use rand::RngCore; OsRng.fill_bytes(&mut key); Self { key: Zeroizing::new(key), From 2b4705bfe56c324e2ade7496d2456fe611fa0159 Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 17 Feb 2026 09:52:26 +0000 Subject: [PATCH 05/11] remove constants --- crates/iceberg/src/encryption/crypto.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index 5cd4ab609a..8412456df5 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -19,7 +19,8 @@ use std::str::FromStr; -use aes_gcm::aead::{Aead, AeadCore, KeyInit, OsRng, Payload}; +use aes_gcm::aead::generic_array::typenum::Unsigned; +use aes_gcm::aead::{Aead, AeadCore, KeyInit, KeySizeUser, OsRng, Payload}; use aes_gcm::{Aes128Gcm, Key, Nonce}; use rand::RngCore; use zeroize::Zeroizing; @@ -39,13 +40,15 @@ impl EncryptionAlgorithm { /// Returns the key length in bytes for this algorithm. pub fn key_length(&self) -> usize { match self { - Self::Aes128Gcm => 16, + Self::Aes128Gcm => ::KeySize::USIZE, } } /// Returns the nonce/IV length in bytes for this algorithm. pub fn nonce_length(&self) -> usize { - 12 // GCM uses 96-bit nonces + match self { + Self::Aes128Gcm => ::NonceSize::USIZE, + } } /// Returns the string identifier for this algorithm. 
From a8f2cd952898a2957fd69b97b3e3ca2ddf2cf57e Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 18 Feb 2026 21:44:00 +0000 Subject: [PATCH 06/11] make it better --- Cargo.lock | 2 ++ Cargo.toml | 1 + crates/iceberg/Cargo.toml | 1 + crates/iceberg/src/encryption/crypto.rs | 43 ++++++++++++++++++------- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 28fe47aefe..1663bd2469 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,7 @@ dependencies = [ "cfg-if", "cipher", "cpufeatures", + "zeroize", ] [[package]] @@ -3349,6 +3350,7 @@ dependencies = [ name = "iceberg" version = "0.9.0" dependencies = [ + "aes", "aes-gcm", "anyhow", "apache-avro", diff --git a/Cargo.toml b/Cargo.toml index 458b73ae1e..70b5fab397 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ repository = "https://github.com/apache/iceberg-rust" rust-version = "1.92" [workspace.dependencies] +aes = { version = "0.8", features = ["zeroize"] } aes-gcm = "0.10" anyhow = "1.0.72" apache-avro = { version = "0.21", features = ["zstandard"] } diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index aa1d0cd4a5..48f22cec00 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -33,6 +33,7 @@ default = [] [dependencies] +aes = { workspace = true } aes-gcm = { workspace = true } anyhow = { workspace = true } apache-avro = { workspace = true } diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index 8412456df5..ebf23fbd00 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -20,9 +20,9 @@ use std::str::FromStr; use aes_gcm::aead::generic_array::typenum::Unsigned; +use aes_gcm::aead::rand_core::RngCore; use aes_gcm::aead::{Aead, AeadCore, KeyInit, KeySizeUser, OsRng, Payload}; use aes_gcm::{Aes128Gcm, Key, Nonce}; -use rand::RngCore; use zeroize::Zeroizing; use crate::{Error, ErrorKind, Result}; @@ -123,15 +123,30 @@ impl SecureKey { } } 
+enum CipherImpl { + Aes128Gcm(Aes128Gcm), +} + /// AES-GCM encryptor for encrypting and decrypting data. +/// +/// The cipher is initialized once at construction time. pub struct AesGcmEncryptor { - key: SecureKey, + cipher: CipherImpl, } impl AesGcmEncryptor { /// Creates a new encryptor with the specified key. + /// + /// The key schedule is expanded once here. The `aes` crate's `zeroize` + /// feature ensures the expanded key schedule is zeroed on drop. pub fn new(key: SecureKey) -> Self { - Self { key } + let cipher = match key.algorithm() { + EncryptionAlgorithm::Aes128Gcm => { + let aes_key = Key::::from_slice(key.as_bytes()); + CipherImpl::Aes128Gcm(Aes128Gcm::new(aes_key)) + } + }; + Self { cipher } } /// Encrypts data using AES-GCM. @@ -144,8 +159,8 @@ impl AesGcmEncryptor { /// The encrypted data in the format: [12-byte nonce][ciphertext][16-byte auth tag] /// This matches the Java implementation format for compatibility. pub fn encrypt(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { - match self.key.algorithm() { - EncryptionAlgorithm::Aes128Gcm => self.encrypt_aes128_gcm(plaintext, aad), + match &self.cipher { + CipherImpl::Aes128Gcm(cipher) => self.encrypt_aes128_gcm(cipher, plaintext, aad), } } @@ -174,14 +189,19 @@ impl AesGcmEncryptor { let nonce = &ciphertext[..NONCE_LEN]; let encrypted_data = &ciphertext[NONCE_LEN..]; - match self.key.algorithm() { - EncryptionAlgorithm::Aes128Gcm => self.decrypt_aes128_gcm(nonce, encrypted_data, aad), + match &self.cipher { + CipherImpl::Aes128Gcm(cipher) => { + self.decrypt_aes128_gcm(cipher, nonce, encrypted_data, aad) + } } } - fn encrypt_aes128_gcm(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result> { - let key = Key::::from_slice(self.key.as_bytes()); - let cipher = Aes128Gcm::new(key); + fn encrypt_aes128_gcm( + &self, + cipher: &Aes128Gcm, + plaintext: &[u8], + aad: Option<&[u8]>, + ) -> Result> { let nonce = Aes128Gcm::generate_nonce(&mut OsRng); let ciphertext = if let Some(aad) = aad { @@ 
-209,12 +229,11 @@ impl AesGcmEncryptor { fn decrypt_aes128_gcm( &self, + cipher: &Aes128Gcm, nonce: &[u8], ciphertext: &[u8], aad: Option<&[u8]>, ) -> Result> { - let key = Key::::from_slice(self.key.as_bytes()); - let cipher = Aes128Gcm::new(key); let nonce = Nonce::from_slice(nonce); let plaintext = if let Some(aad) = aad { From 57bd6ef368ee16ef25bdafd2f0ce8a3053f540e0 Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 18 Feb 2026 22:24:47 +0000 Subject: [PATCH 07/11] fix --- Cargo.lock | 2 -- crates/iceberg/Cargo.toml | 1 - 2 files changed, 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1663bd2469..28fe47aefe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,7 +27,6 @@ dependencies = [ "cfg-if", "cipher", "cpufeatures", - "zeroize", ] [[package]] @@ -3350,7 +3349,6 @@ dependencies = [ name = "iceberg" version = "0.9.0" dependencies = [ - "aes", "aes-gcm", "anyhow", "apache-avro", diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 48f22cec00..aa1d0cd4a5 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -33,7 +33,6 @@ default = [] [dependencies] -aes = { workspace = true } aes-gcm = { workspace = true } anyhow = { workspace = true } apache-avro = { workspace = true } From b89f4de4750634ecc08085198865c62dbcd5efcd Mon Sep 17 00:00:00 2001 From: Xander Date: Mon, 2 Mar 2026 16:19:39 +0000 Subject: [PATCH 08/11] Add secure bytes type --- crates/iceberg/src/encryption/crypto.rs | 74 ++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 9 deletions(-) diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index ebf23fbd00..29277f485e 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -17,6 +17,7 @@ //! Core cryptographic operations for Iceberg encryption. 
+use std::fmt; use std::str::FromStr; use aes_gcm::aead::generic_array::typenum::Unsigned; @@ -27,6 +28,47 @@ use zeroize::Zeroizing; use crate::{Error, ErrorKind, Result}; +/// Wrapper for sensitive byte data (encryption keys, DEKs, etc.) that: +/// - Zeroizes memory on drop +/// - Redacts content in [`Debug`] output +/// - Provides only `&[u8]` access via [`as_bytes()`](Self::as_bytes) +/// - Uses `Box<[u8]>` (immutable boxed slice) since key bytes never grow +/// +/// Use this type for any struct field that holds plaintext key material. +/// Because its [`Debug`] impl always prints `[N bytes REDACTED]`, structs +/// containing `SensitiveBytes` can safely derive or implement `Debug` +/// without risk of leaking key material. +#[derive(Clone, PartialEq, Eq)] +pub struct SensitiveBytes(Zeroizing>); + +impl SensitiveBytes { + /// Wraps the given bytes as sensitive material. + pub fn new(bytes: impl Into>) -> Self { + Self(Zeroizing::new(bytes.into())) + } + + /// Returns the underlying bytes. + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Returns the number of bytes. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns `true` if the byte slice is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} + +impl fmt::Debug for SensitiveBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{} bytes REDACTED]", self.0.len()) + } +} + /// Supported encryption algorithm. /// Currently only AES-128-GCM is supported as it's the only algorithm /// compatible with arrow-rs Parquet encryption. @@ -57,6 +99,20 @@ impl EncryptionAlgorithm { Self::Aes128Gcm => "AES_GCM_128", } } + + /// Returns the algorithm for a given DEK length in bytes. + /// + /// Matches Java's `encryption.data-key-length` property semantics: + /// 16 → AES-128-GCM. 
+ pub fn from_key_length(len: usize) -> Result { + match len { + 16 => Ok(Self::Aes128Gcm), + _ => Err(Error::new( + ErrorKind::DataInvalid, + format!("Unsupported data key length: {len} (must be 16)"), + )), + } + } } impl FromStr for EncryptionAlgorithm { @@ -75,7 +131,7 @@ impl FromStr for EncryptionAlgorithm { /// A secure encryption key that zeroes its memory on drop. pub struct SecureKey { - key: Zeroizing>, + key: SensitiveBytes, algorithm: EncryptionAlgorithm, } @@ -84,7 +140,7 @@ impl SecureKey { /// /// # Errors /// Returns an error if the key length doesn't match the algorithm requirements. - pub fn new(key: Vec, algorithm: EncryptionAlgorithm) -> Result { + pub fn new(key: &[u8], algorithm: EncryptionAlgorithm) -> Result { if key.len() != algorithm.key_length() { return Err(Error::new( ErrorKind::DataInvalid, @@ -97,7 +153,7 @@ impl SecureKey { )); } Ok(Self { - key: Zeroizing::new(key), + key: SensitiveBytes::new(key), algorithm, }) } @@ -107,7 +163,7 @@ impl SecureKey { let mut key = vec![0u8; algorithm.key_length()]; OsRng.fill_bytes(&mut key); Self { - key: Zeroizing::new(key), + key: SensitiveBytes::new(key), algorithm, } } @@ -119,7 +175,7 @@ impl SecureKey { /// Returns the key bytes. 
pub fn as_bytes(&self) -> &[u8] { - &self.key + &self.key.as_bytes() } } @@ -289,11 +345,11 @@ mod tests { assert_eq!(key1.algorithm(), EncryptionAlgorithm::Aes128Gcm); // Test key creation with validation - let valid_key = vec![0u8; 16]; - assert!(SecureKey::new(valid_key, EncryptionAlgorithm::Aes128Gcm).is_ok()); + let valid_key = [0u8; 16]; + assert!(SecureKey::new(valid_key.as_slice(), EncryptionAlgorithm::Aes128Gcm).is_ok()); - let invalid_key = vec![0u8; 32]; - assert!(SecureKey::new(invalid_key, EncryptionAlgorithm::Aes128Gcm).is_err()); + let invalid_key = [0u8; 32]; + assert!(SecureKey::new(invalid_key.as_slice(), EncryptionAlgorithm::Aes128Gcm).is_err()); } #[test] From fda4e79265984683104bd68377ac090f3d31a558 Mon Sep 17 00:00:00 2001 From: Xander Date: Fri, 13 Mar 2026 10:38:52 +0000 Subject: [PATCH 09/11] clippy --- crates/iceberg/src/encryption/crypto.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index 29277f485e..2033f969a2 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -175,7 +175,7 @@ impl SecureKey { /// Returns the key bytes. pub fn as_bytes(&self) -> &[u8] { - &self.key.as_bytes() + self.key.as_bytes() } } From 169dc22fdef9c20245a3c84570640ac9c042d607 Mon Sep 17 00:00:00 2001 From: Xander Date: Fri, 13 Mar 2026 11:06:26 +0000 Subject: [PATCH 10/11] Allow unused --- crates/iceberg/src/encryption/crypto.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index 2033f969a2..eea5fd72a7 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -53,11 +53,13 @@ impl SensitiveBytes { } /// Returns the number of bytes. 
+ #[allow(dead_code)] // Encryption work is ongoing, so this is currently unused pub fn len(&self) -> usize { self.0.len() } /// Returns `true` if the byte slice is empty. + #[allow(dead_code)] // Encryption work is ongoing, so this is currently unused pub fn is_empty(&self) -> bool { self.0.is_empty() } From 74b8bab945e15cbb172f28e124fb4625af109983 Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 18 Mar 2026 08:57:39 -0400 Subject: [PATCH 11/11] comments --- crates/iceberg/src/encryption/crypto.rs | 22 ++++++++++++++-------- crates/iceberg/src/encryption/mod.rs | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/crates/iceberg/src/encryption/crypto.rs b/crates/iceberg/src/encryption/crypto.rs index eea5fd72a7..2314abc4a3 100644 --- a/crates/iceberg/src/encryption/crypto.rs +++ b/crates/iceberg/src/encryption/crypto.rs @@ -71,6 +71,12 @@ impl fmt::Debug for SensitiveBytes { } } +impl fmt::Display for SensitiveBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{} bytes REDACTED]", self.0.len()) + } +} + /// Supported encryption algorithm. /// Currently only AES-128-GCM is supported as it's the only algorithm /// compatible with arrow-rs Parquet encryption. @@ -188,11 +194,11 @@ enum CipherImpl { /// AES-GCM encryptor for encrypting and decrypting data. /// /// The cipher is initialized once at construction time. -pub struct AesGcmEncryptor { +pub struct AesGcmCipher { cipher: CipherImpl, } -impl AesGcmEncryptor { +impl AesGcmCipher { /// Creates a new encryptor with the specified key. /// /// The key schedule is expanded once here. 
The `aes` crate's `zeroize` @@ -357,7 +363,7 @@ mod tests { #[test] fn test_aes128_gcm_encryption_roundtrip() { let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); - let encryptor = AesGcmEncryptor::new(key); + let encryptor = AesGcmCipher::new(key); let plaintext = b"Hello, Iceberg encryption!"; let aad = b"additional authenticated data"; @@ -382,7 +388,7 @@ mod tests { #[test] fn test_encryption_with_empty_plaintext() { let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); - let encryptor = AesGcmEncryptor::new(key); + let encryptor = AesGcmCipher::new(key); let plaintext = b""; let ciphertext = encryptor.encrypt(plaintext, None).unwrap(); @@ -397,7 +403,7 @@ mod tests { #[test] fn test_decryption_with_tampered_ciphertext() { let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); - let encryptor = AesGcmEncryptor::new(key); + let encryptor = AesGcmCipher::new(key); let plaintext = b"Sensitive data"; let mut ciphertext = encryptor.encrypt(plaintext, None).unwrap(); @@ -416,8 +422,8 @@ mod tests { let key1 = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); let key2 = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); - let encryptor1 = AesGcmEncryptor::new(key1); - let encryptor2 = AesGcmEncryptor::new(key2); + let encryptor1 = AesGcmCipher::new(key1); + let encryptor2 = AesGcmCipher::new(key2); let plaintext = b"Same plaintext"; @@ -433,7 +439,7 @@ mod tests { fn test_ciphertext_format_java_compatible() { // Test that our ciphertext format matches Java's: [12-byte nonce][ciphertext][16-byte tag] let key = SecureKey::generate(EncryptionAlgorithm::Aes128Gcm); - let encryptor = AesGcmEncryptor::new(key); + let encryptor = AesGcmCipher::new(key); let plaintext = b"Test data"; let ciphertext = encryptor.encrypt(plaintext, None).unwrap(); diff --git a/crates/iceberg/src/encryption/mod.rs b/crates/iceberg/src/encryption/mod.rs index 496209d591..e7a453a8d1 100644 --- a/crates/iceberg/src/encryption/mod.rs +++ 
b/crates/iceberg/src/encryption/mod.rs @@ -22,4 +22,4 @@ mod crypto; -pub use crypto::{AesGcmEncryptor, EncryptionAlgorithm, SecureKey}; +pub use crypto::{AesGcmCipher, EncryptionAlgorithm, SecureKey};