From e2f78dac67993bea8712e62f34e3f4e9ab59fe1e Mon Sep 17 00:00:00 2001 From: Jesse Jaggars Date: Mon, 1 Jun 2026 09:36:30 -0400 Subject: [PATCH 1/4] feat(sandbox): proxy-side AWS SigV4 credential signing for CONNECT tunnels Add proxy-side AWS SigV4 re-signing so sandbox clients can reach AWS services (Bedrock) through the CONNECT tunnel using placeholder credentials. The proxy strips the invalid signature, resolves real credentials from the SecretResolver, re-signs with the aws-sigv4 crate, and forwards. Configuration is policy-driven via two new fields (credential_signing, signing_service). Policy YAML example: credential_signing: sigv4 signing_service: bedrock Implementation: - sigv4.rs: strip_aws_headers removes old auth headers before the fail-closed placeholder scan; apply_sigv4_to_request re-signs using the aws-sigv4 SDK with PayloadChecksumKind::XAmzSha256 enabled. Returns Result instead of panicking. Non-signed headers (Accept, User-Agent, etc.) are preserved in the output. - rest.rs: SigV4 path buffers body (capped at MAX_REWRITE_BODY_BYTES) for signing, then forwards the re-signed request upstream. - Proto: credential_signing (field 19), signing_service (field 20) on NetworkEndpoint. - Policy/OPA: plumbed through serde, proto conversion, and Rego data. - Supports AWS session tokens (STS temporary credentials). - Integration test against real Bedrock (ignored, requires AWS creds). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 259 ++++++++++++--- architecture/sandbox.md | 6 + crates/openshell-policy/src/lib.rs | 8 + crates/openshell-providers/src/profiles.rs | 2 + crates/openshell-sandbox/Cargo.toml | 11 +- crates/openshell-sandbox/src/l7/mod.rs | 33 ++ crates/openshell-sandbox/src/l7/relay.rs | 12 + crates/openshell-sandbox/src/l7/rest.rs | 98 +++++- crates/openshell-sandbox/src/lib.rs | 1 + crates/openshell-sandbox/src/opa.rs | 65 ++++ crates/openshell-sandbox/src/policy_local.rs | 2 + crates/openshell-sandbox/src/proxy.rs | 10 + crates/openshell-sandbox/src/sigv4.rs | 303 ++++++++++++++++++ .../openshell-sandbox/tests/sigv4_signing.rs | 99 ++++++ proto/sandbox.proto | 7 + 15 files changed, 873 insertions(+), 43 deletions(-) create mode 100644 crates/openshell-sandbox/src/sigv4.rs create mode 100644 crates/openshell-sandbox/tests/sigv4_signing.rs diff --git a/Cargo.lock b/Cargo.lock index ad7efabc9..56238e4fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -280,6 +280,18 @@ dependencies = [ "cc", ] +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + [[package]] name = "aws-lc-rs" version = "1.16.3" @@ -303,6 +315,112 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "aws-sigv4" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "percent-encoding", + "sha2 0.10.9", + "time", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc117c179ecf39a62a0a3f49f600e9ac26a7ad7dd172177999f83933af776c32" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api-macros", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "056b66dbce2f81cc0c1e2b05bb402eb58f8a3530479d650efadd5bbae9a4050b" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", +] + [[package]] name = "axum" version = "0.7.9" @@ -313,8 +431,8 @@ dependencies = [ "axum-core 0.4.5", "bytes", "futures-util", - "http", - 
"http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "itoa", "matchit 0.7.3", @@ -341,8 +459,8 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-util", @@ -375,8 +493,8 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -394,8 +512,8 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -464,6 +582,16 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "base64ct" version = "1.8.3" @@ -566,7 +694,7 @@ dependencies = [ "futures-core", "futures-util", "hex", - "http", + "http 1.4.0", "http-body-util", "hyper", "hyper-named-pipe", @@ -626,6 +754,16 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "bzip2" version = "0.6.1" @@ -1854,7 +1992,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.4.0", "indexmap 2.14.0", "slab", "tokio", @@ -2001,6 +2139,17 @@ dependencies = [ 
"windows-sys 0.61.2", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -2020,6 +2169,17 @@ dependencies = [ "memchr", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2027,7 +2187,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -2038,8 +2198,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -2075,8 +2235,8 @@ dependencies = [ "futures-channel", "futures-core", "h2", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -2107,7 +2267,7 @@ version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http", + "http 1.4.0", "hyper", "hyper-util", "log", @@ -2142,8 +2302,8 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "hyper", "ipnet", "libc", @@ -2657,8 +2817,8 @@ dependencies = [ "either", "futures", "home", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-rustls", @@ -2690,7 +2850,7 @@ checksum = 
"7845bcc3e0f422df4d9049570baedd9bc1942f0504594e393e72fe24092559cf" dependencies = [ "chrono", "form_urlencoded", - "http", + "http 1.4.0", "json-patch", "k8s-openapi", "schemars", @@ -3282,7 +3442,7 @@ dependencies = [ "base64 0.22.1", "chrono", "getrandom 0.2.17", - "http", + "http 1.4.0", "rand 0.8.6", "reqwest 0.12.28", "serde", @@ -3311,7 +3471,7 @@ dependencies = [ "bytes", "chrono", "futures-util", - "http", + "http 1.4.0", "http-auth", "jsonwebtoken 10.3.0", "lazy_static", @@ -3639,6 +3799,9 @@ version = "0.0.0" dependencies = [ "anyhow", "apollo-parser", + "aws-credential-types", + "aws-sigv4", + "aws-smithy-runtime-api", "base64 0.22.1", "bytes", "clap", @@ -3646,7 +3809,7 @@ dependencies = [ "futures", "glob", "hex", - "hmac", + "http 1.4.0", "ipnet", "landlock", "libc", @@ -3697,8 +3860,8 @@ dependencies = [ "futures-util", "hex", "hmac", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-rustls", @@ -3824,6 +3987,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "owo-colors" version = "4.3.0" @@ -4076,6 +4245,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkcs1" version = "0.7.5" @@ -4616,8 +4791,8 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-rustls", @@ -4656,8 +4831,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + 
"http 1.4.0", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-rustls", @@ -6224,8 +6399,8 @@ dependencies = [ "base64 0.22.1", "bytes", "h2", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "hyper", "hyper-timeout", @@ -6304,8 +6479,8 @@ dependencies = [ "base64 0.21.7", "bitflags", "bytes", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -6323,8 +6498,8 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower 0.5.3", @@ -6448,7 +6623,7 @@ checksum = "4793cb5e56680ecbb1d843515b23b6de9a75eb04b66643e256a396d43be33c13" dependencies = [ "bytes", "data-encoding", - "http", + "http 1.4.0", "httparse", "log", "rand 0.9.4", @@ -6467,7 +6642,7 @@ checksum = "6c01152af293afb9c7c2a57e4b559c5620b421f6d133261c60dd2d0cdb38e6b8" dependencies = [ "bytes", "data-encoding", - "http", + "http 1.4.0", "httparse", "log", "rand 0.9.4", @@ -6655,6 +6830,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "walkdir" version = "2.5.0" @@ -7320,7 +7501,7 @@ dependencies = [ "base64 0.22.1", "deadpool", "futures", - "http", + "http 1.4.0", "http-body-util", "hyper", "hyper-util", diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 4bc6803eb..468d13731 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -74,6 +74,12 @@ Credential placeholders in proxied HTTP requests can be resolved by the proxy when policy allows the target endpoint. Secrets must not be logged in OCSF or plain tracing output. 
+For AWS endpoints that require request-level signing, the proxy supports SigV4 +re-signing. When `credential_signing: sigv4` is set on an L7 endpoint, the proxy +strips the client's placeholder-based AWS auth headers, buffers the request body, +computes a fresh SigV4 signature using real credentials from the provider, and +forwards the re-signed request upstream. + ## Connect and Logs The supervisor runs an SSH server on a Unix socket inside the sandbox. The diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 26c8fc9d3..b9c249fc3 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -135,6 +135,10 @@ struct NetworkEndpointDef { graphql_persisted_queries: BTreeMap, #[serde(default, skip_serializing_if = "is_zero_u32")] graphql_max_body_bytes: u32, + #[serde(default, skip_serializing_if = "String::is_empty")] + credential_signing: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + signing_service: String, } // Signature dictated by serde's `skip_serializing_if`, which requires `&T`. 
@@ -347,6 +351,8 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { }) .collect(), graphql_max_body_bytes: e.graphql_max_body_bytes, + credential_signing: e.credential_signing, + signing_service: e.signing_service, } }) .collect(), @@ -512,6 +518,8 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { }) .collect(), graphql_max_body_bytes: e.graphql_max_body_bytes, + credential_signing: e.credential_signing.clone(), + signing_service: e.signing_service.clone(), } }) .collect(), diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 68cc06260..274a8b524 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -596,6 +596,8 @@ fn endpoint_to_proto(endpoint: &EndpointProfile) -> NetworkEndpoint { .collect(), graphql_max_body_bytes: endpoint.graphql_max_body_bytes, path: endpoint.path.clone(), + credential_signing: String::new(), + signing_service: String::new(), } } diff --git a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index 6d527bc53..2c62bbe8f 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -34,9 +34,14 @@ clap = { workspace = true } miette = { workspace = true } thiserror = { workspace = true } anyhow = { workspace = true } -hmac = "0.12" sha2 = { workspace = true } hex = "0.4" +http = { workspace = true } + +# AWS SigV4 request signing +aws-sigv4 = { version = "1", features = ["sign-http", "http1"] } +aws-credential-types = { version = "1", features = ["hardcoded-credentials"] } +aws-smithy-runtime-api = { version = "1", features = ["client"] } russh = "0.57" rand_core = "0.6" @@ -89,6 +94,10 @@ seccompiler = "0.5" tempfile = "3" uuid = { version = "1", features = ["v4"] } +[[test]] +name = "sigv4_signing" +path = "tests/sigv4_signing.rs" + [dev-dependencies] tempfile = "3" temp-env = "0.3" diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs 
index 703aafae4..11c0a97ea 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -50,6 +50,14 @@ pub enum TlsMode { Skip, } +/// Credential signing mode for proxy-side request signing. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum CredentialSigning { + #[default] + None, + SigV4, +} + /// Enforcement mode for L7 policy decisions. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum EnforcementMode { @@ -88,6 +96,11 @@ pub struct L7EndpointConfig { /// When true, client-to-server GraphQL-over-WebSocket operation messages /// are classified with the same operation policy used by GraphQL-over-HTTP. pub websocket_graphql_policy: bool, + /// Proxy-side credential signing mode for this endpoint. + pub credential_signing: CredentialSigning, + /// AWS signing service name (e.g. `"bedrock"`). Required when + /// `credential_signing` is `SigV4`. + pub signing_service: String, } /// Result of an L7 policy decision for a single request. 
@@ -165,6 +178,24 @@ pub fn parse_l7_config(val: &regorus::Value) -> Option<L7EndpointConfig> { .filter(|v| *v > 0) .unwrap_or(graphql::DEFAULT_MAX_BODY_BYTES); + let credential_signing = match get_object_str(val, "credential_signing").as_deref() { + Some("sigv4") => CredentialSigning::SigV4, + Some(other) if !other.is_empty() => { + let event = openshell_ocsf::NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Other) + .severity(openshell_ocsf::SeverityId::Medium) + .message(format!( + "unrecognized credential_signing value {other:?}, falling back to none" + )) + .build(); + openshell_ocsf::ocsf_emit!(event); + CredentialSigning::None + } + _ => CredentialSigning::None, + }; + + let signing_service = get_object_str(val, "signing_service").unwrap_or_default(); + Some(L7EndpointConfig { protocol, path: get_object_str(val, "path").unwrap_or_default(), @@ -175,6 +206,8 @@ pub fn parse_l7_config(val: &regorus::Value) -> Option<L7EndpointConfig> { websocket_credential_rewrite, request_body_credential_rewrite, websocket_graphql_policy, + credential_signing, + signing_service, }) } diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index 6d271af21..271ee6d01 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -351,6 +351,9 @@ where websocket_extensions: websocket_extension_mode(config), request_body_credential_rewrite: config.protocol == L7Protocol::Rest && config.request_body_credential_rewrite, + credential_signing: config.credential_signing, + signing_service: &config.signing_service, + host: &ctx.host, }, ) .await?; @@ -769,6 +772,9 @@ where websocket_extensions: websocket_extension_mode(config), request_body_credential_rewrite: config.protocol == L7Protocol::Rest && config.request_body_credential_rewrite, + credential_signing: config.credential_signing, + signing_service: &config.signing_service, + host: &ctx.host, }, ) .await?; @@ -1417,6 +1423,8 @@ network_policies: 
websocket_credential_rewrite: true, request_body_credential_rewrite: false, websocket_graphql_policy: false, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: String::new(), }]; let ctx = L7EvalContext { host: "gateway.example.test".into(), @@ -1517,6 +1525,8 @@ network_policies: websocket_credential_rewrite: true, request_body_credential_rewrite: false, websocket_graphql_policy: false, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: String::new(), }]; let (child_env, resolver) = SecretResolver::from_provider_env( std::iter::once(("DISCORD_BOT_TOKEN".to_string(), "real-token".to_string())).collect(), @@ -1634,6 +1644,8 @@ network_policies: websocket_credential_rewrite: true, request_body_credential_rewrite: false, websocket_graphql_policy: true, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: String::new(), }]; let (child_env, resolver) = SecretResolver::from_provider_env( std::iter::once(("T".to_string(), "real-token".to_string())).collect(), diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index 20d52459c..252793c53 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -377,6 +377,9 @@ where generation_guard, websocket_extensions: WebSocketExtensionMode::Preserve, request_body_credential_rewrite: false, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: "", + host: "", }, ) .await @@ -389,12 +392,15 @@ pub(crate) enum WebSocketExtensionMode { PermessageDeflate, } -#[derive(Clone, Copy, Default)] +#[derive(Clone, Default)] pub(crate) struct RelayRequestOptions<'a> { pub(crate) resolver: Option<&'a SecretResolver>, pub(crate) generation_guard: Option<&'a PolicyGenerationGuard>, pub(crate) websocket_extensions: WebSocketExtensionMode, pub(crate) request_body_credential_rewrite: bool, + pub(crate) credential_signing: crate::l7::CredentialSigning, + pub(crate) 
signing_service: &'a str, + pub(crate) host: &'a str, } pub(crate) async fn relay_http_request_with_options_guarded( @@ -421,8 +427,19 @@ where parse_websocket_upgrade_request(&req.raw_header[..header_end])? }; + // When SigV4 signing is configured, strip AWS auth headers before credential + // rewriting so the fail-closed placeholder scan doesn't reject the SigV4 + // Authorization header (which embeds placeholder strings). + let raw_for_rewrite; + let header_source = if options.credential_signing == crate::l7::CredentialSigning::SigV4 { + raw_for_rewrite = crate::sigv4::strip_aws_headers(&req.raw_header[..header_end]); + &raw_for_rewrite[..] + } else { + &req.raw_header[..header_end] + }; + let (header_bytes, expected_websocket_extension) = rewrite_websocket_extensions_for_mode( - &req.raw_header[..header_end], + header_source, options.websocket_extensions, websocket_request.is_some(), )?; @@ -442,7 +459,82 @@ where guard.ensure_current()?; } - if options.request_body_credential_rewrite { + // Apply SigV4 signing if configured. We need the full request (headers + body) + // to compute the signature, so for SigV4 we always buffer the body first. 
+ if options.credential_signing == crate::l7::CredentialSigning::SigV4 { + if let Some(resolver) = options.resolver { + let access_key_placeholder = + crate::secrets::placeholder_for_env_key("AWS_ACCESS_KEY_ID"); + let secret_key_placeholder = + crate::secrets::placeholder_for_env_key("AWS_SECRET_ACCESS_KEY"); + let session_token_placeholder = + crate::secrets::placeholder_for_env_key("AWS_SESSION_TOKEN"); + + match ( + resolver.resolve_placeholder(&access_key_placeholder), + resolver.resolve_placeholder(&secret_key_placeholder), + ) { + (Some(access_key), Some(secret_key)) => { + let session_token = resolver.resolve_placeholder(&session_token_placeholder); + let region = crate::sigv4::extract_aws_region(options.host) + .unwrap_or_else(|| "us-east-1".to_string()); + let service = &options.signing_service; + if service.is_empty() { + return Err(miette!( + "SigV4 signing configured but signing_service not set in policy" + )); + } + debug!( + host = %options.host, + region = %region, + service = %service, + "applying SigV4 signing to CONNECT tunnel request" + ); + + // Collect body from overflow + stream + let overflow = &req.raw_header[header_end..]; + let mut full_request = rewrite_result.rewritten.clone(); + full_request.extend_from_slice(overflow); + // Read remaining body based on content-length + if let BodyLength::ContentLength(body_len) = parse_body_length(header_str)? 
{ + if body_len > MAX_REWRITE_BODY_BYTES as u64 { + return Err(miette!( + "SigV4 signing buffers at most {MAX_REWRITE_BODY_BYTES} bytes" + )); + } + let already_have = overflow.len() as u64; + if body_len > already_have { + let remaining = + usize::try_from(body_len - already_have).unwrap_or(usize::MAX); + let mut body_buf = vec![0u8; remaining]; + client.read_exact(&mut body_buf).await.into_diagnostic()?; + full_request.extend_from_slice(&body_buf); + } + } + + let signed = crate::sigv4::apply_sigv4_to_request( + &full_request, + options.host, + &region, + service, + access_key, + secret_key, + session_token, + )?; + upstream.write_all(&signed).await.into_diagnostic()?; + } + _ => { + return Err(miette!( + "SigV4 signing configured but AWS credentials not found in provider" + )); + } + } + } else { + return Err(miette!( + "SigV4 signing configured but no secret resolver available" + )); + } + } else if options.request_body_credential_rewrite { let body = collect_and_rewrite_request_body( req, client, diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 4a0e61e57..4ce6f6070 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -23,6 +23,7 @@ mod provider_credentials; pub mod proxy; mod sandbox; mod secrets; +pub mod sigv4; mod skills; mod ssh; mod supervisor_session; diff --git a/crates/openshell-sandbox/src/opa.rs b/crates/openshell-sandbox/src/opa.rs index f73f3bc14..545efdea8 100644 --- a/crates/openshell-sandbox/src/opa.rs +++ b/crates/openshell-sandbox/src/opa.rs @@ -1116,6 +1116,12 @@ fn proto_to_opa_data_json(proto: &ProtoSandboxPolicy, entrypoint_pid: u32) -> St if e.request_body_credential_rewrite { ep["request_body_credential_rewrite"] = true.into(); } + if !e.credential_signing.is_empty() { + ep["credential_signing"] = e.credential_signing.clone().into(); + } + if !e.signing_service.is_empty() { + ep["signing_service"] = e.signing_service.clone().into(); + } if 
!e.persisted_queries.is_empty() { ep["persisted_queries"] = e.persisted_queries.clone().into(); } @@ -2718,6 +2724,65 @@ network_policies: assert!(l7.websocket_credential_rewrite); } + #[test] + fn l7_endpoint_config_preserves_proto_credential_signing() { + let mut network_policies = std::collections::HashMap::new(); + network_policies.insert( + "bedrock".to_string(), + NetworkPolicyRule { + name: "bedrock".to_string(), + endpoints: vec![NetworkEndpoint { + host: "bedrock-runtime.us-east-2.amazonaws.com".to_string(), + port: 443, + protocol: "rest".to_string(), + enforcement: "enforce".to_string(), + access: "read-write".to_string(), + credential_signing: "sigv4".to_string(), + signing_service: "bedrock".to_string(), + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/local/bin/claude".to_string(), + ..Default::default() + }], + }, + ); + let proto = ProtoSandboxPolicy { + version: 1, + filesystem: Some(ProtoFs { + include_workdir: true, + read_only: vec![], + read_write: vec![], + }), + landlock: Some(openshell_core::proto::LandlockPolicy { + compatibility: "best_effort".to_string(), + }), + process: Some(ProtoProc { + run_as_user: "sandbox".to_string(), + run_as_group: "sandbox".to_string(), + }), + network_policies, + }; + + let engine = OpaEngine::from_proto(&proto).expect("engine from proto"); + let input = NetworkInput { + host: "bedrock-runtime.us-east-2.amazonaws.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/local/bin/claude"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + + let config = engine + .query_endpoint_config(&input) + .unwrap() + .expect("endpoint config"); + let l7 = crate::l7::parse_l7_config(&config).unwrap(); + assert_eq!(l7.credential_signing, crate::l7::CredentialSigning::SigV4); + assert_eq!(l7.signing_service, "bedrock"); + } + #[test] fn l7_endpoint_config_preserves_proto_request_body_credential_rewrite() { let mut network_policies = 
std::collections::HashMap::new(); diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index fcf6e1f8e..00d624d02 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -1121,6 +1121,8 @@ fn network_endpoint_from_json( graphql_persisted_queries: HashMap::new(), graphql_max_body_bytes: 0, path: String::new(), + credential_signing: String::new(), + signing_service: String::new(), }) } diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index 30466a465..4e176a7ec 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -2816,6 +2816,9 @@ where generation_guard: Some(options.generation_guard), websocket_extensions: options.websocket_extensions, request_body_credential_rewrite: options.request_body_credential_rewrite, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: "", + host: "", }, ) .await @@ -3752,6 +3755,7 @@ async fn handle_forward_proxy( return Ok(()); } }; + if let Err(e) = forward_generation_guard.ensure_current() { emit_l7_tunnel_close_after_policy_change(&host_lc, port, e); respond( @@ -3903,6 +3907,8 @@ mod tests { websocket_credential_rewrite, request_body_credential_rewrite: false, websocket_graphql_policy: false, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: String::new(), } } @@ -4369,6 +4375,8 @@ network_policies: websocket_credential_rewrite: false, request_body_credential_rewrite: false, websocket_graphql_policy: false, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: String::new(), }, }, L7ConfigSnapshot { @@ -4382,6 +4390,8 @@ network_policies: websocket_credential_rewrite: false, request_body_credential_rewrite: false, websocket_graphql_policy: false, + credential_signing: crate::l7::CredentialSigning::None, + signing_service: String::new(), }, }, ]; diff --git 
a/crates/openshell-sandbox/src/sigv4.rs b/crates/openshell-sandbox/src/sigv4.rs new file mode 100644 index 000000000..3a6e50b1b --- /dev/null +++ b/crates/openshell-sandbox/src/sigv4.rs @@ -0,0 +1,303 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use aws_credential_types::Credentials; +use aws_sigv4::http_request::{ + PayloadChecksumKind, SignableBody, SignableRequest, SigningSettings, sign, +}; +use aws_sigv4::sign::v4; +use aws_smithy_runtime_api::client::identity::Identity; +use miette::{Result, miette}; +use std::time::SystemTime; + +/// Extract the AWS region from a standard AWS hostname. +/// Pattern: `<service>.<region>.amazonaws.com` → `<region>`. +pub fn extract_aws_region(host: &str) -> Option<String> { + let parts: Vec<&str> = host.split('.').collect(); + if parts.len() >= 4 && parts[parts.len() - 2] == "amazonaws" && parts[parts.len() - 1] == "com" + { + Some(parts[1].to_string()) + } else { + None + } +} + +/// Strip AWS auth headers from raw HTTP request bytes. +/// +/// Removes `Authorization`, `X-Amz-Date`, `X-Amz-Security-Token`, and +/// `X-Amz-Content-Sha256` headers so the request can pass through the +/// proxy's fail-closed placeholder scan before re-signing. 
+pub fn strip_aws_headers(raw: &[u8]) -> Vec<u8> { + let header_end = raw + .windows(4) + .position(|w| w == b"\r\n\r\n") + .map_or(raw.len(), |p| p + 4); + + let header_str = String::from_utf8_lossy(&raw[..header_end]); + let lines: Vec<&str> = header_str.split("\r\n").collect(); + + let mut output = Vec::with_capacity(raw.len()); + + for (i, line) in lines.iter().enumerate() { + if i == 0 { + output.extend_from_slice(line.as_bytes()); + output.extend_from_slice(b"\r\n"); + continue; + } + if line.is_empty() { + break; + } + let lower = line.to_ascii_lowercase(); + if lower.starts_with("authorization:") + || lower.starts_with("x-amz-date:") + || lower.starts_with("x-amz-security-token:") + || lower.starts_with("x-amz-content-sha256:") + { + continue; + } + output.extend_from_slice(line.as_bytes()); + output.extend_from_slice(b"\r\n"); + } + + output.extend_from_slice(b"\r\n"); + + if header_end < raw.len() { + output.extend_from_slice(&raw[header_end..]); + } + + output +} + +/// Apply AWS Signature Version 4 signing to a raw HTTP request buffer. +/// +/// Strips existing AWS auth headers, computes a new signature using the +/// `aws-sigv4` crate, and returns the rewritten request bytes. +pub fn apply_sigv4_to_request( + raw: &[u8], + host: &str, + region: &str, + service: &str, + access_key: &str, + secret_key: &str, + session_token: Option<&str>, +) -> Result<Vec<u8>> { + let header_end = raw + .windows(4) + .position(|w| w == b"\r\n\r\n") + .map_or(raw.len(), |p| p + 4); + + let body = if header_end < raw.len() { + &raw[header_end..] + } else { + &[] + }; + + let header_str = String::from_utf8_lossy(&raw[..header_end]); + let lines: Vec<&str> = header_str.split("\r\n").collect(); + + let (method, path) = lines.first().map_or(("GET", "/"), |first_line| { + let parts: Vec<&str> = first_line.splitn(3, ' ').collect(); + if parts.len() >= 2 { + (parts[0], parts[1]) + } else { + ("GET", "/") + } + }); + + // Collect all non-AWS headers for forwarding, and a subset for signing. 
+ // Only host, content-type, and content-length are included in the SigV4 + // signature. Signing all headers causes failures when the proxy or + // transport modifies unsigned-by-convention headers (Connection, + // Accept-Encoding, etc.) between signing and delivery. + let mut headers_to_sign: Vec<(String, String)> = Vec::new(); + let mut all_headers: Vec<(String, String)> = Vec::new(); + for line in lines.iter().skip(1) { + if line.is_empty() { + break; + } + if let Some((k, v)) = line.split_once(':') { + let lower = k.trim().to_ascii_lowercase(); + if lower.starts_with("authorization") + || lower.starts_with("x-amz-date") + || lower.starts_with("x-amz-security-token") + || lower.starts_with("x-amz-content-sha256") + { + continue; + } + all_headers.push((lower.clone(), v.trim().to_string())); + if lower == "host" || lower == "content-type" || lower == "content-length" { + headers_to_sign.push((lower, v.trim().to_string())); + } + } + } + + let uri = format!("https://{host}{path}"); + + let identity: Identity = Credentials::new( + access_key, + secret_key, + session_token.map(ToString::to_string), + None, + "openshell", + ) + .into(); + + let mut settings = SigningSettings::default(); + settings.payload_checksum_kind = PayloadChecksumKind::XAmzSha256; + + let signing_params = v4::SigningParams::builder() + .identity(&identity) + .region(region) + .name(service) + .time(SystemTime::now()) + .settings(settings) + .build() + .map_err(|e| miette!("SigV4 signing params: {e}"))? + .into(); + + let signable_request = SignableRequest::new( + method, + &uri, + headers_to_sign + .iter() + .map(|(k, v)| (k.as_str(), v.as_str())), + SignableBody::Bytes(body), + ) + .map_err(|e| miette!("SigV4 signable request: {e}"))?; + + let (instructions, _signature) = sign(signable_request, &signing_params) + .map_err(|e| miette!("SigV4 signing failed: {e}"))? 
+ .into_parts(); + + // Rebuild the request with signed headers + let mut output = Vec::with_capacity(raw.len() + 256); + + // Request line + if let Some(first_line) = lines.first() { + output.extend_from_slice(first_line.as_bytes()); + output.extend_from_slice(b"\r\n"); + } + + // All original non-AWS headers + for (k, v) in &all_headers { + output.extend_from_slice(format!("{k}: {v}\r\n").as_bytes()); + } + + // Signed headers from the SDK + for (name, value) in instructions.headers() { + output.extend_from_slice(format!("{name}: {value}\r\n").as_bytes()); + } + + // End of headers + output.extend_from_slice(b"\r\n"); + + // Body + output.extend_from_slice(body); + + Ok(output) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extract_region_from_hostname() { + let region = extract_aws_region("bedrock-runtime.us-east-2.amazonaws.com").unwrap(); + assert_eq!(region, "us-east-2"); + } + + #[test] + fn extract_region_from_sts_hostname() { + let region = extract_aws_region("sts.us-east-1.amazonaws.com").unwrap(); + assert_eq!(region, "us-east-1"); + } + + #[test] + fn non_aws_hostname_returns_none() { + assert!(extract_aws_region("api.anthropic.com").is_none()); + } + + #[test] + fn global_endpoint_returns_none() { + assert!(extract_aws_region("s3.amazonaws.com").is_none()); + } + + #[test] + fn sign_produces_valid_format() { + let raw = b"POST /model/us.anthropic.claude-sonnet-4-6/invoke HTTP/1.1\r\nHost: bedrock-runtime.us-east-2.amazonaws.com\r\nContent-Type: application/json\r\n\r\n{}"; + let result = apply_sigv4_to_request( + raw, + "bedrock-runtime.us-east-2.amazonaws.com", + "us-east-2", + "bedrock", + "AKIAIOSFODNN7EXAMPLE", + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + None, + ) + .unwrap(); + let result_str = String::from_utf8_lossy(&result); + assert!( + result_str.contains("authorization: AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/") + ); + assert!(result_str.contains("x-amz-content-sha256: ")); + 
assert!(result_str.contains("x-amz-date: ")); + assert!(!result_str.contains("x-amz-security-token")); + } + + #[test] + fn sign_with_session_token() { + let raw = b"POST /model/test/invoke HTTP/1.1\r\nHost: bedrock-runtime.us-east-2.amazonaws.com\r\nContent-Type: application/json\r\n\r\n{}"; + let result = apply_sigv4_to_request( + raw, + "bedrock-runtime.us-east-2.amazonaws.com", + "us-east-2", + "bedrock", + "ASIAEXAMPLE", + "secret", + Some("FwoGZXIvYXdzEBYaDH+session+token"), + ) + .unwrap(); + let result_str = String::from_utf8_lossy(&result); + assert!(result_str.contains("authorization: AWS4-HMAC-SHA256 Credential=ASIAEXAMPLE/")); + assert!(result_str.contains("x-amz-security-token: FwoGZXIvYXdzEBYaDH+session+token")); + } + + #[test] + fn non_signed_headers_preserved() { + let raw = b"POST /model/test/invoke HTTP/1.1\r\nHost: bedrock-runtime.us-east-2.amazonaws.com\r\nContent-Type: application/json\r\nAccept: application/json\r\nUser-Agent: my-agent/1.0\r\n\r\n{}"; + let result = apply_sigv4_to_request( + raw, + "bedrock-runtime.us-east-2.amazonaws.com", + "us-east-2", + "bedrock", + "AKIAIOSFODNN7EXAMPLE", + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + None, + ) + .unwrap(); + let result_str = String::from_utf8_lossy(&result); + assert!(result_str.contains("accept: application/json\r\n")); + assert!(result_str.contains("user-agent: my-agent/1.0\r\n")); + assert!(result_str.contains("authorization: AWS4-HMAC-SHA256 Credential=")); + } + + #[test] + fn apply_sigv4_rewrites_request() { + let raw = b"POST /model/test/invoke HTTP/1.1\r\nHost: bedrock-runtime.us-east-2.amazonaws.com\r\nContent-Type: application/json\r\nAuthorization: AWS4-HMAC-SHA256 old-invalid-sig\r\nX-Amz-Date: old-date\r\n\r\n{}"; + let result = apply_sigv4_to_request( + raw, + "bedrock-runtime.us-east-2.amazonaws.com", + "us-east-2", + "bedrock", + "AKIATEST", + "secret", + None, + ) + .unwrap(); + let result_str = String::from_utf8_lossy(&result); + 
assert!(result_str.contains("authorization: AWS4-HMAC-SHA256 Credential=AKIATEST/")); + assert!(!result_str.contains("old-invalid-sig")); + assert!(!result_str.contains("old-date")); + } +} diff --git a/crates/openshell-sandbox/tests/sigv4_signing.rs b/crates/openshell-sandbox/tests/sigv4_signing.rs new file mode 100644 index 000000000..934262c8f --- /dev/null +++ b/crates/openshell-sandbox/tests/sigv4_signing.rs @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration test for SigV4 proxy-side re-signing. +//! +//! Simulates what the proxy does: takes a raw HTTP request (like the AWS SDK +//! would generate with placeholder credentials), strips the invalid AWS auth +//! headers, re-signs with real credentials, and sends to Bedrock. +//! +//! Run with real AWS credentials: +//! AWS_ACCESS_KEY_ID=AKIAxxx AWS_SECRET_ACCESS_KEY=xxx cargo test \ +//! -p openshell-sandbox --test sigv4_signing -- --ignored --nocapture + +use std::io::{Read, Write}; +use std::net::TcpStream; + +#[test] +#[ignore] // requires real AWS credentials +fn sigv4_resign_and_call_bedrock() { + let access_key = std::env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID must be set"); + let secret_key = + std::env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY must be set"); + let session_token = std::env::var("AWS_SESSION_TOKEN").ok(); + let region = std::env::var("AWS_REGION").unwrap_or_else(|_| "us-east-2".to_string()); + let host = format!("bedrock.{region}.amazonaws.com"); + + // Build a raw HTTP request as if the AWS SDK generated it with fake creds. + // This is what arrives at the proxy from inside the sandbox. 
+ let fake_signed_request = format!( + "GET /foundation-models HTTP/1.1\r\n\ + Host: {host}\r\n\ + Content-Type: application/json\r\n\ + Authorization: AWS4-HMAC-SHA256 Credential=FAKEFAKEFAKE/20260101/us-east-2/bedrock/aws4_request, SignedHeaders=host, Signature=0000000000000000000000000000000000000000000000000000000000000000\r\n\ + X-Amz-Date: 20260101T000000Z\r\n\ + X-Amz-Content-Sha256: fake-hash\r\n\ + Accept: application/json\r\n\ + Connection: keep-alive\r\n\ + \r\n" + ); + + // Step 1: Strip invalid AWS auth headers (proxy does this before + // the fail-closed placeholder scan) + let stripped = openshell_sandbox::sigv4::strip_aws_headers(fake_signed_request.as_bytes()); + let stripped_str = String::from_utf8_lossy(&stripped); + assert!( + !stripped_str.contains("FAKEFAKEFAKE"), + "old auth should be stripped" + ); + assert!( + !stripped_str.contains("fake-hash"), + "old hash should be stripped" + ); + + // Step 2: Re-sign with real credentials + let signed = openshell_sandbox::sigv4::apply_sigv4_to_request( + &stripped, + &host, + ®ion, + "bedrock", + &access_key, + &secret_key, + session_token.as_deref(), + ) + .expect("SigV4 signing should succeed"); + + let signed_str = String::from_utf8_lossy(&signed); + eprintln!("--- Signed request headers ---"); + if let Some(end) = signed_str.find("\r\n\r\n") { + eprintln!("{}", &signed_str[..end]); + } + + // Step 3: Send to Bedrock over TLS + let mut tcp = TcpStream::connect(format!("{host}:443")).expect("TCP connect"); + let mut root_store = rustls::RootCertStore::empty(); + root_store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); + let config = rustls::ClientConfig::builder() + .with_root_certificates(root_store) + .with_no_client_auth(); + let server_name: rustls::pki_types::ServerName = host.clone().try_into().unwrap(); + let mut tls = rustls::ClientConnection::new(std::sync::Arc::new(config), server_name).unwrap(); + let mut stream = rustls::Stream::new(&mut tls, &mut tcp); + + 
stream.write_all(&signed).expect("TLS write"); + stream.flush().expect("TLS flush"); + + let mut response = vec![0u8; 4096]; + let n = stream.read(&mut response).expect("TLS read"); + let response_str = String::from_utf8_lossy(&response[..n]); + + eprintln!("\n--- Response (first {n} bytes) ---"); + eprintln!("{response_str}"); + + // Verify we got HTTP 200, not 403 InvalidSignatureException + assert!( + response_str.starts_with("HTTP/1.1 200"), + "Expected 200 OK but got: {}", + response_str.lines().next().unwrap_or("(empty)") + ); +} diff --git a/proto/sandbox.proto b/proto/sandbox.proto index ef0b0540f..64dc26d7f 100644 --- a/proto/sandbox.proto +++ b/proto/sandbox.proto @@ -128,6 +128,13 @@ message NetworkEndpoint { // Advisor-proposed endpoints must not satisfy exact-host SSRF trust unless // they are converted through an explicit user-authored policy path. bool advisor_proposed = 18; + // Proxy-side credential signing mode: "sigv4" for AWS SigV4 re-signing. + // When set, the proxy strips the client's Authorization header and computes + // a fresh SigV4 signature using real credentials from the provider. + string credential_signing = 19; + // AWS signing service name override. Required when credential_signing is + // "sigv4" — e.g. "bedrock" for bedrock-runtime endpoints. + string signing_service = 20; } // Trusted GraphQL operation classification. 
From 646df06ff14ad86574845267fed06460ed118dd3 Mon Sep 17 00:00:00 2001 From: Mesut Oezdil <114185853+mesutoezdil@users.noreply.github.com> Date: Mon, 1 Jun 2026 17:35:11 +0200 Subject: [PATCH 2/4] fix(e2e): clean up temp files in sandbox-runner on exit (#1647) --- e2e/policy-advisor/sandbox-runner.sh | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/e2e/policy-advisor/sandbox-runner.sh b/e2e/policy-advisor/sandbox-runner.sh index 947d2163c..7df444cef 100755 --- a/e2e/policy-advisor/sandbox-runner.sh +++ b/e2e/policy-advisor/sandbox-runner.sh @@ -5,6 +5,9 @@ set -euo pipefail +_td="$(mktemp -d)" +trap 'rm -rf "$_td"' EXIT + cmd="$1" shift @@ -23,7 +26,7 @@ case "$cmd" in ;; current-policy) - body="$(mktemp)" + body="$_td/body" status="$(curl -sS -o "$body" -w "%{http_code}" http://policy.local/v1/policy/current)" json_status_response "$status" "$body" ;; @@ -34,8 +37,8 @@ case "$cmd" in branch="$3" file_path="$4" run_id="$5" - body="$(mktemp)" - payload="$(mktemp)" + body="$_td/body" + payload="$_td/payload" python3 - "$branch" "$run_id" > "$payload" <<'PY' import base64 @@ -76,8 +79,8 @@ PY owner="$1" repo="$2" file_path="$3" - body="$(mktemp)" - payload="$(mktemp)" + body="$_td/body" + payload="$_td/payload" python3 - "$owner" "$repo" "$file_path" > "$payload" <<'PY' import json @@ -140,8 +143,8 @@ PY # — we never make outbound calls, the gateway just persists the # chunk and the reviewer decides on it. rule_id="$1" - body="$(mktemp)" - payload="$(mktemp)" + body="$_td/body" + payload="$_td/payload" python3 - "$rule_id" > "$payload" <<'PY' import json @@ -184,7 +187,7 @@ PY proposal-status) chunk_id="$1" - body="$(mktemp)" + body="$_td/body" status="$(curl -sS \ -o "$body" \ -w "%{http_code}" \ @@ -195,7 +198,7 @@ PY proposal-wait) chunk_id="$1" timeout="${2:-60}" - body="$(mktemp)" + body="$_td/body" # No --max-time on curl: the server bounds the wait at `timeout`, # which is already clamped to [1, 300] by policy.local. 
Let the # request return naturally. From 2de49e1abea5fe89a5c16aad42b9948c670e2396 Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Mon, 1 Jun 2026 10:54:30 -0700 Subject: [PATCH 3/4] ci(kubernetes): add HA e2e workflow (#1598) Signed-off-by: Taylor Mutch --- .../skills/debug-openshell-cluster/SKILL.md | 9 ++++ .agents/skills/helm-dev-environment/SKILL.md | 7 ++- .github/workflows/branch-e2e.yml | 45 ++++++++++++++++++- .github/workflows/e2e-kubernetes-ha-test.yml | 37 +++++++++++++++ .github/workflows/e2e-kubernetes-test.yml | 6 +++ .github/workflows/e2e-label-help.yml | 11 ++++- CI.md | 12 ++--- CONTRIBUTING.md | 2 +- deploy/helm/openshell/README.md | 1 + deploy/helm/openshell/README.md.gotmpl | 1 + .../ci/values-high-availability.yaml | 12 +++++ deploy/helm/openshell/skaffold.yaml | 2 + e2e/with-kube-gateway.sh | 31 +++++++++++-- 13 files changed, 162 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/e2e-kubernetes-ha-test.yml create mode 100644 deploy/helm/openshell/ci/values-high-availability.yaml diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md index 6c8f73bb6..aeaa503f7 100644 --- a/.agents/skills/debug-openshell-cluster/SKILL.md +++ b/.agents/skills/debug-openshell-cluster/SKILL.md @@ -138,6 +138,15 @@ kubectl -n openshell rollout status statefulset/openshell Look for failed installs, unexpected values, missing namespace, wrong image tag, TLS settings that do not match the registered endpoint, and scheduling failures. 
+For HA or PostgreSQL-backed installs, also check the service-binding Secret and +bundled PostgreSQL workload: + +```bash +kubectl -n openshell get secret -l app.kubernetes.io/instance=openshell +kubectl -n openshell get statefulset,pod,pvc -l app.kubernetes.io/instance=openshell +kubectl -n openshell logs statefulset/openshell-postgres --tail=200 +``` + Check required Helm deployment secrets: ```bash diff --git a/.agents/skills/helm-dev-environment/SKILL.md b/.agents/skills/helm-dev-environment/SKILL.md index a97395fb1..79c7d5bc8 100644 --- a/.agents/skills/helm-dev-environment/SKILL.md +++ b/.agents/skills/helm-dev-environment/SKILL.md @@ -1,6 +1,6 @@ --- name: helm-dev-environment -description: Start up, tear down, and configure the local Kubernetes development environment for OpenShell. Uses k3d (Docker-backed k3s) + Skaffold + Helm. Covers cluster lifecycle, optional add-ons (Keycloak OIDC, Envoy Gateway), and port mappings. Trigger keywords - local k8s, local cluster, k3d, skaffold, helm dev, start cluster, stop cluster, tear down cluster, delete cluster, create cluster, helm:k3s, helm:skaffold, local dev environment, dev cluster, k8s dev, envoy gateway local, keycloak local. +description: Start up, tear down, and configure the local Kubernetes development environment for OpenShell. Uses k3d (Docker-backed k3s) + Skaffold + Helm. Covers cluster lifecycle, optional add-ons (Keycloak OIDC, Envoy Gateway), HA testing, and port mappings. Trigger keywords - local k8s, local cluster, k3d, skaffold, helm dev, start cluster, stop cluster, tear down cluster, delete cluster, create cluster, helm:k3s, helm:skaffold, local dev environment, dev cluster, k8s dev, envoy gateway local, keycloak local, high availability, HA. --- # Helm Dev Environment @@ -65,6 +65,10 @@ generates mTLS secrets on first install. Envoy Gateway opt-in; see the Optional The gateway Service uses ClusterIP. Access is via Envoy Gateway (port `8080`) or `kubectl port-forward`. 
+**HA test deploy** (two gateway replicas + bundled PostgreSQL): uncomment +`#- ci/values-high-availability.yaml` in `deploy/helm/openshell/skaffold.yaml`, +then run `mise run helm:skaffold:run` or `mise run helm:skaffold:dev`. + ### TLS behaviour `ci/values-skaffold.yaml` sets `server.disableTls: true`, so Skaffold-based deploys run @@ -198,6 +202,7 @@ mise run helm:k3s:status | `deploy/helm/openshell/ci/values-skaffold.yaml` | Dev overrides (image pull policy, TLS disabled for local Skaffold) | | `deploy/helm/openshell/ci/values-cert-manager.yaml` | cert-manager PKI overlay (opt-in; disables pkiInitJob) | | `deploy/helm/openshell/ci/values-gateway.yaml` | Envoy Gateway GRPCRoute + Gateway overlay | +| `deploy/helm/openshell/ci/values-high-availability.yaml` | HA test overlay (`replicaCount: 2` with bundled PostgreSQL) | | `deploy/helm/openshell/ci/values-keycloak.yaml` | Keycloak OIDC overlay | | `deploy/helm/openshell/ci/values-tls-disabled.yaml` | Lint-only: TLS + auth disabled (reverse-proxy edge termination) | | `deploy/kube/manifests/envoy-gateway-openshell.yaml` | GatewayClass for Envoy Gateway (`mise run helm:gateway:apply`) | diff --git a/.github/workflows/branch-e2e.yml b/.github/workflows/branch-e2e.yml index de8bd5551..1a0782284 100644 --- a/.github/workflows/branch-e2e.yml +++ b/.github/workflows/branch-e2e.yml @@ -23,6 +23,7 @@ jobs: should_run: ${{ steps.gate.outputs.should_run }} run_core_e2e: ${{ steps.labels.outputs.run_core_e2e }} run_gpu_e2e: ${{ steps.labels.outputs.run_gpu_e2e }} + run_kubernetes_ha_e2e: ${{ steps.labels.outputs.run_kubernetes_ha_e2e }} run_any_e2e: ${{ steps.labels.outputs.run_any_e2e }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -39,11 +40,13 @@ jobs: if [ "$EVENT_NAME" != "push" ]; then run_core_e2e=true run_gpu_e2e=true + run_kubernetes_ha_e2e=true else run_core_e2e="$(jq -r 'index("test:e2e") != null' <<< "$LABELS_JSON")" run_gpu_e2e="$(jq -r 'index("test:e2e-gpu") != null' <<< 
"$LABELS_JSON")" + run_kubernetes_ha_e2e="$(jq -r 'index("test:e2e-kubernetes") != null' <<< "$LABELS_JSON")" fi - if [ "$run_core_e2e" = "true" ] || [ "$run_gpu_e2e" = "true" ]; then + if [ "$run_core_e2e" = "true" ] || [ "$run_gpu_e2e" = "true" ] || [ "$run_kubernetes_ha_e2e" = "true" ]; then run_any_e2e=true else run_any_e2e=false @@ -51,12 +54,13 @@ jobs: { echo "run_core_e2e=$run_core_e2e" echo "run_gpu_e2e=$run_gpu_e2e" + echo "run_kubernetes_ha_e2e=$run_kubernetes_ha_e2e" echo "run_any_e2e=$run_any_e2e" } >> "$GITHUB_OUTPUT" build-gateway: needs: [pr_metadata] - if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_core_e2e == 'true' + if: needs.pr_metadata.outputs.should_run == 'true' && (needs.pr_metadata.outputs.run_core_e2e == 'true' || needs.pr_metadata.outputs.run_kubernetes_ha_e2e == 'true') permissions: contents: read packages: write @@ -107,6 +111,16 @@ jobs: with: image-tag: ${{ github.sha }} + kubernetes-ha-e2e: + needs: [pr_metadata, build-gateway, build-supervisor] + if: needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_kubernetes_ha_e2e == 'true' + permissions: + contents: read + packages: read + uses: ./.github/workflows/e2e-kubernetes-ha-test.yml + with: + image-tag: ${{ github.sha }} + core-e2e-result: name: Core E2E result needs: [pr_metadata, build-gateway, build-supervisor, e2e, kubernetes-e2e] @@ -160,3 +174,30 @@ jobs: fi done exit "$failed" + + kubernetes-ha-e2e-result: + name: Kubernetes HA E2E result + needs: [pr_metadata, build-gateway, build-supervisor, kubernetes-ha-e2e] + if: always() && needs.pr_metadata.outputs.should_run == 'true' && needs.pr_metadata.outputs.run_kubernetes_ha_e2e == 'true' + runs-on: ubuntu-latest + steps: + - name: Verify Kubernetes HA E2E jobs + env: + BUILD_GATEWAY_RESULT: ${{ needs.build-gateway.result }} + BUILD_SUPERVISOR_RESULT: ${{ needs.build-supervisor.result }} + KUBERNETES_HA_E2E_RESULT: ${{ needs.kubernetes-ha-e2e.result }} + run: | + 
set -euo pipefail + failed=0 + for item in \ + "build-gateway:$BUILD_GATEWAY_RESULT" \ + "build-supervisor:$BUILD_SUPERVISOR_RESULT" \ + "kubernetes-ha-e2e:$KUBERNETES_HA_E2E_RESULT"; do + name="${item%%:*}" + result="${item#*:}" + if [ "$result" != "success" ]; then + echo "::error::$name concluded $result" + failed=1 + fi + done + exit "$failed" diff --git a/.github/workflows/e2e-kubernetes-ha-test.yml b/.github/workflows/e2e-kubernetes-ha-test.yml new file mode 100644 index 000000000..756b8024f --- /dev/null +++ b/.github/workflows/e2e-kubernetes-ha-test.yml @@ -0,0 +1,37 @@ +name: Kubernetes HA E2E Test + +on: + workflow_call: + inputs: + image-tag: + description: "Image tag to test (typically the commit SHA)" + required: true + type: string + runner: + description: "GitHub Actions runner label" + required: false + type: string + default: "linux-amd64-cpu8" + checkout-ref: + description: "Git ref to check out for test inputs (defaults to the workflow SHA)" + required: false + type: string + default: "" + +permissions: + contents: read + packages: read + +jobs: + e2e-kubernetes-ha: + name: Kubernetes HA E2E + permissions: + contents: read + packages: read + uses: ./.github/workflows/e2e-kubernetes-test.yml + secrets: inherit + with: + image-tag: ${{ inputs.image-tag }} + runner: ${{ inputs.runner }} + checkout-ref: ${{ inputs.checkout-ref }} + extra-helm-values: deploy/helm/openshell/ci/values-high-availability.yaml diff --git a/.github/workflows/e2e-kubernetes-test.yml b/.github/workflows/e2e-kubernetes-test.yml index c3d16a743..b52a07fe3 100644 --- a/.github/workflows/e2e-kubernetes-test.yml +++ b/.github/workflows/e2e-kubernetes-test.yml @@ -17,6 +17,11 @@ on: required: false type: string default: "" + extra-helm-values: + description: "Colon-separated Helm values files to layer on the Kubernetes e2e chart install" + required: false + type: string + default: "" permissions: contents: read @@ -93,6 +98,7 @@ jobs: - name: Run Kubernetes E2E (Rust smoke) env: 
OPENSHELL_E2E_KUBE_CONTEXT: kind-${{ env.KIND_CLUSTER_NAME }} + OPENSHELL_E2E_KUBE_EXTRA_VALUES: ${{ inputs.extra-helm-values }} IMAGE_TAG: ${{ inputs.image-tag }} OPENSHELL_REGISTRY: ghcr.io/nvidia/openshell run: mise run --no-deps --skip-deps e2e:kubernetes diff --git a/.github/workflows/e2e-label-help.yml b/.github/workflows/e2e-label-help.yml index 21f4397f7..1190bcd3d 100644 --- a/.github/workflows/e2e-label-help.yml +++ b/.github/workflows/e2e-label-help.yml @@ -19,7 +19,7 @@ permissions: {} jobs: hint: name: Post next-step hint for E2E label - if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu' + if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu' || github.event.label.name == 'test:e2e-kubernetes' runs-on: ubuntu-latest permissions: pull-requests: write @@ -43,10 +43,17 @@ jobs: test:e2e) suite_summary="the standard E2E suite" build_summary="gateway and supervisor images" + status_summary="The matching required CI gate status on this PR will flip green automatically once the run finishes." ;; test:e2e-gpu) suite_summary="GPU E2E" build_summary="supervisor image" + status_summary="The matching required CI gate status on this PR will flip green automatically once the run finishes." + ;; + test:e2e-kubernetes) + suite_summary="Kubernetes HA E2E" + build_summary="gateway and supervisor images" + status_summary="This is an optional proof-of-life suite; failures are visible in the workflow run but do not publish a required CI gate status." ;; *) echo "Unrecognized label $LABEL_NAME"; exit 1 ;; esac @@ -69,7 +76,7 @@ jobs: workflow_link="[$workflow_name](https://github.com/$GH_REPO/actions/workflows/$workflow_file)" instructions="Open $workflow_link, find the run for commit \`$short_pr\`, and click **Re-run all jobs** to execute with the label set." fi - body="Label \`$LABEL_NAME\` applied for \`$short_pr\`. 
$instructions The run will execute $suite_summary after building the required $build_summary once. The matching required CI gate status on this PR will flip green automatically once the run finishes." + body="Label \`$LABEL_NAME\` applied for \`$short_pr\`. $instructions The run will execute $suite_summary after building the required $build_summary once. $status_summary" fi gh pr comment "$PR_NUMBER" --body "$body" diff --git a/CI.md b/CI.md index a7ca79c9d..d04668aaf 100644 --- a/CI.md +++ b/CI.md @@ -10,13 +10,15 @@ PR CI that runs on NVIDIA self-hosted runners uses NVIDIA's copy-pr-bot. The bot `Branch Checks` run automatically after copy-pr-bot mirrors the PR. `Required CI Gates` posts PR-head statuses that verify the mirror exists, is current, and ran the expected push-based workflows. E2E suites are opt-in because they are more expensive and publish temporary images. -Two opt-in labels enable the long-running E2E suites: +Three opt-in labels enable the long-running E2E suites: - `test:e2e` runs the standard E2E suite in `Branch E2E Checks` - `test:e2e-gpu` runs GPU E2E in `Branch E2E Checks` +- `test:e2e-kubernetes` runs Kubernetes E2E with the HA Helm overlay + (`replicaCount: 2` and bundled PostgreSQL) in `Branch E2E Checks` -When both labels are present, `Branch E2E Checks` builds the shared gateway and supervisor images once and fans out all enabled suites in parallel. -The `OpenShell / E2E` and `OpenShell / GPU E2E` required statuses are evaluated from separate suite result jobs inside that workflow, so the expensive GPU suite stays independently gated. +When multiple labels are present, `Branch E2E Checks` builds the shared gateway and supervisor images once and fans out all enabled suites in parallel. +The `OpenShell / E2E` and `OpenShell / GPU E2E` required statuses are evaluated from separate suite result jobs inside that workflow. 
`test:e2e-kubernetes` is optional while HA behavior is under active iteration: failures are visible in the workflow run but do not publish a required CI gate status. The GitHub ruleset should require the `OpenShell / ...` statuses published by `Required CI Gates`, not the push-triggered workflow jobs directly. @@ -69,7 +71,7 @@ Flow: 1. Open the PR. copy-pr-bot mirrors it to `pull-request/` automatically. 2. The mirror push runs `Branch Checks` automatically. `Required CI Gates` keeps the PR blocked until the mirror exists, matches the PR head SHA, and the required push-based workflow succeeds. The first `Branch E2E Checks` run only resolves metadata and skips expensive jobs unless an E2E label is already set. -3. A maintainer applies `test:e2e` and/or `test:e2e-gpu`. `E2E Label Help` posts a comment with a link to the existing gated workflow run. +3. A maintainer applies `test:e2e`, `test:e2e-gpu`, and/or `test:e2e-kubernetes`. `E2E Label Help` posts a comment with a link to the existing gated workflow run. 4. The maintainer opens that link and clicks **Re-run all jobs**. This time `pr_metadata` sees the label and the build/E2E jobs run. 5. When the run finishes, the matching `OpenShell / ...` gate status flips to green automatically. 6. New commits push to the mirror automatically and re-trigger `Branch Checks` plus any labeled E2E jobs in `Branch E2E Checks`. @@ -108,7 +110,7 @@ The bot's full administrator documentation is internal to NVIDIA. The only comma | File | Role | |---|---| | `.github/workflows/branch-checks.yml` | Required non-E2E PR checks. Triggers on `push: pull-request/[0-9]+`. | -| `.github/workflows/branch-e2e.yml` | Opt-in standard and GPU E2E. Triggers on `push: pull-request/[0-9]+` and runs jobs selected by `test:e2e` / `test:e2e-gpu`. | +| `.github/workflows/branch-e2e.yml` | Opt-in standard, GPU, and Kubernetes HA E2E. Triggers on `push: pull-request/[0-9]+` and runs jobs selected by `test:e2e`, `test:e2e-gpu`, or `test:e2e-kubernetes`. 
| | `.github/workflows/helm-lint.yml` | Helm chart validation. Triggers on `push: pull-request/[0-9]+` and skips lint jobs unless Helm inputs changed. | | `.github/actions/pr-gate/action.yml` | Composite action that resolves PR metadata and verifies the required label is set. | | `.github/actions/pr-merge-base/action.yml` | Composite action that resolves and fetches the merge-base commit for `pull-request/` push workflows. | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0f42d3469..406205c35 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -302,4 +302,4 @@ DCO sign-off is separate from cryptographic commit signing. CI requires signing ## CI -How PR CI runs, the `test:e2e` / `test:e2e-gpu` labels, copy-pr-bot, and commit-signing setup are documented in [CI.md](CI.md). +How PR CI runs, the `test:e2e`, `test:e2e-gpu`, and `test:e2e-kubernetes` labels, copy-pr-bot, and commit-signing setup are documented in [CI.md](CI.md). diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index 1bccd772b..036222f8a 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -56,6 +56,7 @@ See [`values.yaml`](values.yaml) for source defaults. Selected overlays: - [`ci/values-gateway.yaml`](ci/values-gateway.yaml) - gateway-only configuration - [`ci/values-cert-manager.yaml`](ci/values-cert-manager.yaml) - cert-manager integration - [`ci/values-keycloak.yaml`](ci/values-keycloak.yaml) - Keycloak OIDC integration +- [`ci/values-high-availability.yaml`](ci/values-high-availability.yaml) - HA gateway test overlay with bundled PostgreSQL ### Database backend diff --git a/deploy/helm/openshell/README.md.gotmpl b/deploy/helm/openshell/README.md.gotmpl index 2444e18bc..fc391a416 100644 --- a/deploy/helm/openshell/README.md.gotmpl +++ b/deploy/helm/openshell/README.md.gotmpl @@ -56,6 +56,7 @@ See [`values.yaml`](values.yaml) for source defaults. 
Selected overlays: - [`ci/values-gateway.yaml`](ci/values-gateway.yaml) - gateway-only configuration - [`ci/values-cert-manager.yaml`](ci/values-cert-manager.yaml) - cert-manager integration - [`ci/values-keycloak.yaml`](ci/values-keycloak.yaml) - Keycloak OIDC integration +- [`ci/values-high-availability.yaml`](ci/values-high-availability.yaml) - HA gateway test overlay with bundled PostgreSQL ### Database backend diff --git a/deploy/helm/openshell/ci/values-high-availability.yaml b/deploy/helm/openshell/ci/values-high-availability.yaml new file mode 100644 index 000000000..df4ceae65 --- /dev/null +++ b/deploy/helm/openshell/ci/values-high-availability.yaml @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# CI/dev overlay for exercising the gateway with more than one replica. +# SQLite is not suitable for HA because each replica has its own pod volume, so +# this overlay enables the bundled PostgreSQL dependency added by the chart. +replicaCount: 2 + +postgres: + enabled: true + auth: + password: openshell-ha-ci diff --git a/deploy/helm/openshell/skaffold.yaml b/deploy/helm/openshell/skaffold.yaml index 779211877..0e91db505 100644 --- a/deploy/helm/openshell/skaffold.yaml +++ b/deploy/helm/openshell/skaffold.yaml @@ -95,6 +95,8 @@ deploy: #- ci/values-keycloak.yaml # To enable the Gateway API HTTPRoute (requires Envoy Gateway above): #- ci/values-gateway.yaml + # To test HA gateway behavior with bundled PostgreSQL: + #- ci/values-high-availability.yaml setValueTemplates: image.repository: '{{.IMAGE_REPO_openshell_gateway}}' image.tag: '{{.IMAGE_TAG_openshell_gateway}}' diff --git a/e2e/with-kube-gateway.sh b/e2e/with-kube-gateway.sh index 6da1cbeb9..ebbf91309 100755 --- a/e2e/with-kube-gateway.sh +++ b/e2e/with-kube-gateway.sh @@ -16,6 +16,10 @@ # Helm e2e currently uses plaintext gateway traffic (ci/values-skaffold.yaml). 
# The certgen hook still runs so the gateway has sandbox JWT signing keys. # +# Set OPENSHELL_E2E_KUBE_EXTRA_VALUES to one or more colon-separated Helm values +# files, relative to the repository root or absolute, to layer additional chart +# configuration on top of ci/values-skaffold.yaml. +# # Image source: # - Ephemeral k3d mode builds local `openshell/{gateway,supervisor}:${IMAGE_TAG}` # images by default, imports them into k3d, then installs the chart. This @@ -241,7 +245,7 @@ run_scenario() { helmctl install "${RELEASE_NAME}" "${ROOT}/deploy/helm/openshell" \ --namespace "${NAMESPACE}" --create-namespace \ - --values "${ROOT}/deploy/helm/openshell/ci/values-skaffold.yaml" \ + "${helm_values_args[@]}" \ --set "fullnameOverride=openshell" \ --set "image.repository=${REGISTRY_VALUE}/gateway" \ --set "image.tag=${IMAGE_TAG_VALUE}" \ @@ -535,6 +539,20 @@ if [ -n "${HOST_GATEWAY_IP}" ]; then helm_extra_args+=(--set "server.hostGatewayIP=${HOST_GATEWAY_IP}") fi +helm_values_args=(--values "${ROOT}/deploy/helm/openshell/ci/values-skaffold.yaml") +helm_extra_values_enabled=0 +if [ -n "${OPENSHELL_E2E_KUBE_EXTRA_VALUES:-}" ]; then + IFS=':' read -r -a extra_values_files <<< "${OPENSHELL_E2E_KUBE_EXTRA_VALUES}" + for values_file in "${extra_values_files[@]}"; do + [ -n "${values_file}" ] || continue + if [[ "${values_file}" != /* ]]; then + values_file="${ROOT}/${values_file}" + fi + helm_values_args+=(--values "${values_file}") + helm_extra_values_enabled=1 + done +fi + if [ "${OPENSHELL_E2E_KUBE_DB_SCENARIOS:-0}" = "1" ]; then helm dependency build "${ROOT}/deploy/helm/openshell" @@ -573,11 +591,18 @@ if [ "${OPENSHELL_E2E_KUBE_DB_SCENARIOS:-0}" = "1" ]; then fi else # --- Single-install mode (default, existing behavior) --- - chart_dir="$(chart_without_dependencies)" + helm_dependency_args=() + if [ "${helm_extra_values_enabled}" = "1" ]; then + chart_dir="${ROOT}/deploy/helm/openshell" + helm_dependency_args=(--dependency-update) + else + 
chart_dir="$(chart_without_dependencies)" + fi echo "Installing Helm chart (release=${RELEASE_NAME}, namespace=${NAMESPACE}, tag=${IMAGE_TAG_VALUE})..." helmctl install "${RELEASE_NAME}" "${chart_dir}" \ --namespace "${NAMESPACE}" --create-namespace \ - --values "${ROOT}/deploy/helm/openshell/ci/values-skaffold.yaml" \ + "${helm_dependency_args[@]}" \ + "${helm_values_args[@]}" \ --set "fullnameOverride=openshell" \ --set "image.repository=${REGISTRY_VALUE}/gateway" \ --set "image.tag=${IMAGE_TAG_VALUE}" \ From 1ee1ee5accd54786def6925cdabf9ec579cddde9 Mon Sep 17 00:00:00 2001 From: Jesse Jaggars Date: Mon, 1 Jun 2026 16:50:48 -0400 Subject: [PATCH 4/4] fix(policy): validate signing_service at policy load time Reject policies where credential_signing is set but signing_service is empty during validate_sandbox_policy() instead of failing at connection time. The runtime check in rest.rs is kept as defense-in-depth. --- crates/openshell-policy/src/lib.rs | 58 ++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index b9c249fc3..c97a7155c 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -702,6 +702,8 @@ pub enum PolicyViolation { TooManyPaths { count: usize }, /// A network endpoint uses a TLD wildcard (e.g. `*.com`). TldWildcard { policy_name: String, host: String }, + /// `credential_signing` is set but `signing_service` is missing. 
+ MissingSigningService { policy_name: String, host: String }, } impl fmt::Display for PolicyViolation { @@ -738,6 +740,13 @@ impl fmt::Display for PolicyViolation { use subdomain wildcards like '*.example.com' instead" ) } + Self::MissingSigningService { policy_name, host } => { + write!( + f, + "network policy '{policy_name}': endpoint '{host}' has credential_signing \ + set but signing_service is empty" + ) + } } } } @@ -842,6 +851,12 @@ pub fn validate_sandbox_policy( }); } } + if !ep.credential_signing.is_empty() && ep.signing_service.is_empty() { + violations.push(PolicyViolation::MissingSigningService { + policy_name: name.clone(), + host: ep.host.clone(), + }); + } } } @@ -1401,6 +1416,49 @@ network_policies: assert!(validate_sandbox_policy(&policy).is_ok()); } + #[test] + fn validate_rejects_credential_signing_without_signing_service() { + let mut policy = restrictive_default_policy(); + policy.network_policies.insert( + "aws".into(), + NetworkPolicyRule { + name: "bedrock".into(), + endpoints: vec![NetworkEndpoint { + host: "bedrock-runtime.us-east-1.amazonaws.com".into(), + port: 443, + credential_signing: "sigv4".into(), + signing_service: String::new(), + ..Default::default() + }], + ..Default::default() + }, + ); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!(violations + .iter() + .any(|v| matches!(v, PolicyViolation::MissingSigningService { .. 
}))); + } + + #[test] + fn validate_accepts_credential_signing_with_signing_service() { + let mut policy = restrictive_default_policy(); + policy.network_policies.insert( + "aws".into(), + NetworkPolicyRule { + name: "bedrock".into(), + endpoints: vec![NetworkEndpoint { + host: "bedrock-runtime.us-east-1.amazonaws.com".into(), + port: 443, + credential_signing: "sigv4".into(), + signing_service: "bedrock".into(), + ..Default::default() + }], + ..Default::default() + }, + ); + assert!(validate_sandbox_policy(&policy).is_ok()); + } + #[test] fn normalize_path_collapses_separators() { assert_eq!(normalize_path("/usr//lib"), "/usr/lib");