From c536ffb6260891bf65719340f49f73bcfd88078f Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Fri, 29 May 2026 09:17:22 -0400 Subject: [PATCH 1/2] fix: repo-wide correctness, security & filesystem-safety hardening pass (v3.2.0) Reviewed every source file in both crates line by line, fixed the bugs found, and added regression tests throughout. Highlights: Security - patch/package.rs: path-traversal via validate-before-normalize (package//etc/passwd escaped the package tree) - patch/diff.rs: clamp unbounded Vec preallocation from untrusted bsdiff target-size header (OOM/abort on a hostile delta) - vex/verify.rs: omit zero-file patches instead of emitting an evidence-free not_affected attestation Filesystem safety / atomicity / rollback - apply: DirWriteGuard for read-only dirs, chown-before-chmod to keep setuid/setgid, parent-dir fsync after rename - cow: atomic rename-over symlink (no pre-unlink), stage cleanup - rollback: delegate to hardened apply_file_patch; AlreadyOriginal before blob check; read-only-dir new-file delete - file_hash/git_sha256: open-once + fstat (TOCTOU), regular-file guard, size/body mismatch detection - cargo/nuget sidecars: hardened writes/deletes in read-only caches - cleanup_blobs: symlink-tolerant, accurate counts - apply_lock: classify genuine flock errors as Io, clamp timeout sleep Crawlers (on-disk layout & metadata) - composer v-prefix + malformed-entry tolerance + on-disk check - go cache-at-root, version case-encoding, GOPATH list, module directive - npm symlink following + nested-recursion guard - nuget global-cache version casing - python macOS framework layout + dist-info dir-name fallback - deno macOS cache path, XDG_CACHE_HOME, empty DENO_DIR - maven XML-comment stripping + skip-section depth - cargo TOML header tolerance + dir-name version split - shared utils/fs::entry_is_dir follows symlinks API client, commands & misc - proxy-url override on binary downloads; deterministic org/title/batch flag; case-insensitive hash 
compare - USER_AGENT + telemetry version track CARGO_PKG_VERSION (was 1.0.0) - apply release-variant NotFound spurious-failure fix - get/scan/remove char-safe truncation (UTF-8 panic) - setup/repair honest non-zero exit codes + telemetry - rollback no-op miscount; unlock released-snapshot; vex qualified PURLs - package.json non-object/dedup/glob/key-order (preserve_order) - json_envelope status invariant + oldUuid; list ordering; fuzzy_match tie-break; lock_cli sub-second timeout; vex schema/product fixes Updated stale repair/python_crawler e2e expectations to the corrected contracts. Bumped version to 3.2.0 and added the scripts/study-crates.ts audit harness used to drive the review. Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 3 + CHANGELOG.md | 124 ++++ Cargo.lock | 5 +- Cargo.toml | 6 +- crates/socket-patch-cli/src/args.rs | 117 ++++ crates/socket-patch-cli/src/commands/apply.rs | 208 +++++- crates/socket-patch-cli/src/commands/get.rs | 119 +++- crates/socket-patch-cli/src/commands/list.rs | 290 +++++--- .../socket-patch-cli/src/commands/lock_cli.rs | 112 +++- .../socket-patch-cli/src/commands/remove.rs | 99 ++- .../socket-patch-cli/src/commands/repair.rs | 234 ++++++- .../socket-patch-cli/src/commands/rollback.rs | 168 ++++- crates/socket-patch-cli/src/commands/scan.rs | 182 ++++- crates/socket-patch-cli/src/commands/setup.rs | 21 +- .../socket-patch-cli/src/commands/unlock.rs | 57 +- crates/socket-patch-cli/src/commands/vex.rs | 17 +- .../src/ecosystem_dispatch.rs | 214 +++++- crates/socket-patch-cli/src/json_envelope.rs | 90 ++- crates/socket-patch-cli/src/lib.rs | 66 ++ crates/socket-patch-cli/src/output.rs | 30 + .../tests/api_client_errors_e2e.rs | 6 +- .../socket-patch-cli/tests/cli_global_args.rs | 141 ++++ .../socket-patch-cli/tests/cli_parse_main.rs | 16 + .../tests/e2e_safety_cargo_build.rs | 101 ++- .../tests/e2e_safety_internals.rs | 78 ++- .../tests/e2e_safety_unlock.rs | 46 ++ crates/socket-patch-cli/tests/e2e_vex.rs | 84 +++ 
.../in_process_remove_repair_lifecycle.rs | 53 ++ .../socket-patch-cli/tests/remove_network.rs | 165 +++++ .../tests/repair_invariants.rs | 11 +- .../tests/setup_invariants.rs | 98 +++ .../socket-patch-core/src/api/blob_fetcher.rs | 194 +++++- crates/socket-patch-core/src/api/client.rs | 509 ++++++++++---- crates/socket-patch-core/src/api/types.rs | 171 +++++ crates/socket-patch-core/src/constants.rs | 53 +- .../src/crawlers/cargo_crawler.rs | 255 ++++++- .../src/crawlers/composer_crawler.rs | 259 +++++++- .../src/crawlers/deno_crawler.rs | 271 +++++++- .../src/crawlers/go_crawler.rs | 192 +++++- .../src/crawlers/maven_crawler.rs | 252 ++++++- crates/socket-patch-core/src/crawlers/mod.rs | 36 +- .../src/crawlers/npm_crawler.rs | 132 ++-- .../src/crawlers/nuget_crawler.rs | 110 +++- .../src/crawlers/pkg_managers.rs | 77 ++- .../src/crawlers/python_crawler.rs | 255 +++++-- .../src/crawlers/ruby_crawler.rs | 115 +++- .../socket-patch-core/src/crawlers/types.rs | 113 ++++ .../socket-patch-core/src/hash/git_sha256.rs | 86 ++- .../src/manifest/operations.rs | 140 +++- .../socket-patch-core/src/manifest/schema.rs | 174 ++++- .../src/package_json/detect.rs | 126 +++- .../src/package_json/find.rs | 315 +++++++-- .../src/package_json/update.rs | 133 ++++ crates/socket-patch-core/src/patch/apply.rs | 507 ++++++++++++-- .../socket-patch-core/src/patch/apply_lock.rs | 118 +++- crates/socket-patch-core/src/patch/cow.rs | 145 +++- crates/socket-patch-core/src/patch/diff.rs | 68 +- .../socket-patch-core/src/patch/file_hash.rs | 115 +++- crates/socket-patch-core/src/patch/package.rs | 73 +- .../socket-patch-core/src/patch/rollback.rs | 584 +++++++++++++--- .../src/patch/sidecars/cargo.rs | 267 +++++++- .../src/patch/sidecars/mod.rs | 199 +++++- .../src/patch/sidecars/nuget.rs | 69 +- .../src/patch/sidecars/types.rs | 63 +- .../src/utils/cleanup_blobs.rs | 191 +++++- .../socket-patch-core/src/utils/env_compat.rs | 90 ++- crates/socket-patch-core/src/utils/fs.rs | 112 +++- 
.../src/utils/fuzzy_match.rs | 75 ++- crates/socket-patch-core/src/utils/process.rs | 65 ++ crates/socket-patch-core/src/utils/purl.rs | 115 +++- .../socket-patch-core/src/utils/telemetry.rs | 89 ++- crates/socket-patch-core/src/vex/build.rs | 6 +- .../src/vex/conformance_tests.rs | 150 ++++- crates/socket-patch-core/src/vex/mod.rs | 2 +- crates/socket-patch-core/src/vex/product.rs | 267 ++++++-- crates/socket-patch-core/src/vex/schema.rs | 150 ++++- crates/socket-patch-core/src/vex/time.rs | 69 +- crates/socket-patch-core/src/vex/verify.rs | 311 ++++++++- .../tests/blob_fetcher_edges_e2e.rs | 26 +- crates/socket-patch-core/tests/common/mod.rs | 10 +- .../tests/crawler_cargo_e2e.rs | 105 +-- .../tests/crawler_composer_e2e.rs | 108 ++- .../tests/crawler_deno_e2e.rs | 40 +- .../socket-patch-core/tests/crawler_go_e2e.rs | 67 +- .../tests/crawler_maven_e2e.rs | 88 ++- .../tests/crawler_npm_e2e.rs | 175 +++-- .../tests/crawler_nuget_e2e.rs | 150 +++-- .../tests/crawler_python_e2e.rs | 158 +++-- .../tests/crawler_ruby_e2e.rs | 58 +- .../tests/crawlers_empty_paths_e2e.rs | 12 +- .../tests/fuzzy_match_e2e.rs | 23 +- crates/socket-patch-core/tests/package_e2e.rs | 4 +- .../tests/rollback_new_file_e2e.rs | 20 +- .../tests/telemetry_helpers_e2e.rs | 15 +- npm/socket-patch-android-arm64/package.json | 2 +- npm/socket-patch-darwin-arm64/package.json | 2 +- npm/socket-patch-darwin-x64/package.json | 2 +- npm/socket-patch-linux-arm-gnu/package.json | 2 +- npm/socket-patch-linux-arm-musl/package.json | 2 +- npm/socket-patch-linux-arm64-gnu/package.json | 2 +- .../package.json | 2 +- npm/socket-patch-linux-ia32-gnu/package.json | 2 +- npm/socket-patch-linux-ia32-musl/package.json | 2 +- npm/socket-patch-linux-x64-gnu/package.json | 2 +- npm/socket-patch-linux-x64-musl/package.json | 2 +- npm/socket-patch-win32-arm64/package.json | 2 +- npm/socket-patch-win32-ia32/package.json | 2 +- npm/socket-patch-win32-x64/package.json | 2 +- npm/socket-patch/package-lock.json | 32 +- 
npm/socket-patch/package.json | 30 +- pypi/socket-patch/pyproject.toml | 2 +- scripts/fix-bugs.config.example.ts | 46 ++ scripts/fix-vuln.config.ts | 46 ++ scripts/study-crates.ts | 623 ++++++++++++++++++ 114 files changed, 11189 insertions(+), 1506 deletions(-) create mode 100644 crates/socket-patch-cli/tests/remove_network.rs create mode 100644 scripts/fix-bugs.config.example.ts create mode 100644 scripts/fix-vuln.config.ts create mode 100644 scripts/study-crates.ts diff --git a/.gitignore b/.gitignore index 1ab7447d..76fac0f4 100644 --- a/.gitignore +++ b/.gitignore @@ -148,3 +148,6 @@ npm/socket-patch/bin/socket-patch-* crates/socket-patch-cli/README.md npm/socket-patch/README.md pypi/socket-patch/README.md + +# Generated by scripts/study-crates.ts +study-output/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 96215d0e..9690f14d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,130 @@ in this file — see `.github/workflows/release.yml` (`version` job). ## [Unreleased] +## [3.2.0] — 2026-05-29 + +A repo-wide correctness, security, and filesystem-safety hardening pass: every +source file in both crates was reviewed line by line, the bugs found were fixed, +and regression tests were added throughout (the lib + integration suites grow by +~10k lines of mostly tests). The audit harness used to drive the review lives in +`scripts/study-crates.ts`. + +### Security + +- **Path-traversal in archive extraction.** `read_archive_to_map` + (`patch/package.rs`) validated the raw tar entry path but returned the + `package/`-stripped path, so an entry like `package//etc/passwd` passed every + check and then resolved to an absolute `/etc/passwd` that `Path::join` + writes outside the package tree. Validation now runs on the normalized path + actually written to disk. 
+- **Unbounded preallocation from an untrusted delta header.** `apply_diff` + (`patch/diff.rs`) reserved a `Vec` sized from the bsdiff target-size header, + which qbsdiff never validates — a tiny hostile delta could claim up to + `i64::MAX` and abort the process. The hint is now clamped to 64 MiB. +- **Evidence-free VEX attestation.** `verify_patch_record` (`vex/verify.rs`) + returned `applied` for a patch touching zero files, producing a + `not_affected` statement with no on-disk evidence; zero-file records are now + omitted (`no_files`). + +### Fixed — filesystem safety, atomicity & rollback + +- **`apply` could not write into read-only directories** (Go module cache marks + dirs `0o555`); added a `DirWriteGuard` that temporarily grants write on the + parent dir around the CoW-break + atomic rename and restores its exact mode. +- **`apply` stripped setuid/setgid bits** on every patched file because `chown` + ran after `chmod`; reordered to chown-before-chmod, plus a parent-dir `fsync` + so the rename survives a crash. +- **Non-atomic symlink break** (`patch/cow.rs`) removed the file before staging + its replacement, destroying it with no rollback on a failed write; now + rename-over the link, matching the hardlink path. Stage files are cleaned up + on every error arm. +- **`rollback` used an unsafe in-place write**; it now delegates to the hardened + `apply_file_patch` (atomic, CoW-safe, validate-before-write, permission + restore). Also: a GC'd before-blob no longer shadows the already-original + short-circuit, and new-file deletion works inside read-only directories. +- **Hash integrity:** `compute_file_git_sha256` (`patch/file_hash.rs`) opened + and stat'd the path separately (TOCTOU) and never checked the target was a + regular file (a directory hashed as the empty blob); now opens once, fstats + the descriptor, and rejects non-regular files. `compute_git_sha256_from_reader` + now errors when the streamed byte count disagrees with the declared size. 
+- **Sidecar writes in read-only caches:** the cargo `.cargo-checksum.json` + rewrite and the NuGet `.nupkg.metadata` delete used bare, non-atomic I/O that + failed `EACCES` in the locked-down registry trees they exist to serve; both + now go through the hardened write/`DirWriteGuard` paths. +- **Blob cleanup** (`utils/cleanup_blobs.rs`) aborted the whole sweep on one + dangling symlink and inflated the "checked" count with subdirs/dotfiles; now + uses `symlink_metadata`, skips stat errors, and counts only real blobs. +- **Lock acquisition** (`patch/apply_lock.rs`) mapped every `flock` error to + `Held` (masking `ENOLCK`/`EACCES`/unsupported-FS and busy-waiting through the + whole timeout) and overshot sub-100 ms waits; genuine faults now surface + immediately and the sleep is clamped to the remaining budget. + +### Fixed — crawlers (on-disk layout & metadata) + +- **Composer:** normalize the `v`-prefixed `installed.json` version against bare + PURLs, tolerate a single malformed entry instead of dropping the file, and + skip packages absent on disk. +- **Go:** only skip `cache/` at the module-cache root (not at any depth), + decode/encode case-escaped versions (`v1.0.0-RC1` ↔ `…-!r!c1`), treat `GOPATH` + as a path list, and reject malformed/empty `module` directives. +- **npm:** follow symlinked directories during the global-fallback walk + (`DirEntry::metadata()` doesn't follow links) and guard nested recursion so it + doesn't descend through symlinked packages. +- **NuGet:** lowercase the version directory (not just the id) when resolving the + global packages folder, so prerelease-cased versions resolve. +- **Python:** the macOS framework `Versions/` layout uses bare `3.11` dirs, and a + package with missing/malformed `METADATA` now falls back to its + `{name}-{version}.dist-info` directory name instead of vanishing. +- **Deno:** correct the macOS cache path (`~/Library/Caches/deno`), honor + `XDG_CACHE_HOME` on Linux, and treat an empty `DENO_DIR` as unset.
+- **Maven:** strip XML comments before tag matching and handle self-closing / + inline skip-sections so a commented or oddly-formatted POM can't leak a + plugin's coordinates as the project's. +- **Cargo:** tolerate `[package]` headers with comments/whitespace and split + `{name}-{version}` dirs at the dotted version (handles numeric pre-releases). +- **Shared:** `utils/fs::entry_is_dir` now follows symlinks, fixing symlinked + package-dir discovery across every dir-walking crawler at once. + +### Fixed — API client, commands & misc + +- **API client:** honor a `--proxy-url` override on binary downloads (was + re-derived from env), and make org selection, patch titles, and the + individual-query batch capability flag deterministic / order-independent; + hash comparison is now case-insensitive. +- **Version reporting:** `USER_AGENT` and telemetry `context.version` were + hardcoded to `1.0`/`1.0.0`; both now derive from `CARGO_PKG_VERSION`. +- **`apply`** no longer emits a spurious `Failed` envelope event for a + release-variant whose first file is `NotFound`. +- **UTF-8 safety:** `get`/`scan`/`remove` truncated display strings with raw + byte slices that panic on multi-byte API text; all use char-safe truncation. +- **Exit codes:** `setup` now exits non-zero (not `already_configured`) when a + `package.json` fails to parse, and `repair` exits non-zero and fires failure + telemetry on a partial download failure (also gates the offline dry-run + "would download" event and threads through `bytes_freed`). +- **`rollback`** no longer miscounts zero-file records as already-original or + double-counts no-ops in dry-run; **`unlock`** reports `released` from a + pre-`acquire` snapshot so a probe-created lock file isn't reported as removed. +- **`vex`** resolves qualified PyPI/Gem/Maven PURLs via the rollback-aware + resolver so those patches are no longer dropped as `package_not_found`.
+- **`package.json` handling:** no longer panics on a non-object root or + non-object `scripts`, de-dups overlapping workspace patterns, handles bare + `*`/`**`/deep globs, strips inline YAML comments, and preserves top-level key + order (enabled `serde_json`'s `preserve_order`). +- Smaller fixes: deterministic `list` output ordering, case-insensitive + `fuzzy_match` tie-break, `json_envelope` status-invariant enforcement + + `oldUuid` field, `lock_cli` sub-second timeout message, blob-fetcher + all-skipped formatting, VEX `Statement.timestamp` made optional per OpenVEX + 0.2.0, and VEX git-remote `url` parsing. + +### Tests & tooling + +- Hundreds of regression tests added across the patch engine, crawlers, API + client, manifest, `package.json`, VEX, and CLI command layers; the stale + `repair`/`python_crawler` e2e expectations were updated to the corrected + contracts. Full suite green (`--features cargo`). +- Added the `scripts/study-crates.ts` per-file audit harness (with an example + prompt config) used to drive this review. 
+ ## [3.1.0] — 2026-05-26 ### Added diff --git a/Cargo.lock b/Cargo.lock index 941b8ffb..e4a2a74e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2240,6 +2240,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ + "indexmap 2.13.0", "itoa", "memchr", "serde", @@ -2402,7 +2403,7 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket-patch-cli" -version = "3.1.0" +version = "3.2.0" dependencies = [ "base64", "clap", @@ -2427,7 +2428,7 @@ dependencies = [ [[package]] name = "socket-patch-core" -version = "3.1.0" +version = "3.2.0" dependencies = [ "flate2", "fs2", diff --git a/Cargo.toml b/Cargo.toml index 5bfa77c2..9b9db0f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,16 +3,16 @@ members = ["crates/socket-patch-core", "crates/socket-patch-cli"] resolver = "2" [workspace.package] -version = "3.1.0" +version = "3.2.0" edition = "2021" license = "MIT" repository = "https://github.com/SocketDev/socket-patch" [workspace.dependencies] -socket-patch-core = { path = "crates/socket-patch-core", version = "=3.1.0" } +socket-patch-core = { path = "crates/socket-patch-core", version = "=3.2.0" } clap = { version = "=4.5.60", features = ["derive", "env"] } serde = { version = "=1.0.228", features = ["derive"] } -serde_json = "=1.0.149" +serde_json = { version = "=1.0.149", features = ["preserve_order"] } sha2 = "=0.10.9" hex = "=0.4.3" reqwest = { version = "=0.12.28", features = ["rustls-tls", "json"], default-features = false } diff --git a/crates/socket-patch-cli/src/args.rs b/crates/socket-patch-cli/src/args.rs index e0d048b9..d9c4529b 100644 --- a/crates/socket-patch-cli/src/args.rs +++ b/crates/socket-patch-cli/src/args.rs @@ -284,3 +284,120 @@ impl Default for GlobalArgs { } } } + +#[cfg(test)] +mod tests { + use super::*; + + /// `api_client_overrides` must forward every populated value 
verbatim. + #[test] + fn api_client_overrides_forwards_set_values() { + let args = GlobalArgs { + api_url: "https://api.example.com".to_string(), + api_token: Some("tok123".to_string()), + org: Some("acme".to_string()), + proxy_url: "https://proxy.example.com".to_string(), + ..GlobalArgs::default() + }; + let o = args.api_client_overrides(); + assert_eq!(o.api_url.as_deref(), Some("https://api.example.com")); + assert_eq!(o.api_token.as_deref(), Some("tok123")); + assert_eq!(o.org_slug.as_deref(), Some("acme")); + assert_eq!(o.proxy_url.as_deref(), Some("https://proxy.example.com")); + } + + /// `GlobalArgs::default()` leaves `api_url`/`proxy_url` empty and the + /// optional fields `None`, so every override must come back `None` — + /// this is what lets integration tests set `SOCKET_*` env vars *after* + /// constructing args and still have env-var resolution win downstream. + #[test] + fn api_client_overrides_default_is_all_none() { + let o = GlobalArgs::default().api_client_overrides(); + assert!(o.api_url.is_none(), "empty api_url must not be forwarded"); + assert!(o.proxy_url.is_none(), "empty proxy_url must not be forwarded"); + assert!(o.api_token.is_none()); + assert!(o.org_slug.is_none()); + } + + /// Empty strings for url/token/org are filtered out, not forwarded as + /// `Some("")` — otherwise an empty CLI value would mask env-var fallback. + #[test] + fn api_client_overrides_filters_empty_strings() { + let args = GlobalArgs { + api_url: String::new(), + api_token: Some(String::new()), + org: Some(String::new()), + proxy_url: String::new(), + ..GlobalArgs::default() + }; + let o = args.api_client_overrides(); + assert!(o.api_url.is_none()); + assert!(o.api_token.is_none()); + assert!(o.org_slug.is_none()); + assert!(o.proxy_url.is_none()); + } + + /// A relative `manifest_path` is resolved against `cwd`. 
+ #[test] + fn resolved_manifest_path_joins_relative_against_cwd() { + let args = GlobalArgs { + cwd: PathBuf::from("/work/project"), + manifest_path: ".socket/manifest.json".to_string(), + ..GlobalArgs::default() + }; + assert_eq!( + args.resolved_manifest_path(), + PathBuf::from("/work/project/.socket/manifest.json"), + ); + } + + /// An absolute `manifest_path` ignores `cwd` and passes through unchanged. + #[test] + fn resolved_manifest_path_passes_absolute_through() { + let args = GlobalArgs { + cwd: PathBuf::from("/work/project"), + manifest_path: "/etc/socket/manifest.json".to_string(), + ..GlobalArgs::default() + }; + assert_eq!( + args.resolved_manifest_path(), + PathBuf::from("/etc/socket/manifest.json"), + ); + } + + /// `apply_env_toggles` mirrors `--debug` / `--no-telemetry` into the env + /// vars core code reads directly, and is a no-op when the flags are off. + /// `#[serial]` because it mutates process-global env state. + #[test] + #[serial_test::serial] + fn apply_env_toggles_mirrors_flags_into_env() { + let saved_debug = std::env::var("SOCKET_DEBUG").ok(); + let saved_telemetry = std::env::var("SOCKET_TELEMETRY_DISABLED").ok(); + std::env::remove_var("SOCKET_DEBUG"); + std::env::remove_var("SOCKET_TELEMETRY_DISABLED"); + + // Flags off: no-op, env stays unset. + apply_env_toggles(&GlobalArgs::default()); + assert!(std::env::var("SOCKET_DEBUG").is_err()); + assert!(std::env::var("SOCKET_TELEMETRY_DISABLED").is_err()); + + // Flags on: mirrored into the env. 
+ let args = GlobalArgs { + debug: true, + no_telemetry: true, + ..GlobalArgs::default() + }; + apply_env_toggles(&args); + assert_eq!(std::env::var("SOCKET_DEBUG").as_deref(), Ok("1")); + assert_eq!(std::env::var("SOCKET_TELEMETRY_DISABLED").as_deref(), Ok("1")); + + match saved_debug { + Some(v) => std::env::set_var("SOCKET_DEBUG", v), + None => std::env::remove_var("SOCKET_DEBUG"), + } + match saved_telemetry { + Some(v) => std::env::set_var("SOCKET_TELEMETRY_DISABLED", v), + None => std::env::remove_var("SOCKET_TELEMETRY_DISABLED"), + } + } +} diff --git a/crates/socket-patch-cli/src/commands/apply.rs b/crates/socket-patch-cli/src/commands/apply.rs index 2d8f456e..32d79af6 100644 --- a/crates/socket-patch-cli/src/commands/apply.rs +++ b/crates/socket-patch-cli/src/commands/apply.rs @@ -70,6 +70,55 @@ pub struct ApplyArgs { pub force: bool, } +/// True when every file the engine verified for this package is already +/// at its `afterHash` — i.e. the patch is a complete no-op on disk. +/// +/// Single source of truth for the `already_patched` classification, shared +/// by [`result_to_event`] (which feeds the JSON envelope) and the +/// human-readable summaries so both label packages identically. +/// +/// The `!is_empty()` guard is essential: `Iterator::all` over an empty +/// slice is vacuously `true`. Without the guard a result with no verified +/// files — a zero-file patch, or a freshly-applied package whose +/// `files_verified` came back empty — would be mislabeled "already +/// patched" and counted as a no-op even though nothing matched `afterHash`. +fn all_files_already_patched(result: &ApplyResult) -> bool { + !result.files_verified.is_empty() + && result + .files_verified + .iter() + .all(|f| f.status == VerifyStatus::AlreadyPatched) +} + +/// Decide whether a release variant describes the distribution that is +/// actually installed on disk, based on the verification status of its +/// first patched file. 
+/// +/// This is the apply-side mirror of +/// [`select_installed_variants`](socket_patch_core::patch::apply::select_installed_variants), +/// which `rollback` and `get` use: a variant matches only when its first +/// file is [`Ready`](VerifyStatus::Ready) (its `beforeHash` matches the +/// on-disk bytes) or [`AlreadyPatched`](VerifyStatus::AlreadyPatched) +/// (its `afterHash` already matches). A variant with no files (`None`) +/// has nothing to disqualify it and is treated as a match. +/// +/// Crucially, both [`HashMismatch`](VerifyStatus::HashMismatch) **and** +/// [`NotFound`](VerifyStatus::NotFound) mean "this variant's +/// distribution is not the one on disk" and must be skipped. A +/// `NotFound` arises when a non-installed variant patches a file that +/// only exists in *its* distribution (e.g. an sdist patching `setup.py` +/// while a wheel is installed). Skipping it avoids attempting — and +/// spuriously reporting a `Failed` event for — a variant that was never +/// installed. +fn variant_matches_installed(first_file_status: Option<&VerifyStatus>) -> bool { + match first_file_status { + None => true, + Some(status) => { + *status == VerifyStatus::Ready || *status == VerifyStatus::AlreadyPatched + } + } +} + /// Translate the core engine's per-package [`ApplyResult`] into a single /// patch-level [`PatchEvent`] for the unified envelope. 
/// @@ -94,13 +143,7 @@ pub(crate) fn result_to_event(result: &ApplyResult, dry_run: bool) -> PatchEvent ); } - let all_already_patched = !result.files_verified.is_empty() - && result - .files_verified - .iter() - .all(|f| f.status == VerifyStatus::AlreadyPatched); - - if all_already_patched { + if all_files_already_patched(result) { return PatchEvent::new(PatchAction::Skipped, purl) .with_reason("already_patched", "All files already match afterHash"); } @@ -274,16 +317,17 @@ pub async fn run(args: ApplyArgs) -> i32 { let patched: Vec<_> = results.iter().filter(|r| r.success).collect(); let already_patched: Vec<_> = results .iter() - .filter(|r| { - r.files_verified - .iter() - .all(|f| f.status == VerifyStatus::AlreadyPatched) - }) + .filter(|r| all_files_already_patched(r)) .collect(); if args.common.dry_run { + // An already-patched package is `Skipped` in the JSON + // envelope, not `Verified`. Mirror that split here so + // "can be patched" excludes the no-ops instead of + // double-counting them against "already patched". + let can_be_patched = patched.len().saturating_sub(already_patched.len()); println!("\nPatch verification complete:"); - println!(" {} package(s) can be patched", patched.len()); + println!(" {} package(s) can be patched", can_be_patched); if !already_patched.is_empty() { println!(" {} package(s) already patched", already_patched.len()); } @@ -308,9 +352,7 @@ pub async fn run(args: ApplyArgs) -> i32 { format!(" (via {})", tags.join("+")) }; println!(" {}{}", result.package_key, suffix); - } else if result.files_verified.iter().all(|f| { - f.status == VerifyStatus::AlreadyPatched - }) { + } else if all_files_already_patched(result) { println!(" {} (already patched)", result.package_key); } } @@ -641,15 +683,20 @@ async fn apply_patches_inner( None => continue, }; - // Check first file hash match (skip when --force). A - // mismatch means this variant's distribution isn't the - // one on disk, so skip it. 
+ // Check the first file's status (skip when --force). A + // mismatch *or* a missing file means this variant's + // distribution isn't the one on disk, so skip it — + // attempting it would only produce a spurious failure. + // Mirrors `select_installed_variants`, used by rollback/get. if !args.force { - if let Some((file_name, file_info)) = patch.files.iter().next() { - let verify = verify_file_patch(pkg_path, file_name, file_info).await; - if verify.status == VerifyStatus::HashMismatch { - continue; + let first_status = match patch.files.iter().next() { + Some((file_name, file_info)) => { + Some(verify_file_patch(pkg_path, file_name, file_info).await.status) } + None => None, + }; + if !variant_matches_installed(first_status.as_ref()) { + continue; } } @@ -753,9 +800,7 @@ async fn apply_patches_inner( // Post-apply summary if !args.common.silent && !args.common.json { let applied_count = results.iter().filter(|r| r.success && !r.files_patched.is_empty()).count(); - let already_count = results.iter().filter(|r| { - r.files_verified.iter().all(|f| f.status == VerifyStatus::AlreadyPatched) - }).count(); + let already_count = results.iter().filter(|r| all_files_already_patched(r)).count(); println!( "\nSummary: {}/{} targeted patches applied, {} already patched, {} not found on disk", applied_count, @@ -896,4 +941,115 @@ mod tests { assert_eq!(by_path["package/b.js"]["appliedVia"], "package"); assert_eq!(by_path["package/c.js"]["appliedVia"], "blob"); } + + /// Build a successful `ApplyResult` whose verified files carry the + /// given statuses, with no patched files. Used to exercise the + /// `already_patched` classification directly. 
+ fn sample_verified(statuses: &[VerifyStatus]) -> ApplyResult { + let files_verified = statuses + .iter() + .enumerate() + .map(|(i, status)| VerifyResult { + file: format!("package/f{i}.js"), + status: status.clone(), + message: None, + current_hash: None, + expected_hash: None, + target_hash: None, + }) + .collect(); + ApplyResult { + package_key: "pkg:npm/foo@1.0.0".to_string(), + package_path: "/tmp/foo".to_string(), + success: true, + files_verified, + files_patched: Vec::new(), + applied_via: HashMap::new(), + error: None, + sidecar: None, + } + } + + #[test] + fn all_files_already_patched_true_when_every_file_matches() { + let result = sample_verified(&[ + VerifyStatus::AlreadyPatched, + VerifyStatus::AlreadyPatched, + ]); + assert!(all_files_already_patched(&result)); + } + + #[test] + fn all_files_already_patched_false_when_any_file_differs() { + let result = sample_verified(&[ + VerifyStatus::AlreadyPatched, + VerifyStatus::Ready, + ]); + assert!(!all_files_already_patched(&result)); + } + + /// Regression: `Iterator::all` over an empty slice is vacuously true. + /// A result with no verified files must NOT be reported as + /// "already patched" — the `!is_empty()` guard enforces this so the + /// human summaries and the JSON envelope agree. + #[test] + fn all_files_already_patched_false_when_no_verified_files() { + let mut result = sample_verified(&[]); + assert!(result.files_verified.is_empty()); + assert!(!all_files_already_patched(&result)); + + // A freshly-applied package (files patched, none left verified) + // is likewise not a no-op. + result.files_patched = vec!["package/a.js".to_string()]; + assert!(!all_files_already_patched(&result)); + } + + /// Regression: a non-installed release variant whose first patched + /// file is `NotFound` (e.g. an sdist patching `setup.py` while only a + /// wheel is on disk) must be treated as NOT installed and skipped — + /// exactly like a `HashMismatch`. 
Before the fix the loop only skipped + /// `HashMismatch`, so a `NotFound` variant slipped through to + /// `apply_package_patch` and produced a spurious `Failed` event in the + /// JSON envelope. This pins the apply-side decision to the same + /// Ready/AlreadyPatched contract as `select_installed_variants`. + #[test] + fn variant_matches_only_when_first_file_ready_or_already_patched() { + // Installed distribution: first file applies cleanly, or is + // already at afterHash → this variant is the one on disk. + assert!(variant_matches_installed(Some(&VerifyStatus::Ready))); + assert!(variant_matches_installed(Some(&VerifyStatus::AlreadyPatched))); + + // Not the installed distribution → must be skipped. The NotFound + // case is the specific regression this guards. + assert!(!variant_matches_installed(Some(&VerifyStatus::HashMismatch))); + assert!(!variant_matches_installed(Some(&VerifyStatus::NotFound))); + + // A variant with no files has nothing to disqualify it — match, + // mirroring `select_installed_variants`. + assert!(variant_matches_installed(None)); + } + + /// Regression: a freshly-applied result with an empty `files_verified` + /// must map to `Applied`, never `Skipped`/`already_patched`. This is + /// the same classification the human-readable summary relies on via + /// `all_files_already_patched`. 
+ #[test] + fn applied_with_empty_verified_is_not_skipped() { + let mut applied_via = HashMap::new(); + applied_via.insert("package/a.js".to_string(), CoreAppliedVia::Blob); + let result = ApplyResult { + package_key: "pkg:npm/foo@1.0.0".to_string(), + package_path: "/tmp/foo".to_string(), + success: true, + files_verified: Vec::new(), + files_patched: vec!["package/a.js".to_string()], + applied_via, + error: None, + sidecar: None, + }; + let event = result_to_event(&result, false); + let v: serde_json::Value = + serde_json::from_str(&serde_json::to_string(&event).unwrap()).unwrap(); + assert_eq!(v["action"], "applied"); + } } diff --git a/crates/socket-patch-cli/src/commands/get.rs b/crates/socket-patch-cli/src/commands/get.rs index 849ac88c..25f3a5a3 100644 --- a/crates/socket-patch-cli/src/commands/get.rs +++ b/crates/socket-patch-cli/src/commands/get.rs @@ -165,6 +165,30 @@ fn print_json(v: &serde_json::Value) { println!("{}", serde_json::to_string_pretty(v).unwrap()); } +/// Truncate `s` to at most `limit` displayed characters, appending an +/// ellipsis when it was longer. The result is never wider than `limit`: +/// for `limit < 3` the ellipsis itself is clipped to fit. Operates on +/// `char` boundaries, NOT bytes: a byte-index slice like `&s[..n]` +/// panics when `n` lands mid-codepoint, and patch descriptions come +/// straight from the API and routinely contain non-ASCII text. +pub(crate) fn truncate_with_ellipsis(s: &str, limit: usize) -> String { + if s.chars().count() <= limit { + s.to_string() + } else { + let head: String = s.chars().take(limit.saturating_sub(3)).collect(); + format!("{head}...").chars().take(limit).collect() + } +} + +/// Short, display-only prefix of a UUID for `[update]` log lines. Returns +/// the first 8 bytes when they fall on a char boundary, otherwise the +/// whole string. A naive `&uuid[..8]` panics on a malformed/short UUID in +/// the manifest (out-of-bounds or mid-codepoint); this never does. Pure +/// so the no-panic guarantee is unit-testable.
+fn short_uuid(uuid: &str) -> &str { + uuid.get(..8).unwrap_or(uuid) +} + /// Build a no-results JSON envelope with the given status code. Used in /// the `no_packages`, `no_match`, and `not_found` branches of `get`, /// which all share the same `{status, counts, patches: []}` shape. @@ -441,11 +465,7 @@ pub fn select_patches( } else { format!(" (fixes: {})", vuln_summary.join(", ")) }; - let desc = if p.description.len() > 60 { - format!("{}...", &p.description[..57]) - } else { - p.description.clone() - }; + let desc = truncate_with_ellipsis(&p.description, 60); format!("{} [{}]{} - {}", p.uuid, p.tier, vulns, desc) }) .collect(); @@ -763,7 +783,10 @@ pub async fn download_and_apply_patches( eprintln!( " [update] {} (replacing {})", search_result.purl, - &existing.uuid[..8] + // Defensive: a malformed/short UUID in the manifest + // must not panic the download loop. `&uuid[..8]` + // would; fall back to the whole string. + short_uuid(&existing.uuid) ); } } @@ -1362,11 +1385,7 @@ fn display_search_results(patches: &[PatchSearchResult], can_access_paid: bool) println!(" {}. {}{}{}", i + 1, patch.purl, tier_label, access_label); println!(" UUID: {}", patch.uuid); if !patch.description.is_empty() { - let desc = if patch.description.len() > 80 { - format!("{}...", &patch.description[..77]) - } else { - patch.description.clone() - }; + let desc = truncate_with_ellipsis(&patch.description, 80); println!(" Description: {desc}"); } @@ -1970,4 +1989,82 @@ mod tests { // shape stays consistent. assert_eq!(meta["vulnerabilities"].as_array().unwrap().len(), 0); } + + // --- truncate_with_ellipsis ------------------------------------------ + // Patch descriptions come from the API and may contain multi-byte + // UTF-8. The old `&desc[..n]` byte slicing panicked when `n` fell mid + // codepoint; these lock in char-safe behavior. 
+ + #[test] + fn truncate_short_string_unchanged() { + assert_eq!(truncate_with_ellipsis("hello", 60), "hello"); + } + + #[test] + fn truncate_at_limit_unchanged() { + let s = "a".repeat(60); + assert_eq!(truncate_with_ellipsis(&s, 60), s); + } + + #[test] + fn truncate_long_ascii_adds_ellipsis_and_respects_limit() { + let s = "a".repeat(100); + let out = truncate_with_ellipsis(&s, 60); + // 57 content chars + "..." == 60, never wider than the limit. + assert_eq!(out.chars().count(), 60); + assert!(out.ends_with("...")); + assert_eq!(out, format!("{}...", "a".repeat(57))); + } + + #[test] + fn truncate_multibyte_does_not_panic_and_is_char_safe() { + // 90 bytes (30 * 3-byte chars) but only 30 chars: the byte length + // exceeds 80 while the char count does not. A `&s[..77]` byte slice + // would land mid-codepoint and panic; this must return the string + // untouched because it fits within the char limit. + let s = "日".repeat(30); + let out = truncate_with_ellipsis(&s, 80); + assert_eq!(out, s); + } + + #[test] + fn truncate_multibyte_long_truncates_on_char_boundary() { + // 100 multi-byte chars (300 bytes) — must truncate to 77 chars plus + // the ellipsis without ever slicing through a codepoint. + let s = "é".repeat(100); + let out = truncate_with_ellipsis(&s, 80); + assert_eq!(out.chars().count(), 80); + assert!(out.ends_with("...")); + assert_eq!(out, format!("{}...", "é".repeat(77))); + } + + // --- short_uuid ------------------------------------------------------ + // The `[update]` log line prints the first 8 chars of the manifest's + // existing UUID. A naive `&uuid[..8]` panics on a short or non-ASCII + // value; `short_uuid` must never panic. + + #[test] + fn short_uuid_truncates_normal_uuid() { + assert_eq!(short_uuid("80630680-4da6-45f9-bba8-b888e0ffd58c"), "80630680"); + } + + #[test] + fn short_uuid_returns_whole_string_when_shorter_than_eight() { + // `&"abc"[..8]` would panic; the helper falls back to the whole value. 
+ assert_eq!(short_uuid("abc"), "abc"); + assert_eq!(short_uuid(""), ""); + } + + #[test] + fn short_uuid_does_not_panic_on_multibyte_boundary() { + // `get(..8)` yields None both when index 8 is past the end and when + // it falls inside a multi-byte char; in either case `short_uuid` + // must return the whole string instead of panicking. + let s = "ab€cd"; // '€' is 3 bytes: bytes are a b € c d -> len 7 + // get(..8) is out of range -> None -> whole string, no panic. + assert_eq!(short_uuid(s), s); + // Index 8 falls inside the trailing '€' (bytes 6..9) -> None. + let s2 = "abcdef€"; // 6 ascii + 3-byte '€' = 9 bytes; byte 8 mid-char + assert_eq!(short_uuid(s2), s2); + } } diff --git a/crates/socket-patch-cli/src/commands/list.rs b/crates/socket-patch-cli/src/commands/list.rs index a0786c19..84ef724c 100644 --- a/crates/socket-patch-cli/src/commands/list.rs +++ b/crates/socket-patch-cli/src/commands/list.rs @@ -1,5 +1,6 @@ use clap::Args; use socket_patch_core::manifest::operations::read_manifest; +use socket_patch_core::manifest::schema::PatchManifest; use socket_patch_core::utils::telemetry::track_patch_listed; use crate::args::GlobalArgs; @@ -13,6 +14,72 @@ pub struct ListArgs { pub common: GlobalArgs, } +/// Build the `list --json` envelope: one `Discovered` event per manifest +/// entry, with the rich metadata (vulnerabilities, tier, license, +/// description, exportedAt) under `details` per the per-command extension +/// convention. +/// +/// Patches, vulnerabilities, and files are each emitted in a stable sorted +/// order (by PURL / advisory ID / path). `HashMap` iteration is otherwise +/// nondeterministic, so without this the event/vuln/file ordering would +/// change run-to-run — breaking consumers that diff this output in CI logs. +/// Mirrors the stable-ordering guarantee `get` already provides for its +/// vulnerability lists.
+/// +/// Shared by `run` and the unit tests so the tests exercise the exact code +/// path `list --json` uses, rather than a hand-copied duplicate. +fn build_list_envelope(manifest: &PatchManifest) -> Envelope { + let mut env = Envelope::new(Command::List); + + let mut patch_entries: Vec<_> = manifest.patches.iter().collect(); + patch_entries.sort_by(|a, b| a.0.cmp(b.0)); + + for (purl, patch) in patch_entries { + let mut file_paths: Vec<_> = patch.files.keys().cloned().collect(); + file_paths.sort(); + let files = file_paths + .into_iter() + .map(|path| PatchEventFile { + path, + verified: false, + applied_via: None, + }) + .collect(); + + let mut vuln_entries: Vec<_> = patch.vulnerabilities.iter().collect(); + vuln_entries.sort_by(|a, b| a.0.cmp(b.0)); + let vulnerabilities: Vec<_> = vuln_entries + .iter() + .map(|(id, vuln)| { + serde_json::json!({ + "id": id, + "cves": vuln.cves, + "summary": vuln.summary, + "severity": vuln.severity, + "description": vuln.description, + }) + }) + .collect(); + + let details = serde_json::json!({ + "exportedAt": patch.exported_at, + "tier": patch.tier, + "license": patch.license, + "description": patch.description, + "vulnerabilities": vulnerabilities, + }); + + env.record( + PatchEvent::new(PatchAction::Discovered, purl.clone()) + .with_uuid(patch.uuid.clone()) + .with_files(files) + .with_details(details), + ); + } + + env +} + /// Emit the top-level envelope for `list` in error states. Used for the /// "manifest not found" and "manifest unreadable" paths so they share /// the same JSON shape as a successful list. @@ -40,7 +107,10 @@ pub async fn run(args: ListArgs) -> i32 { match read_manifest(&manifest_path).await { Ok(Some(manifest)) => { - let patch_entries: Vec<_> = manifest.patches.iter().collect(); + // Sort by PURL so both the JSON envelope and the human-readable + // table list packages in a stable order across runs. 
+ let mut patch_entries: Vec<_> = manifest.patches.iter().collect(); + patch_entries.sort_by(|a, b| a.0.cmp(b.0)); let patches_count = patch_entries.len(); track_patch_listed( patches_count, @@ -50,44 +120,7 @@ pub async fn run(args: ListArgs) -> i32 { .await; if args.common.json { - let mut env = Envelope::new(Command::List); - for (purl, patch) in &patch_entries { - // `list` emits one `Discovered` event per manifest - // entry. The rich metadata (vulnerabilities, tier, - // license, description, exportedAt) lives under - // `details` per the per-command extension convention. - let files = patch - .files - .keys() - .map(|p| PatchEventFile { - path: p.clone(), - verified: false, - applied_via: None, - }) - .collect(); - let details = serde_json::json!({ - "exportedAt": patch.exported_at, - "tier": patch.tier, - "license": patch.license, - "description": patch.description, - "vulnerabilities": patch.vulnerabilities.iter().map(|(id, vuln)| { - serde_json::json!({ - "id": id, - "cves": vuln.cves, - "summary": vuln.summary, - "severity": vuln.severity, - "description": vuln.description, - }) - }).collect::>(), - }); - env.record( - PatchEvent::new(PatchAction::Discovered, (*purl).clone()) - .with_uuid(patch.uuid.clone()) - .with_files(files) - .with_details(details), - ); - } - println!("{}", env.to_pretty_json()); + println!("{}", build_list_envelope(&manifest).to_pretty_json()); } else if patch_entries.is_empty() { println!("No patches found in manifest."); } else { @@ -103,7 +136,9 @@ pub async fn run(args: ListArgs) -> i32 { println!(" Description: {}", patch.description); } - let vuln_entries: Vec<_> = patch.vulnerabilities.iter().collect(); + // Sort vulnerabilities by advisory ID for stable output. 
+ let mut vuln_entries: Vec<_> = patch.vulnerabilities.iter().collect(); + vuln_entries.sort_by(|a, b| a.0.cmp(b.0)); if !vuln_entries.is_empty() { println!(" Vulnerabilities ({}):", vuln_entries.len()); for (id, vuln) in &vuln_entries { @@ -118,7 +153,9 @@ pub async fn run(args: ListArgs) -> i32 { } } - let file_list: Vec<_> = patch.files.keys().collect(); + // Sort patched files by path for stable output. + let mut file_list: Vec<_> = patch.files.keys().collect(); + file_list.sort(); if !file_list.is_empty() { println!(" Files patched ({}):", file_list.len()); for file_path in &file_list { @@ -149,7 +186,7 @@ mod tests { //! so downstream consumers (PR bots, dashboards) can rely on it. use super::*; use socket_patch_core::manifest::schema::{ - PatchFileInfo, PatchManifest, PatchRecord, VulnerabilityInfo, + PatchFileInfo, PatchRecord, VulnerabilityInfo, }; use std::collections::HashMap; @@ -191,49 +228,64 @@ mod tests { PatchManifest { patches } } - /// Build the envelope the same way `run` would for the given manifest. - /// Keeps the test free of binary-spawn overhead while still pinning - /// the exact event shape `list --json` produces. 
- fn build_envelope(manifest: &PatchManifest) -> Envelope { - let mut env = Envelope::new(Command::List); - for (purl, patch) in &manifest.patches { - let files = patch - .files - .keys() - .map(|p| PatchEventFile { - path: p.clone(), - verified: false, - applied_via: None, - }) - .collect(); - let details = serde_json::json!({ - "exportedAt": patch.exported_at, - "tier": patch.tier, - "license": patch.license, - "description": patch.description, - "vulnerabilities": patch.vulnerabilities.iter().map(|(id, vuln)| { - serde_json::json!({ - "id": id, - "cves": vuln.cves, - "summary": vuln.summary, - "severity": vuln.severity, - "description": vuln.description, - }) - }).collect::>(), - }); - env.record( - PatchEvent::new(PatchAction::Discovered, purl.clone()) - .with_uuid(patch.uuid.clone()) - .with_files(files) - .with_details(details), - ); + /// A manifest with several patches, each carrying multiple + /// vulnerabilities and files, all inserted in deliberately + /// non-alphabetical order. Used to pin the stable sort order the + /// envelope must impose regardless of HashMap iteration. 
+ fn multi_entry_manifest() -> PatchManifest { + fn record(uuid: &str, vuln_ids: &[&str], file_paths: &[&str]) -> PatchRecord { + let mut files = HashMap::new(); + for fp in file_paths { + files.insert( + fp.to_string(), + PatchFileInfo { + before_hash: "b".repeat(64), + after_hash: "a".repeat(64), + }, + ); + } + let mut vulns = HashMap::new(); + for id in vuln_ids { + vulns.insert( + id.to_string(), + VulnerabilityInfo { + cves: vec![], + summary: "s".to_string(), + severity: "high".to_string(), + description: "d".to_string(), + }, + ); + } + PatchRecord { + uuid: uuid.to_string(), + exported_at: "2024-01-01T00:00:00Z".to_string(), + files, + vulnerabilities: vulns, + description: "desc".to_string(), + license: "MIT".to_string(), + tier: "free".to_string(), + } } - env + + let mut patches = HashMap::new(); + patches.insert( + "pkg:npm/zeta@1.0.0".to_string(), + record("uuid-z", &["GHSA-zzzz-2222-3333", "GHSA-aaaa-2222-3333"], &["z/b.js", "z/a.js"]), + ); + patches.insert( + "pkg:npm/alpha@1.0.0".to_string(), + record("uuid-a", &["GHSA-mmmm-2222-3333"], &["a/zz.js", "a/aa.js"]), + ); + patches.insert( + "pkg:npm/mid@1.0.0".to_string(), + record("uuid-m", &["GHSA-cccc-2222-3333"], &["m/x.js"]), + ); + PatchManifest { patches } } #[test] fn list_emits_discovered_event_per_patch() { - let env = build_envelope(&sample_manifest()); + let env = build_list_envelope(&sample_manifest()); let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); assert_eq!(v["command"], "list"); assert_eq!(v["status"], "success"); @@ -247,7 +299,7 @@ mod tests { #[test] fn list_event_carries_vulnerability_details() { - let env = build_envelope(&sample_manifest()); + let env = build_list_envelope(&sample_manifest()); let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); let event = &v["events"][0]; assert_eq!(event["details"]["tier"], "free"); @@ -261,10 +313,84 @@ mod tests { #[test] fn empty_manifest_emits_empty_events() { - let env = 
build_envelope(&PatchManifest::new()); + let env = build_list_envelope(&PatchManifest::new()); let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); assert_eq!(v["status"], "success"); assert_eq!(v["events"].as_array().unwrap().len(), 0); assert_eq!(v["summary"]["discovered"], 0); } + + // -- Regression: stable ordering ------------------------------------- + // `HashMap` iteration order is randomized per run, so without explicit + // sorting the events / vulnerabilities / files arrays would shuffle + // between invocations. These pin the sorted contract so consumers can + // diff `list --json` output in CI logs. + + #[test] + fn events_are_sorted_by_purl() { + let env = build_list_envelope(&multi_entry_manifest()); + let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); + let purls: Vec<&str> = v["events"] + .as_array() + .unwrap() + .iter() + .map(|e| e["purl"].as_str().unwrap()) + .collect(); + assert_eq!( + purls, + vec![ + "pkg:npm/alpha@1.0.0", + "pkg:npm/mid@1.0.0", + "pkg:npm/zeta@1.0.0", + ] + ); + } + + #[test] + fn vulnerabilities_are_sorted_by_id() { + let env = build_list_envelope(&multi_entry_manifest()); + let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); + // The zeta entry carries two advisories inserted out of order. 
+ let zeta = v["events"] + .as_array() + .unwrap() + .iter() + .find(|e| e["purl"] == "pkg:npm/zeta@1.0.0") + .unwrap(); + let ids: Vec<&str> = zeta["details"]["vulnerabilities"] + .as_array() + .unwrap() + .iter() + .map(|vuln| vuln["id"].as_str().unwrap()) + .collect(); + assert_eq!(ids, vec!["GHSA-aaaa-2222-3333", "GHSA-zzzz-2222-3333"]); + } + + #[test] + fn files_are_sorted_by_path() { + let env = build_list_envelope(&multi_entry_manifest()); + let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); + let zeta = v["events"] + .as_array() + .unwrap() + .iter() + .find(|e| e["purl"] == "pkg:npm/zeta@1.0.0") + .unwrap(); + let paths: Vec<&str> = zeta["files"] + .as_array() + .unwrap() + .iter() + .map(|f| f["path"].as_str().unwrap()) + .collect(); + assert_eq!(paths, vec!["z/a.js", "z/b.js"]); + } + + #[test] + fn ordering_is_deterministic_across_builds() { + // Separately built manifests (independent HashMap instances) must serialize identically. + let first = multi_entry_manifest(); + let a = build_list_envelope(&first).to_pretty_json(); + let b = build_list_envelope(&multi_entry_manifest()).to_pretty_json(); + assert_eq!(a, b); + } } diff --git a/crates/socket-patch-cli/src/commands/lock_cli.rs b/crates/socket-patch-cli/src/commands/lock_cli.rs index 3938152c..6c109a33 100644 --- a/crates/socket-patch-cli/src/commands/lock_cli.rs +++ b/crates/socket-patch-cli/src/commands/lock_cli.rs @@ -103,14 +103,7 @@ pub fn acquire_or_emit( match acquire(socket_dir, timeout) { Ok(guard) => Ok(LockAcquired { guard, broke_lock }), Err(LockError::Held) => { - let msg = if timeout > Duration::ZERO { - format!( - "another socket-patch process is operating in this directory (waited {}s)", - timeout.as_secs() - ) - } else { - "another socket-patch process is operating in this directory".to_string() - }; + let msg = held_message(timeout); emit( command, json, @@ -151,6 +144,43 @@ pub fn record_lock_broken(env: &mut Envelope, socket_dir: &Path) {
env.record(lock_broken_event(socket_dir)); } +/// Human-readable description of a `lock_held` contention for the given +/// wait budget. A zero budget means the historical non-blocking +/// try-once, so we omit the "(waited …)" clause entirely. +fn held_message(timeout: Duration) -> String { + if timeout > Duration::ZERO { + format!( + "another socket-patch process is operating in this directory (waited {})", + fmt_duration(timeout) + ) + } else { + "another socket-patch process is operating in this directory".to_string() + } +} + +/// Format a wait budget for humans. Whole seconds read naturally +/// (`5s`); sub-second budgets — reachable through the library API even +/// though the CLI only ever passes whole seconds — render as +/// milliseconds rather than truncating to a misleading `0s`. +fn fmt_duration(d: Duration) -> String { + if d.subsec_nanos() == 0 { + format!("{}s", d.as_secs()) + } else { + format!("{}ms", d.as_millis()) + } +} + +/// Build the top-level error envelope emitted in `--json` mode when +/// lock acquisition fails. Split out from [`emit`] so the serialized +/// shape (status / error.code / command / dryRun) is unit-testable +/// without capturing stdout. +fn error_envelope(command: Command, dry_run: bool, code: &str, message: &str) -> Envelope { + let mut env = Envelope::new(command); + env.dry_run = dry_run; + env.mark_error(EnvelopeError::new(code, message)); + env +} + fn emit( command: Command, json: bool, @@ -161,10 +191,7 @@ fn emit( hint_dir: Option<&Path>, ) { if json { - let mut env = Envelope::new(command); - env.dry_run = dry_run; - env.mark_error(EnvelopeError::new(code, message)); - println!("{}", env.to_pretty_json()); + println!("{}", error_envelope(command, dry_run, code, message).to_pretty_json()); } else if !silent { eprintln!("Error: {message}."); if hint_dir.is_some() { @@ -325,6 +352,67 @@ mod tests { ); } + /// Whole-second budgets read naturally in the contention message. 
+ #[test] + fn held_message_reports_whole_seconds() { + assert_eq!( + held_message(Duration::from_secs(5)), + "another socket-patch process is operating in this directory (waited 5s)" + ); + } + + /// Regression: `timeout.as_secs()` truncated a 250ms budget to + /// `(waited 0s)`, which read as "we didn't wait at all". Sub-second + /// budgets now surface as milliseconds. The 250ms budget mirrors + /// `acquire_or_emit_honors_lock_timeout`, so the message stays + /// honest for the exact value that test exercises. + #[test] + fn held_message_does_not_truncate_sub_second_to_zero() { + let msg = held_message(Duration::from_millis(250)); + assert!(msg.contains("250ms"), "expected ms rendering, got: {msg}"); + assert!( + !msg.contains("0s"), + "sub-second budget must not collapse to 0s: {msg}" + ); + } + + /// A zero budget is the non-blocking try-once shape — no "(waited …)" + /// clause, since we never actually waited. + #[test] + fn held_message_zero_timeout_omits_waited_clause() { + let msg = held_message(Duration::ZERO); + assert!(!msg.contains("waited"), "zero budget should not claim a wait: {msg}"); + } + + /// The `--json` failure envelope (previously emitted only via + /// `println!`, so untested) has the stable error shape downstream + /// consumers pattern-match on: top-level `status: "error"` and + /// `error.code` carrying the lock reason tag. + #[test] + fn error_envelope_has_stable_lock_held_shape() { + let env = error_envelope(Command::Apply, false, "lock_held", "held by another run"); + let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); + assert_eq!(v["command"], "apply"); + assert_eq!(v["status"], "error"); + assert_eq!(v["dryRun"], false); + assert_eq!(v["error"]["code"], "lock_held"); + assert_eq!(v["error"]["message"], "held by another run"); + // A pre-event failure carries no events. 
+ assert_eq!(v["events"].as_array().unwrap().len(), 0); + } + + /// `dry_run` and `command` are plumbed through to the envelope so a + /// contention during a dry-run apply/rollback is still reported as + /// a dry run. Covers the other two reason tags too. + #[test] + fn error_envelope_propagates_dry_run_and_command() { + let env = error_envelope(Command::Rollback, true, "lock_io", "open failed"); + let v: serde_json::Value = serde_json::from_str(&env.to_pretty_json()).unwrap(); + assert_eq!(v["command"], "rollback"); + assert_eq!(v["dryRun"], true); + assert_eq!(v["error"]["code"], "lock_io"); + } + #[test] fn lock_broken_event_uses_documented_code() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-cli/src/commands/remove.rs b/crates/socket-patch-cli/src/commands/remove.rs index 9984518c..cc19ad47 100644 --- a/crates/socket-patch-cli/src/commands/remove.rs +++ b/crates/socket-patch-cli/src/commands/remove.rs @@ -147,7 +147,11 @@ pub async fn run(args: RemoveArgs) -> i32 { } for (purl, patch) in &matching { let file_count = patch.files.len(); - eprintln!(" - {} (UUID: {}, {} file(s))", purl, &patch.uuid[..8], file_count); + // Short-UUID for display only. Slice on a char boundary and + // tolerate UUIDs shorter than 8 chars — a malformed manifest + // must not panic the whole command in the display path. + let short_uuid = patch.uuid.get(..8).unwrap_or(patch.uuid.as_str()); + eprintln!(" - {} (UUID: {}, {} file(s))", purl, short_uuid, file_count); } eprintln!(); } @@ -175,7 +179,7 @@ pub async fn run(args: RemoveArgs) -> i32 { Some(&args.identifier), false, args.common.json, // silent when JSON - false, + args.common.offline, args.common.global, args.common.global_prefix.clone(), None, @@ -286,8 +290,12 @@ pub async fn run(args: RemoveArgs) -> i32 { for purl in &removed { env.record(PatchEvent::new(PatchAction::Removed, purl.clone())); } - // One artifact-level Removed event covering swept blobs. 
- if blobs_removed > 0 { + // One artifact-level Removed event carrying the + // blob-sweep and rollback counts. Emitted whenever either + // is non-zero so the `rolledBack` count is still reported + // even when no blobs happened to be swept (e.g. the removed + // patch's afterHash blobs are still referenced elsewhere). + if blobs_removed > 0 || rollback_count > 0 { env.record( PatchEvent::artifact(PatchAction::Removed).with_details(serde_json::json!({ "blobsRemoved": blobs_removed, @@ -446,4 +454,87 @@ mod tests { assert_eq!(removed, vec!["pkg:pypi/six@1.16.0?artifact_id=wheel-cp312"]); assert_eq!(manifest.patches.len(), 3); } + + /// A plain (qualifier-free) npm PURL removes exactly its own entry and + /// must not accidentally match same-prefix neighbours like + /// `foobar@1.0`. Guards the `strip_purl_qualifiers == identifier` + /// exact-equality path for non-PyPI keys. + #[tokio::test] + async fn remove_npm_purl_is_exact_and_does_not_prefix_match() { + let tmp = tempfile::tempdir().expect("tempdir"); + let mut patches = HashMap::new(); + patches.insert("pkg:npm/foo@1.0".to_string(), make_record("uuid-foo")); + patches.insert("pkg:npm/foobar@1.0".to_string(), make_record("uuid-foobar")); + let manifest = PatchManifest { patches }; + let manifest_path = tmp.path().join("manifest.json"); + write_manifest(&manifest_path, &manifest) + .await + .expect("write manifest"); + + let (removed, manifest) = + remove_patch_from_manifest("pkg:npm/foo@1.0", &manifest_path) + .await + .expect("remove ok"); + + assert_eq!(removed, vec!["pkg:npm/foo@1.0"]); + assert_eq!(manifest.patches.len(), 1); + assert!(manifest.patches.contains_key("pkg:npm/foobar@1.0")); + } + + /// An identifier that matches nothing removes nothing and — crucially + /// — must NOT rewrite the manifest file. We assert byte-identity of + /// the on-disk manifest before/after so a future change that always + /// re-serializes (churning mtime / formatting) is caught. 
+ #[tokio::test] + async fn remove_no_match_leaves_manifest_file_untouched() { + let tmp = tempfile::tempdir().expect("tempdir"); + write_multi_variant(tmp.path()).await; + let manifest_path = tmp.path().join("manifest.json"); + let before_bytes = tokio::fs::read(&manifest_path).await.expect("read before"); + + let (removed, manifest) = + remove_patch_from_manifest("pkg:npm/not-here@9.9.9", &manifest_path) + .await + .expect("remove ok"); + + assert!(removed.is_empty(), "nothing should match"); + assert_eq!(manifest.patches.len(), 4, "manifest left intact"); + let after_bytes = tokio::fs::read(&manifest_path).await.expect("read after"); + assert_eq!( + before_bytes, after_bytes, + "a no-op remove must not rewrite the manifest file" + ); + } + + /// A base PURL must not bleed across versions: removing `six@1.16.0` + /// leaves `six@1.17.0` (and its variants) in place. + #[tokio::test] + async fn remove_base_purl_does_not_touch_other_versions() { + let tmp = tempfile::tempdir().expect("tempdir"); + let mut patches = HashMap::new(); + patches.insert( + "pkg:pypi/six@1.16.0?artifact_id=sdist".to_string(), + make_record("uuid-16-sdist"), + ); + patches.insert( + "pkg:pypi/six@1.17.0?artifact_id=sdist".to_string(), + make_record("uuid-17-sdist"), + ); + let manifest = PatchManifest { patches }; + let manifest_path = tmp.path().join("manifest.json"); + write_manifest(&manifest_path, &manifest) + .await + .expect("write manifest"); + + let (removed, manifest) = + remove_patch_from_manifest("pkg:pypi/six@1.16.0", &manifest_path) + .await + .expect("remove ok"); + + assert_eq!(removed, vec!["pkg:pypi/six@1.16.0?artifact_id=sdist"]); + assert_eq!(manifest.patches.len(), 1); + assert!(manifest + .patches + .contains_key("pkg:pypi/six@1.17.0?artifact_id=sdist")); + } } diff --git a/crates/socket-patch-cli/src/commands/repair.rs b/crates/socket-patch-cli/src/commands/repair.rs index ac064d16..36a8c140 100644 --- a/crates/socket-patch-cli/src/commands/repair.rs +++ 
b/crates/socket-patch-cli/src/commands/repair.rs @@ -15,7 +15,7 @@ use std::time::Duration; use crate::args::{apply_env_toggles, GlobalArgs}; use crate::commands::lock_cli::{acquire_or_emit, lock_broken_event}; -use crate::json_envelope::{Command, Envelope, EnvelopeError, PatchAction, PatchEvent}; +use crate::json_envelope::{Command, Envelope, EnvelopeError, PatchAction, PatchEvent, Status}; #[derive(Args)] pub struct RepairArgs { @@ -91,18 +91,37 @@ pub async fn run(args: RepairArgs) -> i32 { // stay in sync). env.record(lock_broken_event(socket_dir)); } - track_patch_repaired( - counts.downloaded, - counts.cleaned, - 0, - args.common.api_token.as_deref(), - args.common.org.as_deref(), - ) - .await; + // A repair where some artifacts failed to download is marked a + // partial failure inside `repair_inner` (a `Failed` event plus + // `mark_partial_failure`). Mirror `apply`: surface that as a + // non-zero exit and the failure telemetry, so a CI guarding on + // the exit code doesn't treat a half-finished repair as success. + let had_failure = matches!(env.status, Status::PartialFailure | Status::Error); + if had_failure { + track_patch_repair_failed( + "One or more artifacts failed to download", + args.common.api_token.as_deref(), + args.common.org.as_deref(), + ) + .await; + } else { + track_patch_repaired( + counts.downloaded, + counts.cleaned, + counts.bytes_freed, + args.common.api_token.as_deref(), + args.common.org.as_deref(), + ) + .await; + } if args.common.json { println!("{}", env.to_pretty_json()); } - 0 + if had_failure { + 1 + } else { + 0 + } } Err(e) => { track_patch_repair_failed( @@ -125,12 +144,13 @@ pub async fn run(args: RepairArgs) -> i32 { } /// Aggregate counts surfaced by `repair_inner` for telemetry use. 
-struct RepairCounts { +pub(crate) struct RepairCounts { downloaded: usize, cleaned: usize, + bytes_freed: u64, } -async fn repair_inner( +pub(crate) async fn repair_inner( args: &RepairArgs, manifest_path: &Path, ) -> Result<(Envelope, RepairCounts), String> { @@ -150,6 +170,7 @@ async fn repair_inner( let mut download_failed_count = 0usize; let mut blobs_cleaned = 0usize; let mut blobs_checked = 0usize; + let mut bytes_freed = 0u64; // Step 1: Check for and download missing artifacts in the requested // mode. Counts below refer to whatever kind of artifact was requested @@ -245,6 +266,7 @@ async fn repair_inner( Ok(cleanup_result) => { blobs_checked += cleanup_result.blobs_checked; blobs_cleaned += cleanup_result.blobs_removed; + bytes_freed += cleanup_result.bytes_freed; if !args.common.json { if cleanup_result.blobs_checked == 0 { println!("No blobs directory found, nothing to clean up."); @@ -270,6 +292,7 @@ async fn repair_inner( Ok(cleanup_result) => { blobs_checked += cleanup_result.blobs_checked; blobs_cleaned += cleanup_result.blobs_removed; + bytes_freed += cleanup_result.bytes_freed; if !args.common.json && cleanup_result.blobs_removed > 0 { println!( "{}", @@ -290,6 +313,7 @@ async fn repair_inner( Ok(cleanup_result) => { blobs_checked += cleanup_result.blobs_checked; blobs_cleaned += cleanup_result.blobs_removed; + bytes_freed += cleanup_result.bytes_freed; if !args.common.json && cleanup_result.blobs_removed > 0 { println!( "{}", @@ -320,7 +344,11 @@ async fn repair_inner( } else { PatchAction::Downloaded }; - if downloaded_count > 0 || (args.common.dry_run && missing_count > 0) { + // Only the online path downloads (or, in dry-run, *would* download). + // In offline mode nothing is fetched even when artifacts are missing, + // so don't record a download/would-download event there — that would + // contradict the human-readable path, which only prints a warning. 
+ if downloaded_count > 0 || (!args.common.offline && args.common.dry_run && missing_count > 0) { let count = if args.common.dry_run { missing_count } else { @@ -358,6 +386,186 @@ async fn repair_inner( RepairCounts { downloaded: downloaded_count, cleaned: blobs_cleaned, + bytes_freed, }, )) } + +#[cfg(test)] +mod tests { + //! Unit tests for `repair_inner` — the offline cleanup / event-recording + //! core. These run without a network (all use `--offline`), exercising + //! the orphan-cleanup and envelope-building paths directly so the + //! contract is pinned independently of the binary harness. + use super::*; + use crate::args::GlobalArgs; + use std::path::PathBuf; + + const MANIFEST_JSON: &str = r#"{ + "patches": { + "pkg:npm/__repair_unit__@1.0.0": { + "uuid": "11111111-1111-4111-8111-111111111111", + "exportedAt": "2024-01-01T00:00:00Z", + "files": { + "package/index.js": { + "beforeHash": "0000000000000000000000000000000000000000000000000000000000000000", + "afterHash": "1111111111111111111111111111111111111111111111111111111111111111" + } + }, + "vulnerabilities": {}, + "description": "unit test patch", + "license": "MIT", + "tier": "free" + } + } + }"#; + + const REFERENCED_HASH: &str = + "1111111111111111111111111111111111111111111111111111111111111111"; + + /// Write a `.socket/manifest.json` under `root` and return the socket dir. 
+ fn make_socket(root: &Path) -> PathBuf { + let socket = root.join(".socket"); + std::fs::create_dir_all(&socket).unwrap(); + std::fs::write(socket.join("manifest.json"), MANIFEST_JSON).unwrap(); + socket + } + + fn write_blob(socket: &Path, hash: &str, content: &[u8]) { + let blobs = socket.join("blobs"); + std::fs::create_dir_all(&blobs).unwrap(); + std::fs::write(blobs.join(hash), content).unwrap(); + } + + fn offline_args(cwd: &Path) -> RepairArgs { + RepairArgs { + common: GlobalArgs { + cwd: cwd.to_path_buf(), + manifest_path: ".socket/manifest.json".to_string(), + offline: true, + json: true, + download_mode: "file".to_string(), + ..GlobalArgs::default() + }, + download_only: false, + } + } + + /// True when `env` carries the download / would-download artifact event + /// (identified by its `details.mode` field, unique to that event). + fn has_download_event(env: &Envelope) -> bool { + env.events.iter().any(|e| { + e.details + .as_ref() + .and_then(|d| d.get("mode")) + .is_some() + }) + } + + /// Regression for the offline + dry-run leak: with `--offline` set, the + /// download phase is skipped entirely, so even in dry-run mode a missing + /// artifact must NOT produce a "would-download" (verified) event. Before + /// the fix the event was recorded unconditionally on `dry_run && + /// missing > 0`, contradicting the human-readable path (which only warns). + #[tokio::test] + async fn offline_dry_run_does_not_record_download_event() { + let tmp = tempfile::tempdir().unwrap(); + let socket = make_socket(tmp.path()); + // No blob on disk → the manifest's afterHash is "missing". 
+ let mut args = offline_args(tmp.path()); + args.common.dry_run = true; + + let (env, counts) = repair_inner(&args, &socket.join("manifest.json")) + .await + .expect("repair_inner"); + + assert!( + !has_download_event(&env), + "offline dry-run must not emit a download/would-download event; events={:?}", + env.events + ); + assert_eq!(counts.downloaded, 0); + assert_eq!(env.status, Status::Success); + } + + /// The online dry-run path *should* still preview the download — this + /// pins that the offline gate didn't over-correct. We can't hit the + /// network here, but `repair_inner`'s dry-run branch records the event + /// from the missing-artifact list without contacting the server. + #[tokio::test] + async fn online_dry_run_records_would_download_event() { + let tmp = tempfile::tempdir().unwrap(); + let socket = make_socket(tmp.path()); + let mut args = offline_args(tmp.path()); + args.common.offline = false; + args.common.dry_run = true; + + let (env, _counts) = repair_inner(&args, &socket.join("manifest.json")) + .await + .expect("repair_inner"); + + assert!( + has_download_event(&env), + "online dry-run must preview the download; events={:?}", + env.events + ); + } + + /// Regression for the dropped `bytes_freed`: cleanup of an orphan blob + /// must report the reclaimed byte count up through `RepairCounts` so the + /// telemetry `bytes_freed` field is non-zero (it was hardcoded to 0). 
+ #[tokio::test] + async fn cleanup_reports_bytes_freed_and_removed_count() { + let tmp = tempfile::tempdir().unwrap(); + let socket = make_socket(tmp.path()); + write_blob(&socket, REFERENCED_HASH, b"kept"); + let orphan_hash = "deadbeef".repeat(8); // 64 hex chars + let orphan_bytes = b"orphaned content bytes"; + write_blob(&socket, &orphan_hash, orphan_bytes); + + let args = offline_args(tmp.path()); + let (env, counts) = repair_inner(&args, &socket.join("manifest.json")) + .await + .expect("repair_inner"); + + assert_eq!(counts.cleaned, 1, "one orphan should be cleaned"); + assert_eq!( + counts.bytes_freed, + orphan_bytes.len() as u64, + "bytes_freed must reflect the reclaimed orphan size" + ); + // The referenced blob survives; the orphan is gone. + assert!(socket.join("blobs").join(REFERENCED_HASH).exists()); + assert!(!socket.join("blobs").join(&orphan_hash).exists()); + // A Removed event is recorded for the swept orphan. + assert_eq!(env.summary.removed, 1); + } + + /// `--download-only` skips the cleanup pass, so an orphan blob survives + /// and `bytes_freed` stays zero. (Run without `--offline`, which is + /// mutually exclusive; the manifest's blob is present so the online + /// download phase has nothing to fetch and never touches the network.) 
+ #[tokio::test] + async fn download_only_skips_cleanup() { + let tmp = tempfile::tempdir().unwrap(); + let socket = make_socket(tmp.path()); + write_blob(&socket, REFERENCED_HASH, b"kept"); + let orphan_hash = "feedface".repeat(8); + write_blob(&socket, &orphan_hash, b"orphan"); + + let mut args = offline_args(tmp.path()); + args.common.offline = false; + args.download_only = true; + + let (_env, counts) = repair_inner(&args, &socket.join("manifest.json")) + .await + .expect("repair_inner"); + + assert_eq!(counts.cleaned, 0, "download-only must skip cleanup"); + assert_eq!(counts.bytes_freed, 0); + assert!( + socket.join("blobs").join(&orphan_hash).exists(), + "orphan must survive when cleanup is skipped" + ); + } +} diff --git a/crates/socket-patch-cli/src/commands/rollback.rs b/crates/socket-patch-cli/src/commands/rollback.rs index 7401c92c..f2690703 100644 --- a/crates/socket-patch-cli/src/commands/rollback.rs +++ b/crates/socket-patch-cli/src/commands/rollback.rs @@ -113,6 +113,38 @@ fn verify_rollback_status_str(status: &VerifyRollbackStatus) -> &'static str { } } +/// True when every file the engine verified for this package is already +/// at its original (`beforeHash`) state — i.e. the rollback is a complete +/// no-op on disk. +/// +/// This is the rollback-side mirror of apply's `all_files_already_patched`. +/// The `!is_empty()` guard is essential: `Iterator::all` over an empty +/// slice is vacuously `true`. Without it a result with no verified files +/// — a zero-file patch record, or a result whose `files_verified` came +/// back empty — would be mislabeled "already original" and miscounted as +/// a no-op even though nothing matched `beforeHash`. 
+fn all_files_already_original(result: &RollbackResult) -> bool { + !result.files_verified.is_empty() + && result + .files_verified + .iter() + .all(|f| f.status == VerifyRollbackStatus::AlreadyOriginal) +} + +/// Number of packages that have files which actually need restoring, +/// used by the dry-run summary. Successful-but-already-original packages +/// are no-ops reported on their own line, so they are excluded here — +/// mirroring apply's dry-run split — to avoid double-counting them +/// against "can be rolled back". +fn can_rollback_count(results: &[RollbackResult]) -> usize { + let successful = results.iter().filter(|r| r.success).count(); + let already_original = results + .iter() + .filter(|r| r.success && all_files_already_original(r)) + .count(); + successful.saturating_sub(already_original) +} + fn result_to_json(result: &RollbackResult) -> serde_json::Value { serde_json::json!({ "purl": result.package_key, @@ -209,12 +241,7 @@ pub async fn run(args: RollbackArgs) -> i32 { .count(); let already_original_count = results .iter() - .filter(|r| { - r.success - && r.files_verified.iter().all(|f| { - f.status == VerifyRollbackStatus::AlreadyOriginal - }) - }) + .filter(|r| r.success && all_files_already_original(r)) .count(); let failed_count = results.iter().filter(|r| !r.success).count(); @@ -250,18 +277,16 @@ pub async fn run(args: RollbackArgs) -> i32 { .collect(); let already_original: Vec<_> = results .iter() - .filter(|r| { - r.success - && r.files_verified.iter().all(|f| { - f.status == VerifyRollbackStatus::AlreadyOriginal - }) - }) + .filter(|r| r.success && all_files_already_original(r)) .collect(); let failed: Vec<_> = results.iter().filter(|r| !r.success).collect(); if args.common.dry_run { println!("\nRollback verification complete:"); - let can_rollback = results.iter().filter(|r| r.success).count(); + // Exclude already-original packages — they are + // reported separately just below, so counting them + // here too would double-report each 
no-op. + let can_rollback = can_rollback_count(&results); println!(" {can_rollback} package(s) can be rolled back"); if !already_original.is_empty() { println!( @@ -685,4 +710,121 @@ mod tests { find_patches_to_rollback(&manifest, Some("pkg:pypi/six@1.16.0")); assert!(result.iter().all(|p| p.purl.contains("six@1.16.0"))); } + + // --- Summary-counting regressions ----------------------------------- + // + // These pin the rollback summary to the same contract apply uses: + // an "already original" result must have at least one verified file, + // and the dry-run "can be rolled back" count must not double-report + // packages that are already in their original state. + + use socket_patch_core::patch::rollback::VerifyRollbackResult; + + fn verified(status: VerifyRollbackStatus) -> VerifyRollbackResult { + VerifyRollbackResult { + file: "package/index.js".to_string(), + status, + message: None, + current_hash: None, + expected_hash: None, + target_hash: None, + } + } + + /// Build a `RollbackResult` from verification statuses and the list of + /// files reported rolled back. `success` defaults to whether every + /// verified file is Ready/AlreadyOriginal, matching the engine. 
+ fn make_result( + verified_statuses: &[VerifyRollbackStatus], + rolled_back: &[&str], + ) -> RollbackResult { + let files_verified: Vec<_> = + verified_statuses.iter().cloned().map(verified).collect(); + let success = files_verified.iter().all(|f| { + f.status == VerifyRollbackStatus::Ready + || f.status == VerifyRollbackStatus::AlreadyOriginal + }); + RollbackResult { + package_key: "pkg:npm/foo@1.0.0".to_string(), + package_path: "/tmp/foo".to_string(), + success, + files_verified, + files_rolled_back: rolled_back.iter().map(|s| s.to_string()).collect(), + error: None, + } + } + + #[test] + fn all_files_already_original_true_when_every_file_matches() { + let r = make_result( + &[ + VerifyRollbackStatus::AlreadyOriginal, + VerifyRollbackStatus::AlreadyOriginal, + ], + &[], + ); + assert!(all_files_already_original(&r)); + } + + #[test] + fn all_files_already_original_false_when_any_file_differs() { + let r = make_result( + &[ + VerifyRollbackStatus::AlreadyOriginal, + VerifyRollbackStatus::Ready, + ], + &[], + ); + assert!(!all_files_already_original(&r)); + } + + /// Regression: `Iterator::all` over an empty slice is vacuously true. + /// A successful result with no verified files (a zero-file patch + /// record) must NOT be reported as "already original" — the + /// `!is_empty()` guard enforces this, matching apply. + #[test] + fn all_files_already_original_false_when_no_verified_files() { + let r = make_result(&[], &[]); + assert!(r.files_verified.is_empty()); + assert!(r.success); + assert!(!all_files_already_original(&r)); + } + + /// Regression: the dry-run "can be rolled back" count must exclude + /// already-original packages, which are reported on their own line. + /// Otherwise each no-op is double-counted (once as can-rollback, once + /// as already-original). + #[test] + fn can_rollback_count_excludes_already_original() { + let results = vec![ + // Genuinely needs restoring. 
+ make_result(&[VerifyRollbackStatus::Ready], &[]), + // No-op: already at beforeHash. + make_result(&[VerifyRollbackStatus::AlreadyOriginal], &[]), + // Mixed → still needs restoring. + make_result( + &[ + VerifyRollbackStatus::Ready, + VerifyRollbackStatus::AlreadyOriginal, + ], + &[], + ), + // Failed (e.g. HashMismatch) → not counted as rollbackable. + make_result(&[VerifyRollbackStatus::HashMismatch], &[]), + ]; + // 2 successful non-no-op packages; the already-original one is + // excluded and the failed one was never successful. + assert_eq!(can_rollback_count(&results), 2); + } + + /// A summary made entirely of no-ops reports zero rollbackable + /// packages (and `saturating_sub` keeps it from underflowing). + #[test] + fn can_rollback_count_all_already_original_is_zero() { + let results = vec![ + make_result(&[VerifyRollbackStatus::AlreadyOriginal], &[]), + make_result(&[VerifyRollbackStatus::AlreadyOriginal], &[]), + ]; + assert_eq!(can_rollback_count(&results), 0); + } } diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index bf693d4b..9279c83b 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -18,7 +18,9 @@ use crate::args::{apply_env_toggles, GlobalArgs}; use crate::ecosystem_dispatch::crawl_all_ecosystems; use crate::output::{color, confirm, format_severity, stderr_is_tty, stdout_is_tty}; -use super::get::{download_and_apply_patches, select_patches, DownloadParams}; +use super::get::{ + download_and_apply_patches, select_patches, truncate_with_ellipsis, DownloadParams, +}; const DEFAULT_BATCH_SIZE: usize = 100; @@ -214,6 +216,29 @@ pub(crate) fn detect_updates( updates } +/// Collect the deduplicated CVE and GHSA identifiers across every patch of +/// a package, for the scan table's VULNERABILITIES column. 
CVEs are listed +/// before GHSAs and each group is sorted, so the rendered output is stable — +/// the per-patch ID lists and set-based dedup are otherwise nondeterministic +/// in order. Pure / no I/O so it's unit-testable. +pub(crate) fn collect_vuln_ids(pkg: &BatchPackagePatches) -> Vec<String> { + let mut cves: HashSet<String> = HashSet::new(); + let mut ghsas: HashSet<String> = HashSet::new(); + for patch in &pkg.patches { + for cve in &patch.cve_ids { + cves.insert(cve.clone()); + } + for ghsa in &patch.ghsa_ids { + ghsas.insert(ghsa.clone()); + } + } + let mut cves: Vec<String> = cves.into_iter().collect(); + cves.sort(); + let mut ghsas: Vec<String> = ghsas.into_iter().collect(); + ghsas.sort(); + cves.into_iter().chain(ghsas).collect() +} + #[derive(Args)] pub struct ScanArgs { #[command(flatten)] @@ -748,12 +773,10 @@ pub async fn run(args: ScanArgs) -> i32 { println!("{}", "=".repeat(100)); for pkg in &all_packages_with_patches { - let max_purl_len = 40; - let display_purl = if pkg.purl.len() > max_purl_len { - format!("{}...", &pkg.purl[..max_purl_len - 3]) - } else { - pkg.purl.clone() - }; + // Char-safe truncation: a byte slice (`&pkg.purl[..37]`) panics + // when the cut lands mid-codepoint. PURLs can carry non-ASCII + // names/qualifiers, so route through the shared helper.
+ let display_purl = truncate_with_ellipsis(&pkg.purl, 40); let pkg_free = pkg.patches.iter().filter(|p| p.tier == "free").count(); let pkg_paid = pkg.patches.iter().filter(|p| p.tier == "paid").count(); @@ -776,18 +799,9 @@ pub async fn run(args: ScanArgs) -> i32 { .min_by_key(|s| severity_order(s)) .unwrap_or("unknown"); - // Collect vuln IDs - let mut all_cves = HashSet::new(); - let mut all_ghsas = HashSet::new(); - for patch in &pkg.patches { - for cve in &patch.cve_ids { - all_cves.insert(cve.clone()); - } - for ghsa in &patch.ghsa_ids { - all_ghsas.insert(ghsa.clone()); - } - } - let vuln_ids: Vec<_> = all_cves.into_iter().chain(all_ghsas).collect(); + // Collect vuln IDs (deterministic: deduped, CVEs then GHSAs, + // each group sorted — see collect_vuln_ids). + let vuln_ids = collect_vuln_ids(pkg); let vuln_str = if vuln_ids.len() > 2 { format!( "{} (+{})", @@ -960,11 +974,9 @@ pub async fn run(args: ScanArgs) -> i32 { let sev_display = highest_severity.unwrap_or("unknown"); let sev_colored = format_severity(sev_display, use_color); - let desc = if patch.description.len() > 72 { - format!("{}...", &patch.description[..69]) - } else { - patch.description.clone() - }; + // Char-safe: descriptions come straight from the API and routinely + // contain non-ASCII text; a `&desc[..69]` byte slice would panic. + let desc = truncate_with_ellipsis(&patch.description, 72); println!( " {} [{}] {}", @@ -978,11 +990,9 @@ pub async fn run(args: ScanArgs) -> i32 { // Show per-vulnerability summaries for vuln in patch.vulnerabilities.values() { if !vuln.summary.is_empty() { - let summary = if vuln.summary.len() > 76 { - format!("{}...", &vuln.summary[..73]) - } else { - vuln.summary.clone() - }; + // Char-safe: vulnerability summaries are API-sourced free + // text; a `&summary[..73]` byte slice would panic mid-codepoint. 
+ let summary = truncate_with_ellipsis(&vuln.summary, 76); let cve_label = if vuln.cves.is_empty() { String::new() } else { @@ -1287,4 +1297,118 @@ mod tests { let out = detect_prunable(&m, &scanned(&[])); assert_eq!(out.len(), 2, "all variants of a gone package should prune"); } + + // ---- collect_vuln_ids -------------------------------------------------- + + /// Build a single-patch package whose patch carries the given CVE and + /// GHSA identifier lists. + fn batch_with_vulns(purl: &str, cves: &[&str], ghsas: &[&str]) -> BatchPackagePatches { + BatchPackagePatches { + purl: purl.to_string(), + patches: vec![BatchPatchInfo { + uuid: "uuid".to_string(), + purl: purl.to_string(), + tier: "free".to_string(), + cve_ids: cves.iter().map(|s| (*s).to_string()).collect(), + ghsa_ids: ghsas.iter().map(|s| (*s).to_string()).collect(), + severity: None, + title: String::new(), + }], + } + } + + #[test] + fn collect_vuln_ids_empty_when_no_vulns() { + let pkg = batch_with_vulns("pkg:npm/foo@1.0", &[], &[]); + assert!(collect_vuln_ids(&pkg).is_empty()); + } + + #[test] + fn collect_vuln_ids_lists_cves_before_ghsas_each_sorted() { + // Deliberately unsorted input; output must be CVEs (sorted) then + // GHSAs (sorted) so the rendered table column is deterministic. + let pkg = batch_with_vulns( + "pkg:npm/foo@1.0", + &["CVE-2024-2", "CVE-2024-1"], + &["GHSA-zzzz-zzzz-zzzz", "GHSA-aaaa-aaaa-aaaa"], + ); + assert_eq!( + collect_vuln_ids(&pkg), + vec![ + "CVE-2024-1".to_string(), + "CVE-2024-2".to_string(), + "GHSA-aaaa-aaaa-aaaa".to_string(), + "GHSA-zzzz-zzzz-zzzz".to_string(), + ], + ); + } + + #[test] + fn collect_vuln_ids_dedups_across_patches() { + // The same CVE appears on two patches of one package; it must be + // reported once. 
+ let pkg = BatchPackagePatches { + purl: "pkg:npm/foo@1.0".to_string(), + patches: vec![ + BatchPatchInfo { + uuid: "u1".to_string(), + purl: "pkg:npm/foo@1.0".to_string(), + tier: "free".to_string(), + cve_ids: vec!["CVE-2024-1".to_string()], + ghsa_ids: vec![], + severity: None, + title: String::new(), + }, + BatchPatchInfo { + uuid: "u2".to_string(), + purl: "pkg:npm/foo@1.0".to_string(), + tier: "free".to_string(), + cve_ids: vec!["CVE-2024-1".to_string()], + ghsa_ids: vec!["GHSA-aaaa-aaaa-aaaa".to_string()], + severity: None, + title: String::new(), + }, + ], + }; + assert_eq!( + collect_vuln_ids(&pkg), + vec![ + "CVE-2024-1".to_string(), + "GHSA-aaaa-aaaa-aaaa".to_string(), + ], + ); + } + + // ---- truncate_with_ellipsis (scan's display columns) ------------------- + // scan.rs renders PURLs, descriptions, and vulnerability summaries — all + // API-sourced and potentially non-ASCII — into fixed-width columns. These + // pin scan's use of the char-safe helper; a raw `&s[..n]` byte slice + // would panic when the cut lands mid-codepoint. + + #[test] + fn truncate_multibyte_purl_does_not_panic() { + // 30 three-byte chars (90 bytes, 30 chars). The old purl path sliced + // `&purl[..37]` once `len() > 40`; byte 37 splits a codepoint here. + let purl = format!("pkg:npm/{}", "日".repeat(30)); + let out = truncate_with_ellipsis(&purl, 40); + assert!(out.chars().count() <= 40); + } + + #[test] + fn truncate_multibyte_description_truncates_on_char_boundary() { + // 100 two-byte chars; description column truncates at 72. + let desc = "é".repeat(100); + let out = truncate_with_ellipsis(&desc, 72); + assert_eq!(out.chars().count(), 72); + assert!(out.ends_with("...")); + } + + #[test] + fn truncate_multibyte_summary_truncates_on_char_boundary() { + // Summary column truncates at 76. 
+ let summary = "—".repeat(100); // em dash, 3 bytes each + let out = truncate_with_ellipsis(&summary, 76); + assert_eq!(out.chars().count(), 76); + assert!(out.ends_with("...")); + } } diff --git a/crates/socket-patch-cli/src/commands/setup.rs b/crates/socket-patch-cli/src/commands/setup.rs index 904168c6..5a42cfdc 100644 --- a/crates/socket-patch-cli/src/commands/setup.rs +++ b/crates/socket-patch-cli/src/commands/setup.rs @@ -155,12 +155,18 @@ pub async fn run(args: SetupArgs) -> i32 { } if to_update.is_empty() { + // Nothing to update — but that can mean two very different things: + // every file is already configured (a clean exit 0), or some files + // failed to process (e.g. malformed JSON). Errors must surface with + // an honest status and a non-zero exit; otherwise a parse failure is + // silently reported as "already configured" and CI reads it as success. + let errs = errors.len(); if args.common.json { println!("{}", serde_json::to_string_pretty(&serde_json::json!({ - "status": "already_configured", + "status": if errs > 0 { "error" } else { "already_configured" }, "updated": 0, "alreadyConfigured": already_configured.len(), - "errors": errors.len(), + "errors": errs, "files": preview_results.iter().map(|r| { serde_json::json!({ "path": r.path, @@ -173,10 +179,15 @@ pub async fn run(args: SetupArgs) -> i32 { }) }).collect::<Vec<_>>(), })).unwrap()); + } else if errs > 0 { + // Individual errors were already listed in the preview above. + println!( + "No files were updated; {errs} file(s) could not be processed (see errors above)." + ); } else { println!("All package.json files are already configured with socket-patch!"); } - return 0; + return if errs > 0 { 1 } else { 0 }; } // If not dry-run, ask for confirmation @@ -283,7 +294,9 @@ pub async fn run(args: SetupArgs) -> i32 { println!(" {errs} error(s)"); } } - 0 + // Mirror the non-dry-run path: an unprocessable package.json is a + // failure regardless of dry-run, so it must yield a non-zero exit.
+ if errs > 0 { 1 } else { 0 } } } diff --git a/crates/socket-patch-cli/src/commands/unlock.rs b/crates/socket-patch-cli/src/commands/unlock.rs index fab3c13b..e53c1217 100644 --- a/crates/socket-patch-cli/src/commands/unlock.rs +++ b/crates/socket-patch-cli/src/commands/unlock.rs @@ -50,12 +50,22 @@ pub async fn run(args: UnlockArgs) -> i32 { // holding a lock that doesn't exist). Useful for fresh repos // where the operator wants to confirm no stale state remains. if !socket_dir.exists() { - // No lock to inspect → was_held=false, released matches whether - // the user asked for --release (no file existed to remove). - track_patch_unlocked(false, args.release, api_token.as_deref(), org_slug.as_deref()).await; + // No lock to inspect → was_held=false. Nothing existed to + // remove, so `released` is false regardless of whether the + // user passed --release. Telemetry and the emitted envelope + // must agree on this. + track_patch_unlocked(false, false, api_token.as_deref(), org_slug.as_deref()).await; return emit_free(args.common.json, &lock_file, false, args.release); } + // Snapshot whether a lock file already exists *before* acquiring. + // `acquire` opens the file with `create(true)`, so after the call + // the file always exists — even when the operator's tree was + // clean. To honestly report whether `--release` removed a + // pre-existing leftover (vs. a file the probe itself just + // created), we have to capture this now. + let lock_existed = lock_file.exists(); + match acquire(&socket_dir, Duration::ZERO) { Ok(guard) => { // We successfully claimed the lock — nobody else holds @@ -65,16 +75,29 @@ pub async fn run(args: UnlockArgs) -> i32 { if args.release { match std::fs::remove_file(&lock_file) { + // `remove_file` here almost always returns `Ok` + // (the probe's `acquire` ensured the file exists), + // so we can't infer from it whether a real leftover + // was present — `lock_existed` is the source of + // truth for that. 
We still delete the file (the + // operator asked for a clean slate), but only claim + // we "released" something when a lock file was there + // before we probed. Ok(()) => { - track_patch_unlocked(false, true, api_token.as_deref(), org_slug.as_deref()) - .await; - emit_free(args.common.json, &lock_file, true, true) + track_patch_unlocked( + false, + lock_existed, + api_token.as_deref(), + org_slug.as_deref(), + ) + .await; + emit_free(args.common.json, &lock_file, lock_existed, true) } Err(e) if e.kind() == std::io::ErrorKind::NotFound => { // The file was never created (e.g. socket // dir existed but no run has acquired the // lock yet). Treat as success. - track_patch_unlocked(false, true, api_token.as_deref(), org_slug.as_deref()) + track_patch_unlocked(false, false, api_token.as_deref(), org_slug.as_deref()) .await; emit_free(args.common.json, &lock_file, false, true) } @@ -252,6 +275,26 @@ mod tests { ); } + /// `--release` against a clean `.socket/` (no pre-existing lock + /// file) succeeds, and does not leave behind the file that the + /// probe's `acquire` created on demand. Guards the regression + /// where the probe-created file masqueraded as a released + /// leftover. + #[tokio::test] + async fn run_release_cleans_up_probe_created_file() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + std::fs::create_dir_all(&socket_dir).unwrap(); + assert!(!socket_dir.join("apply.lock").exists()); + + let code = run(args_in(dir.path(), true)).await; + assert_eq!(code, 0); + assert!( + !socket_dir.join("apply.lock").exists(), + "--release must not leave a probe-created lock file behind" + ); + } + /// `--release` against a HELD lock refuses (exit 1), file stays. 
#[tokio::test] async fn run_refuses_release_when_held() { diff --git a/crates/socket-patch-cli/src/commands/vex.rs b/crates/socket-patch-cli/src/commands/vex.rs index f8fbb3f1..f3c38ce3 100644 --- a/crates/socket-patch-cli/src/commands/vex.rs +++ b/crates/socket-patch-cli/src/commands/vex.rs @@ -26,7 +26,7 @@ use socket_patch_core::vex::{ }; use crate::args::{apply_env_toggles, GlobalArgs}; -use crate::ecosystem_dispatch::{find_packages_for_purls, partition_purls}; +use crate::ecosystem_dispatch::{find_packages_for_rollback, partition_purls}; use crate::json_envelope::{ Command, Envelope, EnvelopeError, PatchAction, PatchEvent, }; @@ -270,9 +270,20 @@ async fn resolve_package_paths( cwd: args.common.cwd.clone(), global: args.common.global, global_prefix: args.common.global_prefix.clone(), - batch_size: 0, // unused for find_packages_for_purls + batch_size: 0, // unused for find_packages_for_rollback }; - find_packages_for_purls(&partitioned, &crawler_options, args.common.silent).await + // Use the rollback (qualified-aware) resolver, NOT + // `find_packages_for_purls`. Release-variant ecosystems + // (PyPI / RubyGems / Maven) key the manifest by *qualified* PURLs + // (`?artifact_id=`, `?platform=`, `?classifier=&ext=`), but the + // crawler only knows the *base* PURL. `find_packages_for_purls` + // would key the result map by the base PURL, so the qualified + // lookups in `vex::applied_patches` would all miss and every + // PyPI/Gem/Maven patch would be silently dropped from the VEX doc + // as `package_not_found`. The rollback variant fans each base path + // back out to every qualified manifest PURL — the same mapping the + // manifest was written with (`get` uses the same resolver). 
+ find_packages_for_rollback(&partitioned, &crawler_options, args.common.silent).await } fn emit_envelope_error(args: &VexArgs, code: &str, message: &str) { diff --git a/crates/socket-patch-cli/src/ecosystem_dispatch.rs b/crates/socket-patch-cli/src/ecosystem_dispatch.rs index f18da6a5..eedf8701 100644 --- a/crates/socket-patch-cli/src/ecosystem_dispatch.rs +++ b/crates/socket-patch-cli/src/ecosystem_dispatch.rs @@ -452,6 +452,210 @@ pub async fn crawl_all_ecosystems( mod tests { use super::*; + /// Build a `CrawledPackage` keyed by `purl` whose `path` encodes the + /// supplied directory, for exercising the merge helpers in isolation. + fn pkg(purl: &str, path: &str) -> CrawledPackage { + CrawledPackage { + name: "n".to_string(), + version: "v".to_string(), + namespace: None, + purl: purl.to_string(), + path: PathBuf::from(path), + } + } + + fn packages(entries: &[(&str, &str)]) -> HashMap<String, CrawledPackage> { + entries + .iter() + .map(|(purl, path)| (purl.to_string(), pkg(purl, path))) + .collect() + } + + // ---- merge_first_wins ------------------------------------------------- + + #[test] + fn merge_first_wins_inserts_crawler_keyed_purls() { + let mut out: HashMap<String, PathBuf> = HashMap::new(); + merge_first_wins( + &mut out, + &[], + packages(&[("pkg:npm/foo@1.0", "/a"), ("pkg:npm/bar@2.0", "/b")]), + ); + assert_eq!(out.len(), 2); + assert_eq!(out.get("pkg:npm/foo@1.0"), Some(&PathBuf::from("/a"))); + assert_eq!(out.get("pkg:npm/bar@2.0"), Some(&PathBuf::from("/b"))); + } + + #[test] + fn merge_first_wins_keeps_first_path_across_calls() { + // Simulates the macro calling on_match once per discovered path: + // the first path that yields a given PURL wins.
+ let mut out: HashMap<String, PathBuf> = HashMap::new(); + merge_first_wins(&mut out, &[], packages(&[("pkg:npm/foo@1.0", "/first")])); + merge_first_wins(&mut out, &[], packages(&[("pkg:npm/foo@1.0", "/second")])); + assert_eq!(out.get("pkg:npm/foo@1.0"), Some(&PathBuf::from("/first"))); + } + + #[test] + fn merge_first_wins_ignores_purls_arg() { + // The `purls` slice must not influence first-wins merging — only + // the crawler-returned keys matter. + let mut out: HashMap<String, PathBuf> = HashMap::new(); + let unrelated = vec!["pkg:npm/unrelated@9.9".to_string()]; + merge_first_wins(&mut out, &unrelated, packages(&[("pkg:npm/foo@1.0", "/a")])); + assert_eq!(out.len(), 1); + assert!(out.contains_key("pkg:npm/foo@1.0")); + } + + // ---- merge_qualified -------------------------------------------------- + + #[test] + fn merge_qualified_fans_base_out_to_every_variant() { + // Crawler is queried with the base PURL and returns it keyed to a + // single install dir; every caller-supplied qualified variant that + // strips to that base must map to the same path. + let mut out: HashMap<String, PathBuf> = HashMap::new(); + let qualified = vec![ + "pkg:pypi/requests@2.28.0?artifact_id=wheel".to_string(), + "pkg:pypi/requests@2.28.0?artifact_id=sdist".to_string(), + ]; + merge_qualified( + &mut out, + &qualified, + packages(&[("pkg:pypi/requests@2.28.0", "/site-packages")]), + ); + assert_eq!(out.len(), 2); + assert_eq!( + out.get("pkg:pypi/requests@2.28.0?artifact_id=wheel"), + Some(&PathBuf::from("/site-packages")) + ); + assert_eq!( + out.get("pkg:pypi/requests@2.28.0?artifact_id=sdist"), + Some(&PathBuf::from("/site-packages")) + ); + } + + #[test] + fn merge_qualified_matches_bare_base_identifier() { + // A caller may supply the bare base PURL (no `?`); it strips to + // itself and must still map to the crawler result.
+ let mut out: HashMap<String, PathBuf> = HashMap::new(); + let purls = vec!["pkg:pypi/requests@2.28.0".to_string()]; + merge_qualified( + &mut out, + &purls, + packages(&[("pkg:pypi/requests@2.28.0", "/sp")]), + ); + assert_eq!(out.get("pkg:pypi/requests@2.28.0"), Some(&PathBuf::from("/sp"))); + } + + #[test] + fn merge_qualified_does_not_cross_versions() { + // A variant of a *different* version must not be mapped to the + // crawler result for 2.28.0. + let mut out: HashMap<String, PathBuf> = HashMap::new(); + let purls = vec!["pkg:pypi/requests@2.29.0?artifact_id=wheel".to_string()]; + merge_qualified( + &mut out, + &purls, + packages(&[("pkg:pypi/requests@2.28.0", "/sp")]), + ); + assert!(out.is_empty()); + } + + #[test] + fn merge_qualified_keeps_first_path_per_qualified_key() { + // First discovered path wins for a given qualified key, mirroring + // the per-path iteration in the scan macro. + let mut out: HashMap<String, PathBuf> = HashMap::new(); + let purls = vec!["pkg:gem/nokogiri@1.16.5?platform=arm64-darwin".to_string()]; + merge_qualified(&mut out, &purls, packages(&[("pkg:gem/nokogiri@1.16.5", "/first")])); + merge_qualified(&mut out, &purls, packages(&[("pkg:gem/nokogiri@1.16.5", "/second")])); + assert_eq!( + out.get("pkg:gem/nokogiri@1.16.5?platform=arm64-darwin"), + Some(&PathBuf::from("/first")) + ); + } + + // ---- purls_override helpers ------------------------------------------ + + #[test] + fn dedup_qualified_purls_strips_and_dedupes() { + let purls = vec![ + "pkg:pypi/requests@2.28.0?artifact_id=wheel".to_string(), + "pkg:pypi/requests@2.28.0?artifact_id=sdist".to_string(), + "pkg:pypi/requests@2.28.0".to_string(), + ]; + let mut out = dedup_qualified_purls(&purls); + out.sort(); + assert_eq!(out, vec!["pkg:pypi/requests@2.28.0".to_string()]); + } + + #[test] + fn dedup_qualified_purls_keeps_distinct_bases() { + let purls = vec![ + "pkg:pypi/requests@2.28.0?artifact_id=wheel".to_string(), + "pkg:pypi/flask@3.0.0?artifact_id=wheel".to_string(), + ]; + let mut out =
dedup_qualified_purls(&purls); + out.sort(); + assert_eq!( + out, + vec![ + "pkg:pypi/flask@3.0.0".to_string(), + "pkg:pypi/requests@2.28.0".to_string(), + ] + ); + } + + #[test] + fn passthrough_purls_is_identity() { + let purls = vec![ + "pkg:npm/foo@1.0".to_string(), + "pkg:npm/bar@2.0".to_string(), + ]; + assert_eq!(passthrough_purls(&purls), purls); + } + + /// The dedup/merge release-variant treatment must stay aligned with + /// `Ecosystem::supports_release_variants()`. If a new ecosystem flips + /// that predicate, this test flags that `dispatch_find` needs the + /// matching `dedup_qualified_purls` + `variant_merge` wiring. + #[test] + fn release_variant_predicate_matches_dispatch_expectations() { + assert!(Ecosystem::Pypi.supports_release_variants()); + assert!(Ecosystem::Gem.supports_release_variants()); + #[cfg(feature = "maven")] + assert!(Ecosystem::Maven.supports_release_variants()); + assert!(!Ecosystem::Npm.supports_release_variants()); + #[cfg(feature = "cargo")] + assert!(!Ecosystem::Cargo.supports_release_variants()); + #[cfg(feature = "golang")] + assert!(!Ecosystem::Golang.supports_release_variants()); + #[cfg(feature = "composer")] + assert!(!Ecosystem::Composer.supports_release_variants()); + #[cfg(feature = "nuget")] + assert!(!Ecosystem::Nuget.supports_release_variants()); + #[cfg(feature = "deno")] + assert!(!Ecosystem::Deno.supports_release_variants()); + } + + #[cfg(any(feature = "maven", feature = "nuget"))] + #[test] + fn env_truthy_accepts_one_and_true_case_insensitive() { + let key = "SOCKET_TEST_ENV_TRUTHY"; + std::env::set_var(key, "1"); + assert!(env_truthy(key)); + std::env::set_var(key, "TrUe"); + assert!(env_truthy(key)); + std::env::set_var(key, "0"); + assert!(!env_truthy(key)); + std::env::set_var(key, "yes"); + assert!(!env_truthy(key)); + std::env::remove_var(key); + assert!(!env_truthy(key)); + } + #[test] fn partition_purls_no_filter_single_npm() { let purls = vec!["pkg:npm/foo@1.0".to_string()]; @@ -471,7 +675,15 @@ 
mod tests { "pkg:cargo/baz@3.0".to_string(), ]; let map = partition_purls(&purls, None); - assert_eq!(map.len(), 3); + // `pkg:cargo/...` is only recognized when the `cargo` feature is + // compiled in; otherwise `Ecosystem::from_purl` drops it. Keep the + // expected length in step with the active feature set so this test + // is correct in both configurations. + #[cfg(feature = "cargo")] + let expected_len = 3; + #[cfg(not(feature = "cargo"))] + let expected_len = 2; + assert_eq!(map.len(), expected_len); assert_eq!( map.get(&Ecosystem::Npm), Some(&vec!["pkg:npm/foo@1.0".to_string()]) diff --git a/crates/socket-patch-cli/src/json_envelope.rs b/crates/socket-patch-cli/src/json_envelope.rs index 2af6d651..2db2ef26 100644 --- a/crates/socket-patch-cli/src/json_envelope.rs +++ b/crates/socket-patch-cli/src/json_envelope.rs @@ -10,8 +10,8 @@ //! "status": "success" | "partialFailure" | "error" | "noManifest" | ..., //! "dryRun": false, //! "events": [ { "action": "...", "purl": "...", ... }, ... ], -//! "summary": { "applied": 0, "downloaded": 0, ... }, -//! "error": null +//! "summary": { "applied": 0, "downloaded": 0, ... } +//! // "error": { "code": ..., "message": ... } — present only on failure //! } //! ``` //! @@ -95,8 +95,18 @@ impl Envelope { /// Append an event and bump the matching summary counter. Centralizes /// the "events list must agree with summary counts" invariant so per- /// command code can't drift. + /// + /// Recording a `Failed` event also marks the run as a partial failure + /// (unless it's already a hard `Error`), enforcing the `status` + /// invariant documented on [`Envelope::status`] here rather than + /// relying on every command to remember a follow-up + /// `mark_partial_failure` call. A run can never end up reporting + /// `Success` while carrying a `Failed` event. 
pub fn record(&mut self, event: PatchEvent) { self.summary.bump(event.action); + if matches!(event.action, PatchAction::Failed) { + self.mark_partial_failure(); + } self.events.push(event); } @@ -142,6 +152,11 @@ pub struct PatchEvent { /// many patches at once. #[serde(skip_serializing_if = "Option::is_none")] pub uuid: Option, + /// The UUID this patch replaced. Set only on `Updated` events so a + /// consumer can diff a manifest update — the new UUID lives in + /// `uuid`, the one it overwrote here. Omitted for every other action. + #[serde(skip_serializing_if = "Option::is_none")] + pub old_uuid: Option, /// Files touched by an `Applied` / `Verified` / `Removed` event. /// Empty for actions that don't operate on files (e.g. `Downloaded`). #[serde(skip_serializing_if = "Vec::is_empty")] @@ -175,6 +190,7 @@ impl PatchEvent { action, purl: Some(purl.into()), uuid: None, + old_uuid: None, files: Vec::new(), reason: None, error_code: None, @@ -190,6 +206,7 @@ impl PatchEvent { action, purl: None, uuid: None, + old_uuid: None, files: Vec::new(), reason: None, error_code: None, @@ -203,6 +220,14 @@ impl PatchEvent { self } + /// Attach the UUID this event's patch replaced. Use on `Updated` + /// events so consumers can diff against the prior manifest entry; + /// serializes as `oldUuid`. + pub fn with_old_uuid(mut self, old_uuid: impl Into) -> Self { + self.old_uuid = Some(old_uuid.into()); + self + } + pub fn with_files(mut self, files: Vec) -> Self { self.files = files; self @@ -255,9 +280,10 @@ pub struct PatchEventFile { /// What kind of thing happened to a patch. /// -/// Serializes to lowercase camelCase strings — e.g. `Applied` → `"applied"`, -/// `PaidRequired` → `"paidRequired"`. The full vocabulary is part of the -/// CLI contract; new variants are MINOR-safe but renames are MAJOR. +/// Serializes to camelCase strings — e.g. `Applied` → `"applied"`, +/// `Downloaded` → `"downloaded"` (a hypothetical multi-word variant would +/// lower-camel, e.g. 
`FooBar` → `"fooBar"`). The full vocabulary is part of +/// the CLI contract; new variants are MINOR-safe but renames are MAJOR. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] #[serde(rename_all = "camelCase")] pub enum PatchAction { @@ -456,6 +482,60 @@ mod tests { assert_eq!(env.events.len(), 3); } + #[test] + fn recording_failed_event_marks_partial_failure() { + // The `status` invariant — "PartialFailure when any event has + // action = Failed" — must be enforced by `record` itself, not + // left to each command to remember. Otherwise a Success envelope + // can carry a `failed` event (and a non-zero `summary.failed`). + let mut env = Envelope::new(Command::Apply); + env.record(PatchEvent::new(PatchAction::Applied, "pkg:npm/foo@1.0.0")); + assert_eq!(env.status, Status::Success); + env.record( + PatchEvent::new(PatchAction::Failed, "pkg:npm/bar@2.0.0") + .with_error("apply_failed", "boom"), + ); + assert_eq!(env.status, Status::PartialFailure); + assert_eq!(env.summary.failed, 1); + } + + #[test] + fn recording_failed_event_does_not_demote_hard_error() { + // A prior hard error outranks the per-event partial failure that + // `record` raises — recording a Failed event must not downgrade + // Error to PartialFailure regardless of ordering. + let mut env = Envelope::new(Command::Apply); + env.mark_error(EnvelopeError::new("manifest_unreadable", "bad json")); + env.record( + PatchEvent::new(PatchAction::Failed, "pkg:npm/bar@2.0.0") + .with_error("apply_failed", "boom"), + ); + assert_eq!(env.status, Status::Error); + } + + #[test] + fn updated_event_carries_old_uuid() { + // The CLI contract promises `oldUuid` on `updated` events. The + // new UUID lives in `uuid`; the replaced one in `oldUuid`. 
+ let event = PatchEvent::new(PatchAction::Updated, "pkg:npm/foo@1.0.0") + .with_uuid("uuid-new") + .with_old_uuid("uuid-old"); + let v: serde_json::Value = + serde_json::from_str(&serde_json::to_string(&event).unwrap()).unwrap(); + assert_eq!(v["action"], "updated"); + assert_eq!(v["uuid"], "uuid-new"); + assert_eq!(v["oldUuid"], "uuid-old"); + } + + #[test] + fn old_uuid_omitted_when_unset() { + // Non-Updated events must not leak an `oldUuid` key. + let event = PatchEvent::new(PatchAction::Applied, "pkg:npm/foo@1.0.0"); + let v: serde_json::Value = + serde_json::from_str(&serde_json::to_string(&event).unwrap()).unwrap(); + assert!(!v.as_object().unwrap().contains_key("oldUuid")); + } + #[test] fn skipped_event_omits_uuid_and_files() { let event = PatchEvent::new(PatchAction::Skipped, "pkg:npm/foo@1.0.0") diff --git a/crates/socket-patch-cli/src/lib.rs b/crates/socket-patch-cli/src/lib.rs index 0a16bbf0..dcb4871f 100644 --- a/crates/socket-patch-cli/src/lib.rs +++ b/crates/socket-patch-cli/src/lib.rs @@ -191,6 +191,27 @@ mod tests { assert!(!looks_like_uuid("----")); } + #[test] + fn looks_like_uuid_accepts_nil_uuid() { + // The all-zeros nil UUID is correctly shaped and all-hex. + assert!(looks_like_uuid("00000000-0000-0000-0000-000000000000")); + } + + #[test] + fn looks_like_uuid_rejects_surrounding_whitespace() { + // The predicate must not trim: a leading/trailing space makes the + // first/last group the wrong length (and the space is non-hex). + assert!(!looks_like_uuid(" 80630680-4da6-45f9-bba8-b888e0ffd58c")); + assert!(!looks_like_uuid("80630680-4da6-45f9-bba8-b888e0ffd58c ")); + } + + #[test] + fn looks_like_uuid_rejects_internal_space() { + // A space inside a group keeps the byte length right in one spot but + // fails the hex check — guards against byte-length-only acceptance. 
+ assert!(!looks_like_uuid("8063068 -4da6-45f9-bba8-b888e0ffd58c")); + } + // ---------- parse_with_uuid_fallback ---------- const UUID: &str = "80630680-4da6-45f9-bba8-b888e0ffd58c"; @@ -250,6 +271,51 @@ mod tests { } } + #[test] + fn fallback_forwards_multiple_flags_in_order() { + // Every arg after the program name (UUID included) must be forwarded + // after the synthesized `get`, preserving order, so multiple flags + // all reach the rewritten command. + let cli = parse_with_uuid_fallback(argv(&["socket-patch", UUID, "--id", "--json"])) + .unwrap(); + match cli.command { + Commands::Get(args) => { + assert_eq!(args.identifier, UUID); + assert!(args.id, "--id should be forwarded to get"); + assert!(args.common.json, "--json should be forwarded to get"); + } + _ => panic!("expected Commands::Get"), + } + } + + #[test] + fn fallback_handles_no_args_without_panicking() { + // Only the program name is present (argv.len() == 1). The + // `argv.len() >= 2` guard must short-circuit before indexing argv[1], + // so this returns the original clap error rather than panicking. + let err = match parse_with_uuid_fallback(argv(&["socket-patch"])) { + Ok(_) => panic!("expected parse to fail without a subcommand"), + Err(e) => e, + }; + assert_eq!( + err.kind(), + clap::error::ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand, + "bare invocation should surface clap's missing-subcommand help, not panic" + ); + } + + #[test] + fn fallback_rewrites_uppercase_uuid_end_to_end() { + // The shape check accepts uppercase; confirm the full fallback path + // (not just `looks_like_uuid`) rewrites an uppercase bare UUID to get. 
+ const UPPER: &str = "80630680-4DA6-45F9-BBA8-B888E0FFD58C"; + let cli = parse_with_uuid_fallback(argv(&["socket-patch", UPPER])).unwrap(); + match cli.command { + Commands::Get(args) => assert_eq!(args.identifier, UPPER), + _ => panic!("expected Commands::Get"), + } + } + #[test] fn fallback_surfaces_original_error_when_rewrite_also_fails() { // UUID is valid-shaped so a rewrite is attempted, but `get` doesn't diff --git a/crates/socket-patch-cli/src/output.rs b/crates/socket-patch-cli/src/output.rs index b770e6cd..4bcd06d4 100644 --- a/crates/socket-patch-cli/src/output.rs +++ b/crates/socket-patch-cli/src/output.rs @@ -253,4 +253,34 @@ mod tests { fn confirm_skip_prompt_and_is_json_both_set_returns_default_yes() { assert!(confirm("?", true, true, true)); } + + // ---- select_one ---- + // + // Only the `is_json` branch is exercised here: it returns before reading + // stdin, so it is deterministic regardless of whether the test runs under + // a TTY. The non-TTY auto-select (`Ok(0)`) and the interactive + // `dialoguer` branches both depend on / consume the real stdin and would + // hang or vary by environment, so they are intentionally left to the e2e + // suite (see get.rs `select_patches` coverage). + + #[test] + fn select_one_json_mode_requires_explicit_selection() { + let opts = vec!["first".to_string(), "second".to_string()]; + match select_one("pick one", &opts, true) { + Err(SelectError::JsonModeNeedsExplicit) => {} + Err(SelectError::Cancelled) => panic!("json mode must not report Cancelled"), + Ok(idx) => panic!("json mode must not auto-select (got index {idx})"), + } + } + + #[test] + fn select_one_json_mode_ignores_options_contents() { + // Even with a single option, JSON mode must defer to an explicit + // `--id` rather than silently picking it. 
+ let opts = vec!["only".to_string()]; + assert!(matches!( + select_one("pick", &opts, true), + Err(SelectError::JsonModeNeedsExplicit) + )); + } } diff --git a/crates/socket-patch-cli/tests/api_client_errors_e2e.rs b/crates/socket-patch-cli/tests/api_client_errors_e2e.rs index 056d22f1..f8621662 100644 --- a/crates/socket-patch-cli/tests/api_client_errors_e2e.rs +++ b/crates/socket-patch-cli/tests/api_client_errors_e2e.rs @@ -356,7 +356,11 @@ async fn repair_with_blob_404_marks_failure_in_summary() { .expect("run"); let code = out.status.code().unwrap_or(-1); let stdout = String::from_utf8_lossy(&out.stdout).to_string(); - assert_eq!(code, 0, "repair must exit 0 even with download failures; stdout={stdout}"); + assert_eq!( + code, 1, + "repair must exit non-zero when an artifact download fails so CI guarding on \ + the exit code doesn't treat a half-finished repair as success; stdout={stdout}" + ); let v: serde_json::Value = serde_json::from_str(stdout.trim()).expect("must be JSON"); // The repair envelope's summary tracks failures. 
diff --git a/crates/socket-patch-cli/tests/cli_global_args.rs b/crates/socket-patch-cli/tests/cli_global_args.rs index af24f247..2835311c 100644 --- a/crates/socket-patch-cli/tests/cli_global_args.rs +++ b/crates/socket-patch-cli/tests/cli_global_args.rs @@ -47,6 +47,8 @@ fn global_flag_cases() -> Vec<(&'static str, Option<&'static str>)> { ("--yes", None), ("--debug", None), ("--no-telemetry", None), + ("--break-lock", None), + ("--lock-timeout", Some("30")), ] } @@ -195,6 +197,8 @@ fn env_vars_populate_global_args() { ("SOCKET_SILENT", "true"), ("SOCKET_DRY_RUN", "true"), ("SOCKET_YES", "true"), + ("SOCKET_LOCK_TIMEOUT", "30"), + ("SOCKET_BREAK_LOCK", "true"), ("SOCKET_DEBUG", "true"), ("SOCKET_TELEMETRY_DISABLED", "true"), ]; @@ -234,6 +238,8 @@ fn env_vars_populate_global_args() { assert!(args.common.silent); assert!(args.common.dry_run); assert!(args.common.yes); + assert_eq!(args.common.lock_timeout, Some(30)); + assert!(args.common.break_lock); assert!(args.common.debug); assert!(args.common.no_telemetry); } else { @@ -350,3 +356,138 @@ fn bool_env_vars_reject_zero_and_falsey() { } } } + +/// Names of every `SOCKET_*` env var that `GlobalArgs` binds, so tests that +/// need a clean slate can save/clear/restore them in one place. 
+const GLOBAL_ENV_VARS: &[&str] = &[ + "SOCKET_CWD", + "SOCKET_MANIFEST_PATH", + "SOCKET_API_URL", + "SOCKET_API_TOKEN", + "SOCKET_ORG_SLUG", + "SOCKET_PROXY_URL", + "SOCKET_ECOSYSTEMS", + "SOCKET_DOWNLOAD_MODE", + "SOCKET_OFFLINE", + "SOCKET_GLOBAL", + "SOCKET_GLOBAL_PREFIX", + "SOCKET_JSON", + "SOCKET_VERBOSE", + "SOCKET_SILENT", + "SOCKET_DRY_RUN", + "SOCKET_YES", + "SOCKET_LOCK_TIMEOUT", + "SOCKET_BREAK_LOCK", + "SOCKET_DEBUG", + "SOCKET_TELEMETRY_DISABLED", +]; + +fn save_and_clear_global_env() -> Vec<(&'static str, Option)> { + let saved: Vec<(&'static str, Option)> = GLOBAL_ENV_VARS + .iter() + .map(|&k| (k, std::env::var(k).ok())) + .collect(); + for &k in GLOBAL_ENV_VARS { + std::env::remove_var(k); + } + saved +} + +fn restore_global_env(saved: Vec<(&'static str, Option)>) { + for (k, orig) in saved { + match orig { + Some(v) => std::env::set_var(k, v), + None => std::env::remove_var(k), + } + } +} + +/// Regression for the documented precedence (`CLI arg > env var > default`, +/// see the module header in `args.rs`): when both a CLI flag and its env var +/// are set, the CLI value must win. Covers a string field (`--api-url`) and a +/// bool field set on the CLI while the env says falsey. Env-only resolution is +/// asserted too so we know the env var really was live. +#[test] +#[serial_test::serial] +fn cli_arg_overrides_env_var() { + let saved = save_and_clear_global_env(); + + // String field: env set, CLI overrides. + std::env::set_var("SOCKET_API_URL", "https://env-api.example.com"); + let cli = Cli::try_parse_from([ + "socket-patch", + "list", + "--api-url", + "https://cli-api.example.com", + ]) + .expect("parse"); + let socket_patch_cli::Commands::List(args) = cli.command else { + panic!("expected List"); + }; + assert_eq!( + args.common.api_url, "https://cli-api.example.com", + "CLI --api-url must override SOCKET_API_URL" + ); + + // Sanity: with the CLI flag absent, the env value resolves through. 
+ let cli = Cli::try_parse_from(["socket-patch", "list"]).expect("parse"); + let socket_patch_cli::Commands::List(args) = cli.command else { + panic!("expected List"); + }; + assert_eq!( + args.common.api_url, "https://env-api.example.com", + "with no CLI flag the env var must resolve through" + ); + + // Bool field: CLI `--offline` wins over a falsey env value. + std::env::set_var("SOCKET_OFFLINE", "0"); + let cli = Cli::try_parse_from(["socket-patch", "list", "--offline"]).expect("parse"); + let socket_patch_cli::Commands::List(args) = cli.command else { + panic!("expected List"); + }; + assert!( + args.common.offline, + "CLI --offline must win over SOCKET_OFFLINE=0" + ); + + restore_global_env(saved); +} + +/// Regression: with neither CLI flags nor env vars set, clap must populate the +/// documented production defaults (the `default_value = ".."` attributes). This +/// is the production path that `GlobalArgs::default()` deliberately does *not* +/// mirror for `api_url`/`proxy_url`, so it needs its own coverage — and +/// `api_client_overrides()` must therefore forward those concrete URLs. 
+#[test] +#[serial_test::serial] +fn production_defaults_populate_when_unset() { + let saved = save_and_clear_global_env(); + + let cli = Cli::try_parse_from(["socket-patch", "list"]).expect("parse"); + let socket_patch_cli::Commands::List(args) = cli.command else { + panic!("expected List"); + }; + let c = &args.common; + assert_eq!(c.cwd, std::path::PathBuf::from(".")); + assert_eq!(c.manifest_path, ".socket/manifest.json"); + assert_eq!(c.api_url, "https://api.socket.dev"); + assert_eq!(c.proxy_url, "https://patches-api.socket.dev"); + assert_eq!(c.download_mode, "diff"); + assert!(c.api_token.is_none()); + assert!(c.org.is_none()); + assert!(c.ecosystems.is_none()); + assert!(!c.offline && !c.global && !c.json && !c.verbose && !c.silent); + assert!(!c.dry_run && !c.yes && !c.break_lock && !c.debug && !c.no_telemetry); + assert!(c.lock_timeout.is_none()); + assert!(c.global_prefix.is_none()); + + // On the production path (unlike GlobalArgs::default()) the URLs are + // non-empty, so api_client_overrides must forward them. + let o = c.api_client_overrides(); + assert_eq!(o.api_url.as_deref(), Some("https://api.socket.dev")); + assert_eq!(o.proxy_url.as_deref(), Some("https://patches-api.socket.dev")); + assert!(o.api_token.is_none()); + assert!(o.org_slug.is_none()); + + restore_global_env(saved); +} diff --git a/crates/socket-patch-cli/tests/cli_parse_main.rs b/crates/socket-patch-cli/tests/cli_parse_main.rs index cea8a734..eddfa6d7 100644 --- a/crates/socket-patch-cli/tests/cli_parse_main.rs +++ b/crates/socket-patch-cli/tests/cli_parse_main.rs @@ -116,6 +116,22 @@ fn repair_subcommand_parses() { assert!(matches!(cli.command, Commands::Repair(_))); } +#[test] +fn unlock_subcommand_parses() { + // `unlock` is one of the two newest subcommands and the second-to-last + // arm in main.rs's dispatch match — keep its name + dispatch wiring + // covered alongside the older commands. 
+ let cli = parse(&["socket-patch", "unlock"]).expect("unlock must parse with no positional"); + assert!(matches!(cli.command, Commands::Unlock(_))); +} + +#[test] +fn vex_subcommand_parses() { + // `vex` is the last arm in main.rs's dispatch match; lock its name in. + let cli = parse(&["socket-patch", "vex"]).expect("vex must parse with no positional"); + assert!(matches!(cli.command, Commands::Vex(_))); +} + // ---------- visible aliases ---------- #[test] diff --git a/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs b/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs index b66af6f3..1d30d9a1 100644 --- a/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs +++ b/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs @@ -569,55 +569,61 @@ fn apply_with_missing_files_field_reports_sidecar_fixup_failed() { ); } -/// Cargo sidecar write-error path: `.cargo-checksum.json` is -/// valid JSON (so `read_to_string` succeeds, parse succeeds, -/// update succeeds in memory) but the file is read-only, so the -/// final `tokio::fs::write` returns `EACCES`. The fixup wraps -/// that as `SidecarError::Io` and the boundary surfaces it as -/// `sidecar_fixup_failed` severity error. +/// Regression (read-only checksum file): a real Cargo registry/vendor +/// tree marks `.cargo-checksum.json` read-only (`0o444`) for tamper +/// detection. The sidecar must STILL rewrite it — the hardened +/// stage+rename write path relaxes the file's mode, swaps a fresh +/// inode in atomically, and restores `0o444` afterward. /// -/// Covers lines 94-99 of cargo.rs (the write `map_err`) — a -/// region the parse/read/no-files-field tests cannot reach. +/// Before the fix the bare in-place `tokio::fs::write` failed `EACCES` +/// here and surfaced a `sidecar_fixup_failed` error, leaving the +/// checksum stale-patched and the crate unbuildable in exactly the +/// real-world (read-only-registry) case the fixup exists to handle. 
/// -/// Skipped when running as root (chmod 0444 is bypassed by uid 0, -/// which collapses this test into the success path and produces a -/// false negative). On normal dev/CI the test fires fully. +/// Runs under any uid: even where the kernel grants root implicit +/// write, the success assertions (content rewritten, mode restored) +/// hold, so there is no root-skip false-negative to dodge. #[cfg(unix)] #[test] -fn apply_with_readonly_checksum_reports_sidecar_fixup_failed() { +fn apply_with_readonly_checksum_still_rewrites_it() { use std::os::unix::fs::PermissionsExt; - if uid_is_root() { - eprintln!("SKIP: chmod 0444 negative tests no-op as root"); - return; - } let root = tempfile::tempdir().unwrap(); let consumer = stage_consumer(root.path()); stage_socket_manifest(&consumer); - // Source file write doesn't touch the checksum, so locking the - // checksum down to 0444 (r--r--r--) only blocks the sidecar's - // final rewrite — exactly the path we want to exercise. + // Lock the checksum file down exactly as Cargo would for a + // registry/vendor source. let checksum = consumer.join("vendor/safety-fixture/.cargo-checksum.json"); - let mut perms = std::fs::metadata(&checksum).unwrap().permissions(); - perms.set_mode(0o444); - std::fs::set_permissions(&checksum, perms).unwrap(); + std::fs::set_permissions(&checksum, std::fs::Permissions::from_mode(0o444)).unwrap(); let (_code, stdout, _stderr) = run( &consumer, &["apply", "--json", "--cwd", consumer.to_str().unwrap()], ); - // Restore writable perms so tempdir cleanup can unlink. - let mut restore = std::fs::metadata(&checksum).unwrap().permissions(); - restore.set_mode(0o644); - let _ = std::fs::set_permissions(&checksum, restore); - // Patch landed — source file is in a writable subdir. assert_eq!( std::fs::read_to_string(consumer.join("vendor/safety-fixture/src/lib.rs")).unwrap(), PATCHED_LIB_RS, ); + // The read-only checksum was rewritten to match the patched + // source (raw SHA256, the cargo format). 
+ let post: serde_json::Value = + serde_json::from_str(&std::fs::read_to_string(&checksum).unwrap()).unwrap(); + assert_eq!( + post["files"]["src/lib.rs"].as_str().unwrap(), + sha256_hex(PATCHED_LIB_RS.as_bytes()), + "checksum entry must reflect the patched source" + ); + + // The original `0o444` mode was restored bit-for-bit. + let mode = std::fs::metadata(&checksum).unwrap().permissions().mode() & 0o7777; + // Re-grant write so tempdir cleanup can unlink. + let _ = std::fs::set_permissions(&checksum, std::fs::Permissions::from_mode(0o644)); + assert_eq!(mode, 0o444, "checksum file must stay read-only after rewrite"); + + // The sidecar reports a successful rewrite — not a failure advisory. let env = parse_json_envelope(&stdout); let cargo = env["sidecars"] .as_array() @@ -625,34 +631,19 @@ fn apply_with_readonly_checksum_reports_sidecar_fixup_failed() { .iter() .find(|s| s["ecosystem"] == "cargo") .expect("cargo record"); - let advisory = cargo.get("advisory").expect("advisory"); - assert_eq!(advisory["code"], "sidecar_fixup_failed"); - assert_eq!(advisory["severity"], "error"); -} - -/// Helper: detect uid 0 without pulling in `libc`. Tests that rely -/// on chmod 0444 being honored must short-circuit under root -/// because the kernel grants uid 0 implicit write permission -/// regardless of mode bits. -/// -/// Uses `id -u` rather than a direct `getuid` syscall to avoid a -/// `libc` dev-dep just for this one detection. Falls back to -/// "not root" if `id` is missing or its output is garbled — better -/// to attempt the test (and possibly false-pass) than to skip it -/// silently because of a missing helper binary. 
-#[cfg(unix)] -fn uid_is_root() -> bool { - Command::new("id") - .arg("-u") - .output() - .ok() - .and_then(|o| { - String::from_utf8(o.stdout) - .ok() - .map(|s| s.trim().to_string()) - }) - .map(|s| s == "0") - .unwrap_or(false) + let rewrote = cargo["files"].as_array().is_some_and(|files| { + files + .iter() + .any(|f| f["path"] == ".cargo-checksum.json" && f["action"] == "rewritten") + }); + assert!( + rewrote, + "expected a rewritten .cargo-checksum.json file entry; got {cargo}" + ); + assert!( + cargo.get("advisory").map(|a| a.is_null()).unwrap_or(true), + "successful rewrite must not carry a failure advisory; got {cargo}" + ); } /// Third Malformed branch: when `.cargo-checksum.json` exists but diff --git a/crates/socket-patch-cli/tests/e2e_safety_internals.rs b/crates/socket-patch-cli/tests/e2e_safety_internals.rs index 1549254d..e7909c74 100644 --- a/crates/socket-patch-cli/tests/e2e_safety_internals.rs +++ b/crates/socket-patch-cli/tests/e2e_safety_internals.rs @@ -231,11 +231,13 @@ async fn cow_symlink_to_missing_target_propagates_read_error() { assert_eq!(err.kind(), std::io::ErrorKind::NotFound); } -/// Symlink branch remove-fails arm (cow.rs:70): when the symlink -/// itself carries the `uchg` (user-immutable) flag, `read(path)` -/// follows the link and succeeds, but `remove_file(path)` cannot -/// unlink the immutable symlink. The error propagates before the -/// stage-rename step. +/// Symlink branch rename-fails arm: when the symlink itself carries +/// the `uchg` (user-immutable) flag, `read(path)` follows the link +/// and succeeds and the stage file is created fine, but the atomic +/// `rename(stage, path)` over the immutable symlink is refused with +/// EPERM. The error propagates, the stage is cleaned up, and — the +/// key invariant — the original symlink is left intact (CoW never +/// destructively unlinks before the replacement is committed). 
/// /// macOS-only: BSD `chflags -h` is the only userspace tool that /// can set flags on a symlink without dereferencing. Linux's @@ -278,8 +280,24 @@ async fn cow_symlink_unremovable_propagates_remove_error() { // Clear so tempdir cleanup can recurse. let _ = Command::new("chflags").arg("-h").arg("nouchg").arg(&link).status(); - let err = result.expect_err("remove of immutable symlink must propagate EPERM"); + let err = result.expect_err("rename over immutable symlink must propagate EPERM"); assert_ne!(err.kind(), std::io::ErrorKind::NotFound); + + // Regression (atomicity): the failed break must NOT have destroyed + // the original. The path still exists and is still the symlink. + let meta = std::fs::symlink_metadata(&link) + .expect("failed CoW must leave the original symlink in place"); + assert!( + meta.file_type().is_symlink(), + "original symlink must survive a failed break, got {meta:?}" + ); + // And no stage litter left behind. + let leftover: Vec<_> = std::fs::read_dir(tmp.path()) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| e.file_name().to_string_lossy().starts_with(".socket-cow-")) + .collect(); + assert!(leftover.is_empty(), "stage litter left behind: {leftover:?}"); } /// Hardlink branch read-fails arm (cow.rs:84): a hardlinked file @@ -380,20 +398,22 @@ async fn cow_stage_write_failure_propagates() { assert_ne!(err.kind(), std::io::ErrorKind::NotFound); } -/// Symlink-branch write_via_stage_rename failure arm (cow.rs:71): -/// after `read(symlink)` and `remove_file(symlink)` both succeed, -/// the subsequent `write_via_stage_rename` fails to create its -/// `.socket-cow-*` stage file because the parent directory has a -/// macOS ACL that denies `add_file` while still allowing -/// `delete_child` — a state POSIX mode bits can't express -/// (write perm on a dir is monolithic for create+delete). 
+/// Symlink-branch `write_via_stage_rename` stage-create failure arm: +/// after `read(symlink)` succeeds, `write_via_stage_rename` fails to +/// create its `.socket-cow-*` stage file because the parent directory +/// has a macOS ACL that denies `add_file` while still allowing +/// `delete_child` — a state POSIX mode bits can't express (write perm +/// on a dir is monolithic for create+delete). /// -/// This is the only filesystem state that lets remove succeed but -/// the next write fail in the same parent dir, which is required -/// to reach the `?` Err arm on cow.rs:71. macOS-only because BSD -/// extended ACLs (`chmod +a`) are the only userspace mechanism -/// for this kind of fine-grained denial. Linux's POSIX.1e ACLs -/// can't split create-vs-delete on directories. +/// This same ACL is what made the old, destructive flow dangerous: +/// the previous code did `remove_file(symlink)` (a `delete_child`, +/// which the ACL *allows*) BEFORE the stage write, so the link was +/// gone the instant the denied stage create failed — destroying the +/// package file with no rollback. The current flow stages first and +/// never pre-unlinks, so this asserts the original symlink survives. +/// macOS-only because BSD extended ACLs (`chmod +a`) are the only +/// userspace mechanism for this kind of fine-grained denial; Linux's +/// POSIX.1e ACLs can't split create-vs-delete on directories. 
#[cfg(target_os = "macos")] #[tokio::test] async fn cow_symlink_stage_write_failure_propagates() { @@ -439,10 +459,26 @@ async fn cow_symlink_stage_write_failure_propagates() { let _ = Command::new("chmod").arg("-a#").arg("0").arg(&dir).status(); let err = result.expect_err( - "with deny-add_file ACL, write_via_stage_rename's stage create must fail \ - AFTER read + remove succeeded, hitting cow.rs:71's `?` Err arm", + "with deny-add_file ACL, write_via_stage_rename's stage create must fail, \ + surfacing the stage-write `?` Err arm", ); assert_ne!(err.kind(), std::io::ErrorKind::NotFound); + + // Regression (atomicity / rollback): the old code unlinked the + // symlink before this denied stage write, leaving the package file + // gone. The current code stages first, so the original symlink must + // still be present after the failure. + let meta = std::fs::symlink_metadata(&link) + .expect("failed CoW must leave the original symlink in place"); + assert!( + meta.file_type().is_symlink(), + "original symlink must survive a failed stage write, got {meta:?}" + ); + assert_eq!( + std::fs::read(&link).unwrap(), + b"shared bytes", + "symlink must still resolve to its original target content" + ); } /// `break_hardlink_if_needed` failure-cleanup arm (cow.rs:116-120): diff --git a/crates/socket-patch-cli/tests/e2e_safety_unlock.rs b/crates/socket-patch-cli/tests/e2e_safety_unlock.rs index 65c10be3..0360a5c2 100644 --- a/crates/socket-patch-cli/tests/e2e_safety_unlock.rs +++ b/crates/socket-patch-cli/tests/e2e_safety_unlock.rs @@ -95,6 +95,52 @@ fn unlock_release_deletes_lock_file_when_free() { ); } +/// `unlock --release` against a `.socket/` directory that has no +/// lock file reports `released: false` — there was nothing to +/// release. Regression test: `acquire` creates the lock file on +/// demand, so a naive `remove_file().is_ok()` check would wrongly +/// claim it released a pre-existing leftover. 
The probe must not +/// leave a lock file behind either (clean slate). +#[test] +fn unlock_release_reports_not_released_when_no_lock_file() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + std::fs::create_dir_all(&socket_dir).unwrap(); + let lock_file = socket_dir.join("apply.lock"); + assert!(!lock_file.exists(), "pre-stage: no lock file expected"); + + let (code, stdout, stderr) = run(dir.path(), &["unlock", "--json", "--release"]); + assert_eq!(code, 0, "stdout={stdout}\nstderr={stderr}"); + let env = parse_json_envelope(&stdout); + assert_eq!(json_string(&env, "status"), Some("free")); + assert_eq!( + env.get("released").and_then(|v| v.as_bool()), + Some(false), + "nothing pre-existed, so released must be false: {stdout}" + ); + assert!( + !lock_file.exists(), + "--release should not leave a probe-created lock file behind" + ); +} + +/// `unlock --release` against a completely fresh project (no +/// `.socket/` at all) reports `released: false` and exits 0. +/// Mirrors the missing-dir branch's contract. +#[test] +fn unlock_release_reports_not_released_when_no_socket_dir() { + let dir = tempfile::tempdir().unwrap(); + let (code, stdout, stderr) = run(dir.path(), &["unlock", "--json", "--release"]); + assert_eq!(code, 0, "stdout={stdout}\nstderr={stderr}"); + let env = parse_json_envelope(&stdout); + assert_eq!(json_string(&env, "status"), Some("free")); + assert_eq!( + env.get("released").and_then(|v| v.as_bool()), + Some(false), + "no .socket/ existed, so released must be false: {stdout}" + ); +} + /// `unlock --release` refuses when the lock is HELD — the file /// must NOT be removed (otherwise we'd undermine the OS-level /// exclusion). 
The user has to use `--break-lock` on the mutating diff --git a/crates/socket-patch-cli/tests/e2e_vex.rs b/crates/socket-patch-cli/tests/e2e_vex.rs index fe23104e..3b1031f4 100644 --- a/crates/socket-patch-cli/tests/e2e_vex.rs +++ b/crates/socket-patch-cli/tests/e2e_vex.rs @@ -556,6 +556,90 @@ fn verify_mode_all_failed_exits_non_zero() { assert!(stderr.contains("No applied patches")); } +// ────────────────────────────────────────────────────────────────────── +// Release-variant verify-mode regression — PyPI manifests key patches by +// *qualified* PURLs (`?artifact_id=`), but the crawler only knows the base +// PURL. `vex` must resolve package paths with the qualified-aware +// (rollback) dispatcher, exactly like `get`/`rollback` do; otherwise every +// PyPI/Gem/Maven patch is silently dropped from the VEX doc as +// `package_not_found`. We drive the PyPI crawler at a synthetic +// `site-packages` via `--global-prefix` to keep the test offline. +// ────────────────────────────────────────────────────────────────────── + +#[test] +fn verify_mode_resolves_qualified_pypi_purl() { + let tmp = tempfile::tempdir().unwrap(); + let cwd = tmp.path(); + + // Synthetic site-packages with a dist-info the crawler can read. + let site_packages = cwd.join("site-packages"); + let dist_info = site_packages.join("examplepkg-1.2.3.dist-info"); + std::fs::create_dir_all(&dist_info).unwrap(); + std::fs::write( + dist_info.join("METADATA"), + "Metadata-Version: 2.1\nName: examplepkg\nVersion: 1.2.3\n\n", + ) + .unwrap(); + + // Lay the patched file at the package root (file_name strips the + // leading `package/` segment, so this lands at site-packages/mod.py). + let patched = b"patched python module"; + let after_hash = compute_git_sha256_from_bytes(patched); + std::fs::write(site_packages.join("mod.py"), patched).unwrap(); + + // Manifest keyed by a *qualified* PyPI PURL, as `get --sync` writes + // for release-variant ecosystems. 
+ let qualified_purl = "pkg:pypi/examplepkg@1.2.3?artifact_id=sdist"; + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + qualified_purl.to_string(), + make_record( + "33333333-3333-4333-8333-333333333333", + "package/mod.py", + "a".repeat(64).as_str(), + after_hash.as_str(), + "GHSA-pypi-variant", + &["CVE-2024-PYPI"], + ), + ); + write_manifest(cwd, &manifest); + + let out = Command::new(binary()) + .args([ + "vex", + "--cwd", + cwd.to_str().unwrap(), + "--global-prefix", + site_packages.to_str().unwrap(), + "--ecosystems", + "pypi", + "--product", + "pkg:pypi/app@1.0.0", + ]) + .output() + .expect("invoke vex"); + assert!( + out.status.success(), + "qualified PyPI patch must verify and emit a statement. stderr:\n{}", + String::from_utf8_lossy(&out.stderr) + ); + + let doc: Value = serde_json::from_slice(&out.stdout).unwrap(); + let stmts = doc["statements"].as_array().unwrap(); + assert_eq!( + stmts.len(), + 1, + "the qualified PyPI patch must not be dropped as package_not_found" + ); + assert_eq!(stmts[0]["vulnerability"]["name"], "GHSA-pypi-variant"); + // The subcomponent retains the fully-qualified manifest PURL. 
+ let subs = stmts[0]["products"][0]["subcomponents"].as_array().unwrap(); + assert_eq!(subs.len(), 1); + assert_eq!(subs[0]["@id"], qualified_purl); + + maybe_validate_with_vexctl(&String::from_utf8_lossy(&out.stdout)); +} + // ────────────────────────────────────────────────────────────────────── // vexctl integration (run only when the binary is on PATH) // ────────────────────────────────────────────────────────────────────── diff --git a/crates/socket-patch-cli/tests/in_process_remove_repair_lifecycle.rs b/crates/socket-patch-cli/tests/in_process_remove_repair_lifecycle.rs index 8874d019..bec4ef76 100644 --- a/crates/socket-patch-cli/tests/in_process_remove_repair_lifecycle.rs +++ b/crates/socket-patch-cli/tests/in_process_remove_repair_lifecycle.rs @@ -456,6 +456,59 @@ async fn repair_with_no_manifest_emits_error() { assert_eq!(repair_run(make_repair_args(tmp.path(), "file")).await, 1); } +/// Regression: a repair where a missing artifact fails to download must +/// exit non-zero. The blob the manifest references is absent from disk AND +/// the mock server has no route for it (→ 404 / not found), so the fetch +/// fails. Before the fix `run()` reported success (exit 0) even though it +/// had marked the run a partial failure and emitted a `Failed` event — +/// hiding the failure from any CI guarding on the exit code. +#[tokio::test] +#[serial] +async fn repair_download_failure_exits_nonzero() { + let tmp = tempfile::tempdir().unwrap(); + // A valid-format (64 hex) afterHash the server will never serve. + let after_hash = git_sha256(b"never served by the mock\n"); + + // Mock server with NO blob route → every fetch 404s. 
+ let server = MockServer::start().await; + + let socket = tmp.path().join(".socket"); + std::fs::create_dir_all(&socket).unwrap(); + std::fs::write( + socket.join("manifest.json"), + format!( + r#"{{ "patches": {{ + "pkg:npm/fetch-fail@1.0.0": {{ + "uuid": "17171717-1717-4171-8171-171717171717", + "exportedAt": "2024-01-01T00:00:00Z", + "files": {{ "package/x.js": {{ + "beforeHash": "0000000000000000000000000000000000000000000000000000000000000000", + "afterHash": "{after_hash}" + }}}}, + "vulnerabilities": {{}}, "description": "x", + "license": "MIT", "tier": "free" + }} + }}}}"# + ), + ) + .unwrap(); + + std::env::set_var("SOCKET_API_URL", server.uri()); + std::env::set_var("SOCKET_API_TOKEN", "fake"); + std::env::set_var("SOCKET_ORG_SLUG", ORG); + let code = repair_run(make_repair_args(tmp.path(), "file")).await; + std::env::remove_var("SOCKET_API_URL"); + std::env::remove_var("SOCKET_API_TOKEN"); + std::env::remove_var("SOCKET_ORG_SLUG"); + + assert_eq!( + code, 1, + "a failed artifact download must surface as a non-zero exit" + ); + // The blob must not have been written. + assert!(!socket.join("blobs").join(&after_hash).exists()); +} + #[tokio::test] #[serial] async fn repair_offline_with_present_blobs_succeeds() { diff --git a/crates/socket-patch-cli/tests/remove_network.rs b/crates/socket-patch-cli/tests/remove_network.rs new file mode 100644 index 00000000..bcb5f307 --- /dev/null +++ b/crates/socket-patch-cli/tests/remove_network.rs @@ -0,0 +1,165 @@ +//! Network-path tests for `remove`'s internal rollback. +//! +//! `remove` rolls back files before deleting from the manifest, and +//! rollback fetches any missing `beforeHash` blobs from the API. Two +//! contractual behaviours are exercised here against a wiremock server: +//! +//! 1. **online (default):** a missing `beforeHash` blob is downloaded, +//! rollback succeeds (no package installed → nothing to restore), +//! and the manifest entry is dropped. +//! 2. 
**`--offline`:** the strict-airgap contract ("never contact the +//! network on *any* command") must hold. With a missing blob, +//! `remove --offline` must refuse to roll back rather than reach out, +//! and therefore must leave the manifest entry intact. +//! +//! Regression guard: `remove` previously hard-coded `offline = false` +//! when delegating to `rollback_patches`, so `--offline` was silently +//! ignored — the binary would contact the mock, succeed, and delete the +//! entry. Test (2) fails loudly if that bug returns. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use sha2::{Digest, Sha256}; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +fn binary() -> PathBuf { + env!("CARGO_BIN_EXE_socket-patch").into() +} + +const ORG_SLUG: &str = "test-org"; +const PURL: &str = "pkg:npm/remove-network-test@1.0.0"; +const UUID: &str = "11111111-1111-4111-8111-111111111111"; + +/// Git-SHA256: SHA256("blob <len>\0" ++ content). 
+fn git_sha256(content: &[u8]) -> String { + let header = format!("blob {}\0", content.len()); + let mut hasher = Sha256::new(); + hasher.update(header.as_bytes()); + hasher.update(content); + hex::encode(hasher.finalize()) +} + +fn write_manifest(socket: &Path, before_hash: &str, after_hash: &str) { + std::fs::create_dir_all(socket).expect("create .socket"); + let body = format!( + r#"{{ + "patches": {{ + "{PURL}": {{ + "uuid": "{UUID}", + "exportedAt": "2024-01-01T00:00:00Z", + "files": {{ + "package/index.js": {{ + "beforeHash": "{before_hash}", + "afterHash": "{after_hash}" + }} + }}, + "vulnerabilities": {{}}, + "description": "remove network test patch", + "license": "MIT", + "tier": "free" + }} + }} +}}"# + ); + std::fs::write(socket.join("manifest.json"), body).expect("write manifest"); +} + +fn manifest_has_entry(socket: &Path) -> bool { + let body = std::fs::read_to_string(socket.join("manifest.json")).expect("read manifest"); + let v: serde_json::Value = serde_json::from_str(&body).expect("parse manifest"); + v["patches"] + .as_object() + .map(|m| m.contains_key(PURL)) + .unwrap_or(false) +} + +/// Mount the blob endpoint that rollback's `fetch_blobs_by_hash` hits for +/// the missing `beforeHash`. Serving the real bytes lets the online path +/// (and, if the offline bug regressed, the offline path too) succeed. 
+async fn mount_before_blob(mock: &MockServer, before: &[u8], before_hash: &str) { + Mock::given(method("GET")) + .and(path(format!( + "/v0/orgs/{ORG_SLUG}/patches/blob/{before_hash}" + ))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(before.to_vec())) + .mount(mock) + .await; +} + +fn run_remove(cwd: &Path, api_url: &str, extra: &[&str]) -> (i32, String) { + let mut argv: Vec<&str> = vec!["remove", PURL, "--json", "--yes"]; + argv.extend_from_slice(extra); + let out = Command::new(binary()) + .args(&argv) + .current_dir(cwd) + .env("SOCKET_API_URL", api_url) + .env("SOCKET_API_TOKEN", "fake-token-for-test") + .env("SOCKET_ORG_SLUG", ORG_SLUG) + .env("SOCKET_TELEMETRY_DISABLED", "1") + .output() + .expect("run socket-patch"); + ( + out.status.code().unwrap_or(-1), + String::from_utf8_lossy(&out.stdout).to_string(), + ) +} + +/// Online sanity: a missing beforeHash blob is fetched, rollback finds no +/// installed package (nothing to restore → success), and the entry is +/// removed. Establishes that the mock can satisfy the download, which is +/// what gives the `--offline` regression test (below) its teeth. 
+#[tokio::test] +async fn remove_online_downloads_missing_before_blob_then_removes() { + let before = b"before\n"; + let after = b"after\n"; + let before_hash = git_sha256(before); + let after_hash = git_sha256(after); + + let mock = MockServer::start().await; + mount_before_blob(&mock, before, &before_hash).await; + + let tmp = tempfile::tempdir().expect("tempdir"); + let socket = tmp.path().join(".socket"); + write_manifest(&socket, &before_hash, &after_hash); + + let (code, stdout) = run_remove(tmp.path(), &mock.uri(), &[]); + assert_eq!(code, 0, "online remove must succeed; stdout=\n{stdout}"); + assert!( + !manifest_has_entry(&socket), + "online remove must drop the manifest entry; stdout=\n{stdout}" + ); +} + +/// `--offline` must NOT contact the network: with the beforeHash blob +/// missing, rollback cannot proceed, so `remove --offline` aborts and +/// leaves the manifest entry in place. The mock IS armed to serve the +/// blob — if `--offline` were ignored (the original bug) the binary would +/// download it, succeed, and delete the entry, flipping both assertions. 
+#[tokio::test] +async fn remove_offline_does_not_fetch_and_keeps_entry() { + let before = b"before\n"; + let after = b"after\n"; + let before_hash = git_sha256(before); + let after_hash = git_sha256(after); + + let mock = MockServer::start().await; + mount_before_blob(&mock, before, &before_hash).await; + + let tmp = tempfile::tempdir().expect("tempdir"); + let socket = tmp.path().join(".socket"); + write_manifest(&socket, &before_hash, &after_hash); + + let (code, stdout) = run_remove(tmp.path(), &mock.uri(), &["--offline"]); + assert_eq!( + code, 1, + "remove --offline with a missing blob must fail rollback; stdout=\n{stdout}" + ); + let v: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + assert_eq!(v["error"]["code"], "rollback_failed"); + assert!( + manifest_has_entry(&socket), + "remove --offline must NOT delete the entry when rollback can't run; stdout=\n{stdout}" + ); +} diff --git a/crates/socket-patch-cli/tests/repair_invariants.rs b/crates/socket-patch-cli/tests/repair_invariants.rs index 72d5e842..4c3ca0f8 100644 --- a/crates/socket-patch-cli/tests/repair_invariants.rs +++ b/crates/socket-patch-cli/tests/repair_invariants.rs @@ -258,10 +258,11 @@ fn repair_download_only_skips_cleanup() { // We can't use `run_repair` here because it injects `--offline`, // and `--offline` is mutually exclusive with `--download-only` // (offline = strict airgap, download-only = network-only). Invoke - // the binary directly. The manifest already references every - // patched blob, so even without `--offline` there's nothing - // missing for the download phase to actually fetch — the test - // stays hermetic. + // the binary directly. We pin `--download-mode file` so the + // already-present `afterHash` blob fully satisfies the download + // phase — there's nothing missing to fetch, so the test stays + // hermetic (no network). The default `diff` mode would instead look + // for `.tar.gz`, which is absent, and try to hit the network. 
let tmp = tempfile::tempdir().expect("tempdir"); let socket = make_socket_dir(tmp.path()); write_blob(&socket, REFERENCED_HASH, b"patched content"); @@ -269,7 +270,7 @@ fn repair_download_only_skips_cleanup() { write_blob(&socket, &orphan_hash, b"orphaned content"); let out = Command::new(binary()) - .args(["repair", "--json", "--download-only"]) + .args(["repair", "--json", "--download-only", "--download-mode", "file"]) .current_dir(tmp.path()) .env_remove("SOCKET_API_TOKEN") .output() diff --git a/crates/socket-patch-cli/tests/setup_invariants.rs b/crates/socket-patch-cli/tests/setup_invariants.rs index e0bc7797..39b5c552 100644 --- a/crates/socket-patch-cli/tests/setup_invariants.rs +++ b/crates/socket-patch-cli/tests/setup_invariants.rs @@ -236,3 +236,101 @@ fn setup_yes_json_files_entry_has_expected_keys() { assert!(entry["path"].is_string()); assert!(entry["status"].is_string()); } + +// --------------------------------------------------------------------------- +// Error handling — a malformed package.json must NOT be reported as success. +// +// Regression: when nothing was updatable but a file errored (e.g. invalid +// JSON), `setup` used to emit `status: "already_configured"` with exit 0, +// masking the failure. A parse error must surface as a non-zero exit. 
+// --------------------------------------------------------------------------- + +#[test] +fn setup_malformed_package_json_reports_error_and_exits_nonzero() { + let tmp = tempfile::tempdir().expect("tempdir"); + write(&tmp.path().join("package.json"), "not valid json!!!"); + + let (code, stdout) = run_setup(tmp.path(), &["--yes"]); + assert_eq!(code, 1, "a malformed package.json must exit non-zero; stdout=\n{stdout}"); + let v: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + assert_eq!( + v["status"], "error", + "must not be reported as already_configured" + ); + assert_eq!(v["updated"], 0); + assert_eq!(v["alreadyConfigured"], 0); + assert_eq!(v["errors"], 1); + let files = v["files"].as_array().expect("files array"); + assert_eq!(files[0]["status"], "error"); + assert!(files[0]["error"].is_string()); +} + +#[test] +fn setup_malformed_does_not_claim_already_configured_in_human_mode() { + let tmp = tempfile::tempdir().expect("tempdir"); + write(&tmp.path().join("package.json"), "not valid json!!!"); + + // Human (non-JSON) mode: the misleading "All package.json files are + // already configured" line must not appear when a file errored. + let out = Command::new(binary()) + .args(["setup", "--yes"]) + .current_dir(tmp.path()) + .env_remove("SOCKET_API_TOKEN") + .output() + .expect("run socket-patch"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert_eq!(out.status.code(), Some(1), "human mode must exit 1; stdout=\n{stdout}"); + assert!( + !stdout.contains("already configured with socket-patch"), + "must not falsely claim everything is already configured; stdout=\n{stdout}" + ); +} + +#[test] +fn setup_dry_run_with_error_exits_nonzero() { + // A valid root (would-update) alongside a malformed workspace member: + // dry-run must still surface the parse error via a non-zero exit rather + // than masking it behind the `dry_run` status. 
+ let tmp = tempfile::tempdir().expect("tempdir"); + write( + &tmp.path().join("package.json"), + r#"{ "name": "root", "workspaces": ["packages/*"] } +"#, + ); + write(&tmp.path().join("packages/a/package.json"), "{bad json"); + + let (code, stdout) = run_setup(tmp.path(), &["--dry-run"]); + assert_eq!(code, 1, "dry-run with an error must exit non-zero; stdout=\n{stdout}"); + let v: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + assert_eq!(v["status"], "dry_run"); + assert_eq!(v["errors"], 1); + assert_eq!(v["wouldUpdate"], 1); + + // dry-run must not have written anything. + let root = std::fs::read_to_string(tmp.path().join("package.json")).unwrap(); + assert!(!root.contains("socket-patch"), "dry-run must not modify files"); +} + +#[test] +fn setup_partial_failure_exits_nonzero_when_applying() { + // One updatable file + one malformed file, applied for real (--yes): + // the run must report partial_failure and exit 1. + let tmp = tempfile::tempdir().expect("tempdir"); + write( + &tmp.path().join("package.json"), + r#"{ "name": "root", "workspaces": ["packages/*"] } +"#, + ); + write(&tmp.path().join("packages/a/package.json"), "{bad json"); + + let (code, stdout) = run_setup(tmp.path(), &["--yes"]); + assert_eq!(code, 1, "partial failure must exit non-zero; stdout=\n{stdout}"); + let v: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + assert_eq!(v["status"], "partial_failure"); + assert_eq!(v["updated"], 1); + assert_eq!(v["errors"], 1); + + // The valid root file should have been written. 
+ let root = std::fs::read_to_string(tmp.path().join("package.json")).unwrap(); + assert!(root.contains("socket-patch"), "valid file should still be updated"); +} diff --git a/crates/socket-patch-core/src/api/blob_fetcher.rs b/crates/socket-patch-core/src/api/blob_fetcher.rs index cf96a1d5..114af848 100644 --- a/crates/socket-patch-core/src/api/blob_fetcher.rs +++ b/crates/socket-patch-core/src/api/blob_fetcher.rs @@ -76,10 +76,7 @@ pub type OnProgress = Box; /// Only checks `afterHash` blobs because those are the patched file /// contents needed for applying patches. `beforeHash` blobs are /// downloaded on-demand during rollback. -pub async fn get_missing_blobs( - manifest: &PatchManifest, - blobs_path: &Path, -) -> HashSet { +pub async fn get_missing_blobs(manifest: &PatchManifest, blobs_path: &Path) -> HashSet { let after_hash_blobs = get_after_hash_blobs(manifest); let mut missing = HashSet::new(); @@ -140,7 +137,11 @@ where .into_iter() .map(|item| { let (hash, error) = into_pair(item); - BlobFetchResult { hash, success: false, error: Some(error) } + BlobFetchResult { + hash, + success: false, + error: Some(error), + } }) .collect(); let failed = results.len(); @@ -204,8 +205,7 @@ pub async fn fetch_blobs_by_hash( }; } - let download_result = - download_hashes(&to_download, blobs_path, client, on_progress).await; + let download_result = download_hashes(&to_download, blobs_path, client, on_progress).await; FetchMissingBlobsResult { total: hashes.len(), @@ -269,14 +269,16 @@ pub async fn fetch_missing_sources( None => FetchMissingBlobsResult::default(), }, DownloadMode::Package => match sources.packages_path { - Some(dir) => fetch_missing_archives_inner( - manifest, - dir, - ArchiveKind::Package, - client, - on_progress, - ) - .await, + Some(dir) => { + fetch_missing_archives_inner( + manifest, + dir, + ArchiveKind::Package, + client, + on_progress, + ) + .await + } None => FetchMissingBlobsResult::default(), }, } @@ -302,7 +304,10 @@ async fn 
fetch_missing_archives_inner( if let Err(e) = tokio::fs::create_dir_all(archives_dir).await { return all_failed_result(missing.iter(), |u| { - (u.clone(), format!("Cannot create archives directory: {}", e)) + ( + u.clone(), + format!("Cannot create archives directory: {}", e), + ) }); } @@ -390,6 +395,13 @@ pub fn format_fetch_result(result: &FetchMissingBlobsResult) -> String { lines.push(format!("Downloaded {} blob(s)", result.downloaded)); } + if result.skipped > 0 { + lines.push(format!( + "{} blob(s) already present locally", + result.skipped + )); + } + if result.failed > 0 { lines.push(format!("Failed to download {} blob(s)", result.failed)); @@ -411,11 +423,32 @@ pub fn format_fetch_result(result: &FetchMissingBlobsResult) -> String { } } + // `total > 0` but nothing downloaded, skipped, or failed should not be + // reachable, but guard against emitting a misleading blank string. + if lines.is_empty() { + return "All blobs are present locally.".to_string(); + } + lines.join("\n") } // ── Internal helpers ────────────────────────────────────────────────── +/// Compare an expected blob hash against the hash computed from the +/// downloaded bytes. +/// +/// Git object hashes are hex, and hex is case-insensitive. The content +/// hasher ([`compute_git_sha256_from_bytes`]) always emits lowercase, but +/// [`ApiClient::fetch_blob`]'s validator accepts uppercase hex too — so a +/// manifest (or server) that uses uppercase would download byte-for-byte +/// correct content and then be wrongly rejected by a case-sensitive +/// comparison. Compare ignoring ASCII case to keep the two consistent. +/// +/// [`compute_git_sha256_from_bytes`]: crate::hash::git_sha256::compute_git_sha256_from_bytes +fn blob_hash_matches(expected: &str, actual: &str) -> bool { + expected.eq_ignore_ascii_case(actual) +} + /// Download a list of blob hashes sequentially, writing each to /// `blobs_path/`. 
async fn download_hashes( @@ -438,7 +471,7 @@ async fn download_hashes( Ok(Some(data)) => { // Verify content hash matches expected hash before writing let actual_hash = crate::hash::git_sha256::compute_git_sha256_from_bytes(&data); - if actual_hash != *hash { + if !blob_hash_matches(hash, &actual_hash) { results.push(BlobFetchResult { hash: hash.clone(), success: false, @@ -511,11 +544,7 @@ mod tests { files.insert( format!("package/file{}.js", i), PatchFileInfo { - before_hash: format!( - "before{}{}", - "0".repeat(58), - format!("{:06}", i) - ), + before_hash: format!("before{}{}", "0".repeat(58), format!("{:06}", i)), after_hash: ah.to_string(), }, ); @@ -564,7 +593,9 @@ mod tests { let h2 = "b".repeat(64); // Write h1 to disk so it is NOT missing - tokio::fs::write(blobs_path.join(&h1), b"data").await.unwrap(); + tokio::fs::write(blobs_path.join(&h1), b"data") + .await + .unwrap(); let manifest = make_manifest_with_hashes(&[&h1, &h2]); let missing = get_missing_blobs(&manifest, &blobs_path).await; @@ -593,7 +624,10 @@ mod tests { skipped: 0, results: Vec::new(), }; - assert_eq!(format_fetch_result(&result), "All blobs are present locally."); + assert_eq!( + format_fetch_result(&result), + "All blobs are present locally." 
+ ); } #[test] @@ -659,9 +693,21 @@ mod tests { failed: 0, skipped: 0, results: vec![ - BlobFetchResult { hash: "a".repeat(64), success: true, error: None }, - BlobFetchResult { hash: "b".repeat(64), success: true, error: None }, - BlobFetchResult { hash: "c".repeat(64), success: true, error: None }, + BlobFetchResult { + hash: "a".repeat(64), + success: true, + error: None, + }, + BlobFetchResult { + hash: "b".repeat(64), + success: true, + error: None, + }, + BlobFetchResult { + hash: "c".repeat(64), + success: true, + error: None, + }, ], }; let output = format_fetch_result(&result); @@ -795,17 +841,103 @@ mod tests { let manifest = make_manifest_with_uuids(&["11111111-1111-4111-8111-111111111111"]); let (client, _) = crate::api::client::get_api_client_from_env(None).await; - let res = fetch_missing_sources(&manifest, &sources, DownloadMode::Diff, &client, None) - .await; + let res = + fetch_missing_sources(&manifest, &sources, DownloadMode::Diff, &client, None).await; assert_eq!(res.total, 0); assert_eq!(res.downloaded, 0); assert_eq!(res.failed, 0); - let res = fetch_missing_sources(&manifest, &sources, DownloadMode::Package, &client, None) - .await; + let res = + fetch_missing_sources(&manifest, &sources, DownloadMode::Package, &client, None).await; assert_eq!(res.total, 0); } + // ── Regression: skipped accounting in format ───────────────────── + + #[test] + fn test_format_all_skipped_is_not_blank() { + // Regression: `fetch_blobs_by_hash` can return total>0 with every + // blob already on disk (downloaded=0, failed=0, skipped=N). The + // formatter must surface that rather than returning a blank line. 
+ let result = FetchMissingBlobsResult { + total: 2, + downloaded: 0, + failed: 0, + skipped: 2, + results: vec![ + BlobFetchResult { + hash: "a".repeat(64), + success: true, + error: None, + }, + BlobFetchResult { + hash: "b".repeat(64), + success: true, + error: None, + }, + ], + }; + let output = format_fetch_result(&result); + assert!(!output.trim().is_empty(), "must not be blank: {:?}", output); + assert!(output.contains("2 blob(s) already present")); + assert!(!output.contains("Downloaded")); + assert!(!output.contains("Failed")); + } + + #[test] + fn test_format_downloaded_and_skipped_mix() { + let result = FetchMissingBlobsResult { + total: 3, + downloaded: 1, + failed: 0, + skipped: 2, + results: vec![ + BlobFetchResult { + hash: "a".repeat(64), + success: true, + error: None, + }, + BlobFetchResult { + hash: "b".repeat(64), + success: true, + error: None, + }, + BlobFetchResult { + hash: "c".repeat(64), + success: true, + error: None, + }, + ], + }; + let output = format_fetch_result(&result); + assert!(output.contains("Downloaded 1 blob(s)")); + assert!(output.contains("2 blob(s) already present")); + } + + // ── Regression: hash comparison is case-insensitive ────────────── + + #[test] + fn test_blob_hash_matches_is_case_insensitive() { + // Hex is case-insensitive. `compute_git_sha256_from_bytes` emits + // lowercase, but `is_valid_sha256_hex` accepts uppercase, so the + // verification must treat the two as equal (otherwise valid + // uppercase-hash content is wrongly rejected as a mismatch). + let lower = "abc123".to_string() + &"0".repeat(58); + let upper = lower.to_ascii_uppercase(); + assert!(blob_hash_matches(&upper, &lower)); + assert!(blob_hash_matches(&lower, &upper)); + assert!(blob_hash_matches(&lower, &lower)); + } + + #[test] + fn test_blob_hash_matches_rejects_genuine_mismatch() { + let a = "a".repeat(64); + let b = "b".repeat(64); + assert!(!blob_hash_matches(&a, &b)); + // Differing length is still a mismatch. 
+ assert!(!blob_hash_matches(&a, "aa")); + } + #[test] fn test_format_only_failed() { let result = FetchMissingBlobsResult { diff --git a/crates/socket-patch-core/src/api/client.rs b/crates/socket-patch-core/src/api/client.rs index a1dfe50f..3e6a2d89 100644 --- a/crates/socket-patch-core/src/api/client.rs +++ b/crates/socket-patch-core/src/api/client.rs @@ -88,10 +88,7 @@ impl ApiClient { header::USER_AGENT, HeaderValue::from_static(USER_AGENT_VALUE), ); - default_headers.insert( - header::ACCEPT, - HeaderValue::from_static("application/json"), - ); + default_headers.insert(header::ACCEPT, HeaderValue::from_static("application/json")); if let Some(ref token) = options.api_token { if let Ok(hv) = HeaderValue::from_str(&format!("Bearer {}", token)) { @@ -180,9 +177,9 @@ impl ApiClient { Ok(Some(body)) } StatusCode::NOT_FOUND => Ok(None), - StatusCode::UNAUTHORIZED => { - Err(ApiError::Unauthorized("Unauthorized: Invalid API token".into())) - } + StatusCode::UNAUTHORIZED => Err(ApiError::Unauthorized( + "Unauthorized: Invalid API token".into(), + )), StatusCode::FORBIDDEN => { let msg = if use_public_proxy { "Forbidden: This patch is only available to paid subscribers. \ @@ -193,11 +190,9 @@ impl ApiClient { }; Err(ApiError::Forbidden(msg.into())) } - StatusCode::TOO_MANY_REQUESTS => { - Err(ApiError::RateLimited( - "Rate limit exceeded. Please try again later.".into(), - )) - } + StatusCode::TOO_MANY_REQUESTS => Err(ApiError::RateLimited( + "Rate limit exceeded. 
Please try again later.".into(), + )), _ => { let text = resp.text().await.unwrap_or_default(); Err(ApiError::Other(format!( @@ -222,9 +217,7 @@ impl ApiClient { let path = if self.use_public_proxy { format!("/patch/view/{}", uuid) } else { - let slug = org_slug - .or(self.org_slug.as_deref()) - .unwrap_or("default"); + let slug = org_slug.or(self.org_slug.as_deref()).unwrap_or("default"); format!("/v0/orgs/{}/patches/view/{}", slug, uuid) }; self.get_json(&path).await @@ -243,9 +236,7 @@ impl ApiClient { let path = if self.use_public_proxy { format!("/patch/{route}/{encoded}") } else { - let slug = org_slug - .or(self.org_slug.as_deref()) - .unwrap_or("default"); + let slug = org_slug.or(self.org_slug.as_deref()).unwrap_or("default"); format!("/v0/orgs/{slug}/patches/{route}/{encoded}") }; let result = self.get_json::(&path).await?; @@ -261,7 +252,8 @@ impl ApiClient { org_slug: Option<&str>, cve_id: &str, ) -> Result { - self.search_patches_by_route(org_slug, "by-cve", cve_id).await + self.search_patches_by_route(org_slug, "by-cve", cve_id) + .await } /// Search patches by GHSA ID. @@ -270,7 +262,8 @@ impl ApiClient { org_slug: Option<&str>, ghsa_id: &str, ) -> Result { - self.search_patches_by_route(org_slug, "by-ghsa", ghsa_id).await + self.search_patches_by_route(org_slug, "by-ghsa", ghsa_id) + .await } /// Search patches by package PURL. @@ -282,7 +275,8 @@ impl ApiClient { org_slug: Option<&str>, purl: &str, ) -> Result { - self.search_patches_by_route(org_slug, "by-package", purl).await + self.search_patches_by_route(org_slug, "by-package", purl) + .await } /// Search patches for multiple packages (batch). 
@@ -299,9 +293,7 @@ impl ApiClient { purls: &[String], ) -> Result { if !self.use_public_proxy { - let slug = org_slug - .or(self.org_slug.as_deref()) - .unwrap_or("default"); + let slug = org_slug.or(self.org_slug.as_deref()).unwrap_or("default"); let path = format!("/v0/orgs/{}/patches/batch", slug); let body = BatchSearchBody { components: purls @@ -309,7 +301,9 @@ impl ApiClient { .map(|p| BatchComponent { purl: p.clone() }) .collect(), }; - let result = self.post_json::(&path, &body).await?; + let result = self + .post_json::(&path, &body) + .await?; return Ok(result.unwrap_or_else(|| BatchSearchResponse { packages: Vec::new(), can_access_paid_patches: false, @@ -317,7 +311,8 @@ impl ApiClient { } // Public proxy: fall back to individual per-package GET requests - self.search_patches_batch_via_individual_queries(purls).await + self.search_patches_batch_via_individual_queries(purls) + .await } /// Internal: fall back to individual GET requests per PURL when the @@ -331,9 +326,6 @@ impl ApiClient { ) -> Result { const CONCURRENCY_LIMIT: usize = 10; - let mut packages: Vec = Vec::new(); - let mut can_access_paid_patches = false; - // Collect all (purl, response) pairs let mut all_results: Vec<(String, Option)> = Vec::new(); @@ -366,33 +358,8 @@ impl ApiClient { } } - // Convert individual SearchResponse results to BatchSearchResponse format - for (purl, response) in all_results { - let response = match response { - Some(r) if !r.patches.is_empty() => r, - _ => continue, - }; - - if response.can_access_paid_patches { - can_access_paid_patches = true; - } - - let batch_patches: Vec = response - .patches - .into_iter() - .map(convert_search_result_to_batch_info) - .collect(); - - packages.push(BatchPackagePatches { - purl, - patches: batch_patches, - }); - } - - Ok(BatchSearchResponse { - packages, - can_access_paid_patches, - }) + // Convert the individual SearchResponse results into the batch shape. 
+ Ok(assemble_batch_from_individual(all_results)) } /// Fetch organizations accessible to the current API token. @@ -416,23 +383,7 @@ impl ApiClient { /// If there are none, returns an error. pub async fn resolve_org_slug(&self) -> Result { let orgs = self.fetch_organizations().await?; - match orgs.len() { - 0 => Err(ApiError::Other( - "No organizations found for this API token.".into(), - )), - 1 => Ok(orgs.into_iter().next().unwrap().slug), - _ => { - let slugs: Vec<_> = orgs.iter().map(|o| o.slug.as_str()).collect(); - let first = orgs[0].slug.clone(); - eprintln!( - "Multiple organizations found: {}. Using \"{}\". \ - Pass --org to select a different one.", - slugs.join(", "), - first - ); - Ok(first) - } - } + select_org_slug(orgs) } /// Fetch a blob by its SHA-256 hash. @@ -479,6 +430,44 @@ impl ApiClient { self.fetch_binary("package", "package", uuid).await } + /// Build the URL (and an `is_authenticated` flag) for a binary fetch of + /// `kind` (`blob` / `diff` / `package`) identified by `identifier`. + /// + /// Uses the authenticated `/v0/orgs//patches/...` endpoint when a + /// token and org slug are configured (and we're not pinned to the public + /// proxy). Otherwise it targets the public proxy. + /// + /// In public-proxy mode the base is the client's own configured `api_url` + /// — the same value the JSON endpoints (`get_json`/`post_json`) use — so an + /// explicit `--proxy-url` / `SOCKET_PROXY_URL` override is honored for + /// binary downloads too. Only when falling back from an *authenticated* + /// client that lacks an org slug (so `api_url` is the auth host, not a + /// proxy) do we re-derive the proxy base from the environment. 
+ fn binary_url(&self, kind: &str, identifier: &str) -> (String, bool) { + if self.api_token.is_some() && self.org_slug.is_some() && !self.use_public_proxy { + let slug = self.org_slug.as_deref().unwrap(); + let u = format!( + "{}/v0/orgs/{}/patches/{}/{}", + self.api_url, slug, kind, identifier + ); + (u, true) + } else { + let base = if self.use_public_proxy { + self.api_url.clone() + } else { + read_env_with_legacy("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL") + .unwrap_or_else(|| DEFAULT_PATCH_API_PROXY_URL.to_string()) + }; + let u = format!( + "{}/patch/{}/{}", + base.trim_end_matches('/'), + kind, + identifier + ); + (u, false) + } + } + /// Shared implementation for `fetch_blob` / `fetch_diff` / `fetch_package`. /// /// `kind` is the URL segment (`blob` / `diff` / `package`). `label` is the @@ -490,26 +479,7 @@ impl ApiClient { label: &str, identifier: &str, ) -> Result>, ApiError> { - let (url, use_auth) = - if self.api_token.is_some() && self.org_slug.is_some() && !self.use_public_proxy { - let slug = self.org_slug.as_deref().unwrap(); - let u = format!( - "{}/v0/orgs/{}/patches/{}/{}", - self.api_url, slug, kind, identifier - ); - (u, true) - } else { - let proxy_url = - read_env_with_legacy("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL") - .unwrap_or_else(|| DEFAULT_PATCH_API_PROXY_URL.to_string()); - let u = format!( - "{}/patch/{}/{}", - proxy_url.trim_end_matches('/'), - kind, - identifier - ); - (u, false) - }; + let (url, use_auth) = self.binary_url(kind, identifier); debug_log(&format!("GET {} {}", label, url)); @@ -623,9 +593,7 @@ pub async fn get_api_client_from_env(org_slug: Option<&str>) -> (ApiClient, bool /// corresponding env var. Used by CLI commands that expose `--api-url`, /// `--api-token`, `--org`, `--proxy-url` flags via [`crate::utils`] in the /// CLI crate. 
-pub async fn get_api_client_with_overrides( - overrides: ApiClientEnvOverrides, -) -> (ApiClient, bool) { +pub async fn get_api_client_with_overrides(overrides: ApiClientEnvOverrides) -> (ApiClient, bool) { let api_token = overrides .api_token .or_else(|| std::env::var("SOCKET_API_TOKEN").ok()) @@ -639,9 +607,7 @@ pub async fn get_api_client_with_overrides( read_env_with_legacy("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL") .unwrap_or_else(|| DEFAULT_PATCH_API_PROXY_URL.to_string()) }); - eprintln!( - "No SOCKET_API_TOKEN set. Using public patch API proxy (free patches only)." - ); + eprintln!("No SOCKET_API_TOKEN set. Using public patch API proxy (free patches only)."); let client = ApiClient::new(ApiClientOptions { api_url: proxy_url, api_token: None, @@ -792,6 +758,36 @@ pub fn is_fallback_candidate(err: &ApiError) -> bool { matches!(err, ApiError::Unauthorized(_) | ApiError::Forbidden(_)) } +/// Choose an org slug from the list returned by `/v0/organizations`. +/// +/// Returns an error when the list is empty, the sole slug when there is +/// exactly one, and the first slug (with a warning) when there are several. +/// +/// `fetch_organizations` collects from a `HashMap`, so the upstream order is +/// not stable across runs. We sort by slug first so the chosen org *and* the +/// warning text are deterministic — otherwise a token with multiple orgs +/// could silently operate against a different org on each invocation. +fn select_org_slug(mut orgs: Vec) -> Result { + orgs.sort_by(|a, b| a.slug.cmp(&b.slug)); + match orgs.len() { + 0 => Err(ApiError::Other( + "No organizations found for this API token.".into(), + )), + 1 => Ok(orgs.into_iter().next().unwrap().slug), + _ => { + let slugs: Vec<_> = orgs.iter().map(|o| o.slug.as_str()).collect(); + let first = orgs[0].slug.clone(); + eprintln!( + "Multiple organizations found: {}. Using \"{}\". 
\ + Pass --org to select a different one.", + slugs.join(", "), + first + ); + Ok(first) + } + } +} + // ── Helpers ─────────────────────────────────────────────────────────── /// Percent-encode a string for use in URL path segments. @@ -850,7 +846,14 @@ fn convert_search_result_to_batch_info(patch: PatchSearchResult) -> BatchPatchIn let mut seen_cves: HashSet = HashSet::new(); - for (ghsa_id, vuln) in &patch.vulnerabilities { + // `vulnerabilities` is a HashMap, so iterate in a stable (GHSA-id) order. + // Otherwise the chosen `title` (first non-empty summary) — and the + // first-seen tie-break for equal severities — would vary across runs. + let mut entries: Vec<(&String, &VulnerabilityResponse)> = + patch.vulnerabilities.iter().collect(); + entries.sort_by(|a, b| a.0.cmp(b.0)); + + for (ghsa_id, vuln) in entries { ghsa_ids.push(ghsa_id.clone()); for cve in &vuln.cves { @@ -891,6 +894,52 @@ fn convert_search_result_to_batch_info(patch: PatchSearchResult) -> BatchPatchIn } } +/// Assemble a [`BatchSearchResponse`] from the per-PURL [`SearchResponse`]s +/// gathered by the public-proxy fallback (one GET per package). +/// +/// A `None` entry is a query that errored and is skipped. The +/// `can_access_paid_patches` capability is OR-aggregated across **every** +/// successful response — independent of whether that response carried any +/// patches — because it is a global capability signal, not a per-package +/// one. The empty-patches check only governs whether a package is added to +/// the `packages` list (an empty package would be noise), so it must run +/// *after* the flag is observed; folding it into the same skip would drop a +/// `canAccessPaidPatches: true` that arrived alongside an empty patch list. 
+fn assemble_batch_from_individual( + results: Vec<(String, Option)>, +) -> BatchSearchResponse { + let mut packages: Vec = Vec::new(); + let mut can_access_paid_patches = false; + + for (purl, response) in results { + let Some(response) = response else { continue }; + + if response.can_access_paid_patches { + can_access_paid_patches = true; + } + + if response.patches.is_empty() { + continue; + } + + let batch_patches: Vec = response + .patches + .into_iter() + .map(convert_search_result_to_batch_info) + .collect(); + + packages.push(BatchPackagePatches { + purl, + patches: batch_patches, + }); + } + + BatchSearchResponse { + packages, + can_access_paid_patches, + } +} + // ── Error type ──────────────────────────────────────────────────────── /// Errors returned by [`ApiClient`] methods. @@ -952,7 +1001,10 @@ mod tests { assert!(get_severity_order(Some("high")) < get_severity_order(Some("medium"))); assert!(get_severity_order(Some("medium")) < get_severity_order(Some("low"))); assert!(get_severity_order(Some("low")) < get_severity_order(None)); - assert_eq!(get_severity_order(Some("unknown")), get_severity_order(None)); + assert_eq!( + get_severity_order(Some("unknown")), + get_severity_order(None) + ); } #[test] @@ -1061,7 +1113,11 @@ mod tests { let patch = make_patch(vulns, "desc"); let info = convert_search_result_to_batch_info(patch); // Same CVE in both vulns should only appear once - let cve_count = info.cve_ids.iter().filter(|c| *c == "CVE-2024-0001").count(); + let cve_count = info + .cve_ids + .iter() + .filter(|c| *c == "CVE-2024-0001") + .count(); assert_eq!(cve_count, 1); } @@ -1069,10 +1125,7 @@ mod tests { fn test_convert_title_truncated_at_100() { let long_summary = "x".repeat(150); let mut vulns = HashMap::new(); - vulns.insert( - "GHSA-1111".into(), - make_vuln(&long_summary, "high", vec![]), - ); + vulns.insert("GHSA-1111".into(), make_vuln(&long_summary, "high", vec![])); let patch = make_patch(vulns, "desc"); let info = 
convert_search_result_to_batch_info(patch); // Should be 97 chars + "..." = 100 chars @@ -1104,10 +1157,7 @@ mod tests { #[test] fn test_convert_title_falls_back_to_description() { let mut vulns = HashMap::new(); - vulns.insert( - "GHSA-1111".into(), - make_vuln("", "high", vec![]), - ); + vulns.insert("GHSA-1111".into(), make_vuln("", "high", vec![])); let patch = make_patch(vulns, "Fallback desc"); let info = convert_search_result_to_batch_info(patch); assert_eq!(info.title, "Fallback desc"); @@ -1116,10 +1166,7 @@ mod tests { #[test] fn test_convert_empty_summary_and_description() { let mut vulns = HashMap::new(); - vulns.insert( - "GHSA-1111".into(), - make_vuln("", "high", vec![]), - ); + vulns.insert("GHSA-1111".into(), make_vuln("", "high", vec![])); let patch = make_patch(vulns, ""); let info = convert_search_result_to_batch_info(patch); assert!(info.title.is_empty()); @@ -1298,8 +1345,7 @@ mod tests { #[test] fn validate_token_shape_flags_too_short() { - let msg = validate_token_shape("sktsec_abc_api") - .expect("short token must be flagged"); + let msg = validate_token_shape("sktsec_abc_api").expect("short token must be flagged"); assert!(msg.contains("does not look like a Socket API token")); assert!(!msg.contains("SRI-format hash")); } @@ -1319,4 +1365,235 @@ mod tests { assert!(!looks_like_token_hash("hello")); assert!(!looks_like_token_hash("")); } + + // ── binary_url: proxy override must reach blob/diff/package fetches ── + // + // Regression: `fetch_binary` used to re-derive the proxy base from + // `SOCKET_PROXY_URL`/default instead of the client's configured + // `api_url`, so a `--proxy-url` override (which sets `api_url` but no env + // var) was honored for searches yet silently ignored for downloads. 
+ + fn proxy_client(api_url: &str) -> ApiClient { + ApiClient::new(ApiClientOptions { + api_url: api_url.into(), + api_token: None, + use_public_proxy: true, + org_slug: None, + }) + } + + #[test] + fn binary_url_proxy_uses_configured_api_url() { + let client = proxy_client("https://custom.proxy.example"); + let (url, use_auth) = client.binary_url("blob", "deadbeef"); + assert!(!use_auth); + assert_eq!(url, "https://custom.proxy.example/patch/blob/deadbeef"); + } + + #[test] + fn binary_url_proxy_covers_diff_and_package() { + let client = proxy_client("https://custom.proxy.example"); + assert_eq!( + client.binary_url("diff", "uuid-1").0, + "https://custom.proxy.example/patch/diff/uuid-1" + ); + assert_eq!( + client.binary_url("package", "uuid-1").0, + "https://custom.proxy.example/patch/package/uuid-1" + ); + } + + #[test] + fn binary_url_proxy_trims_trailing_slash() { + // `new()` trims the trailing slash on api_url; binary_url also trims + // defensively so the path never ends up with a doubled separator. 
+ let client = proxy_client("https://custom.proxy.example/"); + assert_eq!( + client.binary_url("blob", "x").0, + "https://custom.proxy.example/patch/blob/x" + ); + } + + #[test] + fn binary_url_authenticated_uses_org_path() { + let client = ApiClient::new(ApiClientOptions { + api_url: "https://api.socket.dev".into(), + api_token: Some("sktsec_x_api".into()), + use_public_proxy: false, + org_slug: Some("my-org".into()), + }); + let (url, use_auth) = client.binary_url("diff", "uuid-123"); + assert!(use_auth); + assert_eq!( + url, + "https://api.socket.dev/v0/orgs/my-org/patches/diff/uuid-123" + ); + } + + // ── select_org_slug: deterministic org selection ──────────────────── + + fn org(slug: &str) -> crate::api::types::OrganizationInfo { + crate::api::types::OrganizationInfo { + id: format!("id-{slug}"), + name: Some(slug.to_string()), + image: None, + plan: "free".into(), + slug: slug.into(), + } + } + + #[test] + fn select_org_slug_errors_when_empty() { + assert!(matches!(select_org_slug(vec![]), Err(ApiError::Other(_)))); + } + + #[test] + fn select_org_slug_returns_sole_org() { + assert_eq!(select_org_slug(vec![org("acme")]).unwrap(), "acme"); + } + + #[test] + fn select_org_slug_is_deterministic_for_multiple() { + // Regardless of the (HashMap-derived) input order, the + // lexicographically-first slug is chosen so repeated runs agree. 
+ let a = select_org_slug(vec![org("zeta"), org("alpha"), org("mid")]).unwrap(); + let b = select_org_slug(vec![org("mid"), org("zeta"), org("alpha")]).unwrap(); + assert_eq!(a, "alpha"); + assert_eq!(b, "alpha"); + } + + // ── assemble_batch_from_individual: proxy-fallback aggregation ────── + + fn search_response( + purl: &str, + can_access_paid_patches: bool, + patch_uuids: &[&str], + ) -> SearchResponse { + SearchResponse { + patches: patch_uuids + .iter() + .map(|uuid| PatchSearchResult { + uuid: (*uuid).into(), + purl: purl.into(), + published_at: "2024-01-01".into(), + description: "desc".into(), + license: "MIT".into(), + tier: "free".into(), + vulnerabilities: HashMap::new(), + }) + .collect(), + can_access_paid_patches, + } + } + + #[test] + fn assemble_batch_collects_patches_per_purl() { + let results = vec![ + ( + "pkg:npm/a@1".to_string(), + Some(search_response("pkg:npm/a@1", false, &["uuid-a"])), + ), + ( + "pkg:npm/b@1".to_string(), + Some(search_response( + "pkg:npm/b@1", + false, + &["uuid-b1", "uuid-b2"], + )), + ), + ]; + let batch = assemble_batch_from_individual(results); + assert_eq!(batch.packages.len(), 2); + assert!(!batch.can_access_paid_patches); + let a = batch + .packages + .iter() + .find(|p| p.purl == "pkg:npm/a@1") + .unwrap(); + assert_eq!(a.patches.len(), 1); + let b = batch + .packages + .iter() + .find(|p| p.purl == "pkg:npm/b@1") + .unwrap(); + assert_eq!(b.patches.len(), 2); + } + + #[test] + fn assemble_batch_skips_errored_and_empty_responses() { + // None = query errored; an empty patch list contributes no package. + let results = vec![ + ("pkg:npm/err@1".to_string(), None), + ( + "pkg:npm/empty@1".to_string(), + Some(search_response("pkg:npm/empty@1", false, &[])), + ), + ( + "pkg:npm/ok@1".to_string(), + Some(search_response("pkg:npm/ok@1", false, &["uuid-ok"])), + ), + ]; + let batch = assemble_batch_from_individual(results); + // Only the package with at least one patch is listed. 
+ assert_eq!(batch.packages.len(), 1); + assert_eq!(batch.packages[0].purl, "pkg:npm/ok@1"); + } + + #[test] + fn assemble_batch_aggregates_paid_flag_across_all_responses() { + // OR-aggregation: any response with the flag set flips the aggregate. + let results = vec![ + ( + "pkg:npm/a@1".to_string(), + Some(search_response("pkg:npm/a@1", false, &["uuid-a"])), + ), + ( + "pkg:npm/b@1".to_string(), + Some(search_response("pkg:npm/b@1", true, &["uuid-b"])), + ), + ]; + let batch = assemble_batch_from_individual(results); + assert!(batch.can_access_paid_patches); + } + + #[test] + fn assemble_batch_keeps_paid_flag_from_empty_patch_response() { + // Regression: the capability flag must survive even when the response + // that carries it has *no* patches. The empty-patch response must not + // be listed as a package, but its `canAccessPaidPatches: true` must + // still flip the aggregate flag — a fused skip would have dropped it. + let results = vec![ + ( + "pkg:npm/free@1".to_string(), + Some(search_response("pkg:npm/free@1", false, &["uuid-free"])), + ), + ( + "pkg:npm/paid-only@1".to_string(), + Some(search_response("pkg:npm/paid-only@1", true, &[])), + ), + ]; + let batch = assemble_batch_from_individual(results); + assert!( + batch.can_access_paid_patches, + "paid-access flag from an empty-patch response was dropped" + ); + // The empty-patch package must not appear in the listing. + assert_eq!(batch.packages.len(), 1); + assert_eq!(batch.packages[0].purl, "pkg:npm/free@1"); + } + + // ── convert: title selection is deterministic ─────────────────────── + + #[test] + fn test_convert_title_deterministic_across_iteration_order() { + // Two vulns, each with a non-empty summary. The title must always be + // drawn from the lexicographically-first GHSA id so the value is + // stable across runs (HashMap iteration order is not). 
+ let mut vulns = HashMap::new(); + vulns.insert("GHSA-zzzz".into(), make_vuln("Z summary", "high", vec![])); + vulns.insert("GHSA-aaaa".into(), make_vuln("A summary", "high", vec![])); + let patch = make_patch(vulns, "desc"); + let info = convert_search_result_to_batch_info(patch); + assert_eq!(info.title, "A summary"); + } } diff --git a/crates/socket-patch-core/src/api/types.rs b/crates/socket-patch-core/src/api/types.rs index f09c31d0..f3239bfc 100644 --- a/crates/socket-patch-core/src/api/types.rs +++ b/crates/socket-patch-core/src/api/types.rs @@ -244,4 +244,175 @@ mod tests { assert_eq!(back.published_at, "2024-06-15"); assert!(json.contains("publishedAt")); } + + // ── Regression: deserialize from realistic, server-shaped payloads ── + // + // The structs above are pure serde DTOs, so the only thing that can + // break is the JSON field-name contract with the Socket API. The tests + // below pin that contract by deserializing payloads in the *exact* + // camelCase shape the live endpoints emit (mirroring the integration + // fixtures under crates/socket-patch-cli/tests). A dropped or mistyped + // `rename_all` / field rename would fail these. + + #[test] + fn test_patch_response_full_view_payload_deserialize() { + // Mirrors GET /v0/orgs//patches/view/: populated files + // (every PatchFileResponse field present) and a populated + // vulnerabilities map keyed by GHSA id. 
+ let json = r#"{ + "uuid": "11111111-1111-4111-8111-111111111111", + "purl": "pkg:npm/x@1.0.0", + "publishedAt": "2024-01-01T00:00:00Z", + "files": { + "package/index.js": { + "beforeHash": "aaaa000000000000000000000000000000000000000000000000000000000000", + "afterHash": "bbbb000000000000000000000000000000000000000000000000000000000000", + "socketBlob": "blob-ref", + "blobContent": "YWZ0ZXIK", + "beforeBlobContent": "YmVmb3JlCg==" + } + }, + "vulnerabilities": { + "GHSA-jrhj-2j3q-xf3v": { + "cves": ["CVE-2024-1234"], + "summary": "Path traversal", + "severity": "high", + "description": "A path traversal vulnerability" + } + }, + "description": "Fix path traversal", + "license": "MIT", + "tier": "free" + }"#; + let pr: PatchResponse = serde_json::from_str(json).unwrap(); + assert_eq!(pr.published_at, "2024-01-01T00:00:00Z"); + + let file = pr.files.get("package/index.js").expect("file present"); + assert_eq!( + file.before_hash.as_deref(), + Some("aaaa000000000000000000000000000000000000000000000000000000000000") + ); + assert_eq!( + file.after_hash.as_deref(), + Some("bbbb000000000000000000000000000000000000000000000000000000000000") + ); + assert_eq!(file.socket_blob.as_deref(), Some("blob-ref")); + assert_eq!(file.blob_content.as_deref(), Some("YWZ0ZXIK")); + assert_eq!(file.before_blob_content.as_deref(), Some("YmVmb3JlCg==")); + + let vuln = pr + .vulnerabilities + .get("GHSA-jrhj-2j3q-xf3v") + .expect("vuln present"); + assert_eq!(vuln.cves, vec!["CVE-2024-1234"]); + assert_eq!(vuln.severity, "high"); + assert_eq!(vuln.summary, "Path traversal"); + } + + #[test] + fn test_patch_file_response_absent_optional_keys_are_none() { + // serde treats absent Option fields as None. The existing optional- + // fields test only round-trips explicit `null`s; this pins the + // (distinct) absent-key path the server actually uses when a blob + // isn't inlined. 
+ let pfr: PatchFileResponse = serde_json::from_str("{}").unwrap(); + assert!(pfr.before_hash.is_none()); + assert!(pfr.after_hash.is_none()); + assert!(pfr.socket_blob.is_none()); + assert!(pfr.blob_content.is_none()); + assert!(pfr.before_blob_content.is_none()); + } + + #[test] + fn test_batch_search_response_api_payload_deserialize() { + // Mirrors POST /v0/orgs//patches/batch. One patch carries a + // severity, the other omits it (Option -> None). + let json = r#"{ + "packages": [{ + "purl": "pkg:npm/x@1.0.0", + "patches": [ + { + "uuid": "u1", + "purl": "pkg:npm/x@1.0.0", + "tier": "free", + "cveIds": ["CVE-2024-0001"], + "ghsaIds": ["GHSA-1111-2222-3333"], + "severity": "high", + "title": "Patch one" + }, + { + "uuid": "u2", + "purl": "pkg:npm/x@1.0.0", + "tier": "paid", + "cveIds": [], + "ghsaIds": [], + "title": "Patch two" + } + ] + }], + "canAccessPaidPatches": true + }"#; + let bsr: BatchSearchResponse = serde_json::from_str(json).unwrap(); + assert!(bsr.can_access_paid_patches); + assert_eq!(bsr.packages.len(), 1); + let patches = &bsr.packages[0].patches; + assert_eq!(patches.len(), 2); + assert_eq!(patches[0].cve_ids, vec!["CVE-2024-0001"]); + assert_eq!(patches[0].ghsa_ids, vec!["GHSA-1111-2222-3333"]); + assert_eq!(patches[0].severity.as_deref(), Some("high")); + assert!(patches[1].severity.is_none()); + assert!(patches[1].cve_ids.is_empty()); + } + + #[test] + fn test_organizations_response_deserialize() { + // Mirrors GET /v0/organizations: an object keyed by org id. `name` + // and `image` are optional (one org omits both). 
+ let json = r#"{ + "organizations": { + "org-abc": { + "id": "org-abc", + "name": "Acme", + "image": "https://example.com/a.png", + "plan": "team", + "slug": "acme" + }, + "org-def": { + "id": "org-def", + "plan": "free", + "slug": "beta" + } + } + }"#; + let resp: OrganizationsResponse = serde_json::from_str(json).unwrap(); + assert_eq!(resp.organizations.len(), 2); + let acme = resp.organizations.get("org-abc").unwrap(); + assert_eq!(acme.slug, "acme"); + assert_eq!(acme.name.as_deref(), Some("Acme")); + let beta = resp.organizations.get("org-def").unwrap(); + assert_eq!(beta.slug, "beta"); + assert!(beta.name.is_none()); + assert!(beta.image.is_none()); + } + + #[test] + fn test_search_response_api_payload_deserialize() { + // Mirrors GET /v0/orgs//patches/by-package/. + let json = r#"{ + "patches": [{ + "uuid": "u1", + "purl": "pkg:npm/x@1.0.0", + "publishedAt": "2024-01-01T00:00:00Z", + "description": "A patch", + "license": "MIT", + "tier": "free", + "vulnerabilities": {} + }], + "canAccessPaidPatches": false + }"#; + let sr: SearchResponse = serde_json::from_str(json).unwrap(); + assert_eq!(sr.patches.len(), 1); + assert!(!sr.can_access_paid_patches); + assert_eq!(sr.patches[0].published_at, "2024-01-01T00:00:00Z"); + } } diff --git a/crates/socket-patch-core/src/constants.rs b/crates/socket-patch-core/src/constants.rs index b1a05606..d46ac77a 100644 --- a/crates/socket-patch-core/src/constants.rs +++ b/crates/socket-patch-core/src/constants.rs @@ -8,4 +8,55 @@ pub const DEFAULT_PATCH_API_PROXY_URL: &str = "https://patches-api.socket.dev"; pub const DEFAULT_SOCKET_API_URL: &str = "https://api.socket.dev"; /// User-Agent header value for API requests. -pub const USER_AGENT: &str = "SocketPatchCLI/1.0"; +/// +/// The version segment is derived from the crate version at compile time so it +/// tracks the published release (currently `3.x`) instead of drifting from a +/// hardcoded literal. 
Server-side analytics and any minimum-version gating rely +/// on this reporting the real version. +pub const USER_AGENT: &str = concat!("SocketPatchCLI/", env!("CARGO_PKG_VERSION")); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn user_agent_reports_real_crate_version() { + // Regression: USER_AGENT was pinned to "SocketPatchCLI/1.0" while the + // crate shipped 3.x, so every API request / telemetry beacon misreported + // the version. It must carry the actual compiled crate version. + let expected = format!("SocketPatchCLI/{}", env!("CARGO_PKG_VERSION")); + assert_eq!(USER_AGENT, expected); + assert!(USER_AGENT.starts_with("SocketPatchCLI/")); + assert!( + !USER_AGENT.ends_with("/1.0"), + "USER_AGENT must not be stuck at the stale 1.0 version" + ); + // The version segment must be non-empty. + let version = USER_AGENT.trim_start_matches("SocketPatchCLI/"); + assert!(!version.is_empty(), "version segment must not be empty"); + } + + #[test] + fn api_urls_are_https_without_trailing_slash() { + for url in [DEFAULT_PATCH_API_PROXY_URL, DEFAULT_SOCKET_API_URL] { + assert!(url.starts_with("https://"), "{url} must use https"); + assert!( + !url.ends_with('/'), + "{url} must not end with a trailing slash" + ); + } + // The proxy and authenticated API are distinct hosts; swapping them + // would silently send authed traffic to the public proxy (or vice versa). 
+ assert_ne!(DEFAULT_PATCH_API_PROXY_URL, DEFAULT_SOCKET_API_URL); + assert_eq!( + DEFAULT_PATCH_API_PROXY_URL, + "https://patches-api.socket.dev" + ); + assert_eq!(DEFAULT_SOCKET_API_URL, "https://api.socket.dev"); + } + + #[test] + fn manifest_path_is_under_dot_socket() { + assert_eq!(DEFAULT_PATCH_MANIFEST_PATH, ".socket/manifest.json"); + } +} diff --git a/crates/socket-patch-core/src/crawlers/cargo_crawler.rs b/crates/socket-patch-core/src/crawlers/cargo_crawler.rs index 0be8c462..9f375c4e 100644 --- a/crates/socket-patch-core/src/crawlers/cargo_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/cargo_crawler.rs @@ -25,13 +25,20 @@ pub fn parse_cargo_toml_name_version(content: &str) -> Option<(String, String)> continue; } - // Track table headers + // Track table headers. Use `parse_table_header` rather than an + // exact `== "[package]"` comparison so a header carrying a + // trailing inline comment (`[package] # ...`) or whitespace + // inside the brackets (`[ package ]`) is still recognized — + // both are valid TOML and a too-strict match would silently + // drop the package's name/version. if trimmed.starts_with('[') { - if trimmed == "[package]" { - in_package = true; - } else { - // We left the [package] section - if in_package { + if let Some(table) = parse_table_header(trimmed) { + if table == "package" { + in_package = true; + } else if in_package { + // We left the [package] section (a sibling table or + // a `[package.*]` subtable — bare keys can no longer + // follow per TOML, so stop scanning). break; } } @@ -62,6 +69,18 @@ pub fn parse_cargo_toml_name_version(content: &str) -> Option<(String, String)> } } +/// Extract the table name from a TOML header line. +/// +/// `[package]` -> `Some("package")`, `[package] # comment` -> +/// `Some("package")`, `[ package ]` -> `Some("package")`. Returns +/// `None` for a line that is not a `[...]` header. Anything after the +/// closing `]` (typically an inline comment) is ignored. 
+fn parse_table_header(line: &str) -> Option<&str> { + let rest = line.strip_prefix('[')?; + let end = rest.find(']')?; + Some(rest[..end].trim()) +} + /// Extract a quoted string value from a `key = "value"` line. fn extract_string_value(line: &str, key: &str) -> Option { let rest = line.strip_prefix(key)?; @@ -140,7 +159,10 @@ impl CargoCrawler { let mut packages = Vec::new(); let mut seen = HashSet::new(); - let src_paths = self.get_crate_source_paths(options).await.unwrap_or_default(); + let src_paths = self + .get_crate_source_paths(options) + .await + .unwrap_or_default(); for src_path in &src_paths { let found = self.scan_crate_source(src_path, &mut seen).await; @@ -185,10 +207,7 @@ impl CargoCrawler { // Try vendor layout: / let vendor_dir = src_path.join(name); - if self - .verify_crate_at_path(&vendor_dir, name, version) - .await - { + if self.verify_crate_at_path(&vendor_dir, name, version).await { result.insert( purl.clone(), CrawledPackage { @@ -250,8 +269,9 @@ impl CargoCrawler { } let crate_path = src_path.join(&*dir_name_str); - if let Some(pkg) = - self.read_crate_cargo_toml(&crate_path, &dir_name_str, seen).await + if let Some(pkg) = self + .read_crate_cargo_toml(&crate_path, &dir_name_str, seen) + .await { results.push(pkg); } @@ -313,8 +333,7 @@ impl CargoCrawler { .file_name() .map(|n| n.to_string_lossy().to_string()) .unwrap_or_default(); - if let Some((parsed_name, parsed_version)) = - Self::parse_dir_name_version(&dir_name) + if let Some((parsed_name, parsed_version)) = Self::parse_dir_name_version(&dir_name) { parsed_name == name && parsed_version == version } else { @@ -326,18 +345,41 @@ impl CargoCrawler { /// Parse a registry directory name into (name, version). /// - /// Registry directories follow the pattern `-`, - /// where the version is the last `-`-separated component that starts with - /// a digit (handles crate names with hyphens like `serde-json`). + /// Registry directories follow the pattern `-`. 
+ /// Both halves are ambiguous from the bare string: crate names can + /// contain hyphens (`serde-json`) and even hyphen-then-digit runs + /// (`sha-1`), while versions can carry hyphenated pre-release / build + /// metadata (`1.0.0-rc.1`, `0.11.0+wasi-snapshot-preview1`, and the + /// legal-but-rare numeric pre-release `1.0.0-2`). + /// + /// Heuristic: the version begins at a `-` immediately followed by a + /// digit. Prefer the *first* such boundary whose leading component + /// (up to the next `-`) is dotted — the common `major.minor.patch` + /// shape — so `crate-1.0.0-2` keeps `1.0.0-2` as the version rather + /// than splitting off the trailing `2`. When no candidate version is + /// dotted (e.g. a single-integer version like `crate-5`), fall back + /// to the *last* hyphen-before-digit, which keeps hyphenated names + /// like `sha-1-5` parsing as (`sha-1`, `5`). + /// + /// This is only a fallback for when `Cargo.toml` itself cannot be + /// parsed; for registry crates the manifest is authoritative. 
fn parse_dir_name_version(dir_name: &str) -> Option<(String, String)> { - // Find the last '-' followed by a digit - let mut split_idx = None; + let mut first_dotted: Option = None; + let mut last_any: Option = None; for (i, _) in dir_name.match_indices('-') { - if dir_name[i + 1..].starts_with(|c: char| c.is_ascii_digit()) { - split_idx = Some(i); + let rest = &dir_name[i + 1..]; + if !rest.starts_with(|c: char| c.is_ascii_digit()) { + continue; + } + last_any = Some(i); + if first_dotted.is_none() { + let component_end = rest.find('-').unwrap_or(rest.len()); + if rest[..component_end].contains('.') { + first_dotted = Some(i); + } } } - let idx = split_idx?; + let idx = first_dotted.or(last_any)?; let name = &dir_name[..idx]; let version = &dir_name[idx + 1..]; if name.is_empty() || version.is_empty() { @@ -636,4 +678,171 @@ version = "fake" fn test_parse_dir_name_version_empty_name_guard() { assert_eq!(CargoCrawler::parse_dir_name_version("-1.0.0"), None); } + + // --- regression: table-header parsing tolerance -------------------- + + #[test] + fn test_parse_table_header_variants() { + assert_eq!(parse_table_header("[package]"), Some("package")); + assert_eq!( + parse_table_header("[package] # main crate"), + Some("package") + ); + assert_eq!(parse_table_header("[ package ]"), Some("package")); + assert_eq!( + parse_table_header("[package.metadata]"), + Some("package.metadata") + ); + // Not a header line. + assert_eq!(parse_table_header("name = \"x\""), None); + // Array value lines don't start with '[' once trimmed by the caller, + // but a bare unterminated bracket is rejected. + assert_eq!(parse_table_header("[oops"), None); + } + + /// A `[package]` header with a trailing inline comment is valid TOML. + /// The parser must still recognize it and read name/version — a + /// too-strict `== "[package]"` would drop the crate, and in the + /// vendor layout (dir name carries no version) that crate would + /// become undiscoverable. 
+ #[test] + fn test_parse_cargo_toml_header_with_inline_comment() { + let content = r#" +[package] # the main package +name = "serde" +version = "1.0.200" +"#; + let (name, version) = parse_cargo_toml_name_version(content).unwrap(); + assert_eq!(name, "serde"); + assert_eq!(version, "1.0.200"); + } + + #[test] + fn test_parse_cargo_toml_header_with_inner_spaces() { + let content = "[ package ]\nname = \"tokio\"\nversion = \"1.38.0\"\n"; + let (name, version) = parse_cargo_toml_name_version(content).unwrap(); + assert_eq!(name, "tokio"); + assert_eq!(version, "1.38.0"); + } + + /// A `[package.metadata]` subtable still terminates bare-key scanning. + #[test] + fn test_parse_cargo_toml_stops_at_package_subtable() { + let content = r#" +[package] +name = "foo" + +[package.metadata.docs.rs] +version = "fake" +"#; + // `version` lives under the metadata subtable, not [package]. + assert!(parse_cargo_toml_name_version(content).is_none()); + } + + // --- regression: dir-name version splitting ------------------------ + + /// A numeric pre-release segment (legal SemVer) must stay part of the + /// version. Previously the "last hyphen-before-digit" heuristic split + /// `mycrate-1.0.0-2` into (`mycrate-1.0.0`, `2`). 
+ #[test] + fn test_parse_dir_name_version_numeric_prerelease() { + assert_eq!( + CargoCrawler::parse_dir_name_version("mycrate-1.0.0-2"), + Some(("mycrate".to_string(), "1.0.0-2".to_string())) + ); + } + + #[test] + fn test_parse_dir_name_version_alpha_prerelease() { + assert_eq!( + CargoCrawler::parse_dir_name_version("crate-1.0.0-rc.1"), + Some(("crate".to_string(), "1.0.0-rc.1".to_string())) + ); + } + + #[test] + fn test_parse_dir_name_version_build_metadata() { + assert_eq!( + CargoCrawler::parse_dir_name_version("wasi-0.11.0+wasi-snapshot-preview1"), + Some(( + "wasi".to_string(), + "0.11.0+wasi-snapshot-preview1".to_string() + )) + ); + } + + /// Crate name that itself ends in a hyphen-digit run (`sha-1`) must not + /// be split inside the name when the version is dotted. + #[test] + fn test_parse_dir_name_version_hyphen_digit_name() { + assert_eq!( + CargoCrawler::parse_dir_name_version("sha-1-1.0.0"), + Some(("sha-1".to_string(), "1.0.0".to_string())) + ); + } + + /// Dot-less single-integer version falls back to the last + /// hyphen-before-digit, keeping hyphenated names intact. + #[test] + fn test_parse_dir_name_version_dotless_fallback() { + assert_eq!( + CargoCrawler::parse_dir_name_version("crate-5"), + Some(("crate".to_string(), "5".to_string())) + ); + assert_eq!( + CargoCrawler::parse_dir_name_version("sha-1-5"), + Some(("sha-1".to_string(), "5".to_string())) + ); + } + + // --- regression: header-comment tolerance end-to-end --------------- + + /// A vendored crate whose Cargo.toml header carries an inline comment + /// must still be found by `find_by_purls`. The vendor layout has no + /// version in the directory name, so the version can only come from + /// parsing the manifest — exercising the header-tolerance fix. 
+ #[tokio::test] + async fn test_find_by_purls_vendor_header_comment() { + let dir = tempfile::tempdir().unwrap(); + let serde_dir = dir.path().join("serde"); + tokio::fs::create_dir_all(&serde_dir).await.unwrap(); + tokio::fs::write( + serde_dir.join("Cargo.toml"), + "[package] # serde\nname = \"serde\"\nversion = \"1.0.200\"\n", + ) + .await + .unwrap(); + + let crawler = CargoCrawler::new(); + let purls = vec!["pkg:cargo/serde@1.0.200".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + + assert_eq!(result.len(), 1); + assert!(result.contains_key("pkg:cargo/serde@1.0.200")); + } + + #[tokio::test] + async fn test_crawl_all_registry_header_comment() { + let dir = tempfile::tempdir().unwrap(); + let serde_dir = dir.path().join("serde-1.0.200"); + tokio::fs::create_dir_all(&serde_dir).await.unwrap(); + tokio::fs::write( + serde_dir.join("Cargo.toml"), + "[package] # main\nname = \"serde\"\nversion = \"1.0.200\"\n", + ) + .await + .unwrap(); + + let crawler = CargoCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + assert_eq!(packages[0].purl, "pkg:cargo/serde@1.0.200"); + } } diff --git a/crates/socket-patch-core/src/crawlers/composer_crawler.rs b/crates/socket-patch-core/src/crawlers/composer_crawler.rs index ced5d13f..f246ee80 100644 --- a/crates/socket-patch-core/src/crawlers/composer_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/composer_crawler.rs @@ -1,22 +1,15 @@ use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; -use serde::Deserialize; - use super::types::{CrawledPackage, CrawlerOptions}; /// PHP/Composer ecosystem crawler for discovering packages in Composer /// vendor directories. 
pub struct ComposerCrawler; -/// Composer 2 installed.json format: `{"packages": [...]}` -#[derive(Deserialize)] -struct InstalledJsonV2 { - packages: Vec, -} - -/// A single package entry from installed.json. -#[derive(Deserialize)] +/// A single package entry distilled from installed.json. Only the two +/// fields the crawler needs are retained; everything else (source, +/// dist, autoload, ...) is ignored. struct ComposerPackageEntry { name: String, version: String, @@ -81,19 +74,29 @@ impl ComposerCrawler { let entries = read_installed_json(vendor_path).await; for entry in entries { if let Some((namespace, name)) = entry.name.split_once('/') { - let purl = - crate::utils::purl::build_composer_purl(namespace, name, &entry.version); - - if seen.contains(&purl) { + // Skip packages that installed.json lists but that are + // not actually on disk (stale metadata, custom install + // paths). This keeps crawl_all consistent with + // find_by_purls, which only returns packages whose + // vendor directory exists. + let pkg_path = vendor_path.join(namespace).join(name); + if !is_dir(&pkg_path).await { continue; } - seen.insert(purl.clone()); - let pkg_path = vendor_path.join(namespace).join(name); + // Composer's installed.json stores the *pretty* + // version (often `v6.4.1`); PURLs use the bare numeric + // version, so normalize before building the PURL. 
+ let version = normalize_version(&entry.version).to_string(); + let purl = crate::utils::purl::build_composer_purl(namespace, name, &version); + + if !seen.insert(purl.clone()) { + continue; + } packages.push(CrawledPackage { name: name.to_string(), - version: entry.version, + version, namespace: Some(namespace.to_string()), purl, path: pkg_path, @@ -115,10 +118,8 @@ impl ComposerCrawler { // Build a name -> version lookup from installed.json let entries = read_installed_json(vendor_path).await; - let installed: HashMap = entries - .into_iter() - .map(|e| (e.name, e.version)) - .collect(); + let installed: HashMap = + entries.into_iter().map(|e| (e.name, e.version)).collect(); for purl in purls { if let Some(((namespace, name), version)) = @@ -131,9 +132,12 @@ impl ComposerCrawler { continue; } - // Verify version matches installed.json + // Verify version matches installed.json. Compare on the + // normalized version so a `v`-prefixed installed.json + // version (`v6.4.1`) matches a bare PURL version (`6.4.1`) + // and vice versa. if let Some(installed_version) = installed.get(&full_name) { - if installed_version == version { + if normalize_version(installed_version) == normalize_version(version) { result.insert( purl.clone(), CrawledPackage { @@ -238,9 +242,32 @@ async fn get_composer_home() -> Option { None } +/// Normalize a Composer version string for PURL identity. +/// +/// Composer's `installed.json` records the *pretty* version, which for +/// many packages (symfony, twig, ...) carries a leading `v` taken from +/// the upstream git tag (e.g. `v6.4.1`). PURLs use the bare numeric +/// version (`6.4.1`), so strip a single leading `v`/`V` when it +/// directly precedes a digit. Versions that don't fit that shape (e.g. +/// `dev-main`, `1.0.x-dev`) are returned untouched. 
+fn normalize_version(version: &str) -> &str { + let mut chars = version.chars(); + if matches!(chars.next(), Some('v') | Some('V')) + && chars.next().map(|c| c.is_ascii_digit()).unwrap_or(false) + { + return &version[1..]; + } + version +} + /// Read and parse `vendor/composer/installed.json`. /// -/// Supports both Composer 1 (flat JSON array) and Composer 2 (`{"packages": [...]}`) formats. +/// Supports both Composer 1 (flat JSON array) and Composer 2 +/// (`{"packages": [...]}`) formats. Parsing is intentionally lenient: +/// the file is read as untyped JSON and entries are extracted one at a +/// time, so a single malformed entry (missing/non-string `name` or +/// `version`, or extra unexpected fields) is skipped rather than +/// discarding every package in the file. async fn read_installed_json(vendor_path: &Path) -> Vec { let installed_path = vendor_path.join("composer").join("installed.json"); @@ -249,17 +276,35 @@ async fn read_installed_json(vendor_path: &Path) -> Vec { Err(_) => return Vec::new(), }; - // Try Composer 2 format first (object with packages key) - if let Ok(v2) = serde_json::from_str::(&content) { - return v2.packages; - } + let root: serde_json::Value = match serde_json::from_str(&content) { + Ok(v) => v, + Err(_) => return Vec::new(), + }; - // Fall back to Composer 1 format (flat array) - if let Ok(v1) = serde_json::from_str::>(&content) { - return v1; - } + // Composer 2 wraps the list in `{"packages": [...]}`; Composer 1 is + // a bare top-level array. 
+ let entries = match root.get("packages").and_then(|p| p.as_array()) { + Some(arr) => arr, + None => match root.as_array() { + Some(arr) => arr, + None => return Vec::new(), + }, + }; - Vec::new() + entries + .iter() + .filter_map(|entry| { + let name = entry.get("name")?.as_str()?; + let version = entry.get("version")?.as_str()?; + if name.is_empty() || version.is_empty() { + return None; + } + Some(ComposerPackageEntry { + name: name.to_string(), + version: version.to_string(), + }) + }) + .collect() } /// Check whether a path is a directory. @@ -450,6 +495,154 @@ mod tests { assert!(packages.is_empty()); } + #[test] + fn test_normalize_version() { + // `v`-prefixed semver versions get the prefix stripped. + assert_eq!(normalize_version("v6.4.1"), "6.4.1"); + assert_eq!(normalize_version("V6.4.1"), "6.4.1"); + // Bare versions pass through untouched. + assert_eq!(normalize_version("6.4.1"), "6.4.1"); + // A leading `v` not followed by a digit is part of the version + // and must be preserved. + assert_eq!(normalize_version("dev-main"), "dev-main"); + assert_eq!(normalize_version("vendor-tag"), "vendor-tag"); + assert_eq!(normalize_version("v"), "v"); + assert_eq!(normalize_version(""), ""); + } + + #[tokio::test] + async fn test_crawl_all_strips_v_prefix_from_purl() { + let dir = tempfile::tempdir().unwrap(); + let vendor_dir = dir.path().join("vendor"); + + let composer_dir = vendor_dir.join("composer"); + tokio::fs::create_dir_all(&composer_dir).await.unwrap(); + // symfony tags releases as `v6.4.1`; installed.json keeps that. 
+ tokio::fs::write( + composer_dir.join("installed.json"), + r#"{"packages": [{"name": "symfony/console", "version": "v6.4.1"}]}"#, + ) + .await + .unwrap(); + tokio::fs::create_dir_all(vendor_dir.join("symfony").join("console")) + .await + .unwrap(); + tokio::fs::write(dir.path().join("composer.json"), "{}") + .await + .unwrap(); + + let crawler = ComposerCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: None, + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + // The emitted PURL and version are the bare (canonical) form. + assert_eq!(packages[0].purl, "pkg:composer/symfony/console@6.4.1"); + assert_eq!(packages[0].version, "6.4.1"); + } + + #[tokio::test] + async fn test_find_by_purls_matches_v_prefixed_installed_version() { + let dir = tempfile::tempdir().unwrap(); + let vendor_dir = dir.path().join("vendor"); + + let composer_dir = vendor_dir.join("composer"); + tokio::fs::create_dir_all(&composer_dir).await.unwrap(); + tokio::fs::write( + composer_dir.join("installed.json"), + r#"{"packages": [{"name": "symfony/console", "version": "v6.4.1"}]}"#, + ) + .await + .unwrap(); + tokio::fs::create_dir_all(vendor_dir.join("symfony").join("console")) + .await + .unwrap(); + + let crawler = ComposerCrawler::new(); + // A canonical (bare) PURL must match the `v`-prefixed installed + // version, and a `v`-prefixed PURL must match too. 
+ let purls = vec![ + "pkg:composer/symfony/console@6.4.1".to_string(), + "pkg:composer/symfony/console@v6.4.1".to_string(), + ]; + let result = crawler.find_by_purls(&vendor_dir, &purls).await.unwrap(); + + assert_eq!(result.len(), 2); + assert!(result.contains_key("pkg:composer/symfony/console@6.4.1")); + assert!(result.contains_key("pkg:composer/symfony/console@v6.4.1")); + } + + #[tokio::test] + async fn test_read_installed_json_skips_malformed_entries() { + let dir = tempfile::tempdir().unwrap(); + let vendor_dir = dir.path(); + + let composer_dir = vendor_dir.join("composer"); + tokio::fs::create_dir_all(&composer_dir).await.unwrap(); + // One valid entry surrounded by malformed neighbours: an entry + // missing `version`, one missing `name`, and a non-object. A + // single bad entry must not discard the whole file. + tokio::fs::write( + composer_dir.join("installed.json"), + r#"{"packages": [ + {"name": "good/pkg", "version": "1.0.0"}, + {"name": "bad/no-version"}, + {"version": "2.0.0"}, + "not-an-object" + ]}"#, + ) + .await + .unwrap(); + + let entries = read_installed_json(vendor_dir).await; + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].name, "good/pkg"); + assert_eq!(entries[0].version, "1.0.0"); + } + + #[tokio::test] + async fn test_crawl_all_skips_package_missing_on_disk() { + let dir = tempfile::tempdir().unwrap(); + let vendor_dir = dir.path().join("vendor"); + + let composer_dir = vendor_dir.join("composer"); + tokio::fs::create_dir_all(&composer_dir).await.unwrap(); + // installed.json lists two packages but only one has a vendor + // directory on disk. 
+ tokio::fs::write( + composer_dir.join("installed.json"), + r#"{"packages": [ + {"name": "monolog/monolog", "version": "3.5.0"}, + {"name": "ghost/pkg", "version": "1.0.0"} + ]}"#, + ) + .await + .unwrap(); + tokio::fs::create_dir_all(vendor_dir.join("monolog").join("monolog")) + .await + .unwrap(); + tokio::fs::write(dir.path().join("composer.json"), "{}") + .await + .unwrap(); + + let crawler = ComposerCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: None, + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + assert_eq!(packages[0].name, "monolog"); + } + #[tokio::test] async fn test_find_by_purls_version_mismatch() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/crawlers/deno_crawler.rs b/crates/socket-patch-core/src/crawlers/deno_crawler.rs index a01de4e8..2014fc16 100644 --- a/crates/socket-patch-core/src/crawlers/deno_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/deno_crawler.rs @@ -113,9 +113,7 @@ impl DenoCrawler { let mut result: HashMap = HashMap::new(); for purl in purls { - let Some(((scope, name), version)) = - crate::utils::purl::parse_jsr_purl(purl) - else { + let Some(((scope, name), version)) = crate::utils::purl::parse_jsr_purl(purl) else { continue; }; // Cache layout: //// @@ -147,11 +145,7 @@ impl Default for DenoCrawler { /// Walk `/@///` and emit a /// `CrawledPackage` per (scope, name, version) tuple found. -async fn scan_jsr_cache( - root: &Path, - seen: &mut HashSet, - out: &mut Vec, -) { +async fn scan_jsr_cache(root: &Path, seen: &mut HashSet, out: &mut Vec) { // Layer 1: scope dirs like `@std/`, `@luca/`. 
for scope_entry in crate::utils::fs::list_dir_entries(root).await { if !crate::utils::fs::entry_is_dir(&scope_entry).await { @@ -179,8 +173,7 @@ async fn scan_jsr_cache( } let ver_str = ver_entry.file_name().to_string_lossy().to_string(); let pkg_path = name_path.join(&ver_str); - let purl = - crate::utils::purl::build_jsr_purl(&scope_str, &name_str, &ver_str); + let purl = crate::utils::purl::build_jsr_purl(&scope_str, &name_str, &ver_str); if seen.insert(purl.clone()) { out.push(CrawledPackage { name: name_str.clone(), @@ -212,24 +205,60 @@ async fn is_deno_project(cwd: &Path) -> bool { /// Resolve `$DENO_DIR`, falling back to platform defaults. /// -/// * `$DENO_DIR` env var wins. -/// * Linux/macOS: `$HOME/.cache/deno`. +/// Deno itself derives its default cache root from the platform's +/// *system cache directory* (the `dirs::cache_dir()` convention), not +/// from a single hard-coded `~/.cache` path. We mirror that so global +/// JSR discovery looks where real Deno actually writes: +/// +/// * `$DENO_DIR` env var wins (an empty value is treated as unset). +/// * macOS: `$HOME/Library/Caches/deno` (NOT `~/.cache/deno`). +/// * Linux/other Unix: `$XDG_CACHE_HOME/deno`, else `$HOME/.cache/deno`. /// * Windows: `%LOCALAPPDATA%\deno` (falling back to `~\.cache\deno` /// if LOCALAPPDATA isn't set). fn deno_dir() -> PathBuf { if let Ok(d) = std::env::var("DENO_DIR") { - return PathBuf::from(d); + if !d.is_empty() { + return PathBuf::from(d); + } } - #[cfg(windows)] - { - if let Ok(local) = std::env::var("LOCALAPPDATA") { - return PathBuf::from(local).join("deno"); + default_cache_root().join("deno") +} + +/// Per-platform system cache root that Deno appends `deno` to. +#[cfg(target_os = "macos")] +fn default_cache_root() -> PathBuf { + home_dir().join("Library").join("Caches") +} + +/// Per-platform system cache root that Deno appends `deno` to. 
+#[cfg(windows)] +fn default_cache_root() -> PathBuf { + if let Ok(local) = std::env::var("LOCALAPPDATA") { + if !local.is_empty() { + return PathBuf::from(local); } } + home_dir().join(".cache") +} + +/// Per-platform system cache root that Deno appends `deno` to. +#[cfg(all(not(target_os = "macos"), not(windows)))] +fn default_cache_root() -> PathBuf { + if let Ok(xdg) = std::env::var("XDG_CACHE_HOME") { + if !xdg.is_empty() { + return PathBuf::from(xdg); + } + } + home_dir().join(".cache") +} + +/// Resolve the user's home directory, mirroring the `HOME` -> +/// `USERPROFILE` -> `~` fallback chain used by the other crawlers. +fn home_dir() -> PathBuf { let home = std::env::var("HOME") .or_else(|_| std::env::var("USERPROFILE")) .unwrap_or_else(|_| "~".to_string()); - PathBuf::from(home).join(".cache").join("deno") + PathBuf::from(home) } /// Check whether a path is a directory. @@ -247,28 +276,36 @@ mod tests { #[tokio::test] async fn is_deno_project_detects_deno_json() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("deno.json"), b"{}").await.unwrap(); + tokio::fs::write(tmp.path().join("deno.json"), b"{}") + .await + .unwrap(); assert!(is_deno_project(tmp.path()).await); } #[tokio::test] async fn is_deno_project_detects_deno_jsonc() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("deno.jsonc"), b"{}").await.unwrap(); + tokio::fs::write(tmp.path().join("deno.jsonc"), b"{}") + .await + .unwrap(); assert!(is_deno_project(tmp.path()).await); } #[tokio::test] async fn is_deno_project_detects_deno_lock() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("deno.lock"), b"{}").await.unwrap(); + tokio::fs::write(tmp.path().join("deno.lock"), b"{}") + .await + .unwrap(); assert!(is_deno_project(tmp.path()).await); } #[tokio::test] async fn is_deno_project_rejects_unrelated_dir() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("package.json"), 
b"{}").await.unwrap(); + tokio::fs::write(tmp.path().join("package.json"), b"{}") + .await + .unwrap(); assert!(!is_deno_project(tmp.path()).await); } @@ -292,4 +329,194 @@ mod tests { }; assert!(crawler.crawl_all(&opts).await.is_empty()); } + + // ── scan_jsr_cache layout behavior ───────────────────────────── + + /// Stage `////mod.ts`. + async fn stage(root: &Path, scope: &str, name: &str, version: &str) { + let pkg = root.join(scope).join(name).join(version); + tokio::fs::create_dir_all(&pkg).await.unwrap(); + tokio::fs::write(pkg.join("mod.ts"), b"export default 1;") + .await + .unwrap(); + } + + #[tokio::test] + async fn scan_emits_every_version_of_a_package() { + let tmp = tempfile::tempdir().unwrap(); + stage(tmp.path(), "@std", "path", "0.220.0").await; + stage(tmp.path(), "@std", "path", "0.221.0").await; + + let mut seen = HashSet::new(); + let mut out = Vec::new(); + scan_jsr_cache(tmp.path(), &mut seen, &mut out).await; + + let mut versions: Vec<&str> = out.iter().map(|p| p.version.as_str()).collect(); + versions.sort(); + assert_eq!(versions, vec!["0.220.0", "0.221.0"]); + // Namespace keeps the leading `@`, matching the PURL convention. + assert!(out.iter().all(|p| p.namespace.as_deref() == Some("@std"))); + } + + #[tokio::test] + async fn scan_dedups_across_repeated_roots() { + let tmp = tempfile::tempdir().unwrap(); + stage(tmp.path(), "@std", "path", "0.220.0").await; + + let mut seen = HashSet::new(); + let mut out = Vec::new(); + // Same root scanned twice (mirrors two cache paths resolving to + // the same package) must not yield a duplicate CrawledPackage. + scan_jsr_cache(tmp.path(), &mut seen, &mut out).await; + scan_jsr_cache(tmp.path(), &mut seen, &mut out).await; + assert_eq!(out.len(), 1); + } + + #[tokio::test] + async fn scan_skips_files_at_scope_and_version_layers() { + let tmp = tempfile::tempdir().unwrap(); + // A real package. 
+ stage(tmp.path(), "@std", "path", "0.220.0").await; + // A stray `@`-prefixed file where a scope dir is expected. + tokio::fs::write(tmp.path().join("@loose-file"), b"x") + .await + .unwrap(); + // A package dir whose only child is a file, not a version dir. + let fs_dir = tmp.path().join("@std").join("fs"); + tokio::fs::create_dir_all(&fs_dir).await.unwrap(); + tokio::fs::write(fs_dir.join("readme.txt"), b"x") + .await + .unwrap(); + + let mut seen = HashSet::new(); + let mut out = Vec::new(); + scan_jsr_cache(tmp.path(), &mut seen, &mut out).await; + + // Only the real `@std/path@0.220.0` package is emitted; the stray + // file at the scope layer and the version-less `@std/fs` (whose + // only child is a file) are both skipped. + assert_eq!(out.len(), 1); + assert_eq!(out[0].purl, "pkg:jsr/@std/path@0.220.0"); + } + + #[tokio::test] + async fn find_by_purls_resolves_qualified_purl_and_keys_by_input() { + let tmp = tempfile::tempdir().unwrap(); + stage(tmp.path(), "@std", "path", "0.220.0").await; + + let qualified = "pkg:jsr/@std/path@0.220.0?repository_url=https://jsr.io"; + let crawler = DenoCrawler; + let result = crawler + .find_by_purls(tmp.path(), &[qualified.to_string()]) + .await + .unwrap(); + + // Resolved despite the qualifier, and keyed by the verbatim input + // PURL (not the stripped form) so callers can look it back up. + let entry = result.get(qualified).unwrap(); + assert_eq!(entry.name, "path"); + assert_eq!(entry.version, "0.220.0"); + assert_eq!(entry.namespace.as_deref(), Some("@std")); + } + + #[tokio::test] + async fn find_by_purls_skips_absent_version_keeps_present() { + let tmp = tempfile::tempdir().unwrap(); + stage(tmp.path(), "@std", "path", "0.220.0").await; + + let crawler = DenoCrawler; + let result = crawler + .find_by_purls( + tmp.path(), + &[ + "pkg:jsr/@std/path@0.220.0".to_string(), + // Same package, version not on disk — must be skipped. 
+ "pkg:jsr/@std/path@9.9.9".to_string(), + ], + ) + .await + .unwrap(); + + assert_eq!(result.len(), 1); + assert!(result.contains_key("pkg:jsr/@std/path@0.220.0")); + } + + // ── deno_dir / cache-root resolution ─────────────────────────── + + /// Save and restore an env var around a closure body. + struct EnvGuard { + key: &'static str, + prev: Option, + } + impl EnvGuard { + fn set(key: &'static str, value: &str) -> Self { + let prev = std::env::var(key).ok(); + std::env::set_var(key, value); + Self { key, prev } + } + fn unset(key: &'static str) -> Self { + let prev = std::env::var(key).ok(); + std::env::remove_var(key); + Self { key, prev } + } + } + impl Drop for EnvGuard { + fn drop(&mut self) { + match &self.prev { + Some(v) => std::env::set_var(self.key, v), + None => std::env::remove_var(self.key), + } + } + } + + #[test] + #[serial_test::serial] + fn deno_dir_honors_explicit_env() { + let _g = EnvGuard::set("DENO_DIR", "/tmp/custom-deno"); + assert_eq!(deno_dir(), PathBuf::from("/tmp/custom-deno")); + } + + #[test] + #[serial_test::serial] + fn deno_dir_treats_empty_env_as_unset() { + // Empty DENO_DIR must NOT resolve to PathBuf::from("") — it falls + // through to the platform default, which always ends in `deno`. + let _g = EnvGuard::set("DENO_DIR", ""); + let dir = deno_dir(); + assert_ne!(dir, PathBuf::from("")); + assert!(dir.ends_with("deno"), "got {dir:?}"); + } + + #[cfg(target_os = "macos")] + #[test] + #[serial_test::serial] + fn deno_dir_uses_library_caches_on_macos() { + let _g = EnvGuard::unset("DENO_DIR"); + let dir = deno_dir(); + // Regression: macOS must NOT use ~/.cache/deno. 
+ assert!( + dir.ends_with("Library/Caches/deno"), + "macOS default should live under Library/Caches, got {dir:?}" + ); + assert!(!dir.to_string_lossy().contains("/.cache/")); + } + + #[cfg(all(not(target_os = "macos"), not(windows)))] + #[test] + #[serial_test::serial] + fn deno_dir_honors_xdg_cache_home_on_linux() { + let _d = EnvGuard::unset("DENO_DIR"); + let _x = EnvGuard::set("XDG_CACHE_HOME", "/tmp/xdg-cache"); + assert_eq!(deno_dir(), PathBuf::from("/tmp/xdg-cache").join("deno")); + } + + #[cfg(all(not(target_os = "macos"), not(windows)))] + #[test] + #[serial_test::serial] + fn deno_dir_falls_back_to_dot_cache_on_linux() { + let _d = EnvGuard::unset("DENO_DIR"); + let _x = EnvGuard::unset("XDG_CACHE_HOME"); + let dir = deno_dir(); + assert!(dir.ends_with(".cache/deno"), "got {dir:?}"); + } } diff --git a/crates/socket-patch-core/src/crawlers/go_crawler.rs b/crates/socket-patch-core/src/crawlers/go_crawler.rs index 7d62a47a..51e169e3 100644 --- a/crates/socket-patch-core/src/crawlers/go_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/go_crawler.rs @@ -52,10 +52,32 @@ pub fn parse_go_mod_module(content: &str) -> Option { for line in content.lines() { let trimmed = line.trim(); if let Some(rest) = trimmed.strip_prefix("module") { + // `module` must be a whole token: the directive is followed by + // whitespace. Without this guard, lines like `modulepath = x` + // would be misparsed as a module declaration. + if !rest.is_empty() && !rest.starts_with(char::is_whitespace) { + continue; + } + // Strip a trailing line comment (`module foo // note`). Module + // paths never contain `//`, so the first occurrence is the comment. 
+ let rest = match rest.find("//") { + Some(idx) => &rest[..idx], + None => rest, + }; let rest = rest.trim(); // Handle quoted module paths - if rest.starts_with('"') && rest.ends_with('"') && rest.len() >= 2 { - return Some(rest[1..rest.len() - 1].to_string()); + if rest.len() >= 2 && rest.starts_with('"') && rest.ends_with('"') { + let inner = &rest[1..rest.len() - 1]; + // A quoted-but-empty path (`module ""`) is malformed: Go + // module paths are never empty. Treat it as absent rather + // than returning `Some("")`, which would later build a + // bogus PURL like `pkg:golang/@`. A go.mod has at + // most one `module` directive, so skipping here falls + // through to `None`. + if inner.is_empty() { + continue; + } + return Some(inner.to_string()); } // Unquoted module path if !rest.is_empty() { @@ -146,11 +168,15 @@ impl GoCrawler { for purl in purls { if let Some((module_path, version)) = crate::utils::purl::parse_golang_purl(purl) { - // Encode the module path for the filesystem + // Encode the module path AND the version for the filesystem. + // Go case-escapes both halves of the directory name, so a + // version like `v1.0.0-RC1` must be looked up as + // `v1.0.0-!r!c1` or the directory is never found. let encoded = encode_module_path(module_path); + let encoded_version = encode_module_path(version); - // Go module cache layout: @/ - let module_dir = cache_path.join(format!("{encoded}@{version}")); + // Go module cache layout: @/ + let module_dir = cache_path.join(format!("{encoded}@{encoded_version}")); if is_dir(&module_dir).await { // Split module_path into namespace and name @@ -186,9 +212,13 @@ impl GoCrawler { } } if let Ok(gopath) = std::env::var("GOPATH") { - let p = PathBuf::from(gopath); - if !p.as_os_str().is_empty() { - return Some(p.join("pkg").join("mod")); + // GOPATH may list several directories separated by the OS path + // separator (`:` on Unix, `;` on Windows). 
Go uses the FIRST + // entry for the module cache, so split rather than treating the + // whole value as a single path. + if let Some(first) = std::env::split_paths(&gopath).find(|p| !p.as_os_str().is_empty()) + { + return Some(first.join("pkg").join("mod")); } } let home = std::env::var("HOME") @@ -231,12 +261,22 @@ impl GoCrawler { let dir_name = entry.file_name(); let dir_name_str = dir_name.to_string_lossy(); - // Skip hidden directories and the cache metadata directory - if dir_name_str.starts_with('.') || dir_name_str == "cache" { + // Skip hidden directories anywhere, and the module cache's + // `cache/` metadata directory — but ONLY at the cache root. + // The download cache lives at `/cache`; a `cache` path + // component deeper in the tree is a legitimate module name + // (e.g. `github.com/go-redis/cache/v9@v9.0.0`) and must not be + // pruned, or the versioned dir beneath it is never discovered. + if dir_name_str.starts_with('.') + || (dir_name_str == "cache" && current_path == base_path) + { continue; } - let full_path = current_path.join(&*dir_name_str); + // Build the child path from the raw `OsStr` rather than the + // lossy UTF-8 rendering, so non-UTF-8 directory names still + // resolve to the correct on-disk path. + let full_path = current_path.join(entry.file_name()); // Check if this directory has `@` in its name (versioned module) if dir_name_str.contains('@') { @@ -276,10 +316,14 @@ impl GoCrawler { return None; } - // Decode case-encoded path + // Decode case-encoding. Go escapes uppercase letters in BOTH the + // module path and the version, so a pre-release tag such as + // `v1.0.0-RC1` lands on disk as `v1.0.0-!r!c1`. Decoding only the + // path would leave an escaped version in the PURL. 
let module_path = decode_module_path(encoded_module_path); + let version = decode_module_path(version); - let purl = crate::utils::purl::build_golang_purl(&module_path, version); + let purl = crate::utils::purl::build_golang_purl(&module_path, &version); if seen.contains(&purl) { return None; @@ -409,6 +453,16 @@ mod tests { assert_eq!(parse_go_mod_module(content), None); } + #[test] + fn test_parse_go_mod_module_empty_quoted_path() { + // A quoted-but-empty module path is malformed and must not yield + // `Some("")` (which would later build a bogus `pkg:golang/@...` + // PURL). Mirrors the bare-`module` empty-path regression test. + assert_eq!(parse_go_mod_module("module \"\"\n\ngo 1.21\n"), None); + // Whitespace-padded variant is equally malformed. + assert_eq!(parse_go_mod_module(" module \"\" \n"), None); + } + #[test] fn test_split_module_path() { let (ns, name) = split_module_path("github.com/gin-gonic/gin"); @@ -606,10 +660,7 @@ mod tests { "pkg:golang/github.com/Azure/azure-sdk-for-go@v1.0.0" ); assert_eq!(packages[0].name, "azure-sdk-for-go"); - assert_eq!( - packages[0].namespace, - Some("github.com/Azure".to_string()) - ); + assert_eq!(packages[0].namespace, Some("github.com/Azure".to_string())); } /// `rel_str = "@v1.0.0"` — the dir literally lives at the cache @@ -624,6 +675,111 @@ mod tests { let mut seen = HashSet::new(); let crawler = GoCrawler; let result = crawler.parse_versioned_dir(base, dir, "@v1.0.0", &mut seen); - assert!(result.is_none(), "empty encoded module path must yield None"); + assert!( + result.is_none(), + "empty encoded module path must yield None" + ); + } + + // -- Regression tests ------------------------------------------------- + + #[test] + fn test_parse_go_mod_module_trailing_comment() { + // A trailing line comment must not leak into the module path. 
+ let content = "module github.com/gin-gonic/gin // indirect note\n\ngo 1.21\n"; + assert_eq!( + parse_go_mod_module(content), + Some("github.com/gin-gonic/gin".to_string()) + ); + } + + #[test] + fn test_parse_go_mod_module_word_boundary() { + // `module` must be a whole token; `modulepath` is not the directive. + let content = "modulepath github.com/should/not/match\ngo 1.21\n"; + assert_eq!(parse_go_mod_module(content), None); + } + + #[tokio::test] + async fn test_crawl_finds_module_with_cache_path_component() { + // The `cache` skip must only apply at the cache root, not to a + // legitimate `cache` segment inside a module path. Without the + // fix, `github.com/go-redis/cache/v9@v9.0.0` is pruned entirely. + let dir = tempfile::tempdir().unwrap(); + + let cache_module = dir + .path() + .join("github.com") + .join("go-redis") + .join("cache") + .join("v9@v9.0.0"); + tokio::fs::create_dir_all(&cache_module).await.unwrap(); + + // And the real top-level `cache/` metadata dir must still be skipped. + let metadata = dir.path().join("cache").join("download").join("sumdb"); + tokio::fs::create_dir_all(&metadata).await.unwrap(); + + let crawler = GoCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + let purls: HashSet<_> = packages.iter().map(|p| p.purl.as_str()).collect(); + assert_eq!(packages.len(), 1, "only the real module should be found"); + assert!(purls.contains("pkg:golang/github.com/go-redis/cache/v9@v9.0.0")); + } + + #[tokio::test] + async fn test_crawl_decodes_uppercase_version() { + // Go case-escapes uppercase letters in the version too. A pre-release + // tag `v1.0.0-RC1` is stored on disk as `v1.0.0-!r!c1` and must be + // decoded back when forming the PURL. 
+ let dir = tempfile::tempdir().unwrap(); + + let module_dir = dir + .path() + .join("github.com") + .join("foo") + .join("bar@v1.0.0-!r!c1"); + tokio::fs::create_dir_all(&module_dir).await.unwrap(); + + let crawler = GoCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + assert_eq!(packages[0].version, "v1.0.0-RC1"); + assert_eq!(packages[0].purl, "pkg:golang/github.com/foo/bar@v1.0.0-RC1"); + } + + #[tokio::test] + async fn test_find_by_purls_uppercase_version() { + // Lookup must escape the version to match the on-disk directory. + let dir = tempfile::tempdir().unwrap(); + + let module_dir = dir + .path() + .join("github.com") + .join("foo") + .join("bar@v1.0.0-!r!c1"); + tokio::fs::create_dir_all(&module_dir).await.unwrap(); + + let crawler = GoCrawler::new(); + let purls = vec!["pkg:golang/github.com/foo/bar@v1.0.0-RC1".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + + assert_eq!(result.len(), 1); + let pkg = &result["pkg:golang/github.com/foo/bar@v1.0.0-RC1"]; + assert_eq!(pkg.name, "bar"); + assert_eq!(pkg.version, "v1.0.0-RC1"); } } diff --git a/crates/socket-patch-core/src/crawlers/maven_crawler.rs b/crates/socket-patch-core/src/crawlers/maven_crawler.rs index 246763fa..f5b57b37 100644 --- a/crates/socket-patch-core/src/crawlers/maven_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/maven_crawler.rs @@ -22,6 +22,58 @@ fn extract_xml_value(line: &str, element: &str) -> Option { } } +/// Strip the commented-out portions of a single line, threading the +/// `in_comment` state across lines so multi-line `` blocks are +/// handled. XML comments do not nest, so we always close on the first `-->`. 
+/// +/// This runs before any tag matching: POM files routinely carry license +/// headers and commented-out ``/`` snippets, and naive +/// substring matching would otherwise miscount skip-section depth (e.g. a +/// comment containing `` could "close" a block that is still open +/// and leak a plugin's coordinates as the project's). +fn strip_comment_spans(line: &str, in_comment: &mut bool) -> String { + let mut out = String::new(); + let mut rest = line; + loop { + if *in_comment { + match rest.find("-->") { + Some(end) => { + rest = &rest[end + 3..]; + *in_comment = false; + } + None => return out, // remainder of the line is inside a comment + } + } else { + match rest.find(" + + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + + + com.example + 1.0.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn test_parse_pom_commented_out_dependencies_block() { + // A commented-out `` block (no real close tag) must not + // start skipping the rest of the file. + let content = r#" + com.example + my-app + + 1.0.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn test_parse_pom_multiline_comment() { + // A comment spanning multiple lines that mentions skip-section tags + // must be ignored entirely. + let content = r#" + + com.example + my-app + 1.0.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn test_parse_pom_self_closing_skip_section() { + // A self-closing `` opens and closes at once and must + // not swallow the coordinates that follow it. 
+ let content = r#" + + com.example + my-app + 1.0.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn test_parse_pom_inline_skip_section_does_not_unbalance_depth() { + // An inline `` nested inside `` must not + // spuriously decrement the depth and leak the profile's dependency + // coordinates. + let content = r#" + my-app + + + + + + org.leak + leak + 9.9.9 + + + + + com.example + 1.0.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn test_scan_maven_repo_comment_pom_falls_back_to_path() { + // End-to-end: a POM whose own coordinates can't be trusted (here a + // leaky comment) must still resolve to the correct PURL — either by + // parsing correctly or by falling back to the on-disk path. + let dir = tempfile::tempdir().unwrap(); + let pkg_dir = dir + .path() + .join("com") + .join("example") + .join("my-app") + .join("1.0.0"); + std::fs::create_dir_all(&pkg_dir).unwrap(); + std::fs::write( + pkg_dir.join("my-app-1.0.0.pom"), + r#" + my-app + + + + org.leak + leak + 6.6.6 + + + com.example + 1.0.0 +"#, + ) + .unwrap(); + + let crawler = MavenCrawler::new(); + let mut seen = HashSet::new(); + let pkgs = crawler.scan_maven_repo(dir.path(), &mut seen); + assert_eq!(pkgs.len(), 1); + assert_eq!(pkgs[0].purl, "pkg:maven/com.example/my-app@1.0.0"); + } + // ---- extract_xml_value tests ---- #[test] @@ -567,7 +771,8 @@ mod tests { #[test] fn test_parse_path_coordinates() { let repo = Path::new("/home/user/.m2/repository"); - let version_dir = Path::new("/home/user/.m2/repository/org/apache/commons/commons-lang3/3.12.0"); + let version_dir = + Path::new("/home/user/.m2/repository/org/apache/commons/commons-lang3/3.12.0"); let (g, a, v) = parse_path_coordinates(version_dir, repo).unwrap(); assert_eq!(g, 
"org.apache.commons"); assert_eq!(a, "commons-lang3"); @@ -589,7 +794,8 @@ mod tests { let dir = tempfile::tempdir().unwrap(); // Create Maven repo layout: org/apache/commons/commons-lang3/3.12.0/ - let pkg_dir = dir.path() + let pkg_dir = dir + .path() .join("org") .join("apache") .join("commons") @@ -631,7 +837,8 @@ mod tests { let dir = tempfile::tempdir().unwrap(); // Create two Maven packages - let pkg1_dir = dir.path() + let pkg1_dir = dir + .path() .join("org") .join("apache") .join("commons") @@ -649,7 +856,8 @@ mod tests { .await .unwrap(); - let pkg2_dir = dir.path() + let pkg2_dir = dir + .path() .join("com") .join("google") .join("guava") @@ -688,7 +896,8 @@ mod tests { let dir = tempfile::tempdir().unwrap(); // Create one package - let pkg_dir = dir.path() + let pkg_dir = dir + .path() .join("com") .join("example") .join("my-lib") @@ -723,7 +932,8 @@ mod tests { let dir = tempfile::tempdir().unwrap(); // Create package with POM that has property references (can't parse) - let pkg_dir = dir.path() + let pkg_dir = dir + .path() .join("com") .join("example") .join("my-lib") diff --git a/crates/socket-patch-core/src/crawlers/mod.rs b/crates/socket-patch-core/src/crawlers/mod.rs index 904b9e4f..7506b455 100644 --- a/crates/socket-patch-core/src/crawlers/mod.rs +++ b/crates/socket-patch-core/src/crawlers/mod.rs @@ -1,35 +1,35 @@ -pub mod npm_crawler; -pub mod pkg_managers; -pub mod python_crawler; -pub mod types; #[cfg(feature = "cargo")] pub mod cargo_crawler; -pub mod ruby_crawler; +#[cfg(feature = "composer")] +pub mod composer_crawler; +#[cfg(feature = "deno")] +pub mod deno_crawler; #[cfg(feature = "golang")] pub mod go_crawler; #[cfg(feature = "maven")] pub mod maven_crawler; -#[cfg(feature = "composer")] -pub mod composer_crawler; +pub mod npm_crawler; #[cfg(feature = "nuget")] pub mod nuget_crawler; -#[cfg(feature = "deno")] -pub mod deno_crawler; +pub mod pkg_managers; +pub mod python_crawler; +pub mod ruby_crawler; +pub mod types; -pub use 
npm_crawler::NpmCrawler; -pub use pkg_managers::{detect_npm_pkg_manager, NpmPkgManager}; -pub use python_crawler::PythonCrawler; -pub use types::*; #[cfg(feature = "cargo")] pub use cargo_crawler::CargoCrawler; -pub use ruby_crawler::RubyCrawler; +#[cfg(feature = "composer")] +pub use composer_crawler::ComposerCrawler; +#[cfg(feature = "deno")] +pub use deno_crawler::DenoCrawler; #[cfg(feature = "golang")] pub use go_crawler::GoCrawler; #[cfg(feature = "maven")] pub use maven_crawler::MavenCrawler; -#[cfg(feature = "composer")] -pub use composer_crawler::ComposerCrawler; +pub use npm_crawler::NpmCrawler; #[cfg(feature = "nuget")] pub use nuget_crawler::NuGetCrawler; -#[cfg(feature = "deno")] -pub use deno_crawler::DenoCrawler; +pub use pkg_managers::{detect_npm_pkg_manager, NpmPkgManager}; +pub use python_crawler::PythonCrawler; +pub use ruby_crawler::RubyCrawler; +pub use types::*; diff --git a/crates/socket-patch-core/src/crawlers/npm_crawler.rs b/crates/socket-patch-core/src/crawlers/npm_crawler.rs index 579d3470..a8b1b71a 100644 --- a/crates/socket-patch-core/src/crawlers/npm_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/npm_crawler.rs @@ -91,11 +91,12 @@ pub fn get_npm_global_prefix() -> Result { /// exercise the success arm (binary present, stdout parsed) without /// requiring npm on the host's PATH. pub fn get_npm_global_prefix_with(runner: &dyn CommandRunner) -> Result { - parse_npm_root_output(runner.run("npm", &["root", "-g"]).as_deref().unwrap_or("")) - .ok_or_else(|| { + parse_npm_root_output(runner.run("npm", &["root", "-g"]).as_deref().unwrap_or("")).ok_or_else( + || { "Failed to determine npm global prefix. Ensure npm is installed and in PATH." .to_string() - }) + }, + ) } /// Pure parser for `npm root -g` stdout. Returns the trimmed path or @@ -118,7 +119,12 @@ pub fn get_yarn_global_prefix() -> Option { /// Version of `get_yarn_global_prefix` that accepts an injected /// `CommandRunner`. See `get_npm_global_prefix_with`. 
pub fn get_yarn_global_prefix_with(runner: &dyn CommandRunner) -> Option { - parse_yarn_dir_output(runner.run("yarn", &["global", "dir"]).as_deref().unwrap_or("")) + parse_yarn_dir_output( + runner + .run("yarn", &["global", "dir"]) + .as_deref() + .unwrap_or(""), + ) } /// Pure parser for `yarn global dir` stdout. Returns `/node_modules` @@ -129,7 +135,12 @@ pub fn parse_yarn_dir_output(stdout: &str) -> Option { if dir.is_empty() { return None; } - Some(PathBuf::from(dir).join("node_modules").to_string_lossy().to_string()) + Some( + PathBuf::from(dir) + .join("node_modules") + .to_string_lossy() + .to_string(), + ) } /// Get the pnpm global `node_modules` path via `pnpm root -g`. @@ -161,7 +172,12 @@ pub fn get_bun_global_prefix() -> Option { /// Version of `get_bun_global_prefix` that accepts an injected /// `CommandRunner`. See `get_npm_global_prefix_with`. pub fn get_bun_global_prefix_with(runner: &dyn CommandRunner) -> Option { - parse_bun_bin_output(runner.run("bun", &["pm", "bin", "-g"]).as_deref().unwrap_or("")) + parse_bun_bin_output( + runner + .run("bun", &["pm", "bin", "-g"]) + .as_deref() + .unwrap_or(""), + ) } /// Pure parser for `bun pm bin -g` stdout. Extracted so the @@ -219,13 +235,17 @@ fn find_node_dirs_sync(base: &Path, segments: &[&str]) -> Vec { let mut results = Vec::new(); if let Ok(entries) = std::fs::read_dir(base) { for entry in entries.flatten() { - // Follow symlinks: use metadata() not symlink_metadata() - let is_dir = entry - .metadata() + // Follow symlinks: `DirEntry::metadata()` does NOT traverse + // symlinks (it stats the link itself), so a symlinked version + // dir — fnm's per-version layout, nvm `default`/`current` + // aliases — would be missed. Stat the joined path with the + // free `std::fs::metadata`, which resolves the link target. 
+ let child = base.join(entry.file_name()); + let is_dir = std::fs::metadata(&child) .map(|m| m.is_dir()) .unwrap_or(false); if is_dir { - results.extend(find_node_dirs_sync(&base.join(entry.file_name()), rest)); + results.extend(find_node_dirs_sync(&child, rest)); } } } @@ -257,7 +277,10 @@ impl NpmCrawler { /// In global mode returns well-known global paths; in local mode walks /// the project tree looking for `node_modules` directories (including /// workspace packages). - pub async fn get_node_modules_paths(&self, options: &CrawlerOptions) -> Result, std::io::Error> { + pub async fn get_node_modules_paths( + &self, + options: &CrawlerOptions, + ) -> Result, std::io::Error> { if options.global || options.global_prefix.is_some() { if let Some(ref custom) = options.global_prefix { return Ok(vec![custom.clone()]); @@ -273,7 +296,10 @@ impl NpmCrawler { let mut packages = Vec::new(); let mut seen = HashSet::new(); - let nm_paths = self.get_node_modules_paths(options).await.unwrap_or_default(); + let nm_paths = self + .get_node_modules_paths(options) + .await + .unwrap_or_default(); for nm_path in &nm_paths { let found = self.scan_node_modules(nm_path, &mut seen).await; @@ -300,7 +326,8 @@ impl NpmCrawler { namespace: Option, name: String, version: String, - #[allow(dead_code)] purl: String, + #[allow(dead_code)] + purl: String, dir_key: String, } @@ -329,11 +356,7 @@ impl NpmCrawler { if let Some((_, version)) = read_package_json(&pkg_json_path).await { if version == target.version { - let purl = build_npm_purl( - target.namespace.as_deref(), - &target.name, - &version, - ); + let purl = build_npm_purl(target.namespace.as_deref(), &target.name, &version); if purl_set.contains(purl.as_str()) { result.insert( purl.clone(), @@ -515,8 +538,7 @@ impl NpmCrawler { if name_str.starts_with('@') { // Scoped packages - let scoped = - Self::scan_scoped_packages(&entry_path, seen).await; + let scoped = Self::scan_scoped_packages(&entry_path, seen).await; results.extend(scoped); 
} else { // Regular package @@ -525,8 +547,7 @@ impl NpmCrawler { } // Nested node_modules only for real directories (not symlinks) if file_type.is_dir() { - let nested = - Self::scan_nested_node_modules(&entry_path, seen).await; + let nested = Self::scan_nested_node_modules(&entry_path, seen).await; results.extend(nested); } } @@ -566,8 +587,7 @@ impl NpmCrawler { // Nested node_modules only for real directories if file_type.is_dir() { - let nested = - Self::scan_nested_node_modules(&pkg_path, seen).await; + let nested = Self::scan_nested_node_modules(&pkg_path, seen).await; results.extend(nested); } } @@ -604,17 +624,21 @@ impl NpmCrawler { let entry_path = nested_nm.join(&name_str); if name_str.starts_with('@') { - let scoped = - Self::scan_scoped_packages(&entry_path, seen).await; + let scoped = Self::scan_scoped_packages(&entry_path, seen).await; results.extend(scoped); } else { if let Some(pkg) = Self::check_package(&entry_path, seen).await { results.push(pkg); } - // Recursively check deeper nested node_modules - let deeper = - Self::scan_nested_node_modules(&entry_path, seen).await; - results.extend(deeper); + // Recurse into deeper nested node_modules only for real + // directories (not symlinks) — matching the invariant in + // `scan_node_modules`/`scan_scoped_packages`. Following a + // symlink here would walk into pnpm's content-addressed + // store (or an `npm link` target outside the project). + if file_type.is_dir() { + let deeper = Self::scan_nested_node_modules(&entry_path, seen).await; + results.extend(deeper); + } } } @@ -624,10 +648,7 @@ impl NpmCrawler { /// Check a package directory and return `CrawledPackage` if valid. /// Deduplicates by PURL via the `seen` set. 
- async fn check_package( - pkg_path: &Path, - seen: &mut HashSet, - ) -> Option { + async fn check_package(pkg_path: &Path, seen: &mut HashSet) -> Option { let pkg_json_path = pkg_path.join("package.json"); let (full_name, version) = read_package_json(&pkg_json_path).await?; let (namespace, name) = parse_package_name(&full_name); @@ -745,8 +766,7 @@ mod tests { #[test] fn test_parse_purl_components_unscoped() { - let (ns, name, ver) = - NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21").unwrap(); + let (ns, name, ver) = NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21").unwrap(); assert!(ns.is_none()); assert_eq!(name, "lodash"); assert_eq!(ver, "4.17.21"); @@ -762,12 +782,9 @@ mod tests { async fn test_read_package_json_valid() { let dir = tempfile::tempdir().unwrap(); let pkg_json = dir.path().join("package.json"); - tokio::fs::write( - &pkg_json, - r#"{"name": "test-pkg", "version": "1.0.0"}"#, - ) - .await - .unwrap(); + tokio::fs::write(&pkg_json, r#"{"name": "test-pkg", "version": "1.0.0"}"#) + .await + .unwrap(); let result = read_package_json(&pkg_json).await; assert!(result.is_some()); @@ -870,6 +887,37 @@ mod tests { assert!(results.is_empty()); } + /// Regression: a wildcard segment that matches a *symlinked* + /// directory must be followed. `DirEntry::metadata()` stats the link + /// itself (reports `is_dir == false`), so the resolver previously + /// skipped symlinked version dirs — exactly the layout fnm produces + /// and the `current`/`default` aliases nvm creates. The fix stats the + /// joined path with `std::fs::metadata`, which resolves the target. + #[cfg(unix)] + #[test] + fn test_find_node_dirs_sync_follows_symlinked_segment() { + use std::os::unix::fs::symlink; + + // Real version layout lives in its own tree, away from `base`, + // so the only way to reach it is through the symlink. 
+ let real = tempfile::tempdir().unwrap(); + let real_nm = real.path().join("lib").join("node_modules"); + std::fs::create_dir_all(&real_nm).unwrap(); + + // `base` holds only a symlink standing in for a version dir. + let base = tempfile::tempdir().unwrap(); + let alias = base.path().join("current"); + symlink(real.path(), &alias).unwrap(); + + let results = find_node_dirs_sync(base.path(), &["*", "lib", "node_modules"]); + assert_eq!( + results.len(), + 1, + "a symlinked version dir must be followed, not skipped" + ); + assert_eq!(results[0], alias.join("lib").join("node_modules")); + } + #[test] fn test_find_node_dirs_sync_literal() { // All literal segments (no wildcard) diff --git a/crates/socket-patch-core/src/crawlers/nuget_crawler.rs b/crates/socket-patch-core/src/crawlers/nuget_crawler.rs index 4b2ce70f..2d208f15 100644 --- a/crates/socket-patch-core/src/crawlers/nuget_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/nuget_crawler.rs @@ -74,7 +74,10 @@ impl NuGetCrawler { let mut packages = Vec::new(); let mut seen = HashSet::new(); - let pkg_paths = self.get_nuget_package_paths(options).await.unwrap_or_default(); + let pkg_paths = self + .get_nuget_package_paths(options) + .await + .unwrap_or_default(); for pkg_path in &pkg_paths { let found = self.scan_package_dir(pkg_path, &mut seen).await; @@ -94,8 +97,14 @@ impl NuGetCrawler { for purl in purls { if let Some((name, version)) = crate::utils::purl::parse_nuget_purl(purl) { - // Try global cache layout: // - let global_dir = pkg_path.join(name.to_lowercase()).join(version); + // Try global cache layout: //. + // NuGet lowercases BOTH the id and the version when it lays + // out the global packages folder, so a prerelease tag like + // `2.0.0-RC1` lives on disk as `2.0.0-rc1`. Lowercasing only + // the name (but not the version) would miss those packages. 
+ let global_dir = pkg_path + .join(name.to_lowercase()) + .join(version.to_lowercase()); if self.verify_nuget_package(&global_dir).await { result.insert( purl.clone(), @@ -334,7 +343,12 @@ async fn is_dotnet_project(cwd: &Path) -> bool { /// Parse a legacy packages directory name into (name, version). /// /// Legacy NuGet directories follow the pattern `.`, where -/// the version starts at the last `.` followed by a digit-starting segment. +/// the version starts at the *first* `.` followed by a digit-starting +/// segment. NuGet versions always begin with a numeric major component, +/// and id segments don't start with a digit, so the first numeric-leading +/// segment marks the name/version boundary. Splitting on the *last* such +/// dot would wrongly carve `Newtonsoft.Json.13.0.3` into +/// `("Newtonsoft.Json.13.0", "3")`. fn parse_legacy_dir_name(dir_name: &str) -> Option<(String, String)> { // Find the first '.' followed by a digit let mut split_idx = None; @@ -382,7 +396,10 @@ async fn discover_paths_from_assets(cwd: &Path) -> Vec { if !crate::utils::fs::entry_is_dir(&entry).await { continue; } - let sub_assets = cwd.join(entry.file_name()).join("obj").join("project.assets.json"); + let sub_assets = cwd + .join(entry.file_name()) + .join("obj") + .join("project.assets.json"); if let Some(pkg_folders) = parse_project_assets_package_folders(&sub_assets).await { for folder in pkg_folders { paths.push(folder); @@ -479,7 +496,9 @@ mod tests { // Create legacy layout: ./ let pkg_dir = dir.path().join("Newtonsoft.Json.13.0.3"); - tokio::fs::create_dir_all(pkg_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); let crawler = NuGetCrawler::new(); let purls = vec!["pkg:nuget/Newtonsoft.Json@13.0.3".to_string()]; @@ -595,7 +614,9 @@ mod tests { async fn test_verify_nuget_package_with_lib() { let dir = tempfile::tempdir().unwrap(); let pkg_dir = dir.path().join("testpkg"); - 
tokio::fs::create_dir_all(pkg_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); let crawler = NuGetCrawler::new(); assert!(crawler.verify_nuget_package(&pkg_dir).await); @@ -617,7 +638,9 @@ mod tests { // Create a single package let pkg_dir = dir.path().join("newtonsoft.json").join("13.0.3"); - tokio::fs::create_dir_all(pkg_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); let crawler = NuGetCrawler::new(); let options = CrawlerOptions { @@ -682,4 +705,75 @@ mod tests { fn test_parse_legacy_dir_name_empty_name_guard() { assert_eq!(parse_legacy_dir_name(".1.0.0"), None); } + + /// Regression: the name/version split must happen at the *first* + /// numeric-leading segment, not the last. A version with three or + /// more numeric components (the common case) would otherwise be + /// truncated to its final segment. + #[test] + fn test_parse_legacy_dir_name_splits_at_first_numeric_segment() { + assert_eq!( + parse_legacy_dir_name("Newtonsoft.Json.13.0.3"), + Some(("Newtonsoft.Json".to_string(), "13.0.3".to_string())) + ); + // A four-component version still keeps every numeric segment. + assert_eq!( + parse_legacy_dir_name("Microsoft.Web.Infrastructure.1.0.0.0"), + Some(( + "Microsoft.Web.Infrastructure".to_string(), + "1.0.0.0".to_string() + )) + ); + } + + /// Regression: NuGet's global packages folder lowercases the version + /// directory as well as the package-id directory. A prerelease tag + /// carrying uppercase characters in the PURL (e.g. `2.0.0-RC1`) must + /// still resolve to the on-disk `2.0.0-rc1` folder. + #[tokio::test] + async fn test_find_by_purls_global_cache_lowercases_version() { + let dir = tempfile::tempdir().unwrap(); + + // On disk both the id and the version are lowercased. 
+ let pkg_dir = dir.path().join("contoso.widgets").join("2.0.0-rc1"); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); + + let crawler = NuGetCrawler::new(); + // The PURL preserves the original (mixed) case for id and version. + let purls = vec!["pkg:nuget/Contoso.Widgets@2.0.0-RC1".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + + assert_eq!(result.len(), 1); + let pkg = result + .get("pkg:nuget/Contoso.Widgets@2.0.0-RC1") + .expect("prerelease package should resolve via lowercased version dir"); + assert_eq!(pkg.path, pkg_dir); + // The reported name/version keep the PURL's original casing. + assert_eq!(pkg.name, "Contoso.Widgets"); + assert_eq!(pkg.version, "2.0.0-RC1"); + } + + /// Companion to the above: the legacy `./` layout + /// preserves the original version casing on disk, and the + /// case-insensitive fallback still resolves it when the PURL casing + /// differs from the folder casing. + #[tokio::test] + async fn test_find_by_purls_legacy_case_insensitive_prerelease() { + let dir = tempfile::tempdir().unwrap(); + + // Legacy folder happens to be stored fully lowercased. + let pkg_dir = dir.path().join("contoso.widgets.2.0.0-rc1"); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); + + let crawler = NuGetCrawler::new(); + let purls = vec!["pkg:nuget/Contoso.Widgets@2.0.0-RC1".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + + assert_eq!(result.len(), 1); + assert!(result.contains_key("pkg:nuget/Contoso.Widgets@2.0.0-RC1")); + } } diff --git a/crates/socket-patch-core/src/crawlers/pkg_managers.rs b/crates/socket-patch-core/src/crawlers/pkg_managers.rs index 421b6ab7..62ce1c37 100644 --- a/crates/socket-patch-core/src/crawlers/pkg_managers.rs +++ b/crates/socket-patch-core/src/crawlers/pkg_managers.rs @@ -72,9 +72,7 @@ pub enum NpmPkgManager { pub fn detect_npm_pkg_manager(project_root: &Path) -> NpmPkgManager { // 1. 
yarn-berry PnP — highest priority because it determines // whether the npm crawler can find anything at all. - if project_root.join(".pnp.cjs").is_file() - || project_root.join(".pnp.loader.mjs").is_file() - { + if project_root.join(".pnp.cjs").is_file() || project_root.join(".pnp.loader.mjs").is_file() { return NpmPkgManager::YarnBerryPnP; } @@ -83,17 +81,14 @@ pub fn detect_npm_pkg_manager(project_root: &Path) -> NpmPkgManager { // below, we require `node_modules/` to actually exist — // a bare lockfile without an install is a fresh checkout. let node_modules = project_root.join("node_modules"); - if (project_root.join("bun.lock").is_file() - || project_root.join("bun.lockb").is_file()) + if (project_root.join("bun.lock").is_file() || project_root.join("bun.lockb").is_file()) && node_modules.is_dir() { return NpmPkgManager::Bun; } // 3. pnpm — markers live inside node_modules/. - if node_modules.join(".modules.yaml").is_file() - || node_modules.join(".pnpm").is_dir() - { + if node_modules.join(".modules.yaml").is_file() || node_modules.join(".pnpm").is_dir() { return NpmPkgManager::Pnpm; } @@ -235,4 +230,70 @@ mod tests { NpmPkgManager::YarnBerryPnP ); } + + /// The ESM PnP loader variant (`.pnp.loader.mjs`) is sufficient on + /// its own — newer yarn-berry installs ship it instead of (or + /// alongside) `.pnp.cjs`. The end-to-end refusal test pins this at + /// the CLI layer; pin it here at the detector layer too so a unit + /// regression is caught without standing up the whole apply path. + #[test] + fn yarn_berry_pnp_via_loader_mjs() { + let d = tempfile::tempdir().unwrap(); + std::fs::write(d.path().join(".pnp.loader.mjs"), "").unwrap(); + assert_eq!( + detect_npm_pkg_manager(d.path()), + NpmPkgManager::YarnBerryPnP + ); + } + + /// PnP wins even when a real `node_modules/` is also present (a + /// yarn-berry checkout can carry both an installed tree and the + /// loader). 
The refusal is the safety-critical branch — it must not + /// be masked by the npm fallthrough. + #[test] + fn yarn_berry_pnp_priority_over_node_modules() { + let d = tempfile::tempdir().unwrap(); + std::fs::write(d.path().join(".pnp.cjs"), "").unwrap(); + std::fs::create_dir_all(d.path().join("node_modules")).unwrap(); + assert_eq!( + detect_npm_pkg_manager(d.path()), + NpmPkgManager::YarnBerryPnP + ); + } + + /// pnpm is checked before yarn-classic: a project with both a + /// `yarn.lock` and pnpm's `.pnpm/` store (e.g. a repo migrating + /// package managers without a clean reinstall) classifies as pnpm, + /// matching the documented precedence table. + #[test] + fn pnpm_priority_over_yarn_classic() { + let d = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(d.path().join("node_modules/.pnpm")).unwrap(); + std::fs::write(d.path().join("yarn.lock"), "").unwrap(); + assert_eq!(detect_npm_pkg_manager(d.path()), NpmPkgManager::Pnpm); + } + + /// bun is checked before yarn-classic too: a `bun.lock` plus a + /// stray `yarn.lock` (multi-PM repo) classifies as bun. + #[test] + fn bun_priority_over_yarn_classic() { + let d = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(d.path().join("node_modules")).unwrap(); + std::fs::write(d.path().join("bun.lock"), "").unwrap(); + std::fs::write(d.path().join("yarn.lock"), "").unwrap(); + assert_eq!(detect_npm_pkg_manager(d.path()), NpmPkgManager::Bun); + } + + /// Robustness: a malformed layout where `node_modules` is a regular + /// *file* rather than a directory must not be misclassified. Every + /// non-PnP branch gates on `node_modules.is_dir()` (directly or via + /// a child `join`), so a bun lockfile next to a `node_modules` file + /// falls through to Unknown rather than claiming bun. 
+ #[test] + fn node_modules_as_file_is_not_misclassified() { + let d = tempfile::tempdir().unwrap(); + std::fs::write(d.path().join("node_modules"), "not a dir").unwrap(); + std::fs::write(d.path().join("bun.lock"), "").unwrap(); + assert_eq!(detect_npm_pkg_manager(d.path()), NpmPkgManager::Unknown); + } } diff --git a/crates/socket-patch-core/src/crawlers/python_crawler.rs b/crates/socket-patch-core/src/crawlers/python_crawler.rs index 1ea44e4d..087d7437 100644 --- a/crates/socket-patch-core/src/crawlers/python_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/python_crawler.rs @@ -61,8 +61,36 @@ pub fn canonicalize_pypi_name(name: &str) -> String { // Helpers: read Python metadata from dist-info // --------------------------------------------------------------------------- -/// Read `Name` and `Version` from a `.dist-info/METADATA` file. +/// Read `Name` and `Version` for a `.dist-info` directory. +/// +/// Primary source is the `.dist-info/METADATA` header block. When that +/// file is missing or malformed (no usable `Name`/`Version`), fall back +/// to the `-.dist-info` directory name so a corrupt or +/// partially-written install does not make the package invisible to the +/// crawler — a real risk for a tool whose job is to find and patch +/// packages. The fallback only fires for an actual directory, guarding +/// against a stray `*.dist-info` file masquerading as an install. pub async fn read_python_metadata(dist_info_path: &Path) -> Option<(String, String)> { + if let Some(found) = parse_metadata_headers(dist_info_path).await { + return Some(found); + } + + let is_dir = tokio::fs::metadata(dist_info_path) + .await + .map(|m| m.is_dir()) + .unwrap_or(false); + if !is_dir { + return None; + } + let dir_name = dist_info_path.file_name()?.to_string_lossy(); + parse_dist_info_dir_name(&dir_name) +} + +/// Parse the `Name`/`Version` headers from `/METADATA`. 
+/// +/// Returns `None` if the file is absent, unreadable, or does not yield a +/// non-empty `Name` and `Version` before the header/body separator. +async fn parse_metadata_headers(dist_info_path: &Path) -> Option<(String, String)> { let metadata_path = dist_info_path.join("METADATA"); let content = tokio::fs::read_to_string(&metadata_path).await.ok()?; @@ -90,6 +118,24 @@ pub async fn read_python_metadata(dist_info_path: &Path) -> Option<(String, Stri } } +/// Derive `(name, version)` from a `-.dist-info` directory +/// name. A PEP 440 version never contains `-` (pre-release and local +/// segments normalize to `aN`/`+local`), so the final `-` is the +/// name/version boundary even when the distribution name itself contains +/// a `-` (older pip kept the raw name; newer pip escapes it to `_`). +/// Either way the caller canonicalizes the name. Returns `None` when the +/// directory name carries no version segment. +fn parse_dist_info_dir_name(dir_name: &str) -> Option<(String, String)> { + let base = dir_name.strip_suffix(".dist-info")?; + let idx = base.rfind('-')?; + let name = &base[..idx]; + let version = &base[idx + 1..]; + if name.is_empty() || version.is_empty() { + return None; + } + Some((name.to_string(), version.to_string())) +} + // --------------------------------------------------------------------------- // Helpers: find Python directories with wildcard matching // --------------------------------------------------------------------------- @@ -127,11 +173,8 @@ pub async fn find_python_dirs(base_path: &Path, segments: &[&str]) -> Vec Vec Vec { ) .await; - // Python.org framework + // Python.org framework: /Library/Frameworks/Python.framework/Versions/ + // holds bare version dirs (`3.11`, `3.12`, `Current`) — NOT `python3.X` + // — so the version segment must be matched with `*`, not `python3.*`. 
let fw_matches = find_python_dirs( - Path::new("/Library/Frameworks/Python.framework/Versions"), - &["python3.*", "lib", "python3.*", "site-packages"], - ) - .await; - for m in fw_matches { - add_path(m, &mut seen, &mut results); - } - - let fw_matches2 = find_python_dirs( Path::new("/Library/Frameworks/Python.framework"), &["Versions", "*", "lib", "python3.*", "site-packages"], ) .await; - for m in fw_matches2 { + for m in fw_matches { add_path(m, &mut seen, &mut results); } } @@ -371,11 +402,8 @@ pub async fn get_global_python_site_packages() -> Vec { .map(PathBuf::from) .unwrap_or_else(|_| PathBuf::from(&home_dir).join(".pyenv")); let pyenv_versions = pyenv_root.join("versions"); - let pyenv_matches = find_python_dirs( - &pyenv_versions, - &["*", "lib", "python3.*", "site-packages"], - ) - .await; + let pyenv_matches = + find_python_dirs(&pyenv_versions, &["*", "lib", "python3.*", "site-packages"]).await; for m in pyenv_matches { add_path(m, &mut seen, &mut results); } @@ -406,8 +434,7 @@ pub async fn get_global_python_site_packages() -> Vec { // %LOCALAPPDATA%\uv\tools if let Ok(local) = std::env::var("LOCALAPPDATA") { let uv_base = PathBuf::from(local).join("uv").join("tools"); - let uv_matches = - find_python_dirs(&uv_base, &["*", "Lib", "site-packages"]).await; + let uv_matches = find_python_dirs(&uv_base, &["*", "Lib", "site-packages"]).await; for m in uv_matches { add_path(m, &mut seen, &mut results); } @@ -452,8 +479,7 @@ pub async fn get_global_python_site_packages() -> Vec { { if let Ok(local) = std::env::var("LOCALAPPDATA") { let uv_python = PathBuf::from(local).join("uv").join("python"); - let uv_matches = - find_python_dirs(&uv_python, &["*", "Lib", "site-packages"]).await; + let uv_matches = find_python_dirs(&uv_python, &["*", "Lib", "site-packages"]).await; for m in uv_matches { add_path(m, &mut seen, &mut results); } @@ -522,7 +548,10 @@ impl PythonCrawler { /// Without the marker fallback, a fresh clone with /// `pyproject.toml` + `uv.lock` but 
no `.venv` would silently /// return zero packages. - pub async fn get_site_packages_paths(&self, options: &CrawlerOptions) -> Result, std::io::Error> { + pub async fn get_site_packages_paths( + &self, + options: &CrawlerOptions, + ) -> Result, std::io::Error> { if options.global || options.global_prefix.is_some() { if let Some(ref custom) = options.global_prefix { return Ok(vec![custom.clone()]); @@ -544,7 +573,10 @@ impl PythonCrawler { let mut packages = Vec::new(); let mut seen = HashSet::new(); - let sp_paths = self.get_site_packages_paths(options).await.unwrap_or_default(); + let sp_paths = self + .get_site_packages_paths(options) + .await + .unwrap_or_default(); for sp_path in &sp_paths { let found = self.scan_site_packages(sp_path, &mut seen).await; @@ -765,14 +797,146 @@ mod tests { assert!(read_python_metadata(&dist_info).await.is_none()); } + #[test] + fn test_parse_dist_info_dir_name() { + // Modern pip escapes `-` in the name to `_`. + assert_eq!( + parse_dist_info_dir_name("flask_sqlalchemy-3.0.5.dist-info"), + Some(("flask_sqlalchemy".to_string(), "3.0.5".to_string())) + ); + // Older pip kept the raw name with `-`; the final `-` is still the + // version boundary because a normalized version never contains `-`. + assert_eq!( + parse_dist_info_dir_name("Flask-SQLAlchemy-3.0.5.dist-info"), + Some(("Flask-SQLAlchemy".to_string(), "3.0.5".to_string())) + ); + assert_eq!( + parse_dist_info_dir_name("requests-2.28.0.dist-info"), + Some(("requests".to_string(), "2.28.0".to_string())) + ); + // No version segment, wrong suffix, and empty-name guards. 
+ assert!(parse_dist_info_dir_name("noversion.dist-info").is_none()); + assert!(parse_dist_info_dir_name("requests-2.28.0.egg-info").is_none()); + assert!(parse_dist_info_dir_name("-1.0.dist-info").is_none()); + } + + /// A `.dist-info` directory whose `METADATA` is missing must still be + /// discoverable via the directory name — otherwise a corrupt/partial + /// install silently hides a package the crawler is meant to patch. + #[tokio::test] + async fn test_read_python_metadata_falls_back_to_dir_name() { + let dir = tempfile::tempdir().unwrap(); + let dist_info = dir.path().join("requests-2.28.0.dist-info"); + tokio::fs::create_dir_all(&dist_info).await.unwrap(); + // No METADATA file written at all. + let (name, version) = read_python_metadata(&dist_info).await.unwrap(); + assert_eq!(name, "requests"); + assert_eq!(version, "2.28.0"); + } + + /// Malformed METADATA (present but missing the `Version` header) also + /// falls back to the directory name rather than dropping the package. + #[tokio::test] + async fn test_read_python_metadata_falls_back_on_malformed() { + let dir = tempfile::tempdir().unwrap(); + let dist_info = dir.path().join("urllib3-2.0.7.dist-info"); + tokio::fs::create_dir_all(&dist_info).await.unwrap(); + tokio::fs::write( + dist_info.join("METADATA"), + "Metadata-Version: 2.1\nName: urllib3\n\nDescription body, no Version header\n", + ) + .await + .unwrap(); + let (name, version) = read_python_metadata(&dist_info).await.unwrap(); + assert_eq!(name, "urllib3"); + assert_eq!(version, "2.0.7"); + } + + /// A stray *file* named `*.dist-info` must NOT be surfaced as a package + /// via the directory-name fallback. 
+ #[tokio::test] + async fn test_read_python_metadata_ignores_stray_file() { + let dir = tempfile::tempdir().unwrap(); + let stray = dir.path().join("ghost-1.0.dist-info"); + tokio::fs::write(&stray, b"not a dir").await.unwrap(); + assert!(read_python_metadata(&stray).await.is_none()); + } + + /// `crawl_all` recovers a package whose METADATA is missing by parsing + /// the `.dist-info` directory name. + #[tokio::test] + async fn test_crawl_all_recovers_metadata_less_package() { + let dir = tempfile::tempdir().unwrap(); + let venv = dir.path().join(".venv"); + #[cfg(windows)] + let sp = venv.join("Lib").join("site-packages"); + #[cfg(not(windows))] + let sp = venv.join("lib").join("python3.11").join("site-packages"); + tokio::fs::create_dir_all(&sp).await.unwrap(); + // dist-info dir exists but has no METADATA (partial install). + tokio::fs::create_dir_all(sp.join("flask_sqlalchemy-3.0.5.dist-info")) + .await + .unwrap(); + + let crawler = PythonCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: None, + batch_size: 100, + }; + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + assert_eq!(packages[0].name, "flask-sqlalchemy"); + assert_eq!(packages[0].version, "3.0.5"); + assert_eq!(packages[0].purl, "pkg:pypi/flask-sqlalchemy@3.0.5"); + } + + /// Regression for the macOS Python.framework layout: the `Versions/` + /// directory holds bare version dirs (`3.11`), so the version segment + /// must be matched with `*`. A `python3.*` pattern matches nothing — + /// which is exactly the bug that was fixed. + #[tokio::test] + async fn test_find_python_dirs_framework_versions_layout() { + let dir = tempfile::tempdir().unwrap(); + let sp = dir + .path() + .join("Versions") + .join("3.11") + .join("lib") + .join("python3.11") + .join("site-packages"); + tokio::fs::create_dir_all(&sp).await.unwrap(); + + // Correct pattern (`*` for the version dir) finds it. 
+ let ok = find_python_dirs( + &dir.path().join("Versions"), + &["*", "lib", "python3.*", "site-packages"], + ) + .await; + assert_eq!(ok.len(), 1); + assert_eq!(ok[0], sp); + + // The buggy pattern (`python3.*` for the version dir) matches nothing. + let buggy = find_python_dirs( + &dir.path().join("Versions"), + &["python3.*", "lib", "python3.*", "site-packages"], + ) + .await; + assert!(buggy.is_empty()); + } + #[tokio::test] async fn test_find_python_dirs_literal() { let dir = tempfile::tempdir().unwrap(); - let target = dir.path().join("lib").join("python3.11").join("site-packages"); + let target = dir + .path() + .join("lib") + .join("python3.11") + .join("site-packages"); tokio::fs::create_dir_all(&target).await.unwrap(); - let results = - find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await; + let results = find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await; assert_eq!(results.len(), 1); assert_eq!(results[0], target); } @@ -780,8 +944,16 @@ mod tests { #[tokio::test] async fn test_find_python_dirs_wildcard() { let dir = tempfile::tempdir().unwrap(); - let sp1 = dir.path().join("lib").join("python3.10").join("site-packages"); - let sp2 = dir.path().join("lib").join("python3.11").join("site-packages"); + let sp1 = dir + .path() + .join("lib") + .join("python3.10") + .join("site-packages"); + let sp2 = dir + .path() + .join("lib") + .join("python3.11") + .join("site-packages"); tokio::fs::create_dir_all(&sp1).await.unwrap(); tokio::fs::create_dir_all(&sp2).await.unwrap(); @@ -789,8 +961,7 @@ mod tests { let non_match = dir.path().join("lib").join("ruby3.0").join("site-packages"); tokio::fs::create_dir_all(&non_match).await.unwrap(); - let results = - find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await; + let results = find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await; assert_eq!(results.len(), 2); } diff --git a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs 
b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs index 048cbd09..e355c634 100644 --- a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs @@ -344,12 +344,7 @@ impl RubyCrawler { /// (`--x86_64-linux/`). Only one platform is installed /// per environment, so we return the exact dir when present, otherwise /// the first verifying `--*` directory. - async fn locate_gem_dir( - &self, - gem_path: &Path, - name: &str, - version: &str, - ) -> Option { + async fn locate_gem_dir(&self, gem_path: &Path, name: &str, version: &str) -> Option { let exact = gem_path.join(format!("{name}-{version}")); if self.verify_gem_at_path(&exact).await { return Some(exact); @@ -456,7 +451,9 @@ mod tests { async fn test_find_by_purls_gem() { let dir = tempfile::tempdir().unwrap(); let rails_dir = dir.path().join("rails-7.1.0"); - tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(rails_dir.join("lib")) + .await + .unwrap(); let crawler = RubyCrawler::new(); let purls = vec![ @@ -476,10 +473,14 @@ mod tests { // Create fake gem directories with lib/ let rails_dir = dir.path().join("rails-7.1.0"); - tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(rails_dir.join("lib")) + .await + .unwrap(); let nokogiri_dir = dir.path().join("nokogiri-1.16.5"); - tokio::fs::create_dir_all(nokogiri_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(nokogiri_dir.join("lib")) + .await + .unwrap(); let crawler = RubyCrawler::new(); let options = CrawlerOptions { @@ -520,7 +521,9 @@ mod tests { // Create a single gem directory let rails_dir = dir.path().join("rails-7.1.0"); - tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(rails_dir.join("lib")) + .await + .unwrap(); let crawler = RubyCrawler::new(); let options = CrawlerOptions { @@ -558,12 +561,96 @@ mod tests { 
assert!(!crawler.verify_gem_at_path(&gem_dir).await); } - /// `"-1.0.0"` — match_indices finds `i=0` (followed by `1`), - /// split_idx ends up Some(0), name slice is empty. The defensive - /// empty-name guard at the bottom of parse_dir_name_version - /// rejects rather than producing a `Gem("", "1.0.0")` ghost. + /// `"-1.0.0"` — match_indices finds `i=0` (followed by `1`), the + /// name slice is empty. The defensive empty-name guard at the + /// bottom of parse_dir_name_version rejects rather than producing + /// a `Gem("", "1.0.0")` ghost. #[test] fn test_parse_dir_name_version_empty_name_guard() { assert_eq!(RubyCrawler::parse_dir_name_version("-1.0.0"), None); } + + // ── platform-suffix resolution end-to-end ───────────────────── + + /// `find_by_purls` must resolve a base PURL to a platform gem dir + /// that carries a `-` suffix on disk. Exercises the + /// `locate_gem_dir` prefix-scan fallback, which the original + /// suite only covered for the exact (plain-platform) case. + #[tokio::test] + async fn find_by_purls_resolves_platform_suffixed_dir() { + let dir = tempfile::tempdir().unwrap(); + let plat_dir = dir.path().join("nokogiri-1.16.5-x86_64-linux"); + tokio::fs::create_dir_all(plat_dir.join("lib")) + .await + .unwrap(); + + let crawler = RubyCrawler::new(); + let purls = vec!["pkg:gem/nokogiri@1.16.5".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + + assert_eq!(result.len(), 1); + let pkg = result.get("pkg:gem/nokogiri@1.16.5").unwrap(); + assert_eq!(pkg.version, "1.16.5"); + assert_eq!(pkg.path, plat_dir); + } + + /// A base PURL must NOT resolve to a platform dir whose version is + /// merely a prefix of the requested one (`1.0` vs `1.0.0`). 
+ #[tokio::test] + async fn find_by_purls_rejects_version_prefix_collision() { + let dir = tempfile::tempdir().unwrap(); + let plat_dir = dir.path().join("foo-1.0.0-x86_64-linux"); + tokio::fs::create_dir_all(plat_dir.join("lib")) + .await + .unwrap(); + + let crawler = RubyCrawler::new(); + // Request version "1.0" — must not match the installed "1.0.0". + let purls = vec!["pkg:gem/foo@1.0".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + assert!( + result.is_empty(), + "1.0 must not match foo-1.0.0-*; got {result:?}" + ); + } + + /// `crawl_all` must strip the platform suffix when building the + /// PURL while keeping `path` pointed at the real (platform) dir. + #[tokio::test] + async fn crawl_all_strips_platform_suffix() { + let dir = tempfile::tempdir().unwrap(); + let plat_dir = dir.path().join("nokogiri-1.16.5-arm64-darwin"); + tokio::fs::create_dir_all(plat_dir.join("lib")) + .await + .unwrap(); + + let crawler = RubyCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + assert_eq!(packages[0].purl, "pkg:gem/nokogiri@1.16.5"); + assert_eq!(packages[0].version, "1.16.5"); + assert_eq!(packages[0].path, plat_dir); + } + + /// A plain `-` dir must win over any platform + /// sibling when both are present (exact match short-circuits). 
+ #[tokio::test] + async fn locate_gem_dir_prefers_exact_over_platform() { + let dir = tempfile::tempdir().unwrap(); + let exact = dir.path().join("rails-7.1.0"); + let plat = dir.path().join("rails-7.1.0-x86_64-linux"); + tokio::fs::create_dir_all(exact.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(plat.join("lib")).await.unwrap(); + + let crawler = RubyCrawler::new(); + let purls = vec!["pkg:gem/rails@7.1.0".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + assert_eq!(result.get("pkg:gem/rails@7.1.0").unwrap().path, exact); + } } diff --git a/crates/socket-patch-core/src/crawlers/types.rs b/crates/socket-patch-core/src/crawlers/types.rs index 44925a6e..b58e0909 100644 --- a/crates/socket-patch-core/src/crawlers/types.rs +++ b/crates/socket-patch-core/src/crawlers/types.rs @@ -222,6 +222,91 @@ mod tests { assert_eq!(Ecosystem::from_purl("not-a-purl"), None); } + /// The matcher keys on `pkg:/` with the trailing slash. A type that + /// merely *starts with* a known type name (e.g. `npmlock`, `gemfire`) must + /// not be misclassified, and a type with no trailing slash is not a package + /// coordinate. This guards against someone loosening the prefix check. + #[test] + fn test_from_purl_requires_exact_type_with_slash() { + // Near-miss types that share a prefix with a real type. + assert_eq!(Ecosystem::from_purl("pkg:npmlock/foo@1.0"), None); + assert_eq!(Ecosystem::from_purl("pkg:gemfire/foo@1.0"), None); + assert_eq!(Ecosystem::from_purl("pkg:pypiserver/foo@1.0"), None); + // Type present but no trailing slash → not a coordinate. + assert_eq!(Ecosystem::from_purl("pkg:npm"), None); + assert_eq!(Ecosystem::from_purl("pkg:pypi"), None); + // Empty / scheme-only inputs. + assert_eq!(Ecosystem::from_purl(""), None); + assert_eq!(Ecosystem::from_purl("pkg:"), None); + } + + /// PURLs frequently carry qualifiers (`?artifact_id=`, `?platform=`, + /// `?classifier=&ext=`, `?repository_url=`). 
Classification keys off the + /// type prefix and must ignore anything after the coordinate. + #[test] + fn test_from_purl_ignores_qualifiers() { + assert_eq!( + Ecosystem::from_purl("pkg:npm/lodash@4.17.21?foo=bar"), + Some(Ecosystem::Npm) + ); + assert_eq!( + Ecosystem::from_purl( + "pkg:pypi/requests@2.28.0?artifact_id=requests-2.28.0-py3-none-any.whl" + ), + Some(Ecosystem::Pypi) + ); + assert_eq!( + Ecosystem::from_purl("pkg:gem/nokogiri@1.16.0?platform=x86_64-linux"), + Some(Ecosystem::Gem) + ); + } + + /// cli_name (the `--ecosystems` token) and display_name (user-facing) + /// intentionally diverge for several ecosystems. Lock the divergence so a + /// future "cleanup" can't accidentally collapse the two. + #[test] + fn test_cli_name_display_name_divergence() { + assert_eq!(Ecosystem::Pypi.cli_name(), "pypi"); + assert_eq!(Ecosystem::Pypi.display_name(), "python"); + assert_eq!(Ecosystem::Gem.cli_name(), "gem"); + assert_eq!(Ecosystem::Gem.display_name(), "ruby"); + #[cfg(feature = "golang")] + { + assert_eq!(Ecosystem::Golang.cli_name(), "golang"); + assert_eq!(Ecosystem::Golang.display_name(), "go"); + } + #[cfg(feature = "composer")] + { + assert_eq!(Ecosystem::Composer.cli_name(), "composer"); + assert_eq!(Ecosystem::Composer.display_name(), "php"); + } + } + + /// Every entry returned by `all()` must round-trip through `cli_name()` → + /// `from_purl(...)` so the dispatch tables can never drift apart silently. + #[test] + fn test_all_ecosystems_self_consistent() { + for eco in Ecosystem::all() { + // Names are non-empty and stable. + assert!(!eco.cli_name().is_empty()); + assert!(!eco.display_name().is_empty()); + // A synthetic PURL built from the type re-classifies to itself. + // Deno is the one type whose PURL token (`jsr`) differs from its + // cli_name (`deno`), so it is exercised separately below. 
+ #[cfg(feature = "deno")] + if *eco == Ecosystem::Deno { + continue; + } + let purl = format!("pkg:{}/example@1.0.0", eco.cli_name()); + assert_eq!( + Ecosystem::from_purl(&purl), + Some(*eco), + "round-trip failed for {}", + eco.cli_name() + ); + } + } + #[cfg(feature = "cargo")] #[test] fn test_from_purl_cargo() { @@ -295,6 +380,34 @@ mod tests { assert!(!Ecosystem::Cargo.supports_release_variants()); #[cfg(feature = "nuget")] assert!(!Ecosystem::Nuget.supports_release_variants()); + #[cfg(feature = "golang")] + assert!(!Ecosystem::Golang.supports_release_variants()); + #[cfg(feature = "composer")] + assert!(!Ecosystem::Composer.supports_release_variants()); + #[cfg(feature = "deno")] + assert!(!Ecosystem::Deno.supports_release_variants()); + } + + #[cfg(feature = "deno")] + #[test] + fn test_from_purl_deno_jsr() { + // JSR packages use the `pkg:jsr/` type but route to Ecosystem::Deno. + assert_eq!( + Ecosystem::from_purl("pkg:jsr/@std/path@0.220.0"), + Some(Ecosystem::Deno) + ); + // There is no `pkg:deno/` type; deno's npm-layout packages stay npm. + assert_eq!( + Ecosystem::from_purl("pkg:npm/chalk@5.3.0"), + Some(Ecosystem::Npm) + ); + } + + #[cfg(feature = "deno")] + #[test] + fn test_deno_properties() { + assert_eq!(Ecosystem::Deno.cli_name(), "deno"); + assert_eq!(Ecosystem::Deno.display_name(), "deno"); } #[test] diff --git a/crates/socket-patch-core/src/hash/git_sha256.rs b/crates/socket-patch-core/src/hash/git_sha256.rs index b4ccd429..4597c8c9 100644 --- a/crates/socket-patch-core/src/hash/git_sha256.rs +++ b/crates/socket-patch-core/src/hash/git_sha256.rs @@ -16,6 +16,14 @@ pub fn compute_git_sha256_from_bytes(data: &[u8]) -> String { /// Compute Git-compatible SHA256 hash from an async reader with known size. /// /// This streams the content through the hasher without loading it all into memory. +/// +/// The `size` is written into the Git object header *before* the body is read, +/// so it must match the number of bytes the reader actually yields. 
If it does +/// not (for example, the underlying file was truncated or extended between the +/// time its size was measured and the time it was read), the resulting hash +/// would correspond to no real Git object. Rather than silently return a +/// corrupt hash, this function reports an [`io::Error`] when the byte count +/// disagrees with `size`. pub async fn compute_git_sha256_from_reader( size: u64, mut reader: R, @@ -25,12 +33,23 @@ pub async fn compute_git_sha256_from_reader( hasher.update(header.as_bytes()); let mut buf = [0u8; 8192]; + let mut total: u64 = 0; loop { let n = reader.read(&mut buf).await?; if n == 0 { break; } hasher.update(&buf[..n]); + total += n as u64; + } + + if total != size { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "git sha256: declared size {size} does not match {total} bytes read from stream" + ), + )); } Ok(hex::encode(hasher.finalize())) @@ -49,6 +68,24 @@ mod tests { assert_eq!(hash, compute_git_sha256_from_bytes(b"")); } + /// Known-answer vectors computed with the actual Git SHA256 object format + /// (`SHA256("blob \0")`). These pin the algorithm to real + /// Git output so a regression cannot hide behind the self-consistent + /// reader-vs-bytes comparisons elsewhere in this module. + #[test] + fn test_git_known_answer_vectors() { + // `printf 'blob 0\0' | shasum -a 256` + assert_eq!( + compute_git_sha256_from_bytes(b""), + "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813", + ); + // `printf 'blob 13\0Hello, World!' 
| shasum -a 256` + assert_eq!( + compute_git_sha256_from_bytes(b"Hello, World!"), + "e118a058f018dda253bb692320c940091b15e4f19067e12fff110606a111f5da", + ); + } + #[test] fn test_hello_world() { let content = b"Hello, World!"; @@ -79,11 +116,52 @@ mod tests { let sync_hash = compute_git_sha256_from_bytes(content); let cursor = tokio::io::BufReader::new(&content[..]); - let async_hash = - compute_git_sha256_from_reader(content.len() as u64, cursor) - .await - .unwrap(); + let async_hash = compute_git_sha256_from_reader(content.len() as u64, cursor) + .await + .unwrap(); assert_eq!(sync_hash, async_hash); } + + /// Exercise the streaming loop across many buffer-sized reads (the 8192 + /// byte buffer is filled multiple times). Guards against off-by-one or + /// partial-read mistakes in the chunked update loop. + #[tokio::test] + async fn test_async_reader_multiple_chunks() { + let content: Vec = (0..50_000u32).map(|i| (i % 251) as u8).collect(); + let sync_hash = compute_git_sha256_from_bytes(&content); + + let cursor = tokio::io::BufReader::new(&content[..]); + let async_hash = compute_git_sha256_from_reader(content.len() as u64, cursor) + .await + .unwrap(); + + assert_eq!(sync_hash, async_hash); + } + + /// A declared size larger than the stream (e.g. the file was truncated + /// after its size was measured) must be reported as an error, not hashed + /// into a silently-corrupt object id. + #[tokio::test] + async fn test_async_reader_size_too_large_errors() { + let content = b"short"; + let cursor = tokio::io::BufReader::new(&content[..]); + let result = compute_git_sha256_from_reader(content.len() as u64 + 100, cursor).await; + + let err = result.expect_err("size larger than stream must error"); + assert_eq!(err.kind(), io::ErrorKind::InvalidData); + } + + /// A declared size smaller than the stream (e.g. the file grew after its + /// size was measured) must likewise be reported rather than producing a + /// hash whose header disagrees with its body. 
+ #[tokio::test] + async fn test_async_reader_size_too_small_errors() { + let content = b"this stream is longer than declared"; + let cursor = tokio::io::BufReader::new(&content[..]); + let result = compute_git_sha256_from_reader(4, cursor).await; + + let err = result.expect_err("size smaller than stream must error"); + assert_eq!(err.kind(), io::ErrorKind::InvalidData); + } } diff --git a/crates/socket-patch-core/src/manifest/operations.rs b/crates/socket-patch-core/src/manifest/operations.rs index 1aa78af1..64620a2c 100644 --- a/crates/socket-patch-core/src/manifest/operations.rs +++ b/crates/socket-patch-core/src/manifest/operations.rs @@ -53,29 +53,30 @@ pub fn validate_manifest(value: &serde_json::Value) -> Result) -> Result, std::io::Error> { +pub async fn read_manifest( + path: impl AsRef, +) -> Result, std::io::Error> { let path = path.as_ref(); let content = match tokio::fs::read_to_string(path).await { Ok(c) => c, Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None), - Err(e) => return Err(e), // FIX: propagate actual I/O error + Err(e) => return Err(e), // FIX: propagate actual I/O error }; let parsed: serde_json::Value = match serde_json::from_str(&content) { Ok(v) => v, - Err(e) => return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Failed to parse manifest JSON: {}", e), - )), + Err(e) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Failed to parse manifest JSON: {}", e), + )) + } }; match validate_manifest(&parsed) { Ok(manifest) => Ok(Some(manifest)), - Err(e) => Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - e, - )), + Err(e) => Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e)), } } @@ -98,18 +99,12 @@ mod tests { const TEST_UUID_1: &str = "11111111-1111-4111-8111-111111111111"; const TEST_UUID_2: &str = "22222222-2222-4222-8222-222222222222"; - const BEFORE_HASH_1: &str = - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111"; - 
const AFTER_HASH_1: &str = - "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb1111"; - const BEFORE_HASH_2: &str = - "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc2222"; - const AFTER_HASH_2: &str = - "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd2222"; - const BEFORE_HASH_3: &str = - "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee3333"; - const AFTER_HASH_3: &str = - "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3333"; + const BEFORE_HASH_1: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111"; + const AFTER_HASH_1: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb1111"; + const BEFORE_HASH_2: &str = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc2222"; + const AFTER_HASH_2: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd2222"; + const BEFORE_HASH_3: &str = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee3333"; + const AFTER_HASH_3: &str = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3333"; fn create_test_manifest() -> PatchManifest { let mut patches = HashMap::new(); @@ -210,7 +205,6 @@ mod tests { assert_eq!(blobs.len(), 0); } - #[test] fn test_validate_manifest_valid() { let json = serde_json::json!({ @@ -264,6 +258,106 @@ mod tests { assert!(result.unwrap().is_none()); } + // Regression: a missing file maps to Ok(None), but malformed JSON must + // surface as an InvalidData error -- NOT be silently swallowed as Ok(None). + // The original implementation returned Ok(None) for every failure mode, + // which hid corrupt manifests from callers. 
+ #[tokio::test] + async fn test_read_manifest_malformed_json_is_error() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest.json"); + tokio::fs::write(&path, "{ not valid json").await.unwrap(); + + let result = read_manifest(&path).await; + assert!( + result.is_err(), + "malformed JSON must be an error, not Ok(None)" + ); + assert_eq!(result.unwrap_err().kind(), std::io::ErrorKind::InvalidData); + } + + // Regression: well-formed JSON that doesn't satisfy the schema (missing + // required fields) must also surface as an InvalidData error. + #[tokio::test] + async fn test_read_manifest_invalid_schema_is_error() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest.json"); + // Valid JSON, but `patches` has the wrong shape. + tokio::fs::write(&path, r#"{"patches": "not-an-object"}"#) + .await + .unwrap(); + + let result = read_manifest(&path).await; + assert!( + result.is_err(), + "schema-invalid manifest must be an error, not Ok(None)" + ); + assert_eq!(result.unwrap_err().kind(), std::io::ErrorKind::InvalidData); + } + + // Regression: the two blob extractors must not be swapped. Each must return + // exactly its own side of the hash pair with zero cross-contamination. + #[test] + fn test_blob_extractors_do_not_cross_contaminate() { + let manifest = create_test_manifest(); + let after = get_after_hash_blobs(&manifest); + let before = get_before_hash_blobs(&manifest); + + // The two sets are disjoint for this fixture. + assert!(after.is_disjoint(&before)); + // Every after-blob is an afterHash from the fixture, never a beforeHash. + for b in [BEFORE_HASH_1, BEFORE_HASH_2, BEFORE_HASH_3] { + assert!(!after.contains(b)); + } + for a in [AFTER_HASH_1, AFTER_HASH_2, AFTER_HASH_3] { + assert!(!before.contains(a)); + } + } + + // Regression: a non-NotFound I/O error must propagate as Err -- it must NOT + // be collapsed into Ok(None). Only a genuinely-missing file is Ok(None). 
+ // Reading a directory as if it were a file produces such an I/O error, which + // directly exercises the `Err(e) => return Err(e)` arm. (The malformed-JSON + // and invalid-schema tests cover the parse/validate arms but not this one.) + #[tokio::test] + async fn test_read_manifest_io_error_propagates() { + let dir = tempfile::tempdir().unwrap(); + // Path exists but is a directory, so read_to_string fails with an I/O + // error whose kind is NOT NotFound. + let result = read_manifest(dir.path()).await; + assert!( + result.is_err(), + "a non-NotFound I/O error must surface as Err, not Ok(None)" + ); + assert_ne!( + result.unwrap_err().kind(), + std::io::ErrorKind::NotFound, + "an existing-but-unreadable path is not a 'missing file'" + ); + } + + // Regression: write_manifest -> read_manifest must preserve the full record, + // not merely the patch count. Guards against a serializer that drops nested + // fields (file hashes, vulnerabilities) while still round-tripping the keys. + #[tokio::test] + async fn test_write_manifest_preserves_full_content() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest.json"); + + let manifest = create_test_manifest(); + write_manifest(&path, &manifest).await.unwrap(); + + let read_back = read_manifest(&path).await.unwrap().unwrap(); + // Deep equality: every patch, file, hash, and vulnerability survives. + assert_eq!(read_back, manifest); + + // Spot-check a nested hash to make the intent explicit. 
+ let record = read_back.patches.get("pkg:npm/pkg-a@1.0.0").unwrap(); + let file_info = record.files.get("package/index.js").unwrap(); + assert_eq!(file_info.before_hash, BEFORE_HASH_1); + assert_eq!(file_info.after_hash, AFTER_HASH_1); + } + #[tokio::test] async fn test_write_and_read_manifest() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/manifest/schema.rs b/crates/socket-patch-core/src/manifest/schema.rs index bfd7fe3f..8ef03991 100644 --- a/crates/socket-patch-core/src/manifest/schema.rs +++ b/crates/socket-patch-core/src/manifest/schema.rs @@ -99,7 +99,10 @@ mod tests { let manifest: PatchManifest = serde_json::from_str(json).unwrap(); assert_eq!(manifest.patches.len(), 1); - let patch = manifest.patches.get("pkg:npm/simplehttpserver@0.0.6").unwrap(); + let patch = manifest + .patches + .get("pkg:npm/simplehttpserver@0.0.6") + .unwrap(); assert_eq!(patch.uuid, "12345678-1234-1234-1234-123456789abc"); assert_eq!(patch.files.len(), 1); assert_eq!(patch.vulnerabilities.len(), 1); @@ -149,4 +152,173 @@ mod tests { assert!(json.contains("exportedAt")); assert!(!json.contains("exported_at")); } + + // ── Regression: pin the on-the-wire JSON contract with the TS schema ── + // + // schema.rs is a pure serde DTO whose only job is to match the + // camelCase shape that the legacy TS tool (manifest-schema.ts) reads and + // writes. The tests below lock that contract so a dropped or mistyped + // `rename_all`, a renamed field, or a removed field fails loudly rather + // than silently producing a manifest the TS tooling can't read. + + // The camelCase rename must be ENFORCED on input, not merely emitted on + // output. A manifest carrying snake_case keys (as a naive serializer + // without `rename_all` would produce) must be rejected, otherwise the two + // implementations could silently drift apart. 
+    #[test]
+    fn test_patch_file_info_rejects_snake_case_keys() {
+        let snake = r#"{"before_hash": "a", "after_hash": "b"}"#;
+        assert!(
+            serde_json::from_str::<PatchFileInfo>(snake).is_err(),
+            "snake_case keys must not deserialize -- the wire contract is camelCase"
+        );
+
+        let camel = r#"{"beforeHash": "a", "afterHash": "b"}"#;
+        let parsed: PatchFileInfo = serde_json::from_str(camel).unwrap();
+        assert_eq!(parsed.before_hash, "a");
+        assert_eq!(parsed.after_hash, "b");
+    }
+
+    // Likewise for `exportedAt` on a record: snake_case must be rejected.
+    #[test]
+    fn test_patch_record_rejects_snake_case_exported_at() {
+        let json = r#"{
+            "uuid": "11111111-1111-4111-8111-111111111111",
+            "exported_at": "2024-01-01T00:00:00Z",
+            "files": {},
+            "vulnerabilities": {},
+            "description": "d",
+            "license": "MIT",
+            "tier": "free"
+        }"#;
+        assert!(
+            serde_json::from_str::<PatchRecord>(json).is_err(),
+            "exported_at must be rejected; the contract field is exportedAt"
+        );
+    }
+
+    // VulnerabilityInfo intentionally has NO `rename_all` (all fields are
+    // single lowercase words). Pin its exact keys so nobody "helpfully" adds a
+    // rename that would break the contract, and exercise an empty `cves` array
+    // (the medium-severity shape from the TS test suite).
+    #[test]
+    fn test_vulnerability_info_exact_keys_and_empty_cves() {
+        let json = r#"{
+            "cves": [],
+            "summary": "Some vuln",
+            "severity": "medium",
+            "description": "A medium severity vulnerability"
+        }"#;
+        let vuln: VulnerabilityInfo = serde_json::from_str(json).unwrap();
+        assert!(vuln.cves.is_empty());
+        assert_eq!(vuln.severity, "medium");
+
+        let serialized = serde_json::to_string(&vuln).unwrap();
+        for key in ["\"cves\"", "\"summary\"", "\"severity\"", "\"description\""] {
+            assert!(serialized.contains(key), "missing key {key}");
+        }
+    }
+
+    // Every PatchRecord field is required (mirroring the TS zod schema, which
+    // rejects records missing any field). Dropping any one must fail.
+    #[test]
+    fn test_patch_record_requires_all_fields() {
+        // A complete record, used as the baseline.
+        let complete = serde_json::json!({
+            "uuid": "11111111-1111-4111-8111-111111111111",
+            "exportedAt": "2024-01-01T00:00:00Z",
+            "files": {},
+            "vulnerabilities": {},
+            "description": "d",
+            "license": "MIT",
+            "tier": "free"
+        });
+        assert!(serde_json::from_value::<PatchRecord>(complete.clone()).is_ok());
+
+        for field in [
+            "uuid",
+            "exportedAt",
+            "files",
+            "vulnerabilities",
+            "description",
+            "license",
+            "tier",
+        ] {
+            let mut partial = complete.clone();
+            partial.as_object_mut().unwrap().remove(field);
+            assert!(
+                serde_json::from_value::<PatchRecord>(partial).is_err(),
+                "a record missing `{field}` must be rejected"
+            );
+        }
+    }
+
+    // A multi-patch manifest mirroring the TS test suite (a free/MIT patch and
+    // a paid/Apache-2.0 patch) must survive a full deserialize -> serialize ->
+    // deserialize round-trip with deep equality, guarding against a serializer
+    // that drops nested records, files, or vulnerabilities.
+ #[test] + fn test_multi_patch_manifest_deep_roundtrip() { + let json = r#"{ + "patches": { + "pkg:npm/pkg-a@1.0.0": { + "uuid": "550e8400-e29b-41d4-a716-446655440001", + "exportedAt": "2024-01-01T00:00:00Z", + "files": { + "package/lib/index.js": { "beforeHash": "aaa", "afterHash": "bbb" } + }, + "vulnerabilities": {}, + "description": "Patch A", + "license": "MIT", + "tier": "free" + }, + "pkg:npm/pkg-b@2.0.0": { + "uuid": "550e8400-e29b-41d4-a716-446655440002", + "exportedAt": "2024-02-01T00:00:00Z", + "files": { + "package/src/main.js": { "beforeHash": "ccc", "afterHash": "ddd" } + }, + "vulnerabilities": { + "GHSA-xxxx-yyyy-zzzz": { + "cves": [], + "summary": "Some vuln", + "severity": "medium", + "description": "A medium severity vulnerability" + } + }, + "description": "Patch B", + "license": "Apache-2.0", + "tier": "paid" + } + } +}"#; + + let manifest: PatchManifest = serde_json::from_str(json).unwrap(); + assert_eq!(manifest.patches.len(), 2); + + let serialized = serde_json::to_string_pretty(&manifest).unwrap(); + let reparsed: PatchManifest = serde_json::from_str(&serialized).unwrap(); + assert_eq!(manifest, reparsed); + + let b = reparsed.patches.get("pkg:npm/pkg-b@2.0.0").unwrap(); + assert_eq!(b.license, "Apache-2.0"); + assert_eq!(b.tier, "paid"); + assert_eq!(b.vulnerabilities.len(), 1); + assert!(b + .vulnerabilities + .get("GHSA-xxxx-yyyy-zzzz") + .unwrap() + .cves + .is_empty()); + } + + // A manifest missing the top-level `patches` key must be rejected (the TS + // schema requires it; `{}` is not a valid manifest). 
+ #[test] + fn test_manifest_requires_patches_field() { + assert!( + serde_json::from_str::("{}").is_err(), + "a manifest without a `patches` field must be rejected" + ); + } } diff --git a/crates/socket-patch-core/src/package_json/detect.rs b/crates/socket-patch-core/src/package_json/detect.rs index e90f742b..2e499e6d 100644 --- a/crates/socket-patch-core/src/package_json/detect.rs +++ b/crates/socket-patch-core/src/package_json/detect.rs @@ -111,15 +111,23 @@ pub fn update_package_json_object( let status = is_setup_configured(package_json); if !status.needs_update { - return ( - false, - status.postinstall_script, - status.dependencies_script, - ); + return (false, status.postinstall_script, status.dependencies_script); } - // Ensure scripts object exists - if package_json.get("scripts").is_none() { + // We can only attach scripts to an object root. Anything else (array, + // string, number, bool, null) cannot hold a "scripts" key, so indexing it + // below would panic. Bail out as a no-op instead. + if !package_json.is_object() { + return (false, status.postinstall_script, status.dependencies_script); + } + + // Ensure `scripts` exists *and* is an object. A present-but-non-object + // `scripts` (e.g. a string or array) would otherwise panic when indexed. + if !package_json + .get("scripts") + .map(serde_json::Value::is_object) + .unwrap_or(false) + { package_json["scripts"] = serde_json::json!({}); } @@ -156,6 +164,20 @@ pub fn update_package_json_content( let mut package_json: serde_json::Value = serde_json::from_str(content).map_err(|e| format!("Invalid package.json: {e}"))?; + // A package.json must be a JSON object; otherwise there is nowhere to add + // lifecycle scripts. + if !package_json.is_object() { + return Err("Invalid package.json: root is not a JSON object".to_string()); + } + + // Refuse to clobber a malformed (present but non-object) `scripts` value. + // `null` is treated as absent and replaced with a fresh object downstream. 
+ if let Some(scripts) = package_json.get("scripts") { + if !scripts.is_null() && !scripts.is_object() { + return Err("Invalid package.json: \"scripts\" is not a JSON object".to_string()); + } + } + let status = is_setup_configured(&package_json); if !status.needs_update { @@ -172,8 +194,7 @@ pub fn update_package_json_content( let old_postinstall = status.postinstall_script.clone(); let old_dependencies = status.dependencies_script.clone(); - let (_, new_postinstall, new_dependencies) = - update_package_json_object(&mut package_json, pm); + let (_, new_postinstall, new_dependencies) = update_package_json_object(&mut package_json, pm); let new_content = serde_json::to_string_pretty(&package_json).unwrap() + "\n"; Ok(( @@ -280,7 +301,8 @@ mod tests { #[test] fn test_configured_str_legacy_npx_pattern() { - let content = r#"{"scripts":{"postinstall":"npx @socketsecurity/socket-patch apply --silent"}}"#; + let content = + r#"{"scripts":{"postinstall":"npx @socketsecurity/socket-patch apply --silent"}}"#; let status = is_setup_configured_str(content); assert!(status.postinstall_configured); } @@ -440,6 +462,90 @@ mod tests { assert!(result.unwrap_err().contains("Invalid package.json")); } + #[test] + fn test_update_object_scripts_is_string_does_not_panic() { + // Regression: a present-but-non-object `scripts` previously panicked + // when indexed (`cannot access key "postinstall" in JSON string`). + let mut pkg: serde_json::Value = serde_json::json!({ + "name": "test", + "scripts": "build" + }); + let (modified, _, _) = update_package_json_object(&mut pkg, PackageManager::Npm); + // Root is an object but `scripts` is malformed; the object-level helper + // replaces it rather than panicking. 
+ assert!(modified); + assert!(pkg["scripts"]["postinstall"].is_string()); + assert!(pkg["scripts"]["dependencies"].is_string()); + } + + #[test] + fn test_update_object_scripts_is_array_does_not_panic() { + let mut pkg: serde_json::Value = serde_json::json!({ + "name": "test", + "scripts": ["build"] + }); + let (modified, _, _) = update_package_json_object(&mut pkg, PackageManager::Npm); + assert!(modified); + assert!(pkg["scripts"].is_object()); + } + + #[test] + fn test_update_object_scripts_is_null() { + // `null` scripts is treated as absent and replaced with an object. + let mut pkg: serde_json::Value = serde_json::json!({ + "name": "test", + "scripts": null + }); + let (modified, _, _) = update_package_json_object(&mut pkg, PackageManager::Npm); + assert!(modified); + assert!(pkg["scripts"]["postinstall"].is_string()); + } + + #[test] + fn test_update_object_non_object_root_is_noop() { + // Regression: a non-object root previously panicked on `["scripts"] = ...`. + let mut arr: serde_json::Value = serde_json::json!([1, 2, 3]); + let (modified, _, _) = update_package_json_object(&mut arr, PackageManager::Npm); + assert!(!modified); + assert_eq!(arr, serde_json::json!([1, 2, 3])); + } + + #[test] + fn test_update_content_non_object_root_errors() { + // Regression: valid JSON that is not an object must error, not panic. + for content in ["[1,2,3]", "42", "\"hello\"", "true", "null"] { + let result = update_package_json_content(content, PackageManager::Npm); + assert!(result.is_err(), "expected error for content {content:?}"); + assert!(result.unwrap_err().contains("root is not a JSON object")); + } + } + + #[test] + fn test_update_content_non_object_scripts_errors() { + // Regression: a present-but-non-object `scripts` must error rather than + // silently clobbering the user's value or panicking. 
+ let content = r#"{"name":"test","scripts":"build"}"#; + let result = update_package_json_content(content, PackageManager::Npm); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .contains("\"scripts\" is not a JSON object")); + } + + #[test] + fn test_update_content_null_scripts_creates_object() { + // `null` scripts is benign: treated as absent and populated. + let content = r#"{"name":"test","scripts":null}"#; + let (modified, new_content, _, new_pi, _, new_dep) = + update_package_json_content(content, PackageManager::Npm).unwrap(); + assert!(modified); + assert!(new_pi.contains("npx @socketsecurity/socket-patch apply")); + assert!(new_dep.contains("npx @socketsecurity/socket-patch apply")); + let parsed: serde_json::Value = serde_json::from_str(&new_content).unwrap(); + assert!(parsed["scripts"]["postinstall"].is_string()); + assert!(parsed["scripts"]["dependencies"].is_string()); + } + #[test] fn test_update_content_pnpm() { let content = r#"{"name": "test"}"#; diff --git a/crates/socket-patch-core/src/package_json/find.rs b/crates/socket-patch-core/src/package_json/find.rs index b72c5c7f..f4487b12 100644 --- a/crates/socket-patch-core/src/package_json/find.rs +++ b/crates/socket-patch-core/src/package_json/find.rs @@ -46,9 +46,7 @@ pub struct PackageJsonFindResult { } /// Find all package.json files, respecting workspace configurations. -pub async fn find_package_json_files( - start_path: &Path, -) -> PackageJsonFindResult { +pub async fn find_package_json_files(start_path: &Path) -> PackageJsonFindResult { let mut results = Vec::new(); let root_package_json = start_path.join("package.json"); @@ -77,12 +75,18 @@ pub async fn find_package_json_files( } } _ => { - let ws_packages = - find_workspace_packages(start_path, &workspace_config).await; + let ws_packages = find_workspace_packages(start_path, &workspace_config).await; results.extend(ws_packages); } } + // Workspace patterns can overlap (e.g. 
"packages/*" and "packages/a", or a + // glob plus an exact path), which would otherwise yield the same + // package.json more than once. De-duplicate by path, preserving discovery + // order so the root entry stays first. + let mut seen = std::collections::HashSet::new(); + results.retain(|loc| seen.insert(loc.path.clone())); + PackageJsonFindResult { files: results, workspace_type: workspace_config.ws_type, @@ -96,19 +100,13 @@ pub async fn detect_workspaces(package_json_path: &Path) -> WorkspaceConfig { patterns: Vec::new(), }; - let content = match fs::read_to_string(package_json_path).await { - Ok(c) => c, - Err(_) => return default, - }; - - let pkg: serde_json::Value = match serde_json::from_str(&content) { - Ok(v) => v, - Err(_) => return default, - }; - // Check for pnpm workspaces first — pnpm projects may also have - // "workspaces" in package.json for compatibility, but - // pnpm-workspace.yaml is the definitive signal. + // "workspaces" in package.json for compatibility, but pnpm-workspace.yaml + // is the definitive signal. It lives next to package.json and does not + // depend on package.json being present or even valid JSON, so it must be + // checked *before* parsing package.json — otherwise a malformed (e.g. + // JSONC, or simply broken) root manifest would wrongly demote a real pnpm + // workspace to "no workspace". 
let dir = package_json_path.parent().unwrap_or(Path::new(".")); let pnpm_workspace = dir.join("pnpm-workspace.yaml"); if let Ok(yaml_content) = fs::read_to_string(&pnpm_workspace).await { @@ -119,6 +117,16 @@ pub async fn detect_workspaces(package_json_path: &Path) -> WorkspaceConfig { }; } + let content = match fs::read_to_string(package_json_path).await { + Ok(c) => c, + Err(_) => return default, + }; + + let pkg: serde_json::Value = match serde_json::from_str(&content) { + Ok(v) => v, + Err(_) => return default, + }; + // Check for npm/yarn workspaces if let Some(workspaces) = pkg.get("workspaces") { let patterns = if let Some(arr) = workspaces.as_array() { @@ -161,17 +169,14 @@ fn parse_pnpm_workspace_patterns(yaml_content: &str) -> Vec { } if in_packages { - if !trimmed.is_empty() - && !trimmed.starts_with('-') - && !trimmed.starts_with('#') - { + if !trimmed.is_empty() && !trimmed.starts_with('-') && !trimmed.starts_with('#') { break; } if let Some(rest) = trimmed.strip_prefix('-') { - let item = rest.trim().trim_matches('\'').trim_matches('"'); + let item = parse_yaml_list_value(rest); if !item.is_empty() { - patterns.push(item.to_string()); + patterns.push(item); } } } @@ -180,6 +185,33 @@ fn parse_pnpm_workspace_patterns(yaml_content: &str) -> Vec { patterns } +/// Extract the scalar value of a YAML list item, handling surrounding quotes +/// and trailing inline comments (`# ...`). +fn parse_yaml_list_value(raw: &str) -> String { + let s = raw.trim(); + + // Quoted scalar: take the content between the first matching pair of + // quotes. Anything after the closing quote (e.g. an inline comment) is + // ignored, and a `#` inside the quotes stays part of the value. + for q in ['\'', '"'] { + if let Some(rest) = s.strip_prefix(q) { + if let Some(end) = rest.find(q) { + return rest[..end].to_string(); + } + } + } + + // Unquoted scalar: a `#` preceded by whitespace begins an inline comment. 
+ let bytes = s.as_bytes(); + let comment_start = + (1..bytes.len()).find(|&i| bytes[i] == b'#' && bytes[i - 1].is_ascii_whitespace()); + let value = match comment_start { + Some(idx) => &s[..idx], + None => s, + }; + value.trim().to_string() +} + /// Find workspace packages based on workspace patterns. async fn find_workspace_packages( root_path: &Path, @@ -203,29 +235,45 @@ async fn find_workspace_packages( } /// Find packages matching a workspace pattern. -async fn find_packages_matching_pattern( - root_path: &Path, - pattern: &str, -) -> Vec { +async fn find_packages_matching_pattern(root_path: &Path, pattern: &str) -> Vec { let mut results = Vec::new(); - let parts: Vec<&str> = pattern.split('/').collect(); - - if parts.len() == 2 && parts[1] == "*" { - let search_path = root_path.join(parts[0]); - search_one_level(&search_path, &mut results).await; - } else if parts.len() == 2 && parts[1] == "**" { - let search_path = root_path.join(parts[0]); - search_recursive(&search_path, &mut results).await; - } else { - let pkg_json = root_path.join(pattern).join("package.json"); - if fs::metadata(&pkg_json).await.is_ok() { - results.push(pkg_json); + + // A trailing `*`/`**` segment is a glob; everything before the final `/` + // is a (possibly empty, possibly multi-segment) directory prefix. Split on + // the *last* `/` so bare globs (`*`, `**`) and deeper prefixes (`a/b/*`) + // are handled, not just the two-segment `prefix/*` form. 
+ let (prefix, last) = pattern.rsplit_once('/').unwrap_or(("", pattern)); + + match last { + "*" | "**" => { + let search_path = if prefix.is_empty() { + root_path.to_path_buf() + } else { + root_path.join(prefix) + }; + if last == "*" { + search_one_level(&search_path, &mut results).await; + } else { + search_recursive(&search_path, &mut results).await; + } + } + _ => { + let pkg_json = root_path.join(pattern).join("package.json"); + if fs::metadata(&pkg_json).await.is_ok() { + results.push(pkg_json); + } } } results } +/// Directories that are never workspace members and must be skipped while +/// walking the tree (hidden dirs plus dependency/output directories). +fn is_ignored_dir(name: &str) -> bool { + name.starts_with('.') || name == "node_modules" || name == "dist" || name == "build" +} + /// Search one level deep for package.json files. async fn search_one_level(dir: &Path, results: &mut Vec) { let mut entries = match fs::read_dir(dir).await { @@ -241,6 +289,11 @@ async fn search_one_level(dir: &Path, results: &mut Vec) { if !ft.is_dir() { continue; } + // A `dir/*` pattern must not pick up node_modules/hidden/output dirs as + // workspace members, matching the recursive searchers below. + if is_ignored_dir(&entry.file_name().to_string_lossy()) { + continue; + } let pkg_json = entry.path().join("package.json"); if fs::metadata(&pkg_json).await.is_ok() { results.push(pkg_json); @@ -268,11 +321,7 @@ async fn search_recursive(dir: &Path, results: &mut Vec) { let name_str = name.to_string_lossy(); // Skip hidden directories, node_modules, dist, build - if name_str.starts_with('.') - || name_str == "node_modules" - || name_str == "dist" - || name_str == "build" - { + if is_ignored_dir(&name_str) { continue; } @@ -287,9 +336,7 @@ async fn search_recursive(dir: &Path, results: &mut Vec) { } /// Find nested package.json files without workspace configuration. 
-async fn find_nested_package_json_files( - start_path: &Path, -) -> Vec { +async fn find_nested_package_json_files(start_path: &Path) -> Vec { let mut results = Vec::new(); let root_pkg = start_path.join("package.json"); search_nested(start_path, &root_pkg, 0, &mut results).await; @@ -323,11 +370,7 @@ async fn search_nested( let name = entry.file_name(); let name_str = name.to_string_lossy(); - if name_str.starts_with('.') - || name_str == "node_modules" - || name_str == "dist" - || name_str == "build" - { + if is_ignored_dir(&name_str) { continue; } @@ -457,9 +500,7 @@ mod tests { let pkg = dir.path().join("package.json"); fs::write(&pkg, r#"{"name": "root"}"#).await.unwrap(); let pnpm = dir.path().join("pnpm-workspace.yaml"); - fs::write(&pnpm, "packages:\n - packages/*") - .await - .unwrap(); + fs::write(&pnpm, "packages:\n - packages/*").await.unwrap(); let config = detect_workspaces(&pkg).await; assert!(matches!(config.ws_type, WorkspaceType::Pnpm)); assert_eq!(config.patterns, vec!["packages/*"]); @@ -471,12 +512,9 @@ mod tests { // exist, pnpm should take priority let dir = tempfile::tempdir().unwrap(); let pkg = dir.path().join("package.json"); - fs::write( - &pkg, - r#"{"name": "root", "workspaces": ["packages/*"]}"#, - ) - .await - .unwrap(); + fs::write(&pkg, r#"{"name": "root", "workspaces": ["packages/*"]}"#) + .await + .unwrap(); let pnpm = dir.path().join("pnpm-workspace.yaml"); fs::write(&pnpm, "packages:\n - workspaces/*") .await @@ -487,6 +525,24 @@ mod tests { assert_eq!(config.patterns, vec!["workspaces/*"]); } + #[tokio::test] + async fn test_detect_workspaces_pnpm_with_malformed_package_json() { + // Regression: pnpm-workspace.yaml is the definitive signal and must be + // honored even when the root package.json is not valid JSON. Previously + // the JSON parse error short-circuited before the pnpm check. 
+ let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + // JSONC-style comment — valid for some tooling, invalid for serde_json. + fs::write(&pkg, "{\n // a comment\n \"name\": \"root\"\n}") + .await + .unwrap(); + let pnpm = dir.path().join("pnpm-workspace.yaml"); + fs::write(&pnpm, "packages:\n - packages/*").await.unwrap(); + let config = detect_workspaces(&pkg).await; + assert!(matches!(config.ws_type, WorkspaceType::Pnpm)); + assert_eq!(config.patterns, vec!["packages/*"]); + } + #[tokio::test] async fn test_detect_workspaces_none() { let dir = tempfile::tempdir().unwrap(); @@ -654,6 +710,143 @@ mod tests { assert_eq!(result.files.len(), 2); } + #[test] + fn test_parse_pnpm_inline_comment_stripped() { + // A `# ...` inline comment after a pattern must not become part of it. + let yaml = "packages:\n - packages/* # workspace packages\n - apps/*\t# trailing tab"; + assert_eq!( + parse_pnpm_workspace_patterns(yaml), + vec!["packages/*", "apps/*"] + ); + } + + #[test] + fn test_parse_pnpm_quoted_value_keeps_hash() { + // A `#` inside quotes is part of the value, not a comment. + let yaml = "packages:\n - 'packages/#weird' # but this is a comment"; + assert_eq!(parse_pnpm_workspace_patterns(yaml), vec!["packages/#weird"]); + } + + #[tokio::test] + async fn test_find_overlapping_patterns_no_duplicates() { + // "packages/*" and the exact "packages/a" both match the same member; + // the result must contain it only once. 
+ let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["packages/*", "packages/a"]}"#, + ) + .await + .unwrap(); + let a = dir.path().join("packages").join("a"); + fs::create_dir_all(&a).await.unwrap(); + fs::write(a.join("package.json"), r#"{"name":"a"}"#) + .await + .unwrap(); + let result = find_package_json_files(dir.path()).await; + // root + exactly one workspace member (no duplicate for packages/a) + assert_eq!(result.files.len(), 2); + assert!(result.files[0].is_root); + let workspace_count = result.files.iter().filter(|f| f.is_workspace).count(); + assert_eq!(workspace_count, 1); + } + + #[tokio::test] + async fn test_find_star_pattern_skips_node_modules() { + // A `packages/*` glob must not treat node_modules (or hidden/output + // dirs) as a workspace member, even if they contain a package.json. + let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["packages/*"]}"#, + ) + .await + .unwrap(); + let real = dir.path().join("packages").join("real"); + fs::create_dir_all(&real).await.unwrap(); + fs::write(real.join("package.json"), r#"{"name":"real"}"#) + .await + .unwrap(); + for ignored in ["node_modules", ".cache", "dist", "build"] { + let d = dir.path().join("packages").join(ignored); + fs::create_dir_all(&d).await.unwrap(); + fs::write(d.join("package.json"), r#"{"name":"x"}"#) + .await + .unwrap(); + } + let result = find_package_json_files(dir.path()).await; + // root + only the "real" member + assert_eq!(result.files.len(), 2); + let workspace_count = result.files.iter().filter(|f| f.is_workspace).count(); + assert_eq!(workspace_count, 1); + } + + #[tokio::test] + async fn test_find_workspace_bare_star() { + // A bare `*` glob means "every immediate subdirectory" and must be + // expanded, not treated as a literal directory named `*`. 
+ let dir = tempfile::tempdir().unwrap(); + fs::write(dir.path().join("package.json"), r#"{"workspaces": ["*"]}"#) + .await + .unwrap(); + for member in ["a", "b"] { + let m = dir.path().join(member); + fs::create_dir_all(&m).await.unwrap(); + fs::write(m.join("package.json"), r#"{"name":"m"}"#) + .await + .unwrap(); + } + // node_modules must still be ignored even for a root-level `*`. + let nm = dir.path().join("node_modules").join("dep"); + fs::create_dir_all(&nm).await.unwrap(); + fs::write(nm.join("package.json"), r#"{"name":"dep"}"#) + .await + .unwrap(); + let result = find_package_json_files(dir.path()).await; + let workspace_count = result.files.iter().filter(|f| f.is_workspace).count(); + // root + members a and b (node_modules excluded) + assert_eq!(workspace_count, 2); + assert!(result.files[0].is_root); + } + + #[tokio::test] + async fn test_find_workspace_bare_double_glob() { + // A bare `**` glob recurses from the root. + let dir = tempfile::tempdir().unwrap(); + fs::write(dir.path().join("package.json"), r#"{"workspaces": ["**"]}"#) + .await + .unwrap(); + let nested = dir.path().join("a").join("b"); + fs::create_dir_all(&nested).await.unwrap(); + fs::write(nested.join("package.json"), r#"{"name":"b"}"#) + .await + .unwrap(); + let result = find_package_json_files(dir.path()).await; + let workspace_count = result.files.iter().filter(|f| f.is_workspace).count(); + assert!(workspace_count >= 1); + } + + #[tokio::test] + async fn test_find_workspace_deep_prefix_glob() { + // A glob with a multi-segment prefix (`group/sub/*`) must expand the + // directory under that prefix, not be treated as a literal path. 
+ let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["group/sub/*"]}"#, + ) + .await + .unwrap(); + let member = dir.path().join("group").join("sub").join("pkg"); + fs::create_dir_all(&member).await.unwrap(); + fs::write(member.join("package.json"), r#"{"name":"pkg"}"#) + .await + .unwrap(); + let result = find_package_json_files(dir.path()).await; + let workspace_count = result.files.iter().filter(|f| f.is_workspace).count(); + assert_eq!(workspace_count, 1); + } + // ── detect_package_manager ────────────────────────────────────── #[tokio::test] diff --git a/crates/socket-patch-core/src/package_json/update.rs b/crates/socket-patch-core/src/package_json/update.rs index d08422da..79afef33 100644 --- a/crates/socket-patch-core/src/package_json/update.rs +++ b/crates/socket-patch-core/src/package_json/update.rs @@ -213,4 +213,137 @@ mod tests { assert!(content.contains("dependencies")); } + /// Writing back the user's package.json must not reorder their existing + /// keys. Without `serde_json/preserve_order` the value map is sorted + /// alphabetically, so a file like `{"version":..,"name":..}` would be + /// rewritten as `{"name":..,"version":..}` — a destructive, noisy diff + /// over something the tool only meant to append two scripts to. + #[tokio::test] + async fn test_update_preserves_top_level_key_order() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + // Deliberately non-alphabetical key order. 
+ fs::write( + &pkg, + r#"{"version":"1.0.0","name":"x","private":true,"scripts":{"build":"tsc"}}"#, + ) + .await + .unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::Updated); + + let content = fs::read_to_string(&pkg).await.unwrap(); + let pos_version = content.find("\"version\"").unwrap(); + let pos_name = content.find("\"name\"").unwrap(); + let pos_private = content.find("\"private\"").unwrap(); + let pos_scripts = content.find("\"scripts\"").unwrap(); + assert!( + pos_version < pos_name && pos_name < pos_private && pos_private < pos_scripts, + "original top-level key order must be preserved, got:\n{content}" + ); + } + + /// The pre-existing `build` script (and its position) must survive an + /// update that only appends the lifecycle scripts. + #[tokio::test] + async fn test_update_preserves_existing_scripts() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write( + &pkg, + r#"{"name":"x","scripts":{"build":"tsc","test":"jest"}}"#, + ) + .await + .unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::Updated); + + let parsed: serde_json::Value = + serde_json::from_str(&fs::read_to_string(&pkg).await.unwrap()).unwrap(); + assert_eq!(parsed["scripts"]["build"], "tsc"); + assert_eq!(parsed["scripts"]["test"], "jest"); + assert!(parsed["scripts"]["postinstall"].is_string()); + assert!(parsed["scripts"]["dependencies"].is_string()); + } + + /// Running setup twice must be idempotent: the second run reports + /// `AlreadyConfigured` and leaves the file byte-for-byte unchanged (no + /// duplicated `socket-patch apply` commands). 
+ #[tokio::test] + async fn test_update_is_idempotent() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write(&pkg, r#"{"name":"x","scripts":{"build":"tsc"}}"#) + .await + .unwrap(); + + let r1 = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(r1.status, UpdateStatus::Updated); + let after_first = fs::read_to_string(&pkg).await.unwrap(); + + let r2 = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(r2.status, UpdateStatus::AlreadyConfigured); + let after_second = fs::read_to_string(&pkg).await.unwrap(); + + assert_eq!(after_first, after_second); + assert_eq!(after_first.matches("socket-patch apply").count(), 2); + } + + /// Valid JSON whose root is not an object cannot hold lifecycle scripts; + /// it must surface an error rather than panicking or silently succeeding. + #[tokio::test] + async fn test_update_non_object_root_errors() { + let dir = tempfile::tempdir().unwrap(); + for (i, body) in ["[1,2,3]", "42", "\"hi\"", "true", "null"] + .iter() + .enumerate() + { + let pkg = dir.path().join(format!("pkg{i}.json")); + fs::write(&pkg, body).await.unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::Error, "body={body}"); + assert!(result.error.is_some(), "body={body}"); + } + } + + /// A present-but-non-object `scripts` is malformed; refuse to clobber it. + #[tokio::test] + async fn test_update_non_object_scripts_errors_and_leaves_file() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + let original = r#"{"name":"x","scripts":"build"}"#; + fs::write(&pkg, original).await.unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::Error); + // File must be left untouched. 
+ assert_eq!(fs::read_to_string(&pkg).await.unwrap(), original); + } + + /// An empty file is invalid JSON and must error without writing. + #[tokio::test] + async fn test_update_empty_file_errors() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write(&pkg, "").await.unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::Error); + assert!(result.error.is_some()); + } + + /// Dry-run on a file that needs updating reports `Updated` but must not + /// touch the bytes on disk — the consumer relies on this for its preview. + #[tokio::test] + async fn test_update_dry_run_reports_updated_without_writing_scripts() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + let original = r#"{"name":"x","scripts":{"postinstall":"echo hi"}}"#; + fs::write(&pkg, original).await.unwrap(); + let result = update_package_json(&pkg, true, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::Updated); + // old_script reflects the existing script; new_script the prepended one. 
+ assert_eq!(result.old_script, "echo hi"); + assert!(result.new_script.contains("socket-patch apply")); + assert!(result.new_script.contains("echo hi")); + assert_eq!(fs::read_to_string(&pkg).await.unwrap(), original); + } } diff --git a/crates/socket-patch-core/src/patch/apply.rs b/crates/socket-patch-core/src/patch/apply.rs index 4d3ec07c..761b6694 100644 --- a/crates/socket-patch-core/src/patch/apply.rs +++ b/crates/socket-patch-core/src/patch/apply.rs @@ -1,5 +1,7 @@ use std::collections::HashMap; use std::path::Path; +#[cfg(unix)] +use std::path::PathBuf; use crate::hash::git_sha256::compute_git_sha256_from_bytes; use crate::manifest::schema::PatchFileInfo; @@ -324,13 +326,22 @@ pub async fn apply_file_patch( tokio::fs::create_dir_all(parent).await?; } + // The atomic stage+rename below — and the copy-on-write break, which + // also stages a sibling file — need write permission on the *parent + // directory*, not just on the file. Go's module cache marks both its + // files (0o444) and its directories (0o555) read-only, so without + // this the stage-file creation fails with EACCES (where the old + // in-place write, like `rollback.rs`, only had to relax the file's + // own mode). Temporarily grant owner-write on the directory; the + // guard restores its exact mode below. + let dir_guard = DirWriteGuard::acquire(filepath.parent()).await; + // Copy-on-write defense against pnpm / bazel / nix shared inodes. // If `filepath` is a symlink into a content store, or a hardlink // shared with other projects, give this project a private inode // before we mutate. No-op on regular private files (single // syscall). See `patch::cow`. - break_hardlink_if_needed(&filepath).await?; - + // // Atomic write: stage in the parent directory, fsync, rename onto // the target. POSIX `rename(2)` is atomic — observers see either // the old bytes or the new bytes, never a truncated half-write. 
@@ -341,7 +352,16 @@ pub async fn apply_file_patch( // user-writable inode over the target instead of trying to open // a read-only file for write. `restore_file_permissions` then // re-applies the pre-patch mode + uid/gid to the new inode. - write_atomic(&filepath, patched_content).await?; + // + // Both steps run inside a closure so the directory mode is ALWAYS + // restored — even if a step errors — before the failure propagates. + let write_result = async { + break_hardlink_if_needed(&filepath).await?; + write_atomic(&filepath, patched_content).await + } + .await; + dir_guard.restore().await; + write_result?; // Restore (or set) the final permissions on the post-rename inode. // On Unix this includes chown back to the pre-patch uid/gid (or @@ -352,6 +372,65 @@ pub async fn apply_file_patch( Ok(()) } +/// Guard that temporarily grants owner-write on a directory so the +/// stage+rename write path can create and move files inside it, then +/// restores the directory's original mode. +/// +/// Go's module cache (and some Nix/Bazel layouts) mark package +/// directories read-only (`0o555`). Creating the `.socket-stage-*` file +/// and renaming it over the target both require write permission on the +/// directory, so we relax it for the duration of the write and put it +/// back exactly as we found it. [`DirWriteGuard::restore`] is a no-op +/// when nothing was changed (already-writable dir, missing dir, a +/// `set_permissions` failure, or non-Unix — where a directory's +/// read-only attribute does not gate file creation). 
+pub(crate) struct DirWriteGuard { + #[cfg(unix)] + relock: Option<(PathBuf, u32)>, +} + +impl DirWriteGuard { + pub(crate) async fn acquire(dir: Option<&Path>) -> Self { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Some(dir) = dir { + if let Ok(meta) = tokio::fs::metadata(dir).await { + let mode = meta.permissions().mode(); + // Owner-write bit missing → relax it, remembering the + // original mode so `restore` can re-lock the dir. + if mode & 0o200 == 0 { + let mut perms = meta.permissions(); + perms.set_mode(mode | 0o200); + if tokio::fs::set_permissions(dir, perms).await.is_ok() { + return Self { + relock: Some((dir.to_path_buf(), mode)), + }; + } + } + } + } + Self { relock: None } + } + #[cfg(not(unix))] + { + let _ = dir; + Self {} + } + } + + pub(crate) async fn restore(self) { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Some((dir, mode)) = self.relock { + let _ = + tokio::fs::set_permissions(&dir, std::fs::Permissions::from_mode(mode)).await; + } + } + } +} + /// Write `content` to `target` atomically via stage + rename. /// /// Two-phase commit: @@ -369,11 +448,7 @@ async fn write_atomic(target: &Path, content: &[u8]) -> std::io::Result<()> { .file_name() .map(|n| n.to_string_lossy().into_owned()) .unwrap_or_else(|| "anon".to_string()); - let stage = parent.join(format!( - ".socket-stage-{}-{}", - stem, - uuid::Uuid::new_v4() - )); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); let mut file = tokio::fs::OpenOptions::new() .write(true) @@ -396,6 +471,20 @@ async fn write_atomic(target: &Path, content: &[u8]) -> std::io::Result<()> { let _ = tokio::fs::remove_file(&stage).await; return Err(e); } + + // Durability: `sync_all` above flushed the file's *data*, but the + // rename only updated the parent directory entry. fsync the + // directory so the rename itself survives a crash — otherwise a + // post-crash filesystem could surface the old name (or neither). 
+ // Unix only; best-effort, since a directory we can't open for fsync + // must not fail an otherwise-successful write. + #[cfg(unix)] + { + if let Ok(dir) = tokio::fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + Ok(()) } @@ -418,12 +507,16 @@ async fn restore_file_permissions( match pre_patch { Some(meta) => { - // Existing file: re-apply the original mode + ownership. - let restored = std::fs::Permissions::from_mode(meta.mode()); - tokio::fs::set_permissions(filepath, restored).await?; + // Existing file: re-apply the original ownership FIRST, + // then the mode. Order matters — `chown(2)` clears the + // setuid/setgid bits for an unprivileged caller (even when + // the uid/gid are unchanged), so the chmod must run last + // to restore the mode bit-for-bit, setuid/setgid included. let uid = meta.uid(); let gid = meta.gid(); chown_blocking(filepath.to_path_buf(), Some(uid), Some(gid)).await?; + let restored = std::fs::Permissions::from_mode(meta.mode()); + tokio::fs::set_permissions(filepath, restored).await?; } None => { // New file. Inherit owner/group from the parent dir. @@ -431,8 +524,7 @@ async fn restore_file_permissions( if let Ok(parent_meta) = tokio::fs::metadata(parent).await { let uid = parent_meta.uid(); let gid = parent_meta.gid(); - chown_blocking(filepath.to_path_buf(), Some(uid), Some(gid)) - .await?; + chown_blocking(filepath.to_path_buf(), Some(uid), Some(gid)).await?; } } // Default new-file mode: read-only for all. 
@@ -570,7 +662,9 @@ pub async fn apply_package_patch( if all_done_or_skipped { // Some or all files were not found but skipped via --force - let not_found_count = result.files_verified.iter() + let not_found_count = result + .files_verified + .iter() .filter(|v| v.status == VerifyStatus::NotFound) .count(); result.success = true; @@ -603,9 +697,7 @@ pub async fn apply_package_patch( for (file_name, file_info) in files { let verify_result = result.files_verified.iter().find(|v| v.file == *file_name); if let Some(vr) = verify_result { - if vr.status == VerifyStatus::AlreadyPatched - || vr.status == VerifyStatus::NotFound - { + if vr.status == VerifyStatus::AlreadyPatched || vr.status == VerifyStatus::NotFound { continue; } } @@ -831,7 +923,10 @@ mod tests { #[test] fn test_normalize_file_path_with_prefix() { - assert_eq!(normalize_file_path("package/lib/server.js"), "lib/server.js"); + assert_eq!( + normalize_file_path("package/lib/server.js"), + "lib/server.js" + ); } #[test] @@ -847,7 +942,10 @@ mod tests { #[test] fn test_normalize_file_path_package_not_prefix() { // "package" without trailing "/" should NOT be stripped - assert_eq!(normalize_file_path("packagefoo/bar.js"), "packagefoo/bar.js"); + assert_eq!( + normalize_file_path("packagefoo/bar.js"), + "packagefoo/bar.js" + ); } #[tokio::test] @@ -925,7 +1023,9 @@ mod tests { let before_hash = compute_git_sha256_from_bytes(content); // File is at lib/server.js but patch refers to package/lib/server.js - tokio::fs::create_dir_all(dir.path().join("lib")).await.unwrap(); + tokio::fs::create_dir_all(dir.path().join("lib")) + .await + .unwrap(); tokio::fs::write(dir.path().join("lib/server.js"), content) .await .unwrap(); @@ -1059,7 +1159,11 @@ mod tests { let written = tokio::fs::read(&path).await.unwrap(); assert_eq!(written, patched); // Mode preserved bit-for-bit. 
- let mode_after = tokio::fs::metadata(&path).await.unwrap().permissions().mode() + let mode_after = tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() & 0o7777; assert_eq!( mode_after, 0o444, @@ -1089,7 +1193,11 @@ mod tests { .await .unwrap(); - let mode_after = tokio::fs::metadata(&path).await.unwrap().permissions().mode() + let mode_after = tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() & 0o7777; assert_eq!(mode_after, 0o755); } @@ -1117,7 +1225,11 @@ mod tests { let path = dir.path().join(nested); // Default new-file mode is 0o444. - let mode = tokio::fs::metadata(&path).await.unwrap().permissions().mode() + let mode = tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() & 0o7777; assert_eq!(mode, 0o444, "new files default to read-only"); @@ -1154,6 +1266,230 @@ mod tests { assert_eq!(pre.gid(), post.gid()); } + /// Read-only package directory (Go's module cache marks both files + /// 0o444 AND directories 0o555). The stage+rename write path needs + /// owner-write on the directory; `apply_file_patch` must grant it for + /// the write and then restore the directory to its exact prior mode. + /// Regression: before the `DirWriteGuard` fix the stage-file creation + /// failed with EACCES and the patch could not be applied at all. + #[cfg(unix)] + #[tokio::test] + async fn test_apply_file_patch_in_readonly_dir() { + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("index.js"); + let original = b"original"; + let patched = b"patched content"; + let patched_hash = compute_git_sha256_from_bytes(patched); + + tokio::fs::write(&path, original).await.unwrap(); + // Read-only file inside a read-only directory. 
+ tokio::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o444)) + .await + .unwrap(); + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + apply_file_patch(dir.path(), "index.js", patched, &patched_hash) + .await + .expect("apply must succeed even inside a read-only directory"); + + // Content updated. + assert_eq!(tokio::fs::read(&path).await.unwrap(), patched); + // File mode restored. + assert_eq!( + tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() + & 0o7777, + 0o444 + ); + // Directory mode restored to exactly what it was (0o555). + assert_eq!( + tokio::fs::metadata(dir.path()) + .await + .unwrap() + .permissions() + .mode() + & 0o7777, + 0o555, + "directory mode must be restored after the write" + ); + // No stage litter survived in the directory. + let mut entries = tokio::fs::read_dir(dir.path()).await.unwrap(); + while let Some(entry) = entries.next_entry().await.unwrap() { + let name = entry.file_name().to_string_lossy().to_string(); + assert!(!name.starts_with(".socket-stage-"), "stage leaked: {name}"); + } + + // Re-grant write so the TempDir can clean itself up. + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + } + + /// A brand-new file created by a patch inside a read-only directory: + /// the directory must be temporarily writable for the create, then + /// restored, and the new file gets the default 0o444 mode. 
+ #[cfg(unix)] + #[tokio::test] + async fn test_apply_file_patch_new_file_in_readonly_dir() { + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().unwrap(); + let patched = b"brand new\n"; + let patched_hash = compute_git_sha256_from_bytes(patched); + + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + apply_file_patch(dir.path(), "new.js", patched, &patched_hash) + .await + .expect("new-file apply must succeed inside a read-only directory"); + + let path = dir.path().join("new.js"); + assert_eq!(tokio::fs::read(&path).await.unwrap(), patched); + assert_eq!( + tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() + & 0o7777, + 0o444 + ); + // Directory mode restored. + assert_eq!( + tokio::fs::metadata(dir.path()) + .await + .unwrap() + .permissions() + .mode() + & 0o7777, + 0o555 + ); + + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + } + + /// setuid/setgid bits survive the patch round-trip. `chown(2)` strips + /// these bits even when the uid/gid are unchanged, so the restore + /// must chown BEFORE it chmods. Regression: the prior chmod-then-chown + /// order silently dropped the setuid bit on every patched file. + #[cfg(unix)] + #[tokio::test] + async fn test_apply_file_patch_preserves_setuid_bit() { + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("suid-bin"); + let patched = b"new payload"; + let patched_hash = compute_git_sha256_from_bytes(patched); + + tokio::fs::write(&path, b"old payload").await.unwrap(); + // setuid + rwxr-xr-x. + tokio::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o4755)) + .await + .unwrap(); + // Guard: skip if the filesystem refused the setuid bit (some + // mount options strip it) so the test stays meaningful where it + // can run and never gives a false failure where it can't. 
+ let pre = tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() + & 0o7777; + if pre != 0o4755 { + return; + } + + apply_file_patch(dir.path(), "suid-bin", patched, &patched_hash) + .await + .unwrap(); + + let mode_after = tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() + & 0o7777; + assert_eq!( + mode_after, 0o4755, + "setuid bit must survive the patch (chown must run before chmod)" + ); + } + + /// End-to-end blob apply against a fully read-only package directory. + #[cfg(unix)] + #[tokio::test] + async fn test_apply_package_patch_in_readonly_dir() { + use std::os::unix::fs::PermissionsExt; + + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let original = b"original content"; + let patched = b"patched content"; + let before_hash = compute_git_sha256_from_bytes(original); + let after_hash = compute_git_sha256_from_bytes(patched); + + tokio::fs::write(pkg_dir.path().join("index.js"), original) + .await + .unwrap(); + tokio::fs::write(blobs_dir.path().join(&after_hash), patched) + .await + .unwrap(); + // Lock both the file and the directory down (Go cache layout). 
+ tokio::fs::set_permissions( + pkg_dir.path().join("index.js"), + std::fs::Permissions::from_mode(0o444), + ) + .await + .unwrap(); + tokio::fs::set_permissions(pkg_dir.path(), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "index.js".to_string(), + PatchFileInfo { + before_hash, + after_hash: after_hash.clone(), + }, + ); + + let result = apply_package_patch( + "pkg:golang/example.com/x@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + false, + ) + .await; + + assert!(result.success, "expected success: {:?}", result.error); + assert_eq!(result.files_patched.len(), 1); + let written = tokio::fs::read(pkg_dir.path().join("index.js")) + .await + .unwrap(); + assert_eq!(written, patched); + + tokio::fs::set_permissions(pkg_dir.path(), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + } + #[tokio::test] async fn test_apply_package_patch_success() { let pkg_dir = tempfile::tempdir().unwrap(); @@ -1183,9 +1519,16 @@ mod tests { }, ); - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, false, false) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + false, + ) + .await; assert!(result.success); assert_eq!(result.files_patched.len(), 1); @@ -1213,15 +1556,24 @@ mod tests { }, ); - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, true, false) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + true, + false, + ) + .await; assert!(result.success); assert_eq!(result.files_patched.len(), 0); // dry run: nothing actually patched // File should still have original content 
- let content = tokio::fs::read(pkg_dir.path().join("index.js")).await.unwrap(); + let content = tokio::fs::read(pkg_dir.path().join("index.js")) + .await + .unwrap(); assert_eq!(content, original); } @@ -1246,9 +1598,16 @@ mod tests { }, ); - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, false, false) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + false, + ) + .await; assert!(result.success); assert_eq!(result.files_patched.len(), 0); @@ -1272,9 +1631,16 @@ mod tests { }, ); - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, false, false) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + false, + ) + .await; assert!(!result.success); assert!(result.error.is_some()); @@ -1308,9 +1674,16 @@ mod tests { ); // Without force: should fail - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, false, false) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + false, + ) + .await; assert!(!result.success); // Reset the file @@ -1319,13 +1692,22 @@ mod tests { .unwrap(); // With force: should succeed - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, false, true) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + true, + ) + .await; assert!(result.success); assert_eq!(result.files_patched.len(), 1); - 
let written = tokio::fs::read(pkg_dir.path().join("index.js")).await.unwrap(); + let written = tokio::fs::read(pkg_dir.path().join("index.js")) + .await + .unwrap(); assert_eq!(written, patched); } @@ -1344,15 +1726,29 @@ mod tests { ); // Without force: should fail (NotFound for non-new file) - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, false, false) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + false, + ) + .await; assert!(!result.success); // With force: should succeed by skipping the missing file - let result = - apply_package_patch("pkg:npm/test@1.0.0", pkg_dir.path(), &files, &PatchSources::blobs_only(blobs_dir.path()), None, false, true) - .await; + let result = apply_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + true, + ) + .await; assert!(result.success); assert_eq!(result.files_patched.len(), 0); } @@ -1399,7 +1795,7 @@ mod tests { /// disk, all of (package, diff, blob) available with valid patched /// content. Caller can then delete sources to test fallback. 
async fn make_fixture() -> ( - tempfile::TempDir, // root holding pkg/, blobs/, packages/, diffs/ + tempfile::TempDir, // root holding pkg/, blobs/, packages/, diffs/ std::path::PathBuf, // pkg dir std::path::PathBuf, // blobs dir std::path::PathBuf, // packages dir @@ -1449,7 +1845,16 @@ mod tests { }, ); - (root, pkg_dir, blobs_dir, packages_dir, diffs_dir, files, original, patched) + ( + root, + pkg_dir, + blobs_dir, + packages_dir, + diffs_dir, + files, + original, + patched, + ) } #[tokio::test] diff --git a/crates/socket-patch-core/src/patch/apply_lock.rs b/crates/socket-patch-core/src/patch/apply_lock.rs index 0963e23c..33cc079d 100644 --- a/crates/socket-patch-core/src/patch/apply_lock.rs +++ b/crates/socket-patch-core/src/patch/apply_lock.rs @@ -91,16 +91,48 @@ pub fn acquire(socket_dir: &Path, timeout: Duration) -> Result return Ok(LockGuard { _file: file }), - Err(_) => { - if Instant::now() >= deadline { + // Only a genuine "someone else holds it" signal counts as + // contention and feeds the retry/`Held` path. Any other + // failure (ENOLCK, EBADF, a filesystem that doesn't support + // advisory locks, EACCES on a pre-existing read-only lock + // file, …) is a real I/O fault: surface it immediately as + // `Io` rather than busy-sleeping for the whole budget and + // then mislabelling it as `Held`. See `is_lock_contended`. + Err(ref e) if is_lock_contended(e) => { + let now = Instant::now(); + if now >= deadline { return Err(LockError::Held); } - std::thread::sleep(Duration::from_millis(100)); + // Never sleep past the deadline: a sub-100 ms budget + // must not be rounded up to a full 100 ms wait. The + // remaining slice is always > 0 here (now < deadline). 
+ let remaining = deadline - now; + std::thread::sleep(remaining.min(Duration::from_millis(100))); + } + Err(source) => { + return Err(LockError::Io { + path: path.clone(), + source, + }); } } } } +/// Distinguish "the lock is held by someone else" from a real I/O +/// failure of `try_lock_exclusive`. +/// +/// `fs2` reports contention via a fixed OS-error sentinel +/// (`EWOULDBLOCK` on Unix, `ERROR_LOCK_VIOLATION` on Windows), exposed +/// as [`fs2::lock_contended_error`]. We compare raw OS codes — an exact +/// match, and portable, because both that sentinel and any genuine +/// `flock(2)`/`LockFileEx` failure are constructed from an OS error +/// code. A non-OS error (`raw_os_error() == None`) can never be +/// contention, so it correctly falls through to `Io`. +fn is_lock_contended(err: &std::io::Error) -> bool { + err.raw_os_error() == fs2::lock_contended_error().raw_os_error() +} + #[cfg(test)] mod tests { use super::*; @@ -170,4 +202,84 @@ mod tests { elapsed ); } + + /// Regression: `fs2`'s own contended-lock sentinel must be + /// classified as contention (the `Held` path). If `fs2` ever + /// changed the sentinel out from under us, this catches it before + /// the misclassification reaches users. + #[test] + fn contended_sentinel_is_classified_as_contention() { + assert!(is_lock_contended(&fs2::lock_contended_error())); + } + + /// Regression: genuine I/O failures of `try_lock_exclusive` must + /// NOT masquerade as contention. Previously every error funnelled + /// into the retry/`Held` path, so a real fault (e.g. ENOLCK on a + /// full kernel lock table, or a filesystem without advisory locks) + /// was reported as "another process is operating here" — and, with + /// a positive timeout, only after busy-sleeping the entire budget. + #[test] + fn genuine_io_errors_are_not_contention() { + use std::io::{Error, ErrorKind}; + + // Kind-only errors carry no OS code, so they can never equal + // the contended sentinel. 
+ assert!(!is_lock_contended(&Error::from(ErrorKind::NotFound))); + assert!(!is_lock_contended(&Error::from( + ErrorKind::PermissionDenied + ))); + + // A concrete-but-different OS error (EINTR == 4 on Unix) must + // not look like contention either. Skip the exact code match on + // the off chance a platform reuses 4 for the contended sentinel. + let eintr = Error::from_raw_os_error(4); + if eintr.raw_os_error() != fs2::lock_contended_error().raw_os_error() { + assert!(!is_lock_contended(&eintr)); + } + } + + /// A non-blocking (`ZERO`) acquire on a contended lock returns + /// `Held` essentially immediately — it must not pay the 100 ms + /// backoff sleep before giving up. + #[test] + fn zero_timeout_does_not_sleep_before_held() { + let dir = tempfile::tempdir().unwrap(); + let _first = acquire(dir.path(), Duration::ZERO).unwrap(); + let start = Instant::now(); + let err = acquire(dir.path(), Duration::ZERO).unwrap_err(); + let elapsed = start.elapsed(); + assert!(matches!(err, LockError::Held)); + assert!( + elapsed < Duration::from_millis(100), + "non-blocking acquire should not sleep, took {:?}", + elapsed + ); + } + + /// The retry loop must not overshoot the deadline by a full sleep + /// quantum. A 150 ms budget should resolve well under the old + /// fixed-100 ms-sleep worst case (~200 ms) — the final sleep is + /// clamped to the remaining slice. + #[test] + fn wait_respects_deadline_without_full_quantum_overshoot() { + let dir = tempfile::tempdir().unwrap(); + let _first = acquire(dir.path(), Duration::ZERO).unwrap(); + let start = Instant::now(); + let err = acquire(dir.path(), Duration::from_millis(150)).unwrap_err(); + let elapsed = start.elapsed(); + assert!(matches!(err, LockError::Held)); + assert!( + elapsed >= Duration::from_millis(150), + "should wait at least the budget, got {:?}", + elapsed + ); + // Loose upper bound: clamped sleeps mean we don't blow well past + // the budget. 
Generous slack keeps slow CI hosts non-flaky while + // still failing the old uncapped behaviour's pathological cases. + assert!( + elapsed < Duration::from_millis(450), + "clamped sleep should keep us near the budget, got {:?}", + elapsed + ); + } } diff --git a/crates/socket-patch-core/src/patch/cow.rs b/crates/socket-patch-core/src/patch/cow.rs index 35e816b1..4bdefc5b 100644 --- a/crates/socket-patch-core/src/patch/cow.rs +++ b/crates/socket-patch-core/src/patch/cow.rs @@ -35,8 +35,9 @@ pub enum CowAction { /// Path was a regular private file (one link, not a symlink). /// Caller can mutate it directly. AlreadyPrivate, - /// Path was a symlink. We removed the link and put a fresh - /// regular file with the same content in its place. The link + /// Path was a symlink. We atomically replaced the link with a + /// fresh regular file holding the same content (staged in the same + /// directory and renamed over the link in one step). The link /// target is untouched. BrokeSymlink, /// Path was a hardlinked regular file (`nlink > 1`). We copied @@ -64,10 +65,21 @@ pub async fn break_hardlink_if_needed(path: &Path) -> std::io::Result // current target content. We need it on disk as a regular // file at `path` so the patch write lands on our copy. let target_bytes = tokio::fs::read(path).await?; - // Remove the symlink. This only deletes the link itself; the - // target file (in the store, in a sibling project, wherever) - // is unaffected. - tokio::fs::remove_file(path).await?; + // Stage the private copy in the same directory, then + // atomically rename it OVER the symlink. `rename(2)` operates + // on the final path component itself — it never follows the + // symlink — so this replaces the link with our regular file + // while leaving the link's *target* (the store entry / sibling + // project) untouched. + // + // We deliberately do NOT `remove_file(path)` first. 
Unlinking + // the symlink before the replacement is committed would open a + // window in which the package file simply does not exist: if + // the staged write then failed (ENOSPC, EPERM on an immutable + // target, a crash), the original would be gone with nothing to + // roll back to. The rename-over-symlink is a single atomic + // step — on any failure `path` still holds the original link. + // This mirrors the hardlink branch below and `write_atomic`. write_via_stage_rename(path, &target_bytes).await?; return Ok(CowAction::BrokeSymlink); } @@ -113,12 +125,15 @@ async fn write_via_stage_rename(path: &Path, bytes: &[u8]) -> std::io::Result<() .file_name() .map(|n| n.to_string_lossy().into_owned()) .expect("cow stage path always has a file_name — callers pass package-internal files"); - let stage: PathBuf = parent.join(format!( - ".socket-cow-{}-{}", - stem, - uuid::Uuid::new_v4() - )); - tokio::fs::write(&stage, bytes).await?; + let stage: PathBuf = parent.join(format!(".socket-cow-{}-{}", stem, uuid::Uuid::new_v4())); + // Stage write. If this fails *after* creating the file (e.g. a + // mid-write ENOSPC), the partial stage would otherwise leak as a + // `.socket-cow-*` turd, so clean it up before propagating — same + // discipline as `apply::write_atomic`'s write arm. + if let Err(e) = tokio::fs::write(&stage, bytes).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } // `rename` over the target is atomic on POSIX and best-effort on // Windows (`MoveFileExW` with REPLACE_EXISTING via std). match tokio::fs::rename(&stage, path).await { @@ -229,6 +244,112 @@ mod tests { assert_eq!(tokio::fs::read(&target).await.unwrap(), b"shared bytes"); } + /// Helper: count `.socket-cow-*` stage files left in a directory. 
+ #[cfg(unix)] + fn leftover_stage_count(dir: &Path) -> usize { + std::fs::read_dir(dir) + .unwrap() + .filter_map(|e| e.ok()) + .filter(|e| e.file_name().to_string_lossy().starts_with(".socket-cow-")) + .count() + } + + /// Realistic pnpm shape: `node_modules/` is a *symlink* into + /// the content store, and the store entry is itself *hardlinked* + /// across projects. Breaking the symlink must: + /// - leave the project path a private, single-link regular file, + /// - leave the store entry's content AND its sibling hardlink + /// completely untouched (the whole point of CoW), + /// - leave no `.socket-cow-*` stage litter behind. + #[cfg(unix)] + #[tokio::test] + async fn symlink_to_hardlinked_store_entry_is_fully_isolated() { + use std::os::unix::fs::MetadataExt; + + let dir = tempfile::tempdir().unwrap(); + // The content store entry + a sibling project's hardlink to it. + let store = dir.path().join("store-entry.txt"); + let sibling = dir.path().join("other-project-hardlink.txt"); + tokio::fs::write(&store, b"shared bytes").await.unwrap(); + tokio::fs::hard_link(&store, &sibling).await.unwrap(); + // Our project links to the store entry via a symlink. + let link = dir.path().join("our-project-link.txt"); + tokio::fs::symlink(&store, &link).await.unwrap(); + assert_eq!(tokio::fs::metadata(&store).await.unwrap().nlink(), 2); + + let action = break_hardlink_if_needed(&link).await.unwrap(); + assert_eq!(action, CowAction::BrokeSymlink); + + // Our path is now a private regular file (not a symlink), and + // its inode is distinct from the store entry. + let link_meta = tokio::fs::symlink_metadata(&link).await.unwrap(); + assert!(link_meta.file_type().is_file()); + assert!(!link_meta.file_type().is_symlink()); + assert_ne!( + link_meta.ino(), + tokio::fs::metadata(&store).await.unwrap().ino() + ); + + // Store entry + its sibling hardlink are byte-for-byte intact, + // and still share their inode (nlink unchanged at 2). 
+ assert_eq!(tokio::fs::metadata(&store).await.unwrap().nlink(), 2); + assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes"); + assert_eq!(tokio::fs::read(&sibling).await.unwrap(), b"shared bytes"); + + // Mutating our copy must not bleed into the store or its sibling. + tokio::fs::write(&link, b"patched").await.unwrap(); + assert_eq!(tokio::fs::read(&store).await.unwrap(), b"shared bytes"); + assert_eq!(tokio::fs::read(&sibling).await.unwrap(), b"shared bytes"); + + // No stage litter survives the successful break. + assert_eq!(leftover_stage_count(dir.path()), 0); + } + + /// Success-path litter check: neither the symlink break nor the + /// hardlink break may leave a `.socket-cow-*` stage file behind. + #[cfg(unix)] + #[tokio::test] + async fn break_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + + let target = dir.path().join("t.txt"); + tokio::fs::write(&target, b"x").await.unwrap(); + let link = dir.path().join("l.txt"); + tokio::fs::symlink(&target, &link).await.unwrap(); + break_hardlink_if_needed(&link).await.unwrap(); + + let a = dir.path().join("a.txt"); + tokio::fs::write(&a, b"y").await.unwrap(); + let b = dir.path().join("b.txt"); + tokio::fs::hard_link(&a, &b).await.unwrap(); + break_hardlink_if_needed(&b).await.unwrap(); + + assert_eq!(leftover_stage_count(dir.path()), 0); + } + + /// Idempotency: breaking a symlink yields a private regular file, + /// and a second call on the now-regular path is a clean + /// `AlreadyPrivate` no-op (no re-break, no litter). 
+ #[cfg(unix)] + #[tokio::test] + async fn idempotent_after_breaking_symlink() { + let dir = tempfile::tempdir().unwrap(); + let target = dir.path().join("store.txt"); + let link = dir.path().join("link.txt"); + tokio::fs::write(&target, b"bytes").await.unwrap(); + tokio::fs::symlink(&target, &link).await.unwrap(); + + assert_eq!( + break_hardlink_if_needed(&link).await.unwrap(), + CowAction::BrokeSymlink + ); + assert_eq!( + break_hardlink_if_needed(&link).await.unwrap(), + CowAction::AlreadyPrivate + ); + assert_eq!(leftover_stage_count(dir.path()), 0); + } + /// Idempotency: calling twice in a row on a regular file is fine /// and reports `AlreadyPrivate` both times. #[tokio::test] diff --git a/crates/socket-patch-core/src/patch/diff.rs b/crates/socket-patch-core/src/patch/diff.rs index e9b1b7dc..47d873a9 100644 --- a/crates/socket-patch-core/src/patch/diff.rs +++ b/crates/socket-patch-core/src/patch/diff.rs @@ -7,13 +7,36 @@ use qbsdiff::Bspatch; +/// Upper bound on how many bytes we pre-reserve for the patched output. +/// +/// `Bspatch::hint_target_size()` returns the target size read verbatim from +/// the bsdiff header (bytes 24..32). qbsdiff's parser validates the control +/// and delta block lengths against the actual payload but never validates +/// this field — so a malformed or hostile delta can claim an arbitrary +/// target size (up to `i64::MAX`) while carrying only a few bytes of data. +/// +/// Feeding that value straight into `Vec::with_capacity` lets a tiny delta +/// request a multi-exabyte reservation, which either panics with "capacity +/// overflow" or aborts the process via the allocator. Neither is something +/// the caller can recover from, so it breaks the never-panic-on-bad-input +/// contract the patch engine depends on (see the tests below). 
+/// +/// The reservation is a pure optimization: `apply` is driven entirely by the +/// control stream and grows the output `Vec` on demand as it writes, so +/// clamping the hint never changes the result — it only bounds the number of +/// reallocations for legitimately large files. +const MAX_PREALLOC_BYTES: u64 = 64 * 1024 * 1024; // 64 MiB + /// Apply a bsdiff delta to `before` and return the resulting bytes. /// /// Returns an `std::io::Error` when the delta is malformed or applying it /// fails (for example, the delta was produced from a different source). pub fn apply_diff(before: &[u8], delta: &[u8]) -> Result, std::io::Error> { let patcher = Bspatch::new(delta)?; - let mut out = Vec::with_capacity(patcher.hint_target_size() as usize); + // Clamp the attacker-controlled size hint: a corrupt/hostile header must + // not be able to turn a small delta into a process-killing allocation. + let prealloc = patcher.hint_target_size().min(MAX_PREALLOC_BYTES) as usize; + let mut out = Vec::with_capacity(prealloc); patcher.apply(before, std::io::Cursor::new(&mut out))?; Ok(out) } @@ -85,4 +108,47 @@ mod tests { // Result may or may not equal target — what matters is no panic. let _ = apply_diff(src_b, &delta); } + + #[test] + fn test_apply_diff_forged_oversize_header_is_safe() { + // Regression: `apply_diff` used to feed `hint_target_size()` straight + // into `Vec::with_capacity`. That field is the bsdiff header's target + // size (little-endian bytes 24..32) and is NOT validated by qbsdiff + // against the real payload, so a corrupt/hostile delta can claim an + // enormous size. A multi-exabyte `with_capacity` aborts the process + // (allocator failure) or panics with "capacity overflow" — neither is + // recoverable, which would let a single bad patch take the tool down. + // + // We build a genuine, small delta and then overwrite only the target + // size field with ~1.15 EiB. 
Because `apply` is driven by the control + // stream and ignores the hint, the clamp lets the patch still produce + // the correct bytes instead of dying on the allocation. + let before = b"the quick brown fox jumps over the lazy dog"; + let after = b"the quick brown cat jumps over the lazy dog"; + let mut forged = make_delta(before, after); + assert!(forged.len() >= 32, "delta must contain a full header"); + // Stay positive (top bit clear) so qbsdiff decodes it as a large + // unsigned size rather than a negative offset. + let huge: u64 = 1 << 60; + forged[24..32].copy_from_slice(&huge.to_le_bytes()); + + let result = apply_diff(before, &forged).expect("clamped apply must succeed"); + assert_eq!( + result, after, + "forging the size hint must not corrupt output" + ); + } + + #[test] + fn test_apply_diff_capacity_hint_is_clamped() { + // Pin the clamp itself so the bound can't silently regress back to an + // unbounded reservation. The output capacity is never reserved beyond + // MAX_PREALLOC_BYTES regardless of what the header claims. + let huge_hint: u64 = u64::MAX; + let clamped = huge_hint.min(MAX_PREALLOC_BYTES) as usize; + assert_eq!(clamped, MAX_PREALLOC_BYTES as usize); + // A modest, honest hint passes through untouched. + let small_hint: u64 = 4096; + assert_eq!(small_hint.min(MAX_PREALLOC_BYTES) as usize, 4096); + } } diff --git a/crates/socket-patch-core/src/patch/file_hash.rs b/crates/socket-patch-core/src/patch/file_hash.rs index a9dc362d..8afd40fd 100644 --- a/crates/socket-patch-core/src/patch/file_hash.rs +++ b/crates/socket-patch-core/src/patch/file_hash.rs @@ -4,17 +4,42 @@ use crate::hash::git_sha256::compute_git_sha256_from_reader; /// Compute Git-compatible SHA256 hash of file contents using streaming. /// -/// Gets the file size first, then streams the file through the hasher -/// without loading the entire file into memory. 
+/// Opens the file *once* and derives the size from that open handle (an +/// `fstat`), then streams the same handle through the hasher without loading +/// the entire file into memory. +/// +/// Deriving the size from the open file descriptor — rather than `stat`-ing the +/// path separately and then re-opening it — is what makes this safe under +/// concurrent mutation. The patch engine hashes files that other processes (or +/// an attacker) may rename/replace at any moment. If we measured the size of +/// one path resolution and read the bytes of another, a swap to a *same-sized* +/// file would slip past the size-mismatch guard in +/// [`compute_git_sha256_from_reader`] and produce a hash whose Git header (the +/// size) and body came from different inodes. Reading both from the same `fd` +/// makes that impossible. +/// +/// Only regular files are accepted. Following a path to a directory or a +/// special file (FIFO, device, …) and hashing it is never meaningful here, and +/// on some platforms a directory can read as zero bytes — which would otherwise +/// be silently reported as the empty-blob hash. pub async fn compute_file_git_sha256(filepath: impl AsRef) -> Result { let filepath = filepath.as_ref(); - // Get file size first - let metadata = tokio::fs::metadata(filepath).await?; - let file_size = metadata.len(); - - // Open file for streaming read + // Open the file once; everything below operates on this single descriptor. let file = tokio::fs::File::open(filepath).await?; + + // Size comes from the open handle (fstat), so it and the bytes we hash are + // guaranteed to refer to the same inode even if the path is replaced. 
+ let metadata = file.metadata().await?; + + if !metadata.is_file() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("git sha256: {} is not a regular file", filepath.display()), + )); + } + + let file_size = metadata.len(); let reader = tokio::io::BufReader::new(file); compute_git_sha256_from_reader(file_size, reader).await @@ -72,4 +97,80 @@ mod tests { assert_eq!(file_hash, bytes_hash); } + + /// A directory must be rejected with an error, not silently hashed as the + /// empty blob. On some platforms reading a directory descriptor yields zero + /// bytes; without the `is_file` guard that would return the hash of `""` + /// and the patch engine would compare a real file's expected hash against a + /// directory's bogus one. + #[tokio::test] + async fn test_compute_file_git_sha256_rejects_directory() { + let dir = tempfile::tempdir().unwrap(); + + let result = compute_file_git_sha256(dir.path()).await; + let err = result.expect_err("hashing a directory must error"); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); + + // It must specifically NOT have returned the empty-blob hash. + let empty_blob = compute_git_sha256_from_bytes(b""); + assert_ne!( + err.to_string(), + empty_blob, + "directory should error, never produce the empty-blob hash" + ); + } + + /// A symlink to a regular file follows through `File::open` and hashes the + /// target's contents (the size also comes from the resolved file via + /// fstat), matching a direct byte hash of that content. 
+ #[cfg(unix)] + #[tokio::test] + async fn test_compute_file_git_sha256_follows_symlink_to_file() { + let dir = tempfile::tempdir().unwrap(); + let target = dir.path().join("target.txt"); + let link = dir.path().join("link.txt"); + + let content = b"symlinked content"; + tokio::fs::write(&target, content).await.unwrap(); + tokio::fs::symlink(&target, &link).await.unwrap(); + + let link_hash = compute_file_git_sha256(&link).await.unwrap(); + let bytes_hash = compute_git_sha256_from_bytes(content); + + assert_eq!(link_hash, bytes_hash); + } + + /// A symlink whose target is a directory must be rejected, exactly like a + /// directory passed directly — the `is_file` check operates on the resolved + /// open handle. + #[cfg(unix)] + #[tokio::test] + async fn test_compute_file_git_sha256_rejects_symlink_to_directory() { + let dir = tempfile::tempdir().unwrap(); + let subdir = dir.path().join("subdir"); + let link = dir.path().join("dirlink"); + + tokio::fs::create_dir(&subdir).await.unwrap(); + tokio::fs::symlink(&subdir, &link).await.unwrap(); + + let result = compute_file_git_sha256(&link).await; + let err = result.expect_err("symlink to a directory must error"); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); + } + + /// A broken symlink (dangling target) must surface the open error rather + /// than panicking or returning a hash. 
+ #[cfg(unix)] + #[tokio::test] + async fn test_compute_file_git_sha256_broken_symlink_errors() { + let dir = tempfile::tempdir().unwrap(); + let link = dir.path().join("dangling"); + + tokio::fs::symlink(dir.path().join("does-not-exist"), &link) + .await + .unwrap(); + + let result = compute_file_git_sha256(&link).await; + assert!(result.is_err(), "dangling symlink must error"); + } } diff --git a/crates/socket-patch-core/src/patch/package.rs b/crates/socket-patch-core/src/patch/package.rs index c99d91de..f25f9250 100644 --- a/crates/socket-patch-core/src/patch/package.rs +++ b/crates/socket-patch-core/src/patch/package.rs @@ -93,6 +93,18 @@ pub fn read_archive_to_map(archive_path: &Path) -> Result Result Result &str { /// /// A file is ready for rollback if: /// 1. The file exists on disk. -/// 2. The before-hash blob exists in the blobs directory. -/// 3. Its current hash matches the afterHash (patched state). +/// 2. Its current hash matches the afterHash (patched state). +/// 3. The before-hash blob exists in the blobs directory. +/// +/// A file whose current hash already matches the beforeHash is reported +/// `AlreadyOriginal` *before* the blob is checked — a finished rollback is +/// a no-op and must not be blocked by a missing (e.g. GC'd) blob it would +/// never need to read. pub async fn verify_file_rollback( pkg_path: &Path, file_name: &str, @@ -116,22 +121,6 @@ pub async fn verify_file_rollback( }; } - // Check if before blob exists (required for rollback) - let before_blob_path = blobs_path.join(&file_info.before_hash); - if tokio::fs::metadata(&before_blob_path).await.is_err() { - return VerifyRollbackResult { - file: file_name.to_string(), - status: VerifyRollbackStatus::MissingBlob, - message: Some(format!( - "Before blob not found: {}. 
Re-download the patch to enable rollback.", - file_info.before_hash - )), - current_hash: None, - expected_hash: None, - target_hash: Some(file_info.before_hash.clone()), - }; - } - // Compute current hash let current_hash = match compute_file_git_sha256(&filepath).await { Ok(h) => h, @@ -147,7 +136,11 @@ pub async fn verify_file_rollback( } }; - // Check if already in original state + // Check if already in original state. This must be tested BEFORE the + // before-blob existence check: a file that is already rolled back + // needs no blob to restore, so a garbage-collected blob must not turn + // a finished, no-op rollback into a spurious `MissingBlob` failure + // (which would otherwise block the whole package's rollback). if current_hash == file_info.before_hash { return VerifyRollbackResult { file: file_name.to_string(), @@ -159,6 +152,22 @@ pub async fn verify_file_rollback( }; } + // Check if before blob exists (required to actually restore the file) + let before_blob_path = blobs_path.join(&file_info.before_hash); + if tokio::fs::metadata(&before_blob_path).await.is_err() { + return VerifyRollbackResult { + file: file_name.to_string(), + status: VerifyRollbackStatus::MissingBlob, + message: Some(format!( + "Before blob not found: {}. Re-download the patch to enable rollback.", + file_info.before_hash + )), + current_hash: Some(current_hash), + expected_hash: None, + target_hash: Some(file_info.before_hash.clone()), + }; + } + // Check if matches expected patched hash (afterHash) if current_hash != file_info.after_hash { return VerifyRollbackResult { @@ -183,46 +192,44 @@ pub async fn verify_file_rollback( } } -/// Rollback a single file to its original state. -/// Writes the original content and verifies the resulting hash. +/// Rollback a single file to its original state by writing +/// `original_content` (whose Git SHA256 must equal `expected_hash`). 
+/// +/// This delegates to [`apply_file_patch`](crate::patch::apply::apply_file_patch), +/// the hardened write path shared with apply. Rolling a file back is the +/// exact same operation as patching it forward — "safely overwrite this +/// file with these hash-verified bytes" — so it must get the exact same +/// guarantees: +/// +/// * **Atomic** — the bytes are staged in the parent directory, fsync'd, +/// and `rename(2)`d over the target. A crash or `ENOSPC` mid-write +/// leaves either the old or the new content, never a truncated file. +/// * **Copy-on-write safe** — a symlink/hardlink into a shared content +/// store (pnpm, Nix, the Go module cache) is broken into a private +/// inode first, so a rollback never bleeds into a sibling project's +/// copy or the store entry. +/// * **Validate-before-write** — `original_content` is hash-checked in +/// memory *before* any disk write, so a corrupt blob is refused +/// instead of being committed over the file and only then flagged. +/// * **Permission-faithful** — the file's mode + uid/gid are restored +/// afterward. Because apply preserves a file's original permissions +/// when patching, the on-disk patched file already carries the +/// pre-patch mode (e.g. a read-only `0o444` Go-cache source), and +/// that exact mode is re-applied to the rolled-back inode. +/// +/// The previous implementation used a bare in-place `tokio::fs::write`, +/// which had none of these properties: it could corrupt a hardlinked +/// sibling, leave a half-written file on a crash, write a bad blob over +/// the file *before* discovering the hash mismatch, and leave a +/// read-only file writable. pub async fn rollback_file_patch( pkg_path: &Path, file_name: &str, original_content: &[u8], expected_hash: &str, ) -> Result<(), std::io::Error> { - let normalized = normalize_file_path(file_name); - let filepath = pkg_path.join(normalized); - - // Make file writable if it is read-only (e.g. 
Go module cache) - #[cfg(unix)] - if let Ok(meta) = tokio::fs::metadata(&filepath).await { - use std::os::unix::fs::PermissionsExt; - let perms = meta.permissions(); - if perms.readonly() { - let mode = perms.mode(); - let mut new_perms = perms; - new_perms.set_mode(mode | 0o200); - tokio::fs::set_permissions(&filepath, new_perms).await?; - } - } - - // Write the original content - tokio::fs::write(&filepath, original_content).await?; - - // Verify the hash after writing - let verify_hash = compute_file_git_sha256(&filepath).await?; - if verify_hash != expected_hash { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!( - "Hash verification failed after rollback. Expected: {}, Got: {}", - expected_hash, verify_hash - ), - )); - } - - Ok(()) + crate::patch::apply::apply_file_patch(pkg_path, file_name, original_content, expected_hash) + .await } /// Verify and rollback patches for a single package. @@ -249,8 +256,7 @@ pub async fn rollback_package_patch( // First, verify all files for (file_name, file_info) in files { - let verify_result = - verify_file_rollback(pkg_path, file_name, file_info, blobs_path).await; + let verify_result = verify_file_rollback(pkg_path, file_name, file_info, blobs_path).await; // If any file has issues (not ready and not already original), we can't proceed if verify_result.status != VerifyRollbackStatus::Ready @@ -260,10 +266,7 @@ pub async fn rollback_package_patch( .message .clone() .unwrap_or_else(|| format!("{:?}", verify_result.status)); - result.error = Some(format!( - "Cannot rollback: {} - {}", - verify_result.file, msg - )); + result.error = Some(format!("Cannot rollback: {} - {}", verify_result.file, msg)); result.files_verified.push(verify_result); return result; } @@ -289,10 +292,7 @@ pub async fn rollback_package_patch( // Rollback files that need it for (file_name, file_info) in files { - let verify_result = result - .files_verified - .iter() - .find(|v| v.file == *file_name); + let verify_result = 
result.files_verified.iter().find(|v| v.file == *file_name); if let Some(vr) = verify_result { if vr.status == VerifyRollbackStatus::AlreadyOriginal { continue; @@ -303,7 +303,16 @@ pub async fn rollback_package_patch( if file_info.before_hash.is_empty() { let normalized = normalize_file_path(file_name); let filepath = pkg_path.join(normalized); - if let Err(e) = tokio::fs::remove_file(&filepath).await { + // Unlinking a directory entry requires write permission on the + // *parent directory*, not the file. Go's module cache marks + // package directories read-only (0o555), so — exactly as the + // apply write path does — temporarily grant owner-write on the + // parent and restore its exact mode afterward, whether the + // delete succeeds or fails. + let dir_guard = crate::patch::apply::DirWriteGuard::acquire(filepath.parent()).await; + let remove_result = tokio::fs::remove_file(&filepath).await; + dir_guard.restore().await; + if let Err(e) = remove_result { result.error = Some(format!("Failed to delete {}: {}", file_name, e)); return result; } @@ -325,9 +334,13 @@ pub async fn rollback_package_patch( }; // Rollback the file - if let Err(e) = - rollback_file_patch(pkg_path, file_name, &original_content, &file_info.before_hash) - .await + if let Err(e) = rollback_file_patch( + pkg_path, + file_name, + &original_content, + &file_info.before_hash, + ) + .await { result.error = Some(e.to_string()); return result; @@ -355,9 +368,13 @@ mod tests { after_hash: "bbb".to_string(), }; - let result = - verify_file_rollback(pkg_dir.path(), "nonexistent.js", &file_info, blobs_dir.path()) - .await; + let result = verify_file_rollback( + pkg_dir.path(), + "nonexistent.js", + &file_info, + blobs_dir.path(), + ) + .await; assert_eq!(result.status, VerifyRollbackStatus::NotFound); } @@ -467,10 +484,7 @@ mod tests { let result = verify_file_rollback(pkg_dir.path(), "index.js", &file_info, blobs_dir.path()).await; assert_eq!(result.status, VerifyRollbackStatus::HashMismatch); - 
assert!(result - .message - .unwrap() - .contains("modified after patching")); + assert!(result.message.unwrap().contains("modified after patching")); } #[tokio::test] @@ -508,6 +522,193 @@ mod tests { .contains("Hash verification failed")); } + /// Validate-before-write: a corrupt/mismatched rollback blob must be + /// refused *before* any disk write, leaving the on-disk file + /// byte-identical to its pre-call (patched) state and dropping no + /// `.socket-stage-*` litter. Regression: the old in-place + /// `tokio::fs::write` committed the bad bytes over the file and only + /// then hashed, leaving the file corrupted on the error path. + #[tokio::test] + async fn test_rollback_file_patch_hash_mismatch_leaves_file_intact() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("index.js"); + tokio::fs::write(&path, b"patched bytes on disk") + .await + .unwrap(); + + let result = + rollback_file_patch(dir.path(), "index.js", b"original content", "wrong_hash").await; + assert!(result.is_err()); + + // The file must NOT have been overwritten with the bad blob. + assert_eq!( + tokio::fs::read(&path).await.unwrap(), + b"patched bytes on disk" + ); + + // No staged temp file leaked into the directory. + let mut entries = tokio::fs::read_dir(dir.path()).await.unwrap(); + while let Some(entry) = entries.next_entry().await.unwrap() { + let name = entry.file_name().to_string_lossy().to_string(); + assert!( + !name.starts_with(".socket-stage-") && !name.starts_with(".socket-cow-"), + "stage/cow litter leaked: {name}" + ); + } + } + + /// Copy-on-write safety: rolling back a file that shares an inode + /// with a sibling (the pnpm / Go-cache hardlink case) must only + /// restore *our* copy. The sibling — another project's view or the + /// shared store entry — must keep its bytes. Regression: the old + /// in-place write mutated the shared inode and corrupted the sibling. 
+ #[cfg(unix)] + #[tokio::test] + async fn test_rollback_file_patch_does_not_propagate_to_hardlinked_sibling() { + let dir = tempfile::tempdir().unwrap(); + let project = dir.path().join("project").join("foo.js"); + let sibling = dir.path().join("sibling.js"); + tokio::fs::create_dir_all(project.parent().unwrap()) + .await + .unwrap(); + + // Both paths point at the same inode, both currently "patched". + tokio::fs::write(&sibling, b"patched bytes").await.unwrap(); + tokio::fs::hard_link(&sibling, &project).await.unwrap(); + + let original = b"original bytes"; + let original_hash = compute_git_sha256_from_bytes(original); + rollback_file_patch( + project.parent().unwrap(), + "foo.js", + original, + &original_hash, + ) + .await + .unwrap(); + + // Our project view is rolled back... + assert_eq!(tokio::fs::read(&project).await.unwrap(), original); + // ...but the sibling inode is untouched. + assert_eq!(tokio::fs::read(&sibling).await.unwrap(), b"patched bytes"); + } + + /// Permission fidelity: rolling back a read-only file (Go module + /// cache marks sources `0o444`) must restore the original content + /// AND leave the file read-only afterward. Regression: the old code + /// relaxed the mode to `0o644` to write and never restored it, + /// silently leaving rolled-back cache files writable. + #[cfg(unix)] + #[tokio::test] + async fn test_rollback_file_patch_preserves_readonly_mode() { + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("index.js"); + let original = b"original content"; + let original_hash = compute_git_sha256_from_bytes(original); + + tokio::fs::write(&path, b"patched content").await.unwrap(); + // Read-only patched file, as apply would have left a Go-cache source. 
+ tokio::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o444)) + .await + .unwrap(); + + rollback_file_patch(dir.path(), "index.js", original, &original_hash) + .await + .unwrap(); + + assert_eq!(tokio::fs::read(&path).await.unwrap(), original); + let mode = tokio::fs::metadata(&path) + .await + .unwrap() + .permissions() + .mode() + & 0o7777; + assert_eq!( + mode, 0o444, + "rollback must restore the read-only mode, not leave the file writable" + ); + } + + /// End-to-end rollback against a fully read-only package directory + /// (Go cache: `0o444` files inside a `0o555` directory). The atomic + /// stage+rename path must temporarily grant directory write, restore + /// content, and put the directory mode back. Regression: the old + /// in-place write could not stage inside a read-only directory. + #[cfg(unix)] + #[tokio::test] + async fn test_rollback_package_patch_in_readonly_dir() { + use std::os::unix::fs::PermissionsExt; + + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let original = b"original content"; + let patched = b"patched content"; + let before_hash = compute_git_sha256_from_bytes(original); + let after_hash = compute_git_sha256_from_bytes(patched); + + tokio::fs::write(pkg_dir.path().join("index.js"), patched) + .await + .unwrap(); + tokio::fs::write(blobs_dir.path().join(&before_hash), original) + .await + .unwrap(); + // Lock the file and directory down, Go-cache style. 
+ tokio::fs::set_permissions( + pkg_dir.path().join("index.js"), + std::fs::Permissions::from_mode(0o444), + ) + .await + .unwrap(); + tokio::fs::set_permissions(pkg_dir.path(), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "index.js".to_string(), + PatchFileInfo { + before_hash, + after_hash, + }, + ); + + let result = rollback_package_patch( + "pkg:golang/example.com/x@1.0.0", + pkg_dir.path(), + &files, + blobs_dir.path(), + false, + ) + .await; + + assert!(result.success, "expected success: {:?}", result.error); + assert_eq!(result.files_rolled_back.len(), 1); + assert_eq!( + tokio::fs::read(pkg_dir.path().join("index.js")) + .await + .unwrap(), + original + ); + // Directory mode restored to exactly 0o555. + assert_eq!( + tokio::fs::metadata(pkg_dir.path()) + .await + .unwrap() + .permissions() + .mode() + & 0o7777, + 0o555, + ); + + // Re-grant write so the TempDir can clean itself up. + tokio::fs::set_permissions(pkg_dir.path(), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + } + #[tokio::test] async fn test_rollback_package_patch_success() { let pkg_dir = tempfile::tempdir().unwrap(); @@ -551,7 +752,9 @@ mod tests { assert!(result.error.is_none()); // Verify file was restored - let content = tokio::fs::read(pkg_dir.path().join("index.js")).await.unwrap(); + let content = tokio::fs::read(pkg_dir.path().join("index.js")) + .await + .unwrap(); assert_eq!(content, original); } @@ -594,7 +797,9 @@ mod tests { assert_eq!(result.files_rolled_back.len(), 0); // dry run // File should still be patched - let content = tokio::fs::read(pkg_dir.path().join("index.js")).await.unwrap(); + let content = tokio::fs::read(pkg_dir.path().join("index.js")) + .await + .unwrap(); assert_eq!(content, patched); } @@ -666,4 +871,231 @@ mod tests { assert!(!result.success); assert!(result.error.is_some()); } + + /// Regression (blob-vs-already-original ordering): a file already at + /// its original 
(`beforeHash`) state must verify as `AlreadyOriginal` + /// even when the before-blob is gone. A finished rollback needs no + /// blob to restore, so a GC'd blob must NOT downgrade it to + /// `MissingBlob`. Before the fix the blob check ran first and a + /// re-run rollback (or one after blob cleanup) reported a spurious + /// missing-blob failure. + #[tokio::test] + async fn test_verify_file_rollback_already_original_without_blob() { + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let original = b"original content"; + let before_hash = compute_git_sha256_from_bytes(original); + + // File is already at its original state, but NO before-blob exists. + tokio::fs::write(pkg_dir.path().join("index.js"), original) + .await + .unwrap(); + + let file_info = PatchFileInfo { + before_hash, + after_hash: "some_after_hash".to_string(), + }; + + let result = + verify_file_rollback(pkg_dir.path(), "index.js", &file_info, blobs_dir.path()).await; + assert_eq!(result.status, VerifyRollbackStatus::AlreadyOriginal); + } + + /// Package-level consequence of the ordering fix: an already-original + /// file whose blob was GC'd must not block its sibling's real + /// rollback. The whole package should succeed and the ready file + /// should be restored. Before the fix the missing blob on the + /// no-op file aborted the entire package rollback. + #[tokio::test] + async fn test_rollback_package_patch_already_original_missing_blob_does_not_block() { + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + // File A: already at original state; its before-blob is absent. + let a_original = b"a original"; + let a_before = compute_git_sha256_from_bytes(a_original); + tokio::fs::write(pkg_dir.path().join("a.js"), a_original) + .await + .unwrap(); + + // File B: still patched; before-blob present, ready to roll back. 
+ let b_original = b"b original"; + let b_patched = b"b patched"; + let b_before = compute_git_sha256_from_bytes(b_original); + let b_after = compute_git_sha256_from_bytes(b_patched); + tokio::fs::write(pkg_dir.path().join("b.js"), b_patched) + .await + .unwrap(); + tokio::fs::write(blobs_dir.path().join(&b_before), b_original) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "a.js".to_string(), + PatchFileInfo { + before_hash: a_before, + after_hash: "a_after".to_string(), + }, + ); + files.insert( + "b.js".to_string(), + PatchFileInfo { + before_hash: b_before, + after_hash: b_after, + }, + ); + + let result = rollback_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + blobs_dir.path(), + false, + ) + .await; + + assert!(result.success, "expected success: {:?}", result.error); + assert_eq!(result.files_rolled_back, vec!["b.js".to_string()]); + assert_eq!( + tokio::fs::read(pkg_dir.path().join("b.js")).await.unwrap(), + b_original + ); + // A was already original and untouched. + assert_eq!( + tokio::fs::read(pkg_dir.path().join("a.js")).await.unwrap(), + a_original + ); + } + + /// New-file rollback (empty `beforeHash`): the file the patch added + /// is deleted when its content still matches `afterHash`. 
+ #[tokio::test] + async fn test_rollback_package_patch_new_file_deleted() { + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let added = b"file added by the patch\n"; + let after_hash = compute_git_sha256_from_bytes(added); + let path = pkg_dir.path().join("added.js"); + tokio::fs::write(&path, added).await.unwrap(); + + let mut files = HashMap::new(); + files.insert( + "added.js".to_string(), + PatchFileInfo { + before_hash: String::new(), + after_hash, + }, + ); + + let result = rollback_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + blobs_dir.path(), + false, + ) + .await; + + assert!(result.success, "expected success: {:?}", result.error); + assert_eq!(result.files_rolled_back, vec!["added.js".to_string()]); + assert!( + tokio::fs::metadata(&path).await.is_err(), + "the patch-added file must be deleted on rollback" + ); + } + + /// New-file rollback is a no-op (success, nothing deleted) when the + /// added file is already gone — e.g. the operator removed it by hand. + #[tokio::test] + async fn test_rollback_package_patch_new_file_already_gone() { + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let mut files = HashMap::new(); + files.insert( + "added.js".to_string(), + PatchFileInfo { + before_hash: String::new(), + after_hash: compute_git_sha256_from_bytes(b"whatever"), + }, + ); + + let result = rollback_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + blobs_dir.path(), + false, + ) + .await; + + assert!(result.success, "expected success: {:?}", result.error); + assert_eq!(result.files_rolled_back.len(), 0); + } + + /// Regression (read-only-dir delete): deleting a patch-added file + /// requires write permission on the *parent directory*. A Go-cache + /// style read-only directory (0o555) must be temporarily relaxed for + /// the unlink and restored to its exact prior mode afterward. 
Before + /// the fix the bare `remove_file` failed with EACCES. + #[cfg(unix)] + #[tokio::test] + async fn test_rollback_package_patch_new_file_delete_in_readonly_dir() { + use std::os::unix::fs::PermissionsExt; + + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let added = b"added by patch\n"; + let after_hash = compute_git_sha256_from_bytes(added); + let path = pkg_dir.path().join("added.js"); + tokio::fs::write(&path, added).await.unwrap(); + // Read-only file inside a read-only directory (Go cache layout). + tokio::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o444)) + .await + .unwrap(); + tokio::fs::set_permissions(pkg_dir.path(), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "added.js".to_string(), + PatchFileInfo { + before_hash: String::new(), + after_hash, + }, + ); + + let result = rollback_package_patch( + "pkg:golang/example.com/x@1.0.0", + pkg_dir.path(), + &files, + blobs_dir.path(), + false, + ) + .await; + + assert!(result.success, "expected success: {:?}", result.error); + assert_eq!(result.files_rolled_back, vec!["added.js".to_string()]); + assert!(tokio::fs::metadata(&path).await.is_err()); + // Directory mode restored to exactly 0o555. + assert_eq!( + tokio::fs::metadata(pkg_dir.path()) + .await + .unwrap() + .permissions() + .mode() + & 0o7777, + 0o555, + ); + + // Re-grant write so the TempDir can clean itself up. 
+ tokio::fs::set_permissions(pkg_dir.path(), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + } } diff --git a/crates/socket-patch-core/src/patch/sidecars/cargo.rs b/crates/socket-patch-core/src/patch/sidecars/cargo.rs index a0434052..9ae48570 100644 --- a/crates/socket-patch-core/src/patch/sidecars/cargo.rs +++ b/crates/socket-patch-core/src/patch/sidecars/cargo.rs @@ -31,7 +31,8 @@ use std::path::Path; use serde_json::{Map, Value}; use sha2::{Digest, Sha256}; -use crate::patch::apply::normalize_file_path; +use crate::hash::git_sha256::compute_git_sha256_from_bytes; +use crate::patch::apply::{apply_file_patch, normalize_file_path}; use super::{SidecarError, SidecarFile, SidecarFileAction, SidecarPayload}; @@ -66,11 +67,10 @@ pub(crate) async fn fixup( } }; - let mut json: Value = - serde_json::from_str(&raw).map_err(|e| SidecarError::Malformed { - path: checksum_path.display().to_string(), - detail: e.to_string(), - })?; + let mut json: Value = serde_json::from_str(&raw).map_err(|e| SidecarError::Malformed { + path: checksum_path.display().to_string(), + detail: e.to_string(), + })?; let files = json .get_mut("files") @@ -96,12 +96,39 @@ pub(crate) async fn fixup( .expect("serializing a Value just deserialized from valid JSON must succeed"); out.push(b'\n'); - tokio::fs::write(&checksum_path, out).await.map_err(|source| { - SidecarError::Io { + // Commit through the hardened shared write path — NOT a bare + // `tokio::fs::write`. The checksum file lives inside a Cargo + // registry/vendor `-/` tree, which Cargo marks + // read-only (files `0o444` inside `0o555` dirs) for tamper + // detection. A plain in-place truncating write has three defects + // there, all of which the rest of the patch engine was hardened + // against (see `apply::apply_file_patch` and `rollback`): + // + // 1. 
**Read-only-hostile.** Opening the existing `0o444` file + // `O_TRUNC` fails `EACCES`, so the fixup errored out exactly + // in the real-registry case it exists to handle — leaving the + // checksum stale-patched and every future `cargo build` of the + // crate refusing the (correctly) patched sources. + // 2. **Non-atomic.** A crash / `ENOSPC` mid-write leaves a + // truncated, unparseable `.cargo-checksum.json` — strictly + // worse than a stale hash, because cargo can no longer even + // parse it to report a mismatch; the crate is wedged. + // 3. **Copy-on-write-unsafe.** A vendored tree hardlinked into a + // shared store would have its sibling mutated in place. + // + // `apply_file_patch` stages a sibling, fsyncs, and `rename(2)`s + // atomically; breaks CoW inodes; relaxes then restores BOTH the + // file's and the directory's read-only modes; and verifies the + // bytes that landed. The `expected_hash` is just the digest of the + // bytes we hand it (a self-check) — the file already exists, so + // its original mode is snapshotted and restored bit-for-bit. + let expected_hash = compute_git_sha256_from_bytes(&out); + apply_file_patch(pkg_path, CHECKSUM_FILE, &out, &expected_hash) + .await + .map_err(|source| SidecarError::Io { path: checksum_path.display().to_string(), source, - } - })?; + })?; Ok(Some(SidecarPayload { files: vec![SidecarFile { @@ -127,10 +154,12 @@ async fn update_entries( for file_name in patched { let normalized = normalize_file_path(file_name).to_string(); let on_disk = pkg_path.join(&normalized); - let hash = sha256_file(&on_disk).await.map_err(|source| SidecarError::Io { - path: on_disk.display().to_string(), - source, - })?; + let hash = sha256_file(&on_disk) + .await + .map_err(|source| SidecarError::Io { + path: on_disk.display().to_string(), + source, + })?; files.insert(normalized, Value::String(hash)); } Ok(()) @@ -174,7 +203,9 @@ mod tests { .await .unwrap(); // Write a file we do NOT patch — its hash stays stale. 
- tokio::fs::write(pkg.join("Cargo.toml"), b"unchanged").await.unwrap(); + tokio::fs::write(pkg.join("Cargo.toml"), b"unchanged") + .await + .unwrap(); // Pre-existing checksum file with bogus hashes for both. let starting = serde_json::json!({ @@ -200,7 +231,9 @@ mod tests { // Read back and assert. let post: serde_json::Value = serde_json::from_str( - &tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)).await.unwrap(), + &tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)) + .await + .unwrap(), ) .unwrap(); let files = post["files"].as_object().unwrap(); @@ -224,7 +257,9 @@ mod tests { let d = tempfile::tempdir().unwrap(); let pkg = d.path(); tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); - tokio::fs::write(pkg.join("src/new.rs"), b"brand new").await.unwrap(); + tokio::fs::write(pkg.join("src/new.rs"), b"brand new") + .await + .unwrap(); let starting = serde_json::json!({ "files": { @@ -242,7 +277,9 @@ mod tests { let _ = fixup(pkg, &["src/new.rs".to_string()]).await.unwrap(); let post: serde_json::Value = serde_json::from_str( - &tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)).await.unwrap(), + &tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)) + .await + .unwrap(), ) .unwrap(); let files = post["files"].as_object().unwrap(); @@ -260,7 +297,9 @@ mod tests { let d = tempfile::tempdir().unwrap(); let pkg = d.path(); tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); - tokio::fs::write(pkg.join("src/lib.rs"), b"patched").await.unwrap(); + tokio::fs::write(pkg.join("src/lib.rs"), b"patched") + .await + .unwrap(); let starting = serde_json::json!({ "files": { "src/lib.rs": "00".repeat(32) }, @@ -274,10 +313,14 @@ mod tests { .unwrap(); // Patch list uses the "package/" prefix. 
- let _ = fixup(pkg, &["package/src/lib.rs".to_string()]).await.unwrap(); + let _ = fixup(pkg, &["package/src/lib.rs".to_string()]) + .await + .unwrap(); let post: serde_json::Value = serde_json::from_str( - &tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)).await.unwrap(), + &tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)) + .await + .unwrap(), ) .unwrap(); assert_eq!( @@ -311,4 +354,186 @@ mod tests { .unwrap_err(); assert!(matches!(err, SidecarError::Malformed { .. })); } + + /// Regression (read-only checksum file): a real Cargo registry/vendor + /// tree marks `.cargo-checksum.json` read-only (`0o444`) for tamper + /// detection. The rewrite must still succeed — the hardened + /// stage+rename path relaxes the file's mode, swaps a fresh inode in + /// atomically, and restores the original `0o444` mode afterward. + /// Before the fix the bare in-place `tokio::fs::write` failed `EACCES` + /// here, leaving the checksum stale-patched and the crate unbuildable. + #[cfg(unix)] + #[tokio::test] + async fn rewrites_readonly_checksum_file_and_restores_mode() { + use std::os::unix::fs::PermissionsExt; + + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); + tokio::fs::write(pkg.join("src/lib.rs"), b"patched lib") + .await + .unwrap(); + + let starting = serde_json::json!({ + "files": { "src/lib.rs": "00".repeat(32) }, + "package": "stale", + }); + let checksum = pkg.join(CHECKSUM_FILE); + tokio::fs::write(&checksum, serde_json::to_string_pretty(&starting).unwrap()) + .await + .unwrap(); + // Lock the checksum file down exactly as Cargo would. + tokio::fs::set_permissions(&checksum, std::fs::Permissions::from_mode(0o444)) + .await + .unwrap(); + + let out = fixup(pkg, &["src/lib.rs".to_string()]).await.unwrap(); + assert!(out.is_some(), "read-only checksum must still be rewritten"); + + // The new hash landed... 
+ let post: serde_json::Value = + serde_json::from_str(&tokio::fs::read_to_string(&checksum).await.unwrap()).unwrap(); + assert_eq!( + post["files"]["src/lib.rs"].as_str().unwrap(), + expected_sha256(b"patched lib") + ); + // ...and the original read-only mode was restored bit-for-bit. + let mode = tokio::fs::metadata(&checksum) + .await + .unwrap() + .permissions() + .mode() + & 0o7777; + assert_eq!( + mode, 0o444, + "checksum file must stay read-only after rewrite" + ); + } + + /// Regression (read-only package directory): Cargo also marks the + /// crate directory `0o555`. The atomic stage+rename needs write + /// permission on the *parent dir* to create its sibling stage file, + /// so the write path must temporarily grant directory write and + /// restore the exact `0o555` mode afterward. The bare write could + /// not stage inside a read-only directory at all. + #[cfg(unix)] + #[tokio::test] + async fn rewrites_inside_readonly_package_dir() { + use std::os::unix::fs::PermissionsExt; + + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); + tokio::fs::write(pkg.join("src/lib.rs"), b"patched lib") + .await + .unwrap(); + let checksum = pkg.join(CHECKSUM_FILE); + let starting = serde_json::json!({ + "files": { "src/lib.rs": "00".repeat(32) }, + "package": "x", + }); + tokio::fs::write(&checksum, serde_json::to_string_pretty(&starting).unwrap()) + .await + .unwrap(); + // Lock the directory down, Cargo-cache style. + tokio::fs::set_permissions(pkg, std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + let out = fixup(pkg, &["src/lib.rs".to_string()]).await; + + // Re-grant write so the TempDir can clean itself up regardless + // of the assertion outcome. 
+ tokio::fs::set_permissions(pkg, std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + + assert!( + out.expect("fixup in read-only dir must not error") + .is_some(), + "read-only package dir must still be rewritten", + ); + let post: serde_json::Value = + serde_json::from_str(&tokio::fs::read_to_string(&checksum).await.unwrap()).unwrap(); + assert_eq!( + post["files"]["src/lib.rs"].as_str().unwrap(), + expected_sha256(b"patched lib") + ); + } + + /// Atomicity hygiene: the stage+rename commit must leave no + /// `.socket-stage-*` litter in the package directory. + #[tokio::test] + async fn rewrite_leaves_no_stage_litter() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); + tokio::fs::write(pkg.join("src/lib.rs"), b"patched lib") + .await + .unwrap(); + let starting = serde_json::json!({ + "files": { "src/lib.rs": "00".repeat(32) }, + "package": "x", + }); + tokio::fs::write( + pkg.join(CHECKSUM_FILE), + serde_json::to_string_pretty(&starting).unwrap(), + ) + .await + .unwrap(); + + fixup(pkg, &["src/lib.rs".to_string()]).await.unwrap(); + + let mut entries = tokio::fs::read_dir(pkg).await.unwrap(); + while let Some(entry) = entries.next_entry().await.unwrap() { + let name = entry.file_name().to_string_lossy().to_string(); + assert!( + !name.starts_with(".socket-stage-") && !name.starts_with(".socket-cow-"), + "stage/cow litter leaked into package dir: {name}" + ); + } + } + + /// Copy-on-write safety: when `.cargo-checksum.json` is hardlinked + /// into a shared store (a vendored tree shared between projects), + /// the rewrite must give us a private inode and leave the sibling + /// untouched. The atomic rename-over-target achieves this; the old + /// in-place write would have mutated the shared inode. 
+ #[cfg(unix)] + #[tokio::test] + async fn rewrite_does_not_mutate_hardlinked_sibling() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path().join("pkg"); + tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); + tokio::fs::write(pkg.join("src/lib.rs"), b"patched lib") + .await + .unwrap(); + + let starting = serde_json::json!({ + "files": { "src/lib.rs": "00".repeat(32) }, + "package": "x", + }); + let checksum = pkg.join(CHECKSUM_FILE); + let original_json = serde_json::to_string_pretty(&starting).unwrap(); + tokio::fs::write(&checksum, &original_json).await.unwrap(); + + // A sibling in the shared store points at the same inode. + let sibling = d.path().join("shared-store-checksum.json"); + tokio::fs::hard_link(&checksum, &sibling).await.unwrap(); + + fixup(&pkg, &["src/lib.rs".to_string()]).await.unwrap(); + + // Our copy was rewritten... + let post: serde_json::Value = + serde_json::from_str(&tokio::fs::read_to_string(&checksum).await.unwrap()).unwrap(); + assert_eq!( + post["files"]["src/lib.rs"].as_str().unwrap(), + expected_sha256(b"patched lib") + ); + // ...but the shared-store sibling kept its original bytes. 
+ assert_eq!( + tokio::fs::read_to_string(&sibling).await.unwrap(), + original_json, + ); + } } diff --git a/crates/socket-patch-core/src/patch/sidecars/mod.rs b/crates/socket-patch-core/src/patch/sidecars/mod.rs index 9f06da04..19ceb057 100644 --- a/crates/socket-patch-core/src/patch/sidecars/mod.rs +++ b/crates/socket-patch-core/src/patch/sidecars/mod.rs @@ -217,10 +217,7 @@ mod tests { let record = out.expect("gem should return a record"); assert_eq!(record.ecosystem, "gem"); let advisory = record.advisory.expect("gem must carry an advisory"); - assert_eq!( - advisory.code, - SidecarAdvisoryCode::GemBundleInstallReverts - ); + assert_eq!(advisory.code, SidecarAdvisoryCode::GemBundleInstallReverts); } #[tokio::test] @@ -237,4 +234,198 @@ mod tests { .unwrap(); assert!(out.is_none()); } + + /// Regression: an empty `patched` list short-circuits to `None` + /// *before* the PURL is classified, even for an ecosystem that + /// would otherwise always emit an advisory (pypi). Guards the + /// `patched.is_empty()` early return at the top of `dispatch_fixup` + /// against being reordered below the advisory arms (which would + /// emit spurious advisories for no-op applies). + #[tokio::test] + async fn empty_patched_short_circuits_before_advisory() { + let d = tempfile::tempdir().unwrap(); + let out = dispatch_fixup("pkg:pypi/requests@2.28.0", d.path(), &[], &empty_files()) + .await + .unwrap(); + assert!( + out.is_none(), + "no files patched ⇒ no sidecar record, even for advisory ecosystems" + ); + } + + // ── Full-path dispatch coverage ────────────────────────────────── + // The tests above this point exercise advisory ecosystems and the + // None paths. The ones below drive `dispatch_fixup` end-to-end for + // the *file-touching* ecosystems (cargo rewrite, nuget delete) and + // the error boundary — the wiring between `dispatch_fixup` and the + // per-ecosystem fixups that the direct `cargo::fixup`/`nuget::fixup` + // unit tests don't cover. 
+ + /// Cargo PURL routes through `dispatch_fixup` to the checksum + /// rewriter and the resulting record denormalizes purl + ecosystem + /// and carries the rewritten-file entry. + #[cfg(feature = "cargo")] + #[tokio::test] + async fn cargo_dispatch_rewrites_checksum_and_builds_record() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); + tokio::fs::write(pkg.join("src/lib.rs"), b"patched lib") + .await + .unwrap(); + let starting = serde_json::json!({ + "files": { "src/lib.rs": "00".repeat(32) }, + "package": "x", + }); + tokio::fs::write( + pkg.join(".cargo-checksum.json"), + serde_json::to_string_pretty(&starting).unwrap(), + ) + .await + .unwrap(); + + let out = dispatch_fixup( + "pkg:cargo/mycrate@1.0.0", + pkg, + &["src/lib.rs".to_string()], + &empty_files(), + ) + .await + .unwrap(); + + let record = out.expect("cargo dispatch must produce a record"); + assert_eq!(record.ecosystem, "cargo"); + assert_eq!(record.purl, "pkg:cargo/mycrate@1.0.0"); + assert_eq!(record.files.len(), 1); + assert_eq!(record.files[0].path, ".cargo-checksum.json"); + assert_eq!(record.files[0].action, SidecarFileAction::Rewritten); + assert!(record.advisory.is_none()); + } + + /// Cargo crate with no `.cargo-checksum.json` → the sub-fixup + /// returns `None`, so `dispatch_fixup` produces no record (not an + /// empty-files record). + #[cfg(feature = "cargo")] + #[tokio::test] + async fn cargo_dispatch_without_checksum_returns_none() { + let d = tempfile::tempdir().unwrap(); + let out = dispatch_fixup( + "pkg:cargo/mycrate@1.0.0", + d.path(), + &["src/lib.rs".to_string()], + &empty_files(), + ) + .await + .unwrap(); + assert!(out.is_none()); + } + + /// A malformed `.cargo-checksum.json` makes the sub-fixup error; + /// `dispatch_fixup` must propagate the `SidecarError` (the apply + /// boundary converts it to a `sidecar_fixup_failed` advisory) and + /// must NOT swallow it into `Ok(None)`. 
+ #[cfg(feature = "cargo")] + #[tokio::test] + async fn cargo_dispatch_propagates_malformed_error() { + let d = tempfile::tempdir().unwrap(); + tokio::fs::write(d.path().join(".cargo-checksum.json"), b"not json") + .await + .unwrap(); + let err = dispatch_fixup( + "pkg:cargo/mycrate@1.0.0", + d.path(), + &["src/lib.rs".to_string()], + &empty_files(), + ) + .await + .unwrap_err(); + assert!(matches!(err, SidecarError::Malformed { .. })); + } + + /// NuGet PURL routes through `dispatch_fixup` to the metadata + /// neutralizer; the on-disk `.nupkg.metadata` is deleted and the + /// record records it as `Deleted`. + #[cfg(feature = "nuget")] + #[tokio::test] + async fn nuget_dispatch_deletes_metadata_and_builds_record() { + let d = tempfile::tempdir().unwrap(); + tokio::fs::write(d.path().join(".nupkg.metadata"), b"{}") + .await + .unwrap(); + + let out = dispatch_fixup( + "pkg:nuget/Newtonsoft.Json@13.0.3", + d.path(), + &["lib/x.dll".to_string()], + &empty_files(), + ) + .await + .unwrap(); + + let record = out.expect("nuget dispatch must produce a record"); + assert_eq!(record.ecosystem, "nuget"); + assert_eq!(record.files.len(), 1); + assert_eq!(record.files[0].path, ".nupkg.metadata"); + assert_eq!(record.files[0].action, SidecarFileAction::Deleted); + assert!(record.advisory.is_none()); + assert!(tokio::fs::metadata(d.path().join(".nupkg.metadata")) + .await + .is_err()); + } + + /// NuGet package with neither metadata nor signature → no record. + #[cfg(feature = "nuget")] + #[tokio::test] + async fn nuget_dispatch_nothing_to_do_returns_none() { + let d = tempfile::tempdir().unwrap(); + let out = dispatch_fixup( + "pkg:nuget/Newtonsoft.Json@13.0.3", + d.path(), + &["lib/x.dll".to_string()], + &empty_files(), + ) + .await + .unwrap(); + assert!(out.is_none()); + } + + /// Go PURL routes through `dispatch_fixup` to the advisory-only + /// path and denormalizes the ecosystem name to `golang`. 
+ #[cfg(feature = "golang")] + #[tokio::test] + async fn golang_dispatch_returns_structured_advisory() { + let d = tempfile::tempdir().unwrap(); + let out = dispatch_fixup( + "pkg:golang/github.com/gin-gonic/gin@v1.9.1", + d.path(), + &["gin.go".to_string()], + &empty_files(), + ) + .await + .unwrap(); + let record = out.expect("golang should return a record"); + assert_eq!(record.ecosystem, "golang"); + assert!(record.files.is_empty()); + let advisory = record.advisory.expect("golang must carry an advisory"); + assert_eq!(advisory.code, SidecarAdvisoryCode::GoModVerifyFails); + assert_eq!(advisory.severity, SidecarSeverity::Warning); + } + + /// When the `cargo` feature is disabled, a `pkg:cargo/` PURL is + /// unrecognized by `Ecosystem::from_purl` and `dispatch_fixup` + /// returns `None` rather than attempting (or panicking on) a fixup. + #[cfg(not(feature = "cargo"))] + #[tokio::test] + async fn cargo_purl_without_feature_returns_none() { + let d = tempfile::tempdir().unwrap(); + let out = dispatch_fixup( + "pkg:cargo/mycrate@1.0.0", + d.path(), + &["src/lib.rs".to_string()], + &empty_files(), + ) + .await + .unwrap(); + assert!(out.is_none()); + } } diff --git a/crates/socket-patch-core/src/patch/sidecars/nuget.rs b/crates/socket-patch-core/src/patch/sidecars/nuget.rs index abfb2033..d0d1b767 100644 --- a/crates/socket-patch-core/src/patch/sidecars/nuget.rs +++ b/crates/socket-patch-core/src/patch/sidecars/nuget.rs @@ -20,6 +20,8 @@ use std::path::Path; +use crate::patch::apply::DirWriteGuard; + use super::{ SidecarAdvisory, SidecarAdvisoryCode, SidecarError, SidecarFile, SidecarFileAction, SidecarPayload, SidecarSeverity, @@ -41,7 +43,21 @@ pub(crate) async fn fixup(pkg_path: &Path) -> Result, Sid let mut files = Vec::new(); let metadata_path = pkg_path.join(METADATA_FILE); - match tokio::fs::remove_file(&metadata_path).await { + + // `unlink(2)` needs write permission on the *parent directory*, not + // on the file. 
NuGet caches can live inside a read-only (`0o555`) + // tree — the same tamper-proofing layout the apply path hardened + // against for Cargo/Go (see `apply::DirWriteGuard`). Without the + // guard a bare `remove_file` fails `EACCES` exactly in the + // real-cache case, leaving the stale-hash metadata in place so every + // future `dotnet restore` flags the (correctly) patched package as + // tampered. Grant directory-write for the unlink, then restore the + // directory's exact mode — even if the unlink itself errors. + let dir_guard = DirWriteGuard::acquire(Some(pkg_path)).await; + let remove_result = tokio::fs::remove_file(&metadata_path).await; + dir_guard.restore().await; + + match remove_result { Ok(()) => files.push(SidecarFile { path: METADATA_FILE.to_string(), action: SidecarFileAction::Deleted, @@ -152,6 +168,57 @@ mod tests { assert_eq!(adv.severity, SidecarSeverity::Warning); } + /// Regression (read-only package directory): NuGet caches — like + /// Cargo's registry and Go's module cache — can live inside a + /// directory the host marks read-only (`0o555`) for tamper + /// detection. Removing `.nupkg.metadata` requires *write permission + /// on the parent directory*, not on the file itself, so a bare + /// `remove_file` fails `EACCES` there — leaving the stale-hash + /// metadata in place and every future `dotnet restore` flagging the + /// (correctly) patched package as tampered. The fixup must grant + /// directory-write for the unlink and restore the original mode. + #[cfg(unix)] + #[tokio::test] + async fn deletes_metadata_inside_readonly_dir() { + use std::os::unix::fs::PermissionsExt; + + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + tokio::fs::write(pkg.join(METADATA_FILE), b"{}") + .await + .unwrap(); + // Lock the package directory down exactly as a tamper-proofed + // cache would. 
+ tokio::fs::set_permissions(pkg, std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + let out = fixup(pkg).await; + + // Capture the post-fixup directory mode BEFORE re-granting write + // for cleanup — the guard must have restored it to 0o555 itself. + let mode = tokio::fs::metadata(pkg).await.unwrap().permissions().mode() & 0o7777; + + // Re-grant write so the TempDir can clean itself up regardless + // of the assertion outcome. + tokio::fs::set_permissions(pkg, std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + + let payload = out + .expect("delete inside a read-only dir must not error") + .expect("metadata existed, expect a payload"); + assert_eq!(payload.files.len(), 1); + assert_eq!(payload.files[0].action, SidecarFileAction::Deleted); + // The metadata is actually gone. + assert!(tokio::fs::metadata(pkg.join(METADATA_FILE)).await.is_err()); + // ...and the directory's original read-only mode was restored. + assert_eq!( + mode, 0o555, + "package dir mode must be restored after the unlink" + ); + } + /// Signed package WITH metadata: the typed payload now carries /// BOTH the file entry and the advisory — the lossy collapse /// from the old design is fixed. diff --git a/crates/socket-patch-core/src/patch/sidecars/types.rs b/crates/socket-patch-core/src/patch/sidecars/types.rs index 19b4529a..ca97304c 100644 --- a/crates/socket-patch-core/src/patch/sidecars/types.rs +++ b/crates/socket-patch-core/src/patch/sidecars/types.rs @@ -44,8 +44,10 @@ pub struct SidecarRecord { pub advisory: Option, } -/// One file the fixup rewrote, deleted, or created. Paths are -/// relative to the package directory the patch landed in. +/// One file the fixup rewrote or deleted. Paths are relative to the +/// package directory the patch landed in. (There is deliberately no +/// "created" action — see [`SidecarFileAction`], which reserves no +/// variants ahead of an ecosystem that actually produces them.) 
#[derive(Debug, Clone, Serialize, PartialEq, Eq)] #[serde(rename_all = "camelCase")] pub struct SidecarFile { @@ -220,6 +222,63 @@ mod tests { ); } + /// Contract: `files` is ALWAYS present in the serialized record, + /// even for advisory-only ecosystems (PyPI / gem / Go) whose record + /// carries an empty file list. Consumers iterate `.sidecars[].files[]` + /// unconditionally; dropping the key — e.g. via a stray + /// `skip_serializing_if = "Vec::is_empty"` copied from + /// `Envelope.sidecars` one layer up — would silently force every + /// consumer to null-guard. Locks the "Empty (but always present)" + /// guarantee documented on `SidecarRecord::files`. + #[test] + fn files_always_present_even_when_empty() { + let r = SidecarRecord { + purl: "pkg:pypi/requests@2.28.0".to_string(), + ecosystem: "pypi".to_string(), + files: Vec::new(), + advisory: Some(SidecarAdvisory { + code: SidecarAdvisoryCode::PypiRecordStale, + severity: SidecarSeverity::Warning, + message: "advisory only".to_string(), + }), + }; + let v: serde_json::Value = serde_json::to_value(&r).unwrap(); + let obj = v.as_object().unwrap(); + assert!( + obj.contains_key("files"), + "`files` must always serialize, even when empty" + ); + assert_eq!( + obj["files"], + serde_json::Value::Array(Vec::new()), + "empty file list must serialize as `[]`, not be omitted" + ); + } + + /// Contract: the fixup-failed path — the only scenario that emits + /// `SidecarSeverity::Error` (see `apply.rs`) — pairs the `Error` + /// severity with the `SidecarFixupFailed` code, an empty `files` + /// list, and an advisory. Pins the exact JSON a consumer branches + /// on to distinguish "the patch landed but the sidecar fixup blew + /// up" from an informational advisory. 
+ #[test] + fn fixup_failed_serializes_error_severity_and_code() { + let r = SidecarRecord { + purl: "pkg:cargo/x@1.0.0".to_string(), + ecosystem: "cargo".to_string(), + files: Vec::new(), + advisory: Some(SidecarAdvisory { + code: SidecarAdvisoryCode::SidecarFixupFailed, + severity: SidecarSeverity::Error, + message: "sidecar fixup failed (patch still applied): boom".to_string(), + }), + }; + let v: serde_json::Value = serde_json::to_value(&r).unwrap(); + assert_eq!(v["advisory"]["code"], "sidecar_fixup_failed"); + assert_eq!(v["advisory"]["severity"], "error"); + assert_eq!(v["files"], serde_json::Value::Array(Vec::new())); + } + /// Multi-file record + advisory together — the NuGet /// signed-package case that the old design lost. Verify both /// surface in the JSON simultaneously. diff --git a/crates/socket-patch-core/src/utils/cleanup_blobs.rs b/crates/socket-patch-core/src/utils/cleanup_blobs.rs index 0122aec9..8f639beb 100644 --- a/crates/socket-patch-core/src/utils/cleanup_blobs.rs +++ b/crates/socket-patch-core/src/utils/cleanup_blobs.rs @@ -33,10 +33,7 @@ async fn cleanup_dir bool>( entries.push(entry); } - let mut result = CleanupResult { - blobs_checked: entries.len(), - ..CleanupResult::default() - }; + let mut result = CleanupResult::default(); for entry in &entries { let file_name_str = entry.file_name().to_string_lossy().to_string(); @@ -44,10 +41,20 @@ async fn cleanup_dir bool>( continue; } let path = dir.join(&file_name_str); - let metadata = tokio::fs::metadata(&path).await?; + // Use symlink_metadata (lstat) rather than metadata (stat) so we never + // follow symlinks: a symlink is not a real socket-patch blob, and a + // dangling symlink would otherwise return an error. Tolerate any stat + // error (e.g. the entry was removed concurrently) by skipping that + // entry instead of aborting cleanup of every other orphan. 
+ let metadata = match tokio::fs::symlink_metadata(&path).await { + Ok(m) => m, + Err(_) => continue, + }; if !metadata.is_file() { continue; } + // Only regular, non-hidden files are actually considered/checked. + result.blobs_checked += 1; if is_used(&file_name_str) { continue; } @@ -94,8 +101,7 @@ pub async fn cleanup_unused_archives( archives_dir: &Path, dry_run: bool, ) -> Result { - let used_uuids: HashSet = - manifest.patches.values().map(|r| r.uuid.clone()).collect(); + let used_uuids: HashSet = manifest.patches.values().map(|r| r.uuid.clone()).collect(); cleanup_dir(archives_dir, dry_run, |name| { // Strip the .tar.gz suffix to recover the UUID; if it doesn't // end in .tar.gz, treat the entry as orphaned (not "used"). @@ -112,10 +118,7 @@ pub fn format_cleanup_result(result: &CleanupResult, dry_run: bool) -> String { } if result.blobs_removed == 0 { - return format!( - "Checked {} blob(s), all are in use.", - result.blobs_checked - ); + return format!("Checked {} blob(s), all are in use.", result.blobs_checked); } let action = if dry_run { "Would remove" } else { "Removed" }; @@ -164,16 +167,11 @@ mod tests { use std::collections::HashMap; const TEST_UUID: &str = "11111111-1111-4111-8111-111111111111"; - const BEFORE_HASH_1: &str = - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111"; - const AFTER_HASH_1: &str = - "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb1111"; - const BEFORE_HASH_2: &str = - "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc2222"; - const AFTER_HASH_2: &str = - "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd2222"; - const ORPHAN_HASH: &str = - "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo"; + const BEFORE_HASH_1: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111"; + const AFTER_HASH_1: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb1111"; + const BEFORE_HASH_2: &str = 
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc2222"; + const AFTER_HASH_2: &str = "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd2222"; + const ORPHAN_HASH: &str = "oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo"; fn create_test_manifest() -> PatchManifest { let mut files = HashMap::new(); @@ -446,12 +444,16 @@ mod tests { assert!(result .removed_blobs .contains(&format!("{SECOND_UUID}.tar.gz"))); - assert!(tokio::fs::metadata(archives.join(format!("{TEST_UUID}.tar.gz"))) - .await - .is_ok()); - assert!(tokio::fs::metadata(archives.join(format!("{SECOND_UUID}.tar.gz"))) - .await - .is_err()); + assert!( + tokio::fs::metadata(archives.join(format!("{TEST_UUID}.tar.gz"))) + .await + .is_ok() + ); + assert!( + tokio::fs::metadata(archives.join(format!("{SECOND_UUID}.tar.gz"))) + .await + .is_err() + ); } #[tokio::test] @@ -470,9 +472,11 @@ mod tests { .unwrap(); assert_eq!(result.blobs_removed, 1); - assert!(tokio::fs::metadata(archives.join(format!("{SECOND_UUID}.tar.gz"))) - .await - .is_ok()); + assert!( + tokio::fs::metadata(archives.join(format!("{SECOND_UUID}.tar.gz"))) + .await + .is_ok() + ); } #[tokio::test] @@ -513,6 +517,133 @@ mod tests { assert_eq!(result.blobs_removed, 0); } + #[tokio::test] + async fn test_cleanup_does_not_count_subdirs_or_hidden_files() { + // Regression: blobs_checked must only count regular, non-hidden files + // that are actually considered -- not subdirectories or dotfiles. This + // count is surfaced to users (human-readable + JSON in `repair`), so an + // inflated number is a real reporting bug. + let dir = tempfile::tempdir().unwrap(); + let blobs_dir = dir.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let manifest = create_test_manifest(); + + // One real (used) blob, plus noise that must be ignored entirely. 
+ tokio::fs::write(blobs_dir.join(AFTER_HASH_1), "after content 1") + .await + .unwrap(); + tokio::fs::create_dir_all(blobs_dir.join("subdir")) + .await + .unwrap(); + tokio::fs::write(blobs_dir.join(".hidden"), "hidden") + .await + .unwrap(); + + let result = cleanup_unused_blobs(&manifest, &blobs_dir, false) + .await + .unwrap(); + + // Only the single regular, non-hidden file is checked; nothing removed. + assert_eq!(result.blobs_checked, 1); + assert_eq!(result.blobs_removed, 0); + + // The subdirectory and hidden file are left untouched. + assert!(tokio::fs::metadata(blobs_dir.join("subdir")).await.is_ok()); + assert!(tokio::fs::metadata(blobs_dir.join(".hidden")).await.is_ok()); + } + + #[tokio::test] + async fn test_cleanup_empty_existing_dir_checks_nothing() { + // An existing-but-empty directory must report zero checked (no entries + // to consider), distinct from a populated one. + let dir = tempfile::tempdir().unwrap(); + let blobs_dir = dir.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let result = cleanup_unused_blobs(&create_test_manifest(), &blobs_dir, false) + .await + .unwrap(); + + assert_eq!(result.blobs_checked, 0); + assert_eq!(result.blobs_removed, 0); + } + + #[cfg(unix)] + #[tokio::test] + async fn test_cleanup_dangling_symlink_does_not_abort() { + // Regression: a single dangling symlink must not abort cleanup of every + // other orphan. Previously `tokio::fs::metadata(..)?` followed the link, + // hit a NotFound error, and propagated it out of the whole operation. + use std::os::unix::fs::symlink; + + let dir = tempfile::tempdir().unwrap(); + let blobs_dir = dir.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let manifest = create_test_manifest(); + + // A real orphan that should still be removed despite the bad symlink. + tokio::fs::write(blobs_dir.join(ORPHAN_HASH), "orphan content") + .await + .unwrap(); + // A dangling symlink (target does not exist). 
+ symlink( + blobs_dir.join("missing-target"), + blobs_dir.join("dangling-link"), + ) + .unwrap(); + + let result = cleanup_unused_blobs(&manifest, &blobs_dir, false) + .await + .unwrap(); + + // The orphan is removed; the symlink is counted as neither checked nor + // removed (it is not a regular file) and is left in place. + assert_eq!(result.blobs_removed, 1); + assert!(result.removed_blobs.contains(&ORPHAN_HASH.to_string())); + assert!(tokio::fs::metadata(blobs_dir.join(ORPHAN_HASH)) + .await + .is_err()); + assert!(tokio::fs::symlink_metadata(blobs_dir.join("dangling-link")) + .await + .is_ok()); + } + + #[cfg(unix)] + #[tokio::test] + async fn test_cleanup_does_not_follow_symlink_to_used_target() { + // A symlink is never treated as a blob, so its target's size is never + // attributed to bytes_freed and the link is never removed. + use std::os::unix::fs::symlink; + + let dir = tempfile::tempdir().unwrap(); + let blobs_dir = dir.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let manifest = create_test_manifest(); + + // A real file outside the managed set, plus a symlink pointing at it. + let outside = dir.path().join("outside.bin"); + tokio::fs::write(&outside, vec![0u8; 4096]).await.unwrap(); + symlink(&outside, blobs_dir.join("link-to-outside")).unwrap(); + + let result = cleanup_unused_blobs(&manifest, &blobs_dir, false) + .await + .unwrap(); + + assert_eq!(result.blobs_checked, 0); + assert_eq!(result.blobs_removed, 0); + assert_eq!(result.bytes_freed, 0); + // The symlink and its target both survive. 
+ assert!( + tokio::fs::symlink_metadata(blobs_dir.join("link-to-outside")) + .await + .is_ok() + ); + assert!(tokio::fs::metadata(&outside).await.is_ok()); + } + #[test] fn test_format_cleanup_result_dry_run_lists_blobs() { let result = CleanupResult { diff --git a/crates/socket-patch-core/src/utils/env_compat.rs b/crates/socket-patch-core/src/utils/env_compat.rs index f7b72881..10265192 100644 --- a/crates/socket-patch-core/src/utils/env_compat.rs +++ b/crates/socket-patch-core/src/utils/env_compat.rs @@ -65,8 +65,6 @@ pub fn warn_legacy_once(legacy_name: &'static str, new_name: &'static str) { } } -/// Read the new env var; if it isn't set, also probe the legacy name and -/// surface a deprecation warning when the legacy name is set. Returns the /// Renamed env vars whose legacy `SOCKET_PATCH_*` names are still honored. /// /// First entry of each tuple is the new name (what clap and current code @@ -74,7 +72,10 @@ pub fn warn_legacy_once(legacy_name: &'static str, new_name: &'static str) { pub const LEGACY_ENV_RENAMES: &[(&str, &str)] = &[ ("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL"), ("SOCKET_DEBUG", "SOCKET_PATCH_DEBUG"), - ("SOCKET_TELEMETRY_DISABLED", "SOCKET_PATCH_TELEMETRY_DISABLED"), + ( + "SOCKET_TELEMETRY_DISABLED", + "SOCKET_PATCH_TELEMETRY_DISABLED", + ), ]; /// Promote legacy `SOCKET_PATCH_*` env vars to their new `SOCKET_*` names @@ -109,16 +110,91 @@ pub fn promote_legacy_env_vars() { mod tests { use super::*; - /// The warning bookkeeping is process-global, so any test that flips a - /// real env var would race with parallel tests. Exercise the dedup - /// path directly instead. + /// The warning bookkeeping is process-global, so tests must use env-var + /// names that no other test touches. `std::env` serializes access behind + /// an internal lock, so distinct names never race for memory safety; the + /// only hazard is two tests fighting over the *same* name, which unique + /// names avoid. 
#[test] fn warn_legacy_once_fires_only_once_per_name() { let name = "SOCKET_TEST_LEGACY_ONCE_PATCH"; let new = "SOCKET_TEST_LEGACY_ONCE"; warn_legacy_once(name, new); warn_legacy_once(name, new); - let warned = WARNED.lock().unwrap(); + // The dedup is driven by `HashSet::insert` returning `false` once the + // name has been recorded. Prove that directly: after `warn_legacy_once` + // ran, re-inserting the same name must report "already present", which + // is exactly what suppresses any second eprintln. + let mut warned = WARNED.lock().unwrap(); assert!(warned.contains(name)); + assert!( + !warned.insert(name), + "name should already be recorded, so a second warning is suppressed" + ); + } + + #[test] + fn read_env_prefers_new_var_over_legacy() { + const NEW: &str = "SOCKET_TEST_READ_PREFERS_NEW"; + const LEGACY: &str = "SOCKET_TEST_READ_PREFERS_NEW_PATCH"; + std::env::set_var(NEW, "new-value"); + std::env::set_var(LEGACY, "legacy-value"); + assert_eq!( + read_env_with_legacy(NEW, LEGACY), + Some("new-value".to_string()) + ); + std::env::remove_var(NEW); + std::env::remove_var(LEGACY); + } + + #[test] + fn read_env_falls_back_to_legacy_when_new_unset() { + const NEW: &str = "SOCKET_TEST_READ_FALLBACK_NEW"; + const LEGACY: &str = "SOCKET_TEST_READ_FALLBACK_NEW_PATCH"; + std::env::remove_var(NEW); + std::env::set_var(LEGACY, "legacy-value"); + assert_eq!( + read_env_with_legacy(NEW, LEGACY), + Some("legacy-value".to_string()) + ); + std::env::remove_var(LEGACY); + } + + /// Regression: an empty new var must be treated as "unset" and fall back to + /// the legacy name, matching the prior call sites' `!is_empty()` filtering. 
+ #[test] + fn read_env_empty_new_falls_back_to_legacy() { + const NEW: &str = "SOCKET_TEST_READ_EMPTY_NEW"; + const LEGACY: &str = "SOCKET_TEST_READ_EMPTY_NEW_PATCH"; + std::env::set_var(NEW, ""); + std::env::set_var(LEGACY, "legacy-value"); + assert_eq!( + read_env_with_legacy(NEW, LEGACY), + Some("legacy-value".to_string()) + ); + std::env::remove_var(NEW); + std::env::remove_var(LEGACY); + } + + #[test] + fn read_env_none_when_neither_set() { + const NEW: &str = "SOCKET_TEST_READ_NONE_NEW"; + const LEGACY: &str = "SOCKET_TEST_READ_NONE_NEW_PATCH"; + std::env::remove_var(NEW); + std::env::remove_var(LEGACY); + assert_eq!(read_env_with_legacy(NEW, LEGACY), None); + } + + /// Regression: both names set but empty → `None` (empty == unset on both + /// sides), per the documented contract. + #[test] + fn read_env_none_when_both_empty() { + const NEW: &str = "SOCKET_TEST_READ_BOTH_EMPTY_NEW"; + const LEGACY: &str = "SOCKET_TEST_READ_BOTH_EMPTY_NEW_PATCH"; + std::env::set_var(NEW, ""); + std::env::set_var(LEGACY, ""); + assert_eq!(read_env_with_legacy(NEW, LEGACY), None); + std::env::remove_var(NEW); + std::env::remove_var(LEGACY); } } diff --git a/crates/socket-patch-core/src/utils/fs.rs b/crates/socket-patch-core/src/utils/fs.rs index 397a293e..56432aa5 100644 --- a/crates/socket-patch-core/src/utils/fs.rs +++ b/crates/socket-patch-core/src/utils/fs.rs @@ -26,16 +26,17 @@ use std::path::Path; -use tokio::fs::DirEntry; use std::fs::FileType; +use tokio::fs::DirEntry; /// List the immediate children of `path`. /// /// Returns an empty vector if the directory cannot be read (does not -/// exist, permission denied, etc.) or if any individual `next_entry` -/// call fails. The crawlers treat both cases the same way: surface -/// no packages from the unreadable subtree, but don't abort the -/// whole crawl. +/// exist, permission denied, etc.). If a later `next_entry` call +/// fails mid-iteration, the entries gathered so far are returned and +/// iteration stops. 
The crawlers treat all of these the same way: +/// surface whatever the readable portion of the subtree yields, but +/// don't abort the whole crawl. pub async fn list_dir_entries(path: &Path) -> Vec { let mut entries = match tokio::fs::read_dir(path).await { Ok(rd) => rd, @@ -51,11 +52,18 @@ pub async fn list_dir_entries(path: &Path) -> Vec { /// Resolve whether `entry` is a directory, following symlinks. /// -/// Returns `false` if `file_type()` errors — the caller then skips -/// the entry rather than aborting the walk. +/// Returns `false` if the stat fails (broken symlink, permission +/// error, etc.) — the caller then skips the entry rather than +/// aborting the walk. +/// +/// `DirEntry::metadata()` does **not** traverse symlinks (it behaves +/// like `symlink_metadata`), so a symlink pointing at a directory +/// would wrongly report `false`. To honor the documented +/// symlink-following contract — which crawlers like deno/python/ruby +/// rely on for symlinked package directories — we stat the resolved +/// `entry.path()` via `tokio::fs::metadata`, which does follow links. pub async fn entry_is_dir(entry: &DirEntry) -> bool { - entry - .metadata() + tokio::fs::metadata(entry.path()) .await .map(|m| m.is_dir()) .unwrap_or(false) @@ -96,7 +104,9 @@ mod tests { let tmp = tempfile::tempdir().unwrap(); tokio::fs::create_dir(tmp.path().join("a")).await.unwrap(); tokio::fs::create_dir(tmp.path().join("b")).await.unwrap(); - tokio::fs::write(tmp.path().join("c.txt"), b"").await.unwrap(); + tokio::fs::write(tmp.path().join("c.txt"), b"") + .await + .unwrap(); let mut names: Vec = list_dir_entries(tmp.path()) .await .into_iter() @@ -122,4 +132,86 @@ mod tests { } } } + + /// Regression: `entry_is_dir` must follow symlinks. A symlink that + /// points at a directory has to report `true`, otherwise crawlers + /// silently skip symlinked package directories (pnpm stores, + /// virtualenvs, vendored gems, etc.). 
`DirEntry::metadata()` does + /// NOT traverse symlinks, so this guards against regressing back to + /// it. + #[cfg(unix)] + #[tokio::test] + async fn entry_is_dir_follows_symlink_to_dir() { + let tmp = tempfile::tempdir().unwrap(); + let target = tmp.path().join("real_dir"); + tokio::fs::create_dir(&target).await.unwrap(); + tokio::fs::symlink(&target, tmp.path().join("link_to_dir")) + .await + .unwrap(); + + let entries = list_dir_entries(tmp.path()).await; + let link = entries + .into_iter() + .find(|e| e.file_name().to_string_lossy() == "link_to_dir") + .expect("symlink entry present"); + assert!( + entry_is_dir(&link).await, + "symlink pointing at a directory must resolve to is_dir = true" + ); + } + + /// A symlink pointing at a regular file must report `false`, and a + /// broken/dangling symlink must report `false` rather than panic. + #[cfg(unix)] + #[tokio::test] + async fn entry_is_dir_symlink_to_file_and_broken_link() { + let tmp = tempfile::tempdir().unwrap(); + let file_target = tmp.path().join("real_file"); + tokio::fs::write(&file_target, b"x").await.unwrap(); + tokio::fs::symlink(&file_target, tmp.path().join("link_to_file")) + .await + .unwrap(); + tokio::fs::symlink( + tmp.path().join("missing_target"), + tmp.path().join("dangling"), + ) + .await + .unwrap(); + + for entry in list_dir_entries(tmp.path()).await { + let name = entry.file_name().to_string_lossy().to_string(); + let is_dir = entry_is_dir(&entry).await; + match name.as_str() { + "real_file" | "link_to_file" | "dangling" => { + assert!(!is_dir, "{name} should not be a dir"); + } + other => panic!("unexpected entry: {other}"), + } + } + } + + /// `entry_file_type` is the symlink-aware counterpart: it reports + /// the link itself (`is_symlink`), never the resolved target. 
+ #[cfg(unix)] + #[tokio::test] + async fn entry_file_type_does_not_follow_symlinks() { + let tmp = tempfile::tempdir().unwrap(); + let target = tmp.path().join("real_dir"); + tokio::fs::create_dir(&target).await.unwrap(); + tokio::fs::symlink(&target, tmp.path().join("link_to_dir")) + .await + .unwrap(); + + let entries = list_dir_entries(tmp.path()).await; + let link = entries + .into_iter() + .find(|e| e.file_name().to_string_lossy() == "link_to_dir") + .expect("symlink entry present"); + let ft = entry_file_type(&link).await.expect("file_type available"); + assert!( + ft.is_symlink(), + "entry_file_type must surface the link kind" + ); + assert!(!ft.is_dir(), "entry_file_type must not resolve the target"); + } } diff --git a/crates/socket-patch-core/src/utils/fuzzy_match.rs b/crates/socket-patch-core/src/utils/fuzzy_match.rs index c12178c5..93153582 100644 --- a/crates/socket-patch-core/src/utils/fuzzy_match.rs +++ b/crates/socket-patch-core/src/utils/fuzzy_match.rs @@ -130,14 +130,16 @@ pub fn fuzzy_match_packages( if type_cmp != std::cmp::Ordering::Equal { return type_cmp; } - get_full_name(&a.package).cmp(&get_full_name(&b.package)) + // Tie-break alphabetically by full name. Matching is case-insensitive, + // so the ordering must be too — otherwise byte order sorts uppercase + // ('Z' = 0x5A) before lowercase ('a' = 0x61), which is not alphabetical + // and can flip which package lands at `matches[0]`. 
+ get_full_name(&a.package) + .to_lowercase() + .cmp(&get_full_name(&b.package).to_lowercase()) }); - matches - .into_iter() - .take(limit) - .map(|m| m.package) - .collect() + matches.into_iter().take(limit).map(|m| m.package).collect() } #[cfg(test)] @@ -145,11 +147,7 @@ mod tests { use super::*; use std::path::PathBuf; - fn make_pkg( - name: &str, - version: &str, - namespace: Option<&str>, - ) -> CrawledPackage { + fn make_pkg(name: &str, version: &str, namespace: Option<&str>) -> CrawledPackage { let ns = namespace.map(|s| s.to_string()); let purl = match &ns { Some(n) => format!("pkg:npm/{n}/{name}@{version}"), @@ -242,4 +240,59 @@ mod tests { assert_eq!(results.len(), 10); } + /// Regression: within a single match tier the alphabetical tie-break must + /// be case-insensitive, matching the case-insensitive matching above. With + /// a raw byte-order comparison, 'Z' (0x5A) sorts before 'a' (0x61), so + /// "Zebra" would wrongly precede "apple" and become `matches[0]`. + #[test] + fn test_tiebreak_is_case_insensitive() { + let packages = vec![ + make_pkg("Zebra", "1.0.0", None), + make_pkg("apple", "1.0.0", None), + ]; + // "e" is a substring of both names but a prefix of neither, so both + // land in the same ContainsFull tier and the tie-break decides order. + let results = fuzzy_match_packages("e", &packages, 20); + assert_eq!(results.len(), 2); + assert_eq!( + results[0].name, "apple", + "alphabetical tie-break must ignore case" + ); + assert_eq!(results[1].name, "Zebra"); + } + + /// A better match tier must outrank alphabetical order, and the `limit` + /// truncation must keep the best matches (it is applied after sorting). 
+ #[test] + fn test_best_tier_survives_limit() { + let packages = vec![ + make_pkg("ax", "1.0.0", None), + make_pkg("bx", "1.0.0", None), + make_pkg("x", "1.0.0", None), // ExactFull, but alphabetically last + ]; + let results = fuzzy_match_packages("x", &packages, 1); + assert_eq!(results.len(), 1); + assert_eq!( + results[0].name, "x", + "exact match must beat alphabetically-earlier contains matches" + ); + } + + /// A namespaced package whose bare name (but not its namespace-qualified + /// full name) prefixes the query is a PrefixName match, which ranks below + /// a non-namespaced PrefixFull match for the same query. + #[test] + fn test_namespaced_prefix_name_ranks_below_full() { + let packages = vec![ + make_pkg("lodash", "4.17.21", Some("@scope")), + make_pkg("lodash-es", "4.17.21", None), + ]; + let results = fuzzy_match_packages("lod", &packages, 20); + assert_eq!(results.len(), 2); + assert_eq!( + results[0].name, "lodash-es", + "PrefixFull (no namespace) outranks PrefixName (namespaced)" + ); + assert!(results[0].namespace.is_none()); + } } diff --git a/crates/socket-patch-core/src/utils/process.rs b/crates/socket-patch-core/src/utils/process.rs index 68c2d71c..88475d2f 100644 --- a/crates/socket-patch-core/src/utils/process.rs +++ b/crates/socket-patch-core/src/utils/process.rs @@ -91,4 +91,69 @@ mod tests { let out = runner.run("false", &[]); assert_eq!(out, None); } + + /// Exit 0 but stdout is empty → None. This is the fourth arm of + /// the contract and was previously untested. A successful command + /// that prints nothing carries no information for the crawlers. + #[cfg(unix)] + #[test] + fn system_runner_returns_none_on_empty_stdout_despite_success() { + let runner = SystemCommandRunner; + let out = runner.run("true", &[]); + assert_eq!(out, None); + } + + /// Exit 0 with whitespace-only stdout → None: the empty check + /// happens *after* trimming, so a command that prints only spaces + /// and newlines is treated as "no output". 
+ #[cfg(unix)] + #[test] + fn system_runner_treats_whitespace_only_stdout_as_empty() { + let runner = SystemCommandRunner; + let out = runner.run("sh", &["-c", "printf ' \\t\\n '"]); + assert_eq!(out, None); + } + + /// Surrounding whitespace is trimmed from a non-empty result, so + /// callers that join the value into a path don't get stray + /// newlines (e.g. `npm root -g` emits a trailing `\n`). + #[cfg(unix)] + #[test] + fn system_runner_trims_surrounding_whitespace() { + let runner = SystemCommandRunner; + let out = runner.run("sh", &["-c", "printf ' /some/path \\n'"]); + assert_eq!(out.as_deref(), Some("/some/path")); + } + + /// stderr never leaks into the result. When stdout is empty but + /// the process wrote to stderr and still exited 0, the result is + /// None — stderr is captured and dropped, not returned. + #[cfg(unix)] + #[test] + fn system_runner_ignores_stderr_when_stdout_empty() { + let runner = SystemCommandRunner; + let out = runner.run("sh", &["-c", "printf 'diagnostic' >&2"]); + assert_eq!(out, None); + } + + /// When a command writes to both streams, only stdout comes back — + /// the stderr line must not be appended or interleaved. + #[cfg(unix)] + #[test] + fn system_runner_returns_only_stdout_when_both_streams_used() { + let runner = SystemCommandRunner; + let out = runner.run("sh", &["-c", "printf 'good\\n'; printf 'bad\\n' >&2"]); + assert_eq!(out.as_deref(), Some("good")); + } + + /// Every element of `args` is forwarded to the child in order. + /// Here `$0` is `sh` and `$1` is `forwarded`; printing `$1` proves + /// positional args survive the hop into `Command::args`. 
+ #[cfg(unix)] + #[test] + fn system_runner_forwards_all_args_in_order() { + let runner = SystemCommandRunner; + let out = runner.run("sh", &["-c", "printf '%s' \"$1\"", "sh", "forwarded"]); + assert_eq!(out.as_deref(), Some("forwarded")); + } } diff --git a/crates/socket-patch-core/src/utils/purl.rs b/crates/socket-patch-core/src/utils/purl.rs index e049121b..d2fccca5 100644 --- a/crates/socket-patch-core/src/utils/purl.rs +++ b/crates/socket-patch-core/src/utils/purl.rs @@ -219,7 +219,6 @@ pub fn build_cargo_purl(name: &str, version: &str) -> String { format!("pkg:cargo/{name}@{version}") } - /// Check if a string looks like a PURL. pub fn is_purl(s: &str) -> bool { s.starts_with("pkg:") @@ -378,10 +377,7 @@ mod tests { #[test] fn test_build_gem_purl() { - assert_eq!( - build_gem_purl("rails", "7.1.0"), - "pkg:gem/rails@7.1.0" - ); + assert_eq!(build_gem_purl("rails", "7.1.0"), "pkg:gem/rails@7.1.0"); } #[test] @@ -405,8 +401,14 @@ mod tests { ); assert_eq!(parse_maven_purl("pkg:npm/lodash@4.17.21"), None); assert_eq!(parse_maven_purl("pkg:maven/@3.12.0"), None); - assert_eq!(parse_maven_purl("pkg:maven/org.apache.commons/@3.12.0"), None); - assert_eq!(parse_maven_purl("pkg:maven/org.apache.commons/commons-lang3@"), None); + assert_eq!( + parse_maven_purl("pkg:maven/org.apache.commons/@3.12.0"), + None + ); + assert_eq!( + parse_maven_purl("pkg:maven/org.apache.commons/commons-lang3@"), + None + ); } #[cfg(feature = "maven")] @@ -572,4 +574,103 @@ mod tests { assert_eq!(version, "8.0.0"); } + // --- Regression: qualifier handling ------------------------------------- + // + // Qualifiers are stripped *before* the version is split off with + // `rfind('@')`. This matters because a qualifier *value* can itself + // contain an `@` (e.g. a `git@github.com` source URL). If stripping + // ran after the `@` search, that trailing `@` would be mistaken for + // the version separator and corrupt both name and version. 
+ + #[test] + fn test_strip_qualifiers_with_embedded_at() { + assert_eq!( + strip_purl_qualifiers("pkg:pypi/requests@2.28.0?vcs_url=git@github.com:psf/requests"), + "pkg:pypi/requests@2.28.0" + ); + } + + #[test] + fn test_parse_pypi_qualifier_with_embedded_at() { + // The `@github.com` inside the qualifier value must not be read + // as the version separator. + assert_eq!( + parse_pypi_purl("pkg:pypi/requests@2.28.0?vcs_url=git@github.com"), + Some(("requests", "2.28.0")) + ); + } + + #[test] + fn test_parse_gem_with_trailing_qualifier() { + assert_eq!( + parse_gem_purl("pkg:gem/nokogiri@1.16.5?platform=java"), + Some(("nokogiri", "1.16.5")) + ); + } + + #[cfg(feature = "maven")] + #[test] + fn test_parse_maven_qualifier_with_embedded_at() { + // groupId/artifactId split must survive an `@` buried in a + // qualifier value. + assert_eq!( + parse_maven_purl( + "pkg:maven/org.apache.commons/commons-lang3@3.12.0?repository_url=user@host" + ), + Some(("org.apache.commons", "commons-lang3", "3.12.0")) + ); + } + + #[cfg(feature = "composer")] + #[test] + fn test_parse_composer_qualifier_with_embedded_at() { + assert_eq!( + parse_composer_purl("pkg:composer/monolog/monolog@3.5.0?source=git@github.com"), + Some((("monolog", "monolog"), "3.5.0")) + ); + } + + #[cfg(feature = "golang")] + #[test] + fn test_parse_golang_keeps_full_module_path() { + // The module path retains its internal slashes — only the + // version is split off. A trailing qualifier is ignored. + assert_eq!( + parse_golang_purl("pkg:golang/github.com/gin-gonic/gin@v1.9.1?type=module"), + Some(("github.com/gin-gonic/gin", "v1.9.1")) + ); + } + + #[cfg(feature = "deno")] + #[test] + fn test_parse_jsr_with_trailing_qualifier() { + // Scope `@` + version `@` + qualifier `@` all coexist; only the + // version `@` should be honored. 
+ assert_eq!( + parse_jsr_purl("pkg:jsr/@std/path@0.220.0?download_url=x@y"), + Some((("@std", "path"), "0.220.0")) + ); + } + + // --- Regression: purl_matches_identifier for non-PyPI keys -------------- + + #[test] + fn test_purl_matches_identifier_qualified_id_needs_exact_key() { + // A qualified identifier must not match an unqualified manifest + // key, even when their bases are equal. + assert!(!purl_matches_identifier( + "pkg:npm/lodash@4.17.21", + "pkg:npm/lodash@4.17.21?foo=bar" + )); + } + + #[test] + fn test_purl_matches_identifier_base_id_matches_qualified_nonpypi_key() { + // A base identifier matches a qualified manifest key in any + // ecosystem (gems can carry a `?platform=` qualifier). + assert!(purl_matches_identifier( + "pkg:gem/nokogiri@1.16.5?platform=java", + "pkg:gem/nokogiri@1.16.5" + )); + } } diff --git a/crates/socket-patch-core/src/utils/telemetry.rs b/crates/socket-patch-core/src/utils/telemetry.rs index 207b1a48..c59bae07 100644 --- a/crates/socket-patch-core/src/utils/telemetry.rs +++ b/crates/socket-patch-core/src/utils/telemetry.rs @@ -14,8 +14,11 @@ use crate::utils::env_compat::read_env_with_legacy; /// Shared across all telemetry events in a single run. static SESSION_ID: Lazy = Lazy::new(|| Uuid::new_v4().to_string()); -/// Package version — updated during build. -const PACKAGE_VERSION: &str = "1.0.0"; +/// Package version — sourced from the crate's `Cargo.toml` at build time so +/// it always tracks the real release (matching `USER_AGENT` in `constants.rs` +/// and the `vex` tooling string). A hardcoded literal here silently drifts +/// from the published version. +const PACKAGE_VERSION: &str = env!("CARGO_PKG_VERSION"); // --------------------------------------------------------------------------- // Types @@ -139,9 +142,11 @@ pub struct TrackPatchEventOptions { /// is set the CLI dispatcher sets `SOCKET_TELEMETRY_DISABLED=1` for the /// duration of the process so this check stays the single source of truth. 
pub fn is_telemetry_disabled() -> bool { - let env_value = - read_env_with_legacy("SOCKET_TELEMETRY_DISABLED", "SOCKET_PATCH_TELEMETRY_DISABLED") - .unwrap_or_default(); + let env_value = read_env_with_legacy( + "SOCKET_TELEMETRY_DISABLED", + "SOCKET_PATCH_TELEMETRY_DISABLED", + ) + .unwrap_or_default(); let disabled_via_env = matches!(env_value.as_str(), "1" | "true"); let vitest = std::env::var("VITEST").unwrap_or_default() == "true"; let offline = matches!( @@ -205,12 +210,13 @@ fn home_dir_string() -> Option { /// Build a telemetry event from the given options. fn build_telemetry_event(options: &TrackPatchEventOptions) -> PatchTelemetryEvent { - let error = options.error.as_ref().map(|(error_type, message)| { - PatchTelemetryError { + let error = options + .error + .as_ref() + .map(|(error_type, message)| PatchTelemetryError { error_type: error_type.clone(), message: Some(sanitize_error_message(message)), - } - }); + }); PatchTelemetryEvent { event_sender_created_at: chrono_now_iso(), @@ -239,9 +245,7 @@ fn chrono_now_iso() -> String { let (year, month, day) = days_to_ymd(days); - format!( - "{year:04}-{month:02}-{day:02}T{hours:02}:{minutes:02}:{seconds:02}.{millis:03}Z" - ) + format!("{year:04}-{month:02}-{day:02}T{hours:02}:{minutes:02}:{seconds:02}.{millis:03}Z") } /// Convert days since Unix epoch to (year, month, day). @@ -280,9 +284,8 @@ async fn send_telemetry_event( (format!("{api_url}/v0/orgs/{slug}/telemetry"), true) } _ => { - let proxy_url = - read_env_with_legacy("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL") - .unwrap_or_else(|| DEFAULT_PATCH_API_PROXY_URL.to_string()); + let proxy_url = read_env_with_legacy("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL") + .unwrap_or_else(|| DEFAULT_PATCH_API_PROXY_URL.to_string()); (format!("{proxy_url}/patch/telemetry"), false) } }; @@ -676,11 +679,7 @@ pub async fn track_patch_repair_failed( /// Track a successful `setup`. 
Reports the detected package manager so /// we can tell which install hooks are exercised in the wild. -pub async fn track_patch_setup( - manager: &str, - api_token: Option<&str>, - org_slug: Option<&str>, -) { +pub async fn track_patch_setup(manager: &str, api_token: Option<&str>, org_slug: Option<&str>) { fire( PatchTelemetryEventType::PatchSetup, "setup", @@ -865,12 +864,18 @@ mod tests { #[test] fn test_event_type_as_str() { // Write-side - assert_eq!(PatchTelemetryEventType::PatchApplied.as_str(), "patch_applied"); + assert_eq!( + PatchTelemetryEventType::PatchApplied.as_str(), + "patch_applied" + ); assert_eq!( PatchTelemetryEventType::PatchApplyFailed.as_str(), "patch_apply_failed" ); - assert_eq!(PatchTelemetryEventType::PatchRemoved.as_str(), "patch_removed"); + assert_eq!( + PatchTelemetryEventType::PatchRemoved.as_str(), + "patch_removed" + ); assert_eq!( PatchTelemetryEventType::PatchRemoveFailed.as_str(), "patch_remove_failed" @@ -884,18 +889,27 @@ mod tests { "patch_rollback_failed" ); // Read-side - assert_eq!(PatchTelemetryEventType::PatchScanned.as_str(), "patch_scanned"); + assert_eq!( + PatchTelemetryEventType::PatchScanned.as_str(), + "patch_scanned" + ); assert_eq!( PatchTelemetryEventType::PatchScanFailed.as_str(), "patch_scan_failed" ); - assert_eq!(PatchTelemetryEventType::PatchFetched.as_str(), "patch_fetched"); + assert_eq!( + PatchTelemetryEventType::PatchFetched.as_str(), + "patch_fetched" + ); assert_eq!( PatchTelemetryEventType::PatchFetchFailed.as_str(), "patch_fetch_failed" ); // Inspection / housekeeping - assert_eq!(PatchTelemetryEventType::PatchListed.as_str(), "patch_listed"); + assert_eq!( + PatchTelemetryEventType::PatchListed.as_str(), + "patch_listed" + ); assert_eq!( PatchTelemetryEventType::PatchRepaired.as_str(), "patch_repaired" @@ -914,7 +928,10 @@ mod tests { "patch_unlock_failed" ); // OpenVEX - assert_eq!(PatchTelemetryEventType::VexGenerated.as_str(), "vex_generated"); + assert_eq!( + 
PatchTelemetryEventType::VexGenerated.as_str(), + "vex_generated" + ); assert_eq!(PatchTelemetryEventType::VexFailed.as_str(), "vex_failed"); } @@ -927,6 +944,26 @@ mod tests { assert!(!ctx.arch.is_empty()); } + /// Regression: the reported version must track the real crate version, + /// not a hardcoded literal that drifts from the published release. + /// Anchoring on `CARGO_PKG_VERSION` (rather than the `PACKAGE_VERSION` + /// const) is deliberate — comparing the context against the same const it + /// is built from is self-referential and can never catch a stale value. + #[test] + fn test_telemetry_version_tracks_crate_version() { + assert_eq!(PACKAGE_VERSION, env!("CARGO_PKG_VERSION")); + assert_eq!( + build_telemetry_context("apply").version, + env!("CARGO_PKG_VERSION") + ); + // The previously-hardcoded literal must never reappear unless the crate + // is genuinely at that version. + assert!( + PACKAGE_VERSION != "1.0.0" || env!("CARGO_PKG_VERSION") == "1.0.0", + "telemetry version is still hardcoded to the stale 1.0.0 literal" + ); + } + #[test] fn test_build_telemetry_event_basic() { let options = TrackPatchEventOptions { diff --git a/crates/socket-patch-core/src/vex/build.rs b/crates/socket-patch-core/src/vex/build.rs index a233f9ce..381523cd 100644 --- a/crates/socket-patch-core/src/vex/build.rs +++ b/crates/socket-patch-core/src/vex/build.rs @@ -117,7 +117,7 @@ pub fn build_document( name: vuln_id, aliases, }, - timestamp: timestamp.clone(), + timestamp: Some(timestamp.clone()), last_updated: None, products: vec![Product { id: opts.product_id.clone(), @@ -580,7 +580,7 @@ mod tests { let strip = |mut d: Document| -> Document { d.timestamp = String::new(); for s in d.statements.iter_mut() { - s.timestamp = String::new(); + s.timestamp = None; } d }; @@ -604,7 +604,7 @@ mod tests { build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts()) .unwrap(); for st in &doc.statements { - assert_eq!(st.timestamp, doc.timestamp); + 
assert_eq!(st.timestamp.as_deref(), Some(doc.timestamp.as_str())); } } diff --git a/crates/socket-patch-core/src/vex/conformance_tests.rs b/crates/socket-patch-core/src/vex/conformance_tests.rs index 459f7c65..19d25625 100644 --- a/crates/socket-patch-core/src/vex/conformance_tests.rs +++ b/crates/socket-patch-core/src/vex/conformance_tests.rs @@ -82,6 +82,37 @@ fn sample_doc() -> Document { .expect("build sample doc") } +/// A document whose single statement is the result of MERGING two +/// patches that share one vuln id and one overlapping CVE. Unlike +/// `sample_doc` (every statement carries a single subcomponent and a +/// set of all-distinct aliases), this fixture forces the builder's +/// transpose to collapse: +/// * two PURLs into one product with TWO subcomponents, and +/// * the duplicated `CVE-DUP` into a single alias. +/// The uniqueness/dedup conformance invariants below are vacuous +/// against `sample_doc`; they only have teeth against a merged +/// statement. +fn merged_doc() -> Document { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/aaa@1.0.0".to_string(), + record("uuid-a", &[("GHSA-shared", &["CVE-DUP", "CVE-A-ONLY"])]), + ); + manifest.patches.insert( + "pkg:npm/bbb@2.0.0".to_string(), + record("uuid-b", &[("GHSA-shared", &["CVE-DUP", "CVE-B-ONLY"])]), + ); + build_document( + &manifest, + &[ + "pkg:npm/aaa@1.0.0".to_string(), + "pkg:npm/bbb@2.0.0".to_string(), + ], + &options(), + ) + .expect("build merged doc") +} + // ── 1. 
`@context` literal value ───────────────────────────────── #[test] @@ -213,7 +244,7 @@ fn affected_statement_in_json_omits_justification() { name: "CVE-X".to_string(), aliases: Vec::new(), }, - timestamp: "2024-01-01T00:00:00Z".to_string(), + timestamp: Some("2024-01-01T00:00:00Z".to_string()), last_updated: None, products: vec![Product { id: "pkg:npm/x@1.0.0".to_string(), @@ -313,7 +344,8 @@ fn all_statement_timestamps_match_document_timestamp() { let doc = sample_doc(); for st in &doc.statements { assert_eq!( - st.timestamp, doc.timestamp, + st.timestamp.as_deref(), + Some(doc.timestamp.as_str()), "statement timestamp must match document timestamp" ); } @@ -369,7 +401,7 @@ fn fully_populated_doc_round_trips_through_serde() { name: "GHSA-xxx".to_string(), aliases: vec!["CVE-2024-1".to_string(), "CVE-2024-2".to_string()], }, - timestamp: "2024-01-01T00:00:00Z".to_string(), + timestamp: Some("2024-01-01T00:00:00Z".to_string()), last_updated: Some("2024-06-01T00:00:00Z".to_string()), products: vec![Product { id: "pkg:npm/app@1.0.0".to_string(), @@ -431,7 +463,16 @@ fn builder_output_is_valid_utf8_json() { let pretty = serde_json::to_string_pretty(&doc).unwrap(); let v_compact: serde_json::Value = serde_json::from_str(&compact).unwrap(); let v_pretty: serde_json::Value = serde_json::from_str(&pretty).unwrap(); + // NOTE: compact-vs-pretty equality alone is a tautology — it holds + // for ANY serializable value. The real interop invariant is that + // the emitted JSON deserializes back into an *equal* `Document` + // (this is what `vexctl merge` / Grype / Trivy rely on), so assert + // that too. assert_eq!(v_compact, v_pretty); + let reparsed_compact: Document = serde_json::from_str(&compact).unwrap(); + let reparsed_pretty: Document = serde_json::from_str(&pretty).unwrap(); + assert_eq!(reparsed_compact, doc, "compact output must round-trip"); + assert_eq!(reparsed_pretty, doc, "pretty output must round-trip"); } // ── 12. 
Each emitted statement has at least one product ───────── @@ -450,7 +491,12 @@ fn every_emitted_statement_has_at_least_one_product() { #[test] fn vulnerability_aliases_are_unique_within_statement() { - let doc = sample_doc(); + // Built from a MERGED statement so the dedup path is actually + // exercised: two patches both list `CVE-DUP`, and the builder must + // collapse it. (Against `sample_doc`, where every alias is already + // distinct, this loop can never observe a duplicate — see the doc + // comment on `merged_doc`.) + let doc = merged_doc(); for st in &doc.statements { let mut seen = std::collections::HashSet::new(); for alias in &st.vulnerability.aliases { @@ -460,15 +506,35 @@ fn vulnerability_aliases_are_unique_within_statement() { ); } } + // Non-vacuous guard: the merged statement carries multiple aliases + // with the overlapping CVE present exactly once. If alias dedup + // regressed, the loop above would fire on `CVE-DUP`. + assert_eq!(doc.statements.len(), 1, "fixture must merge to one statement"); + assert_eq!( + doc.statements[0].vulnerability.aliases, + vec![ + "CVE-A-ONLY".to_string(), + "CVE-B-ONLY".to_string(), + "CVE-DUP".to_string(), + ], + ); } // ── 14. Subcomponent @ids are unique within a product ─────────── #[test] fn subcomponent_ids_are_unique_within_product() { - let doc = sample_doc(); + // Built from a MERGED statement so a product with MORE THAN ONE + // subcomponent actually exists. Against `sample_doc` every product + // has exactly one subcomponent, so the uniqueness loop body runs at + // most once and can never catch a duplicate. 
+ let doc = merged_doc(); + let mut saw_multi_subcomponent_product = false; for st in &doc.statements { for p in &st.products { + if p.subcomponents.len() > 1 { + saw_multi_subcomponent_product = true; + } let mut seen = std::collections::HashSet::new(); for sub in &p.subcomponents { assert!( @@ -479,4 +545,78 @@ fn subcomponent_ids_are_unique_within_product() { } } } + assert!( + saw_multi_subcomponent_product, + "fixture must exercise a product with >1 subcomponent, else this test is vacuous" + ); +} + +// ── 15. Merged-statement transpose, at the JSON layer ─────────── + +#[test] +fn merged_statement_emits_all_subcomponents_with_at_id_in_serialized_json() { + // The `PURL -> {vulnId}` → `vulnId -> {PURL}` transpose is the + // crux of the builder; pin its serialized shape (not just the + // in-memory structs `merged_doc` already exercises). The two + // merged PURLs must surface as sorted subcomponents, each under the + // JSON-LD `@id` key (never raw `id`). + let doc = merged_doc(); + let v = serde_json::to_value(&doc).unwrap(); + let statements = v["statements"].as_array().unwrap(); + assert_eq!(statements.len(), 1, "two patches sharing a vuln → one statement"); + + let subs = statements[0]["products"][0]["subcomponents"] + .as_array() + .unwrap(); + let ids: Vec<&str> = subs + .iter() + .map(|s| { + let obj = s.as_object().unwrap(); + assert!(obj.contains_key("@id"), "subcomponent must use @id"); + assert!(!obj.contains_key("id"), "raw `id` must not leak"); + s["@id"].as_str().unwrap() + }) + .collect(); + // Sorted for deterministic downstream diffs. + assert_eq!(ids, vec!["pkg:npm/aaa@1.0.0", "pkg:npm/bbb@2.0.0"]); +} + +// ── 16. Statement-level `@id` rename (gap in test #2) ─────────── + +#[test] +fn statement_level_id_renders_under_at_sign() { + // The builder never sets `Statement.id`, so the cross-cutting + // `@`-prefix test above (which walks builder output) never covers + // the statement-level rename. 
Pin it directly: a present statement + // id MUST serialize as `@id`, never raw `id`. + let mut s = Statement { + id: Some("urn:uuid:stmt-7".to_string()), + vulnerability: Vulnerability { + name: "GHSA-z".to_string(), + aliases: Vec::new(), + }, + timestamp: Some("2024-01-01T00:00:00Z".to_string()), + last_updated: None, + products: vec![Product { + id: "pkg:npm/x@1.0.0".to_string(), + identifiers: None, + hashes: None, + subcomponents: Vec::new(), + }], + status: Status::NotAffected, + supplier: None, + justification: Some(Justification::InlineMitigationsAlreadyExist), + impact_statement: Some("Patched via Socket".to_string()), + action_statement: None, + }; + let v = serde_json::to_value(&s).unwrap(); + assert_eq!(v["@id"], "urn:uuid:stmt-7"); + assert!(!v.as_object().unwrap().contains_key("id")); + + // And when absent, neither `@id` nor `id` appears. + s.id = None; + let v = serde_json::to_value(&s).unwrap(); + let obj = v.as_object().unwrap(); + assert!(!obj.contains_key("@id"), "absent statement id must omit @id"); + assert!(!obj.contains_key("id")); } diff --git a/crates/socket-patch-core/src/vex/mod.rs b/crates/socket-patch-core/src/vex/mod.rs index 122d3a2d..47033a27 100644 --- a/crates/socket-patch-core/src/vex/mod.rs +++ b/crates/socket-patch-core/src/vex/mod.rs @@ -78,7 +78,7 @@ mod reexport_tests { name: "GHSA-x".to_string(), aliases: Vec::new(), }, - timestamp: String::new(), + timestamp: None, last_updated: None, products: Vec::new(), status: Status::NotAffected, diff --git a/crates/socket-patch-core/src/vex/product.rs b/crates/socket-patch-core/src/vex/product.rs index b4dc014c..d53a2682 100644 --- a/crates/socket-patch-core/src/vex/product.rs +++ b/crates/socket-patch-core/src/vex/product.rs @@ -46,45 +46,54 @@ pub async fn detect_product(cwd: &Path) -> DetectResult { let pyproject_exists = tokio::fs::metadata(&pyproject).await.is_ok(); let cargo_exists = tokio::fs::metadata(&cargo).await.is_ok(); - // Collect a warning if more than one manifest is 
present. - let present_count = [pkg_json_exists, pyproject_exists, cargo_exists] - .iter() - .filter(|b| **b) - .count(); - if present_count > 1 { - let mut found = Vec::new(); - if pkg_json_exists { - found.push("package.json"); - } - if pyproject_exists { - found.push("pyproject.toml"); - } - if cargo_exists { - found.push("Cargo.toml"); - } - result.warnings.push(format!( - "Multiple project manifests detected ({}); using {} for the top-level product", - found.join(", "), - found[0] - )); + // Names of every manifest present, in priority order — used for the + // "detected (...)" portion of the multi-manifest warning. + let mut present = Vec::new(); + if pkg_json_exists { + present.push("package.json"); + } + if pyproject_exists { + present.push("pyproject.toml"); + } + if cargo_exists { + present.push("Cargo.toml"); } + // Read manifests in priority order, taking the first that yields a + // usable PURL. `selected` records the manifest ACTUALLY used — not + // merely the highest-priority one present, because that one may fail + // to parse (invalid JSON, missing version, workspace inheritance) and + // fall through to a lower-priority manifest. The warning must name + // what we used, otherwise it misreports the source. + let mut selected: Option<&str> = None; if pkg_json_exists { if let Some(purl) = read_package_json(&pkg_json).await { result.purl = Some(purl); - return result; + selected = Some("package.json"); } } - if pyproject_exists { + if result.purl.is_none() && pyproject_exists { if let Some(purl) = read_pyproject(&pyproject).await { result.purl = Some(purl); - return result; + selected = Some("pyproject.toml"); } } - if cargo_exists { + if result.purl.is_none() && cargo_exists { if let Some(purl) = read_cargo_toml(&cargo).await { result.purl = Some(purl); - return result; + selected = Some("Cargo.toml"); + } + } + + // Warn only when more than one manifest is present AND we actually + // settled on one — naming the manifest we used. 
+ if present.len() > 1 { + if let Some(used) = selected { + result.warnings.push(format!( + "Multiple project manifests detected ({}); using {} for the top-level product", + present.join(", "), + used + )); } } @@ -219,14 +228,23 @@ fn scan_remote_origin_url(content: &str) -> Option { if !in_section { continue; } - if let Some(rest) = line.strip_prefix("url") { - let rest = rest.trim_start(); - let rest = rest.strip_prefix('=')?.trim(); - if rest.is_empty() { - return None; - } - return Some(rest.to_string()); + // Parse `key = value`. Only the EXACT `url` key counts: a + // `url`-prefixed-but-different key (git permits arbitrary + // config keys, e.g. a custom `urlsuffix`) or a malformed + // `url ...` line without an `=` must be SKIPPED, not abort + // the scan — otherwise a later, valid `url = ...` line in the + // same section would never be read. + let Some((key, value)) = line.split_once('=') else { + continue; + }; + if key.trim() != "url" { + continue; } + let value = value.trim(); + if value.is_empty() { + return None; + } + return Some(value.to_string()); } None } @@ -243,11 +261,19 @@ fn scan_remote_origin_url(content: &str) -> Option { /// * Anything else (self-hosted gitea, generic SSH, etc.) → URL as-is. fn remote_url_to_purl(url: &str) -> String { if let Some((host, path)) = split_remote_host_path(url) { - let cleaned = path.strip_suffix(".git").unwrap_or(path); + // Trim slashes BEFORE stripping `.git`: a URL like + // `https://github.com/owner/repo.git/` carries a trailing + // slash, so stripping `.git` first would no-op and leave + // `repo.git` baked into the PURL. Trim again afterward in case + // the `.git` strip exposes a slash. 
+ let cleaned = path.trim_matches('/'); + let cleaned = cleaned.strip_suffix(".git").unwrap_or(cleaned); let cleaned = cleaned.trim_matches('/'); let parts: Vec<&str> = cleaned.split('/').collect(); if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() { - let ecosystem = match host { + // Hostnames are case-insensitive per DNS; match on a + // lowercased copy so `git@GitHub.com:...` still normalizes. + let ecosystem = match host.to_ascii_lowercase().as_str() { "github.com" => Some("github"), "gitlab.com" => Some("gitlab"), "bitbucket.org" => Some("bitbucket"), @@ -515,6 +541,44 @@ mod tests { assert!(scan_remote_origin_url("[core]\nbare = false\n").is_none()); } + /// Regression: a key that merely *starts with* `url` (e.g. a + /// custom `urlsuffix` git permits) must NOT be treated as the + /// `url` key, and — critically — must not abort the scan before + /// the real `url = ...` line that follows it is read. + #[test] + fn scan_origin_url_ignores_url_prefixed_key_and_keeps_scanning() { + let cfg = "[remote \"origin\"]\n\turlsuffix = nonsense\n\turl = git@github.com:foo/bar.git\n"; + assert_eq!( + scan_remote_origin_url(cfg).as_deref(), + Some("git@github.com:foo/bar.git") + ); + } + + /// Regression: a malformed `url ...` line WITHOUT an `=` must be + /// skipped, allowing a later well-formed `url = ...` line in the + /// same section to still be picked up. (Previously the `?` on the + /// `=` strip aborted the whole function, returning None.) + #[test] + fn scan_origin_url_skips_malformed_url_line_then_finds_valid_one() { + let cfg = "[remote \"origin\"]\n\turl no-equals-here\n\turl = git@github.com:foo/bar.git\n"; + assert_eq!( + scan_remote_origin_url(cfg).as_deref(), + Some("git@github.com:foo/bar.git") + ); + } + + /// A `url` value embedding an `=` (rare, but the scp/https forms + /// permit query-ish suffixes) keeps everything after the FIRST + /// `=`, matching the prior behavior. 
+ #[test] + fn scan_origin_url_preserves_equals_inside_value() { + let cfg = "[remote \"origin\"]\n\turl = https://host/p?token=abc\n"; + assert_eq!( + scan_remote_origin_url(cfg).as_deref(), + Some("https://host/p?token=abc") + ); + } + #[tokio::test] async fn detect_prefers_git_remote_over_package_manifest() { let dir = tempfile::tempdir().unwrap(); @@ -619,12 +683,9 @@ mod tests { .unwrap(); let git_dir = dir.path().join(".git"); tokio::fs::create_dir_all(&git_dir).await.unwrap(); - tokio::fs::write( - git_dir.join("config"), - "[remote \"origin\"]\n\turl = \n", - ) - .await - .unwrap(); + tokio::fs::write(git_dir.join("config"), "[remote \"origin\"]\n\turl = \n") + .await + .unwrap(); let r = detect_product(dir.path()).await; assert_eq!(r.purl.as_deref(), Some("pkg:npm/fallback-app@1.0.0")); @@ -635,8 +696,7 @@ mod tests { /// would surface the regression. #[test] fn scan_origin_url_handles_crlf_line_endings() { - let cfg = - "[remote \"origin\"]\r\n\turl = git@github.com:foo/bar.git\r\n"; + let cfg = "[remote \"origin\"]\r\n\turl = git@github.com:foo/bar.git\r\n"; assert_eq!( scan_remote_origin_url(cfg).as_deref(), Some("git@github.com:foo/bar.git") @@ -756,12 +816,9 @@ mod tests { #[tokio::test] async fn package_json_missing_name_returns_none() { let dir = tempfile::tempdir().unwrap(); - tokio::fs::write( - dir.path().join("package.json"), - r#"{"version":"1.0.0"}"#, - ) - .await - .unwrap(); + tokio::fs::write(dir.path().join("package.json"), r#"{"version":"1.0.0"}"#) + .await + .unwrap(); let r = detect_product(dir.path()).await; assert!(r.purl.is_none()); } @@ -784,7 +841,9 @@ mod tests { #[tokio::test] async fn package_json_invalid_json_returns_none() { let dir = tempfile::tempdir().unwrap(); - tokio::fs::write(dir.path().join("package.json"), "{ not json").await.unwrap(); + tokio::fs::write(dir.path().join("package.json"), "{ not json") + .await + .unwrap(); let r = detect_product(dir.path()).await; assert!(r.purl.is_none()); } @@ -969,13 +1028,117 @@ 
mod tests { #[tokio::test] async fn package_json_missing_version_key_returns_none() { let dir = tempfile::tempdir().unwrap(); + tokio::fs::write(dir.path().join("package.json"), r#"{"name":"x"}"#) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + // ── Regression: `.git` strip ordering ───────────────────────── + + /// Regression: a remote URL carrying BOTH a `.git` suffix AND a + /// trailing slash (`https://github.com/owner/repo.git/`) must still + /// normalize to `pkg:github/owner/repo`. Previously `.git` was + /// stripped before the slash was trimmed, so the strip no-opped and + /// the PURL kept `repo.git`. + #[test] + fn remote_url_dotgit_with_trailing_slash_is_normalized() { + assert_eq!( + remote_url_to_purl("https://github.com/owner/repo.git/"), + "pkg:github/owner/repo" + ); + } + + /// scp-style SSH form with the same `.git/` combination. + #[test] + fn remote_url_ssh_dotgit_with_trailing_slash_is_normalized() { + assert_eq!( + remote_url_to_purl("git@github.com:owner/repo.git/"), + "pkg:github/owner/repo" + ); + } + + /// Regression: hostnames are case-insensitive (DNS), so a remote + /// with a mixed-case host (`GitHub.com`) must still map to the + /// `github` ecosystem rather than fall through to the raw URL. + #[test] + fn remote_url_mixed_case_host_is_normalized() { + assert_eq!( + remote_url_to_purl("git@GitHub.com:owner/repo.git"), + "pkg:github/owner/repo" + ); + assert_eq!( + remote_url_to_purl("https://GitLab.com/foo/bar"), + "pkg:gitlab/foo/bar" + ); + } + + /// The owner/repo path segments stay case-preserved even though the + /// host is lowercased for the ecosystem match — repo names are + /// case-sensitive. 
+ #[test] + fn remote_url_path_case_is_preserved() { + assert_eq!( + remote_url_to_purl("git@GITHUB.COM:SocketDev/Socket-Patch.git"), + "pkg:github/SocketDev/Socket-Patch" + ); + } + + // ── Regression: multi-manifest warning names the USED manifest ── + + /// Regression: when the highest-priority manifest is present but + /// fails to parse (invalid JSON), detection falls through to the + /// next manifest — and the warning must name the manifest ACTUALLY + /// used, not the one that failed. Previously the warning hard-coded + /// `found[0]` ("package.json") even though Cargo.toml was used. + #[tokio::test] + async fn multi_manifest_warning_names_actually_used_manifest() { + let dir = tempfile::tempdir().unwrap(); + // package.json present but unparseable → falls through to Cargo. + tokio::fs::write(dir.path().join("package.json"), "{ not json") + .await + .unwrap(); tokio::fs::write( - dir.path().join("package.json"), - r#"{"name":"x"}"#, + dir.path().join("Cargo.toml"), + "[package]\nname = \"alt\"\nversion = \"9.9.9\"\n", + ) + .await + .unwrap(); + + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:cargo/alt@9.9.9")); + assert_eq!(r.warnings.len(), 1); + // The "detected (...)" list still mentions both manifests. + assert!(r.warnings[0].contains("package.json")); + assert!(r.warnings[0].contains("Cargo.toml")); + // But the "using X" clause must name Cargo.toml, the one used. + assert!( + r.warnings[0].contains("using Cargo.toml"), + "warning should name the manifest actually used: {}", + r.warnings[0] + ); + } + + /// When multiple manifests are present but NONE parse, there is no + /// product to surface and therefore no "using X" warning to emit + /// (it would name a manifest that wasn't actually used). 
+ #[tokio::test] + async fn multi_manifest_all_unparseable_emits_no_warning() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write(dir.path().join("package.json"), "{ not json") + .await + .unwrap(); + // Cargo.toml present but version is workspace-inherited (unsupported). + tokio::fs::write( + dir.path().join("Cargo.toml"), + "[package]\nname = \"alt\"\nversion.workspace = true\n", ) .await .unwrap(); + let r = detect_product(dir.path()).await; assert!(r.purl.is_none()); + assert!(r.warnings.is_empty()); } } diff --git a/crates/socket-patch-core/src/vex/schema.rs b/crates/socket-patch-core/src/vex/schema.rs index 1539b92b..1d6405a2 100644 --- a/crates/socket-patch-core/src/vex/schema.rs +++ b/crates/socket-patch-core/src/vex/schema.rs @@ -57,7 +57,15 @@ pub struct Statement { #[serde(rename = "@id", skip_serializing_if = "Option::is_none", default)] pub id: Option, pub vulnerability: Vulnerability, - pub timestamp: String, + /// RFC 3339 timestamp the statement's assertion was known true. + /// Optional per spec — it cascades down from the document when a + /// statement omits it (see OpenVEX inheritance rules), so a + /// spec-valid document may legitimately leave it out. We always + /// emit one (the builder clones the document timestamp), but the + /// type must still accept its absence on parse, mirroring the + /// sibling `last_updated` field below. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub timestamp: Option, /// RFC 3339 timestamp of the most recent revision of this statement. 
#[serde(skip_serializing_if = "Option::is_none", default)] pub last_updated: Option, @@ -171,8 +179,7 @@ mod tests { #[test] fn every_status_variant_deserializes_from_spec_literal() { for (variant, literal) in STATUS_LITERALS { - let parsed: Status = - serde_json::from_str(&format!("\"{literal}\"")).unwrap(); + let parsed: Status = serde_json::from_str(&format!("\"{literal}\"")).unwrap(); assert_eq!(parsed, *variant, "literal {literal:?}"); } } @@ -216,16 +223,14 @@ mod tests { #[test] fn every_justification_variant_deserializes_from_spec_literal() { for (variant, literal) in JUSTIFICATION_LITERALS { - let parsed: Justification = - serde_json::from_str(&format!("\"{literal}\"")).unwrap(); + let parsed: Justification = serde_json::from_str(&format!("\"{literal}\"")).unwrap(); assert_eq!(parsed, *variant, "literal {literal:?}"); } } #[test] fn justification_rejects_unknown_literal() { - let r: Result = - serde_json::from_str("\"hand_waving\""); + let r: Result = serde_json::from_str("\"hand_waving\""); assert!(r.is_err()); } @@ -314,7 +319,7 @@ mod tests { name: "GHSA-xxxx".to_string(), aliases: Vec::new(), }, - timestamp: "2024-01-01T00:00:00Z".to_string(), + timestamp: Some("2024-01-01T00:00:00Z".to_string()), last_updated: None, products: vec![Product { id: "pkg:npm/app@1.0.0".to_string(), @@ -551,7 +556,7 @@ mod tests { name: "GHSA-xxx".to_string(), aliases: vec!["CVE-2024-0001".to_string()], }, - timestamp: "2024-01-01T00:00:00Z".to_string(), + timestamp: Some("2024-01-01T00:00:00Z".to_string()), last_updated: Some("2024-06-01T00:00:00Z".to_string()), products: vec![Product { id: "pkg:npm/app@1.0.0".to_string(), @@ -604,4 +609,131 @@ mod tests { assert!(st.supplier.is_none()); assert!(st.action_statement.is_none()); } + + // ── Statement timestamp is optional/inheritable per spec ─────── + + /// Regression: the statement-level `timestamp` is OPTIONAL in + /// OpenVEX 0.2.0 — it cascades from the document when omitted. 
A + /// spec-valid statement that leaves it out (the canonical spec + /// example does exactly this for a `fixed` statement) MUST parse, + /// not error with "missing field `timestamp`". Previously the + /// field was a required `String`, so this document was rejected. + #[test] + fn statement_without_timestamp_parses_and_leaves_it_none() { + let doc_json = r#"{ + "@context": "https://openvex.dev/ns/v0.2.0", + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": 1, + "statements": [ + { + "vulnerability": {"name": "CVE-2014-123456"}, + "products": [{"@id": "pkg:apk/wolfi/bash@1.0.0"}], + "status": "fixed" + } + ] + }"#; + let doc: Document = + serde_json::from_str(doc_json).expect("statement may omit timestamp (inherited)"); + assert_eq!(doc.statements.len(), 1); + assert!( + doc.statements[0].timestamp.is_none(), + "omitted statement timestamp must deserialize to None, not error" + ); + } + + /// A statement timestamp that IS present round-trips through the + /// `Option` field, and an absent one is omitted from the + /// serialized JSON (no `null`, no empty string). + #[test] + fn statement_timestamp_some_emits_none_omits() { + let mut s = minimal_statement(); // carries Some(timestamp) + let v = serde_json::to_value(&s).unwrap(); + assert_eq!(v["timestamp"], "2024-01-01T00:00:00Z"); + + s.timestamp = None; + let v = serde_json::to_value(&s).unwrap(); + assert!( + v.as_object().unwrap().get("timestamp").is_none(), + "None timestamp must be omitted, never serialized as null/empty" + ); + } + + // ── Forward-compat: unmodeled spec fields are tolerated ──────── + + /// OpenVEX 0.2.0 carries fields we intentionally don't model + /// (statement-level `version`, `status_notes`, + /// `action_statement_timestamp`, vulnerability `@id`/`description`). + /// Real documents and future spec revisions will include them. 
+ /// Because no struct uses `#[serde(deny_unknown_fields)]`, parsing + /// MUST ignore them rather than erroring — pin that so a future + /// `deny_unknown_fields` (which would break interop) regresses here. + #[test] + fn parsing_tolerates_unmodeled_spec_fields() { + let doc_json = r#"{ + "@context": "https://openvex.dev/ns/v0.2.0", + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": 1, + "extra_doc_field": "ignored", + "statements": [ + { + "@id": "urn:uuid:stmt-1", + "version": 2, + "vulnerability": { + "@id": "https://nvd.example/CVE-2024-1", + "name": "GHSA-x", + "description": "an unmodeled field", + "aliases": ["CVE-2024-1"] + }, + "timestamp": "2024-01-01T00:00:00Z", + "status_notes": "determined by hand", + "products": [{ + "@id": "pkg:npm/app@1.0.0", + "subcomponents": [{"@id": "pkg:npm/lodash@4.17.21"}] + }], + "status": "not_affected", + "justification": "inline_mitigations_already_exist", + "action_statement_timestamp": "2024-01-02T00:00:00Z" + } + ] + }"#; + let doc: Document = + serde_json::from_str(doc_json).expect("unmodeled spec fields must be ignored"); + assert_eq!(doc.statements.len(), 1); + let st = &doc.statements[0]; + assert_eq!(st.vulnerability.name, "GHSA-x"); + assert_eq!(st.vulnerability.aliases, vec!["CVE-2024-1".to_string()]); + assert_eq!(st.status, Status::NotAffected); + assert_eq!(st.products[0].subcomponents[0].id, "pkg:npm/lodash@4.17.21"); + } + + // ── Wire format: multi-word keys stay snake_case ─────────────── + + /// The statement-level multi-word keys MUST be emitted in the + /// OpenVEX snake_case spelling. `Statement` has no `rename_all`, so + /// this relies on the field idents already being snake_case. + /// Round-trip tests can't catch a switch to + /// `rename_all = "camelCase"` (ser/de would stay symmetric), so pin + /// the exact emitted keys — and assert the camelCase forms are absent. 
+ #[test] + fn statement_multiword_keys_emit_in_snake_case() { + let mut s = minimal_statement(); + s.last_updated = Some("2024-02-01T00:00:00Z".to_string()); + s.impact_statement = Some("x".to_string()); + s.action_statement = Some("y".to_string()); + let v = serde_json::to_value(&s).unwrap(); + let obj = v.as_object().unwrap(); + for snake in ["last_updated", "impact_statement", "action_statement"] { + assert!(obj.contains_key(snake), "missing snake_case key {snake:?}"); + } + for camel in ["lastUpdated", "impactStatement", "actionStatement"] { + assert!( + !obj.contains_key(camel), + "camelCase key {camel:?} must never be emitted" + ); + } + } } diff --git a/crates/socket-patch-core/src/vex/time.rs b/crates/socket-patch-core/src/vex/time.rs index dfd35371..096661a7 100644 --- a/crates/socket-patch-core/src/vex/time.rs +++ b/crates/socket-patch-core/src/vex/time.rs @@ -220,6 +220,71 @@ mod tests { ); } + /// Century years 2200 and 2300 are divisible by 100 but NOT by + /// 400, so neither has a Feb 29 — Feb 28 must roll straight to + /// Mar 1. Complements `century_year_2100_is_not_a_leap_year` and + /// guards the `doe / 36_524` / `doe / 146_096` era corrections at + /// timestamps where the `era` quotient is ≥ 1 (post-2099). + #[test] + fn far_future_century_years_are_not_leap() { + assert_eq!( + format_unix_secs_rfc3339(7_263_215_999), + "2200-02-28T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(7_263_216_000), + "2200-03-01T00:00:00Z" + ); + assert_eq!( + format_unix_secs_rfc3339(10_418_889_599), + "2300-02-28T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(10_418_889_600), + "2300-03-01T00:00:00Z" + ); + } + + /// 2400 is divisible by 400 → leap year, so Feb 29 2400 exists. + /// This is the four-century reset point one full era past 2000, + /// exercising the `era * 400` year reconstruction with `era` ≥ 1. 
+ #[test] + fn year_2400_is_a_leap_year() { + assert_eq!( + format_unix_secs_rfc3339(13_574_606_400), + "2400-02-29T12:00:00Z" + ); + } + + /// A far-future leap day (2248-02-29) with a non-trivial time of + /// day. Pins the full Y/M/D/h/m/s reconstruction at a timestamp + /// well into the `era == 1` range. + #[test] + fn far_future_leap_day_with_time_of_day() { + assert_eq!( + format_unix_secs_rfc3339(8_777_917_815), + "2248-02-29T06:30:15Z" + ); + } + + /// Time-of-day rollovers: second→minute, minute→hour, and the + /// noon midpoint. The date-boundary tests above never cross a + /// `:59 → :00` minute/hour carry within a fixed day, so these pin + /// the `secs_of_day` div/mod arithmetic directly. + #[test] + fn time_of_day_rollovers() { + let cases: &[(u64, &str)] = &[ + (1_704_067_259, "2024-01-01T00:00:59Z"), // last second of minute 0 + (1_704_067_260, "2024-01-01T00:01:00Z"), // minute carry + (1_704_070_799, "2024-01-01T00:59:59Z"), // last second of hour 0 + (1_704_070_800, "2024-01-01T01:00:00Z"), // hour carry + (1_704_110_400, "2024-01-01T12:00:00Z"), // noon + ]; + for &(secs, expected) in cases { + assert_eq!(format_unix_secs_rfc3339(secs), expected, "secs={secs}"); + } + } + /// `u64::MAX` does not panic. Output isn't asserted byte-for-byte /// because the algorithm uses an `i64` cast that overflows in /// well-defined wrapping in debug-release but the function MUST @@ -230,9 +295,7 @@ mod tests { // Wrap in `std::panic::catch_unwind` for safety even though // the function uses pure arithmetic — a regression that // introduced an unsafe cast would still be caught. - let result = std::panic::catch_unwind(|| { - format_unix_secs_rfc3339(u64::MAX) - }); + let result = std::panic::catch_unwind(|| format_unix_secs_rfc3339(u64::MAX)); assert!(result.is_ok(), "u64::MAX must not panic"); // The output shape should still end in `Z`. 
let s = result.unwrap(); diff --git a/crates/socket-patch-core/src/vex/verify.rs b/crates/socket-patch-core/src/vex/verify.rs index c930affe..86bcce9e 100644 --- a/crates/socket-patch-core/src/vex/verify.rs +++ b/crates/socket-patch-core/src/vex/verify.rs @@ -73,10 +73,21 @@ pub async fn applied_patches( /// Returns `Ok(())` if every file in `record.files` is `AlreadyPatched`. /// Otherwise returns a short routing tag describing the first failure. +/// +/// A record with **no files** is *not* treated as applied. Verification +/// is the strict counterpart to `--no-verify`: it must produce positive +/// on-disk evidence before a patch is attested as `not_affected`. A +/// zero-file record offers nothing to hash, so — per the module's +/// "omit when unconfirmed" contract — it is reported as `no_files` and +/// dropped from the VEX document rather than vacuously attested. async fn verify_patch_record( pkg_path: &Path, record: &crate::manifest::schema::PatchRecord, ) -> Result<(), String> { + if record.files.is_empty() { + return Err("no_files".to_string()); + } + for (file_name, file_info) in &record.files { let result = verify_file_patch(pkg_path, file_name, file_info).await; match result.status { @@ -141,9 +152,10 @@ mod tests { #[tokio::test] async fn missing_path_falls_into_failed() { let mut manifest = PatchManifest::new(); - manifest - .patches - .insert("pkg:npm/x@1.0.0".to_string(), record_with_one_file("deadbeef")); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record_with_one_file("deadbeef"), + ); let paths: HashMap = HashMap::new(); let out = applied_patches(&manifest, &paths).await; @@ -162,7 +174,9 @@ mod tests { let mut manifest = PatchManifest::new(); manifest.patches.insert( "pkg:npm/x@1.0.0".to_string(), - record_with_one_file("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), + record_with_one_file( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + ), ); let mut paths = HashMap::new(); @@ 
-179,7 +193,9 @@ mod tests { let mut manifest = PatchManifest::new(); manifest.patches.insert( "pkg:npm/x@1.0.0".to_string(), - record_with_one_file("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), + record_with_one_file( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + ), ); let mut paths = HashMap::new(); @@ -278,13 +294,16 @@ mod tests { assert!(out.failed.is_empty()); } - /// A patch with `files = {}` is vacuously applied — the - /// "all files match" predicate is `true` over an empty set. - /// This is intentional behavior: a "patch" that touches no - /// files is always-applied. Documented here so a future - /// refactor that flips the predicate is forced to revisit it. + /// A patch with `files = {}` must NOT be treated as applied. + /// Verification requires positive on-disk evidence before a patch + /// is attested as `not_affected`; a zero-file record offers nothing + /// to hash, so it is omitted with reason `no_files`. Attesting it as + /// "fixed" would be an evidence-free claim, contradicting the + /// module's "omit when unconfirmed" contract. (The `--no-verify` + /// path, which trusts the manifest wholesale, is unaffected — it + /// never calls this function.) 
#[tokio::test] - async fn patch_record_with_zero_files_is_vacuously_applied() { + async fn patch_record_with_zero_files_is_not_applied() { let pkg_dir = tempfile::tempdir().unwrap(); let mut manifest = PatchManifest::new(); manifest.patches.insert( @@ -307,8 +326,13 @@ mod tests { ); let out = applied_patches(&manifest, &paths).await; - assert_eq!(out.applied, vec!["pkg:npm/empty@1.0.0".to_string()]); - assert!(out.failed.is_empty()); + assert!( + out.applied.is_empty(), + "a zero-file patch must not be attested as applied" + ); + assert_eq!(out.failed.len(), 1); + assert_eq!(out.failed[0].purl, "pkg:npm/empty@1.0.0"); + assert_eq!(out.failed[0].reason, "no_files"); } /// Extra `package_paths` entries that aren't in the manifest @@ -408,4 +432,265 @@ mod tests { "unexpected reason: {reason}" ); } + + /// A new-file patch (empty `beforeHash`) whose file exists on disk + /// at the `afterHash` content counts as applied. `verify_file_patch` + /// returns `AlreadyPatched` before its is-new-file `Ready` branch, so + /// the created-and-applied case is not misreported as `not_applied`. 
+ #[tokio::test] + async fn new_file_present_at_after_hash_is_applied() { + let pkg_dir = tempfile::tempdir().unwrap(); + let created = b"freshly-created-file"; + let hash = compute_git_sha256_from_bytes(created); + tokio::fs::write(pkg_dir.path().join("new.js"), created) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "new.js".to_string(), + PatchFileInfo { + before_hash: String::new(), // new file + after_hash: hash, + }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.applied, vec!["pkg:npm/x@1.0.0".to_string()]); + assert!(out.failed.is_empty()); + } + + /// A new-file patch whose file is absent on disk is `not_applied` + /// (the creation hasn't happened yet) — NOT `file_not_found`. The + /// empty `beforeHash` routes through the `Ready` branch. 
+ #[tokio::test] + async fn new_file_absent_is_not_applied() { + let pkg_dir = tempfile::tempdir().unwrap(); + let mut files = HashMap::new(); + files.insert( + "new.js".to_string(), + PatchFileInfo { + before_hash: String::new(), // new file, not yet created + after_hash: + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + .to_string(), + }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed[0].reason, "not_applied"); + } + + /// A no-op patch where `beforeHash == afterHash` and the file is at + /// that content is applied — `verify_file_patch` checks `afterHash` + /// first, so it never mistakes the file for the un-patched `Ready` + /// state. 
+ #[tokio::test] + async fn noop_patch_before_equals_after_is_applied() { + let pkg_dir = tempfile::tempdir().unwrap(); + let content = b"unchanged-content"; + let hash = compute_git_sha256_from_bytes(content); + tokio::fs::write(pkg_dir.path().join("index.js"), content) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "index.js".to_string(), + PatchFileInfo { + before_hash: hash.clone(), + after_hash: hash, + }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.applied, vec!["pkg:npm/x@1.0.0".to_string()]); + assert!(out.failed.is_empty()); + } + + /// A multi-file patch where EVERY file is at its `afterHash` is + /// applied — the loop must run to completion (no early `Ok`) and + /// bucket the PURL into `applied`. 
+ #[tokio::test] + async fn multi_file_all_patched_is_applied() { + let pkg_dir = tempfile::tempdir().unwrap(); + let a = b"patched-a"; + let b = b"patched-b"; + let hash_a = compute_git_sha256_from_bytes(a); + let hash_b = compute_git_sha256_from_bytes(b); + tokio::fs::write(pkg_dir.path().join("a.js"), a).await.unwrap(); + tokio::fs::write(pkg_dir.path().join("b.js"), b).await.unwrap(); + + let mut files = HashMap::new(); + files.insert( + "a.js".to_string(), + PatchFileInfo { before_hash: "aaaa".to_string(), after_hash: hash_a }, + ); + files.insert( + "b.js".to_string(), + PatchFileInfo { before_hash: "bbbb".to_string(), after_hash: hash_b }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.applied, vec!["pkg:npm/x@1.0.0".to_string()]); + assert!(out.failed.is_empty()); + } + + /// A manifest with both an applied PURL and a failing PURL splits + /// cleanly across the two buckets. Order is HashMap-nondeterministic, + /// so we assert membership, not index. + #[tokio::test] + async fn mixed_manifest_splits_into_both_buckets() { + let ok_dir = tempfile::tempdir().unwrap(); + let patched = b"patched-content"; + let hash = compute_git_sha256_from_bytes(patched); + tokio::fs::write(ok_dir.path().join("index.js"), patched) + .await + .unwrap(); + + // Failing package: file present but at the wrong content. 
+ let bad_dir = tempfile::tempdir().unwrap(); + tokio::fs::write(bad_dir.path().join("index.js"), b"wrong") + .await + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest + .patches + .insert("pkg:npm/ok@1.0.0".to_string(), record_with_one_file(&hash)); + manifest.patches.insert( + "pkg:npm/bad@1.0.0".to_string(), + record_with_one_file( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + ), + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/ok@1.0.0".to_string(), ok_dir.path().to_path_buf()); + paths.insert("pkg:npm/bad@1.0.0".to_string(), bad_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert_eq!(out.applied, vec!["pkg:npm/ok@1.0.0".to_string()]); + assert_eq!(out.failed.len(), 1); + assert_eq!(out.failed[0].purl, "pkg:npm/bad@1.0.0"); + assert_eq!(out.failed[0].reason, "hash_mismatch"); + } + + /// At most ONE `FailedPatch` is recorded per PURL even when several + /// files would fail — `verify_patch_record` returns on the first + /// failure. Two distinct failing files, single failure recorded. + #[tokio::test] + async fn at_most_one_failure_recorded_per_purl() { + let pkg_dir = tempfile::tempdir().unwrap(); + // a.js: hash mismatch (neither before nor after). + tokio::fs::write(pkg_dir.path().join("a.js"), b"garbage") + .await + .unwrap(); + // b.js: absent → would be file_not_found. 
+ + let mut files = HashMap::new(); + files.insert( + "a.js".to_string(), + PatchFileInfo { before_hash: "aaaa".to_string(), after_hash: "deadbeef".to_string() }, + ); + files.insert( + "b.js".to_string(), + PatchFileInfo { before_hash: "bbbb".to_string(), after_hash: "deadbeef".to_string() }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed.len(), 1, "one FailedPatch per PURL, not per file"); + assert!( + matches!(out.failed[0].reason.as_str(), "hash_mismatch" | "file_not_found"), + "unexpected reason: {}", + out.failed[0].reason + ); + } } diff --git a/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs b/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs index 76ce26c4..011469b9 100644 --- a/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs +++ b/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs @@ -118,15 +118,27 @@ async fn fetch_missing_sources_diff_mode_with_no_diffs_path() { /// `"blob"` synonym for `File`, and rejects unknown strings. 
#[test] fn download_mode_parse_covers_all_branches() { - assert!(matches!(DownloadMode::parse("diff"), Ok(DownloadMode::Diff))); + assert!(matches!( + DownloadMode::parse("diff"), + Ok(DownloadMode::Diff) + )); assert!(matches!( DownloadMode::parse("package"), Ok(DownloadMode::Package) )); - assert!(matches!(DownloadMode::parse("file"), Ok(DownloadMode::File))); - assert!(matches!(DownloadMode::parse("blob"), Ok(DownloadMode::File))); + assert!(matches!( + DownloadMode::parse("file"), + Ok(DownloadMode::File) + )); + assert!(matches!( + DownloadMode::parse("blob"), + Ok(DownloadMode::File) + )); // Case-insensitive. - assert!(matches!(DownloadMode::parse("DIFF"), Ok(DownloadMode::Diff))); + assert!(matches!( + DownloadMode::parse("DIFF"), + Ok(DownloadMode::Diff) + )); assert!(matches!( DownloadMode::parse("Package"), Ok(DownloadMode::Package) @@ -139,7 +151,11 @@ fn download_mode_parse_covers_all_branches() { /// `DownloadMode::as_tag` round-trips with `parse` for all variants. #[test] fn download_mode_as_tag_round_trips_with_parse() { - for mode in [DownloadMode::Diff, DownloadMode::Package, DownloadMode::File] { + for mode in [ + DownloadMode::Diff, + DownloadMode::Package, + DownloadMode::File, + ] { let tag = mode.as_tag(); assert_eq!(DownloadMode::parse(tag).unwrap(), mode); } diff --git a/crates/socket-patch-core/tests/common/mod.rs b/crates/socket-patch-core/tests/common/mod.rs index 5f63a626..78e9b18f 100644 --- a/crates/socket-patch-core/tests/common/mod.rs +++ b/crates/socket-patch-core/tests/common/mod.rs @@ -76,7 +76,10 @@ impl MockCommandRunner { /// `Some(stdout)` simulates the binary returning success; a /// `None` simulates spawn failure or non-zero exit. 
pub fn with_response(mut self, bin: &str, args: &[&str], stdout: Option<&str>) -> Self { - let key = (bin.to_string(), args.iter().map(|s| s.to_string()).collect()); + let key = ( + bin.to_string(), + args.iter().map(|s| s.to_string()).collect(), + ); self.responses.insert(key, stdout.map(|s| s.to_string())); self } @@ -84,7 +87,10 @@ impl MockCommandRunner { impl socket_patch_core::utils::process::CommandRunner for MockCommandRunner { fn run(&self, bin: &str, args: &[&str]) -> Option { - let key = (bin.to_string(), args.iter().map(|s| s.to_string()).collect()); + let key = ( + bin.to_string(), + args.iter().map(|s| s.to_string()).collect(), + ); self.responses.get(&key).cloned().unwrap_or(None) } } diff --git a/crates/socket-patch-core/tests/crawler_cargo_e2e.rs b/crates/socket-patch-core/tests/crawler_cargo_e2e.rs index f5e9d372..fa797a03 100644 --- a/crates/socket-patch-core/tests/crawler_cargo_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_cargo_e2e.rs @@ -22,21 +22,25 @@ fn options_at(root: &Path) -> CrawlerOptions { async fn stage_registry_crate(src: &Path, name: &str, version: &str) -> std::path::PathBuf { let pkg = src.join(format!("{name}-{version}")); tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); - let cargo_toml = format!( - "[package]\nname = \"{name}\"\nversion = \"{version}\"\nedition = \"2021\"\n" - ); - tokio::fs::write(pkg.join("Cargo.toml"), cargo_toml).await.unwrap(); - tokio::fs::write(pkg.join("src").join("lib.rs"), b"// stub").await.unwrap(); + let cargo_toml = + format!("[package]\nname = \"{name}\"\nversion = \"{version}\"\nedition = \"2021\"\n"); + tokio::fs::write(pkg.join("Cargo.toml"), cargo_toml) + .await + .unwrap(); + tokio::fs::write(pkg.join("src").join("lib.rs"), b"// stub") + .await + .unwrap(); pkg } async fn stage_vendor_crate(src: &Path, name: &str, version: &str) -> std::path::PathBuf { let pkg = src.join(name); tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); - let cargo_toml = format!( - 
"[package]\nname = \"{name}\"\nversion = \"{version}\"\nedition = \"2021\"\n" - ); - tokio::fs::write(pkg.join("Cargo.toml"), cargo_toml).await.unwrap(); + let cargo_toml = + format!("[package]\nname = \"{name}\"\nversion = \"{version}\"\nedition = \"2021\"\n"); + tokio::fs::write(pkg.join("Cargo.toml"), cargo_toml) + .await + .unwrap(); pkg } @@ -44,8 +48,7 @@ async fn stage_vendor_crate(src: &Path, name: &str, version: &str) -> std::path: #[test] fn parse_cargo_toml_well_formed() { - let toml = - "[package]\nname = \"serde\"\nversion = \"1.0.200\"\nedition = \"2021\"\n"; + let toml = "[package]\nname = \"serde\"\nversion = \"1.0.200\"\nedition = \"2021\"\n"; assert_eq!( parse_cargo_toml_name_version(toml), Some(("serde".to_string(), "1.0.200".to_string())) @@ -87,7 +90,8 @@ fn parse_cargo_toml_stops_at_next_section() { /// (e.g. inside an earlier [profile.release] table). #[test] fn parse_cargo_toml_ignores_lines_before_package_section() { - let toml = "[profile.release]\nname = \"wrong\"\n\n[package]\nname = \"foo\"\nversion = \"1.0.0\"\n"; + let toml = + "[profile.release]\nname = \"wrong\"\n\n[package]\nname = \"foo\"\nversion = \"1.0.0\"\n"; assert_eq!( parse_cargo_toml_name_version(toml), Some(("foo".to_string(), "1.0.0".to_string())) @@ -182,10 +186,7 @@ async fn find_by_purls_vendor_version_mismatch_returns_empty() { let crawler = CargoCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:cargo/serde@99.99.99".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:cargo/serde@99.99.99".to_string()]) .await .unwrap(); assert!(result.is_empty(), "version mismatch in vendor must skip"); @@ -207,10 +208,7 @@ async fn find_by_purls_invalid_purl_skipped() { let tmp = tempfile::tempdir().unwrap(); let crawler = CargoCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:not-cargo/serde@1.0".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:not-cargo/serde@1.0".to_string()]) .await .unwrap(); assert!(result.is_empty()); @@ 
-272,7 +270,10 @@ async fn get_crate_source_paths_with_vendor_dir_returns_vendor() { tokio::fs::create_dir(&vendor).await.unwrap(); let crawler = CargoCrawler; - let paths = crawler.get_crate_source_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_crate_source_paths(&options_at(tmp.path())) + .await + .unwrap(); assert_eq!(paths, vec![vendor]); } @@ -281,7 +282,10 @@ async fn get_crate_source_paths_no_cargo_project_returns_empty() { let tmp = tempfile::tempdir().unwrap(); // No Cargo.toml, no Cargo.lock, no vendor. let crawler = CargoCrawler; - let paths = crawler.get_crate_source_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_crate_source_paths(&options_at(tmp.path())) + .await + .unwrap(); assert!(paths.is_empty(), "non-Cargo dir must return empty paths"); } @@ -353,10 +357,7 @@ async fn find_by_purls_verify_fallback_via_dir_name() { let crawler = CargoCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:cargo/workspace-crate@0.1.0".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:cargo/workspace-crate@0.1.0".to_string()]) .await .unwrap(); assert_eq!(result.len(), 1, "verify must fall back to dir name"); @@ -425,7 +426,10 @@ async fn crawl_all_skips_hidden_dirs() { let result = crawler.crawl_all(&opts).await; let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect(); assert!(names.contains(&"real-crate")); - assert!(!names.contains(&"hidden-crate"), "hidden dir must be skipped"); + assert!( + !names.contains(&"hidden-crate"), + "hidden dir must be skipped" + ); } /// `read_crate_cargo_toml` early-returns when the purl has already @@ -453,7 +457,11 @@ async fn crawl_all_dedups_same_purl() { batch_size: 100, }; let result = crawler.crawl_all(&opts).await; - assert_eq!(result.len(), 1, "duplicate purls must dedup; got {result:?}"); + assert_eq!( + result.len(), + 1, + "duplicate purls must dedup; got {result:?}" + ); } /// `get_crate_source_paths` in local mode without a 
vendor dir but @@ -464,14 +472,19 @@ async fn crawl_all_dedups_same_purl() { #[serial_test::serial] async fn get_crate_source_paths_local_cargo_toml_falls_back_to_registry() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("Cargo.toml"), b"[package]\n").await.unwrap(); + tokio::fs::write(tmp.path().join("Cargo.toml"), b"[package]\n") + .await + .unwrap(); // CARGO_HOME points at an empty tempdir → no registry/src to scan. let cargo_home = tempfile::tempdir().unwrap(); let prev = std::env::var("CARGO_HOME").ok(); std::env::set_var("CARGO_HOME", cargo_home.path()); let crawler = CargoCrawler; - let paths = crawler.get_crate_source_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_crate_source_paths(&options_at(tmp.path())) + .await + .unwrap(); if let Some(v) = prev { std::env::set_var("CARGO_HOME", v); @@ -491,7 +504,9 @@ async fn get_crate_source_paths_local_cargo_toml_falls_back_to_registry() { async fn crawl_all_skips_top_level_files() { let tmp = tempfile::tempdir().unwrap(); stage_registry_crate(tmp.path(), "real-crate", "1.0.0").await; - tokio::fs::write(tmp.path().join("README"), b"not a crate").await.unwrap(); + tokio::fs::write(tmp.path().join("README"), b"not a crate") + .await + .unwrap(); let crawler = CargoCrawler; let opts = CrawlerOptions { @@ -515,7 +530,9 @@ async fn crawl_all_skips_crate_with_unparseable_toml_and_no_version_dir_name() { let tmp = tempfile::tempdir().unwrap(); let bad = tmp.path().join("no-version-suffix"); tokio::fs::create_dir(&bad).await.unwrap(); - tokio::fs::write(bad.join("Cargo.toml"), b"this is not valid toml").await.unwrap(); + tokio::fs::write(bad.join("Cargo.toml"), b"this is not valid toml") + .await + .unwrap(); let crawler = CargoCrawler; let opts = CrawlerOptions { @@ -525,7 +542,10 @@ async fn crawl_all_skips_crate_with_unparseable_toml_and_no_version_dir_name() { batch_size: 100, }; let result = crawler.crawl_all(&opts).await; - assert!(result.is_empty(), 
"unparseable + no-version dir name must be skipped"); + assert!( + result.is_empty(), + "unparseable + no-version dir name must be skipped" + ); } #[path = "common/mod.rs"] @@ -570,7 +590,9 @@ async fn find_by_purls_verify_fails_when_both_parsers_fail() { let tmp = tempfile::tempdir().unwrap(); let bad = tmp.path().join("not-cargo-like-at-all"); tokio::fs::create_dir(&bad).await.unwrap(); - tokio::fs::write(bad.join("Cargo.toml"), b"this is not toml").await.unwrap(); + tokio::fs::write(bad.join("Cargo.toml"), b"this is not toml") + .await + .unwrap(); let crawler = CargoCrawler; // The strict registry dir for `pkg:cargo/foo@1.0.0` is @@ -589,16 +611,25 @@ async fn find_by_purls_verify_fails_when_both_parsers_fail() { #[serial_test::serial] async fn get_crate_source_paths_local_cargo_toml_with_registry_src() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("Cargo.toml"), b"[package]\n").await.unwrap(); + tokio::fs::write(tmp.path().join("Cargo.toml"), b"[package]\n") + .await + .unwrap(); let cargo_home = tempfile::tempdir().unwrap(); - let index_dir = cargo_home.path().join("registry").join("src").join("index.crates.io-stub"); + let index_dir = cargo_home + .path() + .join("registry") + .join("src") + .join("index.crates.io-stub"); tokio::fs::create_dir_all(&index_dir).await.unwrap(); let prev = std::env::var("CARGO_HOME").ok(); std::env::set_var("CARGO_HOME", cargo_home.path()); let crawler = CargoCrawler; - let paths = crawler.get_crate_source_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_crate_source_paths(&options_at(tmp.path())) + .await + .unwrap(); if let Some(v) = prev { std::env::set_var("CARGO_HOME", v); diff --git a/crates/socket-patch-core/tests/crawler_composer_e2e.rs b/crates/socket-patch-core/tests/crawler_composer_e2e.rs index f841448b..d694b528 100644 --- a/crates/socket-patch-core/tests/crawler_composer_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_composer_e2e.rs @@ -55,10 +55,14 @@ 
async fn stage_composer_project(root: &Path, vendor_name: &str, pkg_name: &str, ] }}"# ); - tokio::fs::write(installed_dir.join("installed.json"), installed_json).await.unwrap(); + tokio::fs::write(installed_dir.join("installed.json"), installed_json) + .await + .unwrap(); // composer.json marker on the project root. - tokio::fs::write(root.join("composer.json"), b"{}").await.unwrap(); + tokio::fs::write(root.join("composer.json"), b"{}") + .await + .unwrap(); } // ── find_by_purls ────────────────────────────────────────────── @@ -75,7 +79,10 @@ async fn find_by_purls_finds_package_in_vendor() { .unwrap(); assert_eq!(result.len(), 1); let pkg = result.get(ORG_PURL).unwrap(); - assert_eq!(pkg.path, tmp.path().join("vendor").join("monolog").join("monolog")); + assert_eq!( + pkg.path, + tmp.path().join("vendor").join("monolog").join("monolog") + ); } #[tokio::test] @@ -150,8 +157,12 @@ async fn crawl_all_with_corrupt_installed_json_returns_empty() { let vendor = tmp.path().join("vendor"); let composer = vendor.join("composer"); tokio::fs::create_dir_all(&composer).await.unwrap(); - tokio::fs::write(composer.join("installed.json"), b"{ this is not json").await.unwrap(); - tokio::fs::write(tmp.path().join("composer.json"), b"{}").await.unwrap(); + tokio::fs::write(composer.join("installed.json"), b"{ this is not json") + .await + .unwrap(); + tokio::fs::write(tmp.path().join("composer.json"), b"{}") + .await + .unwrap(); let crawler = ComposerCrawler; let opts = CrawlerOptions { @@ -184,7 +195,10 @@ async fn get_vendor_paths_with_global_prefix_passthrough() { async fn get_vendor_paths_local_no_vendor_returns_empty() { let tmp = tempfile::tempdir().unwrap(); let crawler = ComposerCrawler; - let paths = crawler.get_vendor_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_vendor_paths(&options_at(tmp.path())) + .await + .unwrap(); assert!(paths.is_empty()); } @@ -194,11 +208,19 @@ async fn get_vendor_paths_local_no_installed_json_returns_empty() 
{ let vendor = tmp.path().join("vendor"); tokio::fs::create_dir(&vendor).await.unwrap(); // vendor exists but no installed.json inside. - tokio::fs::write(tmp.path().join("composer.json"), b"{}").await.unwrap(); + tokio::fs::write(tmp.path().join("composer.json"), b"{}") + .await + .unwrap(); let crawler = ComposerCrawler; - let paths = crawler.get_vendor_paths(&options_at(tmp.path())).await.unwrap(); - assert!(paths.is_empty(), "vendor without installed.json must not match"); + let paths = crawler + .get_vendor_paths(&options_at(tmp.path())) + .await + .unwrap(); + assert!( + paths.is_empty(), + "vendor without installed.json must not match" + ); } #[tokio::test] @@ -207,12 +229,20 @@ async fn get_vendor_paths_local_no_composer_marker_returns_empty() { let vendor = tmp.path().join("vendor"); let composer = vendor.join("composer"); tokio::fs::create_dir_all(&composer).await.unwrap(); - tokio::fs::write(composer.join("installed.json"), b"{\"packages\":[]}").await.unwrap(); + tokio::fs::write(composer.join("installed.json"), b"{\"packages\":[]}") + .await + .unwrap(); // No composer.json or composer.lock on the project root. 
let crawler = ComposerCrawler; - let paths = crawler.get_vendor_paths(&options_at(tmp.path())).await.unwrap(); - assert!(paths.is_empty(), "no composer.json must mean not-a-PHP-project"); + let paths = crawler + .get_vendor_paths(&options_at(tmp.path())) + .await + .unwrap(); + assert!( + paths.is_empty(), + "no composer.json must mean not-a-PHP-project" + ); } #[tokio::test] @@ -221,11 +251,18 @@ async fn get_vendor_paths_local_full_setup_returns_vendor() { let vendor = tmp.path().join("vendor"); let composer = vendor.join("composer"); tokio::fs::create_dir_all(&composer).await.unwrap(); - tokio::fs::write(composer.join("installed.json"), b"{\"packages\":[]}").await.unwrap(); - tokio::fs::write(tmp.path().join("composer.json"), b"{}").await.unwrap(); + tokio::fs::write(composer.join("installed.json"), b"{\"packages\":[]}") + .await + .unwrap(); + tokio::fs::write(tmp.path().join("composer.json"), b"{}") + .await + .unwrap(); let crawler = ComposerCrawler; - let paths = crawler.get_vendor_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_vendor_paths(&options_at(tmp.path())) + .await + .unwrap(); assert_eq!(paths, vec![vendor]); } @@ -411,7 +448,10 @@ async fn get_vendor_paths_global_no_composer_no_home_layout_returns_empty() { std::env::remove_var("PATH"); } - assert!(paths.is_empty(), "no composer source anywhere must yield empty; got {paths:?}"); + assert!( + paths.is_empty(), + "no composer source anywhere must yield empty; got {paths:?}" + ); } #[path = "common/mod.rs"] @@ -432,7 +472,9 @@ async fn find_by_purls_handles_unreadable_installed_json() { let composer = vendor.join("composer"); tokio::fs::create_dir_all(&composer).await.unwrap(); let installed = composer.join("installed.json"); - tokio::fs::write(&installed, r#"{"packages":[]}"#).await.unwrap(); + tokio::fs::write(&installed, r#"{"packages":[]}"#) + .await + .unwrap(); common::chmod_unreadable(&installed); let crawler = ComposerCrawler; @@ -442,7 +484,10 @@ async fn 
find_by_purls_handles_unreadable_installed_json() { .unwrap(); common::chmod_readable(&installed); - assert!(result.is_empty(), "unreadable installed.json must yield empty"); + assert!( + result.is_empty(), + "unreadable installed.json must yield empty" + ); } /// `crawl_all` should dedup packages discovered across multiple @@ -457,8 +502,12 @@ async fn crawl_all_dedups_across_vendor_paths() { let pkg_dir = custom_vendor.join("monolog").join("monolog"); tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); let installed = r#"{"packages":[{"name":"monolog/monolog","version":"3.5.0"},{"name":"monolog/monolog","version":"3.5.0"}]}"#; - tokio::fs::write(composer_dir.join("installed.json"), installed).await.unwrap(); - tokio::fs::write(tmp.path().join("composer.json"), b"{}").await.unwrap(); + tokio::fs::write(composer_dir.join("installed.json"), installed) + .await + .unwrap(); + tokio::fs::write(tmp.path().join("composer.json"), b"{}") + .await + .unwrap(); let crawler = ComposerCrawler; let opts = CrawlerOptions { @@ -468,7 +517,11 @@ async fn crawl_all_dedups_across_vendor_paths() { batch_size: 100, }; let result = crawler.crawl_all(&opts).await; - assert_eq!(result.len(), 1, "duplicates inside installed.json must dedup"); + assert_eq!( + result.len(), + 1, + "duplicates inside installed.json must dedup" + ); } #[tokio::test] @@ -477,10 +530,17 @@ async fn get_vendor_paths_local_with_lock_marker_also_works() { let vendor = tmp.path().join("vendor"); let composer = vendor.join("composer"); tokio::fs::create_dir_all(&composer).await.unwrap(); - tokio::fs::write(composer.join("installed.json"), b"{\"packages\":[]}").await.unwrap(); - tokio::fs::write(tmp.path().join("composer.lock"), b"{}").await.unwrap(); + tokio::fs::write(composer.join("installed.json"), b"{\"packages\":[]}") + .await + .unwrap(); + tokio::fs::write(tmp.path().join("composer.lock"), b"{}") + .await + .unwrap(); let crawler = ComposerCrawler; - let paths = 
crawler.get_vendor_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_vendor_paths(&options_at(tmp.path())) + .await + .unwrap(); assert_eq!(paths, vec![vendor]); } diff --git a/crates/socket-patch-core/tests/crawler_deno_e2e.rs b/crates/socket-patch-core/tests/crawler_deno_e2e.rs index a28c400e..da741a70 100644 --- a/crates/socket-patch-core/tests/crawler_deno_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_deno_e2e.rs @@ -22,15 +22,12 @@ fn options_at(root: &Path) -> CrawlerOptions { } /// Stage a JSR package: `////mod.ts`. -async fn stage_jsr_pkg( - root: &Path, - scope: &str, - name: &str, - version: &str, -) -> std::path::PathBuf { +async fn stage_jsr_pkg(root: &Path, scope: &str, name: &str, version: &str) -> std::path::PathBuf { let pkg = root.join(scope).join(name).join(version); tokio::fs::create_dir_all(&pkg).await.unwrap(); - tokio::fs::write(pkg.join("mod.ts"), b"export default 1;").await.unwrap(); + tokio::fs::write(pkg.join("mod.ts"), b"export default 1;") + .await + .unwrap(); pkg } @@ -70,13 +67,13 @@ async fn find_by_purls_non_jsr_purl_skipped() { let tmp = tempfile::tempdir().unwrap(); let crawler = DenoCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:npm/lodash@4.17.21".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:npm/lodash@4.17.21".to_string()]) .await .unwrap(); - assert!(result.is_empty(), "non-jsr PURLs must be ignored by DenoCrawler"); + assert!( + result.is_empty(), + "non-jsr PURLs must be ignored by DenoCrawler" + ); } // ── crawl_all ───────────────────────────────────────────────── @@ -123,7 +120,10 @@ async fn crawl_all_skips_dirs_not_starting_with_at() { let result = crawler.crawl_all(&opts).await; let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect(); assert!(names.contains(&"path")); - assert!(!names.contains(&"foo"), "non-`@`-prefixed dir must be skipped"); + assert!( + !names.contains(&"foo"), + "non-`@`-prefixed dir must be skipped" + ); } // ── 
get_jsr_cache_paths ──────────────────────────────────────── @@ -176,7 +176,10 @@ async fn get_jsr_cache_paths_local_no_marker_returns_empty() { let tmp = tempfile::tempdir().unwrap(); // No deno.json / .jsonc / .lock — not a Deno project. let crawler = DenoCrawler; - let paths = crawler.get_jsr_cache_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_jsr_cache_paths(&options_at(tmp.path())) + .await + .unwrap(); assert!(paths.is_empty()); } @@ -185,7 +188,9 @@ async fn get_jsr_cache_paths_local_no_marker_returns_empty() { async fn get_jsr_cache_paths_local_with_deno_json_falls_back_to_cache() { let project = tempfile::tempdir().unwrap(); let deno_home = tempfile::tempdir().unwrap(); - tokio::fs::write(project.path().join("deno.json"), b"{}").await.unwrap(); + tokio::fs::write(project.path().join("deno.json"), b"{}") + .await + .unwrap(); let jsr = deno_home.path().join("npm").join("jsr.io"); tokio::fs::create_dir_all(&jsr).await.unwrap(); @@ -193,7 +198,10 @@ async fn get_jsr_cache_paths_local_with_deno_json_falls_back_to_cache() { std::env::set_var("DENO_DIR", deno_home.path()); let crawler = DenoCrawler; - let paths = crawler.get_jsr_cache_paths(&options_at(project.path())).await.unwrap(); + let paths = crawler + .get_jsr_cache_paths(&options_at(project.path())) + .await + .unwrap(); if let Some(v) = prev { std::env::set_var("DENO_DIR", v); diff --git a/crates/socket-patch-core/tests/crawler_go_e2e.rs b/crates/socket-patch-core/tests/crawler_go_e2e.rs index 455f747e..2268f501 100644 --- a/crates/socket-patch-core/tests/crawler_go_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_go_e2e.rs @@ -114,10 +114,7 @@ async fn find_by_purls_invalid_purl_skipped() { let tmp = tempfile::tempdir().unwrap(); let crawler = GoCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:not-golang/foo@1.0".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:not-golang/foo@1.0".to_string()]) .await .unwrap(); assert!(result.is_empty()); @@ 
-146,7 +143,10 @@ async fn get_module_cache_paths_local_no_go_mod_returns_empty() { let crawler = GoCrawler; let prev_cache = std::env::var("GOMODCACHE").ok(); std::env::remove_var("GOMODCACHE"); - let paths = crawler.get_module_cache_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_module_cache_paths(&options_at(tmp.path())) + .await + .unwrap(); if let Some(v) = prev_cache { std::env::set_var("GOMODCACHE", v); } @@ -157,15 +157,21 @@ async fn get_module_cache_paths_local_no_go_mod_returns_empty() { #[serial] async fn get_module_cache_paths_with_go_mod_returns_cache() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("go.mod"), b"module example.com/test\n\ngo 1.21\n") - .await - .unwrap(); + tokio::fs::write( + tmp.path().join("go.mod"), + b"module example.com/test\n\ngo 1.21\n", + ) + .await + .unwrap(); let cache = tempfile::tempdir().unwrap(); let prev = std::env::var("GOMODCACHE").ok(); std::env::set_var("GOMODCACHE", cache.path()); let crawler = GoCrawler; - let paths = crawler.get_module_cache_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_module_cache_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("GOMODCACHE"); if let Some(v) = prev { @@ -263,7 +269,11 @@ async fn find_by_purls_module_dir_missing_returns_empty() { async fn crawl_all_finds_nested_versioned_module() { let tmp = tempfile::tempdir().unwrap(); // Stage /github.com/gin-gonic/gin@v1.9.1/ - let module_dir = tmp.path().join("github.com").join("gin-gonic").join("gin@v1.9.1"); + let module_dir = tmp + .path() + .join("github.com") + .join("gin-gonic") + .join("gin@v1.9.1"); tokio::fs::create_dir_all(&module_dir).await.unwrap(); let crawler = GoCrawler; @@ -287,7 +297,9 @@ async fn crawl_all_finds_nested_versioned_module() { async fn crawl_all_skips_cache_metadata_dir() { let tmp = tempfile::tempdir().unwrap(); let cache_meta = tmp.path().join("cache"); - 
tokio::fs::create_dir_all(cache_meta.join("download").join("module@v1.0.0")).await.unwrap(); + tokio::fs::create_dir_all(cache_meta.join("download").join("module@v1.0.0")) + .await + .unwrap(); let crawler = GoCrawler; let opts = CrawlerOptions { @@ -297,7 +309,10 @@ async fn crawl_all_skips_cache_metadata_dir() { batch_size: 100, }; let result = crawler.crawl_all(&opts).await; - assert!(result.is_empty(), "cache/ subtree must be skipped; got {result:?}"); + assert!( + result.is_empty(), + "cache/ subtree must be skipped; got {result:?}" + ); } /// With GOMODCACHE and GOPATH both unset, `get_gomodcache` falls @@ -306,9 +321,12 @@ async fn crawl_all_skips_cache_metadata_dir() { #[serial] async fn get_module_cache_paths_home_go_pkg_mod_fallback() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("go.mod"), b"module example.com/test\n\ngo 1.21\n") - .await - .unwrap(); + tokio::fs::write( + tmp.path().join("go.mod"), + b"module example.com/test\n\ngo 1.21\n", + ) + .await + .unwrap(); let prev_gomod = std::env::var("GOMODCACHE").ok(); let prev_gopath = std::env::var("GOPATH").ok(); let prev_home = std::env::var("HOME").ok(); @@ -317,7 +335,10 @@ async fn get_module_cache_paths_home_go_pkg_mod_fallback() { std::env::set_var("HOME", tmp.path()); let crawler = GoCrawler; - let paths = crawler.get_module_cache_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_module_cache_paths(&options_at(tmp.path())) + .await + .unwrap(); if let Some(v) = prev_gomod { std::env::set_var("GOMODCACHE", v); @@ -342,9 +363,12 @@ async fn get_module_cache_paths_home_go_pkg_mod_fallback() { #[serial] async fn get_module_cache_paths_gopath_fallback_when_gomodcache_unset() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("go.mod"), b"module example.com/test\n\ngo 1.21\n") - .await - .unwrap(); + tokio::fs::write( + tmp.path().join("go.mod"), + b"module example.com/test\n\ngo 1.21\n", + ) + .await + .unwrap(); let 
gopath = tempfile::tempdir().unwrap(); let expected = gopath.path().join("pkg").join("mod"); let prev_gomod = std::env::var("GOMODCACHE").ok(); @@ -353,7 +377,10 @@ async fn get_module_cache_paths_gopath_fallback_when_gomodcache_unset() { std::env::set_var("GOPATH", gopath.path()); let crawler = GoCrawler; - let paths = crawler.get_module_cache_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_module_cache_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("GOPATH"); if let Some(v) = prev_gomod { diff --git a/crates/socket-patch-core/tests/crawler_maven_e2e.rs b/crates/socket-patch-core/tests/crawler_maven_e2e.rs index 1da605ac..28f4abb6 100644 --- a/crates/socket-patch-core/tests/crawler_maven_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_maven_e2e.rs @@ -8,9 +8,9 @@ use std::path::Path; use serial_test::serial; +use socket_patch_core::crawlers::maven_crawler::parse_pom_group_artifact_version; use socket_patch_core::crawlers::types::CrawlerOptions; use socket_patch_core::crawlers::MavenCrawler; -use socket_patch_core::crawlers::maven_crawler::parse_pom_group_artifact_version; fn options_at(root: &Path) -> CrawlerOptions { CrawlerOptions { @@ -23,7 +23,12 @@ fn options_at(root: &Path) -> CrawlerOptions { /// Stage a maven m2-layout package: //// /// with a minimal pom.xml. 
-async fn stage_maven_pkg(repo: &Path, group: &str, artifact: &str, version: &str) -> std::path::PathBuf { +async fn stage_maven_pkg( + repo: &Path, + group: &str, + artifact: &str, + version: &str, +) -> std::path::PathBuf { let group_path = group.replace('.', "/"); let pkg_dir = repo.join(group_path).join(artifact).join(version); tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); @@ -36,7 +41,9 @@ async fn stage_maven_pkg(repo: &Path, group: &str, artifact: &str, version: &str {version} "# ); - tokio::fs::write(pkg_dir.join(format!("{artifact}-{version}.pom")), pom).await.unwrap(); + tokio::fs::write(pkg_dir.join(format!("{artifact}-{version}.pom")), pom) + .await + .unwrap(); pkg_dir } @@ -203,7 +210,9 @@ async fn get_maven_repo_paths_home_dot_m2_fallback() { let tmp = tempfile::tempdir().unwrap(); let m2 = tmp.path().join(".m2").join("repository"); tokio::fs::create_dir_all(&m2).await.unwrap(); - tokio::fs::write(tmp.path().join("pom.xml"), b"").await.unwrap(); + tokio::fs::write(tmp.path().join("pom.xml"), b"") + .await + .unwrap(); let prev_local = std::env::var("MAVEN_REPO_LOCAL").ok(); let prev_m2 = std::env::var("M2_HOME").ok(); @@ -213,7 +222,10 @@ async fn get_maven_repo_paths_home_dot_m2_fallback() { std::env::set_var("HOME", tmp.path()); let crawler = MavenCrawler; - let paths = crawler.get_maven_repo_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_maven_repo_paths(&options_at(tmp.path())) + .await + .unwrap(); if let Some(v) = prev_local { std::env::set_var("MAVEN_REPO_LOCAL", v); @@ -298,7 +310,10 @@ async fn get_maven_repo_paths_global_mode_no_m2_returns_empty() { std::env::remove_var("HOME"); } - assert!(paths.is_empty(), "no m2 anywhere must yield empty; got {paths:?}"); + assert!( + paths.is_empty(), + "no m2 anywhere must yield empty; got {paths:?}" + ); } /// `find_by_purls` for a version directory that contains a non-`.pom` @@ -308,10 +323,16 @@ async fn get_maven_repo_paths_global_mode_no_m2_returns_empty() { 
async fn find_by_purls_version_dir_without_pom_returns_empty() { let tmp = tempfile::tempdir().unwrap(); let group_path = "org/apache/commons"; - let pkg_dir = tmp.path().join(group_path).join("commons-lang3").join("3.12.0"); + let pkg_dir = tmp + .path() + .join(group_path) + .join("commons-lang3") + .join("3.12.0"); tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); // Put a non-.pom file in there — has_pom_file must reject. - tokio::fs::write(pkg_dir.join("commons-lang3-3.12.0.jar"), b"fake jar").await.unwrap(); + tokio::fs::write(pkg_dir.join("commons-lang3-3.12.0.jar"), b"fake jar") + .await + .unwrap(); let crawler = MavenCrawler; let result = crawler @@ -377,10 +398,7 @@ async fn find_by_purls_invalid_purl_skipped() { let tmp = tempfile::tempdir().unwrap(); let crawler = MavenCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:not-maven/foo@1.0".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:not-maven/foo@1.0".to_string()]) .await .unwrap(); assert!(result.is_empty()); @@ -402,7 +420,10 @@ async fn crawl_all_discovers_packages_in_repo() { batch_size: 100, }; let result = crawler.crawl_all(&opts).await; - assert!(result.len() >= 2, "must discover both packages; got {result:?}"); + assert!( + result.len() >= 2, + "must discover both packages; got {result:?}" + ); } #[tokio::test] @@ -441,7 +462,10 @@ async fn get_maven_repo_paths_no_marker_returns_empty() { let tmp = tempfile::tempdir().unwrap(); // No pom.xml, no build.gradle — not a Java project. 
let crawler = MavenCrawler; - let paths = crawler.get_maven_repo_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_maven_repo_paths(&options_at(tmp.path())) + .await + .unwrap(); assert!(paths.is_empty(), "non-Java dir must return empty paths"); } @@ -449,13 +473,18 @@ async fn get_maven_repo_paths_no_marker_returns_empty() { #[serial] async fn get_maven_repo_paths_with_pom_xml_returns_repo() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("pom.xml"), b"").await.unwrap(); + tokio::fs::write(tmp.path().join("pom.xml"), b"") + .await + .unwrap(); let repo = tempfile::tempdir().unwrap(); let prev = std::env::var("MAVEN_REPO_LOCAL").ok(); std::env::set_var("MAVEN_REPO_LOCAL", repo.path()); let crawler = MavenCrawler; - let paths = crawler.get_maven_repo_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_maven_repo_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("MAVEN_REPO_LOCAL"); if let Some(v) = prev { @@ -469,13 +498,18 @@ async fn get_maven_repo_paths_with_pom_xml_returns_repo() { #[serial] async fn get_maven_repo_paths_with_build_gradle_returns_repo() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("build.gradle"), b"plugins {}").await.unwrap(); + tokio::fs::write(tmp.path().join("build.gradle"), b"plugins {}") + .await + .unwrap(); let repo = tempfile::tempdir().unwrap(); let prev = std::env::var("MAVEN_REPO_LOCAL").ok(); std::env::set_var("MAVEN_REPO_LOCAL", repo.path()); let crawler = MavenCrawler; - let paths = crawler.get_maven_repo_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_maven_repo_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("MAVEN_REPO_LOCAL"); if let Some(v) = prev { @@ -489,13 +523,18 @@ async fn get_maven_repo_paths_with_build_gradle_returns_repo() { #[serial] async fn get_maven_repo_paths_with_build_gradle_kts_returns_repo() { let tmp = 
tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("build.gradle.kts"), b"plugins {}").await.unwrap(); + tokio::fs::write(tmp.path().join("build.gradle.kts"), b"plugins {}") + .await + .unwrap(); let repo = tempfile::tempdir().unwrap(); let prev = std::env::var("MAVEN_REPO_LOCAL").ok(); std::env::set_var("MAVEN_REPO_LOCAL", repo.path()); let crawler = MavenCrawler; - let paths = crawler.get_maven_repo_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_maven_repo_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("MAVEN_REPO_LOCAL"); if let Some(v) = prev { @@ -509,7 +548,9 @@ async fn get_maven_repo_paths_with_build_gradle_kts_returns_repo() { #[serial] async fn get_maven_repo_paths_m2_home_fallback() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("pom.xml"), b"").await.unwrap(); + tokio::fs::write(tmp.path().join("pom.xml"), b"") + .await + .unwrap(); let m2_home = tempfile::tempdir().unwrap(); let repo_dir = m2_home.path().join("repository"); tokio::fs::create_dir(&repo_dir).await.unwrap(); @@ -519,7 +560,10 @@ async fn get_maven_repo_paths_m2_home_fallback() { std::env::set_var("M2_HOME", m2_home.path()); let crawler = MavenCrawler; - let paths = crawler.get_maven_repo_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_maven_repo_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("M2_HOME"); if let Some(v) = prev_maven_repo { diff --git a/crates/socket-patch-core/tests/crawler_npm_e2e.rs b/crates/socket-patch-core/tests/crawler_npm_e2e.rs index 9474fd63..057ac57b 100644 --- a/crates/socket-patch-core/tests/crawler_npm_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_npm_e2e.rs @@ -30,7 +30,9 @@ async fn stage_npm_pkg(node_modules: &Path, name: &str, version: &str) { let pkg_dir = node_modules.join(name); tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); let pkg_json = 
format!(r#"{{"name":"{name}","version":"{version}"}}"#); - tokio::fs::write(pkg_dir.join("package.json"), pkg_json).await.unwrap(); + tokio::fs::write(pkg_dir.join("package.json"), pkg_json) + .await + .unwrap(); } // ── parse_package_name ───────────────────────────────────────── @@ -77,13 +79,12 @@ fn build_npm_purl_scoped() { async fn read_package_json_well_formed() { let tmp = tempfile::tempdir().unwrap(); let pkg = tmp.path().join("package.json"); - tokio::fs::write(&pkg, r#"{"name":"lodash","version":"4.17.21"}"#).await.unwrap(); + tokio::fs::write(&pkg, r#"{"name":"lodash","version":"4.17.21"}"#) + .await + .unwrap(); let result = read_package_json(&pkg).await; - assert_eq!( - result, - Some(("lodash".to_string(), "4.17.21".to_string())) - ); + assert_eq!(result, Some(("lodash".to_string(), "4.17.21".to_string()))); } #[tokio::test] @@ -107,7 +108,9 @@ async fn read_package_json_malformed_returns_none() { async fn read_package_json_missing_name_returns_none() { let tmp = tempfile::tempdir().unwrap(); let pkg = tmp.path().join("package.json"); - tokio::fs::write(&pkg, r#"{"version":"1.0.0"}"#).await.unwrap(); + tokio::fs::write(&pkg, r#"{"version":"1.0.0"}"#) + .await + .unwrap(); let result = read_package_json(&pkg).await; assert_eq!(result, None); @@ -117,7 +120,9 @@ async fn read_package_json_missing_name_returns_none() { async fn read_package_json_missing_version_returns_none() { let tmp = tempfile::tempdir().unwrap(); let pkg = tmp.path().join("package.json"); - tokio::fs::write(&pkg, r#"{"name":"lodash"}"#).await.unwrap(); + tokio::fs::write(&pkg, r#"{"name":"lodash"}"#) + .await + .unwrap(); let result = read_package_json(&pkg).await; assert_eq!(result, None); @@ -129,7 +134,9 @@ async fn read_package_json_missing_version_returns_none() { async fn read_package_json_empty_name_returns_none() { let tmp = tempfile::tempdir().unwrap(); let pkg = tmp.path().join("package.json"); - tokio::fs::write(&pkg, r#"{"name":"","version":"1.0.0"}"#).await.unwrap(); + 
tokio::fs::write(&pkg, r#"{"name":"","version":"1.0.0"}"#) + .await + .unwrap(); assert_eq!(read_package_json(&pkg).await, None); } @@ -137,7 +144,9 @@ async fn read_package_json_empty_name_returns_none() { async fn read_package_json_empty_version_returns_none() { let tmp = tempfile::tempdir().unwrap(); let pkg = tmp.path().join("package.json"); - tokio::fs::write(&pkg, r#"{"name":"lodash","version":""}"#).await.unwrap(); + tokio::fs::write(&pkg, r#"{"name":"lodash","version":""}"#) + .await + .unwrap(); assert_eq!(read_package_json(&pkg).await, None); } @@ -246,7 +255,10 @@ fn with_empty_path(f: F) { fn get_npm_global_prefix_returns_err_when_npm_not_on_path() { with_empty_path(|| { let result = get_npm_global_prefix(); - assert!(result.is_err(), "npm-not-on-PATH must return Err; got {result:?}"); + assert!( + result.is_err(), + "npm-not-on-PATH must return Err; got {result:?}" + ); }); } @@ -293,8 +305,7 @@ fn get_npm_global_prefix_with_mock_runner_returns_path() { #[test] fn get_npm_global_prefix_with_mock_runner_empty_stdout_returns_err() { - let runner = - common::MockCommandRunner::new().with_response("npm", &["root", "-g"], Some("")); + let runner = common::MockCommandRunner::new().with_response("npm", &["root", "-g"], Some("")); assert!(get_npm_global_prefix_with(&runner).is_err()); } @@ -303,8 +314,11 @@ fn get_npm_global_prefix_with_mock_runner_empty_stdout_returns_err() { #[cfg(unix)] #[test] fn get_yarn_global_prefix_with_mock_runner_success() { - let runner = - common::MockCommandRunner::new().with_response("yarn", &["global", "dir"], Some("/Users/foo/.yarn/global\n")); + let runner = common::MockCommandRunner::new().with_response( + "yarn", + &["global", "dir"], + Some("/Users/foo/.yarn/global\n"), + ); assert_eq!( get_yarn_global_prefix_with(&runner).as_deref(), Some("/Users/foo/.yarn/global/node_modules") @@ -449,10 +463,7 @@ async fn find_by_purls_strips_qualifiers() { let crawler = NpmCrawler; let result = crawler - .find_by_purls( - &nm, - 
&["pkg:npm/lodash@4.17.21?extension=tgz".to_string()], - ) + .find_by_purls(&nm, &["pkg:npm/lodash@4.17.21?extension=tgz".to_string()]) .await .unwrap(); // Note: result key uses the original purl, but lookup back uses @@ -462,7 +473,10 @@ async fn find_by_purls_strips_qualifiers() { // `pkg:npm/lodash@4.17.21` which doesn't match the qualified // input — so the result is empty. The important coverage is that // parse_purl_components successfully strips the qualifier. - assert!(result.is_empty(), "qualifier strip + synth mismatch must yield empty"); + assert!( + result.is_empty(), + "qualifier strip + synth mismatch must yield empty" + ); } /// PURL with no `@` (no version separator) must be rejected via the @@ -526,10 +540,7 @@ async fn find_by_purls_invalid_purl_skipped() { let tmp = tempfile::tempdir().unwrap(); let crawler = NpmCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:not-npm/foo@1.0".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:not-npm/foo@1.0".to_string()]) .await .unwrap(); assert!(result.is_empty()); @@ -556,7 +567,9 @@ async fn crawl_all_discovers_unscoped_and_scoped() { async fn crawl_all_skips_dirs_without_package_json() { let tmp = tempfile::tempdir().unwrap(); let nm = tmp.path().join("node_modules"); - tokio::fs::create_dir_all(nm.join("not_a_pkg")).await.unwrap(); + tokio::fs::create_dir_all(nm.join("not_a_pkg")) + .await + .unwrap(); // No package.json — must be skipped. let crawler = NpmCrawler; @@ -589,18 +602,39 @@ async fn crawl_all_recurses_into_workspace_packages() { async fn crawl_all_skips_hidden_and_skip_dirs() { let tmp = tempfile::tempdir().unwrap(); // Hidden dirs and SKIP_DIRS entries (dist/build/coverage/tmp/...) are skipped. 
- stage_npm_pkg(&tmp.path().join(".hidden").join("node_modules"), "should-not-find", "1.0").await; - stage_npm_pkg(&tmp.path().join("dist").join("node_modules"), "also-not", "1.0").await; + stage_npm_pkg( + &tmp.path().join(".hidden").join("node_modules"), + "should-not-find", + "1.0", + ) + .await; + stage_npm_pkg( + &tmp.path().join("dist").join("node_modules"), + "also-not", + "1.0", + ) + .await; // But a real workspace dir should be picked up. - stage_npm_pkg(&tmp.path().join("real-ws").join("node_modules"), "found-me", "1.0").await; + stage_npm_pkg( + &tmp.path().join("real-ws").join("node_modules"), + "found-me", + "1.0", + ) + .await; let crawler = NpmCrawler; let opts = options_at(tmp.path()); let result = crawler.crawl_all(&opts).await; let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect(); assert!(names.contains(&"found-me")); - assert!(!names.contains(&"should-not-find"), "hidden dir must be skipped"); - assert!(!names.contains(&"also-not"), "SKIP_DIRS dir must be skipped"); + assert!( + !names.contains(&"should-not-find"), + "hidden dir must be skipped" + ); + assert!( + !names.contains(&"also-not"), + "SKIP_DIRS dir must be skipped" + ); } #[path = "common/mod.rs"] @@ -624,7 +658,10 @@ async fn crawl_all_handles_unreadable_node_modules() { let result = crawler.crawl_all(&opts).await; common::chmod_readable(&nm); - assert!(result.is_empty(), "unreadable node_modules must yield empty"); + assert!( + result.is_empty(), + "unreadable node_modules must yield empty" + ); } /// `find_workspace_node_modules` short-circuits cleanly when it @@ -640,7 +677,12 @@ async fn crawl_all_handles_unreadable_workspace_dir() { } let tmp = tempfile::tempdir().unwrap(); // Readable workspace. - stage_npm_pkg(&tmp.path().join("readable").join("node_modules"), "ok", "1.0.0").await; + stage_npm_pkg( + &tmp.path().join("readable").join("node_modules"), + "ok", + "1.0.0", + ) + .await; // Unreadable workspace. 
let blocked = tmp.path().join("blocked"); tokio::fs::create_dir(&blocked).await.unwrap(); @@ -654,7 +696,10 @@ async fn crawl_all_handles_unreadable_workspace_dir() { let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect(); assert!(names.contains(&"ok")); - assert!(!names.contains(&"hidden"), "unreadable workspace must be skipped"); + assert!( + !names.contains(&"hidden"), + "unreadable workspace must be skipped" + ); } /// Drives scoped-package scanning + nested node_modules recursion + @@ -683,13 +728,19 @@ async fn crawl_all_handles_nested_and_messy_scope_dir() { .await; // Hidden subdir inside @scope — must be skipped (L581-583). - tokio::fs::create_dir_all(nm.join("@scope").join(".hidden")).await.unwrap(); + tokio::fs::create_dir_all(nm.join("@scope").join(".hidden")) + .await + .unwrap(); // A plain file inside @scope — must be skipped via the !is_dir && // !is_symlink arm (L590-591). - tokio::fs::write(nm.join("@scope").join("README.md"), b"x").await.unwrap(); + tokio::fs::write(nm.join("@scope").join("README.md"), b"x") + .await + .unwrap(); // A plain file at top of node_modules too — exercises the same arm // in scan_node_modules. - tokio::fs::write(nm.join("top-level-file.txt"), b"y").await.unwrap(); + tokio::fs::write(nm.join("top-level-file.txt"), b"y") + .await + .unwrap(); // Nested node_modules with a scoped subentry — drives the L650-653 arm // (nested → scan_scoped_packages). 
@@ -717,10 +768,60 @@ async fn crawl_all_skips_dirs_with_corrupt_package_json() { let nm = tmp.path().join("node_modules"); let bad = nm.join("broken"); tokio::fs::create_dir_all(&bad).await.unwrap(); - tokio::fs::write(bad.join("package.json"), b"{ corrupt").await.unwrap(); + tokio::fs::write(bad.join("package.json"), b"{ corrupt") + .await + .unwrap(); let crawler = NpmCrawler; let opts = options_at(tmp.path()); let result = crawler.crawl_all(&opts).await; assert!(result.is_empty()); } + +/// Regression: a symlinked package inside a nested `node_modules` (the +/// shape pnpm and `npm link` produce — top-level entries are symlinks +/// into a content-addressed store) must itself be recorded, but the +/// crawler must NOT recurse *through* the symlink into the store. Doing +/// so would surface store-internal packages that aren't part of the +/// project's dependency tree and could escape the project root +/// entirely. `scan_nested_node_modules` guards its deeper recursion with +/// `if file_type.is_dir()`, matching its sibling scanners; this pins +/// that behavior. +#[cfg(unix)] +#[tokio::test] +async fn crawl_all_does_not_recurse_through_symlinked_nested_package() { + use std::os::unix::fs::symlink; + + // The "store" lives OUTSIDE the crawled cwd, so the only route to it + // is through the symlink — not via workspace discovery. + let store = tempfile::tempdir().unwrap(); + let linked_pkg = store.path().join("linked-pkg"); + stage_npm_pkg(store.path(), "linked-pkg", "2.0.0").await; + // The store package has its own nested node_modules with a package + // that must only be reachable by following the symlink. + stage_npm_pkg(&linked_pkg.join("node_modules"), "buried", "3.0.0").await; + + let tmp = tempfile::tempdir().unwrap(); + let nm = tmp.path().join("node_modules"); + // A real host package with a real nested node_modules... 
+ stage_npm_pkg(&nm, "host", "1.0.0").await; + let host_nm = nm.join("host").join("node_modules"); + tokio::fs::create_dir_all(&host_nm).await.unwrap(); + // ...containing a SYMLINK to the out-of-tree store package. + symlink(&linked_pkg, host_nm.join("linked-pkg")).unwrap(); + + let crawler = NpmCrawler; + let opts = options_at(tmp.path()); + let result = crawler.crawl_all(&opts).await; + let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect(); + + assert!(names.contains(&"host"), "real host package must be found"); + assert!( + names.contains(&"linked-pkg"), + "the symlinked package itself must still be recorded" + ); + assert!( + !names.contains(&"buried"), + "crawler must not recurse through the symlink into the store" + ); +} diff --git a/crates/socket-patch-core/tests/crawler_nuget_e2e.rs b/crates/socket-patch-core/tests/crawler_nuget_e2e.rs index 95e18316..deb28910 100644 --- a/crates/socket-patch-core/tests/crawler_nuget_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_nuget_e2e.rs @@ -46,7 +46,9 @@ async fn stage_global_cache_pkg(root: &Path, name: &str, version: &str) -> std:: /// `packages.config` projects. async fn stage_legacy_pkg(root: &Path, name: &str, version: &str) -> std::path::PathBuf { let pkg_dir = root.join(format!("{name}.{version}")); - tokio::fs::create_dir_all(pkg_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); tokio::fs::write( pkg_dir.join(format!("{name}.nuspec")), format!( @@ -109,10 +111,18 @@ async fn find_by_purls_case_insensitive_legacy_layout() { .find_by_purls(tmp.path(), &[ORG_PURL_A.to_string()]) .await .unwrap(); - assert_eq!(result.len(), 1, "package must be found via either fast or case-insensitive path"); + assert_eq!( + result.len(), + 1, + "package must be found via either fast or case-insensitive path" + ); let found = result.get(ORG_PURL_A).unwrap(); // Either casing is acceptable; the contract is "matched something". 
- assert!(found.path.exists(), "returned path must exist; got {:?}", found.path); + assert!( + found.path.exists(), + "returned path must exist; got {:?}", + found.path + ); } #[tokio::test] @@ -133,10 +143,7 @@ async fn find_by_purls_invalid_purl_skipped() { stage_global_cache_pkg(tmp.path(), "Newtonsoft.Json", "13.0.3").await; let crawler = NuGetCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:not-nuget/Foo@1.0".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:not-nuget/Foo@1.0".to_string()]) .await .unwrap(); assert!(result.is_empty(), "non-nuget PURLs must be skipped"); @@ -161,10 +168,7 @@ async fn crawl_all_discovers_global_cache_layout() { let result = crawler.crawl_all(&opts).await; assert_eq!(result.len(), 2); // The crawler lowercases the discovered name from the directory. - let purls: Vec = result - .iter() - .map(|p| p.purl.to_ascii_lowercase()) - .collect(); + let purls: Vec = result.iter().map(|p| p.purl.to_ascii_lowercase()).collect(); assert!(purls.iter().any(|p| p.contains("newtonsoft.json"))); assert!(purls.iter().any(|p| p.contains("serilog"))); } @@ -183,7 +187,10 @@ async fn crawl_all_discovers_legacy_layout() { batch_size: 100, }; let result = crawler.crawl_all(&opts).await; - assert!(result.len() >= 2, "legacy layout must be discovered; got {result:?}"); + assert!( + result.len() >= 2, + "legacy layout must be discovered; got {result:?}" + ); } #[tokio::test] @@ -194,7 +201,9 @@ async fn crawl_all_skips_hidden_directories() { // Hidden dir that mimics a package layout — must be skipped. let hidden = tmp.path().join(".cache").join("13.0.3"); tokio::fs::create_dir_all(&hidden).await.unwrap(); - tokio::fs::write(hidden.join(".cache.nuspec"), b"").await.unwrap(); + tokio::fs::write(hidden.join(".cache.nuspec"), b"") + .await + .unwrap(); let crawler = NuGetCrawler; let opts = CrawlerOptions { @@ -207,7 +216,10 @@ async fn crawl_all_skips_hidden_directories() { // Only the real package should show up. 
assert_eq!(result.len(), 1); assert!( - result[0].purl.to_ascii_lowercase().contains("newtonsoft.json"), + result[0] + .purl + .to_ascii_lowercase() + .contains("newtonsoft.json"), "expected newtonsoft.json; got {:?}", result[0].purl ); @@ -238,8 +250,14 @@ async fn get_nuget_package_paths_local_discovers_packages_dir() { tokio::fs::create_dir_all(&pkg).await.unwrap(); let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); - assert!(paths.iter().any(|p| p == &pkg), "packages/ must be discovered; got {paths:?}"); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); + assert!( + paths.iter().any(|p| p == &pkg), + "packages/ must be discovered; got {paths:?}" + ); } #[tokio::test] @@ -259,7 +277,10 @@ async fn get_nuget_package_paths_local_with_csproj_falls_back_to_global() { std::env::set_var("NUGET_PACKAGES", nuget_root.path()); let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("NUGET_PACKAGES"); if let Some(v) = prev { @@ -278,7 +299,10 @@ async fn get_nuget_package_paths_local_no_project_returns_empty() { let tmp = tempfile::tempdir().unwrap(); // No `packages/`, no `.csproj`, no `.sln`, no `obj/`. 
let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); assert!(paths.is_empty(), "non-.NET dir must return empty paths"); } @@ -286,15 +310,21 @@ async fn get_nuget_package_paths_local_no_project_returns_empty() { #[serial] async fn get_nuget_package_paths_with_sln_falls_back_to_global() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("MySolution.sln"), b"Microsoft Visual Studio Solution File") - .await - .unwrap(); + tokio::fs::write( + tmp.path().join("MySolution.sln"), + b"Microsoft Visual Studio Solution File", + ) + .await + .unwrap(); let nuget_root = tempfile::tempdir().unwrap(); let prev = std::env::var("NUGET_PACKAGES").ok(); std::env::set_var("NUGET_PACKAGES", nuget_root.path()); let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("NUGET_PACKAGES"); if let Some(v) = prev { @@ -316,21 +346,28 @@ async fn find_by_purls_rejects_dir_without_nuspec_or_lib() { let pkg_dir = tmp.path().join("newtonsoft.json").join("13.0.3"); tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); // No .nuspec, no lib/ — just an unrelated file. 
- tokio::fs::write(pkg_dir.join("README.md"), b"hello").await.unwrap(); + tokio::fs::write(pkg_dir.join("README.md"), b"hello") + .await + .unwrap(); let crawler = NuGetCrawler; let result = crawler .find_by_purls(tmp.path(), &[ORG_PURL_A.to_string()]) .await .unwrap(); - assert!(result.is_empty(), "dir without nuspec or lib/ must not match"); + assert!( + result.is_empty(), + "dir without nuspec or lib/ must not match" + ); } #[tokio::test] async fn find_by_purls_with_lib_dir_marker_succeeds() { let tmp = tempfile::tempdir().unwrap(); let pkg_dir = tmp.path().join("newtonsoft.json").join("13.0.3"); - tokio::fs::create_dir_all(pkg_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); // No .nuspec but lib/ is present — verify accepts it. let crawler = NuGetCrawler; @@ -408,7 +445,9 @@ async fn crawl_all_skips_files_at_top_level() { // Stage a real package so the scan actually runs. let _pkg = stage_global_cache_pkg(tmp.path(), "newtonsoft.json", "13.0.3").await; // Plain file at the top level — must be skipped. 
- tokio::fs::write(tmp.path().join("readme.txt"), b"not a package").await.unwrap(); + tokio::fs::write(tmp.path().join("readme.txt"), b"not a package") + .await + .unwrap(); let crawler = NuGetCrawler; let opts = CrawlerOptions { @@ -419,7 +458,9 @@ async fn crawl_all_skips_files_at_top_level() { }; let result = crawler.crawl_all(&opts).await; let names: Vec<&str> = result.iter().map(|p| p.name.as_str()).collect(); - assert!(names.iter().any(|n| n.eq_ignore_ascii_case("newtonsoft.json"))); + assert!(names + .iter() + .any(|n| n.eq_ignore_ascii_case("newtonsoft.json"))); assert_eq!(result.len(), 1, "plain file must be skipped"); } @@ -518,7 +559,10 @@ async fn get_nuget_package_paths_global_mode_missing_home_returns_empty() { std::env::remove_var("HOME"); } - assert!(paths.is_empty(), "missing global cache dir must yield empty; got {paths:?}"); + assert!( + paths.is_empty(), + "missing global cache dir must yield empty; got {paths:?}" + ); } /// `is_dotnet_project` accepts a NuGet.Config marker without any @@ -528,13 +572,18 @@ async fn get_nuget_package_paths_global_mode_missing_home_returns_empty() { #[serial] async fn get_nuget_package_paths_with_nuget_config_falls_back_to_global() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("NuGet.Config"), b"").await.unwrap(); + tokio::fs::write(tmp.path().join("NuGet.Config"), b"") + .await + .unwrap(); let nuget_root = tempfile::tempdir().unwrap(); let prev = std::env::var("NUGET_PACKAGES").ok(); std::env::set_var("NUGET_PACKAGES", nuget_root.path()); let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("NUGET_PACKAGES"); if let Some(v) = prev { @@ -569,7 +618,9 @@ async fn get_nuget_package_paths_discovers_assets_json_package_folders() { serde_json::Value::Object(serde_json::Map::new()), ); let assets = 
serde_json::json!({ "packageFolders": folders }).to_string(); - tokio::fs::write(obj.join("project.assets.json"), assets).await.unwrap(); + tokio::fs::write(obj.join("project.assets.json"), assets) + .await + .unwrap(); // Also need a project marker to satisfy is_dotnet_project (so the // global-cache fallback path runs as well) — but assets discovery // is independent, so this test exercises the obj-path branch even @@ -579,7 +630,10 @@ async fn get_nuget_package_paths_discovers_assets_json_package_folders() { std::env::set_var("NUGET_PACKAGES", nuget_root.path()); let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("NUGET_PACKAGES"); if let Some(v) = prev { @@ -609,14 +663,19 @@ async fn get_nuget_package_paths_discovers_assets_json_in_subproject() { serde_json::Value::Object(serde_json::Map::new()), ); let assets = serde_json::json!({ "packageFolders": folders }).to_string(); - tokio::fs::write(sub_obj.join("project.assets.json"), assets).await.unwrap(); + tokio::fs::write(sub_obj.join("project.assets.json"), assets) + .await + .unwrap(); let prev = std::env::var("NUGET_PACKAGES").ok(); let nuget_root = tempfile::tempdir().unwrap(); std::env::set_var("NUGET_PACKAGES", nuget_root.path()); let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("NUGET_PACKAGES"); if let Some(v) = prev { @@ -637,7 +696,9 @@ async fn get_nuget_package_paths_assets_json_empty_packagefolders_yields_no_path let tmp = tempfile::tempdir().unwrap(); let obj = tmp.path().join("obj"); tokio::fs::create_dir_all(&obj).await.unwrap(); - tokio::fs::write(obj.join("project.assets.json"), br#"{"packageFolders":{}}"#).await.unwrap(); + 
tokio::fs::write(obj.join("project.assets.json"), br#"{"packageFolders":{}}"#) + .await + .unwrap(); let prev = std::env::var("NUGET_PACKAGES").ok(); let prev_home = std::env::var("HOME").ok(); @@ -645,7 +706,10 @@ async fn get_nuget_package_paths_assets_json_empty_packagefolders_yields_no_path std::env::set_var("HOME", tmp.path()); let crawler = NuGetCrawler; - let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("NUGET_PACKAGES"); if let Some(v) = prev { @@ -668,7 +732,9 @@ async fn get_nuget_package_paths_assets_json_malformed_skipped() { let tmp = tempfile::tempdir().unwrap(); let obj = tmp.path().join("obj"); tokio::fs::create_dir_all(&obj).await.unwrap(); - tokio::fs::write(obj.join("project.assets.json"), b"this is not json").await.unwrap(); + tokio::fs::write(obj.join("project.assets.json"), b"this is not json") + .await + .unwrap(); let prev = std::env::var("NUGET_PACKAGES").ok(); let prev_home = std::env::var("HOME").ok(); @@ -677,7 +743,10 @@ async fn get_nuget_package_paths_assets_json_malformed_skipped() { let crawler = NuGetCrawler; // Must succeed with no panic, returning empty. 
- let paths = crawler.get_nuget_package_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_nuget_package_paths(&options_at(tmp.path())) + .await + .unwrap(); std::env::remove_var("NUGET_PACKAGES"); if let Some(v) = prev { @@ -689,5 +758,8 @@ async fn get_nuget_package_paths_assets_json_malformed_skipped() { std::env::remove_var("HOME"); } - assert!(paths.is_empty(), "malformed assets.json must be skipped; got {paths:?}"); + assert!( + paths.is_empty(), + "malformed assets.json must be skipped; got {paths:?}" + ); } diff --git a/crates/socket-patch-core/tests/crawler_python_e2e.rs b/crates/socket-patch-core/tests/crawler_python_e2e.rs index 4bffa74f..eae589d5 100644 --- a/crates/socket-patch-core/tests/crawler_python_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_python_e2e.rs @@ -23,10 +23,14 @@ use socket_patch_core::crawlers::PythonCrawler; #[test] fn parse_python_site_packages_output_well_formed() { - let stdout = "/usr/local/lib/python3.11/site-packages\n/usr/local/lib/python3.11/dist-packages\n"; + let stdout = + "/usr/local/lib/python3.11/site-packages\n/usr/local/lib/python3.11/dist-packages\n"; let paths = parse_python_site_packages_output(stdout); assert_eq!(paths.len(), 2); - assert_eq!(paths[0], std::path::PathBuf::from("/usr/local/lib/python3.11/site-packages")); + assert_eq!( + paths[0], + std::path::PathBuf::from("/usr/local/lib/python3.11/site-packages") + ); } #[test] @@ -50,8 +54,11 @@ fn parse_python_site_packages_output_trims_and_skips_blanks() { /// without needing python3 on the host's PATH. 
#[test] fn find_python_command_with_mock_runner_prefers_python3() { - let runner = common::MockCommandRunner::new() - .with_response("python3", &["--version"], Some("Python 3.11.5\n")); + let runner = common::MockCommandRunner::new().with_response( + "python3", + &["--version"], + Some("Python 3.11.5\n"), + ); assert_eq!(find_python_command_with(&runner), Some("python3")); } @@ -59,8 +66,11 @@ fn find_python_command_with_mock_runner_prefers_python3() { /// fall through to the second candidate. #[test] fn find_python_command_with_mock_runner_falls_through_to_python() { - let runner = common::MockCommandRunner::new() - .with_response("python", &["--version"], Some("Python 2.7.18\n")); + let runner = common::MockCommandRunner::new().with_response( + "python", + &["--version"], + Some("Python 2.7.18\n"), + ); assert_eq!(find_python_command_with(&runner), Some("python")); } @@ -100,8 +110,11 @@ async fn find_python_dirs_python3_wildcard_matches_versions() { .await .unwrap(); - let result = - find_python_dirs(tmp.path(), &["python3.*", "lib", "python3.*", "site-packages"]).await; + let result = find_python_dirs( + tmp.path(), + &["python3.*", "lib", "python3.*", "site-packages"], + ) + .await; assert!( result.iter().any(|r| r == &p1), "must find python3.11 layout; got {result:?}" @@ -114,15 +127,26 @@ async fn find_python_dirs_python3_wildcard_matches_versions() { #[tokio::test] async fn find_python_dirs_star_wildcard_matches_all() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::create_dir_all(tmp.path().join("pkg_a").join("lib").join("python3.11").join("site-packages")) - .await - .unwrap(); - tokio::fs::create_dir_all(tmp.path().join("pkg_b").join("lib").join("python3.11").join("site-packages")) - .await - .unwrap(); + tokio::fs::create_dir_all( + tmp.path() + .join("pkg_a") + .join("lib") + .join("python3.11") + .join("site-packages"), + ) + .await + .unwrap(); + tokio::fs::create_dir_all( + tmp.path() + .join("pkg_b") + .join("lib") + .join("python3.11") + 
.join("site-packages"), + ) + .await + .unwrap(); - let result = - find_python_dirs(tmp.path(), &["*", "lib", "python3.*", "site-packages"]).await; + let result = find_python_dirs(tmp.path(), &["*", "lib", "python3.*", "site-packages"]).await; assert_eq!(result.len(), 2, "* must match both pkg_a and pkg_b"); } @@ -132,14 +156,21 @@ async fn find_python_dirs_star_wildcard_matches_all() { async fn find_python_dirs_star_wildcard_skips_files() { let tmp = tempfile::tempdir().unwrap(); // A regular file at the wildcard position must NOT cause issues. - tokio::fs::write(tmp.path().join("not_a_dir.txt"), b"x").await.unwrap(); - // And one real match. - tokio::fs::create_dir_all(tmp.path().join("real").join("lib").join("python3.11").join("site-packages")) + tokio::fs::write(tmp.path().join("not_a_dir.txt"), b"x") .await .unwrap(); + // And one real match. + tokio::fs::create_dir_all( + tmp.path() + .join("real") + .join("lib") + .join("python3.11") + .join("site-packages"), + ) + .await + .unwrap(); - let result = - find_python_dirs(tmp.path(), &["*", "lib", "python3.*", "site-packages"]).await; + let result = find_python_dirs(tmp.path(), &["*", "lib", "python3.*", "site-packages"]).await; assert_eq!(result.len(), 1, "regular file must be skipped"); } @@ -470,7 +501,9 @@ async fn get_site_packages_paths_falls_back_via_pyproject_marker() { async fn get_site_packages_paths_falls_back_via_uv_lock_marker() { let project = tempfile::tempdir().unwrap(); let home = tempfile::tempdir().unwrap(); - tokio::fs::write(project.path().join("uv.lock"), b"version = 1\n").await.unwrap(); + tokio::fs::write(project.path().join("uv.lock"), b"version = 1\n") + .await + .unwrap(); let prev_home = std::env::var("HOME").ok(); std::env::set_var("HOME", home.path()); @@ -534,27 +567,26 @@ async fn read_python_metadata_well_formed() { .unwrap(); let result = read_python_metadata(&dist_info).await; - assert_eq!( - result, - Some(("requests".to_string(), "2.28.0".to_string())) - ); + 
assert_eq!(result, Some(("requests".to_string(), "2.28.0".to_string()))); } -/// Missing METADATA file → None. +/// Missing METADATA file → fall back to the `{name}-{version}.dist-info` +/// directory name so a partially-written install stays discoverable. #[tokio::test] -async fn read_python_metadata_missing_file_returns_none() { +async fn read_python_metadata_missing_file_falls_back_to_dir_name() { let tmp = tempfile::tempdir().unwrap(); let dist_info = tmp.path().join("requests-2.28.0.dist-info"); tokio::fs::create_dir(&dist_info).await.unwrap(); // No METADATA file. let result = read_python_metadata(&dist_info).await; - assert_eq!(result, None); + assert_eq!(result, Some(("requests".to_string(), "2.28.0".to_string()))); } -/// METADATA missing Name field → None. +/// METADATA missing Name field → headers are unusable, so fall back to the +/// directory name rather than dropping the package. #[tokio::test] -async fn read_python_metadata_missing_name_returns_none() { +async fn read_python_metadata_missing_name_falls_back_to_dir_name() { let tmp = tempfile::tempdir().unwrap(); let dist_info = tmp.path().join("requests-2.28.0.dist-info"); tokio::fs::create_dir(&dist_info).await.unwrap(); @@ -566,7 +598,7 @@ async fn read_python_metadata_missing_name_returns_none() { .unwrap(); let result = read_python_metadata(&dist_info).await; - assert_eq!(result, None); + assert_eq!(result, Some(("requests".to_string(), "2.28.0".to_string()))); } #[path = "common/mod.rs"] @@ -638,7 +670,9 @@ async fn stage_dist_info(site_packages: &Path, raw_name: &str, version: &str) { let dist = site_packages.join(format!("{raw_name}-{version}.dist-info")); tokio::fs::create_dir_all(&dist).await.unwrap(); let metadata = format!("Metadata-Version: 2.1\nName: {raw_name}\nVersion: {version}\n"); - tokio::fs::write(dist.join("METADATA"), metadata).await.unwrap(); + tokio::fs::write(dist.join("METADATA"), metadata) + .await + .unwrap(); } #[tokio::test] @@ -728,7 +762,9 @@ async fn 
crawl_all_via_site_packages_finds_dist_info_packages() { stage_dist_info(tmp.path(), "Requests", "2.28.0").await; stage_dist_info(tmp.path(), "urllib3", "2.0.0").await; // A non-dist-info dir should be skipped. - tokio::fs::create_dir_all(tmp.path().join("ignore-me")).await.unwrap(); + tokio::fs::create_dir_all(tmp.path().join("ignore-me")) + .await + .unwrap(); let crawler = PythonCrawler; let opts = CrawlerOptions { @@ -745,11 +781,13 @@ async fn crawl_all_via_site_packages_finds_dist_info_packages() { } #[tokio::test] -async fn crawl_all_with_corrupt_metadata_skips() { +async fn crawl_all_with_unparseable_dist_info_skips() { let tmp = tempfile::tempdir().unwrap(); - let dist = tmp.path().join("broken-1.0.0.dist-info"); + // No version segment in the directory name, so neither the (empty) + // METADATA nor the dir-name fallback can yield a name/version — the + // package is genuinely unidentifiable and must be skipped. + let dist = tmp.path().join("corrupt.dist-info"); tokio::fs::create_dir_all(&dist).await.unwrap(); - // Empty METADATA — read_python_metadata returns None. tokio::fs::write(dist.join("METADATA"), b"").await.unwrap(); let crawler = PythonCrawler; @@ -760,7 +798,10 @@ async fn crawl_all_with_corrupt_metadata_skips() { batch_size: 100, }; let result = crawler.crawl_all(&opts).await; - assert!(result.is_empty(), "broken METADATA must be skipped"); + assert!( + result.is_empty(), + "a dist-info with no usable metadata or version-bearing name must be skipped" + ); } /// `get_site_packages_paths` with `global_prefix` set returns just that @@ -784,36 +825,35 @@ async fn get_site_packages_paths_with_global_prefix_passthrough() { // ── METADATA early-break arm ─────────────────────────────────── -/// METADATA with extra header lines AFTER the blank line should NOT be -/// parsed — the parser must stop at the first blank line after -/// collecting name+version. 
Covers `python_crawler.rs:80-81` (the -/// blank-line break path that fires before both fields are set). +/// METADATA header lines AFTER the blank line must NOT be parsed — the +/// header parser stops at the first blank line. Here only `Name` is set +/// before the blank line, so the `Version` below it is never read from the +/// headers; the function then falls back to the directory name. We give the +/// directory a *different* version (`9.9.9`) than the post-blank-line header +/// (`2.28.0`) so the result proves the blank-line break fired: a `2.28.0` +/// result would mean the break leaked the trailing header. #[tokio::test] -async fn read_python_metadata_stops_at_blank_line_after_headers() { +async fn read_python_metadata_stops_at_blank_line_then_falls_back() { let tmp = tempfile::tempdir().unwrap(); - let dist = tmp.path().join("requests-2.28.0.dist-info"); + let dist = tmp.path().join("requests-9.9.9.dist-info"); tokio::fs::create_dir(&dist).await.unwrap(); - // Only `Name` is set when we hit the blank line — version is still - // None, so the early both-set break (L71-72) does NOT fire. Instead - // we must take the blank-line break at L80-81. After break, the - // final-match arm returns None because version was never set. - tokio::fs::write( - dist.join("METADATA"), - "Name: requests\n\nVersion: 2.28.0\n", - ) - .await - .unwrap(); + tokio::fs::write(dist.join("METADATA"), "Name: requests\n\nVersion: 2.28.0\n") + .await + .unwrap(); let result = read_python_metadata(&dist).await; assert_eq!( - result, None, - "blank-line break must fire before Version is read; got {result:?}" + result, + Some(("requests".to_string(), "9.9.9".to_string())), + "blank-line break must fire before Version is read, so the version \ + comes from the dir name; got {result:?}" ); } -/// METADATA missing Version field → None. +/// METADATA missing Version field → headers unusable, fall back to the +/// directory name. 
#[tokio::test] -async fn read_python_metadata_missing_version_returns_none() { +async fn read_python_metadata_missing_version_falls_back_to_dir_name() { let tmp = tempfile::tempdir().unwrap(); let dist_info = tmp.path().join("requests-2.28.0.dist-info"); tokio::fs::create_dir(&dist_info).await.unwrap(); @@ -825,5 +865,5 @@ async fn read_python_metadata_missing_version_returns_none() { .unwrap(); let result = read_python_metadata(&dist_info).await; - assert_eq!(result, None); + assert_eq!(result, Some(("requests".to_string(), "2.28.0".to_string()))); } diff --git a/crates/socket-patch-core/tests/crawler_ruby_e2e.rs b/crates/socket-patch-core/tests/crawler_ruby_e2e.rs index e4789fad..1e33f4e2 100644 --- a/crates/socket-patch-core/tests/crawler_ruby_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_ruby_e2e.rs @@ -40,7 +40,9 @@ fn options_at(root: &Path) -> CrawlerOptions { /// accepts it. async fn stage_gem(gem_path: &Path, name: &str, version: &str) -> std::path::PathBuf { let pkg_dir = gem_path.join(format!("{name}-{version}")); - tokio::fs::create_dir_all(pkg_dir.join("lib")).await.unwrap(); + tokio::fs::create_dir_all(pkg_dir.join("lib")) + .await + .unwrap(); pkg_dir } @@ -66,7 +68,9 @@ async fn find_by_purls_accepts_gem_with_gemspec_only() { // Stage with .gemspec but NO lib/ directory (alternate marker). 
let pkg_dir = tmp.path().join("rails-7.1.0"); tokio::fs::create_dir(&pkg_dir).await.unwrap(); - tokio::fs::write(pkg_dir.join("rails.gemspec"), b"# gemspec").await.unwrap(); + tokio::fs::write(pkg_dir.join("rails.gemspec"), b"# gemspec") + .await + .unwrap(); let crawler = RubyCrawler; let result = crawler @@ -107,10 +111,7 @@ async fn find_by_purls_invalid_purl_skipped() { let tmp = tempfile::tempdir().unwrap(); let crawler = RubyCrawler; let result = crawler - .find_by_purls( - tmp.path(), - &["pkg:not-gem/rails@7.1.0".to_string()], - ) + .find_by_purls(tmp.path(), &["pkg:not-gem/rails@7.1.0".to_string()]) .await .unwrap(); assert!(result.is_empty()); @@ -161,7 +162,10 @@ async fn get_gem_paths_vendor_bundle_takes_precedence_over_global() { tokio::fs::create_dir_all(&gems).await.unwrap(); let crawler = RubyCrawler; - let paths = crawler.get_gem_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_gem_paths(&options_at(tmp.path())) + .await + .unwrap(); assert!( paths.iter().any(|p| p == &gems), "vendor/bundle gems dir must be discovered; got {paths:?}" @@ -173,7 +177,10 @@ async fn get_gem_paths_no_gemfile_returns_empty() { let tmp = tempfile::tempdir().unwrap(); // No Gemfile, no Gemfile.lock, no vendor/bundle. let crawler = RubyCrawler; - let paths = crawler.get_gem_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_gem_paths(&options_at(tmp.path())) + .await + .unwrap(); assert!(paths.is_empty(), "non-Ruby dir must return empty paths"); } @@ -185,10 +192,15 @@ async fn get_gem_paths_with_gemfile_no_vendor_returns_paths() { // This either returns paths (if `gem` is on PATH and produces output) // or empty (if `gem` is missing). Both are valid — the contract is // "doesn't crash". 
- tokio::fs::write(tmp.path().join("Gemfile"), b"source 'https://rubygems.org'").await.unwrap(); + tokio::fs::write(tmp.path().join("Gemfile"), b"source 'https://rubygems.org'") + .await + .unwrap(); let crawler = RubyCrawler; - let _ = crawler.get_gem_paths(&options_at(tmp.path())).await.unwrap(); + let _ = crawler + .get_gem_paths(&options_at(tmp.path())) + .await + .unwrap(); // No assertion on contents — just contract that no panic occurs. } @@ -196,9 +208,14 @@ async fn get_gem_paths_with_gemfile_no_vendor_returns_paths() { #[serial] async fn get_gem_paths_with_gemfile_lock_only_works_too() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("Gemfile.lock"), b"GEM\n").await.unwrap(); + tokio::fs::write(tmp.path().join("Gemfile.lock"), b"GEM\n") + .await + .unwrap(); let crawler = RubyCrawler; - let _ = crawler.get_gem_paths(&options_at(tmp.path())).await.unwrap(); + let _ = crawler + .get_gem_paths(&options_at(tmp.path())) + .await + .unwrap(); } // ── global gem discovery ─────────────────────────────────────── @@ -282,14 +299,22 @@ fn ruby_crawler_default_and_new_construct_cleanly() { #[serial] async fn get_gem_paths_local_gemfile_no_gem_binary_returns_empty() { let tmp = tempfile::tempdir().unwrap(); - tokio::fs::write(tmp.path().join("Gemfile"), b"source 'https://rubygems.org'\n").await.unwrap(); + tokio::fs::write( + tmp.path().join("Gemfile"), + b"source 'https://rubygems.org'\n", + ) + .await + .unwrap(); let empty_path = tempfile::tempdir().unwrap(); let prev = std::env::var("PATH").ok(); std::env::set_var("PATH", empty_path.path()); let crawler = RubyCrawler; - let paths = crawler.get_gem_paths(&options_at(tmp.path())).await.unwrap(); + let paths = crawler + .get_gem_paths(&options_at(tmp.path())) + .await + .unwrap(); if let Some(v) = prev { std::env::set_var("PATH", v); @@ -297,7 +322,10 @@ async fn get_gem_paths_local_gemfile_no_gem_binary_returns_empty() { std::env::remove_var("PATH"); } - assert!(paths.is_empty(), "no gem 
binary + no vendor must yield empty"); + assert!( + paths.is_empty(), + "no gem binary + no vendor must yield empty" + ); } /// Global mode with `gem` not on PATH and HOME pointing at a tempdir diff --git a/crates/socket-patch-core/tests/crawlers_empty_paths_e2e.rs b/crates/socket-patch-core/tests/crawlers_empty_paths_e2e.rs index d1fbca1f..c93c3d8c 100644 --- a/crates/socket-patch-core/tests/crawlers_empty_paths_e2e.rs +++ b/crates/socket-patch-core/tests/crawlers_empty_paths_e2e.rs @@ -5,7 +5,6 @@ //! exercise because those tests always pre-stage a layout. use socket_patch_core::crawlers::types::CrawlerOptions; -use socket_patch_core::crawlers::{NpmCrawler, PythonCrawler, RubyCrawler}; #[cfg(feature = "cargo")] use socket_patch_core::crawlers::CargoCrawler; #[cfg(feature = "golang")] @@ -14,6 +13,7 @@ use socket_patch_core::crawlers::GoCrawler; use socket_patch_core::crawlers::MavenCrawler; #[cfg(feature = "nuget")] use socket_patch_core::crawlers::NuGetCrawler; +use socket_patch_core::crawlers::{NpmCrawler, PythonCrawler, RubyCrawler}; use std::path::PathBuf; /// `CrawlerOptions::default()` should populate cwd from @@ -47,10 +47,7 @@ fn options_at(root: &std::path::Path) -> CrawlerOptions { async fn npm_crawler_find_by_purls_with_empty_purls_returns_empty_map() { let tmp = tempfile::tempdir().unwrap(); let crawler = NpmCrawler; - let result = crawler - .find_by_purls(tmp.path(), &[]) - .await - .unwrap(); + let result = crawler.find_by_purls(tmp.path(), &[]).await.unwrap(); assert!(result.is_empty(), "empty PURL list → empty result"); } @@ -60,10 +57,7 @@ async fn npm_crawler_find_by_purls_with_nonexistent_node_modules_returns_empty() let nonexistent = tmp.path().join("missing_node_modules"); let crawler = NpmCrawler; let result = crawler - .find_by_purls( - &nonexistent, - &["pkg:npm/lodash@4.17.21".to_string()], - ) + .find_by_purls(&nonexistent, &["pkg:npm/lodash@4.17.21".to_string()]) .await .unwrap(); assert!(result.is_empty(), "nonexistent node_modules → 
empty"); diff --git a/crates/socket-patch-core/tests/fuzzy_match_e2e.rs b/crates/socket-patch-core/tests/fuzzy_match_e2e.rs index c61eccb0..0e725ff8 100644 --- a/crates/socket-patch-core/tests/fuzzy_match_e2e.rs +++ b/crates/socket-patch-core/tests/fuzzy_match_e2e.rs @@ -32,7 +32,11 @@ fn exact_full_name_match_wins() { pkg("node-fetch", "3.0.0", None), ]; let results = fuzzy_match_packages("@types/node", &packages, 20); - assert_eq!(results.len(), 1, "exact full-name match excludes substrings"); + assert_eq!( + results.len(), + 1, + "exact full-name match excludes substrings" + ); assert_eq!(results[0].name, "node"); assert_eq!(results[0].namespace.as_deref(), Some("@types")); } @@ -52,7 +56,10 @@ fn exact_name_match_wins_over_prefix() { #[test] fn prefix_match_orders_before_contains() { - let packages = vec![pkg("lodash", "4.17.21", None), pkg("lodash-es", "4.17.21", None)]; + let packages = vec![ + pkg("lodash", "4.17.21", None), + pkg("lodash-es", "4.17.21", None), + ]; let results = fuzzy_match_packages("lodash", &packages, 20); assert_eq!(results.len(), 2); assert_eq!( @@ -90,6 +97,18 @@ fn case_insensitive_match() { assert_eq!(results.len(), 1); } +#[test] +fn same_tier_ties_break_case_insensitively() { + // Both names contain "e" (prefix of neither), so they share a match tier + // and the alphabetical tie-break — which must ignore case — decides which + // package becomes `matches[0]` and drives the patch lookup in `get`. 
+ let packages = vec![pkg("Zebra", "1.0.0", None), pkg("apple", "1.0.0", None)]; + let results = fuzzy_match_packages("e", &packages, 20); + assert_eq!(results.len(), 2); + assert_eq!(results[0].name, "apple"); + assert_eq!(results[1].name, "Zebra"); +} + #[test] fn limit_caps_result_count() { let packages: Vec = (0..50) diff --git a/crates/socket-patch-core/tests/package_e2e.rs b/crates/socket-patch-core/tests/package_e2e.rs index 39503e35..264e8891 100644 --- a/crates/socket-patch-core/tests/package_e2e.rs +++ b/crates/socket-patch-core/tests/package_e2e.rs @@ -14,9 +14,7 @@ use std::path::Path; use flate2::write::GzEncoder; use flate2::Compression; use socket_patch_core::manifest::schema::PatchFileInfo; -use socket_patch_core::patch::package::{ - read_archive_filtered, read_archive_to_map, ArchiveError, -}; +use socket_patch_core::patch::package::{read_archive_filtered, read_archive_to_map, ArchiveError}; use tar::Builder; /// Helper: write a small gzipped tar archive containing `(name, diff --git a/crates/socket-patch-core/tests/rollback_new_file_e2e.rs b/crates/socket-patch-core/tests/rollback_new_file_e2e.rs index 056492f0..0a5f71dc 100644 --- a/crates/socket-patch-core/tests/rollback_new_file_e2e.rs +++ b/crates/socket-patch-core/tests/rollback_new_file_e2e.rs @@ -57,8 +57,7 @@ async fn verify_new_file_rollback_already_original_when_missing() { before_hash: String::new(), after_hash: git_sha256(b"never written"), }; - let result = - verify_file_rollback(pkg, "package/never_existed.txt", &file_info, &blobs).await; + let result = verify_file_rollback(pkg, "package/never_existed.txt", &file_info, &blobs).await; assert_eq!(result.status, VerifyRollbackStatus::AlreadyOriginal); } @@ -76,14 +75,17 @@ async fn verify_new_file_rollback_hash_mismatch_when_user_modified() { // Manifest claims this is the post-patch content... let after = git_sha256(b"patched content the file should have had"); // ...but the on-disk content has been mutated since. 
- std::fs::write(pkg.join("user_modified.txt"), b"user wrote something different").unwrap(); + std::fs::write( + pkg.join("user_modified.txt"), + b"user wrote something different", + ) + .unwrap(); let file_info = PatchFileInfo { before_hash: String::new(), after_hash: after, }; - let result = - verify_file_rollback(pkg, "package/user_modified.txt", &file_info, &blobs).await; + let result = verify_file_rollback(pkg, "package/user_modified.txt", &file_info, &blobs).await; assert_eq!(result.status, VerifyRollbackStatus::HashMismatch); assert!(result.message.as_ref().unwrap().contains("modified")); } @@ -102,13 +104,7 @@ async fn verify_existing_file_rollback_not_found_when_missing() { before_hash: git_sha256(b"original"), after_hash: git_sha256(b"patched"), }; - let result = verify_file_rollback( - pkg, - "package/does_not_exist.txt", - &file_info, - &blobs, - ) - .await; + let result = verify_file_rollback(pkg, "package/does_not_exist.txt", &file_info, &blobs).await; assert_eq!(result.status, VerifyRollbackStatus::NotFound); assert!(result.message.as_ref().unwrap().contains("not found")); } diff --git a/crates/socket-patch-core/tests/telemetry_helpers_e2e.rs b/crates/socket-patch-core/tests/telemetry_helpers_e2e.rs index aeccfeee..14a0c668 100644 --- a/crates/socket-patch-core/tests/telemetry_helpers_e2e.rs +++ b/crates/socket-patch-core/tests/telemetry_helpers_e2e.rs @@ -48,7 +48,10 @@ fn telemetry_disabled_when_vitest_env_is_true() { let prev_vitest = std::env::var("VITEST").ok(); std::env::remove_var("SOCKET_TELEMETRY_DISABLED"); std::env::set_var("VITEST", "true"); - assert!(is_telemetry_disabled(), "VITEST=true must disable telemetry"); + assert!( + is_telemetry_disabled(), + "VITEST=true must disable telemetry" + ); std::env::remove_var("VITEST"); if let Some(v) = prev { std::env::set_var("SOCKET_TELEMETRY_DISABLED", v); @@ -155,9 +158,15 @@ fn telemetry_not_disabled_when_socket_offline_unset_or_falsy() { 
std::env::remove_var("SOCKET_PATCH_TELEMETRY_DISABLED"); std::env::remove_var("VITEST"); std::env::set_var("SOCKET_OFFLINE", "0"); - assert!(!is_telemetry_disabled(), "SOCKET_OFFLINE=0 must not engage gate"); + assert!( + !is_telemetry_disabled(), + "SOCKET_OFFLINE=0 must not engage gate" + ); std::env::set_var("SOCKET_OFFLINE", ""); - assert!(!is_telemetry_disabled(), "SOCKET_OFFLINE='' must not engage gate"); + assert!( + !is_telemetry_disabled(), + "SOCKET_OFFLINE='' must not engage gate" + ); std::env::remove_var("SOCKET_OFFLINE"); if let Some(v) = prev_disabled { std::env::set_var("SOCKET_TELEMETRY_DISABLED", v); diff --git a/npm/socket-patch-android-arm64/package.json b/npm/socket-patch-android-arm64/package.json index 2a25c1ab..d77225e4 100644 --- a/npm/socket-patch-android-arm64/package.json +++ b/npm/socket-patch-android-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-android-arm64", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Android ARM64", "os": [ "android" diff --git a/npm/socket-patch-darwin-arm64/package.json b/npm/socket-patch-darwin-arm64/package.json index 74e430dc..c5f78a44 100644 --- a/npm/socket-patch-darwin-arm64/package.json +++ b/npm/socket-patch-darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-darwin-arm64", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for macOS ARM64", "os": [ "darwin" diff --git a/npm/socket-patch-darwin-x64/package.json b/npm/socket-patch-darwin-x64/package.json index d6d355d5..3059a3f8 100644 --- a/npm/socket-patch-darwin-x64/package.json +++ b/npm/socket-patch-darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-darwin-x64", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for macOS x64", "os": [ "darwin" diff --git a/npm/socket-patch-linux-arm-gnu/package.json b/npm/socket-patch-linux-arm-gnu/package.json index 
500b1dd5..62344232 100644 --- a/npm/socket-patch-linux-arm-gnu/package.json +++ b/npm/socket-patch-linux-arm-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-arm-gnu", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Linux ARM (glibc)", "os": [ "linux" diff --git a/npm/socket-patch-linux-arm-musl/package.json b/npm/socket-patch-linux-arm-musl/package.json index 765934f6..9b41b15a 100644 --- a/npm/socket-patch-linux-arm-musl/package.json +++ b/npm/socket-patch-linux-arm-musl/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-arm-musl", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Linux ARM (musl)", "os": [ "linux" diff --git a/npm/socket-patch-linux-arm64-gnu/package.json b/npm/socket-patch-linux-arm64-gnu/package.json index fe4191f6..a247d4a3 100644 --- a/npm/socket-patch-linux-arm64-gnu/package.json +++ b/npm/socket-patch-linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-arm64-gnu", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Linux ARM64 (glibc)", "os": [ "linux" diff --git a/npm/socket-patch-linux-arm64-musl/package.json b/npm/socket-patch-linux-arm64-musl/package.json index c54a2a42..df8e25f9 100644 --- a/npm/socket-patch-linux-arm64-musl/package.json +++ b/npm/socket-patch-linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-arm64-musl", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Linux ARM64 (musl)", "os": [ "linux" diff --git a/npm/socket-patch-linux-ia32-gnu/package.json b/npm/socket-patch-linux-ia32-gnu/package.json index f44a47e0..71473e6b 100644 --- a/npm/socket-patch-linux-ia32-gnu/package.json +++ b/npm/socket-patch-linux-ia32-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-ia32-gnu", - "version": "3.1.0", + "version": "3.2.0", 
"description": "socket-patch binary for Linux ia32 (glibc)", "os": [ "linux" diff --git a/npm/socket-patch-linux-ia32-musl/package.json b/npm/socket-patch-linux-ia32-musl/package.json index f444e43d..d368ce36 100644 --- a/npm/socket-patch-linux-ia32-musl/package.json +++ b/npm/socket-patch-linux-ia32-musl/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-ia32-musl", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Linux ia32 (musl)", "os": [ "linux" diff --git a/npm/socket-patch-linux-x64-gnu/package.json b/npm/socket-patch-linux-x64-gnu/package.json index 6a59a363..a41e71d9 100644 --- a/npm/socket-patch-linux-x64-gnu/package.json +++ b/npm/socket-patch-linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-x64-gnu", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Linux x64 (glibc)", "os": [ "linux" diff --git a/npm/socket-patch-linux-x64-musl/package.json b/npm/socket-patch-linux-x64-musl/package.json index e589aa2b..9fa95ace 100644 --- a/npm/socket-patch-linux-x64-musl/package.json +++ b/npm/socket-patch-linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-linux-x64-musl", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Linux x64 (musl)", "os": [ "linux" diff --git a/npm/socket-patch-win32-arm64/package.json b/npm/socket-patch-win32-arm64/package.json index 634cc2ed..0434d5c3 100644 --- a/npm/socket-patch-win32-arm64/package.json +++ b/npm/socket-patch-win32-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-win32-arm64", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Windows ARM64", "os": [ "win32" diff --git a/npm/socket-patch-win32-ia32/package.json b/npm/socket-patch-win32-ia32/package.json index 0acad0f5..9f90a93d 100644 --- a/npm/socket-patch-win32-ia32/package.json +++ 
b/npm/socket-patch-win32-ia32/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-win32-ia32", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Windows ia32", "os": [ "win32" diff --git a/npm/socket-patch-win32-x64/package.json b/npm/socket-patch-win32-x64/package.json index 72920af1..d117a98d 100644 --- a/npm/socket-patch-win32-x64/package.json +++ b/npm/socket-patch-win32-x64/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch-win32-x64", - "version": "3.1.0", + "version": "3.2.0", "description": "socket-patch binary for Windows x64", "os": [ "win32" diff --git a/npm/socket-patch/package-lock.json b/npm/socket-patch/package-lock.json index 50066ae3..fe00334c 100644 --- a/npm/socket-patch/package-lock.json +++ b/npm/socket-patch/package-lock.json @@ -1,12 +1,12 @@ { "name": "@socketsecurity/socket-patch", - "version": "3.0.0", + "version": "3.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@socketsecurity/socket-patch", - "version": "3.0.0", + "version": "3.2.0", "license": "MIT", "dependencies": { "zod": "3.25.76" @@ -22,20 +22,20 @@ "node": ">=18.0.0" }, "optionalDependencies": { - "@socketsecurity/socket-patch-android-arm64": "3.0.0", - "@socketsecurity/socket-patch-darwin-arm64": "3.0.0", - "@socketsecurity/socket-patch-darwin-x64": "3.0.0", - "@socketsecurity/socket-patch-linux-arm-gnu": "3.0.0", - "@socketsecurity/socket-patch-linux-arm-musl": "3.0.0", - "@socketsecurity/socket-patch-linux-arm64-gnu": "3.0.0", - "@socketsecurity/socket-patch-linux-arm64-musl": "3.0.0", - "@socketsecurity/socket-patch-linux-ia32-gnu": "3.0.0", - "@socketsecurity/socket-patch-linux-ia32-musl": "3.0.0", - "@socketsecurity/socket-patch-linux-x64-gnu": "3.0.0", - "@socketsecurity/socket-patch-linux-x64-musl": "3.0.0", - "@socketsecurity/socket-patch-win32-arm64": "3.0.0", - "@socketsecurity/socket-patch-win32-ia32": "3.0.0", - "@socketsecurity/socket-patch-win32-x64": "3.0.0" + 
"@socketsecurity/socket-patch-android-arm64": "3.2.0", + "@socketsecurity/socket-patch-darwin-arm64": "3.2.0", + "@socketsecurity/socket-patch-darwin-x64": "3.2.0", + "@socketsecurity/socket-patch-linux-arm-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-arm-musl": "3.2.0", + "@socketsecurity/socket-patch-linux-arm64-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-arm64-musl": "3.2.0", + "@socketsecurity/socket-patch-linux-ia32-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-ia32-musl": "3.2.0", + "@socketsecurity/socket-patch-linux-x64-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-x64-musl": "3.2.0", + "@socketsecurity/socket-patch-win32-arm64": "3.2.0", + "@socketsecurity/socket-patch-win32-ia32": "3.2.0", + "@socketsecurity/socket-patch-win32-x64": "3.2.0" } }, "node_modules/@socketsecurity/socket-patch-android-arm64": { diff --git a/npm/socket-patch/package.json b/npm/socket-patch/package.json index 7cfab151..cd7c1cb2 100644 --- a/npm/socket-patch/package.json +++ b/npm/socket-patch/package.json @@ -1,6 +1,6 @@ { "name": "@socketsecurity/socket-patch", - "version": "3.1.0", + "version": "3.2.0", "description": "CLI tool and schema library for applying security patches to dependencies", "bin": { "socket-patch": "bin/socket-patch" @@ -42,19 +42,19 @@ "@types/node": "20.19.41" }, "optionalDependencies": { - "@socketsecurity/socket-patch-android-arm64": "3.1.0", - "@socketsecurity/socket-patch-darwin-arm64": "3.1.0", - "@socketsecurity/socket-patch-darwin-x64": "3.1.0", - "@socketsecurity/socket-patch-linux-arm-gnu": "3.1.0", - "@socketsecurity/socket-patch-linux-arm-musl": "3.1.0", - "@socketsecurity/socket-patch-linux-arm64-gnu": "3.1.0", - "@socketsecurity/socket-patch-linux-arm64-musl": "3.1.0", - "@socketsecurity/socket-patch-linux-ia32-gnu": "3.1.0", - "@socketsecurity/socket-patch-linux-ia32-musl": "3.1.0", - "@socketsecurity/socket-patch-linux-x64-gnu": "3.1.0", - "@socketsecurity/socket-patch-linux-x64-musl": "3.1.0", - 
"@socketsecurity/socket-patch-win32-arm64": "3.1.0", - "@socketsecurity/socket-patch-win32-ia32": "3.1.0", - "@socketsecurity/socket-patch-win32-x64": "3.1.0" + "@socketsecurity/socket-patch-android-arm64": "3.2.0", + "@socketsecurity/socket-patch-darwin-arm64": "3.2.0", + "@socketsecurity/socket-patch-darwin-x64": "3.2.0", + "@socketsecurity/socket-patch-linux-arm-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-arm-musl": "3.2.0", + "@socketsecurity/socket-patch-linux-arm64-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-arm64-musl": "3.2.0", + "@socketsecurity/socket-patch-linux-ia32-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-ia32-musl": "3.2.0", + "@socketsecurity/socket-patch-linux-x64-gnu": "3.2.0", + "@socketsecurity/socket-patch-linux-x64-musl": "3.2.0", + "@socketsecurity/socket-patch-win32-arm64": "3.2.0", + "@socketsecurity/socket-patch-win32-ia32": "3.2.0", + "@socketsecurity/socket-patch-win32-x64": "3.2.0" } } diff --git a/pypi/socket-patch/pyproject.toml b/pypi/socket-patch/pyproject.toml index 9a101b38..9a6c8890 100644 --- a/pypi/socket-patch/pyproject.toml +++ b/pypi/socket-patch/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "socket-patch" -version = "3.1.0" +version = "3.2.0" description = "CLI tool for applying security patches to dependencies" readme = "README.md" license = "MIT" diff --git a/scripts/fix-bugs.config.example.ts b/scripts/fix-bugs.config.example.ts new file mode 100644 index 00000000..79983215 --- /dev/null +++ b/scripts/fix-bugs.config.example.ts @@ -0,0 +1,46 @@ +/** + * Example prompt module for scripts/study-crates.ts. + * + * Pass it with: + * npx tsx scripts/study-crates.ts --prompt-file scripts/study-crates.config.example.ts + * + * The module's default export is a function `(ctx: FileCtx) => string` that + * returns the prompt for one file. 
This gives you full programmatic control: + * branch on the crate, the path, the file name, inject extra instructions for + * specific subsystems, etc. + * + * FileCtx fields available: + * file repo-relative POSIX path, e.g. "crates/socket-patch-core/src/lib.rs" + * abspath absolute path on disk + * crate crate dir name, e.g. "socket-patch-core" + * name basename, e.g. "lib.rs" + * stem basename without extension, e.g. "lib" + * relInCrate path within the crate's src dir, e.g. "api/client.rs" + */ + +import type { FileCtx } from "./study-crates.ts"; + +export default function render(ctx: FileCtx): string { + const base = [ + `There are bugs in ${ctx.file} in the ${ctx.crate} crate.`, + `Carefully read the code line by line and fix all of the bugs. Add additional tests to prevent regressions.`, + `If you can't find any problems, it's ok to quit.` + ]; + + // Example of path-specific emphasis: be extra careful around the patch engine + // and crawlers, which carry the most invariants. + if (ctx.relInCrate.startsWith("patch/")) { + base.push( + `This file is part of the patch engine — pay special attention to`, + `filesystem safety, atomicity, and rollback correctness.`, + ); + } else if (ctx.relInCrate.startsWith("crawlers/")) { + base.push( + `This is a package-manager crawler — note the on-disk layout assumptions`, + `it makes and how it handles missing or malformed package metadata.`, + ); + } + + base.push(`End with a concise 3-6 bullet summary of the most important takeaways.`); + return base.join(" "); +} diff --git a/scripts/fix-vuln.config.ts b/scripts/fix-vuln.config.ts new file mode 100644 index 00000000..79983215 --- /dev/null +++ b/scripts/fix-vuln.config.ts @@ -0,0 +1,46 @@ +/** + * Example prompt module for scripts/study-crates.ts. 
+ * + * Pass it with: + * npx tsx scripts/study-crates.ts --prompt-file scripts/fix-vuln.config.ts + * + * The module's default export is a function `(ctx: FileCtx) => string` that + * returns the prompt for one file. This gives you full programmatic control: + * branch on the crate, the path, the file name, inject extra instructions for + * specific subsystems, etc. + * + * FileCtx fields available: + * file repo-relative POSIX path, e.g. "crates/socket-patch-core/src/lib.rs" + * abspath absolute path on disk + * crate crate dir name, e.g. "socket-patch-core" + * name basename, e.g. "lib.rs" + * stem basename without extension, e.g. "lib" + * relInCrate path within the crate's src dir, e.g. "api/client.rs" + */ + +import type { FileCtx } from "./study-crates.ts"; + +export default function render(ctx: FileCtx): string { + const base = [ + `There are bugs in ${ctx.file} in the ${ctx.crate} crate.`, + `Carefully read the code line by line and fix all of the bugs. Add additional tests to prevent regressions.`, + `If you can't find any problems, it's ok to quit.` + ]; + + // Example of path-specific emphasis: be extra careful around the patch engine + // and crawlers, which carry the most invariants.
+ if (ctx.relInCrate.startsWith("patch/")) { + base.push( + `This file is part of the patch engine — pay special attention to`, + `filesystem safety, atomicity, and rollback correctness.`, + ); + } else if (ctx.relInCrate.startsWith("crawlers/")) { + base.push( + `This is a package-manager crawler — note the on-disk layout assumptions`, + `it makes and how it handles missing or malformed package metadata.`, + ); + } + + base.push(`End with a concise 3-6 bullet summary of the most important takeaways.`); + return base.join(" "); +} diff --git a/scripts/study-crates.ts b/scripts/study-crates.ts new file mode 100644 index 00000000..7652986c --- /dev/null +++ b/scripts/study-crates.ts @@ -0,0 +1,623 @@ +#!/usr/bin/env -S npx tsx +/** + * study-crates.ts — drive `claude` once per non-test source file in each crate. + * + * For every `crates/*\/src/**\/*.rs` file, this spawns a non-interactive Claude + * Code session with a configurable prompt, streams its output live to stdout, + * logs incremental progress, and aggregates every session's final result into a + * single `SUMMARY.md` (plus raw stream logs per file). + * + * Each session runs with `--dangerously-skip-permissions` and full autonomy + * (Claude may read/edit code, run commands, etc.). Sessions run sequentially by + * default. + * + * Usage: + * npx tsx scripts/study-crates.ts [options] + * + * Common examples: + * # Dry run — list discovered files and the rendered prompt, run nothing: + * npx tsx scripts/study-crates.ts --dry-run + * + * # Study only the CLI crate with the default prompt: + * npx tsx scripts/study-crates.ts --crate socket-patch-cli + * + * # Custom inline prompt with placeholders: + * npx tsx scripts/study-crates.ts --filter 'utils/purl' \ + * -p 'Inspect {file} for panics and unwraps. Summarize risks.' + * + * # Fully programmatic prompt via a TS module: + * npx tsx scripts/study-crates.ts --prompt-file scripts/study-crates.config.example.ts + * + * Options: + * -p, --prompt