@@ -16,7 +16,31 @@ use std::time::Duration;
1616
1717use socket_patch_core:: patch:: apply_lock:: { acquire, LockError , LockGuard } ;
1818
19- use crate :: json_envelope:: { Command , Envelope , EnvelopeError } ;
19+ use crate :: json_envelope:: {
20+ Command , Envelope , EnvelopeError , PatchAction , PatchEvent ,
21+ } ;
22+
23+ /// Stable `errorCode` tag emitted as a `Skipped` warning event when
24+ /// `--break-lock` actually deletes a pre-existing lock file. Exposed
25+ /// for downstream consumers and integration tests that pattern-match
26+ /// on it.
27+ pub const LOCK_BROKEN_CODE : & str = "lock_broken" ;
28+
29+ /// Outcome of a successful lock acquisition. Callers attach a
30+ /// `lock_broken` event to their own envelope when [`broke_lock`] is
31+ /// true, so the audit trail follows the same conventions as the
32+ /// rest of the command's output.
33+ ///
34+ /// [`broke_lock`]: LockAcquired::broke_lock
35+ #[ derive( Debug ) ]
36+ pub struct LockAcquired {
37+ pub guard : LockGuard ,
38+ /// True iff `--break-lock` was set AND the helper actually
39+ /// removed a pre-existing `apply.lock` file before acquiring.
40+ /// False when the file didn't exist (nothing to break) — the
41+ /// flag was a no-op in that case so no warning is warranted.
42+ pub broke_lock : bool ,
43+ }
2044
2145/// Try to acquire `<socket_dir>/apply.lock` and return the guard, or
2246/// emit a failure envelope and a non-zero exit code.
@@ -26,23 +50,74 @@ use crate::json_envelope::{Command, Envelope, EnvelopeError};
2650/// than a generic "lock failed". `dry_run` is plumbed through to the
2751/// envelope's `dry_run` field for the (rare) case where lock
2852/// contention happens during a dry-run apply.
53+ ///
54+ /// `timeout = Duration::ZERO` keeps the historical non-blocking
55+ /// try-once shape. Positive values wait with a 100 ms backoff —
56+ /// see `socket_patch_core::patch::apply_lock::acquire`.
57+ ///
58+ /// `break_lock = true` deletes `<socket_dir>/apply.lock` before the
59+ /// acquire attempt. The motivating case is a crashed prior run that
60+ /// left the file but no OS lock. When the file exists and is
61+ /// successfully removed the return value's `broke_lock` is true and
62+ /// the caller should attach a `lock_broken` warning event to their
63+ /// envelope.
2964pub fn acquire_or_emit (
3065 socket_dir : & Path ,
3166 command : Command ,
3267 json : bool ,
3368 silent : bool ,
3469 dry_run : bool ,
35- ) -> Result < LockGuard , i32 > {
36- match acquire ( socket_dir, Duration :: ZERO ) {
37- Ok ( guard) => Ok ( guard) ,
70+ timeout : Duration ,
71+ break_lock : bool ,
72+ ) -> Result < LockAcquired , i32 > {
73+ let mut broke_lock = false ;
74+ if break_lock {
75+ let path = socket_dir. join ( "apply.lock" ) ;
76+ match std:: fs:: remove_file ( & path) {
77+ Ok ( ( ) ) => {
78+ broke_lock = true ;
79+ if !silent && !json {
80+ eprintln ! (
81+ "Warning: --break-lock removed {} before acquisition." ,
82+ path. display( )
83+ ) ;
84+ }
85+ }
86+ Err ( e) if e. kind ( ) == std:: io:: ErrorKind :: NotFound => {
87+ // No file to break — silently proceed to the normal
88+ // acquire path. Documented as a no-op so scripts can
89+ // pass --break-lock unconditionally on retry.
90+ }
91+ Err ( source) => {
92+ let msg = format ! (
93+ "failed to remove lock file at {}: {}" ,
94+ path. display( ) ,
95+ source
96+ ) ;
97+ emit ( command, json, silent, dry_run, "lock_break_failed" , & msg, None ) ;
98+ return Err ( 1 ) ;
99+ }
100+ }
101+ }
102+
103+ match acquire ( socket_dir, timeout) {
104+ Ok ( guard) => Ok ( LockAcquired { guard, broke_lock } ) ,
38105 Err ( LockError :: Held ) => {
106+ let msg = if timeout > Duration :: ZERO {
107+ format ! (
108+ "another socket-patch process is operating in this directory (waited {}s)" ,
109+ timeout. as_secs( )
110+ )
111+ } else {
112+ "another socket-patch process is operating in this directory" . to_string ( )
113+ } ;
39114 emit (
40115 command,
41116 json,
42117 silent,
43118 dry_run,
44119 "lock_held" ,
45- "another socket-patch process is operating in this directory" ,
120+ & msg ,
46121 Some ( socket_dir) ,
47122 ) ;
48123 Err ( 1 )
@@ -55,6 +130,27 @@ pub fn acquire_or_emit(
55130 }
56131}
57132
133+ /// Build the warning event that callers attach to their envelope
134+ /// when [`LockAcquired::broke_lock`] is true. Artifact-level (no
135+ /// PURL) since the action targets the `.socket/` directory itself,
136+ /// not a specific package.
137+ pub fn lock_broken_event ( socket_dir : & Path ) -> PatchEvent {
138+ PatchEvent :: artifact ( PatchAction :: Skipped ) . with_reason (
139+ LOCK_BROKEN_CODE ,
140+ format ! (
141+ "--break-lock removed {}/apply.lock before acquisition" ,
142+ socket_dir. display( )
143+ ) ,
144+ )
145+ }
146+
147+ /// Convenience: record the `lock_broken` warning event on an
148+ /// envelope. Mirrors the inline pattern at each call site so we
149+ /// don't drift on the action / errorCode pair.
150+ pub fn record_lock_broken ( env : & mut Envelope , socket_dir : & Path ) {
151+ env. record ( lock_broken_event ( socket_dir) ) ;
152+ }
153+
58154fn emit (
59155 command : Command ,
60156 json : bool ,
@@ -71,10 +167,9 @@ fn emit(
71167 println ! ( "{}" , env. to_pretty_json( ) ) ;
72168 } else if !silent {
73169 eprintln ! ( "Error: {message}." ) ;
74- if let Some ( dir ) = hint_dir {
170+ if hint_dir . is_some ( ) {
75171 eprintln ! (
76- " If you are sure no other process is running, remove {}/apply.lock and retry." ,
77- dir. display( )
172+ " Run `socket-patch unlock` to inspect, or rerun with --break-lock if you're sure no holder exists."
78173 ) ;
79174 }
80175 }
@@ -87,17 +182,43 @@ mod tests {
87182 #[ test]
88183 fn acquire_or_emit_succeeds_on_fresh_dir ( ) {
89184 let dir = tempfile:: tempdir ( ) . unwrap ( ) ;
90- let guard = acquire_or_emit ( dir. path ( ) , Command :: Apply , false , true , false ) . unwrap ( ) ;
91- drop ( guard) ;
185+ let acquired = acquire_or_emit (
186+ dir. path ( ) ,
187+ Command :: Apply ,
188+ false ,
189+ true ,
190+ false ,
191+ Duration :: ZERO ,
192+ false ,
193+ )
194+ . unwrap ( ) ;
195+ assert ! ( !acquired. broke_lock) ;
196+ drop ( acquired. guard ) ;
92197 }
93198
94199 #[ test]
95200 fn acquire_or_emit_returns_one_on_contention ( ) {
96201 let dir = tempfile:: tempdir ( ) . unwrap ( ) ;
97- let _first =
98- acquire_or_emit ( dir. path ( ) , Command :: Apply , false , true , false ) . unwrap ( ) ;
99- let code =
100- acquire_or_emit ( dir. path ( ) , Command :: Apply , false , true , false ) . unwrap_err ( ) ;
202+ let _first = acquire_or_emit (
203+ dir. path ( ) ,
204+ Command :: Apply ,
205+ false ,
206+ true ,
207+ false ,
208+ Duration :: ZERO ,
209+ false ,
210+ )
211+ . unwrap ( ) ;
212+ let code = acquire_or_emit (
213+ dir. path ( ) ,
214+ Command :: Apply ,
215+ false ,
216+ true ,
217+ false ,
218+ Duration :: ZERO ,
219+ false ,
220+ )
221+ . unwrap_err ( ) ;
101222 assert_eq ! ( code, 1 ) ;
102223 }
103224
@@ -110,8 +231,111 @@ mod tests {
110231 false ,
111232 true ,
112233 false ,
234+ Duration :: ZERO ,
235+ false ,
236+ )
237+ . unwrap_err ( ) ;
238+ assert_eq ! ( code, 1 ) ;
239+ }
240+
241+ /// Positive timeout waits then errors `lock_held` — confirms the
242+ /// budget is plumbed through to `acquire`. Mirrors the
243+ /// `apply_lock::tests::timeout_held` shape so a regression in
244+ /// either layer surfaces here.
245+ #[ test]
246+ fn acquire_or_emit_honors_lock_timeout ( ) {
247+ let dir = tempfile:: tempdir ( ) . unwrap ( ) ;
248+ let _first = acquire_or_emit (
249+ dir. path ( ) ,
250+ Command :: Apply ,
251+ false ,
252+ true ,
253+ false ,
254+ Duration :: ZERO ,
255+ false ,
256+ )
257+ . unwrap ( ) ;
258+ let start = std:: time:: Instant :: now ( ) ;
259+ let code = acquire_or_emit (
260+ dir. path ( ) ,
261+ Command :: Apply ,
262+ false ,
263+ true ,
264+ false ,
265+ Duration :: from_millis ( 250 ) ,
266+ false ,
113267 )
114268 . unwrap_err ( ) ;
269+ let elapsed = start. elapsed ( ) ;
115270 assert_eq ! ( code, 1 ) ;
271+ assert ! (
272+ elapsed >= Duration :: from_millis( 200 ) ,
273+ "expected at least 200ms wait, got {:?}" ,
274+ elapsed
275+ ) ;
276+ }
277+
278+ /// `break_lock=true` against a pre-existing lock file with no
279+ /// holder removes the file and acquires fresh. `broke_lock` flag
280+ /// surfaces so callers can attach the warning event.
281+ #[ test]
282+ fn acquire_or_emit_break_lock_removes_and_acquires ( ) {
283+ let dir = tempfile:: tempdir ( ) . unwrap ( ) ;
284+ // Pre-stage a lock file with no holder — simulates the
285+ // post-crash leftover scenario.
286+ std:: fs:: write ( dir. path ( ) . join ( "apply.lock" ) , b"" ) . unwrap ( ) ;
287+
288+ let acquired = acquire_or_emit (
289+ dir. path ( ) ,
290+ Command :: Apply ,
291+ false ,
292+ true ,
293+ false ,
294+ Duration :: ZERO ,
295+ true ,
296+ )
297+ . unwrap ( ) ;
298+ assert ! (
299+ acquired. broke_lock,
300+ "broke_lock should be true when a lock file existed and was removed"
301+ ) ;
302+ // Lock file has been re-created by `acquire` and we hold it.
303+ assert ! ( dir. path( ) . join( "apply.lock" ) . is_file( ) ) ;
304+ }
305+
306+ /// `break_lock=true` on a clean directory (no lock file) is a
307+ /// no-op for the warning surface — `broke_lock` stays false so
308+ /// callers don't emit a spurious event.
309+ #[ test]
310+ fn acquire_or_emit_break_lock_is_noop_when_no_file ( ) {
311+ let dir = tempfile:: tempdir ( ) . unwrap ( ) ;
312+ let acquired = acquire_or_emit (
313+ dir. path ( ) ,
314+ Command :: Apply ,
315+ false ,
316+ true ,
317+ false ,
318+ Duration :: ZERO ,
319+ true ,
320+ )
321+ . unwrap ( ) ;
322+ assert ! (
323+ !acquired. broke_lock,
324+ "broke_lock should be false when there was nothing to remove"
325+ ) ;
326+ }
327+
328+ #[ test]
329+ fn lock_broken_event_uses_documented_code ( ) {
330+ let dir = tempfile:: tempdir ( ) . unwrap ( ) ;
331+ let event = lock_broken_event ( dir. path ( ) ) ;
332+ let v: serde_json:: Value =
333+ serde_json:: from_str ( & serde_json:: to_string ( & event) . unwrap ( ) ) . unwrap ( ) ;
334+ assert_eq ! ( v[ "action" ] , "skipped" ) ;
335+ assert_eq ! ( v[ "errorCode" ] , LOCK_BROKEN_CODE ) ;
336+ assert ! (
337+ v. as_object( ) . unwrap( ) . get( "purl" ) . is_none( ) ,
338+ "lock_broken is an artifact-level event — no purl"
339+ ) ;
116340 }
117341}
0 commit comments