Skip to content

Commit 5db81c3

Browse files
jensengwraithgar
andauthored
fix: allow concurrent non-local npx calls (#8512)
If you kick off multiple npx processes at the same time for the same non-local package(s), they can potentially install atop each other in the same npx cache directory. This can cause one or both processes to fail silently, or with various errors (e.g. `TAR_ENTRY_ERROR`, `ENOTEMPTY`, `EJSONPARSE`, `MODULE_NOT_FOUND`), depending on when/where something gets clobbered. See [this issue](#8224) for more context and previous discussion. This pull request introduces a lock around reading and reifying the tree in the npx cache directory, so that concurrent npx executions for the same non-local package(s) can succeed. The lock mechanism is based on `mkdir`s atomicity and loosely inspired by [proper-lockfile](https://www.npmjs.com/package/proper-lockfile), though inlined in order to give us more control and enable some improvements. ## References Fixes #8224 --------- Co-authored-by: Gar <[email protected]>
1 parent 3b30e0b commit 5db81c3

File tree

6 files changed

+424
-4
lines changed

6 files changed

+424
-4
lines changed

DEPENDENCIES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,11 @@ graph LR;
349349
libnpmexec-->npmcli-template-oss["@npmcli/template-oss"];
350350
libnpmexec-->pacote;
351351
libnpmexec-->proc-log;
352+
libnpmexec-->promise-retry;
352353
libnpmexec-->read-package-json-fast;
353354
libnpmexec-->read;
354355
libnpmexec-->semver;
356+
libnpmexec-->signal-exit;
355357
libnpmexec-->tap;
356358
libnpmexec-->walk-up-path;
357359
libnpmfund-->npmcli-arborist["@npmcli/arborist"];

package-lock.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18950,9 +18950,11 @@
1895018950
"npm-package-arg": "^12.0.0",
1895118951
"pacote": "^21.0.0",
1895218952
"proc-log": "^5.0.0",
18953+
"promise-retry": "^2.0.1",
1895318954
"read": "^4.0.0",
1895418955
"read-package-json-fast": "^4.0.0",
1895518956
"semver": "^7.3.7",
18957+
"signal-exit": "^4.1.0",
1895618958
"walk-up-path": "^4.0.0"
1895718959
},
1895818960
"devDependencies": {

workspaces/libnpmexec/lib/index.js

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use strict'
22

3-
const { dirname, resolve } = require('node:path')
3+
const { dirname, join, resolve } = require('node:path')
44
const crypto = require('node:crypto')
55
const { mkdir } = require('node:fs/promises')
66
const Arborist = require('@npmcli/arborist')
@@ -16,6 +16,7 @@ const getBinFromManifest = require('./get-bin-from-manifest.js')
1616
const noTTY = require('./no-tty.js')
1717
const runScript = require('./run-script.js')
1818
const isWindows = require('./is-windows.js')
19+
const withLock = require('./with-lock.js')
1920

2021
const binPaths = []
2122

@@ -247,7 +248,8 @@ const exec = async (opts) => {
247248
...flatOptions,
248249
path: installDir,
249250
})
250-
const npxTree = await npxArb.loadActual()
251+
const lockPath = join(installDir, 'concurrency.lock')
252+
const npxTree = await withLock(lockPath, () => npxArb.loadActual())
251253
await Promise.all(needInstall.map(async ({ spec }) => {
252254
const { manifest } = await missingFromTree({
253255
spec,
@@ -290,11 +292,11 @@ const exec = async (opts) => {
290292
}
291293
}
292294
}
293-
await npxArb.reify({
295+
await withLock(lockPath, () => npxArb.reify({
294296
...flatOptions,
295297
save: true,
296298
add,
297-
})
299+
}))
298300
}
299301
binPaths.push(resolve(installDir, 'node_modules/.bin'))
300302
const pkgJson = await PackageJson.load(installDir)
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
const fs = require('node:fs/promises')
2+
const { rmdirSync } = require('node:fs')
3+
const promiseRetry = require('promise-retry')
4+
const { onExit } = require('signal-exit')
5+
6+
// a lockfile implementation inspired by the unmaintained proper-lockfile library
7+
//
8+
// similarities:
9+
// - based on mkdir's atomicity
10+
// - works across processes and even machines (via NFS)
11+
// - cleans up after itself
12+
// - detects compromised locks
13+
//
14+
// differences:
15+
// - higher-level API (just a withLock function)
16+
// - written in async/await style
17+
// - uses mtime + inode for more reliable compromised lock detection
18+
// - more ergonomic compromised lock handling (i.e. withLock will reject, and callbacks have access to an AbortSignal)
19+
// - uses a more recent version of signal-exit
20+
21+
const touchInterval = 100
22+
// mtime precision is platform dependent, so use a reasonably large threshold
23+
const staleThreshold = 5_000
24+
25+
// track current locks and their cleanup functions
26+
const currentLocks = new Map()
27+
28+
function cleanupLocks () {
29+
for (const [, cleanup] of currentLocks) {
30+
try {
31+
cleanup()
32+
} catch (err) {
33+
//
34+
}
35+
}
36+
}
37+
38+
// clean up any locks that were not released normally
39+
onExit(cleanupLocks)
40+
41+
/**
42+
* Acquire an advisory lock for the given path and hold it for the duration of the callback.
43+
*
44+
* The lock will be released automatically when the callback resolves or rejects.
45+
* Concurrent calls to withLock() for the same path will wait until the lock is released.
46+
*/
47+
async function withLock (lockPath, cb) {
48+
try {
49+
const signal = await acquireLock(lockPath)
50+
return await new Promise((resolve, reject) => {
51+
signal.addEventListener('abort', () => {
52+
reject(Object.assign(new Error('Lock compromised'), { code: 'ECOMPROMISED' }))
53+
});
54+
55+
(async () => {
56+
try {
57+
resolve(await cb(signal))
58+
} catch (err) {
59+
reject(err)
60+
}
61+
})()
62+
})
63+
} finally {
64+
releaseLock(lockPath)
65+
}
66+
}
67+
68+
function acquireLock (lockPath) {
69+
return promiseRetry({
70+
minTimeout: 100,
71+
maxTimeout: 5_000,
72+
// if another process legitimately holds the lock, wait for it to release; if it dies abnormally and the lock becomes stale, we'll acquire it automatically
73+
forever: true,
74+
}, async (retry) => {
75+
try {
76+
await fs.mkdir(lockPath)
77+
} catch (err) {
78+
if (err.code !== 'EEXIST') {
79+
throw err
80+
}
81+
82+
const status = await getLockStatus(lockPath)
83+
84+
if (status === 'locked') {
85+
// let's see if we can acquire it on the next attempt 🤞
86+
return retry(err)
87+
}
88+
if (status === 'stale') {
89+
// there is a very tiny window where another process could also release the stale lock and acquire it before we release it here; the lock compromise checker should detect this and throw an error
90+
deleteLock(lockPath, ['ENOENT', 'EBUSY']) // on windows, EBUSY can happen if another process is creating the lock; we'll just retry
91+
}
92+
return await acquireLock(lockPath)
93+
}
94+
try {
95+
const signal = await maintainLock(lockPath)
96+
return signal
97+
} catch (err) {
98+
throw Object.assign(new Error('Lock compromised'), { code: 'ECOMPROMISED' })
99+
}
100+
})
101+
}
102+
103+
function deleteLock (lockPath, ignoreCodes = ['ENOENT']) {
104+
try {
105+
// synchronous, so we can call in an exit handler
106+
rmdirSync(lockPath)
107+
} catch (err) {
108+
if (!ignoreCodes.includes(err.code)) {
109+
throw err
110+
}
111+
}
112+
}
113+
114+
function releaseLock (lockPath) {
115+
currentLocks.get(lockPath)?.()
116+
currentLocks.delete(lockPath)
117+
}
118+
119+
async function getLockStatus (lockPath) {
120+
try {
121+
const stat = await fs.stat(lockPath)
122+
return (Date.now() - stat.mtimeMs > staleThreshold) ? 'stale' : 'locked'
123+
} catch (err) {
124+
if (err.code === 'ENOENT') {
125+
return 'unlocked'
126+
}
127+
throw err
128+
}
129+
}
130+
131+
async function maintainLock (lockPath) {
132+
const controller = new AbortController()
133+
const stats = await fs.stat(lockPath)
134+
let mtimeMs = stats.mtimeMs
135+
const signal = controller.signal
136+
137+
async function touchLock () {
138+
try {
139+
const currentStats = (await fs.stat(lockPath))
140+
if (currentStats.ino !== stats.ino || currentStats.mtimeMs !== mtimeMs) {
141+
throw new Error('Lock compromised')
142+
}
143+
mtimeMs = Date.now()
144+
const mtime = new Date(mtimeMs)
145+
await fs.utimes(lockPath, mtime, mtime)
146+
} catch (err) {
147+
// stats mismatch or other fs error means the lock was compromised, unless we just released the lock during this iteration
148+
if (currentLocks.has(lockPath)) {
149+
controller.abort()
150+
}
151+
}
152+
}
153+
154+
const timeout = setInterval(touchLock, touchInterval)
155+
timeout.unref()
156+
function cleanup () {
157+
deleteLock(lockPath)
158+
clearInterval(timeout)
159+
}
160+
currentLocks.set(lockPath, cleanup)
161+
return signal
162+
}
163+
164+
module.exports = withLock

workspaces/libnpmexec/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,11 @@
6767
"npm-package-arg": "^12.0.0",
6868
"pacote": "^21.0.0",
6969
"proc-log": "^5.0.0",
70+
"promise-retry": "^2.0.1",
7071
"read": "^4.0.0",
7172
"read-package-json-fast": "^4.0.0",
7273
"semver": "^7.3.7",
74+
"signal-exit": "^4.1.0",
7375
"walk-up-path": "^4.0.0"
7476
},
7577
"templateOSS": {

0 commit comments

Comments
 (0)