Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 26 additions & 15 deletions workspaces/libnpmexec/lib/with-lock.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const { onExit } = require('signal-exit')
// - more ergonomic compromised lock handling (i.e. withLock will reject, and callbacks have access to an AbortSignal)
// - uses a more recent version of signal-exit

const touchInterval = 100
const touchInterval = 1_000
// mtime precision is platform dependent, so use a reasonably large threshold
const staleThreshold = 5_000

Expand Down Expand Up @@ -75,7 +75,7 @@ function acquireLock (lockPath) {
try {
await fs.mkdir(lockPath)
} catch (err) {
if (err.code !== 'EEXIST') {
if (err.code !== 'EEXIST' && err.code !== 'EBUSY' && err.code !== 'EPERM') {
throw err
}

Expand All @@ -86,9 +86,18 @@ function acquireLock (lockPath) {
return retry(err)
}
if (status === 'stale') {
// there is a very tiny window where another process could also release the stale lock and acquire it before we release it here; the lock compromise checker should detect this and throw an error
deleteLock(lockPath, ['ENOENT', 'EBUSY']) // on windows, EBUSY can happen if another process is creating the lock; we'll just retry
try {
// there is a very tiny window where another process could also release the stale lock and acquire it before we release it here; the lock compromise checker should detect this and throw an error
deleteLock(lockPath)
} catch (e) {
// on windows, EBUSY/EPERM can happen if another process is (re)creating the lock; maybe we can acquire it on a subsequent attempt 🤞
if (e.code === 'EBUSY' || e.code === 'EPERM') {
return retry(e)
}
throw e
}
}
// immediately attempt to acquire the lock (no backoff)
return await acquireLock(lockPath)
}
try {
Expand All @@ -100,12 +109,12 @@ function acquireLock (lockPath) {
})
}

function deleteLock (lockPath, ignoreCodes = ['ENOENT']) {
function deleteLock (lockPath) {
try {
// synchronous, so we can call in an exit handler
rmdirSync(lockPath)
} catch (err) {
if (!ignoreCodes.includes(err.code)) {
if (err.code !== 'ENOENT') {
throw err
}
}
Expand All @@ -131,31 +140,33 @@ async function getLockStatus (lockPath) {
async function maintainLock (lockPath) {
const controller = new AbortController()
const stats = await fs.stat(lockPath)
let mtimeMs = stats.mtimeMs
// fs.utimes operates on floating points seconds (directly, or via strings/Date objects), which may not match the underlying filesystem's mtime precision, meaning that we might read a slightly different mtime than we write. always round to the nearest second, since all filesystems support at least second precision
let mtime = Math.round(stats.mtimeMs / 1000)
const signal = controller.signal

async function touchLock () {
try {
const currentStats = (await fs.stat(lockPath))
if (currentStats.ino !== stats.ino || currentStats.mtimeMs !== mtimeMs) {
const currentMtime = Math.round(currentStats.mtimeMs / 1000)
if (currentStats.ino !== stats.ino || currentMtime !== mtime) {
throw new Error('Lock compromised')
}
mtimeMs = Date.now()
const mtime = new Date(mtimeMs)
await fs.utimes(lockPath, mtime, mtime)
} catch (err) {
// stats mismatch or other fs error means the lock was compromised, unless we just released the lock during this iteration
mtime = Math.round(Date.now() / 1000)
// touch the lock, unless we just released it during this iteration
if (currentLocks.has(lockPath)) {
controller.abort()
await fs.utimes(lockPath, mtime, mtime)
}
} catch (err) {
// stats mismatch or other fs error means the lock was compromised
controller.abort()
}
}

const timeout = setInterval(touchLock, touchInterval)
timeout.unref()
function cleanup () {
deleteLock(lockPath)
clearInterval(timeout)
deleteLock(lockPath)
}
currentLocks.set(lockPath, cleanup)
return signal
Expand Down
102 changes: 96 additions & 6 deletions workspaces/libnpmexec/test/with-lock.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ t.test('stale lock takeover', async (t) => {
}
}
let statCalls = 0
const mtimeMs = Date.now()
const mtimeMs = Math.round(Date.now() / 1000) * 1000
mockStat = async () => {
if (++statCalls === 1) {
return { mtimeMs: mtimeMs - 10_000 }
Expand All @@ -122,6 +122,48 @@ t.test('stale lock takeover', async (t) => {
t.equal(mkdirCalls, 2, 'should make two mkdir calls')
})

t.test('EBUSY during lock acquisition', async (t) => {
let mkdirCalls = 0
mockMkdir = async (...args) => {
if (++mkdirCalls === 1) {
throw Object.assign(new Error(), { code: 'EBUSY' })
}
return fs.promises.mkdir(...args)
}
const lockPath = path.join(fs.mkdtempSync(path.join(getTempDir(), 'test-')), 'concurrency.lock')
t.ok(await withLock(lockPath, async () => true))
t.equal(mkdirCalls, 2, 'should make two mkdir calls')
})

t.test('EBUSY during stale lock takeover', async (t) => {
let mkdirCalls = 0
mockMkdir = async () => {
if (++mkdirCalls === 1) {
throw Object.assign(new Error(), { code: 'EEXIST' })
}
}
let statCalls = 0
const mtimeMs = Math.round(Date.now() / 1000) * 1000
mockStat = async () => {
if (++statCalls === 1) {
return { mtimeMs: mtimeMs - 10_000 }
} else {
return { mtimeMs, ino: 1 }
}
}
let rmdirSyncCalls = 0
mockRmdirSync = () => {
if (++rmdirSyncCalls === 1) {
throw Object.assign(new Error(), { code: 'EBUSY' })
}
}

const lockPath = path.join(fs.mkdtempSync(path.join(getTempDir(), 'test-')), 'concurrency.lock')
const lockPromise = withLock(lockPath, async () => 'test value')
t.equal(await lockPromise, 'test value', 'should take over the lock')
t.equal(mkdirCalls, 2, 'should make two mkdir calls')
})

t.test('concurrent stale lock takeover', async (t) => {
const lockPath = path.join(fs.mkdtempSync(path.join(getTempDir(), 'test-')), 'concurrency.lock')
// make a stale lock
Expand All @@ -147,7 +189,7 @@ t.test('mkdir -> getLockStatus race', async (t) => {
}
}
let statCalls = 0
const mtimeMs = Date.now()
const mtimeMs = Math.round(Date.now() / 1000) * 1000
mockStat = async () => {
if (++statCalls === 1) {
throw Object.assign(new Error(), { code: 'ENOENT' })
Expand All @@ -167,6 +209,22 @@ t.test('mkdir -> getLockStatus race', async (t) => {
t.equal(mkdirCalls, 2, 'should make two mkdir calls')
})

t.test('mtime floating point mismatch', async (t) => {
let mtimeMs = Math.round(Date.now() / 1000) * 1000
mockStat = async () => {
return { mtimeMs, ino: 1 }
}
mockUtimes = async (_, nextMtimeSeconds) => {
mtimeMs = nextMtimeSeconds * 1000 - 0.001
}

const lockPath = path.join(fs.mkdtempSync(path.join(getTempDir(), 'test-')), 'concurrency.lock')
t.ok(await withLock(lockPath, async () => {
await setTimeout(2000)
return true
}), 'should handle mtime floating point mismatches')
})

t.test('unexpected errors', async (t) => {
t.test('can\'t create lock', async (t) => {
const lockPath = '/these/parent/directories/do/not/exist/so/it/should/fail.lock'
Expand Down Expand Up @@ -217,7 +275,7 @@ t.test('unexpected errors', async (t) => {
mockStat = async () => {
return { mtimeMs: Date.now(), ino: Math.floor(Math.random() * 1000000) }
}
await t.rejects(withLock(lockPath, () => setTimeout(1000)), { code: 'ECOMPROMISED' })
await t.rejects(withLock(lockPath, () => setTimeout(2000)), { code: 'ECOMPROMISED' })
})

t.test('lock compromised (deleted)', async (t) => {
Expand All @@ -226,10 +284,42 @@ t.test('unexpected errors', async (t) => {
mockStat = async () => {
throw Object.assign(new Error(), { code: 'ENOENT' })
}
await t.rejects(withLock(lockPath, () => setTimeout(1000)), { code: 'ECOMPROMISED' })
await t.rejects(withLock(lockPath, () => setTimeout(2000)), { code: 'ECOMPROMISED' })
})
})

t.test('lock released during maintenance', async (t) => {
// this test validates that if we release the lock while touchLock is running, it doesn't interfere with subsequent locks
const lockPath = path.join(fs.mkdtempSync(path.join(getTempDir(), 'test-')), 'concurrency.lock')

let releaseLock
const releaseLockPromise = new Promise((resolve) => {
releaseLock = resolve
})

let statCalls = 0
mockStat = async (...args) => {
const value = await fs.promises.stat(...args)
if (++statCalls > 1) {
// this runs during the setInterval; release the lock so that we no longer hold it
await releaseLock('test value')
await setTimeout()
}
return value
}

let utimesCalls = 0
mockUtimes = async () => {
utimesCalls++
}

const lockPromise = withLock(lockPath, () => releaseLockPromise)
// since we unref the interval timeout, we need to wait to ensure it actually runs
await setTimeout(2000)
t.equal(await lockPromise, 'test value', 'should acquire the lock')
t.equal(utimesCalls, 0, 'should never call utimes')
})

t.test('onExit handler', async (t) => {
t.ok(onExitHandler, 'should be registered')
let rmdirSyncCalls = 0
Expand All @@ -241,8 +331,8 @@ t.test('onExit handler', async (t) => {
const lockPath = path.join(fs.mkdtempSync(path.join(getTempDir(), 'test-')), 'concurrency.lock')
// don't await it since the promise never resolves
withLock(lockPath, () => new Promise(() => {})).catch(() => {})
// ensure the lock is acquired
await setTimeout(0)
// since we unref the interval timeout, we need to wait to ensure it actually runs
await setTimeout(2000)
onExitHandler()
t.ok(rmdirSyncCalls > 0, 'should have removed outstanding locks')
})
Loading