From ddae85445eb3c4e6b7f5fc288419172b10c9ab8d Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 22 Apr 2019 18:47:11 +0800 Subject: [PATCH 01/39] feat: implement ipfs refs --- test/fixtures/test-data/refs/animals/land/african.txt | 2 ++ test/fixtures/test-data/refs/animals/land/americas.txt | 2 ++ test/fixtures/test-data/refs/animals/land/australian.txt | 2 ++ test/fixtures/test-data/refs/animals/sea/atlantic.txt | 2 ++ test/fixtures/test-data/refs/animals/sea/indian.txt | 2 ++ test/fixtures/test-data/refs/atlantic-animals | 1 + test/fixtures/test-data/refs/fruits/tropical.txt | 2 ++ test/fixtures/test-data/refs/mushroom.txt | 1 + test/utils/ipfs-exec.js | 7 ++----- 9 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 test/fixtures/test-data/refs/animals/land/african.txt create mode 100644 test/fixtures/test-data/refs/animals/land/americas.txt create mode 100644 test/fixtures/test-data/refs/animals/land/australian.txt create mode 100644 test/fixtures/test-data/refs/animals/sea/atlantic.txt create mode 100644 test/fixtures/test-data/refs/animals/sea/indian.txt create mode 120000 test/fixtures/test-data/refs/atlantic-animals create mode 100644 test/fixtures/test-data/refs/fruits/tropical.txt create mode 100644 test/fixtures/test-data/refs/mushroom.txt diff --git a/test/fixtures/test-data/refs/animals/land/african.txt b/test/fixtures/test-data/refs/animals/land/african.txt new file mode 100644 index 0000000000..29decfcd50 --- /dev/null +++ b/test/fixtures/test-data/refs/animals/land/african.txt @@ -0,0 +1,2 @@ +elephant +rhinocerous \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/land/americas.txt b/test/fixtures/test-data/refs/animals/land/americas.txt new file mode 100644 index 0000000000..21368a871d --- /dev/null +++ b/test/fixtures/test-data/refs/animals/land/americas.txt @@ -0,0 +1,2 @@ +ñandu +tapir \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/land/australian.txt b/test/fixtures/test-data/refs/animals/land/australian.txt new file mode 100644 index 0000000000..a78c7bc9c3 --- /dev/null +++ b/test/fixtures/test-data/refs/animals/land/australian.txt @@ -0,0 +1,2 @@ +emu +kangaroo \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/sea/atlantic.txt b/test/fixtures/test-data/refs/animals/sea/atlantic.txt new file mode 100644 index 0000000000..f77ffe5119 --- /dev/null +++ b/test/fixtures/test-data/refs/animals/sea/atlantic.txt @@ -0,0 +1,2 @@ +dolphin +whale \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/sea/indian.txt b/test/fixtures/test-data/refs/animals/sea/indian.txt new file mode 100644 index 0000000000..c455106f6c --- /dev/null +++ b/test/fixtures/test-data/refs/animals/sea/indian.txt @@ -0,0 +1,2 @@ +cuttlefish +octopus \ No newline at end of file diff --git a/test/fixtures/test-data/refs/atlantic-animals b/test/fixtures/test-data/refs/atlantic-animals new file mode 120000 index 0000000000..670958bfa8 --- /dev/null +++ b/test/fixtures/test-data/refs/atlantic-animals @@ -0,0 +1 @@ +animals/sea/atlantic.txt \ No newline at end of file diff --git a/test/fixtures/test-data/refs/fruits/tropical.txt b/test/fixtures/test-data/refs/fruits/tropical.txt new file mode 100644 index 0000000000..4f331bc7d2 --- /dev/null +++ b/test/fixtures/test-data/refs/fruits/tropical.txt @@ -0,0 +1,2 @@ +banana +pineapple \ No newline at end of file diff --git a/test/fixtures/test-data/refs/mushroom.txt b/test/fixtures/test-data/refs/mushroom.txt new file mode 100644 index 
0000000000..8b248aa9c8
--- /dev/null
+++ b/test/fixtures/test-data/refs/mushroom.txt
@@ -0,0 +1 @@
+mushroom
\ No newline at end of file
diff --git a/test/utils/ipfs-exec.js b/test/utils/ipfs-exec.js
index f4be357c02..eb23d2604b 100644
--- a/test/utils/ipfs-exec.js
+++ b/test/utils/ipfs-exec.js
@@ -7,6 +7,7 @@ const expect = chai.expect
 chai.use(dirtyChai)
 const path = require('path')
 const _ = require('lodash')
+const yargs = require('yargs')
 
 // This is our new test utility to easily check and execute ipfs cli commands.
 //
@@ -33,11 +34,7 @@ module.exports = (repoPath, opts) => {
   }))
 
   const execute = (exec, args) => {
-    if (args.length === 1) {
-      args = args[0].split(' ')
-    }
-
-    const cp = exec(args)
+    const cp = exec(yargs('-- ' + args[0]).argv._)
     const res = cp.then((res) => {
       // We can't escape the os.tmpdir warning due to:
       // https://github.com/shelljs/shelljs/blob/master/src/tempdir.js#L43

From 95a360f4533d4b6bbaca41336894df1ec37e1469 Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Wed, 24 Apr 2019 14:03:46 +0800
Subject: [PATCH 02/39] feat: refs support in http api

---
 .../files-regular/refs-pull-stream.js | 39 +++++++++++++++++
 src/http/api/resources/files-regular.js | 42 +++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js
index aec4ba8a6d..c9e81b7dae 100644
--- a/src/core/components/files-regular/refs-pull-stream.js
+++ b/src/core/components/files-regular/refs-pull-stream.js
@@ -178,3 +178,42 @@ function getNodeLinks (node, path = '') {
   }
   return links
 }
+
+// Get links as a DAG Object
+// { <linkName1>: [link2, link3, link4], <linkName2>: [...] }
+function getLinkDAG (links) {
+  const linkNames = {}
+  for (const link of links) {
+    linkNames[link.name] = link
+  }
+
+  const linkDAG = {}
+  for (const link of links) {
+    const parentName = link.path.substring(0, link.path.lastIndexOf('/'))
+    linkDAG[parentName] = linkDAG[parentName] || []
+    linkDAG[parentName].push(link)
+  }
+  return linkDAG
+}
+
+// Recursively get refs for a link
+function getRefs (linkDAG, link, format, uniques) {
+  let refs = []
+  const children = linkDAG[link.path] || []
+  for (const child of children) {
+    if (!uniques || !uniques.has(child.hash)) {
+      uniques && uniques.add(child.hash)
+      refs.push(formatLink(link, child, format))
+      refs = refs.concat(getRefs(linkDAG, child, format, uniques))
+    }
+  }
+  return refs
+}
+
+// Get formatted link
+function formatLink (src, dst, format) {
+  let out = format.replace(/<src>/g, src.hash)
+  out = out.replace(/<dst>/g, dst.hash)
+  out = out.replace(/<linkname>/g, dst.name)
+  return out
+}
diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js
index 7175b17972..8887f144e9 100644
--- a/src/http/api/resources/files-regular.js
+++ b/src/http/api/resources/files-regular.js
@@ -316,6 +316,48 @@ exports.ls = {
   }
 }
 
+exports.refs = {
+  validate: {
+    query: Joi.object().keys({
+      r: Joi.boolean().default(false),
+      recursive: Joi.boolean().default(false),
+      format: Joi.string().default(Format.default),
+      e: Joi.boolean().default(false),
+      edges: Joi.boolean().default(false),
+      u: Joi.boolean().default(false),
+      unique: Joi.boolean().default(false),
+      'max-depth': Joi.number().integer().min(-1),
+      maxDepth: Joi.number().integer().min(-1)
+    }).unknown()
+  },
+
+  // uses common parseKey method that returns a `key`
+  parseArgs: exports.parseKey,
+
+  // main route handler which is called after the above `parseArgs`, but only if the args were valid
+  async handler (request, h) {
+    const { ipfs } = request.server.app
+    const { key } = request.pre.args
+    const recursive = request.query.r === 'true' || request.query.recursive === 'true'
+    const format = request.query.format
+    const e = request.query.e === 'true' || request.query.edges === 'true'
+    const u = request.query.u === 'true' || request.query.unique === 'true'
+    let maxDepth = request.query['max-depth'] || request.query.maxDepth
+    if (typeof maxDepth === 'string') {
+      maxDepth = parseInt(maxDepth)
+    }
+
+    let refs
+    try {
+      refs = await ipfs.refs(key, { recursive, format, e, u, maxDepth })
+    } catch (err) {
+      throw Boom.boomify(err, { message: 'Failed to get refs for path' })
+    }
+
+    return h.response(refs)
+  }
+}
+
 function toTypeCode (type) {
   switch (type) {
     case 'dir':
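Note: the `format` string consumed by `formatLink` above is a template over three placeholders, `<src>`, `<dst>` and `<linkname>`, the same tokens accepted by `ipfs refs --format` in go-ipfs. A minimal standalone sketch of the substitution (hypothetical hashes, not values from this test suite):

    // Sketch of the placeholder substitution performed by formatLink
    const src = { hash: 'QmSrcExample' } // hypothetical parent link
    const dst = { hash: 'QmDstExample', name: 'file.txt' } // hypothetical child link
    const out = '<src> -> <dst> (<linkname>)'
      .replace(/<src>/g, src.hash)
      .replace(/<dst>/g, dst.hash)
      .replace(/<linkname>/g, dst.name)
    console.log(out) // QmSrcExample -> QmDstExample (file.txt)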
From 22232e5c8eab3f4593376886df9abbd46409e054 Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Sun, 28 Apr 2019 23:11:19 +0800
Subject: [PATCH 03/39] feat: use ipld instead of unix-fs-exporter for refs

---
 .../files-regular/refs-pull-stream.js | 88 +++++++++++++------
 .../test-data/refs/animals/land/african.txt | 2 -
 .../test-data/refs/animals/land/americas.txt | 2 -
 .../refs/animals/land/australian.txt | 2 -
 .../test-data/refs/animals/sea/atlantic.txt | 2 -
 .../test-data/refs/animals/sea/indian.txt | 2 -
 test/fixtures/test-data/refs/atlantic-animals | 1 -
 .../test-data/refs/fruits/tropical.txt | 2 -
 test/fixtures/test-data/refs/mushroom.txt | 1 -
 9 files changed, 59 insertions(+), 43 deletions(-)
 delete mode 100644 test/fixtures/test-data/refs/animals/land/african.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/land/americas.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/land/australian.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/sea/atlantic.txt
 delete mode 100644 test/fixtures/test-data/refs/animals/sea/indian.txt
 delete mode 120000 test/fixtures/test-data/refs/atlantic-animals
 delete mode 100644 test/fixtures/test-data/refs/fruits/tropical.txt
 delete mode 100644 test/fixtures/test-data/refs/mushroom.txt

diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js
index c9e81b7dae..15875eb025 100644
--- a/src/core/components/files-regular/refs-pull-stream.js
+++ b/src/core/components/files-regular/refs-pull-stream.js
@@ -12,6 +12,13 @@ const { Format } = require('./refs')
 
 module.exports = function (self) {
   return function (ipfsPath, options = {}) {
+    setOptionsAlias(options, [
+      ['recursive', 'r'],
+      ['e', 'edges'],
+      ['u', 'unique'],
+      ['maxDepth', 'max-depth']
+    ])
+
     if (options.maxDepth === 0) {
       return pull.empty()
     }
@@ -179,41 +186,64 @@ function getNodeLinks (node, path = '') {
   return links
 }
 
-// Get links as a DAG Object
-// { <linkName1>: [link2, link3, link4], <linkName2>: [...] }
-function getLinkDAG (links) {
-  const linkNames = {}
-  for (const link of links) {
-    linkNames[link.name] = link
-  }
+// Do a depth first search of the DAG, starting from the given root cid
+function objectStream (ipfs, rootCid, maxDepth, isUnique) {
+  const uniques = new Set()
 
-  const linkDAG = {}
-  for (const link of links) {
-    const parentName = link.path.substring(0, link.path.lastIndexOf('/'))
-    linkDAG[parentName] = linkDAG[parentName] || []
-    linkDAG[parentName].push(link)
-  }
-  return linkDAG
-}
+  const root = { node: { cid: rootCid }, depth: 0 }
+  const traverseLevel = (obj) => {
+    const { node, depth } = obj
+
+    // Check the depth
+    const nextLevelDepth = depth + 1
+    if (nextLevelDepth > maxDepth) {
+      return pull.empty()
+    }
 
-// Recursively get refs for a link
-function getRefs (linkDAG, link, format, uniques) {
-  let refs = []
-  const children = linkDAG[link.path] || []
-  for (const child of children) {
-    if (!uniques || !uniques.has(child.hash)) {
-      uniques && uniques.add(child.hash)
-      refs.push(formatLink(link, child, format))
-      refs = refs.concat(getRefs(linkDAG, child, format, uniques))
+    // If unique option is enabled, check if the CID has been seen before.
+    // Note we need to do this here rather than before adding to the stream
+    // so that the unique check happens in the order that items are examined
+    // in the DAG.
+    if (isUnique) {
+      if (uniques.has(node.cid.toString())) {
+        // Mark this object as a duplicate so we can filter it out later
+        obj.isDuplicate = true
+        return pull.empty()
+      }
+      uniques.add(node.cid.toString())
     }
+
+    const deferred = pullDefer.source()
+
+    // Get this object's links
+    ipfs.object.links(node.cid, (err, links) => {
+      if (err) {
+        if (err.code === 'ERR_NOT_FOUND') {
+          err.message = `Could not find object with CID: ${node.cid}`
+        }
+        return deferred.resolve(pull.error(err))
+      }
+
+      // Add to the stream each link, parent and the new depth
+      const vals = links.map(link => ({
+        parent: node,
+        node: link,
+        depth: nextLevelDepth
+      }))
+
+      deferred.resolve(pull.values(vals))
+    })
+
+    return deferred
   }
-  return refs
+
+  return pullTraverse.depthFirst(root, traverseLevel)
 }
 
 // Get formatted link
-function formatLink (src, dst, format) {
-  let out = format.replace(/<src>/g, src.hash)
-  out = out.replace(/<dst>/g, dst.hash)
-  out = out.replace(/<linkname>/g, dst.name)
+function formatLink (srcCid, dstCid, linkName, format) {
+  let out = format.replace(/<src>/g, srcCid.toString())
+  out = out.replace(/<dst>/g, dstCid.toString())
+  out = out.replace(/<linkname>/g, linkName)
   return out
 }
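The traversal above delegates visit order to pull-traverse: `depthFirst` takes a start value and a function that returns a pull-stream of a value's children, and emits each visited value on the resulting source stream. A toy standalone sketch of that contract (hypothetical in-memory tree, not the patch's IPLD lookup):

    // Sketch of pull-traverse depth-first visit order over a plain tree
    const pull = require('pull-stream')
    const pullTraverse = require('pull-traverse')

    const tree = {
      id: 'root',
      children: [
        { id: 'a', children: [{ id: 'a1', children: [] }] },
        { id: 'b', children: [] }
      ]
    }

    pull(
      pullTraverse.depthFirst(tree, node => pull.values(node.children)),
      pull.collect((err, visited) => {
        if (err) throw err
        console.log(visited.map(n => n.id)) // [ 'root', 'a', 'a1', 'b' ]
      })
    )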
diff --git a/test/fixtures/test-data/refs/animals/land/african.txt b/test/fixtures/test-data/refs/animals/land/african.txt
deleted file mode 100644
index 29decfcd50..0000000000
--- a/test/fixtures/test-data/refs/animals/land/african.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-elephant
-rhinocerous
\ No newline at end of file
diff --git a/test/fixtures/test-data/refs/animals/land/americas.txt b/test/fixtures/test-data/refs/animals/land/americas.txt
deleted file mode 100644
index 21368a871d..0000000000
--- a/test/fixtures/test-data/refs/animals/land/americas.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ñandu
-tapir
\ No newline at end of file
diff --git a/test/fixtures/test-data/refs/animals/land/australian.txt b/test/fixtures/test-data/refs/animals/land/australian.txt
deleted file mode 100644
index a78c7bc9c3..0000000000
--- a/test/fixtures/test-data/refs/animals/land/australian.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-emu
-kangaroo
\ No newline at end of file
diff --git a/test/fixtures/test-data/refs/animals/sea/atlantic.txt 
b/test/fixtures/test-data/refs/animals/sea/atlantic.txt deleted file mode 100644 index f77ffe5119..0000000000 --- a/test/fixtures/test-data/refs/animals/sea/atlantic.txt +++ /dev/null @@ -1,2 +0,0 @@ -dolphin -whale \ No newline at end of file diff --git a/test/fixtures/test-data/refs/animals/sea/indian.txt b/test/fixtures/test-data/refs/animals/sea/indian.txt deleted file mode 100644 index c455106f6c..0000000000 --- a/test/fixtures/test-data/refs/animals/sea/indian.txt +++ /dev/null @@ -1,2 +0,0 @@ -cuttlefish -octopus \ No newline at end of file diff --git a/test/fixtures/test-data/refs/atlantic-animals b/test/fixtures/test-data/refs/atlantic-animals deleted file mode 120000 index 670958bfa8..0000000000 --- a/test/fixtures/test-data/refs/atlantic-animals +++ /dev/null @@ -1 +0,0 @@ -animals/sea/atlantic.txt \ No newline at end of file diff --git a/test/fixtures/test-data/refs/fruits/tropical.txt b/test/fixtures/test-data/refs/fruits/tropical.txt deleted file mode 100644 index 4f331bc7d2..0000000000 --- a/test/fixtures/test-data/refs/fruits/tropical.txt +++ /dev/null @@ -1,2 +0,0 @@ -banana -pineapple \ No newline at end of file diff --git a/test/fixtures/test-data/refs/mushroom.txt b/test/fixtures/test-data/refs/mushroom.txt deleted file mode 100644 index 8b248aa9c8..0000000000 --- a/test/fixtures/test-data/refs/mushroom.txt +++ /dev/null @@ -1 +0,0 @@ -mushroom \ No newline at end of file From 2dc926537010e89c6c7eaaaf6ca0229ea6d68c6e Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 30 Apr 2019 22:42:35 +0800 Subject: [PATCH 04/39] feat: refs local --- src/http/api/resources/files-regular.js | 32 ++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index 8887f144e9..45901ac550 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -316,6 +316,17 @@ exports.ls = { } } +function toTypeCode (type) { + switch (type) { + case 'dir': + return 1 + case 'file': + return 2 + default: + return 0 + } +} + exports.refs = { validate: { query: Joi.object().keys({ @@ -358,14 +369,19 @@ exports.refs = { } } -function toTypeCode (type) { - switch (type) { - case 'dir': - return 1 - case 'file': - return 2 - default: - return 0 +exports.refs.local = { + // main route handler + async handler (request, h) { + const { ipfs } = request.server.app + + let refs + try { + refs = await ipfs.refs.local() + } catch (err) { + throw Boom.boomify(err, { message: 'Failed to get local refs' }) + } + + return h.response(refs) } } From 659bb5ca8213bbcee641ff059bb75147a06ba636 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 1 May 2019 19:34:57 +0800 Subject: [PATCH 05/39] feat: add refs.localPullStream && refs.localReadableStream --- src/http/api/resources/files-regular.js | 68 ++++++++++++++++++------- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index 45901ac550..e726707c19 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -346,7 +346,7 @@ exports.refs = { parseArgs: exports.parseKey, // main route handler which is called after the above `parseArgs`, but only if the args were valid - async handler (request, h) { + handler (request, h) { const { ipfs } = request.server.app const { key } = request.pre.args const recursive = request.query.r === 'true' || request.query.recursive === 'true' @@ 
-358,31 +358,65 @@ exports.refs = { maxDepth = parseInt(maxDepth) } - let refs - try { - refs = await ipfs.refs(key, { recursive, format, e, u, maxDepth }) - } catch (err) { - throw Boom.boomify(err, { message: 'Failed to get refs for path' }) - } - - return h.response(refs) + const source = ipfs.refsPullStream(key, { recursive, format, e, u, maxDepth }) + return sendRefsReplyStream(request, h, `refs for ${key}`, source) } } exports.refs.local = { // main route handler - async handler (request, h) { + handler (request, h) { const { ipfs } = request.server.app + const source = ipfs.refs.localPullStream() + return sendRefsReplyStream(request, h, 'local refs', source) + } +} - let refs - try { - refs = await ipfs.refs.local() - } catch (err) { - throw Boom.boomify(err, { message: 'Failed to get local refs' }) - } +function sendRefsReplyStream (request, h, desc, source) { + const replyStream = pushable() + const aborter = abortable() + + const stream = toStream.source(pull( + replyStream, + aborter, + ndjson.serialize() + )) - return h.response(refs) + // const stream = toStream.source(replyStream.source) + // hapi is not very clever and throws if no + // - _read method + // - _readableState object + // are there :( + if (!stream._read) { + stream._read = () => {} + stream._readableState = {} + stream.unpipe = () => {} } + + pull( + source, + pull.drain( + (ref) => replyStream.push(ref), + (err) => { + if (err) { + request.raw.res.addTrailers({ + 'X-Stream-Error': JSON.stringify({ + Message: `Failed to get ${desc}: ${err.message || ''}`, + Code: 0 + }) + }) + return aborter.abort() + } + + replyStream.end() + } + ) + ) + + return h.response(stream) + .header('x-chunked-output', '1') + .header('content-type', 'application/json') + .header('Trailer', 'X-Stream-Error') } exports.refs = { From 1f40f7a29e68a8440ebd04500959f371b5355156 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 3 May 2019 23:00:24 +0800 Subject: [PATCH 06/39] feat: make object.links work with CBOR --- src/core/components/object.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/core/components/object.js b/src/core/components/object.js index 9db82771eb..530877ae1a 100644 --- a/src/core/components/object.js +++ b/src/core/components/object.js @@ -107,6 +107,22 @@ function findLinks (node, links = []) { return links } +// Recursively search the node for CIDs +function getNodeLinks (node, path = '') { + let links = [] + for (const [name, value] of Object.entries(node)) { + if (CID.isCID(value)) { + links.push({ + name: path + name, + cid: value + }) + } else if (typeof value === 'object') { + links = links.concat(getNodeLinks(value, path + name + '/')) + } + } + return links +} + module.exports = function object (self) { function editAndSave (edit) { return (multihash, options, callback) => { From 7be97252a9e5ba4bdca69bd7e50f56a285819430 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 3 May 2019 23:03:03 +0800 Subject: [PATCH 07/39] feat: handle multiple refs. 
Better param handling --- .../files-regular/refs-pull-stream.js | 7 ------ src/http/api/resources/files-regular.js | 22 +++++++------------ 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js index 15875eb025..49953c3653 100644 --- a/src/core/components/files-regular/refs-pull-stream.js +++ b/src/core/components/files-regular/refs-pull-stream.js @@ -12,13 +12,6 @@ const { Format } = require('./refs') module.exports = function (self) { return function (ipfsPath, options = {}) { - setOptionsAlias(options, [ - ['recursive', 'r'], - ['e', 'edges'], - ['u', 'unique'], - ['maxDepth', 'max-depth'] - ]) - if (options.maxDepth === 0) { return pull.empty() } diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index e726707c19..add12b9496 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -330,15 +330,11 @@ function toTypeCode (type) { exports.refs = { validate: { query: Joi.object().keys({ - r: Joi.boolean().default(false), recursive: Joi.boolean().default(false), format: Joi.string().default(Format.default), - e: Joi.boolean().default(false), edges: Joi.boolean().default(false), - u: Joi.boolean().default(false), unique: Joi.boolean().default(false), - 'max-depth': Joi.number().integer().min(-1), - maxDepth: Joi.number().integer().min(-1) + 'max-depth': Joi.number().integer().min(-1) }).unknown() }, @@ -349,16 +345,14 @@ exports.refs = { handler (request, h) { const { ipfs } = request.server.app const { key } = request.pre.args - const recursive = request.query.r === 'true' || request.query.recursive === 'true' + + const recursive = request.query.recursive const format = request.query.format - const e = request.query.e === 'true' || request.query.edges === 'true' - const u = request.query.u === 'true' || request.query.unique === 'true' - let maxDepth = request.query['max-depth'] || request.query.maxDepth - if (typeof maxDepth === 'string') { - maxDepth = parseInt(maxDepth) - } + const edges = request.query.edges + const unique = request.query.unique + const maxDepth = request.query['max-depth'] - const source = ipfs.refsPullStream(key, { recursive, format, e, u, maxDepth }) + const source = ipfs.refsPullStream(key, { recursive, format, edges, unique, maxDepth }) return sendRefsReplyStream(request, h, `refs for ${key}`, source) } } @@ -396,7 +390,7 @@ function sendRefsReplyStream (request, h, desc, source) { pull( source, pull.drain( - (ref) => replyStream.push(ref), + (ref) => replyStream.push({ Ref: ref.ref, Err: ref.err }), (err) => { if (err) { request.raw.res.addTrailers({ From 6c9ae7de9c1102d86c5e7f5d1144650029aaab4d Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 8 May 2019 17:31:43 +0800 Subject: [PATCH 08/39] feat: GC --- src/cli/commands/repo/gc.js | 31 ++++++-- src/core/components/gc.js | 150 ++++++++++++++++++++++++++++++++++++ src/core/components/repo.js | 7 +- 3 files changed, 177 insertions(+), 11 deletions(-) create mode 100644 src/core/components/gc.js diff --git a/src/cli/commands/repo/gc.js b/src/cli/commands/repo/gc.js index ec3b547e93..0f7bfff29a 100644 --- a/src/cli/commands/repo/gc.js +++ b/src/cli/commands/repo/gc.js @@ -1,16 +1,37 @@ 'use strict' +const { print } = require('../../utils') + module.exports = { command: 'gc', describe: 'Perform a garbage collection sweep on the repo.', - builder: {}, + builder: { + quiet: { + alias: 'q', + desc: 'Write 
minimal output', + type: 'boolean', + default: false + }, + 'stream-errors': { + desc: 'Output individual errors thrown when deleting blocks.', + type: 'boolean', + default: false + } + }, - handler (argv) { - argv.resolve((async () => { - const ipfs = await argv.getIpfs() - await ipfs.repo.gc() + handler ({ getIpfs, quiet, streamErrors, resolve }) { + resolve((async () => { + const ipfs = await getIpfs() + const res = await ipfs.repo.gc() + for (const r of res) { + if (res.err) { + streamErrors && print(res.err, true, true) + } else { + print((quiet ? '' : 'Removed ') + r.cid) + } + } })()) } } diff --git a/src/core/components/gc.js b/src/core/components/gc.js new file mode 100644 index 0000000000..5f93530c04 --- /dev/null +++ b/src/core/components/gc.js @@ -0,0 +1,150 @@ +'use strict' + +const promisify = require('promisify-es6') +const CID = require('cids') +const base32 = require('base32.js') +const parallel = require('async/parallel') +const map = require('async/map') + +module.exports = function gc (self) { + return promisify(async (opts, callback) => { + if (typeof opts === 'function') { + callback = opts + opts = {} + } + + const start = Date.now() + self.log(`GC: Creating set of marked blocks`) + + parallel([ + // Get all blocks from the blockstore + (cb) => self._repo.blocks.query({ keysOnly: true }, cb), + // Mark all blocks that are being used + (cb) => createColoredSet(self, cb) + ], (err, [blocks, coloredSet]) => { + if (err) { + self.log(`GC: Error - ${err.message}`) + return callback(err) + } + + // Delete blocks that are not being used + deleteUnmarkedBlocks(self, coloredSet, blocks, start, (err, res) => { + err && self.log(`GC: Error - ${err.message}`) + callback(err, res) + }) + }) + }) +} + +// TODO: make global constants +const { Key } = require('interface-datastore') +const pinDataStoreKey = new Key('/local/pins') +const MFS_ROOT_KEY = new Key('/local/filesroot') + +function createColoredSet (ipfs, callback) { + parallel([ + // "Empty block" used by the pinner + (cb) => cb(null, ['QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n']), + + // All pins, direct and indirect + (cb) => ipfs.pin.ls((err, pins) => { + if (err) { + return cb(new Error(`Could not list pinned blocks: ${err.message}`)) + } + ipfs.log(`GC: Found ${pins.length} pinned blocks`) + cb(null, pins.map(p => p.hash)) + }), + + // Blocks used internally by the pinner + (cb) => ipfs._repo.datastore.get(pinDataStoreKey, (err, mh) => { + if (err) { + if (err.code === 'ERR_NOT_FOUND') { + ipfs.log(`GC: No pinned blocks`) + return cb(null, []) + } + return cb(new Error(`Could not get pin sets root from datastore: ${err.message}`)) + } + + const cid = new CID(mh) + ipfs.dag.get(cid, '', { preload: false }, (err, obj) => { + // TODO: Handle not found? + if (err) { + return cb(new Error(`Could not get pin sets from store: ${err.message}`)) + } + + // The pinner stores an object that has two links to pin sets: + // 1. The directly pinned CIDs + // 2. 
The recursively pinned CIDs
+        cb(null, [cid.toString(), ...obj.value.links.map(l => l.cid.toString())])
+      })
+    }),
+
+    // The MFS root and all its descendants
+    (cb) => ipfs._repo.datastore.get(MFS_ROOT_KEY, (err, mh) => {
+      if (err) {
+        if (err.code === 'ERR_NOT_FOUND') {
+          ipfs.log(`GC: No blocks in MFS`)
+          return cb(null, [])
+        }
+        return cb(new Error(`Could not get MFS root from datastore: ${err.message}`))
+      }
+
+      getDescendants(ipfs, new CID(mh), cb)
+    })
+  ], (err, res) => callback(err, !err && new Set(res.flat())))
+}
+
+function getDescendants (ipfs, cid, callback) {
+  // TODO: Make sure we don't go out to the network
+  ipfs.refs(cid, { recursive: true }, (err, refs) => {
+    if (err) {
+      return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`))
+    }
+    ipfs.log(`GC: Found ${refs.length} MFS blocks`)
+    callback(null, [cid.toString(), ...refs.map(r => r.ref)])
+  })
+}
+
+function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) {
+  // Iterate through all blocks and find those that are not in the marked set
+  // The blocks variable has the form { { key: Key() }, { key: Key() }, ... }
+  const unreferenced = []
+  const res = []
+  for (const { key: k } of blocks) {
+    try {
+      const cid = dsKeyToCid(k)
+      if (!coloredSet.has(cid.toString())) {
+        unreferenced.push(cid)
+      }
+    } catch (err) {
+      res.push({ err: new Error(`Could not convert block with key '${k}' to CID: ${err.message}`) })
+    }
+  }
+
+  const msg = `GC: Marked set has ${coloredSet.size} blocks. Blockstore has ${blocks.length} blocks. ` +
+    `Deleting ${unreferenced.length} blocks.`
+  ipfs.log(msg)
+
+  // TODO: limit concurrency
+  map(unreferenced, (cid, cb) => {
+    // Delete blocks from blockstore
+    ipfs._repo.blocks.delete(cid, (err) => {
+      const res = {
+        cid: cid.toString(),
+        err: err && new Error(`Could not delete block with CID ${cid}: ${err.message}`)
+      }
+      cb(null, res)
+    })
+  }, (_, delRes) => {
+    ipfs.log(`GC: Complete (${Date.now() - start}ms)`)
+
+    callback(null, res.concat(delRes))
+  })
+}
+
+function dsKeyToCid (key) {
+  // Block key is of the form /<base32 encoded multihash>
+  const decoder = new base32.Decoder()
+  const buff = decoder.write(key.toString().slice(1)).finalize()
+  return new CID(buff)
+}
diff --git a/src/core/components/repo.js b/src/core/components/repo.js
index 23116d8cf5..d88ad887a3 100644
--- a/src/core/components/repo.js
+++ b/src/core/components/repo.js
@@ -39,12 +39,7 @@ module.exports = function repo (self) {
     }),
 
     gc: promisify((options, callback) => {
-      if (typeof options === 'function') {
-        callback = options
-        options = {}
-      }
-
-      callback(new Error('Not implemented'))
+      require('./gc')(self)(options, callback)
     }),
 
     stat: promisify((options, callback) => {

From 9aae8b45a97de31abd6f6e1de4058f86b75b388a Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Wed, 8 May 2019 23:08:05 +0800
Subject: [PATCH 09/39] chore: add comment to explain cli param parsing

---
 test/utils/ipfs-exec.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/utils/ipfs-exec.js b/test/utils/ipfs-exec.js
index eb23d2604b..92f3e0f4e0 100644
--- a/test/utils/ipfs-exec.js
+++ b/test/utils/ipfs-exec.js
@@ -34,6 +34,9 @@ module.exports = (repoPath, opts) => {
   }))
 
   const execute = (exec, args) => {
+    // Adding '--' at the front of the command allows us to parse commands that
+    // have a parameter with spaces in it, eg
+    // ipfs refs --format="<src> -> <dst>"
    const cp = exec(yargs('-- ' + args[0]).argv._)
    const res = cp.then((res) => {
      // We can't escape the os.tmpdir warning due to:
      // https://github.com/shelljs/shelljs/blob/master/src/tempdir.js#L43
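With patch 08 in place the GC pipeline is usable end to end. A rough consumption sketch, matching the result shape that the CLI handler added in patch 08 iterates over (assumes an initialized js-ipfs instance; each entry carries either a deleted `cid` or an `err`):

    // Sketch of driving the new repo.gc API directly
    const res = await ipfs.repo.gc()
    for (const r of res) {
      if (r.err) {
        // Per-block failures are reported inline rather than aborting the sweep
        console.error(r.err.message)
      } else {
        console.log('Removed ' + r.cid)
      }
    }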
From ee8edb32217c4e80187f7fdd355feb25d8ed6fa1 Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Fri, 10 May 2019 03:48:31 +0800
Subject: [PATCH 10/39] refactor: move links retrieval from object to refs

---
 .../files-regular/refs-pull-stream.js | 32 +++++++++++++++----
 src/core/components/object.js | 20 +++---------
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js
index 49953c3653..8447192ae0 100644
--- a/src/core/components/files-regular/refs-pull-stream.js
+++ b/src/core/components/files-regular/refs-pull-stream.js
@@ -209,7 +209,7 @@ function objectStream (ipfs, rootCid, maxDepth, isUnique) {
     const deferred = pullDefer.source()
 
     // Get this object's links
-    ipfs.object.links(node.cid, (err, links) => {
+    getLinks(ipfs, node.cid, (err, links) => {
       if (err) {
        if (err.code === 'ERR_NOT_FOUND') {
          err.message = `Could not find object with CID: ${node.cid}`
@@ -233,10 +233,28 @@ function objectStream (ipfs, rootCid, maxDepth, isUnique) {
   return pullTraverse.depthFirst(root, traverseLevel)
 }
 
-// Get formatted link
-function formatLink (srcCid, dstCid, linkName, format) {
-  let out = format.replace(/<src>/g, srcCid.toString())
-  out = out.replace(/<dst>/g, dstCid.toString())
-  out = out.replace(/<linkname>/g, linkName)
-  return out
+// Fetch a node from IPLD then get all its links
+function getLinks (ipfs, cid, callback) {
+  ipfs._ipld.get(new CID(cid), (err, node) => {
+    if (err) {
+      return callback(err)
+    }
+    callback(null, node.value.links || getNodeLinks(node.value))
+  })
+}
+
+// Recursively search the node for CIDs
+function getNodeLinks (node, path = '') {
+  let links = []
+  for (const [name, value] of Object.entries(node)) {
+    if (CID.isCID(value)) {
+      links.push({
+        name: path + name,
+        cid: value
+      })
+    } else if (typeof value === 'object') {
+      links = links.concat(getNodeLinks(value, path + name + '/'))
+    }
+  }
+  return links
 }
diff --git a/src/core/components/object.js b/src/core/components/object.js
index 530877ae1a..36b1701768 100644
--- a/src/core/components/object.js
+++ b/src/core/components/object.js
@@ -107,22 +107,6 @@ function findLinks (node, links = []) {
   return links
 }
 
-// Recursively search the node for CIDs
-function getNodeLinks (node, path = '') {
-  let links = []
-  for (const [name, value] of Object.entries(node)) {
-    if (CID.isCID(value)) {
-      links.push({
-        name: path + name,
-        cid: value
-      })
-    } else if (typeof value === 'object') {
-      links = links.concat(getNodeLinks(value, path + name + '/'))
-    }
-  }
-  return links
-}
-
 module.exports = function object (self) {
   function editAndSave (edit) {
     return (multihash, options, callback) => {
@@ -338,6 +322,7 @@ module.exports = function object (self) {
         return callback(err)
       }
 
+<<<<<<< HEAD
       if (cid.codec === 'raw') {
        return callback(null, [])
      }
@@ -353,6 +338,9 @@ module.exports = function object (self) {
      }
 
      callback(new Error(`Cannot resolve links from codec ${cid.codec}`))
+=======
+      callback(null, node.links)
+>>>>>>> refactor: move links retrieval from object to refs
     })
   }),

From 40ae451096c0f43a5f727161c99f6b8cddd7d2a4 Mon Sep 17 00:00:00 2001
From: Dirk McCormick
Date: Fri, 10 May 2019 08:29:54 -0400
Subject: [PATCH 11/39] feat: expose GC to http api

---
 src/http/api/resources/repo.js | 21 ++++++++++++++++----
 src/http/api/routes/repo.js | 8 ++++++++
 test/cli/repo.js | 35 ++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/src/http/api/resources/repo.js 
b/src/http/api/resources/repo.js index 998431111a..1bf14d3a1f 100644 --- a/src/http/api/resources/repo.js +++ b/src/http/api/resources/repo.js @@ -1,9 +1,22 @@ 'use strict' -exports.gc = async (request, h) => { - const { ipfs } = request.server.app - await ipfs.repo.gc() - return h.response() +const Joi = require('joi') + +exports.gc = { + validate: { + query: Joi.object().keys({ + quiet: Joi.boolean().default(false), + 'stream-errors': Joi.boolean().default(false) + }).unknown() + }, + + async handler (request, h) { + const quiet = request.query.quiet + const streamErrors = request.query['stream-errors'] + const { ipfs } = request.server.app + const res = await ipfs.repo.gc({ quiet, streamErrors }) + return h.response(res.map(r => ({ Err: r.err, Key: { '/': r.cid } }))) + } } exports.version = async (request, h) => { diff --git a/src/http/api/routes/repo.js b/src/http/api/routes/repo.js index 21b306e51e..5f2212385c 100644 --- a/src/http/api/routes/repo.js +++ b/src/http/api/routes/repo.js @@ -12,6 +12,14 @@ module.exports = [ method: '*', path: '/api/v0/repo/stat', handler: resources.repo.stat + }, + { + method: '*', + path: '/api/v0/repo/gc', + options: { + validate: resources.repo.gc.validate + }, + handler: resources.repo.gc.handler } // TODO: implement the missing spec https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/REPO.md ] diff --git a/test/cli/repo.js b/test/cli/repo.js index 17c04aaaa3..cbe7acd42b 100644 --- a/test/cli/repo.js +++ b/test/cli/repo.js @@ -2,7 +2,12 @@ 'use strict' const expect = require('chai').expect +const fs = require('fs') +const os = require('os') +const path = require('path') const repoVersion = require('ipfs-repo').repoVersion +const hat = require('hat') +const clean = require('../utils/clean') const runOnAndOff = require('../utils/on-and-off') @@ -18,4 +23,34 @@ describe('repo', () => runOnAndOff((thing) => { expect(out).to.eql(`${repoVersion}\n`) }) }) + + // Note: There are more comprehensive GC tests in interface-js-ipfs-core + it('should run garbage collection', async () => { + // Create and add a file to IPFS + const filePath = path.join(os.tmpdir(), hat()) + const content = String(Math.random()) + fs.writeFileSync(filePath, content) + const cid = (await ipfs(`add -Q ${filePath}`)).trim() + + // File hash should be in refs local + const localRefs = await ipfs('refs local') + expect(localRefs.split('\n')).includes(cid) + + // Run GC, file should not have been removed because it's pinned + const gcOut = await ipfs('repo gc') + expect(gcOut.split('\n')).not.includes('Removed ' + cid) + + // Unpin file + await ipfs('pin rm ' + cid) + + // Run GC, file should now be removed + const gcOutAfterUnpin = await ipfs('repo gc') + expect(gcOutAfterUnpin.split('\n')).to.includes('Removed ' + cid) + + const localRefsAfterGc = await ipfs('refs local') + expect(localRefsAfterGc.split('\n')).not.includes(cid) + + // Clean up file + await clean(filePath) + }) })) From 072ddfd632c8b4429a0efed89b79d52ce732ce1f Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 10 May 2019 08:33:37 -0400 Subject: [PATCH 12/39] test: unskip repo gc test --- test/core/interface.spec.js | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/core/interface.spec.js b/test/core/interface.spec.js index df572c19e2..bade8cafcd 100644 --- a/test/core/interface.spec.js +++ b/test/core/interface.spec.js @@ -172,15 +172,7 @@ describe('interface-ipfs-core tests', function () { } }) - tests.repo(defaultCommonFactory, { - skip: [ - // repo.gc - { - name: 'gc', - 
reason: 'TODO: repo.gc is not implemented in js-ipfs yet!' - } - ] - }) + tests.repo(defaultCommonFactory) tests.stats(defaultCommonFactory) From 732a02c7fd96c687507d914ee834eb32e7b56168 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 10 May 2019 09:58:31 -0400 Subject: [PATCH 13/39] fix: refactor and fix some bugs with GC --- src/cli/commands/repo/gc.js | 4 +-- src/core/components/gc.js | 64 ++++++++++++++++++---------------- src/core/components/repo.js | 6 ++-- src/http/api/resources/repo.js | 11 +++--- test/cli/repo.js | 18 +++------- 5 files changed, 51 insertions(+), 52 deletions(-) diff --git a/src/cli/commands/repo/gc.js b/src/cli/commands/repo/gc.js index 0f7bfff29a..d45f4fdd07 100644 --- a/src/cli/commands/repo/gc.js +++ b/src/cli/commands/repo/gc.js @@ -26,8 +26,8 @@ module.exports = { const ipfs = await getIpfs() const res = await ipfs.repo.gc() for (const r of res) { - if (res.err) { - streamErrors && print(res.err, true, true) + if (r.err) { + streamErrors && print(r.err, true, true) } else { print((quiet ? '' : 'Removed ') + r.cid) } diff --git a/src/core/components/gc.js b/src/core/components/gc.js index 5f93530c04..81411a9772 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -4,15 +4,17 @@ const promisify = require('promisify-es6') const CID = require('cids') const base32 = require('base32.js') const parallel = require('async/parallel') -const map = require('async/map') +const mapLimit = require('async/mapLimit') +const { Key } = require('interface-datastore') -module.exports = function gc (self) { - return promisify(async (opts, callback) => { - if (typeof opts === 'function') { - callback = opts - opts = {} - } +// Limit on the number of parallel block remove operations +const BLOCK_RM_CONCURRENCY = 256 +const PIN_DS_KEY = new Key('/local/pins') +const MFS_ROOT_DS_KEY = new Key('/local/filesroot') +// Perform mark and sweep garbage collection +module.exports = function gc (self) { + return promisify(async (callback) => { const start = Date.now() self.log(`GC: Creating set of marked blocks`) @@ -20,15 +22,15 @@ module.exports = function gc (self) { // Get all blocks from the blockstore (cb) => self._repo.blocks.query({ keysOnly: true }, cb), // Mark all blocks that are being used - (cb) => createColoredSet(self, cb) - ], (err, [blocks, coloredSet]) => { + (cb) => createMarkedSet(self, cb) + ], (err, [blocks, markedSet]) => { if (err) { self.log(`GC: Error - ${err.message}`) return callback(err) } // Delete blocks that are not being used - deleteUnmarkedBlocks(self, coloredSet, blocks, start, (err, res) => { + deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { err && self.log(`GC: Error - ${err.message}`) callback(err, res) }) @@ -36,15 +38,11 @@ module.exports = function gc (self) { }) } -// TODO: make global constants -const { Key } = require('interface-datastore') -const pinDataStoreKey = new Key('/local/pins') -const MFS_ROOT_KEY = new Key('/local/filesroot') - -function createColoredSet (ipfs, callback) { +// Get Set of CIDs of blocks to keep +function createMarkedSet (ipfs, callback) { parallel([ // "Empty block" used by the pinner - (cb) => cb(null, ['QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n']), + (cb) => cb(null, [new CID('QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n')]), // All pins, direct and indirect (cb) => ipfs.pin.ls((err, pins) => { @@ -52,11 +50,11 @@ function createColoredSet (ipfs, callback) { return cb(new Error(`Could not list pinned blocks: ${err.message}`)) } ipfs.log(`GC: Found ${pins.length} 
pinned blocks`) - cb(null, pins.map(p => p.hash)) + cb(null, pins.map(p => new CID(p.hash))) }), // Blocks used internally by the pinner - (cb) => ipfs._repo.datastore.get(pinDataStoreKey, (err, mh) => { + (cb) => ipfs._repo.datastore.get(PIN_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { ipfs.log(`GC: No pinned blocks`) @@ -67,7 +65,6 @@ function createColoredSet (ipfs, callback) { const cid = new CID(mh) ipfs.dag.get(cid, '', { preload: false }, (err, obj) => { - // TODO: Handle not found? if (err) { return cb(new Error(`Could not get pin sets from store: ${err.message}`)) } @@ -75,12 +72,12 @@ function createColoredSet (ipfs, callback) { // The pinner stores an object that has two links to pin sets: // 1. The directly pinned CIDs // 2. The recursively pinned CIDs - cb(null, [cid.toString(), ...obj.value.links.map(l => l.cid.toString())]) + cb(null, [cid, ...obj.value.links.map(l => l.cid)]) }) }), // The MFS root and all its descendants - (cb) => ipfs._repo.datastore.get(MFS_ROOT_KEY, (err, mh) => { + (cb) => ipfs._repo.datastore.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { ipfs.log(`GC: No blocks in MFS`) @@ -91,21 +88,29 @@ function createColoredSet (ipfs, callback) { getDescendants(ipfs, new CID(mh), cb) }) - ], (err, res) => callback(err, !err && new Set(res.flat()))) + ], (err, res) => { + if (err) { + return callback(err) + } + + const cids = res.flat().map(cid => cid.toV1().toString('base32')) + return callback(null, new Set(cids)) + }) } +// Recursively get descendants of the given CID function getDescendants (ipfs, cid, callback) { - // TODO: Make sure we don't go out to the network ipfs.refs(cid, { recursive: true }, (err, refs) => { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } ipfs.log(`GC: Found ${refs.length} MFS blocks`) - callback(null, [cid.toString(), ...refs.map(r => r.ref)]) + callback(null, [cid, ...refs.map(r => new CID(r.ref))]) }) } -function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) { +// Delete all blocks that are not marked as in use +function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { // Iterate through all blocks and find those that are not in the marked set // The blocks variable has the form { { key: Key() }, { key: Key() }, ... } const unreferenced = [] @@ -113,7 +118,7 @@ function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) { for (const { key: k } of blocks) { try { const cid = dsKeyToCid(k) - if (!coloredSet.has(cid.toString())) { + if (!markedSet.has(cid.toV1().toString('base32'))) { unreferenced.push(cid) } } catch (err) { @@ -121,12 +126,11 @@ function deleteUnmarkedBlocks (ipfs, coloredSet, blocks, start, callback) { } } - const msg = `GC: Marked set has ${coloredSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + + const msg = `GC: Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. 
` + `Deleting ${unreferenced.length} blocks.` ipfs.log(msg) - // TODO: limit concurrency - map(unreferenced, (cid, cb) => { + mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { // Delete blocks from blockstore ipfs._repo.blocks.delete(cid, (err) => { const res = { diff --git a/src/core/components/repo.js b/src/core/components/repo.js index d88ad887a3..5b0914aade 100644 --- a/src/core/components/repo.js +++ b/src/core/components/repo.js @@ -4,6 +4,8 @@ const promisify = require('promisify-es6') const repoVersion = require('ipfs-repo').repoVersion module.exports = function repo (self) { + const gc = require('./gc')(self) + return { init: (bits, empty, callback) => { // 1. check if repo already exists @@ -38,8 +40,8 @@ module.exports = function repo (self) { }) }), - gc: promisify((options, callback) => { - require('./gc')(self)(options, callback) + gc: promisify((callback) => { + gc(callback) }), stat: promisify((options, callback) => { diff --git a/src/http/api/resources/repo.js b/src/http/api/resources/repo.js index 1bf14d3a1f..9ca4734a1c 100644 --- a/src/http/api/resources/repo.js +++ b/src/http/api/resources/repo.js @@ -5,17 +5,20 @@ const Joi = require('joi') exports.gc = { validate: { query: Joi.object().keys({ - quiet: Joi.boolean().default(false), 'stream-errors': Joi.boolean().default(false) }).unknown() }, async handler (request, h) { - const quiet = request.query.quiet const streamErrors = request.query['stream-errors'] const { ipfs } = request.server.app - const res = await ipfs.repo.gc({ quiet, streamErrors }) - return h.response(res.map(r => ({ Err: r.err, Key: { '/': r.cid } }))) + const res = await ipfs.repo.gc() + + const filtered = res.filter(r => !r.err || streamErrors) + const response = filtered.map(r => { + return { Err: r.err, Key: !r.err && { '/': r.cid } } + }) + return h.response(response) } } diff --git a/test/cli/repo.js b/test/cli/repo.js index cbe7acd42b..9fd5d8f19d 100644 --- a/test/cli/repo.js +++ b/test/cli/repo.js @@ -2,15 +2,11 @@ 'use strict' const expect = require('chai').expect -const fs = require('fs') -const os = require('os') -const path = require('path') const repoVersion = require('ipfs-repo').repoVersion -const hat = require('hat') -const clean = require('../utils/clean') - const runOnAndOff = require('../utils/on-and-off') +const fixturePath = 'test/fixtures/planets/solar-system.md' + describe('repo', () => runOnAndOff((thing) => { let ipfs @@ -26,11 +22,8 @@ describe('repo', () => runOnAndOff((thing) => { // Note: There are more comprehensive GC tests in interface-js-ipfs-core it('should run garbage collection', async () => { - // Create and add a file to IPFS - const filePath = path.join(os.tmpdir(), hat()) - const content = String(Math.random()) - fs.writeFileSync(filePath, content) - const cid = (await ipfs(`add -Q ${filePath}`)).trim() + // Add a file to IPFS + const cid = (await ipfs(`add -Q ${fixturePath}`)).trim() // File hash should be in refs local const localRefs = await ipfs('refs local') @@ -49,8 +42,5 @@ describe('repo', () => runOnAndOff((thing) => { const localRefsAfterGc = await ipfs('refs local') expect(localRefsAfterGc.split('\n')).not.includes(cid) - - // Clean up file - await clean(filePath) }) })) From ff289238efeb42a48f26b221c9490411bf3c79ca Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 20 May 2019 11:47:58 -0400 Subject: [PATCH 14/39] feat: GC locking --- src/core/components/block.js | 23 +-- .../files-regular/add-pull-stream.js | 6 +- src/core/components/gc-lock.js | 102 +++++++++++ 
src/core/components/gc.js | 36 ++-- src/core/components/object.js | 32 ++-- src/core/components/pin.js | 168 +++++++++--------- src/core/components/repo.js | 6 +- src/core/index.js | 2 + 8 files changed, 244 insertions(+), 131 deletions(-) create mode 100644 src/core/components/gc-lock.js diff --git a/src/core/components/block.js b/src/core/components/block.js index 6a1bb960d1..f5a9f610ed 100644 --- a/src/core/components/block.js +++ b/src/core/components/block.js @@ -81,17 +81,19 @@ module.exports = function block (self) { cb(null, new Block(block, cid)) }) }, - (block, cb) => self._blockService.put(block, (err) => { - if (err) { - return cb(err) - } + (block, cb) => self._gcLock.writeLock((_cb) => { + self._blockService.put(block, (err) => { + if (err) { + return _cb(err) + } - if (options.preload !== false) { - self._preload(block.cid) - } + if (options.preload !== false) { + self._preload(block.cid) + } - cb(null, block) - }) + _cb(null, block) + }) + }, cb) ], callback) }), rm: promisify((cid, callback) => { @@ -100,7 +102,8 @@ module.exports = function block (self) { } catch (err) { return setImmediate(() => callback(errCode(err, 'ERR_INVALID_CID'))) } - self._blockService.delete(cid, callback) + + self._gcLock.readLock((cb) => self._blockService.delete(cid, cb), callback) }), stat: promisify((cid, options, callback) => { if (typeof options === 'function') { diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index c976fcf5df..559a3dc951 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -152,7 +152,9 @@ module.exports = function (self) { } opts.progress = progress - return pull( + + + return self._gcLock.pullReadLock(() => pull( pullMap(content => normalizeContent(content, opts)), pullFlatten(), pullMap(file => ({ @@ -163,6 +165,6 @@ module.exports = function (self) { pullAsyncMap((file, cb) => prepareFile(file, self, opts, cb)), pullMap(file => preloadFile(file, self, opts)), pullAsyncMap((file, cb) => pinFile(file, self, opts, cb)) - ) + )) } } diff --git a/src/core/components/gc-lock.js b/src/core/components/gc-lock.js new file mode 100644 index 0000000000..0d3b5ceca5 --- /dev/null +++ b/src/core/components/gc-lock.js @@ -0,0 +1,102 @@ +'use strict' + +const mortice = require('mortice') +const pull = require('pull-stream') +const log = require('debug')('ipfs:repo:gc:lock') + +class GCLock { + constructor () { + this.mutex = mortice() + } + + readLock (lockedFn, cb) { + return this.lock('readLock', lockedFn, cb) + } + + writeLock (lockedFn, cb) { + return this.lock('writeLock', lockedFn, cb) + } + + lock (type, lockedFn, cb) { + log(`${type} requested`) + const locked = () => new Promise((resolve, reject) => { + log(`${type} started`) + lockedFn((err, res) => err ? 
reject(err) : resolve(res)) + }) + + const lock = this.mutex[type](locked) + return lock.then(res => cb(null, res)).catch(cb).finally(() => { + log(`${type} released`) + }) + } + + pullReadLock (lockedPullFn) { + return this.pullLock('readLock', lockedPullFn) + } + + pullWriteLock (lockedPullFn) { + return this.pullLock('writeLock', lockedPullFn) + } + + pullLock (type, lockedPullFn) { + const pullLocker = new PullLocker(this.mutex, type) + + return pull( + pullLocker.take(), + lockedPullFn(), + pullLocker.release() + ) + } +} + +class PullLocker { + constructor (mutex, type) { + this.mutex = mutex + this.type = type + + // This Promise resolves when the mutex gives us permission to start + // running the locked piece of code + this.lockReady = new Promise((resolve) => { + this.lockReadyResolver = resolve + }) + } + + // Returns a Promise that resolves when the locked piece of code completes + locked () { + return new Promise((resolve) => { + this.releaseLock = resolve + log(`${this.type} (pull) started`) + + // The locked piece of code is ready to start, so resolve the + // this.lockReady Promise (created in the constructor) + this.lockReadyResolver() + }) + } + + // Requests a lock and then waits for the mutex to give us permission to run + // the locked piece of code + take () { + return pull( + pull.asyncMap((i, cb) => { + if (!this.lock) { + log(`${this.type} (pull) requested`) + // Request the lock + this.lock = this.mutex[this.type](() => this.locked()) + } + + // Wait for the mutex to give us permission + this.lockReady.then(() => cb(null, i)) + }) + ) + } + + // Releases the lock + release () { + return pull.through(null, () => { + log(`${this.type} (pull) released`) + this.releaseLock() + }) + } +} + +module.exports = GCLock diff --git a/src/core/components/gc.js b/src/core/components/gc.js index 81411a9772..c2abe39c5a 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -18,23 +18,28 @@ module.exports = function gc (self) { const start = Date.now() self.log(`GC: Creating set of marked blocks`) - parallel([ - // Get all blocks from the blockstore - (cb) => self._repo.blocks.query({ keysOnly: true }, cb), - // Mark all blocks that are being used - (cb) => createMarkedSet(self, cb) - ], (err, [blocks, markedSet]) => { - if (err) { - self.log(`GC: Error - ${err.message}`) - return callback(err) - } + self._gcLock.writeLock((lockCb) => { + parallel([ + // Get all blocks from the blockstore + (cb) => self._repo.blocks.query({ keysOnly: true }, cb), + // Mark all blocks that are being used + (cb) => createMarkedSet(self, cb) + ], (err, [blocks, markedSet]) => { + if (err) { + self.log(`GC: Error - ${err.message}`) + return lockCb(err) + } - // Delete blocks that are not being used - deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { - err && self.log(`GC: Error - ${err.message}`) - callback(err, res) + // Delete blocks that are not being used + deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { + if (err) { + self.log(`GC: Error - ${err.message}`) + return lockCb(err) + } + lockCb(null, res) + }) }) - }) + }, callback) }) } @@ -42,6 +47,7 @@ module.exports = function gc (self) { function createMarkedSet (ipfs, callback) { parallel([ // "Empty block" used by the pinner + // TODO: This CID is replicated in pin.js (cb) => cb(null, [new CID('QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n')]), // All pins, direct and indirect diff --git a/src/core/components/object.js b/src/core/components/object.js index 36b1701768..5295a76ec8 100644 
--- a/src/core/components/object.js +++ b/src/core/components/object.js @@ -242,19 +242,21 @@ module.exports = function object (self) { } function next () { - self._ipld.put(node, multicodec.DAG_PB, { - cidVersion: 0, - hashAlg: multicodec.SHA2_256 - }).then( - (cid) => { - if (options.preload !== false) { - self._preload(cid) - } - - callback(null, cid) - }, - (error) => callback(error) - ) + self._gcLock.readLock((cb) => { + self._ipld.put(node, multicodec.DAG_PB, { + cidVersion: 0, + hashAlg: multicodec.SHA2_256 + }).then( + (cid) => { + if (options.preload !== false) { + self._preload(cid) + } + + cb(null, cid) + }, + cb + ) + }, callback) } }), @@ -322,7 +324,6 @@ module.exports = function object (self) { return callback(err) } -<<<<<<< HEAD if (cid.codec === 'raw') { return callback(null, []) } @@ -338,9 +339,6 @@ module.exports = function object (self) { } callback(new Error(`Cannot resolve links from codec ${cid.codec}`)) -======= - callback(null, node.links) ->>>>>>> refactor: move links retrieval from object to refs }) }), diff --git a/src/core/components/pin.js b/src/core/components/pin.js index 0afa939e38..e4214ec003 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -182,52 +182,54 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - // verify that each hash can be pinned - map(mhs, (multihash, cb) => { - const key = toB58String(multihash) - if (recursive) { - if (recursivePins.has(key)) { - // it's already pinned recursively - return cb(null, key) - } + self._gcLock.readLock((lockCb) => { + // verify that each hash can be pinned + map(mhs, (multihash, cb) => { + const key = toB58String(multihash) + if (recursive) { + if (recursivePins.has(key)) { + // it's already pinned recursively + return cb(null, key) + } - // entire graph of nested links should be pinned, - // so make sure we have all the objects - dag._getRecursive(key, { preload: options.preload }, (err) => { - if (err) { return cb(err) } - // found all objects, we can add the pin - return cb(null, key) - }) - } else { - if (recursivePins.has(key)) { - // recursive supersedes direct, can't have both - return cb(new Error(`${key} already pinned recursively`)) - } - if (directPins.has(key)) { - // already directly pinned - return cb(null, key) - } + // entire graph of nested links should be pinned, + // so make sure we have all the objects + dag._getRecursive(key, { preload: options.preload }, (err) => { + if (err) { return cb(err) } + // found all objects, we can add the pin + return cb(null, key) + }) + } else { + if (recursivePins.has(key)) { + // recursive supersedes direct, can't have both + return cb(new Error(`${key} already pinned recursively`)) + } + if (directPins.has(key)) { + // already directly pinned + return cb(null, key) + } - // make sure we have the object - dag.get(new CID(multihash), { preload: options.preload }, (err) => { - if (err) { return cb(err) } - // found the object, we can add the pin - return cb(null, key) - }) - } - }, (err, results) => { - if (err) { return callback(err) } + // make sure we have the object + dag.get(new CID(multihash), { preload: options.preload }, (err) => { + if (err) { return cb(err) } + // found the object, we can add the pin + return cb(null, key) + }) + } + }, (err, results) => { + if (err) { return lockCb(err) } - // update the pin sets in memory - const pinset = recursive ? 
recursivePins : directPins - results.forEach(key => pinset.add(key)) + // update the pin sets in memory + const pinset = recursive ? recursivePins : directPins + results.forEach(key => pinset.add(key)) - // persist updated pin sets to datastore - flushPins((err, root) => { - if (err) { return callback(err) } - callback(null, results.map(hash => ({ hash }))) + // persist updated pin sets to datastore + flushPins((err, root) => { + if (err) { return lockCb(err) } + lockCb(null, results.map(hash => ({ hash }))) + }) }) - }) + }, callback) }) }), @@ -249,50 +251,52 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - // verify that each hash can be unpinned - map(mhs, (multihash, cb) => { - pin._isPinnedWithType(multihash, types.all, (err, res) => { - if (err) { return cb(err) } - const { pinned, reason } = res - const key = toB58String(multihash) - if (!pinned) { - return cb(new Error(`${key} is not pinned`)) - } - - switch (reason) { - case (types.recursive): - if (recursive) { + self._gcLock.readLock((lockCb) => { + // verify that each hash can be unpinned + map(mhs, (multihash, cb) => { + pin._isPinnedWithType(multihash, types.all, (err, res) => { + if (err) { return cb(err) } + const { pinned, reason } = res + const key = toB58String(multihash) + if (!pinned) { + return cb(new Error(`${key} is not pinned`)) + } + + switch (reason) { + case (types.recursive): + if (recursive) { + return cb(null, key) + } else { + return cb(new Error(`${key} is pinned recursively`)) + } + case (types.direct): return cb(null, key) - } else { - return cb(new Error(`${key} is pinned recursively`)) - } - case (types.direct): - return cb(null, key) - default: - return cb(new Error( - `${key} is pinned indirectly under ${reason}` - )) - } - }) - }, (err, results) => { - if (err) { return callback(err) } - - // update the pin sets in memory - results.forEach(key => { - if (recursive && recursivePins.has(key)) { - recursivePins.delete(key) - } else { - directPins.delete(key) - } - }) + default: + return cb(new Error( + `${key} is pinned indirectly under ${reason}` + )) + } + }) + }, (err, results) => { + if (err) { return lockCb(err) } + + // update the pin sets in memory + results.forEach(key => { + if (recursive && recursivePins.has(key)) { + recursivePins.delete(key) + } else { + directPins.delete(key) + } + }) - // persist updated pin sets to datastore - flushPins((err, root) => { - if (err) { return callback(err) } - self.log(`Removed pins: ${results}`) - callback(null, results.map(hash => ({ hash }))) + // persist updated pin sets to datastore + flushPins((err, root) => { + if (err) { return lockCb(err) } + self.log(`Removed pins: ${results}`) + lockCb(null, results.map(hash => ({ hash }))) + }) }) - }) + }, callback) }) }), diff --git a/src/core/components/repo.js b/src/core/components/repo.js index 5b0914aade..76d9262096 100644 --- a/src/core/components/repo.js +++ b/src/core/components/repo.js @@ -4,8 +4,6 @@ const promisify = require('promisify-es6') const repoVersion = require('ipfs-repo').repoVersion module.exports = function repo (self) { - const gc = require('./gc')(self) - return { init: (bits, empty, callback) => { // 1. 
check if repo already exists @@ -40,9 +38,7 @@ module.exports = function repo (self) { }) }), - gc: promisify((callback) => { - gc(callback) - }), + gc: require('./gc')(self), stat: promisify((options, callback) => { if (typeof options === 'function') { diff --git a/src/core/index.js b/src/core/index.js index d457cb6149..109be0bda2 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -26,6 +26,7 @@ const defaultRepo = require('./runtime/repo-nodejs') const preload = require('./preload') const mfsPreload = require('./mfs-preload') const ipldOptions = require('./runtime/ipld-nodejs') +const GCLock = require('./components/gc-lock') class IPFS extends EventEmitter { constructor (options) { @@ -79,6 +80,7 @@ class IPFS extends EventEmitter { this._ipns = undefined // eslint-disable-next-line no-console this._print = this._options.silent ? this.log : console.log + this._gcLock = new GCLock() // IPFS Core exposed components // - for booting up a node From eae37e18f747e7b1ba38bf593bf4e78eaee9bbd4 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 17:18:43 -0400 Subject: [PATCH 15/39] test: add gc locking tests --- package.json | 1 + src/core/components/block.js | 4 +- .../files-regular/add-pull-stream.js | 2 +- src/core/components/gc-lock.js | 46 +++- src/core/components/gc.js | 21 +- src/core/components/pin.js | 19 +- test/core/gc.spec.js | 238 ++++++++++++++++++ 7 files changed, 300 insertions(+), 31 deletions(-) create mode 100644 test/core/gc.spec.js diff --git a/package.json b/package.json index cc2e3843e4..6f805ecc82 100644 --- a/package.json +++ b/package.json @@ -166,6 +166,7 @@ "multihashes": "~0.4.14", "multihashing-async": "~0.6.0", "node-fetch": "^2.3.0", + "p-event": "^4.1.0", "peer-book": "~0.9.0", "peer-id": "~0.12.0", "peer-info": "~0.15.0", diff --git a/src/core/components/block.js b/src/core/components/block.js index f5a9f610ed..2a7196bbc3 100644 --- a/src/core/components/block.js +++ b/src/core/components/block.js @@ -81,7 +81,7 @@ module.exports = function block (self) { cb(null, new Block(block, cid)) }) }, - (block, cb) => self._gcLock.writeLock((_cb) => { + (block, cb) => self._gcLock.readLock((_cb) => { self._blockService.put(block, (err) => { if (err) { return _cb(err) @@ -103,7 +103,7 @@ module.exports = function block (self) { return setImmediate(() => callback(errCode(err, 'ERR_INVALID_CID'))) } - self._gcLock.readLock((cb) => self._blockService.delete(cid, cb), callback) + self._gcLock.writeLock((cb) => self._blockService.delete(cid, cb), callback) }), stat: promisify((cid, options, callback) => { if (typeof options === 'function') { diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index 559a3dc951..5cf53bdf2c 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -112,7 +112,7 @@ function pinFile (file, self, opts, cb) { const isRootDir = !file.path.includes('/') const shouldPin = pin && isRootDir && !opts.onlyHash && !opts.hashAlg if (shouldPin) { - return self.pin.add(file.hash, { preload: false }, err => cb(err, file)) + return self.pin.add(file.hash, { preload: false, lock: false }, err => cb(err, file)) } else { cb(null, file) } diff --git a/src/core/components/gc-lock.js b/src/core/components/gc-lock.js index 0d3b5ceca5..4a427c6d13 100644 --- a/src/core/components/gc-lock.js +++ b/src/core/components/gc-lock.js @@ -2,11 +2,14 @@ const mortice = require('mortice') const pull = 
require('pull-stream') -const log = require('debug')('ipfs:repo:gc:lock') +const EventEmitter = require('events') +const log = require('debug')('ipfs:gc:lock') -class GCLock { +class GCLock extends EventEmitter { constructor () { + super() this.mutex = mortice() + this.lockId = 0 } readLock (lockedFn, cb) { @@ -18,16 +21,28 @@ class GCLock { } lock (type, lockedFn, cb) { - log(`${type} requested`) + if (typeof lockedFn !== 'function') { + throw new Error(`first argument to ${type} must be a function`) + } + if (typeof cb !== 'function') { + throw new Error(`second argument to ${type} must be a callback function`) + } + + const lockId = this.lockId++ + log(`[${lockId}] ${type} requested`) + this.emit(`${type} request`, lockId) const locked = () => new Promise((resolve, reject) => { - log(`${type} started`) - lockedFn((err, res) => err ? reject(err) : resolve(res)) + this.emit(`${type} start`, lockId) + log(`[${lockId}] ${type} started`) + lockedFn((err, res) => { + this.emit(`${type} release`, lockId) + log(`[${lockId}] ${type} released`) + err ? reject(err) : resolve(res) + }) }) const lock = this.mutex[type](locked) - return lock.then(res => cb(null, res)).catch(cb).finally(() => { - log(`${type} released`) - }) + return lock.then(res => cb(null, res)).catch(cb) } pullReadLock (lockedPullFn) { @@ -39,7 +54,7 @@ class GCLock { } pullLock (type, lockedPullFn) { - const pullLocker = new PullLocker(this.mutex, type) + const pullLocker = new PullLocker(this, this.mutex, type, this.lockId++) return pull( pullLocker.take(), @@ -50,9 +65,11 @@ class GCLock { } class PullLocker { - constructor (mutex, type) { + constructor (emitter, mutex, type, lockId) { + this.emitter = emitter this.mutex = mutex this.type = type + this.lockId = lockId // This Promise resolves when the mutex gives us permission to start // running the locked piece of code @@ -65,7 +82,8 @@ class PullLocker { locked () { return new Promise((resolve) => { this.releaseLock = resolve - log(`${this.type} (pull) started`) + log(`[${this.lockId}] ${this.type} (pull) started`) + this.emitter.emit(`${this.type} start`, this.lockId) // The locked piece of code is ready to start, so resolve the // this.lockReady Promise (created in the constructor) @@ -79,7 +97,8 @@ class PullLocker { return pull( pull.asyncMap((i, cb) => { if (!this.lock) { - log(`${this.type} (pull) requested`) + log(`[${this.lockId}] ${this.type} (pull) requested`) + this.emitter.emit(`${this.type} request`, this.lockId) // Request the lock this.lock = this.mutex[this.type](() => this.locked()) } @@ -93,7 +112,8 @@ class PullLocker { // Releases the lock release () { return pull.through(null, () => { - log(`${this.type} (pull) released`) + log(`[${this.lockId}] ${this.type} (pull) released`) + this.emitter.emit(`${this.type} release`, this.lockId) this.releaseLock() }) } diff --git a/src/core/components/gc.js b/src/core/components/gc.js index c2abe39c5a..21b54e6cf2 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -6,6 +6,7 @@ const base32 = require('base32.js') const parallel = require('async/parallel') const mapLimit = require('async/mapLimit') const { Key } = require('interface-datastore') +const log = require('debug')('ipfs:gc') // Limit on the number of parallel block remove operations const BLOCK_RM_CONCURRENCY = 256 @@ -16,7 +17,7 @@ const MFS_ROOT_DS_KEY = new Key('/local/filesroot') module.exports = function gc (self) { return promisify(async (callback) => { const start = Date.now() - self.log(`GC: Creating set of marked blocks`) + 
log(`Creating set of marked blocks`) self._gcLock.writeLock((lockCb) => { parallel([ @@ -26,14 +27,14 @@ module.exports = function gc (self) { (cb) => createMarkedSet(self, cb) ], (err, [blocks, markedSet]) => { if (err) { - self.log(`GC: Error - ${err.message}`) + log(`Error - ${err.message}`) return lockCb(err) } // Delete blocks that are not being used deleteUnmarkedBlocks(self, markedSet, blocks, start, (err, res) => { if (err) { - self.log(`GC: Error - ${err.message}`) + log(`Error - ${err.message}`) return lockCb(err) } lockCb(null, res) @@ -55,7 +56,7 @@ function createMarkedSet (ipfs, callback) { if (err) { return cb(new Error(`Could not list pinned blocks: ${err.message}`)) } - ipfs.log(`GC: Found ${pins.length} pinned blocks`) + log(`Found ${pins.length} pinned blocks`) cb(null, pins.map(p => new CID(p.hash))) }), @@ -63,7 +64,7 @@ function createMarkedSet (ipfs, callback) { (cb) => ipfs._repo.datastore.get(PIN_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { - ipfs.log(`GC: No pinned blocks`) + log(`No pinned blocks`) return cb(null, []) } return cb(new Error(`Could not get pin sets root from datastore: ${err.message}`)) @@ -86,7 +87,7 @@ function createMarkedSet (ipfs, callback) { (cb) => ipfs._repo.datastore.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { if (err.code === 'ERR_NOT_FOUND') { - ipfs.log(`GC: No blocks in MFS`) + log(`No blocks in MFS`) return cb(null, []) } return cb(new Error(`Could not get MFS root from datastore: ${err.message}`)) @@ -110,7 +111,7 @@ function getDescendants (ipfs, cid, callback) { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } - ipfs.log(`GC: Found ${refs.length} MFS blocks`) + log(`Found ${refs.length} MFS blocks`) callback(null, [cid, ...refs.map(r => new CID(r.ref))]) }) } @@ -132,9 +133,9 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { } } - const msg = `GC: Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + + const msg = `Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + `Deleting ${unreferenced.length} blocks.` - ipfs.log(msg) + log(msg) mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { // Delete blocks from blockstore @@ -146,7 +147,7 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { cb(null, res) }) }, (_, delRes) => { - ipfs.log(`GC: Complete (${Date.now() - start}ms)`) + log(`Complete (${Date.now() - start}ms)`) callback(null, res.concat(delRes)) }) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index e4214ec003..dcaf553b92 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -182,7 +182,7 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - self._gcLock.readLock((lockCb) => { + const pin = (pinComplete) => { // verify that each hash can be pinned map(mhs, (multihash, cb) => { const key = toB58String(multihash) @@ -217,7 +217,7 @@ module.exports = (self) => { }) } }, (err, results) => { - if (err) { return lockCb(err) } + if (err) { return pinComplete(err) } // update the pin sets in memory const pinset = recursive ? 
recursivePins : directPins @@ -225,11 +225,20 @@ module.exports = (self) => { // persist updated pin sets to datastore flushPins((err, root) => { - if (err) { return lockCb(err) } - lockCb(null, results.map(hash => ({ hash }))) + if (err) { return pinComplete(err) } + pinComplete(null, results.map(hash => ({ hash }))) }) }) - }, callback) + } + + // When adding a file, we take a lock that gets released after pinning + // is complete, so don't take a second lock here + const lock = options.lock !== false + if (lock) { + self._gcLock.readLock(pin, callback) + } else { + pin(callback) + } }) }), diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js new file mode 100644 index 0000000000..8b7a48645b --- /dev/null +++ b/test/core/gc.spec.js @@ -0,0 +1,238 @@ +/* eslint max-nested-callbacks: ["error", 8] */ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +const dirtyChai = require('dirty-chai') +const expect = chai.expect +chai.use(dirtyChai) + +const fs = require('fs') + +const IPFS = require('../../src/core') +const createTempRepo = require('../utils/create-repo-nodejs') +const pEvent = require('p-event') + +describe('gc', function () { + const fixtures = [ + 'test/fixtures/planets/mercury/wiki.md', + 'test/fixtures/planets/solar-system.md' + ].map(path => ({ + path, + content: fs.readFileSync(path) + })) + + let ipfs + let repo + + before(function (done) { + this.timeout(20 * 1000) + repo = createTempRepo() + ipfs = new IPFS({ + repo, + config: { + Bootstrap: [] + } + }) + ipfs.on('ready', done) + }) + + after(function (done) { + this.timeout(60 * 1000) + ipfs.stop(done) + }) + + after((done) => repo.teardown(done)) + + const blockAddTests = [{ + name: 'add', + add1: () => ipfs.add(fixtures[0], { pin: false }), + add2: () => ipfs.add(fixtures[1], { pin: false }), + resToCid: (res) => res[0].hash + }, { + name: 'object put', + add1: () => ipfs.object.put({ Data: 'obj put 1', Links: [] }), + add2: () => ipfs.object.put({ Data: 'obj put 2', Links: [] }), + resToCid: (res) => res.toString() + }, { + name: 'block put', + add1: () => ipfs.block.put(Buffer.from('block put 1'), null), + add2: () => ipfs.block.put(Buffer.from('block put 2'), null), + resToCid: (res) => res.cid.toString() + }] + + describe('locks', function () { + for (const test of blockAddTests) { + // eslint-disable-next-line no-loop-func + it(`garbage collection should wait for pending ${test.name} to finish`, async () => { + // Add blocks to IPFS + // Note: add operation will take a read lock + const addLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const add1 = test.add1() + + // Once add lock has been requested, start GC + await addLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second add + await gcStarted + const add2 = test.add2() + + const deleted = (await gc).map(i => i.cid) + const add1Res = test.resToCid(await add1) + const add2Res = test.resToCid(await add2) + + // Should have garbage collected blocks from first add, because GC should + // have waited for first add to finish + expect(deleted).includes(add1Res) + + // Should not have garbage collected blocks from second add, because + // second add should have waited for GC to finish + expect(deleted).not.includes(add2Res) + }) + } + + it('garbage collection should wait for pending add + pin to finish', async () => { + // Add blocks to IPFS + // Note: add operation will take a read lock + const addLockRequested 
= pEvent(ipfs._gcLock, 'readLock request') + const add1 = ipfs.add(fixtures[0], { pin: true }) + + // Once add lock has been requested, start GC + await addLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second add + await gcStarted + const add2 = ipfs.add(fixtures[1], { pin: true }) + + const deleted = (await gc).map(i => i.cid) + const add1Res = (await add1)[0].hash + const add2Res = (await add2)[0].hash + + // Should not have garbage collected blocks from first add, because GC should + // have waited for first add + pin to finish (protected by pin) + expect(deleted).not.includes(add1Res) + + // Should not have garbage collected blocks from second add, because + // second add should have waited for GC to finish + expect(deleted).not.includes(add2Res) + }) + + it('garbage collection should wait for pending block rm to finish', async () => { + // Add two blocks so that we can remove them + const cid1 = (await ipfs.block.put(Buffer.from('block to rm 1'), null)).cid + const cid2 = (await ipfs.block.put(Buffer.from('block to rm 2'), null)).cid + + // Remove first block from IPFS + // Note: block rm will take a write lock + const rmLockRequested = pEvent(ipfs._gcLock, 'writeLock request') + const rm1 = ipfs.block.rm(cid1) + + // Once rm lock has been requested, start GC + await rmLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second rm + await gcStarted + const rm2 = ipfs.block.rm(cid2) + + const deleted = (await gc).map(i => i.cid) + await rm1 + + // Second rm should fail because GC has already removed that block + try { + await rm2 + } catch (err) { + expect(err.code).eql('ERR_DB_DELETE_FAILED') + } + + // Confirm second block has been removed + const localRefs = (await ipfs.refs.local()).map(r => r.ref) + expect(localRefs).not.includes(cid2.toString()) + + // Should not have garbage collected block from first block put, because + // GC should have waited for first rm (removing first block put) to finish + expect(deleted).not.includes(cid1.toString()) + + // Should have garbage collected block from second block put, because GC + // should have completed before second rm (removing second block put) + expect(deleted).includes(cid2.toString()) + }) + + it('garbage collection should wait for pending pin add to finish', async () => { + // Add two blocks so that we can pin them + const cid1 = (await ipfs.block.put(Buffer.from('block to pin add 1'), null)).cid + const cid2 = (await ipfs.block.put(Buffer.from('block to pin add 2'), null)).cid + + // Pin first block + // Note: pin add will take a read lock + const pinLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const pin1 = ipfs.pin.add(cid1) + + // Once pin lock has been requested, start GC + await pinLockRequested + const gc = ipfs.repo.gc() + const deleted = (await gc).map(i => i.cid) + await pin1 + + // TODO: Adding pin for removed block never returns, which means the lock + // never gets released + // const pin2 = ipfs.pin.add(cid2) + + // Confirm second block has been removed + const localRefs = (await ipfs.refs.local()).map(r => r.ref) + expect(localRefs).not.includes(cid2.toString()) + + // Should not have garbage collected block from first block put, because + // GC should have waited for pin (protecting first block put) to finish +
expect(deleted).not.includes(cid1.toString()) + + // Should have garbage collected block from second block put, because GC + // should have completed before second pin + expect(deleted).includes(cid2.toString()) + }) + + it('garbage collection should wait for pending pin rm to finish', async () => { + // Add two blocks so that we can pin them + const cid1 = (await ipfs.block.put(Buffer.from('block to pin rm 1'), null)).cid + const cid2 = (await ipfs.block.put(Buffer.from('block to pin rm 2'), null)).cid + + // Pin blocks + await Promise.all([ipfs.pin.add(cid1), ipfs.pin.add(cid2)]) + + // Unpin first block + // Note: pin rm will take a read lock + const pinLockRequested = pEvent(ipfs._gcLock, 'readLock request') + const pinRm1 = ipfs.pin.rm(cid1) + + // Once pin lock has been requested, start GC + await pinLockRequested + // Note: GC will take a write lock + const gcStarted = pEvent(ipfs._gcLock, 'writeLock request') + const gc = ipfs.repo.gc() + + // Once GC has started, start second pin rm + await gcStarted + const pinRm2 = ipfs.pin.rm(cid2) + + const deleted = (await gc).map(i => i.cid) + await pinRm1 + await pinRm2 + + // Should have garbage collected block from first block put, because + // GC should have waited for pin rm (unpinning first block put) to finish + expect(deleted).includes(cid1.toString()) + + // Should not have garbage collected block from second block put, because + // GC should have completed before second block was unpinned + expect(deleted).not.includes(cid2.toString()) + }) + }) +}) From 730a96d6700292ba941a4a5542363fcbb811336f Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:24:11 -0400 Subject: [PATCH 16/39] refactor: rebase --- .../files-regular/add-pull-stream.js | 2 - .../files-regular/refs-pull-stream.js | 80 ----------------- src/http/api/resources/files-regular.js | 86 ------------------- 3 files changed, 168 deletions(-) diff --git a/src/core/components/files-regular/add-pull-stream.js b/src/core/components/files-regular/add-pull-stream.js index 5cf53bdf2c..d00e4d5138 100644 --- a/src/core/components/files-regular/add-pull-stream.js +++ b/src/core/components/files-regular/add-pull-stream.js @@ -152,8 +152,6 @@ module.exports = function (self) { } opts.progress = progress - - return self._gcLock.pullReadLock(() => pull( pullMap(content => normalizeContent(content, opts)), pullFlatten(), diff --git a/src/core/components/files-regular/refs-pull-stream.js b/src/core/components/files-regular/refs-pull-stream.js index 8447192ae0..aec4ba8a6d 100644 --- a/src/core/components/files-regular/refs-pull-stream.js +++ b/src/core/components/files-regular/refs-pull-stream.js @@ -178,83 +178,3 @@ function getNodeLinks (node, path = '') { } return links } - -// Do a depth first search of the DAG, starting from the given root cid -function objectStream (ipfs, rootCid, maxDepth, isUnique) { - const uniques = new Set() - - const root = { node: { cid: rootCid }, depth: 0 } - const traverseLevel = (obj) => { - const { node, depth } = obj - - // Check the depth - const nextLevelDepth = depth + 1 - if (nextLevelDepth > maxDepth) { - return pull.empty() - } - - // If unique option is enabled, check if the CID has been seen before. - // Note we need to do this here rather than before adding to the stream - // so that the unique check happens in the order that items are examined - // in the DAG. 
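The comment above is the key subtlety in objectStream: duplicates are detected when a node is expanded, not when it is queued, so the unique filter preserves true depth-first visit order. A toy illustration of that behaviour with pull-traverse, assuming a small in-memory DAG in place of IPLD:

  const pull = require('pull-stream')
  const traverse = require('pull-traverse')

  // 'c' is reachable both under 'b' and directly under 'a'
  const children = { a: ['b', 'c'], b: ['c'], c: [] }
  const seen = new Set()

  pull(
    traverse.depthFirst('a', (node) => {
      if (seen.has(node)) return pull.empty() // already examined: do not expand again
      seen.add(node)
      return pull.values(children[node])
    }),
    pull.collect((err, order) => {
      if (err) throw err
      console.log(order) // ['a', 'b', 'c', 'c'], the second 'c' is emitted but not re-expanded
    })
  )
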
- if (isUnique) { - if (uniques.has(node.cid.toString())) { - // Mark this object as a duplicate so we can filter it out later - obj.isDuplicate = true - return pull.empty() - } - uniques.add(node.cid.toString()) - } - - const deferred = pullDefer.source() - - // Get this object's links - getLinks(ipfs, node.cid, (err, links) => { - if (err) { - if (err.code === 'ERR_NOT_FOUND') { - err.message = `Could not find object with CID: ${node.cid}` - } - return deferred.resolve(pull.error(err)) - } - - // Add to the stream each link, parent and the new depth - const vals = links.map(link => ({ - parent: node, - node: link, - depth: nextLevelDepth - })) - - deferred.resolve(pull.values(vals)) - }) - - return deferred - } - - return pullTraverse.depthFirst(root, traverseLevel) -} - -// Fetch a node from IPLD then get all its links -function getLinks (ipfs, cid, callback) { - ipfs._ipld.get(new CID(cid), (err, node) => { - if (err) { - return callback(err) - } - callback(null, node.value.links || getNodeLinks(node.value)) - }) -} - -// Recursively search the node for CIDs -function getNodeLinks (node, path = '') { - let links = [] - for (const [name, value] of Object.entries(node)) { - if (CID.isCID(value)) { - links.push({ - name: path + name, - cid: value - }) - } else if (typeof value === 'object') { - links = links.concat(getNodeLinks(value, path + name + '/')) - } - } - return links -} diff --git a/src/http/api/resources/files-regular.js b/src/http/api/resources/files-regular.js index add12b9496..7175b17972 100644 --- a/src/http/api/resources/files-regular.js +++ b/src/http/api/resources/files-regular.js @@ -412,89 +412,3 @@ function sendRefsReplyStream (request, h, desc, source) { .header('content-type', 'application/json') .header('Trailer', 'X-Stream-Error') } - -exports.refs = { - validate: { - query: Joi.object().keys({ - recursive: Joi.boolean().default(false), - format: Joi.string().default(Format.default), - edges: Joi.boolean().default(false), - unique: Joi.boolean().default(false), - 'max-depth': Joi.number().integer().min(-1) - }).unknown() - }, - - // uses common parseKey method that returns a `key` - parseArgs: exports.parseKey, - - // main route handler which is called after the above `parseArgs`, but only if the args were valid - handler (request, h) { - const { ipfs } = request.server.app - const { key } = request.pre.args - - const recursive = request.query.recursive - const format = request.query.format - const edges = request.query.edges - const unique = request.query.unique - const maxDepth = request.query['max-depth'] - - const source = ipfs.refsPullStream(key, { recursive, format, edges, unique, maxDepth }) - return sendRefsReplyStream(request, h, `refs for ${key}`, source) - } -} - -exports.refs.local = { - // main route handler - handler (request, h) { - const { ipfs } = request.server.app - const source = ipfs.refs.localPullStream() - return sendRefsReplyStream(request, h, 'local refs', source) - } -} - -function sendRefsReplyStream (request, h, desc, source) { - const replyStream = pushable() - const aborter = abortable() - - const stream = toStream.source(pull( - replyStream, - aborter, - ndjson.serialize() - )) - - // const stream = toStream.source(replyStream.source) - // hapi is not very clever and throws if no - // - _read method - // - _readableState object - // are there :( - if (!stream._read) { - stream._read = () => {} - stream._readableState = {} - stream.unpipe = () => {} - } - - pull( - source, - pull.drain( - (ref) => replyStream.push({ Ref: ref.ref, Err: 
ref.err }), - (err) => { - if (err) { - request.raw.res.addTrailers({ - 'X-Stream-Error': JSON.stringify({ - Message: `Failed to get ${desc}: ${err.message || ''}`, - Code: 0 - }) - }) - return aborter.abort() - } - - replyStream.end() - } - ) - ) - - return h.response(stream) - .header('x-chunked-output', '1') - .header('content-type', 'application/json') - .header('Trailer', 'X-Stream-Error') -} From bdaaed9e8fe4732ce77727243c0cc273ca5f8d14 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:43:12 -0400 Subject: [PATCH 17/39] fix: gc use uppercase dag.Links --- src/core/components/gc.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/components/gc.js b/src/core/components/gc.js index 21b54e6cf2..f04d80bcda 100644 --- a/src/core/components/gc.js +++ b/src/core/components/gc.js @@ -79,7 +79,7 @@ function createMarkedSet (ipfs, callback) { // The pinner stores an object that has two links to pin sets: // 1. The directly pinned CIDs // 2. The recursively pinned CIDs - cb(null, [cid, ...obj.value.links.map(l => l.cid)]) + cb(null, [cid, ...obj.value.Links.map(l => l.Hash)]) }) }), From 72fc90503a895822c66856d340102d4ca9542100 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:44:15 -0400 Subject: [PATCH 18/39] chore: update package.json deps --- package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 6f805ecc82..548d7154ab 100644 --- a/package.json +++ b/package.json @@ -87,7 +87,7 @@ "@hapi/joi": "^15.0.1", "async": "^2.6.1", "async-iterator-all": "0.0.2", - "async-iterator-to-pull-stream": "^1.1.0", + "async-iterator-to-pull-stream": "^1.3.0", "async-iterator-to-stream": "^1.1.0", "base32.js": "~0.1.0", "bignumber.js": "^8.0.2", @@ -159,6 +159,7 @@ "merge-options": "^1.0.1", "mime-types": "^2.1.21", "mkdirp": "~0.5.1", + "mortice": "dirkmc/mortice#fix/read-then-write", "multiaddr": "^6.0.5", "multiaddr-to-uri": "^4.0.1", "multibase": "~0.6.0", From a4bffb286bba7ce868f862d1489aeac64ad31ba5 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 21 May 2019 18:48:33 -0400 Subject: [PATCH 19/39] chore: rebase --- test/utils/ipfs-exec.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/utils/ipfs-exec.js b/test/utils/ipfs-exec.js index 92f3e0f4e0..f4be357c02 100644 --- a/test/utils/ipfs-exec.js +++ b/test/utils/ipfs-exec.js @@ -7,7 +7,6 @@ const expect = chai.expect chai.use(dirtyChai) const path = require('path') const _ = require('lodash') -const yargs = require('yargs') // This is our new test utility to easily check and execute ipfs cli commands. 
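// A rough usage sketch (the command string and assertion are illustrative,
// and repoPath comes from the calling test):
//
//   const ipfsExec = require('../utils/ipfs-exec')
//   const ipfs = ipfsExec(repoPath)
//   const out = await ipfs('refs --recursive QmSomeHash')
//   expect(out).to.include('QmSomeHash')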
// @@ -34,10 +33,11 @@ module.exports = (repoPath, opts) => { })) const execute = (exec, args) => { - // Adding '--' at the front of the command allows us to parse commands that - // have a parameter with spaces in it, eg - // ipfs refs --format=" -> " - const cp = exec(yargs('-- ' + args[0]).argv._) + if (args.length === 1) { + args = args[0].split(' ') + } + + const cp = exec(args) const res = cp.then((res) => { // We can't escape the os.tmpdir warning due to: // https://github.com/shelljs/shelljs/blob/master/src/tempdir.js#L43 From b27e7a264fbd3d543021c8573f53fa214111fd2c Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 10:36:27 -0400 Subject: [PATCH 20/39] chore: add joi to package.json --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 548d7154ab..7e3a312f25 100644 --- a/package.json +++ b/package.json @@ -139,6 +139,7 @@ "is-pull-stream": "~0.0.0", "is-stream": "^2.0.0", "iso-url": "~0.4.6", + "joi": "^14.3.1", "just-flatten-it": "^2.1.0", "just-safe-set": "^2.1.0", "kind-of": "^6.0.2", From e2f68a83035ac7108020523fa671a1c2e6abe61b Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 16:04:16 -0400 Subject: [PATCH 21/39] refactor: pin/gc common code --- package.json | 4 +- src/core/components/pin.js | 307 +++------------------ src/core/components/{ => pin}/gc-lock.js | 0 src/core/components/{ => pin}/gc.js | 32 +-- src/core/components/pin/pin-manager.js | 301 ++++++++++++++++++++ src/core/components/{ => pin}/pin-set.js | 2 + src/core/components/{ => pin}/pin.proto.js | 0 src/core/components/repo.js | 2 +- src/core/index.js | 2 +- test/core/pin-set.js | 2 +- 10 files changed, 351 insertions(+), 301 deletions(-) rename src/core/components/{ => pin}/gc-lock.js (100%) rename src/core/components/{ => pin}/gc.js (78%) create mode 100644 src/core/components/pin/pin-manager.js rename src/core/components/{ => pin}/pin-set.js (99%) rename src/core/components/{ => pin}/pin.proto.js (100%) diff --git a/package.json b/package.json index 7e3a312f25..30f6bc04e8 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,7 @@ "execa": "^1.0.0", "form-data": "^2.3.3", "hat": "0.0.3", - "interface-ipfs-core": "~0.103.0", + "interface-ipfs-core": "ipfs/interface-js-ipfs-core#feat/gc", "ipfsd-ctl": "~0.42.0", "libp2p-websocket-star": "~0.10.2", "ncp": "^2.0.0", @@ -117,7 +117,7 @@ "ipfs-bitswap": "~0.24.0", "ipfs-block": "~0.8.1", "ipfs-block-service": "~0.15.1", - "ipfs-http-client": "^32.0.0", + "ipfs-http-client": "ipfs/js-ipfs-http-client#feat/gc", "ipfs-http-response": "~0.3.0", "ipfs-mfs": "~0.11.2", "ipfs-multipart": "~0.1.0", diff --git a/src/core/components/pin.js b/src/core/components/pin.js index dcaf553b92..168a488654 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -2,171 +2,25 @@ 'use strict' const promisify = require('promisify-es6') -const { DAGNode, DAGLink, util } = require('ipld-dag-pb') const CID = require('cids') const map = require('async/map') const mapSeries = require('async/mapSeries') -const series = require('async/series') -const parallel = require('async/parallel') -const eachLimit = require('async/eachLimit') const waterfall = require('async/waterfall') -const detectLimit = require('async/detectLimit') const setImmediate = require('async/setImmediate') -const { Key } = require('interface-datastore') const errCode = require('err-code') const multibase = require('multibase') -const multicodec = require('multicodec') -const createPinSet = require('./pin-set') const { 
resolvePath } = require('../utils') - -// arbitrary limit to the number of concurrent dag operations -const concurrencyLimit = 300 -const pinDataStoreKey = new Key('/local/pins') +const PinManager = require('./pin/pin-manager') +const PinTypes = PinManager.PinTypes function toB58String (hash) { return new CID(hash).toBaseEncodedString() } -function invalidPinTypeErr (type) { - const errMsg = `Invalid type '${type}', must be one of {direct, indirect, recursive, all}` - return errCode(new Error(errMsg), 'ERR_INVALID_PIN_TYPE') -} - module.exports = (self) => { - const repo = self._repo const dag = self.dag - const pinset = createPinSet(dag) - const types = { - direct: 'direct', - recursive: 'recursive', - indirect: 'indirect', - all: 'all' - } - - let directPins = new Set() - let recursivePins = new Set() - - const directKeys = () => - Array.from(directPins).map(key => new CID(key).buffer) - const recursiveKeys = () => - Array.from(recursivePins).map(key => new CID(key).buffer) - - function getIndirectKeys (callback) { - const indirectKeys = new Set() - eachLimit(recursiveKeys(), concurrencyLimit, (multihash, cb) => { - dag._getRecursive(multihash, (err, nodes) => { - if (err) { - return cb(err) - } - - map(nodes, (node, cb) => util.cid(util.serialize(node), { - cidVersion: 0 - }).then(cid => cb(null, cid), cb), (err, cids) => { - if (err) { - return cb(err) - } - - cids - .map(cid => cid.toString()) - // recursive pins pre-empt indirect pins - .filter(key => !recursivePins.has(key)) - .forEach(key => indirectKeys.add(key)) - - cb() - }) - }) - }, (err) => { - if (err) { return callback(err) } - callback(null, Array.from(indirectKeys)) - }) - } - - // Encode and write pin key sets to the datastore: - // a DAGLink for each of the recursive and direct pinsets - // a DAGNode holding those as DAGLinks, a kind of root pin - function flushPins (callback) { - let dLink, rLink, root - series([ - // create a DAGLink to the node with direct pins - cb => waterfall([ - cb => pinset.storeSet(directKeys(), cb), - ({ node, cid }, cb) => { - try { - cb(null, new DAGLink(types.direct, node.size, cid)) - } catch (err) { - cb(err) - } - }, - (link, cb) => { dLink = link; cb(null) } - ], cb), - - // create a DAGLink to the node with recursive pins - cb => waterfall([ - cb => pinset.storeSet(recursiveKeys(), cb), - ({ node, cid }, cb) => { - try { - cb(null, new DAGLink(types.recursive, node.size, cid)) - } catch (err) { - cb(err) - } - }, - (link, cb) => { rLink = link; cb(null) } - ], cb), - - // the pin-set nodes link to a special 'empty' node, so make sure it exists - cb => { - let empty - - try { - empty = DAGNode.create(Buffer.alloc(0)) - } catch (err) { - return cb(err) - } - - dag.put(empty, { - version: 0, - format: multicodec.DAG_PB, - hashAlg: multicodec.SHA2_256, - preload: false - }, cb) - }, - - // create a root node with DAGLinks to the direct and recursive DAGs - cb => { - let node - - try { - node = DAGNode.create(Buffer.alloc(0), [dLink, rLink]) - } catch (err) { - return cb(err) - } - - root = node - dag.put(root, { - version: 0, - format: multicodec.DAG_PB, - hashAlg: multicodec.SHA2_256, - preload: false - }, (err, cid) => { - if (!err) { - root.multihash = cid.buffer - } - cb(err) - }) - }, - - // hack for CLI tests - cb => repo.closed ? 
repo.open(cb) : cb(null, null), - - // save root to datastore under a consistent key - cb => repo.datastore.put(pinDataStoreKey, root.multihash, cb) - ], (err, res) => { - if (err) { return callback(err) } - self.log(`Flushed pins with root: ${root}`) - return callback(null, root) - }) - } + const pinManager = new PinManager(self._repo, dag, self.log) const pin = { add: promisify((paths, options, callback) => { @@ -187,7 +41,7 @@ module.exports = (self) => { map(mhs, (multihash, cb) => { const key = toB58String(multihash) if (recursive) { - if (recursivePins.has(key)) { + if (pinManager.recursivePins.has(key)) { // it's already pinned recursively return cb(null, key) } @@ -200,11 +54,11 @@ module.exports = (self) => { return cb(null, key) }) } else { - if (recursivePins.has(key)) { + if (pinManager.recursivePins.has(key)) { // recursive supersedes direct, can't have both return cb(new Error(`${key} already pinned recursively`)) } - if (directPins.has(key)) { + if (pinManager.directPins.has(key)) { // already directly pinned return cb(null, key) } @@ -220,11 +74,11 @@ module.exports = (self) => { if (err) { return pinComplete(err) } // update the pin sets in memory - const pinset = recursive ? recursivePins : directPins + const pinset = recursive ? pinManager.recursivePins : pinManager.directPins results.forEach(key => pinset.add(key)) // persist updated pin sets to datastore - flushPins((err, root) => { + pinManager.flushPins((err, root) => { if (err) { return pinComplete(err) } pinComplete(null, results.map(hash => ({ hash }))) }) @@ -263,7 +117,7 @@ module.exports = (self) => { self._gcLock.readLock((lockCb) => { // verify that each hash can be unpinned map(mhs, (multihash, cb) => { - pin._isPinnedWithType(multihash, types.all, (err, res) => { + pinManager.isPinnedWithType(multihash, PinTypes.all, (err, res) => { if (err) { return cb(err) } const { pinned, reason } = res const key = toB58String(multihash) @@ -272,13 +126,13 @@ module.exports = (self) => { } switch (reason) { - case (types.recursive): + case (PinTypes.recursive): if (recursive) { return cb(null, key) } else { return cb(new Error(`${key} is pinned recursively`)) } - case (types.direct): + case (PinTypes.direct): return cb(null, key) default: return cb(new Error( @@ -291,15 +145,15 @@ module.exports = (self) => { // update the pin sets in memory results.forEach(key => { - if (recursive && recursivePins.has(key)) { - recursivePins.delete(key) + if (recursive && pinManager.recursivePins.has(key)) { + pinManager.recursivePins.delete(key) } else { - directPins.delete(key) + pinManager.directPins.delete(key) } }) // persist updated pin sets to datastore - flushPins((err, root) => { + pinManager.flushPins((err, root) => { if (err) { return lockCb(err) } self.log(`Removed pins: ${results}`) lockCb(null, results.map(hash => ({ hash }))) @@ -310,7 +164,7 @@ module.exports = (self) => { }), ls: promisify((paths, options, callback) => { - let type = types.all + let type = PinTypes.all if (typeof paths === 'function') { callback = paths options = {} @@ -327,27 +181,28 @@ module.exports = (self) => { options = options || {} if (options.type) { - if (typeof options.type !== 'string') { - return setImmediate(() => callback(invalidPinTypeErr(options.type))) + type = options.type + if (typeof options.type === 'string') { + type = options.type.toLowerCase() + } + const err = PinManager.checkPinType(type) + if (err) { + return setImmediate(() => callback(err)) } - type = options.type.toLowerCase() - } - if (!Object.keys(types).includes(type)) 
{ - return setImmediate(() => callback(invalidPinTypeErr(type))) } if (paths) { // check the pinned state of specific hashes waterfall([ (cb) => resolvePath(self.object, paths, cb), - (hashes, cb) => mapSeries(hashes, (hash, done) => pin._isPinnedWithType(hash, types.all, done), cb), + (hashes, cb) => mapSeries(hashes, (hash, done) => pinManager.isPinnedWithType(hash, PinTypes.all, done), cb), (results, cb) => { results = results .filter(result => result.pinned) .map(({ key, reason }) => { switch (reason) { - case types.direct: - case types.recursive: + case PinTypes.direct: + case PinTypes.recursive: return { hash: key, type: reason @@ -355,7 +210,7 @@ module.exports = (self) => { default: return { hash: key, - type: `${types.indirect} through ${reason}` + type: `${PinTypes.indirect} through ${reason}` } } }) @@ -370,34 +225,34 @@ module.exports = (self) => { } else { // show all pinned items of type let pins = [] - if (type === types.direct || type === types.all) { + if (type === PinTypes.direct || type === PinTypes.all) { pins = pins.concat( - Array.from(directPins).map(hash => ({ - type: types.direct, + Array.from(pinManager.directPins).map(hash => ({ + type: PinTypes.direct, hash })) ) } - if (type === types.recursive || type === types.all) { + if (type === PinTypes.recursive || type === PinTypes.all) { pins = pins.concat( - Array.from(recursivePins).map(hash => ({ - type: types.recursive, + Array.from(pinManager.recursivePins).map(hash => ({ + type: PinTypes.recursive, hash })) ) } - if (type === types.indirect || type === types.all) { - getIndirectKeys((err, indirects) => { + if (type === PinTypes.indirect || type === PinTypes.all) { + pinManager.getIndirectKeys((err, indirects) => { if (err) { return callback(err) } pins = pins // if something is pinned both directly and indirectly, // report the indirect entry .filter(({ hash }) => !indirects.includes(hash) || - (indirects.includes(hash) && !directPins.has(hash)) + (indirects.includes(hash) && !pinManager.directPins.has(hash)) ) .concat(indirects.map(hash => ({ - type: types.indirect, + type: PinTypes.indirect, hash }))) return callback(null, pins) @@ -408,93 +263,9 @@ module.exports = (self) => { } }), - _isPinnedWithType: promisify((multihash, type, callback) => { - const key = toB58String(multihash) - const { recursive, direct, all } = types - - // recursive - if ((type === recursive || type === all) && recursivePins.has(key)) { - return callback(null, { - key, - pinned: true, - reason: recursive - }) - } - - if (type === recursive) { - return callback(null, { - key, - pinned: false - }) - } - - // direct - if ((type === direct || type === all) && directPins.has(key)) { - return callback(null, { - key, - pinned: true, - reason: direct - }) - } - - if (type === direct) { - return callback(null, { - key, - pinned: false - }) - } - - // indirect (default) - // check each recursive key to see if multihash is under it - // arbitrary limit, enables handling 1000s of pins. - detectLimit(recursiveKeys().map(key => new CID(key)), concurrencyLimit, (cid, cb) => { - waterfall([ - (done) => dag.get(cid, '', { preload: false }, done), - (result, done) => done(null, result.value), - (node, done) => pinset.hasDescendant(node, key, done) - ], cb) - }, (err, cid) => callback(err, { - key, - pinned: Boolean(cid), - reason: cid - })) - }), - - _load: promisify(callback => { - waterfall([ - // hack for CLI tests - (cb) => repo.closed ? 
repo.datastore.open(cb) : cb(null, null), - (_, cb) => repo.datastore.has(pinDataStoreKey, cb), - (has, cb) => has ? cb() : cb(new Error('No pins to load')), - (cb) => repo.datastore.get(pinDataStoreKey, cb), - (mh, cb) => { - dag.get(new CID(mh), '', { preload: false }, cb) - } - ], (err, pinRoot) => { - if (err) { - if (err.message === 'No pins to load') { - self.log('No pins to load') - return callback() - } else { - return callback(err) - } - } - - parallel([ - cb => pinset.loadSet(pinRoot.value, types.recursive, cb), - cb => pinset.loadSet(pinRoot.value, types.direct, cb) - ], (err, keys) => { - if (err) { return callback(err) } - const [ rKeys, dKeys ] = keys + _getInternalBlocks: (cb) => pinManager.getInternalBlocks(cb), - directPins = new Set(dKeys.map(toB58String)) - recursivePins = new Set(rKeys.map(toB58String)) - - self.log('Loaded pins from the datastore') - return callback(null) - }) - }) - }) + _load: (cb) => pinManager.load(cb) } return pin diff --git a/src/core/components/gc-lock.js b/src/core/components/pin/gc-lock.js similarity index 100% rename from src/core/components/gc-lock.js rename to src/core/components/pin/gc-lock.js diff --git a/src/core/components/gc.js b/src/core/components/pin/gc.js similarity index 78% rename from src/core/components/gc.js rename to src/core/components/pin/gc.js index f04d80bcda..4ff06db698 100644 --- a/src/core/components/gc.js +++ b/src/core/components/pin/gc.js @@ -10,7 +10,6 @@ const log = require('debug')('ipfs:gc') // Limit on the number of parallel block remove operations const BLOCK_RM_CONCURRENCY = 256 -const PIN_DS_KEY = new Key('/local/pins') const MFS_ROOT_DS_KEY = new Key('/local/filesroot') // Perform mark and sweep garbage collection @@ -47,10 +46,6 @@ module.exports = function gc (self) { // Get Set of CIDs of blocks to keep function createMarkedSet (ipfs, callback) { parallel([ - // "Empty block" used by the pinner - // TODO: This CID is replicated in pin.js - (cb) => cb(null, [new CID('QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n')]), - // All pins, direct and indirect (cb) => ipfs.pin.ls((err, pins) => { if (err) { @@ -61,31 +56,12 @@ function createMarkedSet (ipfs, callback) { }), // Blocks used internally by the pinner - (cb) => ipfs._repo.datastore.get(PIN_DS_KEY, (err, mh) => { - if (err) { - if (err.code === 'ERR_NOT_FOUND') { - log(`No pinned blocks`) - return cb(null, []) - } - return cb(new Error(`Could not get pin sets root from datastore: ${err.message}`)) - } - - const cid = new CID(mh) - ipfs.dag.get(cid, '', { preload: false }, (err, obj) => { - if (err) { - return cb(new Error(`Could not get pin sets from store: ${err.message}`)) - } - - // The pinner stores an object that has two links to pin sets: - // 1. The directly pinned CIDs - // 2. 
The recursively pinned CIDs - cb(null, [cid, ...obj.value.Links.map(l => l.Hash)]) - }) - }), + (cb) => ipfs.pin._getInternalBlocks(cb), // The MFS root and all its descendants - (cb) => ipfs._repo.datastore.get(MFS_ROOT_DS_KEY, (err, mh) => { + (cb) => ipfs._repo.root.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { + console.error(err) if (err.code === 'ERR_NOT_FOUND') { log(`No blocks in MFS`) return cb(null, []) @@ -100,7 +76,7 @@ function createMarkedSet (ipfs, callback) { return callback(err) } - const cids = res.flat().map(cid => cid.toV1().toString('base32')) + const cids = [].concat(...res).map(cid => cid.toV1().toString('base32')) return callback(null, new Set(cids)) }) } diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js new file mode 100644 index 0000000000..912ddd5a41 --- /dev/null +++ b/src/core/components/pin/pin-manager.js @@ -0,0 +1,301 @@ +/* eslint max-nested-callbacks: ["error", 8] */ +'use strict' + +const { DAGNode, DAGLink, util } = require('ipld-dag-pb') +const CID = require('cids') +const map = require('async/map') +const series = require('async/series') +const parallel = require('async/parallel') +const eachLimit = require('async/eachLimit') +const waterfall = require('async/waterfall') +const detectLimit = require('async/detectLimit') +const { Key } = require('interface-datastore') +const errCode = require('err-code') +const multicodec = require('multicodec') + +const createPinSet = require('./pin-set') +const { EMPTY_KEY_HASH } = createPinSet + +// arbitrary limit to the number of concurrent dag operations +const concurrencyLimit = 300 +const PIN_DS_KEY = new Key('/local/pins') + +function toB58String (hash) { + return new CID(hash).toBaseEncodedString() +} + +function invalidPinTypeErr (type) { + const errMsg = `Invalid type '${type}', must be one of {direct, indirect, recursive, all}` + return errCode(new Error(errMsg), 'ERR_INVALID_PIN_TYPE') +} + +const PinTypes = { + direct: 'direct', + recursive: 'recursive', + indirect: 'indirect', + all: 'all' +} + +class PinManager { + constructor (repo, dag, log) { + this.repo = repo + this.dag = dag + this.log = log + this.pinset = createPinSet(dag) + this.directPins = new Set() + this.recursivePins = new Set() + } + + directKeys () { + return Array.from(this.directPins).map(key => new CID(key).buffer) + } + + recursiveKeys () { + return Array.from(this.recursivePins).map(key => new CID(key).buffer) + } + + getIndirectKeys (callback) { + const indirectKeys = new Set() + eachLimit(this.recursiveKeys(), concurrencyLimit, (multihash, cb) => { + this.dag._getRecursive(multihash, (err, nodes) => { + if (err) { + return cb(err) + } + + map(nodes, (node, cb) => util.cid(util.serialize(node), { + cidVersion: 0 + }).then(cid => cb(null, cid), cb), (err, cids) => { + if (err) { + return cb(err) + } + + cids + .map(cid => cid.toString()) + // recursive pins pre-empt indirect pins + .filter(key => !this.recursivePins.has(key)) + .forEach(key => indirectKeys.add(key)) + + cb() + }) + }) + }, (err) => { + if (err) { return callback(err) } + callback(null, Array.from(indirectKeys)) + }) + } + + // Encode and write pin key sets to the datastore: + // a DAGLink for each of the recursive and direct pinsets + // a DAGNode holding those as DAGLinks, a kind of root pin + flushPins (callback) { + let dLink, rLink, root + series([ + // create a DAGLink to the node with direct pins + cb => waterfall([ + cb => this.pinset.storeSet(this.directKeys(), cb), + ({ node, cid }, cb) => { + try { + cb(null, new 
DAGLink(PinTypes.direct, node.size, cid)) + } catch (err) { + cb(err) + } + }, + (link, cb) => { dLink = link; cb(null) } + ], cb), + + // create a DAGLink to the node with recursive pins + cb => waterfall([ + cb => this.pinset.storeSet(this.recursiveKeys(), cb), + ({ node, cid }, cb) => { + try { + cb(null, new DAGLink(PinTypes.recursive, node.size, cid)) + } catch (err) { + cb(err) + } + }, + (link, cb) => { rLink = link; cb(null) } + ], cb), + + // the pin-set nodes link to a special 'empty' node, so make sure it exists + cb => { + let empty + + try { + empty = DAGNode.create(Buffer.alloc(0)) + } catch (err) { + return cb(err) + } + + this.dag.put(empty, { + version: 0, + format: multicodec.DAG_PB, + hashAlg: multicodec.SHA2_256, + preload: false + }, cb) + }, + + // create a root node with DAGLinks to the direct and recursive DAGs + cb => { + let node + + try { + node = DAGNode.create(Buffer.alloc(0), [dLink, rLink]) + } catch (err) { + return cb(err) + } + + root = node + this.dag.put(root, { + version: 0, + format: multicodec.DAG_PB, + hashAlg: multicodec.SHA2_256, + preload: false + }, (err, cid) => { + if (!err) { + root.multihash = cid.buffer + } + cb(err) + }) + }, + + // hack for CLI tests + cb => this.repo.closed ? this.repo.open(cb) : cb(null, null), + + // save root to datastore under a consistent key + cb => this.repo.datastore.put(PIN_DS_KEY, root.multihash, cb) + ], (err, res) => { + if (err) { return callback(err) } + this.log(`Flushed pins with root: ${root}`) + return callback(null, root) + }) + } + + load (callback) { + waterfall([ + // hack for CLI tests + (cb) => this.repo.closed ? this.repo.datastore.open(cb) : cb(null, null), + (_, cb) => this.repo.datastore.has(PIN_DS_KEY, cb), + (has, cb) => has ? cb() : cb(new Error('No pins to load')), + (cb) => this.repo.datastore.get(PIN_DS_KEY, cb), + (mh, cb) => { + this.dag.get(new CID(mh), '', { preload: false }, cb) + } + ], (err, pinRoot) => { + if (err) { + if (err.message === 'No pins to load') { + this.log('No pins to load') + return callback() + } else { + return callback(err) + } + } + + parallel([ + cb => this.pinset.loadSet(pinRoot.value, PinTypes.recursive, cb), + cb => this.pinset.loadSet(pinRoot.value, PinTypes.direct, cb) + ], (err, keys) => { + if (err) { return callback(err) } + const [ rKeys, dKeys ] = keys + + this.directPins = new Set(dKeys.map(toB58String)) + this.recursivePins = new Set(rKeys.map(toB58String)) + + this.log('Loaded pins from the datastore') + return callback(null) + }) + }) + } + + isPinnedWithType (multihash, type, callback) { + const key = toB58String(multihash) + const { recursive, direct, all } = PinTypes + + // recursive + if ((type === recursive || type === all) && this.recursivePins.has(key)) { + return callback(null, { + key, + pinned: true, + reason: recursive + }) + } + + if (type === recursive) { + return callback(null, { + key, + pinned: false + }) + } + + // direct + if ((type === direct || type === all) && this.directPins.has(key)) { + return callback(null, { + key, + pinned: true, + reason: direct + }) + } + + if (type === direct) { + return callback(null, { + key, + pinned: false + }) + } + + // indirect (default) + // check each recursive key to see if multihash is under it + // arbitrary limit, enables handling 1000s of pins. 
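+ // (A note on the call below: async's detectLimit(coll, limit, test, cb)
+ // runs the async test over coll with at most `limit` calls in flight and
+ // calls back with the first item that passes, or undefined if none does.
+ // Here that is the first recursive pin root whose DAG contains `key`,
+ // which then becomes the `reason` reported to the caller.)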
+ detectLimit(this.recursiveKeys().map(key => new CID(key)), concurrencyLimit, (cid, cb) => { + waterfall([ + (done) => this.dag.get(cid, '', { preload: false }, done), + (result, done) => done(null, result.value), + (node, done) => this.pinset.hasDescendant(node, key, done) + ], cb) + }, (err, cid) => callback(err, { + key, + pinned: Boolean(cid), + reason: cid + })) + } + + // Gets CIDs of blocks used internally by the pinner + getInternalBlocks (callback) { + this.repo.datastore.get(PIN_DS_KEY, (err, mh) => { + if (err) { + if (err.code === 'ERR_NOT_FOUND') { + this.log(`No pinned blocks`) + return callback(null, []) + } + return callback(new Error(`Could not get pin sets root from datastore: ${err.message}`)) + } + + const cid = new CID(mh) + this.dag.get(cid, '', { preload: false }, (err, obj) => { + if (err) { + return callback(new Error(`Could not get pin sets from store: ${err.message}`)) + } + + // "Empty block" used by the pinner + const emptyBlockCid = new CID(EMPTY_KEY_HASH) + + // The pinner stores an object that has two links to pin sets: + // 1. The directly pinned CIDs + // 2. The recursively pinned CIDs + const pinSetCids = [cid, ...obj.value.Links.map(l => l.Hash)] + + callback(null, pinSetCids.concat([emptyBlockCid])) + }) + }) + } + + // Returns an error if the pin type is invalid + static checkPinType (type) { + if (typeof type !== 'string' || !Object.keys(PinTypes).includes(type)) { + return invalidPinTypeErr(type) + } + } +} + +PinManager.PinTypes = PinTypes + +module.exports = PinManager diff --git a/src/core/components/pin-set.js b/src/core/components/pin/pin-set.js similarity index 99% rename from src/core/components/pin-set.js rename to src/core/components/pin/pin-set.js index 6f3a9f98dc..5dc0564a13 100644 --- a/src/core/components/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -270,3 +270,5 @@ exports = module.exports = function (dag) { } return pinSet } + +module.exports.EMPTY_KEY_HASH = emptyKeyHash diff --git a/src/core/components/pin.proto.js b/src/core/components/pin/pin.proto.js similarity index 100% rename from src/core/components/pin.proto.js rename to src/core/components/pin/pin.proto.js diff --git a/src/core/components/repo.js b/src/core/components/repo.js index 76d9262096..25b7cf02ea 100644 --- a/src/core/components/repo.js +++ b/src/core/components/repo.js @@ -38,7 +38,7 @@ module.exports = function repo (self) { }) }), - gc: require('./gc')(self), + gc: require('./pin/gc')(self), stat: promisify((options, callback) => { if (typeof options === 'function') { diff --git a/src/core/index.js b/src/core/index.js index 109be0bda2..7119a6df4c 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -26,7 +26,7 @@ const defaultRepo = require('./runtime/repo-nodejs') const preload = require('./preload') const mfsPreload = require('./mfs-preload') const ipldOptions = require('./runtime/ipld-nodejs') -const GCLock = require('./components/gc-lock') +const GCLock = require('./components/pin/gc-lock') class IPFS extends EventEmitter { constructor (options) { diff --git a/test/core/pin-set.js b/test/core/pin-set.js index 088d248a12..b8eb97e243 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -19,7 +19,7 @@ const { const CID = require('cids') const IPFS = require('../../src/core') -const createPinSet = require('../../src/core/components/pin-set') +const createPinSet = require('../../src/core/components/pin/pin-set') const createTempRepo = require('../utils/create-repo-nodejs') const defaultFanout = 256 From 
13d380132bf66709c04166470de89662421f0df3 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 16:53:37 -0400 Subject: [PATCH 22/39] fix: browser gc tests --- test/core/gc.spec.js | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 8b7a48645b..a669c3dd03 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -7,20 +7,24 @@ const dirtyChai = require('dirty-chai') const expect = chai.expect chai.use(dirtyChai) -const fs = require('fs') - const IPFS = require('../../src/core') const createTempRepo = require('../utils/create-repo-nodejs') const pEvent = require('p-event') describe('gc', function () { - const fixtures = [ - 'test/fixtures/planets/mercury/wiki.md', - 'test/fixtures/planets/solar-system.md' - ].map(path => ({ - path, - content: fs.readFileSync(path) - })) + const fixtures = [{ + path: 'test/my/path1', + content: Buffer.from('path1') + }, { + path: 'test/my/path2', + content: Buffer.from('path2') + }, { + path: 'test/my/path3', + content: Buffer.from('path3') + }, { + path: 'test/my/path4', + content: Buffer.from('path4') + }] let ipfs let repo @@ -98,7 +102,7 @@ describe('gc', function () { // Add blocks to IPFS // Note: add operation will take a read lock const addLockRequested = pEvent(ipfs._gcLock, 'readLock request') - const add1 = ipfs.add(fixtures[0], { pin: true }) + const add1 = ipfs.add(fixtures[2], { pin: true }) // Once add lock has been requested, start GC await addLockRequested @@ -108,7 +112,7 @@ describe('gc', function () { // Once GC has started, start second add await gcStarted - const add2 = ipfs.add(fixtures[1], { pin: true }) + const add2 = ipfs.add(fixtures[3], { pin: true }) const deleted = (await gc).map(i => i.cid) const add1Res = (await add1)[0].hash From 64a15665bd9063f2d5684ad86bebb0d01696d796 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Wed, 22 May 2019 18:33:51 -0400 Subject: [PATCH 23/39] fix: gc parsing of block cid in browser --- src/core/components/pin.js | 6 +++--- src/core/components/pin/gc.js | 24 +++++++++++++++++------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index 168a488654..274013c791 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -263,9 +263,9 @@ module.exports = (self) => { } }), - _getInternalBlocks: (cb) => pinManager.getInternalBlocks(cb), - - _load: (cb) => pinManager.load(cb) + _isPinnedWithType: promisify(pinManager.isPinnedWithType.bind(pinManager)), + _getInternalBlocks: promisify(pinManager.getInternalBlocks.bind(pinManager)), + _load: promisify(pinManager.load.bind(pinManager)) } return pin diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index 4ff06db698..4ac98393d5 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -56,12 +56,17 @@ function createMarkedSet (ipfs, callback) { }), // Blocks used internally by the pinner - (cb) => ipfs.pin._getInternalBlocks(cb), + (cb) => ipfs.pin._getInternalBlocks((err, cids) => { + if (err) { + return cb(new Error(`Could not list pinner internal blocks: ${err.message}`)) + } + log(`Found ${cids.length} pinner internal blocks`) + cb(null, cids) + }), // The MFS root and all its descendants (cb) => ipfs._repo.root.get(MFS_ROOT_DS_KEY, (err, mh) => { if (err) { - console.error(err) if (err.code === 'ERR_NOT_FOUND') { log(`No blocks in MFS`) return cb(null, []) @@ -87,7 +92,7 @@ function getDescendants (ipfs, 
cid, callback) { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } - log(`Found ${refs.length} MFS blocks`) + log(`Found ${1 + refs.length} MFS blocks`) callback(null, [cid, ...refs.map(r => new CID(r.ref))]) }) } @@ -98,19 +103,24 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { // The blocks variable has the form { { key: Key() }, { key: Key() }, ... } const unreferenced = [] const res = [] + let errCount = 0 for (const { key: k } of blocks) { try { const cid = dsKeyToCid(k) - if (!markedSet.has(cid.toV1().toString('base32'))) { + const b32 = cid.toV1().toString('base32') + if (!markedSet.has(b32)) { unreferenced.push(cid) } } catch (err) { - res.push({ err: new Error(`Could not convert block with key '${k}' to CID: ${err.message}`) }) + errCount++ + const msg = `Could not convert block with key '${k}' to CID: ${err.message}` + log(msg) + res.push({ err: new Error(msg) }) } } const msg = `Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. ` + - `Deleting ${unreferenced.length} blocks.` + `Deleting ${unreferenced.length} blocks.` + (errCount ? ` (${errCount} errors)` : '') log(msg) mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { @@ -133,5 +143,5 @@ function dsKeyToCid (key) { // Block key is of the form / const decoder = new base32.Decoder() const buff = decoder.write(key.toString().slice(1)).finalize() - return new CID(buff) + return new CID(Buffer.from(buff)) } From 088041ddfafdd63724cf64ed5ca9ab996b2528a2 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 11:52:19 -0400 Subject: [PATCH 24/39] test: add gc-lock tests --- test/core/gc-lock.spec.js | 197 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 test/core/gc-lock.spec.js diff --git a/test/core/gc-lock.spec.js b/test/core/gc-lock.spec.js new file mode 100644 index 0000000000..41ea1b0d57 --- /dev/null +++ b/test/core/gc-lock.spec.js @@ -0,0 +1,197 @@ +/* eslint max-nested-callbacks: ["error", 8] */ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +const dirtyChai = require('dirty-chai') +const expect = chai.expect +chai.use(dirtyChai) + +const parallel = require('async/parallel') +const pull = require('pull-stream') +const GCLock = require('../../src/core/components/pin/gc-lock') + +const cbTakeLock = (type, lock, out, id, duration) => { + return (cb) => lock[type + 'Lock']((lockCb) => { + out.push(`${type} ${id} start`) + setTimeout(() => { + out.push(`${type} ${id} end`) + lockCb() + }, duration) + }, cb) +} +const cbReadLock = (lock, out, id, duration) => { + return cbTakeLock('read', lock, out, id, duration) +} +const cbWriteLock = (lock, out, id, duration) => { + return cbTakeLock('write', lock, out, id, duration) +} + +const pullTakeLock = (type, lock, out, id, duration) => { + const lockFn = type === 'read' ? 
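  // Each task built here records `<type> <id> start` / `<type> <id> end`
  // markers in the shared `out` array while holding the lock, so the
  // assertions can verify exactly how reads and writes interleave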
'pullReadLock' : 'pullWriteLock' + const vals = ['a', 'b', 'c'] + return (cb) => { + pull( + pull.values(vals), + lock[lockFn](() => { + let started = false + return pull( + pull.through((i) => { + if (!started) { + out.push(`${type} ${id} start`) + started = true + } + }), + pull.asyncMap((i, cb) => { + setTimeout(() => cb(null, i), duration / vals.length) + }) + ) + }), + pull.collect(() => { + out.push(`${type} ${id} end`) + cb() + }) + ) + } +} +const pullReadLock = (lock, out, id, duration) => { + return pullTakeLock('read', lock, out, id, duration) +} +const pullWriteLock = (lock, out, id, duration) => { + return pullTakeLock('write', lock, out, id, duration) +} + +const expectResult = (out, exp, done) => { + return () => { + try { + expect(out).to.eql(exp) + } catch (err) { + return done(err) + } + done() + } +} + +const runTests = (suiteName, { readLock, writeLock }) => { + describe(suiteName, () => { + it('multiple simultaneous reads', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + readLock(lock, out, 1, 100), + readLock(lock, out, 2, 200), + readLock(lock, out, 3, 300) + ], expectResult(out, [ + 'read 1 start', + 'read 2 start', + 'read 3 start', + 'read 1 end', + 'read 2 end', + 'read 3 end' + ], done)) + }) + + it('multiple simultaneous writes', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + writeLock(lock, out, 1, 100), + writeLock(lock, out, 2, 200), + writeLock(lock, out, 3, 300) + ], expectResult(out, [ + 'write 1 start', + 'write 1 end', + 'write 2 start', + 'write 2 end', + 'write 3 start', + 'write 3 end' + ], done)) + }) + + it('read then write then read', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + readLock(lock, out, 1, 100), + writeLock(lock, out, 1, 100), + readLock(lock, out, 2, 100) + ], expectResult(out, [ + 'read 1 start', + 'read 1 end', + 'write 1 start', + 'write 1 end', + 'read 2 start', + 'read 2 end' + ], done)) + }) + + it('write then read then write', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + writeLock(lock, out, 1, 100), + readLock(lock, out, 1, 100), + writeLock(lock, out, 2, 100) + ], expectResult(out, [ + 'write 1 start', + 'write 1 end', + 'read 1 start', + 'read 1 end', + 'write 2 start', + 'write 2 end' + ], done)) + }) + + it('two simultaneous reads then write then read', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + readLock(lock, out, 1, 100), + readLock(lock, out, 2, 200), + writeLock(lock, out, 1, 100), + readLock(lock, out, 3, 100) + ], expectResult(out, [ + 'read 1 start', + 'read 2 start', + 'read 1 end', + 'read 2 end', + 'write 1 start', + 'write 1 end', + 'read 3 start', + 'read 3 end' + ], done)) + }) + + it('two simultaneous writes then read then write', (done) => { + const lock = new GCLock() + const out = [] + parallel([ + writeLock(lock, out, 1, 100), + writeLock(lock, out, 2, 100), + readLock(lock, out, 1, 100), + writeLock(lock, out, 3, 100) + ], expectResult(out, [ + 'write 1 start', + 'write 1 end', + 'write 2 start', + 'write 2 end', + 'read 1 start', + 'read 1 end', + 'write 3 start', + 'write 3 end' + ], done)) + }) + }) +} + +describe('gc-lock', function () { + runTests('cb style lock', { + readLock: cbReadLock, + writeLock: cbWriteLock + }) + + runTests('pull stream style lock', { + readLock: pullReadLock, + writeLock: pullWriteLock + }) +}) From d772ed75720b42e96564036ce2b5b7c2966903a3 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 12:48:24 -0400 Subject: [PATCH 25/39] 
fix: gc lock error handling --- src/core/components/pin/gc-lock.js | 13 ++-- test/core/gc-lock.spec.js | 110 +++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 9 deletions(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 4a427c6d13..f9e0728025 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -80,8 +80,9 @@ class PullLocker { // Returns a Promise that resolves when the locked piece of code completes locked () { - return new Promise((resolve) => { - this.releaseLock = resolve + return new Promise((resolve, reject) => { + this.releaseLock = (err) => err ? reject(err) : resolve() + log(`[${this.lockId}] ${this.type} (pull) started`) this.emitter.emit(`${this.type} start`, this.lockId) @@ -101,6 +102,10 @@ class PullLocker { this.emitter.emit(`${this.type} request`, this.lockId) // Request the lock this.lock = this.mutex[this.type](() => this.locked()) + // If there is an error, it gets passed through to the caller using + // pull streams, so here we just catch the error and ignore it so + // that there isn't an UnhandledPromiseRejectionWarning + this.lock.catch(() => {}) } // Wait for the mutex to give us permission @@ -111,10 +116,10 @@ class PullLocker { // Releases the lock release () { - return pull.through(null, () => { + return pull.through(null, (err) => { log(`[${this.lockId}] ${this.type} (pull) released`) this.emitter.emit(`${this.type} release`, this.lockId) - this.releaseLock() + this.releaseLock(err) }) } } diff --git a/test/core/gc-lock.spec.js b/test/core/gc-lock.spec.js index 41ea1b0d57..96e9a3c838 100644 --- a/test/core/gc-lock.spec.js +++ b/test/core/gc-lock.spec.js @@ -1,4 +1,3 @@ -/* eslint max-nested-callbacks: ["error", 8] */ /* eslint-env mocha */ 'use strict' @@ -26,6 +25,24 @@ const cbReadLock = (lock, out, id, duration) => { const cbWriteLock = (lock, out, id, duration) => { return cbTakeLock('write', lock, out, id, duration) } +const cbTakeLockError = (type, lock, out, errs, id, duration) => { + return (cb) => lock[type + 'Lock']((lockCb) => { + out.push(`${type} ${id} start`) + setTimeout(() => { + out.push(`${type} ${id} error`) + lockCb(new Error('err')) + }, duration) + }, (err) => { + errs.push(err) + cb() + }) +} +const cbReadLockError = (lock, out, errs, id, duration) => { + return cbTakeLockError('read', lock, out, errs, id, duration) +} +const cbWriteLockError = (lock, out, errs, id, duration) => { + return cbTakeLockError('write', lock, out, errs, id, duration) +} const pullTakeLock = (type, lock, out, id, duration) => { const lockFn = type === 'read' ? 'pullReadLock' : 'pullWriteLock' @@ -60,11 +77,54 @@ const pullReadLock = (lock, out, id, duration) => { const pullWriteLock = (lock, out, id, duration) => { return pullTakeLock('write', lock, out, id, duration) } +const pullTakeLockError = (type, lock, out, errs, id, duration) => { + const lockFn = type === 'read' ? 
'pullReadLock' : 'pullWriteLock' + const vals = ['a', 'b', 'c'] + return (cb) => { + pull( + pull.values(vals), + lock[lockFn](() => { + let started = false + return pull( + pull.through((i) => { + if (!started) { + out.push(`${type} ${id} start`) + started = true + } + }), + pull.asyncMap((i, cb) => { + setTimeout(() => cb(new Error('err')), duration) + }) + ) + }), + pull.collect((err) => { + out.push(`${type} ${id} error`) + errs.push(err) + cb() + }) + ) + } +} +const pullReadLockError = (lock, out, errs, id, duration) => { + return pullTakeLockError('read', lock, out, errs, id, duration) +} +const pullWriteLockError = (lock, out, errs, id, duration) => { + return pullTakeLockError('write', lock, out, errs, id, duration) +} -const expectResult = (out, exp, done) => { +const expectResult = (out, exp, errs, expErrCount, done) => { + if (typeof errs === 'function') { + done = errs + } return () => { try { expect(out).to.eql(exp) + if (typeof expErrCount === 'number') { + expect(errs.length).to.eql(expErrCount) + for (const e of errs) { + expect(e.message).to.eql('err') + } + } } catch (err) { return done(err) } @@ -72,7 +132,7 @@ const expectResult = (out, exp, done) => { } } -const runTests = (suiteName, { readLock, writeLock }) => { +const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockError }) => { describe(suiteName, () => { it('multiple simultaneous reads', (done) => { const lock = new GCLock() @@ -181,17 +241,57 @@ const runTests = (suiteName, { readLock, writeLock }) => { 'write 3 end' ], done)) }) + + it('simultaneous reads with error then write', (done) => { + const lock = new GCLock() + const out = [] + const errs = [] + parallel([ + readLockError(lock, out, errs, 1, 100), + readLock(lock, out, 2, 200), + writeLock(lock, out, 1, 100) + ], expectResult(out, [ + 'read 1 start', + 'read 2 start', + 'read 1 error', + 'read 2 end', + 'write 1 start', + 'write 1 end' + ], errs, 1, done)) + }) + + it('simultaneous writes with error then read', (done) => { + const lock = new GCLock() + const out = [] + const errs = [] + parallel([ + writeLockError(lock, out, errs, 1, 100), + writeLock(lock, out, 2, 100), + readLock(lock, out, 1, 100) + ], expectResult(out, [ + 'write 1 start', + 'write 1 error', + 'write 2 start', + 'write 2 end', + 'read 1 start', + 'read 1 end' + ], errs, 1, done)) + }) }) } describe('gc-lock', function () { runTests('cb style lock', { readLock: cbReadLock, - writeLock: cbWriteLock + writeLock: cbWriteLock, + readLockError: cbReadLockError, + writeLockError: cbWriteLockError }) runTests('pull stream style lock', { readLock: pullReadLock, - writeLock: pullWriteLock + writeLock: pullWriteLock, + readLockError: pullReadLockError, + writeLockError: pullWriteLockError }) }) From 45db74d2ea0121fcb990f9d80675429cad936184 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 18:15:32 -0400 Subject: [PATCH 26/39] fix: gc - take pin lock after resolve --- src/core/components/pin.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index 274013c791..dd1dfd0fb3 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -36,7 +36,7 @@ module.exports = (self) => { resolvePath(self.object, paths, (err, mhs) => { if (err) { return callback(err) } - const pin = (pinComplete) => { + const pinAdd = (pinComplete) => { // verify that each hash can be pinned map(mhs, (multihash, cb) => { const key = toB58String(multihash) @@ -89,9 +89,9 @@ module.exports = 
(self) => { // is complete, so don't take a second lock here const lock = options.lock !== false if (lock) { - self._gcLock.readLock(pin, callback) + self._gcLock.readLock(pinAdd, callback) } else { - pin(callback) + pinAdd(callback) } }) }), From f6f9a720e3288dd738b3ccd28f048b13683707e7 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Thu, 23 May 2019 19:12:19 -0400 Subject: [PATCH 27/39] fix: make sure each GCLock instance uses distinct mutex --- src/core/components/pin/gc-lock.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index f9e0728025..3ec3256111 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -8,7 +8,13 @@ const log = require('debug')('ipfs:gc:lock') class GCLock extends EventEmitter { constructor () { super() - this.mutex = mortice() + + // Ensure that we get a different mutex for each instance of GCLock + // (There should only be one GCLock instance per IPFS instance, but + // there may be multiple IPFS instances, eg in unit tests) + const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() + this.mutex = mortice(randId) + this.lockId = 0 } From 3a185d53d006ff0c836b271fd5173b6a0cba8a77 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 24 May 2019 11:07:58 -0400 Subject: [PATCH 28/39] fix: choose non-overlapping port for GC test --- test/core/gc.spec.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index a669c3dd03..790b8a4068 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -11,7 +11,7 @@ const IPFS = require('../../src/core') const createTempRepo = require('../utils/create-repo-nodejs') const pEvent = require('p-event') -describe('gc', function () { +describe('pin-gc', function () { const fixtures = [{ path: 'test/my/path1', content: Buffer.from('path1') @@ -35,7 +35,13 @@ describe('gc', function () { ipfs = new IPFS({ repo, config: { - Bootstrap: [] + Bootstrap: [], + Addresses: { + Swarm: [ + // Pick a port that doesn't overlap with the test in pin.js + '/ip4/0.0.0.0/tcp/4102' + ] + } } }) ipfs.on('ready', done) From 0af694af6e11c57332e64f0374babe2735fe979e Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 24 May 2019 12:55:46 -0400 Subject: [PATCH 29/39] fix: better gc test port config --- test/core/gc.spec.js | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 790b8a4068..1ab371ce9f 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -7,11 +7,12 @@ const dirtyChai = require('dirty-chai') const expect = chai.expect chai.use(dirtyChai) +const isNode = require('detect-node') +const pEvent = require('p-event') const IPFS = require('../../src/core') const createTempRepo = require('../utils/create-repo-nodejs') -const pEvent = require('p-event') -describe('pin-gc', function () { +describe('gc', function () { const fixtures = [{ path: 'test/my/path1', content: Buffer.from('path1') @@ -32,18 +33,13 @@ describe('pin-gc', function () { before(function (done) { this.timeout(20 * 1000) repo = createTempRepo() - ipfs = new IPFS({ - repo, - config: { - Bootstrap: [], - Addresses: { - Swarm: [ - // Pick a port that doesn't overlap with the test in pin.js - '/ip4/0.0.0.0/tcp/4102' - ] - } + let config = { Bootstrap: [] } + if (isNode) { + config.Addresses = { + Swarm: ['/ip4/127.0.0.1/tcp/0'] } - }) + } + ipfs = new IPFS({ repo, config 
}) ipfs.on('ready', done) }) From ffb4eb323e597ac545c4b28dcafce6e11b5557f3 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 24 May 2019 13:48:59 -0400 Subject: [PATCH 30/39] test: increase timeout for repo gc test --- test/cli/repo.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/cli/repo.js b/test/cli/repo.js index 9fd5d8f19d..b49c6ea396 100644 --- a/test/cli/repo.js +++ b/test/cli/repo.js @@ -21,7 +21,9 @@ describe('repo', () => runOnAndOff((thing) => { }) // Note: There are more comprehensive GC tests in interface-js-ipfs-core - it('should run garbage collection', async () => { + it('should run garbage collection', async function () { + this.timeout(60000) + // Add a file to IPFS const cid = (await ipfs(`add -Q ${fixturePath}`)).trim() From 9dd14021646c0c81db569db98955336d513edce8 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 28 May 2019 14:07:46 -0400 Subject: [PATCH 31/39] fix: webworkers + mortice --- src/core/components/pin/gc-lock.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 3ec3256111..8601c47da8 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -13,7 +13,9 @@ class GCLock extends EventEmitter { // (There should only be one GCLock instance per IPFS instance, but // there may be multiple IPFS instances, eg in unit tests) const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() - this.mutex = mortice(randId) + this.mutex = mortice(randId, { + singleProcess: true + }) this.lockId = 0 } From f35f492c02ca97bccde09c5263e36b5f9744bd3e Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 28 May 2019 15:30:10 -0400 Subject: [PATCH 32/39] chore: refactor mortice options --- src/core/components/pin/gc-lock.js | 4 ++-- src/core/index.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/gc-lock.js index 8601c47da8..2dc540b8fb 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/gc-lock.js @@ -6,7 +6,7 @@ const EventEmitter = require('events') const log = require('debug')('ipfs:gc:lock') class GCLock extends EventEmitter { - constructor () { + constructor (repoOwner) { super() // Ensure that we get a different mutex for each instance of GCLock @@ -14,7 +14,7 @@ class GCLock extends EventEmitter { // there may be multiple IPFS instances, eg in unit tests) const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() this.mutex = mortice(randId, { - singleProcess: true + singleProcess: repoOwner }) this.lockId = 0 diff --git a/src/core/index.js b/src/core/index.js index 7119a6df4c..c1e660f076 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -80,7 +80,7 @@ class IPFS extends EventEmitter { this._ipns = undefined // eslint-disable-next-line no-console this._print = this._options.silent ? 
this.log : console.log - this._gcLock = new GCLock() + this._gcLock = new GCLock(this._options.repoOwner) // IPFS Core exposed components // - for booting up a node From b76737cf35a51e168503ee49b2074f092ac779ab Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 28 May 2019 15:32:59 -0400 Subject: [PATCH 33/39] fix: gc rm test on Windows --- test/core/gc.spec.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/core/gc.spec.js b/test/core/gc.spec.js index 1ab371ce9f..cf9593c6a6 100644 --- a/test/core/gc.spec.js +++ b/test/core/gc.spec.js @@ -211,7 +211,8 @@ describe('gc', function () { const cid2 = (await ipfs.block.put(Buffer.from('block to pin rm 2'), null)).cid // Pin blocks - await Promise.all([ipfs.pin.add(cid1), ipfs.pin.add(cid2)]) + await ipfs.pin.add(cid1) + await ipfs.pin.add(cid2) // Unpin first block // Note: pin rm will take a read lock From e31650c22d9327b6ce9ebfabc2a0bdfcd2a65c42 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 7 Jun 2019 15:40:45 -0400 Subject: [PATCH 34/39] fix: ensure gc filters all internal pins --- src/core/components/pin/pin-manager.js | 14 ++++---- src/core/components/pin/pin-set.js | 29 ++++++++++++--- test/core/pin-set.js | 50 ++++++++++++++++++++++++-- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index 912ddd5a41..cecad5f37d 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -14,7 +14,6 @@ const errCode = require('err-code') const multicodec = require('multicodec') const createPinSet = require('./pin-set') -const { EMPTY_KEY_HASH } = createPinSet // arbitrary limit to the number of concurrent dag operations const concurrencyLimit = 300 @@ -275,15 +274,18 @@ class PinManager { return callback(new Error(`Could not get pin sets from store: ${err.message}`)) } - // "Empty block" used by the pinner - const emptyBlockCid = new CID(EMPTY_KEY_HASH) - // The pinner stores an object that has two links to pin sets: // 1. The directly pinned CIDs // 2. 
The recursively pinned CIDs - const pinSetCids = [cid, ...obj.value.Links.map(l => l.Hash)] + // If large enough, these pin sets may have links to buckets to hold + // the pins + this.pinset.getInternalCids(obj.value, (err, cids) => { + if (err) { + return callback(new Error(`Could not get pinner internal cids: ${err.message}`)) + } - callback(null, pinSetCids.concat([emptyBlockCid])) + callback(null, cids.concat(cid)) + }) }) }) } diff --git a/src/core/components/pin/pin-set.js b/src/core/components/pin/pin-set.js index 5dc0564a13..fd29be54bf 100644 --- a/src/core/components/pin/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -8,6 +8,7 @@ const varint = require('varint') const { DAGNode, DAGLink } = require('ipld-dag-pb') const multicodec = require('multicodec') const someSeries = require('async/someSeries') +const eachSeries = require('async/eachSeries') const eachOfSeries = require('async/eachOfSeries') const pbSchema = require('./pin.proto') @@ -239,6 +240,10 @@ exports = module.exports = function (dag) { }, walkItems: (node, step, callback) => { + pinSet.walkAll(node, step, () => {}, callback) + }, + + walkAll: (node, stepPin, stepBin, callback) => { let pbh try { pbh = readHeader(node) @@ -253,22 +258,38 @@ exports = module.exports = function (dag) { const linkHash = link.Hash.buffer if (!emptyKey.equals(linkHash)) { + stepBin(link, idx, pbh.data) + // walk the links of this fanout bin return dag.get(linkHash, '', { preload: false }, (err, res) => { if (err) { return eachCb(err) } - pinSet.walkItems(res.value, step, eachCb) + pinSet.walkItems(res.value, stepBin, eachCb) }) } } else { // otherwise, the link is a pin - step(link, idx, pbh.data) + stepPin(link, idx, pbh.data) } eachCb(null) }, callback) + }, + + getInternalCids: (rootNode, callback) => { + // "Empty block" used by the pinner + const cids = [new CID(emptyKey)] + + const step = link => cids.push(link.Hash) + eachSeries(rootNode.Links, (topLevelLink, cb) => { + cids.push(topLevelLink.Hash) + + dag.get(topLevelLink.Hash, '', { preload: false }, (err, res) => { + if (err) { return cb(err) } + + pinSet.walkAll(res.value, () => {}, step, cb) + }) + }, (err) => callback(err, cids)) } } return pinSet } - -module.exports.EMPTY_KEY_HASH = emptyKeyHash diff --git a/test/core/pin-set.js b/test/core/pin-set.js index b8eb97e243..190981afd6 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -22,6 +22,7 @@ const IPFS = require('../../src/core') const createPinSet = require('../../src/core/components/pin/pin-set') const createTempRepo = require('../utils/create-repo-nodejs') +const emptyKeyHash = 'QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n' const defaultFanout = 256 const maxItems = 8192 @@ -180,12 +181,12 @@ describe('pinSet', function () { }) }) - describe('walkItems', function () { + describe('walkAll', function () { it(`fails if node doesn't have a pin-set protobuf header`, function (done) { createNode('datum', (err, node) => { expect(err).to.not.exist() - pinSet.walkItems(node, () => {}, (err, res) => { + pinSet.walkAll(node, () => {}, () => {}, (err, res) => { expect(err).to.exist() expect(res).to.not.exist() done() @@ -193,6 +194,29 @@ describe('pinSet', function () { }) }) + it('visits all links of a root node', function (done) { + this.timeout(90 * 1000) + + const seen = [] + const walker = (link, idx, data) => seen.push({ link, idx, data }) + + createNodes(maxItems + 1, (err, nodes) => { + expect(err).to.not.exist() + + pinSet.storeSet(nodes, (err, result) => { + expect(err).to.not.exist() + + 
pinSet.walkAll(result.node, () => {}, walker, err => { + expect(err).to.not.exist() + expect(seen).to.have.length(maxItems + defaultFanout + 1) + done() + }) + }) + }) + }) + }) + + describe('walkItems', function () { it('visits all non-fanout links of a root node', function (done) { const seen = [] const walker = (link, idx, data) => seen.push({ link, idx, data }) @@ -217,4 +241,26 @@ describe('pinSet', function () { }) }) }) + + describe('getInternalCids', function () { + it('gets all links and empty key CID', function (done) { + createNodes(defaultFanout, (err, nodes) => { + expect(err).to.not.exist() + + pinSet.storeSet(nodes, (err, result) => { + expect(err).to.not.exist() + + const rootNode = DAGNode.create('pins', [{ Hash: result.cid }]) + pinSet.getInternalCids(rootNode, (err, cids) => { + expect(err).to.not.exist() + expect(cids.length).to.eql(2) + const cidStrs = cids.map(c => c.toString()) + expect(cidStrs).includes(emptyKeyHash) + expect(cidStrs).includes(result.cid.toString()) + done() + }) + }) + }) + }) + }) }) From 830b9a18dec2fc8f5c9d04f17da1c049289c7c6b Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 7 Jun 2019 15:43:58 -0400 Subject: [PATCH 35/39] test: enable gc tests over ipfs-http-client --- test/http-api/interface.js | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/http-api/interface.js b/test/http-api/interface.js index 43c6ff6a12..5afbce1f7d 100644 --- a/test/http-api/interface.js +++ b/test/http-api/interface.js @@ -109,15 +109,7 @@ describe('interface-ipfs-core over ipfs-http-client tests', () => { } })) - tests.repo(defaultCommonFactory, { - skip: [ - // repo.gc - { - name: 'gc', - reason: 'TODO: repo.gc is not implemented in js-ipfs yet!' - } - ] - }) + tests.repo(defaultCommonFactory) tests.stats(defaultCommonFactory) From 280f987845b703cdb0c8255ce1911ab29e543954 Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Mon, 10 Jun 2019 09:55:56 -0400 Subject: [PATCH 36/39] chore: better gc logging --- src/core/components/pin/gc.js | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/core/components/pin/gc.js b/src/core/components/pin/gc.js index 4ac98393d5..342fb75f91 100644 --- a/src/core/components/pin/gc.js +++ b/src/core/components/pin/gc.js @@ -52,7 +52,9 @@ function createMarkedSet (ipfs, callback) { return cb(new Error(`Could not list pinned blocks: ${err.message}`)) } log(`Found ${pins.length} pinned blocks`) - cb(null, pins.map(p => new CID(p.hash))) + const cids = pins.map(p => new CID(p.hash)) + // log(' ' + cids.join('\n ')) + cb(null, cids) }), // Blocks used internally by the pinner @@ -61,6 +63,7 @@ function createMarkedSet (ipfs, callback) { return cb(new Error(`Could not list pinner internal blocks: ${err.message}`)) } log(`Found ${cids.length} pinner internal blocks`) + // log(' ' + cids.join('\n ')) cb(null, cids) }), @@ -92,8 +95,10 @@ function getDescendants (ipfs, cid, callback) { if (err) { return callback(new Error(`Could not get MFS root descendants from store: ${err.message}`)) } - log(`Found ${1 + refs.length} MFS blocks`) - callback(null, [cid, ...refs.map(r => new CID(r.ref))]) + const cids = [cid, ...refs.map(r => new CID(r.ref))] + log(`Found ${cids.length} MFS blocks`) + // log(' ' + cids.join('\n ')) + callback(null, cids) }) } @@ -119,9 +124,10 @@ function deleteUnmarkedBlocks (ipfs, markedSet, blocks, start, callback) { } } - const msg = `Marked set has ${markedSet.size} blocks. Blockstore has ${blocks.length} blocks. 
` + + const msg = `Marked set has ${markedSet.size} unique blocks. Blockstore has ${blocks.length} blocks. ` + `Deleting ${unreferenced.length} blocks.` + (errCount ? ` (${errCount} errors)` : '') log(msg) + // log(' ' + unreferenced.join('\n ')) mapLimit(unreferenced, BLOCK_RM_CONCURRENCY, (cid, cb) => { // Delete blocks from blockstore From 232fc39dd720cbbea28b6f05229189d5d66a90ef Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 14 Jun 2019 12:21:32 -0400 Subject: [PATCH 37/39] fix: pin walking --- src/core/components/pin/pin-set.js | 2 +- test/core/pin-set.js | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/core/components/pin/pin-set.js b/src/core/components/pin/pin-set.js index fd29be54bf..c3ef75d057 100644 --- a/src/core/components/pin/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -263,7 +263,7 @@ exports = module.exports = function (dag) { // walk the links of this fanout bin return dag.get(linkHash, '', { preload: false }, (err, res) => { if (err) { return eachCb(err) } - pinSet.walkItems(res.value, stepBin, eachCb) + pinSet.walkAll(res.value, stepPin, stepBin, eachCb) }) } } else { diff --git a/test/core/pin-set.js b/test/core/pin-set.js index 190981afd6..039376e657 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -197,8 +197,10 @@ describe('pinSet', function () { it('visits all links of a root node', function (done) { this.timeout(90 * 1000) - const seen = [] - const walker = (link, idx, data) => seen.push({ link, idx, data }) + const seenPins = [] + const walkerPin = (link, idx, data) => seenPins.push({ link, idx, data }) + const seenBins = [] + const walkerBin = (link, idx, data) => seenBins.push({ link, idx, data }) createNodes(maxItems + 1, (err, nodes) => { expect(err).to.not.exist() @@ -206,9 +208,10 @@ describe('pinSet', function () { pinSet.storeSet(nodes, (err, result) => { expect(err).to.not.exist() - pinSet.walkAll(result.node, () => {}, walker, err => { + pinSet.walkAll(result.node, walkerPin, walkerBin, err => { expect(err).to.not.exist() - expect(seen).to.have.length(maxItems + defaultFanout + 1) + expect(seenPins).to.have.length(maxItems + 1) + expect(seenBins).to.have.length(defaultFanout) done() }) }) From 6b45fea5c2441bfa12f343644d73e3e12529f2cc Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Fri, 21 Jun 2019 15:48:58 -0400 Subject: [PATCH 38/39] refactor: pin set walking --- src/core/components/pin/pin-set.js | 16 ++++++---------- test/core/pin-set.js | 16 +++++++--------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/core/components/pin/pin-set.js b/src/core/components/pin/pin-set.js index c3ef75d057..04a389bf2c 100644 --- a/src/core/components/pin/pin-set.js +++ b/src/core/components/pin/pin-set.js @@ -231,19 +231,15 @@ exports = module.exports = function (dag) { dag.get(link.Hash, '', { preload: false }, (err, res) => { if (err) { return callback(err) } const keys = [] - const step = link => keys.push(link.Hash.buffer) - pinSet.walkItems(res.value, step, err => { + const stepPin = link => keys.push(link.Hash.buffer) + pinSet.walkItems(res.value, { stepPin }, err => { if (err) { return callback(err) } return callback(null, keys) }) }) }, - walkItems: (node, step, callback) => { - pinSet.walkAll(node, step, () => {}, callback) - }, - - walkAll: (node, stepPin, stepBin, callback) => { + walkItems: (node, { stepPin = () => {}, stepBin = () => {} }, callback) => { let pbh try { pbh = readHeader(node) @@ -263,7 +259,7 @@ exports = module.exports = function (dag) { // walk 
the links of this fanout bin return dag.get(linkHash, '', { preload: false }, (err, res) => { if (err) { return eachCb(err) } - pinSet.walkAll(res.value, stepPin, stepBin, eachCb) + pinSet.walkItems(res.value, { stepPin, stepBin }, eachCb) }) } } else { @@ -279,14 +275,14 @@ exports = module.exports = function (dag) { // "Empty block" used by the pinner const cids = [new CID(emptyKey)] - const step = link => cids.push(link.Hash) + const stepBin = link => cids.push(link.Hash) eachSeries(rootNode.Links, (topLevelLink, cb) => { cids.push(topLevelLink.Hash) dag.get(topLevelLink.Hash, '', { preload: false }, (err, res) => { if (err) { return cb(err) } - pinSet.walkAll(res.value, () => {}, step, cb) + pinSet.walkItems(res.value, { stepBin }, cb) }) }, (err) => callback(err, cids)) } diff --git a/test/core/pin-set.js b/test/core/pin-set.js index 039376e657..73bfb69044 100644 --- a/test/core/pin-set.js +++ b/test/core/pin-set.js @@ -181,12 +181,12 @@ describe('pinSet', function () { }) }) - describe('walkAll', function () { + describe('walkItems', function () { it(`fails if node doesn't have a pin-set protobuf header`, function (done) { createNode('datum', (err, node) => { expect(err).to.not.exist() - pinSet.walkAll(node, () => {}, () => {}, (err, res) => { + pinSet.walkItems(node, {}, (err, res) => { expect(err).to.exist() expect(res).to.not.exist() done() @@ -198,9 +198,9 @@ describe('pinSet', function () { this.timeout(90 * 1000) const seenPins = [] - const walkerPin = (link, idx, data) => seenPins.push({ link, idx, data }) + const stepPin = (link, idx, data) => seenPins.push({ link, idx, data }) const seenBins = [] - const walkerBin = (link, idx, data) => seenBins.push({ link, idx, data }) + const stepBin = (link, idx, data) => seenBins.push({ link, idx, data }) createNodes(maxItems + 1, (err, nodes) => { expect(err).to.not.exist() @@ -208,7 +208,7 @@ describe('pinSet', function () { pinSet.storeSet(nodes, (err, result) => { expect(err).to.not.exist() - pinSet.walkAll(result.node, walkerPin, walkerBin, err => { + pinSet.walkItems(result.node, { stepPin, stepBin }, err => { expect(err).to.not.exist() expect(seenPins).to.have.length(maxItems + 1) expect(seenBins).to.have.length(defaultFanout) @@ -217,12 +217,10 @@ describe('pinSet', function () { }) }) }) - }) - describe('walkItems', function () { it('visits all non-fanout links of a root node', function (done) { const seen = [] - const walker = (link, idx, data) => seen.push({ link, idx, data }) + const stepPin = (link, idx, data) => seen.push({ link, idx, data }) createNodes(defaultFanout, (err, nodes) => { expect(err).to.not.exist() @@ -230,7 +228,7 @@ describe('pinSet', function () { pinSet.storeSet(nodes, (err, result) => { expect(err).to.not.exist() - pinSet.walkItems(result.node, walker, err => { + pinSet.walkItems(result.node, { stepPin }, err => { expect(err).to.not.exist() expect(seen).to.have.length(defaultFanout) expect(seen[0].idx).to.eql(defaultFanout) From fbbba1d186c3d6d43da0ac6a352cd8cc9506254f Mon Sep 17 00:00:00 2001 From: Dirk McCormick Date: Tue, 11 Jun 2019 11:22:36 -0400 Subject: [PATCH 39/39] feat: add locking for concurrent pin operations --- src/core/components/pin.js | 41 ++-- .../components/pin/{gc-lock.js => lock.js} | 30 +-- src/core/components/pin/pin-manager.js | 219 +++++++++++------- src/core/index.js | 4 +- test/core/{gc-lock.spec.js => lock.spec.js} | 20 +- test/core/pin.js | 38 +++ 6 files changed, 216 insertions(+), 136 deletions(-) rename src/core/components/pin/{gc-lock.js => lock.js} (83%) rename 
test/core/{gc-lock.spec.js => lock.spec.js} (95%) diff --git a/src/core/components/pin.js b/src/core/components/pin.js index dd1dfd0fb3..40d661e9a0 100644 --- a/src/core/components/pin.js +++ b/src/core/components/pin.js @@ -20,7 +20,7 @@ function toB58String (hash) { module.exports = (self) => { const dag = self.dag - const pinManager = new PinManager(self._repo, dag, self.log) + const pinManager = new PinManager(self._repo, dag, self._options.repoOwner, self.log) const pin = { add: promisify((paths, options, callback) => { @@ -43,7 +43,7 @@ module.exports = (self) => { if (recursive) { if (pinManager.recursivePins.has(key)) { // it's already pinned recursively - return cb(null, key) + return cb(null, null) } // entire graph of nested links should be pinned, @@ -60,7 +60,7 @@ module.exports = (self) => { } if (pinManager.directPins.has(key)) { // already directly pinned - return cb(null, key) + return cb(null, null) } // make sure we have the object @@ -73,15 +73,20 @@ module.exports = (self) => { }, (err, results) => { if (err) { return pinComplete(err) } - // update the pin sets in memory - const pinset = recursive ? pinManager.recursivePins : pinManager.directPins - results.forEach(key => pinset.add(key)) - - // persist updated pin sets to datastore - pinManager.flushPins((err, root) => { + const flushComplete = (err) => { if (err) { return pinComplete(err) } - pinComplete(null, results.map(hash => ({ hash }))) - }) + pinComplete(null, mhs.map(mh => ({ hash: toB58String(mh) }))) + } + + // each result is either a key or null if there is already a pin + results = results.filter(Boolean) + if (!results.length) { return flushComplete() } + + if (recursive) { + pinManager.addRecursivePins(results, flushComplete) + } else { + pinManager.addDirectPins(results, flushComplete) + } }) } @@ -143,20 +148,10 @@ module.exports = (self) => { }, (err, results) => { if (err) { return lockCb(err) } - // update the pin sets in memory - results.forEach(key => { - if (recursive && pinManager.recursivePins.has(key)) { - pinManager.recursivePins.delete(key) - } else { - pinManager.directPins.delete(key) - } - }) - - // persist updated pin sets to datastore - pinManager.flushPins((err, root) => { + pinManager.rmPins(results, recursive, (err) => { if (err) { return lockCb(err) } self.log(`Removed pins: ${results}`) - lockCb(null, results.map(hash => ({ hash }))) + lockCb(null, mhs.map(mh => ({ hash: toB58String(mh) }))) }) }) }, callback) diff --git a/src/core/components/pin/gc-lock.js b/src/core/components/pin/lock.js similarity index 83% rename from src/core/components/pin/gc-lock.js rename to src/core/components/pin/lock.js index 2dc540b8fb..3b33f10297 100644 --- a/src/core/components/pin/gc-lock.js +++ b/src/core/components/pin/lock.js @@ -3,21 +3,20 @@ const mortice = require('mortice') const pull = require('pull-stream') const EventEmitter = require('events') -const log = require('debug')('ipfs:gc:lock') +const debug = require('debug') -class GCLock extends EventEmitter { - constructor (repoOwner) { +class Lock extends EventEmitter { + constructor (repoOwner, debugName) { super() - // Ensure that we get a different mutex for each instance of GCLock - // (There should only be one GCLock instance per IPFS instance, but - // there may be multiple IPFS instances, eg in unit tests) + // Ensure that we get a different mutex for each instance of Lock const randId = (~~(Math.random() * 1e9)).toString(36) + Date.now() this.mutex = mortice(randId, { singleProcess: repoOwner }) this.lockId = 0 + this.log = 
debug(debugName || 'lock') } readLock (lockedFn, cb) { @@ -37,14 +36,14 @@ class GCLock extends EventEmitter { } const lockId = this.lockId++ - log(`[${lockId}] ${type} requested`) + this.log(`[${lockId}] ${type} requested`) this.emit(`${type} request`, lockId) const locked = () => new Promise((resolve, reject) => { this.emit(`${type} start`, lockId) - log(`[${lockId}] ${type} started`) + this.log(`[${lockId}] ${type} started`) lockedFn((err, res) => { this.emit(`${type} release`, lockId) - log(`[${lockId}] ${type} released`) + this.log(`[${lockId}] ${type} released`) err ? reject(err) : resolve(res) }) }) @@ -62,7 +61,7 @@ class GCLock extends EventEmitter { } pullLock (type, lockedPullFn) { - const pullLocker = new PullLocker(this, this.mutex, type, this.lockId++) + const pullLocker = new PullLocker(this, this.mutex, type, this.lockId++, this.log) return pull( pullLocker.take(), @@ -73,11 +72,12 @@ class GCLock extends EventEmitter { } class PullLocker { - constructor (emitter, mutex, type, lockId) { + constructor (emitter, mutex, type, lockId, log) { this.emitter = emitter this.mutex = mutex this.type = type this.lockId = lockId + this.log = log // This Promise resolves when the mutex gives us permission to start // running the locked piece of code @@ -91,7 +91,7 @@ class PullLocker { return new Promise((resolve, reject) => { this.releaseLock = (err) => err ? reject(err) : resolve() - log(`[${this.lockId}] ${this.type} (pull) started`) + this.log(`[${this.lockId}] ${this.type} (pull) started`) this.emitter.emit(`${this.type} start`, this.lockId) // The locked piece of code is ready to start, so resolve the @@ -106,7 +106,7 @@ class PullLocker { return pull( pull.asyncMap((i, cb) => { if (!this.lock) { - log(`[${this.lockId}] ${this.type} (pull) requested`) + this.log(`[${this.lockId}] ${this.type} (pull) requested`) this.emitter.emit(`${this.type} request`, this.lockId) // Request the lock this.lock = this.mutex[this.type](() => this.locked()) @@ -125,11 +125,11 @@ class PullLocker { // Releases the lock release () { return pull.through(null, (err) => { - log(`[${this.lockId}] ${this.type} (pull) released`) + this.log(`[${this.lockId}] ${this.type} (pull) released`) this.emitter.emit(`${this.type} release`, this.lockId) this.releaseLock(err) }) } } -module.exports = GCLock +module.exports = Lock diff --git a/src/core/components/pin/pin-manager.js b/src/core/components/pin/pin-manager.js index cecad5f37d..fba7aeaf49 100644 --- a/src/core/components/pin/pin-manager.js +++ b/src/core/components/pin/pin-manager.js @@ -14,6 +14,7 @@ const errCode = require('err-code') const multicodec = require('multicodec') const createPinSet = require('./pin-set') +const Lock = require('./lock') // arbitrary limit to the number of concurrent dag operations const concurrencyLimit = 300 @@ -36,13 +37,14 @@ const PinTypes = { } class PinManager { - constructor (repo, dag, log) { + constructor (repo, dag, repoOwner, log) { this.repo = repo this.dag = dag this.log = log this.pinset = createPinSet(dag) this.directPins = new Set() this.recursivePins = new Set() + this._lock = new Lock(repoOwner, 'ipfs:pin-manager:lock') } directKeys () { @@ -54,39 +56,78 @@ class PinManager { } getIndirectKeys (callback) { - const indirectKeys = new Set() - eachLimit(this.recursiveKeys(), concurrencyLimit, (multihash, cb) => { - this.dag._getRecursive(multihash, (err, nodes) => { - if (err) { - return cb(err) - } - - map(nodes, (node, cb) => util.cid(util.serialize(node), { - cidVersion: 0 - }).then(cid => cb(null, cid), cb), 
(err, cids) => { + this._lock.readLock((lockCb) => { + const indirectKeys = new Set() + eachLimit(this.recursiveKeys(), concurrencyLimit, (multihash, cb) => { + this.dag._getRecursive(multihash, (err, nodes) => { if (err) { return cb(err) } - cids - .map(cid => cid.toString()) - // recursive pins pre-empt indirect pins - .filter(key => !this.recursivePins.has(key)) - .forEach(key => indirectKeys.add(key)) - - cb() + map(nodes, (node, cb) => util.cid(util.serialize(node), { + cidVersion: 0 + }).then(cid => cb(null, cid), cb), (err, cids) => { + if (err) { + return cb(err) + } + + cids + .map(cid => cid.toString()) + // recursive pins pre-empt indirect pins + .filter(key => !this.recursivePins.has(key)) + .forEach(key => indirectKeys.add(key)) + + cb() + }) }) + }, (err) => { + if (err) { return lockCb(err) } + lockCb(null, Array.from(indirectKeys)) }) - }, (err) => { - if (err) { return callback(err) } - callback(null, Array.from(indirectKeys)) - }) + }, callback) + } + + addRecursivePins (keys, callback) { + this._addPins(keys, this.recursivePins, callback) + } + + addDirectPins (keys, callback) { + this._addPins(keys, this.directPins, callback) + } + + _addPins (keys, pinSet, callback) { + this._lock.writeLock((lockCb) => { + keys = keys.filter(key => !pinSet.has(key)) + if (!keys.length) return lockCb(null, []) + + for (const key of keys) { + pinSet.add(key) + } + this._flushPins(lockCb) + }, callback) + } + + rmPins (keys, recursive, callback) { + if (!keys.length) return callback(null, []) + + this._lock.writeLock((lockCb) => { + for (const key of keys) { + if (recursive && this.recursivePins.has(key)) { + this.recursivePins.delete(key) + } else { + this.directPins.delete(key) + } + } + + this._flushPins(lockCb) + }, callback) } // Encode and write pin key sets to the datastore: // a DAGLink for each of the recursive and direct pinsets // a DAGNode holding those as DAGLinks, a kind of root pin - flushPins (callback) { + // Note: should only be called within a lock + _flushPins (callback) { let dLink, rLink, root series([ // create a DAGLink to the node with direct pins @@ -170,39 +211,41 @@ class PinManager { } load (callback) { - waterfall([ - // hack for CLI tests - (cb) => this.repo.closed ? this.repo.datastore.open(cb) : cb(null, null), - (_, cb) => this.repo.datastore.has(PIN_DS_KEY, cb), - (has, cb) => has ? cb() : cb(new Error('No pins to load')), - (cb) => this.repo.datastore.get(PIN_DS_KEY, cb), - (mh, cb) => { - this.dag.get(new CID(mh), '', { preload: false }, cb) - } - ], (err, pinRoot) => { - if (err) { - if (err.message === 'No pins to load') { - this.log('No pins to load') - return callback() - } else { - return callback(err) + this._lock.writeLock((lockCb) => { + waterfall([ + // hack for CLI tests + (cb) => this.repo.closed ? this.repo.datastore.open(cb) : cb(null, null), + (_, cb) => this.repo.datastore.has(PIN_DS_KEY, cb), + (has, cb) => has ? 
cb() : cb(new Error('No pins to load')), + (cb) => this.repo.datastore.get(PIN_DS_KEY, cb), + (mh, cb) => { + this.dag.get(new CID(mh), '', { preload: false }, cb) + } + ], (err, pinRoot) => { + if (err) { + if (err.message === 'No pins to load') { + this.log('No pins to load') + return lockCb() + } else { + return lockCb(err) + } } - } - parallel([ - cb => this.pinset.loadSet(pinRoot.value, PinTypes.recursive, cb), - cb => this.pinset.loadSet(pinRoot.value, PinTypes.direct, cb) - ], (err, keys) => { - if (err) { return callback(err) } - const [ rKeys, dKeys ] = keys + parallel([ + cb => this.pinset.loadSet(pinRoot.value, PinTypes.recursive, cb), + cb => this.pinset.loadSet(pinRoot.value, PinTypes.direct, cb) + ], (err, keys) => { + if (err) { return lockCb(err) } + const [ rKeys, dKeys ] = keys - this.directPins = new Set(dKeys.map(toB58String)) - this.recursivePins = new Set(rKeys.map(toB58String)) + this.directPins = new Set(dKeys.map(toB58String)) + this.recursivePins = new Set(rKeys.map(toB58String)) - this.log('Loaded pins from the datastore') - return callback(null) + this.log('Loaded pins from the datastore') + return lockCb(null) + }) }) - }) + }, callback) } isPinnedWithType (multihash, type, callback) { @@ -241,53 +284,57 @@ class PinManager { }) } - // indirect (default) - // check each recursive key to see if multihash is under it - // arbitrary limit, enables handling 1000s of pins. - detectLimit(this.recursiveKeys().map(key => new CID(key)), concurrencyLimit, (cid, cb) => { - waterfall([ - (done) => this.dag.get(cid, '', { preload: false }, done), - (result, done) => done(null, result.value), - (node, done) => this.pinset.hasDescendant(node, key, done) - ], cb) - }, (err, cid) => callback(err, { - key, - pinned: Boolean(cid), - reason: cid - })) + this._lock.readLock((lockCb) => { + // indirect (default) + // check each recursive key to see if multihash is under it + // arbitrary limit, enables handling 1000s of pins. + detectLimit(this.recursiveKeys().map(key => new CID(key)), concurrencyLimit, (cid, cb) => { + waterfall([ + (done) => this.dag.get(cid, '', { preload: false }, done), + (result, done) => done(null, result.value), + (node, done) => this.pinset.hasDescendant(node, key, done) + ], cb) + }, (err, cid) => lockCb(err, { + key, + pinned: Boolean(cid), + reason: cid + })) + }, callback) } // Gets CIDs of blocks used internally by the pinner getInternalBlocks (callback) { - this.repo.datastore.get(PIN_DS_KEY, (err, mh) => { - if (err) { - if (err.code === 'ERR_NOT_FOUND') { - this.log(`No pinned blocks`) - return callback(null, []) - } - return callback(new Error(`Could not get pin sets root from datastore: ${err.message}`)) - } - - const cid = new CID(mh) - this.dag.get(cid, '', { preload: false }, (err, obj) => { + this._lock.writeLock((lockCb) => { + this.repo.datastore.get(PIN_DS_KEY, (err, mh) => { if (err) { - return callback(new Error(`Could not get pin sets from store: ${err.message}`)) + if (err.code === 'ERR_NOT_FOUND') { + this.log(`No pinned blocks`) + return lockCb(null, []) + } + return lockCb(new Error(`Could not get pin sets root from datastore: ${err.message}`)) } - // The pinner stores an object that has two links to pin sets: - // 1. The directly pinned CIDs - // 2. 
The recursively pinned CIDs - // If large enough, these pin sets may have links to buckets to hold - // the pins - this.pinset.getInternalCids(obj.value, (err, cids) => { + const cid = new CID(mh) + this.dag.get(cid, '', { preload: false }, (err, obj) => { if (err) { - return callback(new Error(`Could not get pinner internal cids: ${err.message}`)) + return lockCb(new Error(`Could not get pin sets from store: ${err.message}`)) } - callback(null, cids.concat(cid)) + // The pinner stores an object that has two links to pin sets: + // 1. The directly pinned CIDs + // 2. The recursively pinned CIDs + // If large enough, these pin sets may have links to buckets to hold + // the pins + this.pinset.getInternalCids(obj.value, (err, cids) => { + if (err) { + return lockCb(new Error(`Could not get pinner internal cids: ${err.message}`)) + } + + lockCb(null, cids.concat(cid)) + }) }) }) - }) + }, callback) } // Returns an error if the pin type is invalid diff --git a/src/core/index.js b/src/core/index.js index c1e660f076..f8f2b53b16 100644 --- a/src/core/index.js +++ b/src/core/index.js @@ -26,7 +26,7 @@ const defaultRepo = require('./runtime/repo-nodejs') const preload = require('./preload') const mfsPreload = require('./mfs-preload') const ipldOptions = require('./runtime/ipld-nodejs') -const GCLock = require('./components/pin/gc-lock') +const Lock = require('./components/pin/lock') class IPFS extends EventEmitter { constructor (options) { @@ -80,7 +80,7 @@ class IPFS extends EventEmitter { this._ipns = undefined // eslint-disable-next-line no-console this._print = this._options.silent ? this.log : console.log - this._gcLock = new GCLock(this._options.repoOwner) + this._gcLock = new Lock(this._options.repoOwner, 'ipfs:gc:lock') // IPFS Core exposed components // - for booting up a node diff --git a/test/core/gc-lock.spec.js b/test/core/lock.spec.js similarity index 95% rename from test/core/gc-lock.spec.js rename to test/core/lock.spec.js index 96e9a3c838..32d2d5792d 100644 --- a/test/core/gc-lock.spec.js +++ b/test/core/lock.spec.js @@ -8,7 +8,7 @@ chai.use(dirtyChai) const parallel = require('async/parallel') const pull = require('pull-stream') -const GCLock = require('../../src/core/components/pin/gc-lock') +const Lock = require('../../src/core/components/pin/lock') const cbTakeLock = (type, lock, out, id, duration) => { return (cb) => lock[type + 'Lock']((lockCb) => { @@ -135,7 +135,7 @@ const expectResult = (out, exp, errs, expErrCount, done) => { const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockError }) => { describe(suiteName, () => { it('multiple simultaneous reads', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] parallel([ readLock(lock, out, 1, 100), @@ -152,7 +152,7 @@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) it('multiple simultaneous writes', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] parallel([ writeLock(lock, out, 1, 100), @@ -169,7 +169,7 @@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) it('read then write then read', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] parallel([ readLock(lock, out, 1, 100), @@ -186,7 +186,7 @@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) it('write then read then write', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] parallel([ writeLock(lock, out, 1, 100), @@ -203,7 +203,7 
@@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) it('two simultaneous reads then write then read', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] parallel([ readLock(lock, out, 1, 100), @@ -223,7 +223,7 @@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) it('two simultaneous writes then read then write', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] parallel([ writeLock(lock, out, 1, 100), @@ -243,7 +243,7 @@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) it('simultaneous reads with error then write', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] const errs = [] parallel([ @@ -261,7 +261,7 @@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) it('simultaneous writes with error then read', (done) => { - const lock = new GCLock() + const lock = new Lock() const out = [] const errs = [] parallel([ @@ -280,7 +280,7 @@ const runTests = (suiteName, { readLock, writeLock, readLockError, writeLockErro }) } -describe('gc-lock', function () { +describe('lock', function () { runTests('cb style lock', { readLock: cbReadLock, writeLock: cbWriteLock, diff --git a/test/core/pin.js b/test/core/pin.js index b6ad6a220c..b23b5aeb8a 100644 --- a/test/core/pin.js +++ b/test/core/pin.js @@ -331,4 +331,42 @@ describe('pin', function () { .then(ls => expect(ls.length).to.eql(1)) }) }) + + describe('locking', function () { + beforeEach(clearPins) + + const resolveAsync = (pFn) => { + return new Promise((resolve) => setTimeout(() => pFn().then(resolve))) + } + + it('concurrent adds', async function () { + const promises = [] + promises.push(pin.add(pins.mercuryWiki, { recursive: false })) + promises.push(resolveAsync(() => pin.add(pins.solarWiki))) + await Promise.all(promises) + const addLs = await pin.ls() + expect(addLs.length).to.eql(2) + }) + + it('concurrent rms', async function () { + const promises = [] + await pin.add(pins.mercuryWiki) + await pin.add(pins.solarWiki) + promises.push(pin.rm(pins.mercuryWiki)) + promises.push(resolveAsync(() => pin.rm(pins.solarWiki))) + await Promise.all(promises) + const rmLs = await pin.ls() + expect(rmLs.length).to.eql(0) + }) + + it('concurrent add and rm', async function () { + const promises = [] + await pin.add(pins.mercuryWiki) + promises.push(pin.add(pins.solarWiki)) + promises.push(resolveAsync(() => pin.rm(pins.mercuryWiki))) + await Promise.all(promises) + const addRmLs = await pin.ls() + expect(addRmLs.length).to.eql(1) + }) + }) })
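
For reference, a minimal usage sketch of the Lock class introduced in this last patch, as exercised by PinManager and the GC. The require path and the 'example:lock' debug name are illustrative; the callback signatures follow the cb-style cases in lock.spec.js:

    'use strict'

    const parallel = require('async/parallel')
    const Lock = require('./src/core/components/pin/lock')

    // `true` marks this process as the repo owner, so mortice runs the
    // mutex in single-process mode (no coordination with other processes)
    const lock = new Lock(true, 'example:lock')

    parallel([
      // Readers can hold the lock concurrently
      (cb) => lock.readLock((done) => {
        // ... read the in-memory pin sets ...
        done(null, 'read done')
      }, cb),

      // A writer waits for active readers and then runs exclusively,
      // which is what _addPins/rmPins/_flushPins rely on
      (cb) => lock.writeLock((done) => {
        // ... mutate pin sets and persist the root to the datastore ...
        done(null, 'write done')
      }, cb)
    ], (err, results) => {
      if (err) throw err
      console.log(results) // [ 'read done', 'write done' ]
    })

The lock also emits events such as 'readLock request', which the GC tests use (via p-event) to sequence an add against a concurrent gc run.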