diff --git a/README.md b/README.md index da32517..ade47c6 100644 --- a/README.md +++ b/README.md @@ -141,8 +141,8 @@ The input's file paths and directory structure will be preserved in the [`dag-pb - `onlyHash` (boolean, defaults to false): Only chunk and hash - do not write to disk - `hashAlg` (string): multihash hashing algorithm to use - `cidVersion` (integer, default 0): the CID version to use when storing the data (storage keys are based on the CID, _including_ it's version) -- `rawLeaves` (boolean, defaults to false): When a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes -- `leafType` (string, defaults to `'file'`) what type of UnixFS node leaves should be - can be `'file'` or `'raw'` (ignored when `rawLeaves` is `true`) +- `rawLeaves` (boolean, defaults to true): When a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes +- `leafType` (string, defaults to `'file'`) what type of UnixFS node leaves should be - can be `'file'` or `'raw'` (ignored when `rawLeaves` is explicitly set to `true`) - `blockWriteConcurrency` (positive integer, defaults to 10) How many blocks to hash and write to the block store concurrently. For small numbers of large files this should be high (e.g. 50). - `fileImportConcurrency` (number, defaults to 50) How many files to import concurrently. For large numbers of small files this should be high (e.g. 50). diff --git a/package.json b/package.json index 41b6cab..62200b9 100644 --- a/package.json +++ b/package.json @@ -48,7 +48,6 @@ "ipld-in-memory": "^3.0.0", "it-buffer-stream": "^1.0.0", "it-last": "^1.0.0", - "multihashes": "^0.4.14", "nyc": "^15.0.0", "sinon": "^8.0.4" }, diff --git a/src/dag-builder/file/index.js b/src/dag-builder/file/index.js index 0f32c33..bf946e0 100644 --- a/src/dag-builder/file/index.js +++ b/src/dag-builder/file/index.js @@ -9,6 +9,7 @@ const { } = require('ipld-dag-pb') const all = require('it-all') const parallelBatch = require('it-parallel-batch') +const mc = require('multicodec') const dagBuilders = { flat: require('./flat'), @@ -52,6 +53,24 @@ const reduce = (file, ipld, options) => { if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) { const leaf = leaves[0] + // TODO: fix this for when onlyHash is passed + if (leaf.cid.codec === 'raw' && !options.onlyHash) { + // only one leaf node which is a buffer + const buffer = await ipld.get(leaf.cid) + + leaf.unixfs = new UnixFS({ + type: 'file', + mtime: file.mtime, + mode: file.mode, + data: buffer + }) + + const node = new DAGNode(leaf.unixfs.marshal()) + + leaf.cid = await ipld.put(node, mc.DAG_PB, options) + leaf.size = node.size + } + return { cid: leaf.cid, path: file.path, @@ -69,8 +88,8 @@ const reduce = (file, ipld, options) => { const links = leaves .filter(leaf => { - if (leaf.cid.codec === 'raw' && leaf.size) { - return true + if (leaf.cid.codec === 'raw') { + return Boolean(leaf.size) } if (!leaf.unixfs.data && leaf.unixfs.fileSize()) { diff --git a/src/index.js b/src/index.js index 052acff..adba151 100644 --- a/src/index.js +++ b/src/index.js @@ -6,7 +6,7 @@ const mergeOptions = require('merge-options').bind({ ignoreUndefined: true }) const defaultOptions = { chunker: 'fixed', strategy: 'balanced', // 'flat', 'trickle' - rawLeaves: false, + rawLeaves: true, onlyHash: false, reduceSingleLeafToSelf: true, codec: 'dag-pb', @@ -48,7 +48,7 @@ module.exports = async function * (source, ipld, options = {}) { opts.rawLeaves = true } - // go-ifps trickle dag defaults to unixfs raw leaves, balanced dag defaults to file leaves + // go-ipfs trickle dag defaults to unixfs raw leaves, balanced dag defaults to file leaves if (options.strategy === 'trickle') { opts.leafType = 'raw' opts.reduceSingleLeafToSelf = false @@ -58,6 +58,12 @@ module.exports = async function * (source, ipld, options = {}) { opts.codec = options.format } + if (options.leafType && options.rawLeaves == null) { + // if the user has specified a custom UnixFS leaf type and not specified + // raw leaves, really do not use raw leaves + opts.rawLeaves = false + } + let dagBuilder if (typeof options.dagBuilder === 'function') { diff --git a/src/utils/persist.js b/src/utils/persist.js index 69e5af7..e6970b6 100644 --- a/src/utils/persist.js +++ b/src/utils/persist.js @@ -1,6 +1,6 @@ 'use strict' -const mh = require('multihashes') +const mh = require('multihashing-async').multihash const mc = require('multicodec') const persist = (node, ipld, options) => { diff --git a/test/builder.spec.js b/test/builder.spec.js index bbeb9d0..c9d5522 100644 --- a/test/builder.spec.js +++ b/test/builder.spec.js @@ -4,7 +4,7 @@ const chai = require('chai') chai.use(require('dirty-chai')) const expect = chai.expect -const mh = require('multihashes') +const mh = require('multihashing-async').multihash const IPLD = require('ipld') const inMemory = require('ipld-in-memory') const UnixFS = require('ipfs-unixfs') diff --git a/test/chunker-custom.spec.js b/test/chunker-custom.spec.js index 2dd00f0..5d445b3 100644 --- a/test/chunker-custom.spec.js +++ b/test/chunker-custom.spec.js @@ -46,7 +46,7 @@ describe('custom chunker', function () { for await (const part of importer([{ path: 'test', content }], inmem, { chunker })) { - expect(part.size).to.equal(116) + expect(part.size).to.equal(104) } }) @@ -69,5 +69,5 @@ describe('custom chunker', function () { cid: await inmem.put(Buffer.from('hello world'), mc.RAW) } } - it('works with single part', fromPartsTest(single, 11)) + it('works with single part', fromPartsTest(single, 19)) }) diff --git a/test/hash-parity-with-go-ipfs.spec.js b/test/hash-parity-with-go-ipfs.spec.js index 94e44fd..66a4ebd 100644 --- a/test/hash-parity-with-go-ipfs.spec.js +++ b/test/hash-parity-with-go-ipfs.spec.js @@ -25,7 +25,8 @@ const expectedHashes = { strategies.forEach(strategy => { const options = { - strategy: strategy + strategy: strategy, + rawLeaves: false } describe('go-ipfs interop using importer:' + strategy, () => { diff --git a/test/importer.spec.js b/test/importer.spec.js index cf6784e..652d47d 100644 --- a/test/importer.spec.js +++ b/test/importer.spec.js @@ -47,7 +47,7 @@ const baseFiles = { path: '200Bytes.txt' }, '1.2MiB.txt': { - cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q', + cid: 'QmQLTvhjmSa7657mKdSfTjxFBdwxmK8n9tZC9Xdp9DtxWY', size: 1258000, type: 'file', path: '1.2MiB.txt' @@ -64,19 +64,19 @@ const strategyBaseFiles = { flat: baseFiles, balanced: extend({}, baseFiles, { '1.2MiB.txt': { - cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q', + cid: 'QmQLTvhjmSa7657mKdSfTjxFBdwxmK8n9tZC9Xdp9DtxWY', type: 'file' } }), trickle: extend({}, baseFiles, { '200Bytes.txt': { - cid: 'QmY8bwnoKAKvJ8qtyPhWNxSS6sxiGVTJ9VpdQffs2KB5pE', + cid: 'QmagyRwMfYhczYNv5SvcJc8xxXjZQBTTHS2jEqNMva2mYT', size: 200, type: 'file', path: '200Bytes.txt' }, '1.2MiB.txt': { - cid: 'QmfAxsHrpaLLuhbqqbo9KQyvQNawMnVSwutYoJed75pnco', + cid: 'QmQLTvhjmSa7657mKdSfTjxFBdwxmK8n9tZC9Xdp9DtxWY', type: 'file' } }) @@ -91,25 +91,25 @@ const strategies = [ const strategyOverrides = { balanced: { 'foo-big': { - cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj', + cid: 'QmR6jJgszuuWxVDVc4PCRa6domvNf6guQK7pboZnbwnht1', path: 'foo-big', size: 1335478, type: 'directory' }, pim: { - cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK', path: 'pim', size: 1335744, type: 'directory' }, 'pam/pum': { - cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK', path: 'pam/pum', size: 1335744, type: 'directory' }, pam: { - cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av', + cid: 'QmTQcBCQvVR68R3r4eJwKL4VwDci1TCH5cj1njmcQ95Eif', path: 'pam', size: 2671269, type: 'directory' @@ -117,25 +117,25 @@ const strategyOverrides = { }, trickle: { 'foo-big': { - cid: 'QmaKbhFRy9kcCbcwrLsqYHWMiY44BDYkqTCMpAxDdd2du2', + cid: 'QmR6jJgszuuWxVDVc4PCRa6domvNf6guQK7pboZnbwnht1', path: 'foo-big', size: 1334657, type: 'directory' }, pim: { - cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt', + cid: 'QmZmrY8nLks6N66f68c1biFpay3dSVy9PGvVRZLgT2T2gD', path: 'pim', size: 1334923, type: 'directory' }, 'pam/pum': { - cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt', + cid: 'QmZmrY8nLks6N66f68c1biFpay3dSVy9PGvVRZLgT2T2gD', path: 'pam/pum', size: 1334923, type: 'directory' }, pam: { - cid: 'QmSuh47G9Qm3PFv1zziojtHxqCjuurSdtWAzxLxoKJPq2U', + cid: 'Qmf1necujkKZHrYyj56eKFKAi9DPH8UyZg44KaTciso2Sc', path: 'pam', size: 2669627, type: 'directory' @@ -147,19 +147,19 @@ const strategyOverrides = { type: 'file' }, 'foo/bar': { - cid: 'QmTGMxKPzSGNBDp6jhTwnZxGW6w1S9ciyycRJ4b2qcQaHK', + cid: 'QmcKJQd9cH6sip78HbjkGVLshjxtjfvnZ5ih169kZUK3Yg', size: 0, path: 'foo/bar', type: 'directory' }, foo: { - cid: 'Qme4A8fZmwfZESappfPcxSMTZVACiEzhHKtYRMuM1hbkDp', + cid: 'QmQG6CVVTiz1TwnnueRMBZxB6d5WGJakRocHe4KGz5yHzg', size: 0, path: 'foo', type: 'directory' }, 'small.txt': { - cid: 'QmXmZ3qT328JxWtQXqrmvma2FmPp7tMdNiSuYvVJ5QRhKs', + cid: 'QmX2YDaeaAFbyNK4gat5jQMJhZL171n5Qummc5jcMsmXUu', size: 15, type: 'file', path: 'small.txt' @@ -225,7 +225,7 @@ strategies.forEach((strategy) => { }), 'foo-big': { path: 'foo-big', - cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj', + cid: 'QmR6jJgszuuWxVDVc4PCRa6domvNf6guQK7pboZnbwnht1', size: 1328120, type: 'directory' }, @@ -237,7 +237,7 @@ strategies.forEach((strategy) => { }), pim: { path: 'pim', - cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK', size: 1328386, type: 'directory' }, @@ -248,19 +248,19 @@ strategies.forEach((strategy) => { type: 'directory' }, 'pam/pum': { - cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK', path: 'pam/pum', size: 1328386, type: 'directory' }, pam: { - cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av', + cid: 'QmTQcBCQvVR68R3r4eJwKL4VwDci1TCH5cj1njmcQ95Eif', path: 'pam', size: 2656553, type: 'directory' }, '200Bytes.txt with raw leaves': extend({}, baseFiles['200Bytes.txt'], { - cid: 'zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q', + cid: 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8', size: 200 }) }, strategyOverrides[strategy])