
Commit 11885fa

fix: build & export interop with go-ipfs for small file raw leaves
For files smaller than the maximum chunk size imported with raw leaves enabled, go-ipfs creates a single node that is a raw buffer. Prior to this PR, js-ipfs created a unixfs file node whose data was the raw buffer, which resulted in different hashes for the same file. This PR changes the builder to do the same thing as go-ipfs, and adds a resolver to the exporter that allows it to export a node that is a single raw buffer (so that you can `ipfs cat [CID w/ codec raw]` as you can in go-ipfs).

License: MIT
Signed-off-by: Alan Shaw <[email protected]>
1 parent fef6e9b commit 11885fa

12 files changed: +164 −94 lines
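The test changes below pin down the expected behaviour. As a minimal sketch (assuming `ipld` and the `smallFile` fixture are set up as in this repo's test suite), importing a sub-chunk-size file with `rawLeaves: true` now round-trips to the same raw-codec CID that go-ipfs produces for `ipfs add --raw-leaves`:

```js
'use strict'

const pull = require('pull-stream')
const CID = require('cids')
const { importer, exporter } = require('ipfs-unixfs-engine')

// Sketch only: `ipld` is an IPLD resolver and `smallFile` a <256KiB fixture
// buffer, set up as in test/exporter.js below
pull(
  pull.values([{ path: '200Bytes.txt', content: pull.values([smallFile]) }]),
  importer(ipld, { rawLeaves: true }),
  pull.collect((err, imported) => {
    if (err) throw err
    pull(
      exporter(imported[0].multihash, ipld),
      pull.collect((err, files) => {
        if (err) throw err
        // A CIDv1 with the `raw` codec, matching go-ipfs:
        // zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q
        console.log(new CID(files[0].hash).toBaseEncodedString())
      })
    )
  })
)
```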

src/builder/reduce.js

Lines changed: 7 additions & 45 deletions
```diff
@@ -13,51 +13,13 @@ module.exports = function reduce (file, ipld, options) {
   if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
     const leaf = leaves[0]
 
-    if (options.leafType === 'file' && !options.rawLeaves) {
-      return callback(null, {
-        path: file.path,
-        multihash: leaf.multihash,
-        size: leaf.size,
-        leafSize: leaf.leafSize,
-        name: leaf.name
-      })
-    }
-
-    // we're using raw leaf nodes so we convert the node into a UnixFS `file` node.
-    return waterfall([
-      (cb) => ipld.get(leaf.cid, cb),
-      (result, cb) => {
-        // If result.value is a buffer, this is a raw leaf otherwise it's a dag-pb node
-        const data = Buffer.isBuffer(result.value) ? result.value : result.value.data
-        const fileNode = new UnixFS('file', data)
-
-        DAGNode.create(fileNode.marshal(), [], options.hashAlg, (error, node) => {
-          cb(error, { DAGNode: node, fileNode: fileNode })
-        })
-      },
-      (result, cb) => {
-        if (options.onlyHash) {
-          return cb(null, result)
-        }
-
-        let cid = new CID(result.DAGNode.multihash)
-
-        if (options.cidVersion === 1) {
-          cid = cid.toV1()
-        }
-
-        ipld.put(result.DAGNode, { cid }, (error) => cb(error, result))
-      },
-      (result, cb) => {
-        cb(null, {
-          path: file.path,
-          multihash: result.DAGNode.multihash,
-          size: result.DAGNode.size,
-          leafSize: result.fileNode.fileSize(),
-          name: leaf.name
-        })
-      }
-    ], callback)
+    return callback(null, {
+      path: file.path,
+      multihash: leaf.multihash,
+      size: leaf.size,
+      leafSize: leaf.leafSize,
+      name: leaf.name
+    })
   }
 
   // create a parent node and add all the leaves
```

src/exporter/clean-multihash.js

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/exporter/dir-flat.js

Lines changed: 2 additions & 2 deletions
```diff
@@ -6,14 +6,14 @@ const cat = require('pull-cat')
 // Logic to export a unixfs directory.
 module.exports = dirExporter
 
-function dirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) {
+function dirExporter (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) {
   const accepts = pathRest[0]
 
   const dir = {
     name: name,
     depth: depth,
     path: path,
-    hash: node.multihash,
+    hash: cid,
     size: node.size,
     type: 'dir'
   }
```

src/exporter/dir-hamt-sharded.js

Lines changed: 2 additions & 3 deletions
```diff
@@ -2,19 +2,18 @@
 
 const pull = require('pull-stream')
 const cat = require('pull-cat')
-const cleanHash = require('./clean-multihash')
 
 // Logic to export a unixfs directory.
 module.exports = shardedDirExporter
 
-function shardedDirExporter (node, name, path, pathRest, resolve, size, dag, parent, depth) {
+function shardedDirExporter (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) {
   let dir
   if (!parent || (parent.path !== path)) {
     dir = {
       name: name,
       depth: depth,
       path: path,
-      hash: cleanHash(node.multihash),
+      hash: cid,
       size: node.size,
       type: 'dir'
     }
```
src/exporter/extract-data-from-block.js

Lines changed: 23 additions & 0 deletions

```diff
@@ -0,0 +1,23 @@
+'use strict'
+
+module.exports = function extractDataFromBlock (block, streamPosition, begin, end) {
+  const blockLength = block.length
+
+  if (begin >= streamPosition + blockLength) {
+    // If begin is after the start of the block, return an empty block
+    // This can happen when internal nodes contain data
+    return Buffer.alloc(0)
+  }
+
+  if (end - streamPosition < blockLength) {
+    // If the end byte is in the current block, truncate the block to the end byte
+    block = block.slice(0, end - streamPosition)
+  }
+
+  if (begin > streamPosition && begin < (streamPosition + blockLength)) {
+    // If the start byte is in the current block, skip to the start byte
+    block = block.slice(begin - streamPosition)
+  }
+
+  return block
+}
```
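The helper is a pure function, so its slicing semantics are easy to see in isolation. A quick sketch with illustrative values (a 10-byte block sitting at position 100 of the overall byte stream):

```js
'use strict'

const extractDataFromBlock = require('./extract-data-from-block')

const block = Buffer.from('0123456789') // 10 bytes at stream position 100

// Bytes 103..106 of the stream fall inside this block (end is exclusive)
console.log(extractDataFromBlock(block, 100, 103, 107).toString()) // '3456'

// A range that starts past the end of this block yields an empty buffer
console.log(extractDataFromBlock(block, 100, 115, 120).length) // 0
```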

src/exporter/file.js

Lines changed: 3 additions & 24 deletions
```diff
@@ -5,9 +5,10 @@ const UnixFS = require('ipfs-unixfs')
 const CID = require('cids')
 const pull = require('pull-stream')
 const paramap = require('pull-paramap')
+const extractDataFromBlock = require('./extract-data-from-block')
 
 // Logic to export a single (possibly chunked) unixfs file.
-module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => {
+module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => {
   const accepts = pathRest[0]
 
   if (accepts !== undefined && accepts !== path) {
@@ -48,7 +49,7 @@ module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth,
       content: content,
       name: name,
       path: path,
-      hash: node.multihash,
+      hash: cid,
       size: fileSize,
       type: 'file'
     }])
@@ -149,25 +150,3 @@ function streamBytes (dag, node, fileSize, offset, length) {
     pull.filter(Boolean)
   )
 }
-
-function extractDataFromBlock (block, streamPosition, begin, end) {
-  const blockLength = block.length
-
-  if (begin >= streamPosition + blockLength) {
-    // If begin is after the start of the block, return an empty block
-    // This can happen when internal nodes contain data
-    return Buffer.alloc(0)
-  }
-
-  if (end - streamPosition < blockLength) {
-    // If the end byte is in the current block, truncate the block to the end byte
-    block = block.slice(0, end - streamPosition)
-  }
-
-  if (begin > streamPosition && begin < (streamPosition + blockLength)) {
-    // If the start byte is in the current block, skip to the start byte
-    block = block.slice(begin - streamPosition)
-  }
-
-  return block
-}
```

src/exporter/object.js

Lines changed: 1 addition & 1 deletion
```diff
@@ -3,7 +3,7 @@
 const CID = require('cids')
 const pull = require('pull-stream')
 
-module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth) => {
+module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth) => {
   let newNode
   if (pathRest.length) {
     const pathElem = pathRest[0]
```

src/exporter/raw.js

Lines changed: 57 additions & 0 deletions
```diff
@@ -0,0 +1,57 @@
+'use strict'
+
+const pull = require('pull-stream')
+const extractDataFromBlock = require('./extract-data-from-block')
+
+// Logic to export a single raw block
+module.exports = (cid, node, name, path, pathRest, resolve, size, dag, parent, depth, offset, length) => {
+  const accepts = pathRest[0]
+
+  if (accepts !== undefined && accepts !== path) {
+    return pull.empty()
+  }
+
+  size = size || node.length
+
+  if (offset < 0) {
+    return pull.error(new Error('Offset must be greater than 0'))
+  }
+
+  if (offset > size) {
+    return pull.error(new Error('Offset must be less than the file size'))
+  }
+
+  if (length < 0) {
+    return pull.error(new Error('Length must be greater than or equal to 0'))
+  }
+
+  if (length === 0) {
+    return pull.once({
+      depth,
+      content: pull.once(Buffer.alloc(0)),
+      hash: cid,
+      name,
+      path,
+      size,
+      type: 'raw'
+    })
+  }
+
+  if (!offset) {
+    offset = 0
+  }
+
+  if (!length || (offset + length > size)) {
+    length = size - offset
+  }
+
+  return pull.once({
+    depth,
+    content: pull.once(extractDataFromBlock(node, 0, offset, offset + length)),
+    hash: cid,
+    name,
+    path,
+    size,
+    type: 'raw'
+  })
+}
```
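With this resolver in place, a raw-codec CID can be exported directly, including ranged reads. A sketch (assuming `cid` is a raw block already stored in `ipld`, e.g. the root of the raw-leaves import shown earlier, with `offset`/`length` passed as exporter options as in src/exporter/resolve.js):

```js
'use strict'

const pull = require('pull-stream')
const exporter = require('ipfs-unixfs-engine').exporter

// Sketch only: `cid` is a raw-codec CID already present in `ipld`
pull(
  exporter(cid, ipld, { offset: 5, length: 10 }), // cat a 10-byte range
  pull.collect((err, files) => {
    if (err) throw err
    pull(
      files[0].content, // pull-stream source of the requested bytes
      pull.collect((err, chunks) => {
        if (err) throw err
        console.log(Buffer.concat(chunks)) // bytes 5..14 of the block
      })
    )
  })
)
```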

src/exporter/resolve.js

Lines changed: 16 additions & 9 deletions
```diff
@@ -9,7 +9,8 @@ const resolvers = {
   directory: require('./dir-flat'),
   'hamt-sharded-directory': require('./dir-hamt-sharded'),
   file: require('./file'),
-  object: require('./object')
+  object: require('./object'),
+  raw: require('./raw')
 }
 
 module.exports = Object.assign({
@@ -31,39 +32,45 @@ function createResolver (dag, options, depth, parent) {
       if ((typeof item.depth) !== 'number') {
         return pull.error(new Error('no depth'))
       }
+
       if (item.object) {
-        return cb(null, resolveItem(item.object, item, options.offset, options.length))
+        return cb(null, resolveItem(null, item.object, item, options.offset, options.length))
       }
-      dag.get(new CID(item.multihash), (err, node) => {
+
+      const cid = new CID(item.multihash)
+
+      dag.get(cid, (err, node) => {
         if (err) {
           return cb(err)
         }
         // const name = item.fromPathRest ? item.name : item.path
-        cb(null, resolveItem(node.value, item, options.offset, options.length))
+        cb(null, resolveItem(cid, node.value, item, options.offset, options.length))
       })
     }),
     pull.flatten(),
     pull.filter(Boolean),
     pull.filter((node) => node.depth <= options.maxDepth)
   )
 
-  function resolveItem (node, item, offset, length) {
-    return resolve(node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth, offset, length)
+  function resolveItem (cid, node, item, offset, length) {
+    return resolve(cid, node, item.name, item.path, item.pathRest, item.size, dag, item.parent || parent, item.depth, offset, length)
   }
 
-  function resolve (node, name, path, pathRest, size, dag, parentNode, depth, offset, length) {
+  function resolve (cid, node, name, path, pathRest, size, dag, parentNode, depth, offset, length) {
     const type = typeOf(node)
     const nodeResolver = resolvers[type]
     if (!nodeResolver) {
       return pull.error(new Error('Unkown node type ' + type))
     }
     const resolveDeep = createResolver(dag, options, depth, node)
-    return nodeResolver(node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth, offset, length)
+    return nodeResolver(cid, node, name, path, pathRest, resolveDeep, size, dag, parentNode, depth, offset, length)
   }
 }
 
 function typeOf (node) {
-  if (Buffer.isBuffer(node.data)) {
+  if (Buffer.isBuffer(node)) {
+    return 'raw'
+  } else if (Buffer.isBuffer(node.data)) {
     return UnixFS.unmarshal(node.data).type
   } else {
     return 'object'
```
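The new `typeOf` branch works because of how IPLD deserialises each codec: a dag-pb block resolves to a node object whose `data` field holds the marshalled unixfs metadata, while a `raw` block resolves to the bytes themselves (the same distinction the deleted reduce.js code tested with `Buffer.isBuffer(result.value)`). A hypothetical check against the same IPLD instance the exporter uses:

```js
'use strict'

const CID = require('cids')

// Sketch only: `dag` is the IPLD resolver handed to the exporter. Resolving
// a raw-codec CID yields the block's bytes as a plain Buffer, which typeOf()
// above maps to the new 'raw' resolver
const cid = new CID('zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q')

dag.get(cid, (err, result) => {
  if (err) throw err
  console.log(Buffer.isBuffer(result.value)) // true -> resolved as 'raw'
})
```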

test/builder-dir-sharding.js

Lines changed: 1 addition & 1 deletion
```diff
@@ -115,7 +115,7 @@ module.exports = (repo) => {
       expect(nodes.length).to.be.eql(2)
       const expectedHash = new CID(shardedHash).toBaseEncodedString()
       expect(nodes[0].path).to.be.eql(expectedHash)
-      expect(nodes[0].hash).to.be.eql(expectedHash)
+      expect(new CID(nodes[0].hash).toBaseEncodedString()).to.be.eql(expectedHash)
       expect(nodes[1].path).to.be.eql(expectedHash + '/b')
       expect(nodes[1].size).to.be.eql(21)
       pull(
```

test/exporter.js

Lines changed: 32 additions & 1 deletion
```diff
@@ -30,6 +30,7 @@ const exporter = unixFSEngine.exporter
 const importer = unixFSEngine.importer
 
 const bigFile = loadFixture('test/fixtures/1.2MiB.txt')
+const smallFile = loadFixture('test/fixtures/200Bytes.txt')
 
 module.exports = (repo) => {
   describe('exporter', () => {
@@ -420,12 +421,42 @@ module.exports = (repo) => {
       )
     })
 
-    it('exports a large file > 5mb imported with raw leaves', function (done) {
+    it('exports a small file imported with raw leaves', function (done) {
       this.timeout(30 * 1000)
 
       pull(
         pull.values([{
           path: '200Bytes.txt',
+          content: pull.values([smallFile])
+        }]),
+        importer(ipld, {
+          rawLeaves: true
+        }),
+        pull.collect(collected)
+      )
+
+      function collected (err, files) {
+        expect(err).to.not.exist()
+        expect(files.length).to.equal(1)
+
+        pull(
+          exporter(files[0].multihash, ipld),
+          pull.collect((err, files) => {
+            expect(err).to.not.exist()
+            expect(new CID(files[0].hash).toBaseEncodedString()).to.equal('zb2rhXrz1gkCv8p4nUDZRohY6MzBE9C3HVTVDP72g6Du3SD9Q')
+
+            fileEql(files[0], smallFile, done)
+          })
+        )
+      }
+    })
+
+    it('exports a large file > 1mb imported with raw leaves', function (done) {
+      this.timeout(30 * 1000)
+
+      pull(
+        pull.values([{
+          path: '1.2MiB.txt',
           content: pull.values([bigFile])
         }]),
         importer(ipld, {
```