Skip to content

Commit fb87f9d

Browse files
author
Alan Shaw
authored
feat: add HAMT sharded directories support (#41)
Integrates `@perma/map` to allow HAMT sharded directories to be created. This PR adds no automatic switch to HAMT; the consumer needs to explicitly use `createShardedDirectoryWriter` to build one.
1 parent 802c7db commit fb87f9d

9 files changed

+1157
-32
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ dist
22
.nyc_output
33
tmp
44
node_modules
5+
coverage

package.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,17 @@
2525
"test:web": "playwright-test test/**/*.spec.js --cov && nyc report",
2626
"test:node": "c8 --check-coverage --branches 95 --functions 83 --lines 94 mocha test/**/*.spec.js",
2727
"test": "npm run test:node",
28-
"coverage": "c8 --reporter=html mocha test/test-*.js && npm_config_yes=true npx st -d coverage -p 8080",
28+
"coverage": "c8 --reporter=html mocha test/**/*.spec.js && npm_config_yes=true npx st -d coverage -p 8080",
2929
"typecheck": "tsc --build",
3030
"test:convergence": "mocha test/convergence.js"
3131
},
3232
"dependencies": {
33-
"multiformats": "^11.0.1",
3433
"@ipld/dag-pb": "^4.0.0",
34+
"@multiformats/murmur3": "^2.1.3",
35+
"@perma/map": "^1.0.2",
3536
"@web-std/stream": "1.0.1",
3637
"actor": "^2.3.1",
38+
"multiformats": "^11.0.1",
3739
"protobufjs": "^7.1.2",
3840
"rabin-rs": "^2.1.0"
3941
},

src/directory/api.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ export interface View<Layout extends unknown = unknown> extends Writer<Layout> {
8282
readonly writer: BlockWriter
8383
readonly settings: EncoderSettings<Layout>
8484

85-
links(): IterableIterator<DirectoryEntryLink>
8685
state: State<Layout>
8786

8887
entries(): IterableIterator<[string, EntryLink]>

src/lib.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ export {
2121
set,
2222
remove,
2323
} from "./directory.js"
24+
export {
25+
create as createShardedDirectoryWriter,
26+
close as closeShardedDirectory,
27+
fork as forkShardedDirectory,
28+
} from "./sharded-directory.js"
2429

2530
/**
2631
* @template [Layout=unknown]

src/sharded-directory.js

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
2+
import * as PermaMap from "@perma/map"
3+
import * as UnixFSPermaMap from "@perma/map/unixfs"
4+
import * as PB from "@ipld/dag-pb"
5+
import { murmur364 } from "@multiformats/murmur3"
6+
import { Block } from 'multiformats/block'
7+
import * as API from "./directory/api.js"
8+
import * as File from "./file.js"
9+
import * as UnixFS from "./codec.js"
10+
import { set, remove } from "./directory.js"
11+
12+
export * from "./directory/api.js"
13+
export { set, remove } from "./directory.js"
14+
15+
// `configure` and `defaults` are not redefined for sharded directories; both
// are the very same functions exposed by "./file.js".
export const { configure, defaults } = File
17+
18+
/**
 * Creates a writer for a directory that is encoded as a HAMT sharded
 * directory when closed. The returned view starts out open (`closed: false`)
 * and backed by a fresh, empty `HashMap`.
 *
 * @template [Layout=unknown]
 * @param {API.Options<Layout>} config
 * @returns {API.View<Layout>}
 */
export const create = ({ writer, settings = defaults(), metadata = {} }) => {
  const state = {
    writer,
    metadata,
    settings,
    entries: new HashMap(),
    closed: false,
  }

  return new HAMTDirectoryWriter(state)
}
31+
32+
/**
 * Guard used before mutating a directory: hands the given state back
 * untouched while it is still open and throws once it has been closed.
 *
 * @template {API.State} Writer
 * @param {Writer} writer
 * @returns {Writer}
 */
const asWritable = writer => {
  if (writer.closed) {
    throw new Error("Can not change written HAMT directory, but you can .fork() and make changes to it")
  }

  return writer
}
44+
45+
/**
 * Closes a HAMT sharded directory: flags its state as closed, builds the
 * HAMT from the accumulated entries, hands every resulting shard block to the
 * state's writer and resolves with a link to the root shard.
 *
 * Rejects (through `asWritable`) when the directory has already been closed.
 *
 * @template {unknown} Layout
 * @param {{ state: API.State<Layout> }} view
 * @param {API.CloseOptions} options
 * @returns {Promise<UnixFS.DirectoryLink>}
 */
export const close = async (
  view,
  { closeWriter = false, releaseLock = false } = {}
) => {
  const { writer, settings } = asWritable(view.state)
  view.state.closed = true

  const { entries } = view.state
  /* c8 ignore next 3 */
  if (!(entries instanceof HashMap)) {
    throw new Error(`not a HAMT: ${entries}`)
  }

  const hamt = entries.builder.build()

  // Blocks arrive children-first, so whichever block is seen last is the
  // shard for the HAMT root.
  /** @type {UnixFS.BlockView<UnixFS.DirectoryShard>?} */
  let last = null
  for await (const shardBlock of iterateBlocks(hamt, hamt.root, settings)) {
    last = shardBlock

    // Wait for the writer to report spare capacity before queueing more.
    const capacity = writer.desiredSize || 0
    if (capacity <= 0) {
      await writer.ready
    }

    // Intentionally not awaited: we only need the block queued, not to know
    // when it is taken off the stream.
    writer.write(shardBlock)
  }
  /* c8 ignore next */
  if (last == null) throw new Error("no root block yielded")

  if (closeWriter) {
    await writer.close()
  } else if (releaseLock) {
    writer.releaseLock()
  }

  const { cid, bytes, value } = last
  return {
    cid,
    dagByteLength: UnixFS.cumulativeDagByteLength(bytes, value.entries),
  }
}
94+
95+
/**
 * Walks `node` and yields one encoded shard block per HAMT node. The blocks
 * of every child node are yielded before the block of the node that links to
 * them, so the final block produced is always the one for `node` itself.
 *
 * @template {unknown} Layout
 * @param {UnixFSPermaMap.PersistentHashMap<API.EntryLink>} hamt
 * @param {UnixFSPermaMap.BitmapIndexedNode<API.EntryLink>} node
 * @param {API.EncoderSettings<Layout>} settings
 * @returns {AsyncIterableIterator<UnixFS.BlockView<UnixFS.DirectoryShard>>}
 */
async function* iterateBlocks (hamt, node, settings) {
  /** @type {UnixFS.DirectoryEntryLink[]} */
  const links = []

  for (const item of UnixFSPermaMap.iterate(node)) {
    if (!('key' in item)) {
      // No key means `item` points at a child node: emit the child's blocks
      // first and remember the last one, which is the child's own shard.
      /** @type {UnixFS.BlockView<UnixFS.DirectoryShard>?} */
      let child = null
      for await (const block of iterateBlocks(hamt, item.node, settings)) {
        yield block
        child = block
      }
      /* c8 ignore next */
      if (child == null) throw new Error("no root block yielded")

      links.push(/** @type {UnixFS.ShardedDirectoryLink} */ ({
        name: item.prefix,
        dagByteLength: UnixFS.cumulativeDagByteLength(child.bytes, child.value.entries),
        cid: child.cid
      }))
      continue
    }

    // Keyed item: link straight to the stored entry, named prefix + key.
    links.push(/** @type {UnixFS.DirectoryEntryLink} */ ({
      name: `${item.prefix ?? ''}${item.key ?? ''}`,
      dagByteLength: item.value.dagByteLength,
      cid: item.value.cid,
    }))
  }

  const shard = UnixFS.createDirectoryShard(
    links,
    UnixFSPermaMap.bitField(node),
    UnixFSPermaMap.tableSize(hamt),
    murmur364.code
  )
  yield await encodeHAMTShardBlock(shard, settings)
}
138+
139+
/**
 * Serializes a directory shard, hashes the bytes with the configured hasher
 * and wraps bytes, link and decoded value into a block.
 *
 * @template {unknown} Layout
 * @param {UnixFS.DirectoryShard} shard
 * @param {API.EncoderSettings<Layout>} settings
 * @returns {Promise<UnixFS.BlockView<UnixFS.DirectoryShard>>}
 */
async function encodeHAMTShardBlock (shard, settings) {
  const bytes = UnixFS.encodeHAMTShard(shard)
  const digest = await settings.hasher.digest(bytes)
  const link = settings.linker.createLink(PB.code, digest)

  // @ts-ignore Link is not CID
  return new Block({ cid: link, bytes, value: shard })
}
152+
153+
/**
 * Produces a new, open sharded directory writer seeded with the entries of
 * an existing one. Writer, metadata and settings default to those of the
 * source state and may each be overridden through `options`.
 *
 * @template L1, L2
 * @param {API.View<L1>} state
 * @param {Partial<API.Options<L1|L2>>} options
 * @returns {API.View<L1|L2>}
 */
export const fork = (
  { state },
  {
    writer = state.writer,
    metadata = state.metadata,
    settings = state.settings,
  } = {}
) => {
  // Build a separate map from the source entries so the fork gets a builder
  // of its own.
  const snapshot = UnixFSPermaMap.from(state.entries.entries())

  return new HAMTDirectoryWriter({
    writer,
    metadata,
    settings,
    entries: new HashMap(snapshot.createBuilder()),
    closed: false,
  })
}
174+
175+
/**
176+
* @template [Layout=unknown]
177+
* @implements {API.View<Layout>}
178+
*/
179+
class HAMTDirectoryWriter {
180+
/**
181+
* @param {API.State<Layout>} state
182+
*/
183+
constructor(state) {
184+
this.state = state
185+
}
186+
get writer() {
187+
return this.state.writer
188+
}
189+
get settings() {
190+
return this.state.settings
191+
}
192+
193+
/**
194+
* @param {string} name
195+
* @param {UnixFS.FileLink | UnixFS.DirectoryLink} link
196+
* @param {API.WriteOptions} [options]
197+
*/
198+
199+
set(name, link, options) {
200+
return set(this, name, link, options)
201+
}
202+
203+
/**
204+
* @param {string} name
205+
*/
206+
remove(name) {
207+
return remove(this, name)
208+
}
209+
210+
/**
211+
* @template L
212+
* @param {Partial<API.Options<L>>} [options]
213+
* @returns {API.View<Layout|L>}
214+
*/
215+
fork(options) {
216+
return fork(this, options)
217+
}
218+
219+
/**
220+
* @param {API.CloseOptions} [options]
221+
* @returns {Promise<UnixFS.DirectoryLink>}
222+
*/
223+
close(options) {
224+
return close(this, options)
225+
}
226+
227+
entries() {
228+
return this.state.entries.entries()
229+
}
230+
/**
231+
* @param {string} name
232+
*/
233+
has(name) {
234+
return this.state.entries.has(name)
235+
}
236+
get size() {
237+
return this.state.entries.size
238+
}
239+
}
240+
241+
/**
242+
* @implements {Map<string, API.EntryLink>}
243+
*/
244+
class HashMap extends Map {
245+
/**
246+
* @param {UnixFSPermaMap.HashMapBuilder} [builder]
247+
*/
248+
constructor (builder = UnixFSPermaMap.builder()) {
249+
super()
250+
/** @type {UnixFSPermaMap.HashMapBuilder} */
251+
this.builder = builder
252+
}
253+
254+
clear() {
255+
this.builder = UnixFSPermaMap.builder()
256+
}
257+
258+
/**
259+
* @param {string} key
260+
*/
261+
delete(key) {
262+
const { root } = this.builder
263+
this.builder.delete(key)
264+
return this.builder.root !== root
265+
}
266+
267+
/**
268+
* @param {(value: API.EntryLink, key: string, map: Map<string, API.EntryLink>) => void} callbackfn
269+
* @param {any} [thisArg]
270+
*/
271+
forEach(callbackfn, thisArg = this) {
272+
for (const [k, v] of this.builder.root.entries()) {
273+
callbackfn.call(thisArg, v, k, this)
274+
}
275+
}
276+
277+
/**
278+
* @param {string} key
279+
*/
280+
get(key) {
281+
return PermaMap.get(this.builder, key)
282+
}
283+
284+
/**
285+
* @param {string} key
286+
*/
287+
has(key) {
288+
return PermaMap.has(this.builder, key)
289+
}
290+
291+
/**
292+
* @param {string} key
293+
* @param {API.EntryLink} value
294+
*/
295+
set(key, value) {
296+
this.builder.set(key, value)
297+
return this
298+
}
299+
300+
get size () {
301+
return this.builder.size
302+
}
303+
304+
[Symbol.iterator]() {
305+
return this.builder.root.entries()
306+
}
307+
308+
entries() {
309+
return this.builder.root.entries()
310+
}
311+
312+
keys() {
313+
return this.builder.root.keys()
314+
}
315+
316+
values() {
317+
return this.builder.root.values()
318+
}
319+
}

src/unixfs.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type {
66
Link as IPLDLink,
77
Version as LinkVersion,
88
Block as IPLDBlock,
9+
BlockView as IPLDBlockView
910
} from "multiformats"
1011
import { Data, type IData } from "../gen/unixfs.js"
1112
export type { MultihashHasher, MultibaseEncoder, MultihashDigest, BlockEncoder }
@@ -401,3 +402,10 @@ export interface Block<
401402
A extends number = number,
402403
V extends LinkVersion = LinkVersion
403404
> extends IPLDBlock<T, C, A, V> {}
405+
406+
/**
 * Local name for the `BlockView` interface imported from "multiformats",
 * with the same four type parameters passed straight through.
 */
export interface BlockView<T = unknown, C extends number = number, A extends number = number, V extends LinkVersion = LinkVersion>
  extends IPLDBlockView<T, C, A, V> {}

0 commit comments

Comments
 (0)