Skip to content

Array nesting: Add the ability to use N5-style nested layout #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.bc.zarr</groupId>
<artifactId>jzarr</artifactId>
<version>0.3.2</version>
<version>0.3.99-SNAPSHOT</version>

<properties>
<!-- needed in test scope to show examples -->
Expand Down
21 changes: 19 additions & 2 deletions src/main/java/com/bc/zarr/ArrayParams.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ public class ArrayParams {
private ByteOrder byteOrder = ByteOrder.BIG_ENDIAN;
private Number fillValue = 0;
private Compressor compressor = CompressorFactory.createDefaultCompressor();
private Boolean nested = null;

/**
* Sets the optional {@code nested} flag and returns a reference to this builder so that the methods
* can be chained together.
* <p>
* {@code true} signifies "nested" storage, with chunks in a directory tree rather than all in the same
* directory. {@code false} signifies a "flat" layout, with all chunks in a single directory and the
* parts of the chunk index separated by a ".". The builder's default is {@code null}, meaning the layout
* has not been specified.
*
* @param nested {@code true} for a nested layout, {@code false} for a flat layout, or {@code null}
*               to leave the layout unspecified
* @return this builder
*/
public ArrayParams nested(Boolean nested) {
this.nested = nested;
return this;
}

/**
* Sets the mandatory {@code shape} and returns a reference to this Builder so that the methods can be chained together.
Expand Down Expand Up @@ -212,7 +224,7 @@ Params build() {
}
}

return new Params(shape, chunks, dataType, byteOrder, fillValue, compressor);
return new Params(shape, chunks, dataType, byteOrder, fillValue, compressor, nested);
}

/**
Expand All @@ -225,14 +237,16 @@ public static final class Params {
private final ByteOrder byteOrder;
private final Number fillValue;
private final Compressor compressor;
private final Boolean nested;

private Params(int[] shape, int[] chunks, DataType dataType, ByteOrder byteOrder, Number fillValue, Compressor compressor) {
private Params(int[] shape, int[] chunks, DataType dataType, ByteOrder byteOrder, Number fillValue, Compressor compressor, Boolean nested) {
this.shape = shape;
this.chunks = chunks;
this.dataType = dataType;
this.byteOrder = byteOrder;
this.fillValue = fillValue;
this.compressor = compressor;
this.nested = nested;
}

public int[] getShape() {
Expand Down Expand Up @@ -263,6 +277,8 @@ public Compressor getCompressor() {
return compressor;
}

/**
 * Returns the optional {@code nested} flag exactly as it was passed to the constructor.
 *
 * @return {@code true} for a nested layout, {@code false} for a flat layout, or {@code null}
 *         if the layout was not specified
 */
public Boolean getNested() {
    return nested;
}

public ArrayParams toBuilder() {
ArrayParams builder = new ArrayParams();
builder.shape = getShape();
Expand All @@ -272,6 +288,7 @@ public ArrayParams toBuilder() {
builder.byteOrder = getByteOrder();
builder.fillValue = getFillValue();
builder.compressor = getCompressor();
builder.nested = getNested();
return builder;
}
}
Expand Down
103 changes: 86 additions & 17 deletions src/main/java/com/bc/zarr/ZarrArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@

public class ZarrArray {

private final static boolean DEFAULT_NESTED;
static {
DEFAULT_NESTED = Boolean.parseBoolean(System.getProperty("jzarr.nested", "false"));
}

private final int[] _shape;
private final int[] _chunks;
private final ZarrPath relativePath;
Expand All @@ -56,8 +61,9 @@ public class ZarrArray {
private final Compressor _compressor;
private final Store _store;
private final ByteOrder _byteOrder;
private final Boolean _nested;

private ZarrArray(ZarrPath relativePath, int[] shape, int[] chunkShape, DataType dataType, ByteOrder order, Number fillValue, Compressor compressor, Store store) {
private ZarrArray(ZarrPath relativePath, int[] shape, int[] chunkShape, DataType dataType, ByteOrder order, Number fillValue, Compressor compressor, Store store, Boolean nested) {
this.relativePath = relativePath;
_shape = shape;
_chunks = chunkShape;
Expand All @@ -72,8 +78,12 @@ private ZarrArray(ZarrPath relativePath, int[] shape, int[] chunkShape, DataType
_chunkReaderWriter = ChunkReaderWriter.create(_compressor, _dataType, order, _chunks, _fillValue, _store);
_chunkFilenames = new HashMap<>();
_byteOrder = order;
_nested = nested;
}

// Overload without the nested flag: delegates to the full constructor,
// passing null so that the chunk layout is left unspecified.
private ZarrArray(ZarrPath relativePath, int[] shape, int[] chunkShape, DataType dataType, ByteOrder order, Number fillValue, Compressor compressor, Store store) {
this(relativePath, shape, chunkShape, dataType, order, fillValue, compressor, store, null);
}
public static ZarrArray open(String path) throws IOException {
return open(Paths.get(path));
}
Expand All @@ -88,24 +98,78 @@ public static ZarrArray open(Store store) throws IOException {

public static ZarrArray open(ZarrPath relativePath, Store store) throws IOException {
final ZarrPath zarrHeaderPath = relativePath.resolve(FILENAME_DOT_ZARRAY);
ZarrHeader header;

try (final InputStream storageStream = store.getInputStream(zarrHeaderPath.storeKey)){
if(storageStream == null) {
throw new IOException("'" + FILENAME_DOT_ZARRAY + "' expected but is not readable or missing in store.");
}

try (BufferedReader reader = new BufferedReader(new InputStreamReader(storageStream))) {
final ZarrHeader header = ZarrUtils.fromJson(reader, ZarrHeader.class);
final int[] shape = header.getShape();
final int[] chunks = header.getChunks();
final DataType dataType = header.getRawDataType();
final ByteOrder byteOrder = header.getByteOrder();
final Number fillValue = header.getFill_value();
Compressor compressor = header.getCompressor();
if (compressor == null) {
compressor = nullCompressor;
header = ZarrUtils.fromJson(reader, ZarrHeader.class);
}
}

final int[] shape = header.getShape();
final int[] chunks = header.getChunks();
final DataType dataType = header.getRawDataType();
final ByteOrder byteOrder = header.getByteOrder();
final Number fillValue = header.getFill_value();
Compressor compressor = header.getCompressor();
if (compressor == null) {
compressor = nullCompressor;
}

// Workaround: In Zarr V2 there is no way to know whether or not an array is
// nested or flat. If the files were guaranteed to be local, we could check for _any_
// file of the form \d.\d*, but since the chunks may be stored elsewhere, we're going
// to loop through all possible chunks and check their existence in one of the two possible
// locations. Once one is found, we will assume _all_ follow the same pattern.
Boolean nested = header.getNested();
if (nested == null) {

// Loop through all dimensions
int n = shape.length;
int[] ptr = new int[n];
long total = 1;
for (int i : shape) {
total *= i;
}

for (int ignore = 0; ignore < total; ignore++) { // essentially a while(true) for our known maximum.
// This n-dimensional loop walks through every index combination across all dimensions.
for (boolean test : Arrays.asList(true, false)) {
final String chunkFilename = ZarrUtils.createChunkFilename(ptr, test);
final ZarrPath chunkFilePath = relativePath.resolve(chunkFilename);
try (final InputStream storageStream = store.getInputStream(chunkFilePath.storeKey)) {
if (storageStream != null) { // TODO: test available() ?
nested = test;
break;
}
}
}
if (nested != null) {
break;
} else {
for(int j = 0; j < n; j++) {
ptr[j]++;
if(ptr[j] < shape[j]) {
break;
} else {
ptr[j] = 0;
}
}
}
return new ZarrArray(relativePath, shape, chunks, dataType, byteOrder, fillValue, compressor, store);
}

if (nested == null) {
// In this case, *no* chunk was found. Something is almost certainly wrong.
// However, not throwing an exception since there is a possibility that a client
// created an array but did not yet write data.
// TODO: logging
}
}
return new ZarrArray(relativePath, shape, chunks, dataType, byteOrder, fillValue, compressor, store, nested);
}

public static ZarrArray create(ArrayParams arrayParams) throws IOException {
Expand Down Expand Up @@ -155,7 +219,8 @@ public static ZarrArray create(ZarrPath relativePath, Store store, ArrayParams a
final Number fillValue = params.getFillValue();
final Compressor compressor = params.getCompressor();
final ByteOrder byteOrder = params.getByteOrder();
final ZarrArray zarrArray = new ZarrArray(relativePath, shape, chunks, dataType, byteOrder, fillValue, compressor, store);
final Boolean nested = params.getNested();
final ZarrArray zarrArray = new ZarrArray(relativePath, shape, chunks, dataType, byteOrder, fillValue, compressor, store, nested);
zarrArray.writeZArrayHeader();
zarrArray.writeAttributes(attributes);
return zarrArray;
Expand All @@ -181,6 +246,8 @@ public ByteOrder getByteOrder() {
return _byteOrder;
}

/**
 * Returns whether this array uses the nested chunk layout.
 * <p>
 * The stored flag is a nullable {@code Boolean}. When it is {@code null} the
 * default derived from the {@code jzarr.nested} system property is returned,
 * which is the same fallback that {@code read} and {@code write} apply.
 * Returning the field directly would throw a {@link NullPointerException}
 * on auto-unboxing in that case.
 *
 * @return {@code true} for the nested layout, {@code false} for the flat layout
 */
public boolean getNested() {
    return _nested == null ? DEFAULT_NESTED : _nested;
}

public void write(Number value) throws IOException, InvalidRangeException {
final int[] shape = getShape();
final int[] offset = new int[shape.length];
Expand All @@ -196,9 +263,10 @@ public void write(Object data, int[] dataShape, int[] offset) throws IOException
final int[][] chunkIndices = ZarrUtils.computeChunkIndices(_shape, _chunks, dataShape, offset);
ucar.ma2.DataType dataType = ucar.ma2.DataType.getType(data.getClass().getComponentType(), false);
final Array source = Array.factory(dataType, dataShape, data);
final boolean nested = _nested == null ? DEFAULT_NESTED : _nested;

for (int[] chunkIndex : chunkIndices) {
final String chunkFilename = getChunkFilename(chunkIndex);
final String chunkFilename = getChunkFilename(chunkIndex, nested);
final ZarrPath chunkFilePath = relativePath.resolve(chunkFilename);
final int[] fromBufferPos = computeFrom(chunkIndex, offset, false);
synchronized (chunkFilename) {
Expand Down Expand Up @@ -237,13 +305,14 @@ public void read(Object buffer, int[] bufferShape, int[] offset) throws IOExcept
}
final int targetSize = java.lang.reflect.Array.getLength(buffer);
final long expectedSize = ZarrUtils.computeSize(bufferShape);
final boolean nested = _nested == null ? DEFAULT_NESTED : _nested;
if (targetSize != expectedSize) {
throw new IOException("Expected target buffer size is " + expectedSize + " but was " + targetSize);
}
final int[][] chunkIndices = ZarrUtils.computeChunkIndices(_shape, _chunks, bufferShape, offset);

for (int[] chunkIndex : chunkIndices) {
final String chunkFilename = getChunkFilename(chunkIndex);
final String chunkFilename = getChunkFilename(chunkIndex, nested);
final ZarrPath chunkFilePath = relativePath.resolve(chunkFilename);
final int[] fromChunkPos = computeFrom(chunkIndex, offset, true);
final Array sourceChunk = _chunkReaderWriter.read(chunkFilePath.storeKey);
Expand All @@ -256,8 +325,8 @@ public void read(Object buffer, int[] bufferShape, int[] offset) throws IOExcept
}
}

private synchronized String getChunkFilename(int[] chunkIndex) {
String chunkFilename = ZarrUtils.createChunkFilename(chunkIndex);
private synchronized String getChunkFilename(int[] chunkIndex, boolean nested) {
String chunkFilename = ZarrUtils.createChunkFilename(chunkIndex, nested);
if (_chunkFilenames.containsKey(chunkFilename)) {
return _chunkFilenames.get(chunkFilename);
}
Expand Down Expand Up @@ -315,7 +384,7 @@ private int[] computeFrom(int[] chunkIndex, int[] to, boolean read) {
}

private void writeZArrayHeader() throws IOException {
final ZarrHeader zarrHeader = new ZarrHeader(_shape, _chunks, _dataType.toString(), _byteOrder, _fillValue, _compressor);
final ZarrHeader zarrHeader = new ZarrHeader(_shape, _chunks, _dataType.toString(), _byteOrder, _fillValue, _compressor, _nested);
final ZarrPath zArray = relativePath.resolve(FILENAME_DOT_ZARRAY);
try (
OutputStream os = _store.getOutputStream(zArray.storeKey);
Expand Down
21 changes: 20 additions & 1 deletion src/main/java/com/bc/zarr/ZarrHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,13 @@ public class ZarrHeader {
private final String order = "C";
private final int[] shape;
private final int zarr_format = 2;
private final Boolean nested;

/**
* Creates a header without a {@code nested} flag. Delegates to the full constructor
* with {@code nested == null}.
*/
public ZarrHeader(int[] shape, int[] chunks, String dtype, ByteOrder byteOrder, Number fill_value, Compressor compressor) {
this(shape, chunks, dtype, byteOrder, fill_value, compressor, null);
}

public ZarrHeader(int[] shape, int[] chunks, String dtype, ByteOrder byteOrder, Number fill_value, Compressor compressor, Boolean nested) {
this.chunks = chunks;
if (compressor == null || CompressorFactory.nullCompressor.equals(compressor)) {
this.compressor = null;
Expand All @@ -63,6 +68,7 @@ public ZarrHeader(int[] shape, int[] chunks, String dtype, ByteOrder byteOrder,
this.dtype = translateByteOrder(byteOrder) + dtype;
this.fill_value = fill_value;
this.shape = shape;
this.nested = nested;
}

public int[] getChunks() {
Expand Down Expand Up @@ -121,6 +127,10 @@ public int[] getShape() {
return shape;
}

/**
* Returns the optional {@code nested} flag of this header.
*
* @return the flag as given to the constructor; may be {@code null} when no value was supplied
*/
public Boolean getNested() {
return nested;
}

static class ZarrHeaderSerializer extends StdSerializer<ZarrHeader> {

protected ZarrHeaderSerializer() {
Expand All @@ -139,6 +149,9 @@ public void serialize(ZarrHeader value, JsonGenerator gen, SerializerProvider pr
gen.writeObjectField("order", value.order);
gen.writeObjectField("shape", value.getShape());
gen.writeNumberField("zarr_format", value.zarr_format);
if (value.nested != null) {
gen.writeBooleanField("nested", value.nested);
}
gen.writeEndObject();
}
}
Expand Down Expand Up @@ -173,7 +186,13 @@ public ZarrHeader deserialize(JsonParser p, DeserializationContext ctxt) throws
} else {
compressor = CompressorFactory.create(compBean);
}
return new ZarrHeader(shape, chunks, getRawDataType(dtype).toString(), getByteOrder(dtype), fill, compressor);

Boolean nested = null;
JsonNode nestedNode = (JsonNode) root.path("nested");
if (!nestedNode.isMissingNode()) {
nested = nestedNode.asBoolean();
}
return new ZarrHeader(shape, chunks, getRawDataType(dtype).toString(), getByteOrder(dtype), fill, compressor, nested);
}

}
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/com/bc/zarr/ZarrUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,12 @@ public static int[][] computeChunkIndices(int[] shape, int[] chunks, int[] buffe
return chunkIndices;
}

public static String createChunkFilename(int[] currentIdx) {
public static String createChunkFilename(int[] currentIdx, boolean nested) {
final char sep = nested ? '/' : '.';
StringBuilder sb = new StringBuilder();
for (int aCurrentIdx : currentIdx) {
sb.append(aCurrentIdx);
sb.append(".");
sb.append(sep);
}
sb.setLength(sb.length() - 1);
return sb.toString();
Expand Down
7 changes: 6 additions & 1 deletion src/test/java/com/bc/zarr/ZarrUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ public void computeChunkIndices_4_Indices() {

@Test
public void computeChunkFilename() {
assertEquals("1.2.3.42", ZarrUtils.createChunkFilename(new int[]{1, 2, 3, 42}));
assertEquals("1.2.3.42", ZarrUtils.createChunkFilename(new int[]{1, 2, 3, 42}, false));
}

// Nested layout: the parts of the chunk index are joined with '/' instead of '.'.
@Test
public void computeChunkFilename2() {
    final int[] chunkIndex = {1, 2, 3, 42};
    final String nestedFilename = ZarrUtils.createChunkFilename(chunkIndex, true);
    assertEquals("1/2/3/42", nestedFilename);
}

private String expectedJson(boolean nullCompressor) {
Expand Down
Loading