Skip to content

Commit 94aefa6

Browse files
Support file pruning through JsonPredicates and predicates queryRequest parameters (#130)
Co-authored-by: Antonio Murgia <[email protected]>
1 parent 0fcda0b commit 94aefa6

File tree

93 files changed

+2264
-30
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+2264
-30
lines changed

server/app/src/main/java/io/whitefox/api/deltasharing/DeltaMappers.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,21 @@ public static ReadTableRequest api2ReadTableRequest(QueryRequest request) {
3333
throw new IllegalArgumentException("version cannot be negative.");
3434
} else if (request.getVersion() != null && request.getTimestamp() == null) {
3535
return new ReadTableRequest.ReadTableVersion(
36-
request.getPredicateHints(),
36+
Optional.ofNullable(request.getPredicateHints()),
37+
Optional.ofNullable(request.getJsonPredicateHints()),
3738
Optional.ofNullable(request.getLimitHint()),
3839
request.getVersion());
3940
} else if (request.getVersion() == null && request.getTimestamp() != null) {
4041
return new ReadTableRequest.ReadTableAsOfTimestamp(
41-
request.getPredicateHints(),
42+
Optional.ofNullable(request.getPredicateHints()),
43+
Optional.ofNullable(request.getJsonPredicateHints()),
4244
Optional.ofNullable(request.getLimitHint()),
4345
CommonMappers.parseTimestamp(request.getTimestamp()));
4446
} else if (request.getVersion() == null && request.getTimestamp() == null) {
4547
return new ReadTableRequest.ReadTableCurrentVersion(
46-
request.getPredicateHints(), Optional.ofNullable(request.getLimitHint()));
48+
Optional.ofNullable(request.getPredicateHints()),
49+
Optional.ofNullable(request.getJsonPredicateHints()),
50+
Optional.ofNullable(request.getLimitHint()));
4751
} else {
4852
throw new IllegalArgumentException("Cannot specify both version and timestamp");
4953
}

server/app/src/test/java/io/whitefox/api/deltasharing/server/DeltaSharesApiImplTest.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,54 @@ public void queryNotExistingTable() throws IOException {
369369
.statusCode(404);
370370
}
371371

372+
@DisabledOnOs(OS.WINDOWS)
373+
@Test
374+
public void queryTableCurrentVersionWithPredicates() throws IOException {
375+
var responseBodyLines = given()
376+
.when()
377+
.filter(deltaFilter)
378+
.body("{\"jsonPredicateHints\": \"{" + " \\\"op\\\": \\\"equal\\\","
379+
+ " \\\"children\\\": ["
380+
+ " {\\\"op\\\": \\\"column\\\", \\\"name\\\":\\\"date\\\", \\\"valueType\\\":\\\"date\\\"},"
381+
+ " {\\\"op\\\":\\\"literal\\\",\\\"value\\\":\\\"2021-04-29\\\",\\\"valueType\\\":\\\"date\\\"}"
382+
+ " ]"
383+
+ "}\"}")
384+
.header(new Header("Content-Type", "application/json"))
385+
.post(
386+
"delta-api/v1/shares/{share}/schemas/{schema}/tables/{table}/query",
387+
"name",
388+
"default",
389+
"table1")
390+
.then()
391+
.statusCode(200)
392+
.extract()
393+
.body()
394+
.asString()
395+
.split("\n");
396+
397+
assertEquals(
398+
deltaTable1Protocol,
399+
objectMapper.reader().readValue(responseBodyLines[0], ProtocolObject.class));
400+
assertEquals(
401+
deltaTable1Metadata,
402+
objectMapper.reader().readValue(responseBodyLines[1], MetadataObject.class));
403+
var files = Arrays.stream(responseBodyLines)
404+
.skip(2)
405+
.map(line -> {
406+
try {
407+
return objectMapper
408+
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
409+
.reader()
410+
.readValue(line, FileObjectWithoutPresignedUrl.class);
411+
} catch (IOException e) {
412+
throw new RuntimeException(e);
413+
}
414+
})
415+
.collect(Collectors.toSet());
416+
assertEquals(7, responseBodyLines.length);
417+
assertEquals(deltaTable1FilesWithoutPresignedUrl, files);
418+
}
419+
372420
@DisabledOnOs(OS.WINDOWS)
373421
@Test
374422
public void queryTableCurrentVersion() throws IOException {

server/core/build.gradle.kts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ dependencies {
4343
compileOnly(String.format("com.amazonaws:aws-java-sdk-s3:%s", awsSdkVersion))
4444
implementation(String.format("org.apache.hadoop:hadoop-aws:%s", hadoopVersion))
4545

46+
//PREDICATE PARSER
47+
implementation("com.github.jsqlparser:jsqlparser:4.8")
48+
4649
// TEST
4750
testImplementation("org.junit.jupiter:junit-jupiter")
4851
testImplementation("io.quarkus:quarkus-arc")
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
package io.whitefox.core;
2+
3+
import io.whitefox.annotations.SkipCoverageGenerated;
4+
import io.whitefox.core.types.*;
5+
import io.whitefox.core.types.predicates.TypeNotSupportedException;
6+
import java.sql.Date;
7+
import java.sql.Timestamp;
8+
9+
public class ColumnRange {
10+
11+
String minVal;
12+
String maxVal;
13+
14+
DataType valueType;
15+
16+
public ColumnRange(String minVal, String maxVal, DataType valueType) {
17+
this.minVal = minVal;
18+
this.maxVal = maxVal;
19+
this.valueType = valueType;
20+
}
21+
22+
public ColumnRange(String onlyVal, DataType valueType) {
23+
this.minVal = onlyVal;
24+
this.maxVal = onlyVal;
25+
this.valueType = valueType;
26+
}
27+
28+
public DataType getValueType() {
29+
return valueType;
30+
}
31+
32+
public String getSingleValue() {
33+
return minVal;
34+
}
35+
36+
private Boolean typedContains(String point) throws TypeNotSupportedException {
37+
if (valueType instanceof IntegerType) {
38+
var c1 = Integer.compare(Integer.parseInt(minVal), Integer.parseInt(point));
39+
var c2 = Integer.compare(Integer.parseInt(maxVal), Integer.parseInt(point));
40+
return (c1 <= 0 && c2 >= 0);
41+
} else if (valueType instanceof LongType) {
42+
var c1 = Long.compare(Long.parseLong(minVal), Long.parseLong(point));
43+
var c2 = Long.compare(Long.parseLong(maxVal), Long.parseLong(point));
44+
return (c1 <= 0 && c2 >= 0);
45+
} else if (valueType instanceof TimestampType) {
46+
var c1 = Timestamp.valueOf(minVal).before(Timestamp.valueOf(point));
47+
var c2 = Timestamp.valueOf(maxVal).after(Timestamp.valueOf(point));
48+
return (c1 && c2) || Timestamp.valueOf(minVal).equals(Timestamp.valueOf(point));
49+
} else if (valueType instanceof FloatType) {
50+
var c1 = Float.compare(Float.parseFloat(minVal), Float.parseFloat(point));
51+
var c2 = Float.compare(Float.parseFloat(maxVal), Float.parseFloat(point));
52+
return (c1 <= 0 && c2 >= 0);
53+
} else if (valueType instanceof DoubleType) {
54+
var c1 = Double.compare(Double.parseDouble(minVal), Double.parseDouble(point));
55+
var c2 = Double.compare(Double.parseDouble(maxVal), Double.parseDouble(point));
56+
return (c1 <= 0 && c2 >= 0);
57+
} else if (valueType instanceof DateType) {
58+
var c1 = Date.valueOf(minVal).before(Date.valueOf(point));
59+
var c2 = Date.valueOf(maxVal).after(Date.valueOf(point));
60+
return (c1 && c2) || Date.valueOf(minVal).equals(Date.valueOf(point));
61+
} else if (valueType instanceof BooleanType) {
62+
var c1 = Boolean.parseBoolean(minVal) == Boolean.parseBoolean(point);
63+
var c2 = Boolean.parseBoolean(maxVal) == Boolean.parseBoolean(point);
64+
return c1 || c2;
65+
} else if (valueType instanceof StringType) {
66+
var c1 = minVal.compareTo(point);
67+
var c2 = maxVal.compareTo(point);
68+
return (c1 <= 0 && c2 >= 0);
69+
} else throw new TypeNotSupportedException(valueType);
70+
}
71+
72+
private Boolean typedLessThan(String point) throws TypeNotSupportedException {
73+
if (valueType instanceof IntegerType) {
74+
var c1 = Integer.compare(Integer.parseInt(minVal), Integer.parseInt(point));
75+
return (c1 < 0);
76+
} else if (valueType instanceof LongType) {
77+
var c1 = Long.compare(Long.parseLong(minVal), Long.parseLong(point));
78+
return (c1 < 0);
79+
} else if (valueType instanceof TimestampType) {
80+
return Timestamp.valueOf(minVal).before(Timestamp.valueOf(point));
81+
} else if (valueType instanceof FloatType) {
82+
var c1 = Float.compare(Float.parseFloat(minVal), Float.parseFloat(point));
83+
return (c1 < 0);
84+
} else if (valueType instanceof DoubleType) {
85+
var c1 = Double.compare(Double.parseDouble(minVal), Double.parseDouble(point));
86+
return (c1 < 0);
87+
} else if (valueType instanceof DateType) {
88+
return Date.valueOf(minVal).before(Date.valueOf(point));
89+
} else if (valueType instanceof StringType) {
90+
var c = minVal.compareTo(point);
91+
return (c < 0);
92+
} else throw new TypeNotSupportedException(valueType);
93+
}
94+
95+
// not used currently
96+
@SkipCoverageGenerated
97+
private Boolean typedGreaterThan(String point) {
98+
if (valueType instanceof IntegerType) {
99+
var c = Integer.compare(Integer.parseInt(point), Integer.parseInt(maxVal));
100+
return (c < 0);
101+
} else if (valueType instanceof LongType) {
102+
var c = Long.compare(Long.parseLong(point), Long.parseLong(maxVal));
103+
return (c < 0);
104+
} else if (valueType instanceof TimestampType) {
105+
return Timestamp.valueOf(point).before(Timestamp.valueOf(maxVal));
106+
} else if (valueType instanceof FloatType) {
107+
var c = Float.compare(Float.parseFloat(maxVal), Float.parseFloat(point));
108+
return (c < 0);
109+
} else if (valueType instanceof DoubleType) {
110+
var c = Double.compare(Double.parseDouble(maxVal), Double.parseDouble(point));
111+
return (c < 0);
112+
} else if (valueType instanceof DateType) {
113+
return Date.valueOf(point).before(Date.valueOf(maxVal));
114+
115+
} else {
116+
var c = point.compareTo(maxVal);
117+
return (c < 0);
118+
}
119+
}
120+
121+
public Boolean contains(String point) throws TypeNotSupportedException {
122+
return typedContains(point);
123+
}
124+
125+
public Boolean canBeLess(String point) throws TypeNotSupportedException {
126+
return typedLessThan(point);
127+
}
128+
129+
public Boolean canBeGreater(String point) {
130+
return typedGreaterThan(point);
131+
}
132+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package io.whitefox.core;
2+
3+
import com.fasterxml.jackson.annotation.JsonInclude;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import com.fasterxml.jackson.databind.module.SimpleModule;
6+
import io.whitefox.core.types.DataType;
7+
import io.whitefox.core.types.predicates.DataTypeDeserializer;
8+
9+
public class DeltaObjectMapper {
10+
11+
private static final ObjectMapper objectMapper = newInstance();
12+
13+
private static ObjectMapper newInstance() {
14+
ObjectMapper mapper = new ObjectMapper();
15+
mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
16+
var customSerializersModule = new SimpleModule();
17+
customSerializersModule.addDeserializer(DataType.class, new DataTypeDeserializer());
18+
mapper.registerModule(customSerializersModule);
19+
return mapper;
20+
}
21+
22+
public static ObjectMapper getInstance() {
23+
return objectMapper;
24+
}
25+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package io.whitefox.core;
2+
3+
import com.fasterxml.jackson.annotation.JsonProperty;
4+
import java.util.Map;
5+
6+
public class FileStats {
7+
// {"numRecords":1,"minValues":{"id":0},"maxValues":{"id":0},"nullCount":{"id":0}}
8+
@JsonProperty("numRecords")
9+
String numRecords;
10+
11+
@JsonProperty("minValues")
12+
Map<String, String> minValues;
13+
14+
@JsonProperty("maxValues")
15+
Map<String, String> maxValues;
16+
17+
@JsonProperty("nullCount")
18+
Map<String, String> nullCount;
19+
20+
public FileStats() {
21+
super();
22+
}
23+
24+
public String getNumRecords() {
25+
return numRecords;
26+
}
27+
28+
public Map<String, String> getMinValues() {
29+
return minValues;
30+
}
31+
32+
public Map<String, String> getMaxValues() {
33+
return maxValues;
34+
}
35+
36+
public Map<String, String> getNullCount() {
37+
return nullCount;
38+
}
39+
40+
public FileStats(
41+
String numRecords,
42+
Map<String, String> minValues,
43+
Map<String, String> maxValues,
44+
Map<String, String> nullCount) {
45+
this.numRecords = numRecords;
46+
this.minValues = minValues;
47+
this.maxValues = maxValues;
48+
this.nullCount = nullCount;
49+
}
50+
}

0 commit comments

Comments
 (0)