Skip to content

Commit 68377bf

Browse files
authored
Test the server with the delta sharing client (#129)
Add spark-delta integration test: add a new module "client-spark" that runs an integration test against a running whitefox server. The goal is to read a Delta table stored in an S3 bucket by querying the whitefox server with the official delta-sharing client. To run the integration test, start a server with `java -jar server/app/build/quarkus-app/quarkus-run.jar`, then run `./gradlew :client-spark:clientSparkTest`.
1 parent 3f84ab7 commit 68377bf

File tree

18 files changed

+355
-49
lines changed

18 files changed

+355
-49
lines changed

.github/workflows/compile.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ jobs:
3535
./gradlew build testNative --no-daemon
3636
./gradlew server:app:printVersion --no-daemon -q
3737
shell: bash
38+
- name: Run integration test
  shell: bash
  env:
    WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }}
    WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }}
    WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }}
  run: |
    # Start the whitefox server in the background and remember its PID.
    java -jar server/app/build/quarkus-app/quarkus-run.jar &
    SERVER_PID=$!
    # Always stop the server when this step exits, including when the tests fail
    # (the step runs with `bash -e`, so a trailing `kill` would be skipped on failure).
    trap 'kill -9 "$SERVER_PID" 2>/dev/null || true' EXIT
    # Wait until the server accepts connections on port 8080 (the endpoint used by
    # the test profile) so the tests do not race the server start-up.
    curl --silent --output /dev/null --retry 30 --retry-delay 2 --retry-connrefused http://localhost:8080/
    ./gradlew :client-spark:clientSparkTest --no-daemon
3848
- name: Build container image
3949
if: runner.os == 'Linux'
4050
run: |

client-spark/build.gradle.kts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import org.openapitools.generator.gradle.plugin.tasks.GenerateTask
2+
3+
plugins {
    java
    id("com.diffplug.spotless")
    id("whitefox.java-conventions")
}

repositories {
    mavenCentral()
}

dependencies {
    // OPENAPI
    implementation("org.eclipse.microprofile.openapi:microprofile-openapi-api:3.1.1")
    implementation("org.openapitools:jackson-databind-nullable:0.2.6")
    testImplementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310")
    testImplementation("jakarta.annotation:jakarta.annotation-api:2.1.1")

    // DELTA
    testImplementation("org.apache.hadoop:hadoop-common:3.3.6")
    testImplementation("io.delta:delta-sharing-spark_2.12:1.0.2")

    //SPARK
    testImplementation("org.apache.spark:spark-core_2.12:3.3.2")
    testImplementation("org.apache.spark:spark-sql_2.12:3.3.2")
    testImplementation("com.github.mrpowers:spark-fast-tests_2.12:1.3.0")

    //JUNIT
    testImplementation("org.junit.jupiter:junit-jupiter:5.8.1")
}

// The regular `test` task skips everything tagged "clientSparkTest"; those tests
// are run by the dedicated `clientSparkTest` task registered below.
tasks.named<Test>("test") {
    useJUnitPlatform {
        excludeTags.add("clientSparkTest")
    }
}

// Settings shared by every Test task of this module (including `clientSparkTest`).
tasks.withType<Test>().configureEach {
    environment = env.allVariables
    systemProperty("java.util.logging.manager", "java.util.logging.LogManager") //TODO modularize the whitefox-conventions plugin
}

// Runs only the tests tagged "clientSparkTest".
tasks.register<Test>("clientSparkTest") {
    description = "Runs the tests tagged 'clientSparkTest'."
    group = "verification"
    // Wire the test source set explicitly: relying on the implicit convention for
    // custom Test tasks is deprecated in recent Gradle versions.
    testClassesDirs = sourceSets["test"].output.classesDirs
    classpath = sourceSets["test"].runtimeClasspath
    useJUnitPlatform {
        includeTags.add("clientSparkTest")
    }
}

val openApiCodeGenDir = "generated/openapi"
val generatedCodeDirectory = generatedCodeDirectory(layout, openApiCodeGenDir)

// Generates the Java client for the whitefox protocol from its OpenAPI specification.
val whiteFoxGenerate = tasks.register<GenerateTask>("openapiGenerateClientApi") {
    dependsOn(tasks.spotlessApply)
    generatorName.set("java")
    inputSpec.set("$rootDir/protocol/whitefox-protocol-api.yml")
    library.set("native")
    outputDir.set(generatedCodeDirectory)
    additionalProperties.set(mapOf(
        "apiPackage" to "io.whitefox.api.client",
        "invokerPackage" to "io.whitefox.api.utils",
        "modelPackage" to "io.whitefox.api.client.model",
        "dateLibrary" to "java8",
        "sourceFolder" to "src/gen/java",
        "openApiNullable" to "true",
        "annotationLibrary" to "none",
        "serializationLibrary" to "jackson",
        "useJakartaEe" to "true",
        "useRuntimeException" to "true"
    ))
}

// The generated client is compiled as part of the test source set.
sourceSets {
    getByName("test") {
        java {
            // Reuse the directory computed above instead of recomputing it.
            srcDir("$generatedCodeDirectory/src/gen/java")
        }
    }
}

tasks.withType<JavaCompile>().configureEach {
    options.encoding = "UTF-8"
    options.compilerArgs.add("-parameters")
    // The generated sources must exist before any Java compilation starts.
    dependsOn(whiteFoxGenerate)
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package io.whitefox.api.client;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
5+
import com.github.mrpowers.spark.fast.tests.DatasetComparer;
6+
import io.whitefox.api.models.MrFoxDeltaTableSchema;
7+
import io.whitefox.api.utils.StorageManagerInitializer;
8+
import java.util.List;
9+
import org.apache.spark.sql.SparkSession;
10+
import org.apache.spark.sql.types.DataType;
11+
import org.apache.spark.sql.types.Metadata;
12+
import org.apache.spark.sql.types.StructField;
13+
import org.apache.spark.sql.types.StructType;
14+
import org.junit.jupiter.api.BeforeAll;
15+
import org.junit.jupiter.api.Tag;
16+
import org.junit.jupiter.api.Test;
17+
import scala.collection.GenMap;
18+
19+
@Tag("clientSparkTest")
20+
public class ITDeltaSharingClient implements DatasetComparer {
21+
22+
private final String tablePath = String.format(
23+
"%s#%s.%s.%s",
24+
getClass().getClassLoader().getResource("MrFoxProfile.json"),
25+
"s3share",
26+
"s3schema",
27+
"s3Table1");
28+
29+
private final SparkSession spark = SparkSession.builder()
30+
.appName("delta sharing client test")
31+
.config("spark.driver.host", "localhost")
32+
.master("local[1, 4]")
33+
.getOrCreate();
34+
35+
@BeforeAll
36+
static void initStorageManager() {
37+
new StorageManagerInitializer().initStorageManager();
38+
}
39+
40+
@Test
41+
void showS3Table1withQueryTableApi() {
42+
var ds = spark.read().format("deltaSharing").load(tablePath);
43+
var expectedSchema = new StructType(new StructField[] {
44+
new StructField("id", DataType.fromDDL("long"), true, new Metadata(GenMap.empty()))
45+
});
46+
var expectedData = spark
47+
.createDataFrame(
48+
List.of(
49+
new MrFoxDeltaTableSchema(0),
50+
new MrFoxDeltaTableSchema(3),
51+
new MrFoxDeltaTableSchema(2),
52+
new MrFoxDeltaTableSchema(1),
53+
new MrFoxDeltaTableSchema(4)),
54+
MrFoxDeltaTableSchema.class)
55+
.toDF();
56+
57+
assertEquals(expectedSchema.json(), ds.schema().json());
58+
assertEquals(5, ds.count());
59+
assertSmallDatasetEquality(ds, expectedData, true, false, false, 500);
60+
}
61+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package io.whitefox.api.models;
2+
3+
/**
 * Immutable Java bean holding a single {@code long} value exposed as the {@code id}
 * property.
 */
public class MrFoxDeltaTableSchema {

  /** Value of the {@code id} property. */
  private final long id;

  /**
   * Creates a bean with the given identifier.
   *
   * @param id value returned by {@link #getId()}
   */
  public MrFoxDeltaTableSchema(long id) {
    this.id = id;
  }

  /**
   * @return the identifier passed to the constructor
   */
  public long getId() {
    return this.id;
  }
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package io.whitefox.api.utils;
2+
3+
public class S3TestConfig {
4+
private final String region;
5+
private final String accessKey;
6+
private final String secretKey;
7+
8+
public String getRegion() {
9+
return region;
10+
}
11+
12+
public String getAccessKey() {
13+
return accessKey;
14+
}
15+
16+
public String getSecretKey() {
17+
return secretKey;
18+
}
19+
20+
public S3TestConfig(String region, String accessKey, String secretKey) {
21+
this.region = region;
22+
this.accessKey = accessKey;
23+
this.secretKey = secretKey;
24+
}
25+
26+
public static S3TestConfig loadFromEnv() {
27+
return new S3TestConfig(
28+
System.getenv().get("WHITEFOX_TEST_AWS_REGION"),
29+
System.getenv().get("WHITEFOX_TEST_AWS_ACCESS_KEY_ID"),
30+
System.getenv().get("WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY"));
31+
}
32+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package io.whitefox.api.utils;
2+
3+
import io.whitefox.api.client.*;
4+
import io.whitefox.api.client.model.*;
5+
import java.util.List;
6+
import java.util.Map;
7+
8+
public class StorageManagerInitializer {
9+
private final S3TestConfig s3TestConfig;
10+
private final StorageV1Api storageV1Api;
11+
private final ProviderV1Api providerV1Api;
12+
private final TableV1Api tableV1Api;
13+
private final ShareV1Api shareV1Api;
14+
private final SchemaV1Api schemaV1Api;
15+
16+
public StorageManagerInitializer() {
17+
var apiClient = new ApiClient();
18+
this.s3TestConfig = S3TestConfig.loadFromEnv();
19+
this.storageV1Api = new StorageV1Api(apiClient);
20+
this.providerV1Api = new ProviderV1Api(apiClient);
21+
this.tableV1Api = new TableV1Api(apiClient);
22+
this.shareV1Api = new ShareV1Api(apiClient);
23+
this.schemaV1Api = new SchemaV1Api(apiClient);
24+
}
25+
26+
public void initStorageManager() {
27+
storageV1Api.createStorage(createStorageRequest(s3TestConfig));
28+
providerV1Api.addProvider(addProviderRequest());
29+
tableV1Api.createTableInProvider(addProviderRequest().getName(), createTableRequest());
30+
shareV1Api.createShare(createShareRequest());
31+
schemaV1Api.createSchema(createShareRequest().getName(), createSchemaRequest());
32+
schemaV1Api.addTableToSchema(
33+
createShareRequest().getName(), createSchemaRequest(), addTableToSchemaRequest());
34+
}
35+
36+
private String createSchemaRequest() {
37+
return "s3schema";
38+
}
39+
40+
private AddTableToSchemaRequest addTableToSchemaRequest() {
41+
return new AddTableToSchemaRequest()
42+
.name("s3Table1")
43+
.reference(new TableReference().providerName("MrFoxProvider").name("s3Table1"));
44+
}
45+
46+
private CreateShareInput createShareRequest() {
47+
return new CreateShareInput().name("s3share").recipients(List.of("Mr.Fox")).schemas(List.of());
48+
}
49+
50+
private CreateTableInput createTableRequest() {
51+
return new CreateTableInput()
52+
.name("s3Table1")
53+
.skipValidation(true)
54+
.properties(Map.of(
55+
"type", "delta",
56+
"location", "s3a://whitefox-s3-test-bucket/delta/samples/delta-table"));
57+
}
58+
59+
private ProviderInput addProviderRequest() {
60+
return new ProviderInput()
61+
.name("MrFoxProvider")
62+
.storageName("MrFoxStorage")
63+
.metastoreName(null);
64+
}
65+
66+
private CreateStorage createStorageRequest(S3TestConfig s3TestConfig) {
67+
return new CreateStorage()
68+
.name("MrFoxStorage")
69+
.type(CreateStorage.TypeEnum.S3)
70+
.properties(new StorageProperties(new S3Properties()
71+
.credentials(new SimpleAwsCredentials()
72+
.region(s3TestConfig.getRegion())
73+
.awsAccessKeyId(s3TestConfig.getAccessKey())
74+
.awsSecretAccessKey(s3TestConfig.getSecretKey()))))
75+
.skipValidation(true);
76+
}
77+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"shareCredentialsVersion": 1,
3+
"endpoint": "http://localhost:8080/delta-api/v1/",
4+
"bearerToken": "fakeToken",
5+
"expirationTime": null
6+
}

protocol/delta-sharing-protocol-api.yml

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,12 @@ paths:
419419
description: 'Starting Timestamp ISO8601 format, in the UTC timezone'
420420
schema:
421421
type: string
422+
- in: header
423+
name: delta-sharing-capabilities
424+
required: false
425+
description: 'Delta Sharing Capabilities'
426+
schema:
427+
type: string
422428
requestBody:
423429
required: true
424430
content:
@@ -670,7 +676,7 @@ components:
670676
items:
671677
type: string
672678
jsonPredicateHints:
673-
type: object
679+
type: string
674680
description: |
675681
query predicates on partition columns specified using a structured JSON format.
676682
When it’s present, the server will try to use the predicates to filter table's
@@ -680,19 +686,20 @@ components:
680686
If the server encounters any errors during predicate processing (for example, invalid
681687
syntax or non existing columns), it will skip filtering and return all the files.
682688
When it’s absent, the server will return all the files in the table.
683-
properties:
684-
op:
685-
$ref: '#/components/schemas/Ops'
686-
children:
687-
type: string
688-
name:
689-
type: string
690-
value:
691-
type: string
692-
valueType:
693-
type: string
689+
# properties:
690+
# op:
691+
# $ref: '#/components/schemas/Ops'
692+
# children:
693+
# type: string
694+
# name:
695+
# type: string
696+
# value:
697+
# type: string
698+
# valueType:
699+
# type: string
694700
limitHint:
695701
type: integer
702+
format: int64
696703
example: 1000
697704
description: |
698705
It’s a hint from the client to tell the server how many rows the
@@ -717,13 +724,15 @@ components:
717724
timestamp. This is only supported on tables with history sharing enabled.
718725
startingVersion:
719726
type: integer
727+
format: int64
720728
example: 1000
721729
description: |
722730
an optional version number. If set, will return all data change files
723731
since startingVersion, inclusive, including historical metadata if seen
724732
in the delta log.
725733
endingVersion:
726734
type: integer
735+
format: int64
727736
example: 1000
728737
description: |
729738
an optional version number, only used if startingVersion is set. If set,
@@ -836,7 +845,7 @@ components:
836845
MetadataObject:
837846
type: object
838847
properties:
839-
metadata:
848+
metaData:
840849
type: object
841850
properties:
842851
id:

server/app/build.gradle.kts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ val quarkusPlatformArtifactId: String by project
1111
val quarkusPlatformVersion: String by project
1212

1313
// region dependencies
14-
14+
val hadoopVersion = "3.3.6"
1515
dependencies {
1616
// INTERNAL
1717
implementation(project(":server:core"))
1818
implementation(project(":server:persistence:memory"))
19-
19+
2020
// QUARKUS
2121
implementation(enforcedPlatform("${quarkusPlatformGroupId}:${quarkusPlatformArtifactId}:${quarkusPlatformVersion}"))
2222
implementation("io.quarkus:quarkus-arc")

0 commit comments

Comments
 (0)