Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions dev/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@ WORKDIR ${SPARK_HOME}

ENV SPARK_VERSION=3.4.2
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.4_2.12
ENV ICEBERG_VERSION=1.4.0
ENV AWS_SDK_VERSION=2.20.18
ENV PYICEBERG_VERSION=0.4.0
ENV ICEBERG_VERSION=1.4.2
ENV PYICEBERG_VERSION=0.5.1

RUN curl --retry 3 -s -C - https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
Expand All @@ -51,8 +50,7 @@ RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runt
&& mv iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar /opt/spark/jars

# Download AWS bundle
RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
&& mv iceberg-aws-bundle-${ICEBERG_VERSION}.jar /opt/spark/jars
RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar

COPY spark-defaults.conf /opt/spark/conf
ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}"
Expand All @@ -62,7 +60,7 @@ RUN chmod u+x /opt/spark/sbin/* && \

RUN pip3 install -q ipython

RUN pip3 install "pyiceberg[s3fs]==${PYICEBERG_VERSION}"
RUN pip3 install "pyiceberg[s3fs,hive]==${PYICEBERG_VERSION}"

COPY entrypoint.sh .
COPY provision.py .
Expand Down
14 changes: 14 additions & 0 deletions dev/docker-compose-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ services:
iceberg_net:
depends_on:
- rest
- hive
- minio
volumes:
- ./warehouse:/home/iceberg/warehouse
Expand All @@ -37,6 +38,7 @@ services:
- 8080:8080
links:
- rest:rest
- hive:hive
- minio:minio
rest:
image: tabulario/iceberg-rest
Expand Down Expand Up @@ -85,5 +87,17 @@ services:
/usr/bin/mc policy set public minio/warehouse;
tail -f /dev/null
"
hive:
build: hive/
container_name: hive
hostname: hive
networks:
iceberg_net:
ports:
- 9083:9083
environment:
SERVICE_NAME: "metastore"
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"

networks:
iceberg_net:
34 changes: 34 additions & 0 deletions dev/hive/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM openjdk:8-jre-slim AS build

RUN apt-get update -qq && apt-get -qq -y install curl

ENV AWSSDK_VERSION=2.20.18
ENV HADOOP_VERSION=3.1.0

RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.271/aws-java-sdk-bundle-1.11.271.jar -Lo /tmp/aws-java-sdk-bundle-1.11.271.jar
RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /tmp/hadoop-aws-${HADOOP_VERSION}.jar


FROM apache/hive:3.1.3

ENV AWSSDK_VERSION=2.20.18
ENV HADOOP_VERSION=3.1.0

COPY --from=build /tmp/hadoop-aws-${HADOOP_VERSION}.jar /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar
COPY --from=build /tmp/aws-java-sdk-bundle-1.11.271.jar /opt/hive/lib/aws-java-sdk-bundle-1.11.271.jar
COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml
53 changes: 53 additions & 0 deletions dev/hive/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<configuration>
<property>
<name>fs.defaultFS</name>
<value>s3a://warehouse/hive</value>
</property>
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>fs.s3a.fast.upload</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.endpoint</name>
<value>http://minio:9000</value>
</property>
<property>
<name>fs.s3a.access.key</name>
<value>admin</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>password</value>
</property>
<property>
<name>fs.s3a.connection.ssl.enabled</name>
<value>false</value>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>
</configuration>
Loading