diff --git a/docs/applications/implementations/aggregators.md b/docs/applications/implementations/aggregators.md
index c329992227..611d264690 100644
--- a/docs/applications/implementations/aggregators.md
+++ b/docs/applications/implementations/aggregators.md
@@ -42,7 +42,7 @@ def aggregate_spark(data, columns, args):
 The following packages have been pre-installed and can be used in your implementations:
 
 ```text
-pyspark==2.4.1
+pyspark==2.4.2
 boto3==1.9.78
 msgpack==0.6.1
 numpy>=1.13.3,<2
diff --git a/docs/applications/implementations/transformers.md b/docs/applications/implementations/transformers.md
index 9c238b426f..64a4ca91af 100644
--- a/docs/applications/implementations/transformers.md
+++ b/docs/applications/implementations/transformers.md
@@ -86,7 +86,7 @@ def reverse_transform_python(transformed_value, args):
 The following packages have been pre-installed and can be used in your implementations:
 
 ```text
-pyspark==2.4.1
+pyspark==2.4.2
 boto3==1.9.78
 msgpack==0.6.1
 numpy>=1.13.3,<2
diff --git a/docs/applications/resources/environments.md b/docs/applications/resources/environments.md
index 8e9f656431..45acd8525a 100644
--- a/docs/applications/resources/environments.md
+++ b/docs/applications/resources/environments.md
@@ -35,7 +35,7 @@ data:
 
 #### CSV Config
 
-To help ingest different styles of CSV files, Cortex supports the parameters listed below. All of these parameters are optional. A description and default values for each parameter can be found in the [PySpark CSV Documentation](https://spark.apache.org/docs/2.4.1/api/python/pyspark.sql.html#pyspark.sql.DataFrameReader.csv).
+To help ingest different styles of CSV files, Cortex supports the parameters listed below. All of these parameters are optional. A description and default values for each parameter can be found in the [PySpark CSV Documentation](https://spark.apache.org/docs/2.4.2/api/python/pyspark.sql.html#pyspark.sql.DataFrameReader.csv).
 
 ```yaml
 csv_config:
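The `csv_config` keys documented in environments.md mirror the keyword arguments of PySpark's `DataFrameReader.csv`, which is why the docs link above has to track the Spark version. A minimal sketch of that pass-through, assuming `pyspark==2.4.2` is installed; the S3 path and config values here are illustrative, not taken from Cortex:

```python
# Sketch: forwarding csv_config keys to pyspark.sql.DataFrameReader.csv.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("csv-config-sketch").getOrCreate()

# Hypothetical csv_config block, as it might appear in an app.yaml
csv_config = {"header": True, "sep": ",", "quote": '"', "escape": "\\"}

# Each key matches a DataFrameReader.csv parameter of the same name
df = spark.read.csv("s3a://my-bucket/data.csv", **csv_config)
```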
diff --git a/images/spark-base/Dockerfile b/images/spark-base/Dockerfile
index dca5784f8f..9ff1cd3bf3 100644
--- a/images/spark-base/Dockerfile
+++ b/images/spark-base/Dockerfile
@@ -11,8 +11,12 @@ RUN apt-get update -qq && apt-get install -y -q \
 RUN mkdir -p /opt
 
 ARG HADOOP_VERSION="2.9.2"
-ARG SPARK_VERSION="2.4.1"
+ARG SPARK_VERSION="2.4.2"
 ARG TF_VERSION="1.12.0"
+# Required for building tensorflow spark connector
+ARG SCALA_VERSION="2.12"
+# Scalatest version from https://github.com/apache/spark/blob/v2.4.2/pom.xml
+ARG SCALATEST_VERSION="3.0.3"
 
 # Check aws-java-sdk-bundle dependency version: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/$HADOOP_VERSION
 ARG AWS_JAVA_SDK_VERSION="1.11.199"
@@ -34,7 +38,8 @@ RUN rm -rf ~/tf-ecosystem && git clone https://github.com/tensorflow/ecosystem.g
     mvn -f ~/tf-ecosystem/hadoop/pom.xml versions:set -DnewVersion=${TF_VERSION} -q && \
     mvn -f ~/tf-ecosystem/hadoop/pom.xml -Dmaven.test.skip=true clean install -q && \
     mvn -f ~/tf-ecosystem/spark/spark-tensorflow-connector/pom.xml versions:set -DnewVersion=${TF_VERSION} -q && \
-    mvn -f ~/tf-ecosystem/spark/spark-tensorflow-connector/pom.xml -Dmaven.test.skip=true clean install -Dspark.version=${SPARK_VERSION} -q && \
-    mv ~/tf-ecosystem/spark/spark-tensorflow-connector/target/spark-tensorflow-connector_2.11-${TF_VERSION}.jar $SPARK_HOME/jars/
+    mvn -f ~/tf-ecosystem/spark/spark-tensorflow-connector/pom.xml -Dmaven.test.skip=true clean install \
+        -Dspark.version=${SPARK_VERSION} -Dscala.binary.version=${SCALA_VERSION} -Dscala.test.version=${SCALATEST_VERSION} -q && \
+    mv ~/tf-ecosystem/spark/spark-tensorflow-connector/target/spark-tensorflow-connector_${SCALA_VERSION}-${TF_VERSION}.jar $SPARK_HOME/jars/
 
 # Hadoop AWS
diff --git a/pkg/workloads/lib/package.py b/pkg/workloads/lib/package.py
index b7e228706f..c5eec7fca0 100644
--- a/pkg/workloads/lib/package.py
+++ b/pkg/workloads/lib/package.py
@@ -39,7 +39,7 @@ def get_build_order(python_packages):
 
 
 def get_restricted_packages():
-    req_list = ["pyspark==2.4.1", "tensorflow==1.12.0"]
+    req_list = ["pyspark==2.4.2", "tensorflow==1.12.0"]
     req_files = glob.glob("/src/**/requirements.txt", recursive=True)
 
     for req_file in req_files:
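For context on the final hunk: `get_restricted_packages` assembles the version pins that user-provided Python packages are validated against, which is why the `pyspark` pin there must move in lockstep with `SPARK_VERSION` in the spark-base Dockerfile. A simplified, hypothetical completion of that pattern (the real function body is truncated by the hunk above; the file-reading loop here is an assumption for illustration only):

```python
import glob

def get_restricted_packages():
    # Pins that must match the versions baked into the cluster images
    req_list = ["pyspark==2.4.2", "tensorflow==1.12.0"]
    # Gather additional pins from bundled requirements files (illustrative,
    # not the actual Cortex implementation)
    req_files = glob.glob("/src/**/requirements.txt", recursive=True)
    for req_file in req_files:
        with open(req_file) as f:
            req_list += [ln.strip() for ln in f if ln.strip() and not ln.startswith("#")]
    return req_list
```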