Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions data/test13_expected/test13b_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@
"name" : "Seg_Id0",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "Seg_Id1",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "SEGMENT_ID",
"type" : "string",
Expand Down
8 changes: 6 additions & 2 deletions data/test14_expected/test14_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@
"name" : "Seg_Id0",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "Seg_Id1",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "SEGMENT_ID",
"type" : "string",
Expand Down
12 changes: 9 additions & 3 deletions data/test17_expected/test17b_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,23 @@
"name" : "Seg_Id0",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "Seg_Id1",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "Seg_Id2",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "SEGMENT_ID",
"type" : "integer",
Expand Down
2 changes: 1 addition & 1 deletion data/test4_expected/test4_schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{}},{"name":"Seg_Id1","type":"string","nullable":true,"metadata":{}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"Seg_Id1","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
2 changes: 1 addition & 1 deletion data/test5_expected/test5_schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{}},{"name":"Seg_Id1","type":"string","nullable":true,"metadata":{}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"Seg_Id1","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
2 changes: 1 addition & 1 deletion data/test5_expected/test5a_schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
2 changes: 1 addition & 1 deletion data/test5_expected/test5b_schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{}},{"name":"Seg_Id1","type":"string","nullable":true,"metadata":{}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"Seg_Id1","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
2 changes: 1 addition & 1 deletion data/test5_expected/test5c_schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
{"type":"struct","fields":[{"name":"File_Id","type":"integer","nullable":false,"metadata":{}},{"name":"Record_Id","type":"long","nullable":false,"metadata":{}},{"name":"Record_Byte_Length","type":"integer","nullable":false,"metadata":{}},{"name":"Seg_Id0","type":"string","nullable":true,"metadata":{"maxLength":51}},{"name":"SEGMENT_ID","type":"string","nullable":true,"metadata":{"maxLength":5}},{"name":"COMPANY_ID","type":"string","nullable":true,"metadata":{"maxLength":10}},{"name":"STATIC_DETAILS","type":{"type":"struct","fields":[{"name":"COMPANY_NAME","type":"string","nullable":true,"metadata":{"maxLength":15}},{"name":"ADDRESS","type":"string","nullable":true,"metadata":{"maxLength":25}},{"name":"TAXPAYER","type":{"type":"struct","fields":[{"name":"TAXPAYER_TYPE","type":"string","nullable":true,"metadata":{"maxLength":1}},{"name":"TAXPAYER_STR","type":"string","nullable":true,"metadata":{"maxLength":8}},{"name":"TAXPAYER_NUM","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"CONTACTS","type":{"type":"struct","fields":[{"name":"PHONE_NUMBER","type":"string","nullable":true,"metadata":{"maxLength":17}},{"name":"CONTACT_PERSON","type":"string","nullable":true,"metadata":{"maxLength":28}}]},"nullable":true,"metadata":{}}]}
8 changes: 6 additions & 2 deletions data/test5_expected/test5d_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@
"name" : "Seg_Id0",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "Seg_Id1",
"type" : "string",
"nullable" : true,
"metadata" : { }
"metadata" : {
"maxLength" : 51
}
}, {
"name" : "RECORD_LENGTH",
"type" : "integer",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.spark.cobol.parameters

/**
  * Names of the keys placed into the metadata of generated schema fields.
  *
  * Keeping the key names in one place lets the code that writes the metadata and the code
  * that reads it refer to the same identifiers instead of repeating string literals.
  */
object MetadataFields {
  /** Key of the metadata entry attached to 'string' fields. */
  val MAX_LENGTH: String = "maxLength"

  /** Key of the lower-bound metadata entry attached to 'array' fields. */
  val MIN_ELEMENTS: String = "minElements"

  /** Key of the upper-bound metadata entry attached to 'array' fields. */
  val MAX_ELEMENTS: String = "maxElements"
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy
import za.co.absa.cobrix.cobol.reader.schema.{CobolSchema => CobolReaderSchema}
import za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser.getReaderProperties
import za.co.absa.cobrix.spark.cobol.parameters.MetadataFields.{MAX_ELEMENTS, MAX_LENGTH, MIN_ELEMENTS}
import za.co.absa.cobrix.spark.cobol.parameters.{CobolParametersParser, Parameters}

import scala.collection.mutable
Expand Down Expand Up @@ -101,7 +102,14 @@ class CobolSchema(copybook: Copybook,

val recordsWithSegmentFields = if (generateSegIdFieldsCnt > 0) {
val newFields = for (level <- Range(0, generateSegIdFieldsCnt))
yield StructField(s"${Constants.segmentIdField}$level", StringType, nullable = true)
yield {
val maxPrefixLength = getMaximumSegmentIdLength(segmentIdProvidedPrefix)
val segFieldMetadata = new MetadataBuilder()
segFieldMetadata.putLong(MAX_LENGTH, maxPrefixLength.toLong)

StructField(s"${Constants.segmentIdField}$level", StringType, nullable = true, metadata = segFieldMetadata.build())
}

newFields.toArray ++ expandRecords
} else {
expandRecords
Expand Down Expand Up @@ -130,6 +138,15 @@ class CobolSchema(copybook: Copybook,
StructType(recordsWithRecordId)
}

/**
  * Computes the upper bound on the length of a generated segment id value.
  *
  * The bound is the length of the prefix plus a fixed allowance of 50 characters for the
  * generated part. When the supplied prefix is empty, a prefix length of 15 is assumed
  * instead (presumably the size of an auto-generated date-time prefix; confirm against the
  * code that generates segment ids).
  *
  * @param segmentIdProvidedPrefix the user-supplied segment id prefix; may be empty.
  * @return the prefix length (or 15 for an empty prefix) plus 50.
  */
private [cobrix] def getMaximumSegmentIdLength(segmentIdProvidedPrefix: String): Int = {
  val generatedPartMaxLength = 50
  val fallbackPrefixLength = 15

  // An empty prefix has length 0, in which case the fallback length applies.
  val effectivePrefixLength = segmentIdProvidedPrefix.length match {
    case 0 => fallbackPrefixLength
    case providedLength => providedLength
  }

  effectivePrefixLength + generatedPartMaxLength
}

@throws(classOf[IllegalStateException])
private def parseGroup(group: Group, segmentRedefines: List[Group]): StructField = {
val fields = group.children.flatMap(field => {
Expand Down Expand Up @@ -210,12 +227,12 @@ class CobolSchema(copybook: Copybook,
}

/**
  * Records the array size bounds of a statement in the given metadata builder.
  *
  * The values are stored as long entries under the "minElements" and "maxElements" keys.
  *
  * @param metadataBuilder the builder that receives the entries.
  * @param st              the statement whose `arrayMinSize` and `arrayMaxSize` are stored.
  * @return the result of the last `putLong` call made on `metadataBuilder`.
  */
private def addArrayMetadata(metadataBuilder: MetadataBuilder, st: Statement): MetadataBuilder = {
metadataBuilder.putLong("minElements", st.arrayMinSize)
metadataBuilder.putLong("maxElements", st.arrayMaxSize)
metadataBuilder.putLong(MIN_ELEMENTS, st.arrayMinSize)
metadataBuilder.putLong(MAX_ELEMENTS, st.arrayMaxSize)
}

/**
  * Records the length of an alphanumeric field in the given metadata builder.
  *
  * The value is stored as a long entry under the "maxLength" key.
  *
  * @param metadataBuilder the builder that receives the entry.
  * @param a               the alphanumeric type whose `length` is stored.
  * @return the result of the last `putLong` call made on `metadataBuilder`.
  */
private def addAlphaNumericMetadata(metadataBuilder: MetadataBuilder, a: AlphaNumeric): MetadataBuilder = {
metadataBuilder.putLong("maxLength", a.length)
metadataBuilder.putLong(MAX_LENGTH, a.length)
}

private def addExtendedMetadata(metadataBuilder: MetadataBuilder, s: Statement): MetadataBuilder = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import za.co.absa.cobrix.spark.cobol.utils.impl.HofsWrapper.transform
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
import za.co.absa.cobrix.cobol.internal.Logging
import za.co.absa.cobrix.spark.cobol.parameters.MetadataFields.MAX_ELEMENTS

import scala.annotation.tailrec
import scala.collection.mutable
Expand Down Expand Up @@ -129,8 +130,8 @@ object SparkUtils extends Logging {

def getMaxArraySize(path: String): Int = {
getField(path, df.schema) match {
case Some(field) if field.metadata.contains("maxElements") =>
field.metadata.getLong("maxElements").toInt
case Some(field) if field.metadata.contains(MAX_ELEMENTS) =>
field.metadata.getLong(MAX_ELEMENTS).toInt
case _ =>
val collected = df.agg(max(expr(s"size($path)"))).collect()(0)(0)
if (collected != null) {
Expand Down
Loading
Loading