Skip to content

Commit a7794ca

Browse files
authored
Arrow: Support Arrow large-string (#382)
1 parent 853a77c commit a7794ca

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -864,7 +864,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
864864
elif isinstance(primitive, pa.Decimal128Type):
865865
primitive = cast(pa.Decimal128Type, primitive)
866866
return DecimalType(primitive.precision, primitive.scale)
867-
elif pa.types.is_string(primitive):
867+
elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive):
868868
return StringType()
869869
elif pa.types.is_date32(primitive):
870870
return DateType()

tests/io/test_pyarrow_visitor.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -272,6 +272,15 @@ def test_round_schema_conversion_nested(table_schema_nested: Schema) -> None:
272272
assert actual == expected
273273

274274

275+
def test_round_schema_large_string() -> None:
276+
schema = pa.schema([pa.field("animals", pa.large_string())])
277+
actual = str(pyarrow_to_schema(schema, name_mapping=NameMapping([MappedField(field_id=1, names=["animals"])])))
278+
expected = """table {
279+
1: animals: optional string
280+
}"""
281+
assert actual == expected
282+
283+
275284
def test_simple_schema_has_missing_ids() -> None:
276285
schema = pa.schema([
277286
pa.field('foo', pa.string(), nullable=False),

0 commit comments

Comments
 (0)