diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java index 30a00dea2..82da9b0b3 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/RecordVisitor.java @@ -31,6 +31,33 @@ public class RecordVisitor */ protected final boolean _overridden; + /** + * When Avro schema for this JavaType ({@code _type}) results in UNION of multiple Avro types, _typeSchema keeps track + * which Avro type in the UNION represents this JavaType ({@code _type}) so that fields of this JavaType can be set to the right Avro type by {@code builtAvroSchema()}. + * + * Example: + *
+     *   @JsonSubTypes({
+     *     @JsonSubTypes.Type(value = Apple.class),
+     *     @JsonSubTypes.Type(value = Orange.class) })
+     *   class Fruit {}
+     *
+     *   class Apple extends Fruit {}
+     *   class Orange extends Fruit {}
+     * 
+ * When _type = Fruit.class + * Then + * _avroSchema of Fruit.class is union of Fruit record, Apple record and Orange record schemas: [ + * { name: Fruit, type: record, fields: [..] }, <--- _typeSchema points here + * { name: Apple, type: record, fields: [..] }, + * { name: Orange, type: record, fields: [..]} + * ] + * _typeSchema points to Fruit.class without subtypes record schema + * + * FIXME: When _typeSchema is not null, then _overridden must be false, therefore (_overridden == false) can be replaced with (_typeSchema != null), + * but it might be considered API change because _overridden has protected access modifier. + */ + private Schema _typeSchema; protected Schema _avroSchema; protected List _fields = new ArrayList<>(); @@ -42,32 +69,59 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm _visitorWrapper = visitorWrapper; // Check if the schema for this record is overridden BeanDescription bean = getProvider().getConfig().introspectDirectClassAnnotations(_type); - List subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); AvroSchema ann = bean.getClassInfo().getAnnotation(AvroSchema.class); if (ann != null) { _avroSchema = AvroSchemaHelper.parseJsonSchema(ann.value()); _overridden = true; - } else if (subTypes != null && !subTypes.isEmpty()) { - List unionSchemas = new ArrayList<>(); - try { - for (NamedType subType : subTypes) { - JsonSerializer ser = getProvider().findValueSerializer(subType.getType()); - VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); - ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); - unionSchemas.add(visitor.getAvroSchema()); - } - _avroSchema = Schema.createUnion(unionSchemas); - _overridden = true; - } catch (JsonMappingException jme) { - throw new RuntimeException("Failed to build schema", jme); - } } else { - _avroSchema = AvroSchemaHelper.initializeRecordSchema(bean); + // If Avro schema
for this _type results in UNION I want to know Avro type where to assign fields + _typeSchema = AvroSchemaHelper.initializeRecordSchema(bean); + _avroSchema = _typeSchema; _overridden = false; AvroMeta meta = bean.getClassInfo().getAnnotation(AvroMeta.class); if (meta != null) { _avroSchema.addProp(meta.key(), meta.value()); } + + List subTypes = getProvider().getAnnotationIntrospector().findSubtypes(bean.getClassInfo()); + if (subTypes != null && !subTypes.isEmpty()) { + // alreadySeenClasses prevents subType processing in endless loop + Set> alreadySeenClasses = new HashSet<>(); + alreadySeenClasses.add(_type.getRawClass()); + + // At this point calculating hashCode for _typeSchema fails with NPE because RecordSchema.fields is NULL + // see org.apache.avro.Schema.RecordSchema#computeHash. + // Therefore, unionSchemas must not be HashSet (or any other type using hashCode() for equality check). + // Set ensures that each subType schema is once in resulting union. + // IdentityHashMap is used because it is using reference-equality. + Set unionSchemas = Collections.newSetFromMap(new IdentityHashMap<>()); + // Initialize with this schema + if (_type.isConcrete()) { + unionSchemas.add(_typeSchema); + } + + try { + for (NamedType subType : subTypes) { + if (!alreadySeenClasses.add(subType.getType())) { + continue; + } + JsonSerializer ser = getProvider().findValueSerializer(subType.getType()); + VisitorFormatWrapperImpl visitor = _visitorWrapper.createChildWrapper(); + ser.acceptJsonFormatVisitor(visitor, getProvider().getTypeFactory().constructType(subType.getType())); + // Add subType schema into this union, unless it is already there. 
+ Schema subTypeSchema = visitor.getAvroSchema(); + // When subType schema is union itself, include each its type into this union if not there already + if (subTypeSchema.getType() == Type.UNION) { + unionSchemas.addAll(subTypeSchema.getTypes()); + } else { + unionSchemas.add(subTypeSchema); + } + } + _avroSchema = Schema.createUnion(new ArrayList<>(unionSchemas)); + } catch (JsonMappingException jme) { + throw new RuntimeException("Failed to build schema", jme); + } + } } _visitorWrapper.getSchemas().addSchema(type, _avroSchema); } @@ -76,7 +130,7 @@ public RecordVisitor(SerializerProvider p, JavaType type, VisitorFormatWrapperIm public Schema builtAvroSchema() { if (!_overridden) { // Assumption now is that we are done, so let's assign fields - _avroSchema.setFields(_fields); + _typeSchema.setFields(_fields); } return _avroSchema; } diff --git a/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/PolymorphicTypeAnnotationsTest.java b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/PolymorphicTypeAnnotationsTest.java new file mode 100644 index 000000000..3360d640d --- /dev/null +++ b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/PolymorphicTypeAnnotationsTest.java @@ -0,0 +1,296 @@ +package com.fasterxml.jackson.dataformat.avro.schema; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.dataformat.avro.AvroMapper; +import com.fasterxml.jackson.dataformat.avro.annotation.AvroNamespace; +import org.apache.avro.Schema; +import org.apache.avro.reflect.Union; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static org.assertj.core.api.Assertions.assertThat; + +public class PolymorphicTypeAnnotationsTest { + + private static final AvroMapper MAPPER = AvroMapper.builder().build(); + // it is easier maintain string schema representation when namespace is constant, rather than being inferred from this 
class package name + private static final String TEST_NAMESPACE = "test"; + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Cat.class), + @JsonSubTypes.Type(value = Dog.class), + }) + private interface AnimalInterface { + } + + private static abstract class AbstractMammal implements AnimalInterface { + public int legs; + } + + private static class Cat extends AbstractMammal { + public String color; + } + + private static class Dog extends AbstractMammal { + public int size; + } + + @Test + public void subclasses_of_interface_test() throws JsonMappingException { + // GIVEN + final Schema catSchema = MAPPER.schemaFor(Cat.class).getAvroSchema(); + final Schema dogSchema = MAPPER.schemaFor(Dog.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(AnimalInterface.class).getAvroSchema(); + + System.out.println("Animal schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + // Because AnimalInterface is interface and AbstractMammal is abstract, they are not expected to be among types in union + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(catSchema, dogSchema); + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Apple.class), + @JsonSubTypes.Type(value = Pear.class), + }) + @AvroNamespace(TEST_NAMESPACE) // @AvroNamespace makes it easier to create schema string representation + private static class Fruit { + public boolean eatable; + } + + private static final String FRUIT_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Fruit\",\"namespace\":\"test\",\"fields\":[{\"name\":\"eatable\",\"type\":\"boolean\"}]}"; + + private static class Apple extends Fruit { + public String color; + } + + private static class Pear extends Fruit { + public int seeds; + } + + @Test + public void jsonSubTypes_on_concrete_class_test() throws IOException { + // GIVEN + final Schema fruitItselfSchema = MAPPER.schemaFrom(FRUIT_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema 
appleSchema = MAPPER.schemaFor(Apple.class).getAvroSchema(); + final Schema pearSchema = MAPPER.schemaFor(Pear.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Fruit.class).getAvroSchema(); + + System.out.println("Fruit schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(fruitItselfSchema, appleSchema, pearSchema); + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = LandVehicle.class), + @JsonSubTypes.Type(value = AbstractWaterVehicle.class), + }) + @AvroNamespace(TEST_NAMESPACE) + private static class Vehicle { + } + + private static final String VEHICLE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Vehicle\",\"namespace\":\"test\",\"fields\":[]}"; + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Car.class), + @JsonSubTypes.Type(value = MotorCycle.class), + }) + @AvroNamespace(TEST_NAMESPACE) + private static class LandVehicle extends Vehicle { + } + + private static final String LAND_VEHICLE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"LandVehicle\",\"namespace\":\"test\",\"fields\":[]}"; + + private static class Car extends LandVehicle { + } + + private static class MotorCycle extends LandVehicle { + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Boat.class), + @JsonSubTypes.Type(value = Submarine.class), + }) + private static abstract class AbstractWaterVehicle extends Vehicle { + public int propellers; + } + + private static class Boat extends AbstractWaterVehicle { + } + + private static class Submarine extends AbstractWaterVehicle { + } + + @Test + public void jsonSubTypes_of_jsonSubTypes_test() throws IOException { + // GIVEN + final Schema vehicleItselfSchema = MAPPER.schemaFrom(VEHICLE_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema landVehicleItselfSchema = MAPPER.schemaFrom(LAND_VEHICLE_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema carSchema = 
MAPPER.schemaFor(Car.class).getAvroSchema(); + final Schema motorCycleSchema = MAPPER.schemaFor(MotorCycle.class).getAvroSchema(); + final Schema boatSchema = MAPPER.schemaFor(Boat.class).getAvroSchema(); + final Schema submarineSchema = MAPPER.schemaFor(Submarine.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Vehicle.class).getAvroSchema(); + + System.out.println("Vehicle schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder( + vehicleItselfSchema, + landVehicleItselfSchema, carSchema, motorCycleSchema, + // AbstractWaterVehicle is not here, because it is abstract + boatSchema, submarineSchema); + } + + // Helium is twice in subtypes hierarchy, once as ElementInterface subtype and second time as subtype + // of AbstractGas subtype. This situation may result in + // "Failed to generate `AvroSchema` for ...., problem: (AvroRuntimeException) Duplicate in union:com.fasterxml...PolymorphicTypeAnnotationsTest.Helium" + // error. 
+ @JsonSubTypes({ + @JsonSubTypes.Type(value = AbstractGas.class), + @JsonSubTypes.Type(value = Helium.class), + }) + private interface ElementInterface { + } + + @JsonSubTypes({ + @JsonSubTypes.Type(value = Helium.class), + @JsonSubTypes.Type(value = Oxygen.class), + }) + static abstract class AbstractGas implements ElementInterface { + public int atomicMass; + } + + private static class Helium extends AbstractGas { + } + + private static class Oxygen extends AbstractGas { + } + + @Test + public void class_is_referenced_twice_in_hierarchy_test() throws JsonMappingException { + // GIVEN + final Schema heliumSchema = MAPPER.schemaFor(Helium.class).getAvroSchema(); + final Schema oxygenSchema = MAPPER.schemaFor(Oxygen.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(ElementInterface.class).getAvroSchema(); + + System.out.println("ElementInterface schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + // ElementInterface and AbstractGas are not concrete classes they are not expected to be among types in union + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(heliumSchema, oxygenSchema); + } + + @JsonSubTypes({ + // Base class being explicitly in @JsonSubTypes led to StackOverflowError exception. 
+ @JsonSubTypes.Type(value = Image.class), + @JsonSubTypes.Type(value = Jpeg.class), + @JsonSubTypes.Type(value = Png.class), + }) + @AvroNamespace(TEST_NAMESPACE) // @AvroNamespace makes it easier to create schema string representation + private static class Image { + } + + private static final String IMAGE_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Image\",\"namespace\":\"test\",\"fields\":[]}"; + + private static class Jpeg extends Image { + } + + private static class Png extends Image { + } + + @Test + public void base_class_explicitly_in_JsonSubTypes_annotation_test() throws IOException { + // GIVEN + final Schema imageItselfSchema = MAPPER.schemaFrom(IMAGE_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema jpegSchema = MAPPER.schemaFor(Jpeg.class).getAvroSchema(); + final Schema pngSchema = MAPPER.schemaFor(Png.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Image.class).getAvroSchema(); + + System.out.println("Image schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(imageItselfSchema, jpegSchema, pngSchema); + } + + @Union({ + // Base class being explicitly in @Union led to StackOverflowError exception. 
+ Sport.class, + Football.class, Basketball.class}) + @AvroNamespace(TEST_NAMESPACE) // @AvroNamespace makes it easier to create schema string representation + private static class Sport { + } + + private static final String SPORT_ITSELF_SCHEMA_STR = "{\"type\":\"record\",\"name\":\"Sport\",\"namespace\":\"test\",\"fields\":[]}"; + + private static class Football extends Sport { + } + + private static class Basketball extends Sport { + } + + @Test + public void base_class_explicitly_in_Union_annotation_test() throws IOException { + // GIVEN + final Schema sportItselfSchema = MAPPER.schemaFrom(SPORT_ITSELF_SCHEMA_STR).getAvroSchema(); + final Schema footballSchema = MAPPER.schemaFor(Football.class).getAvroSchema(); + final Schema basketballSchema = MAPPER.schemaFor(Basketball.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(Sport.class).getAvroSchema(); + + System.out.println("Sport schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(sportItselfSchema, footballSchema, basketballSchema); + } + + @Union({ + // Interface being explicitly in @Union led to StackOverflowError exception. 
+ DocumentInterface.class, + Word.class, Excel.class}) + private interface DocumentInterface { + } + + private static class Word implements DocumentInterface { + } + + private static class Excel implements DocumentInterface { + } + + @Test + public void interface_explicitly_in_Union_annotation_test() throws IOException { + // GIVEN + final Schema wordSchema = MAPPER.schemaFor(Word.class).getAvroSchema(); + final Schema excelSchema = MAPPER.schemaFor(Excel.class).getAvroSchema(); + + // WHEN + Schema actualSchema = MAPPER.schemaFor(DocumentInterface.class).getAvroSchema(); + + System.out.println("Document schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(Schema.Type.UNION); + assertThat(actualSchema.getTypes()).containsExactlyInAnyOrder(wordSchema, excelSchema); + } + +}