diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala index be5a29d299a3..b9342c53d020 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala @@ -195,7 +195,9 @@ class XmlInferSchema(options: XmlOptions, caseSensitive: Boolean) private def inferField(parser: XMLEventReader): DataType = { parser.peek match { - case _: EndElement => NullType + case _: EndElement => + parser.nextEvent() + NullType case _: StartElement => inferObject(parser) case _: Characters => val structType = inferObject(parser).asInstanceOf[StructType] @@ -450,7 +452,7 @@ class XmlInferSchema(options: XmlOptions, caseSensitive: Boolean) oldTypeOpt match { // If the field name already exists, // merge the type and infer the combined field as an array type if necessary - case Some(oldType) if !oldType.isInstanceOf[ArrayType] && !newType.isInstanceOf[NullType] => + case Some(oldType) if !oldType.isInstanceOf[ArrayType] => ArrayType(compatibleType(caseSensitive, options.valueTag)(oldType, newType)) case Some(oldType) => compatibleType(caseSensitive, options.valueTag)(oldType, newType)