Home > Back-end >  How to convert JSON Schema from Camel case to lower case
How to convert JSON Schema from Camel case to lower case

Time:12-27

I have a JSON schema whose keys are in camel case, and I am trying to convert all field names to lower case. I am having trouble with fields that are nested inside an ArrayType.

 import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructField, StructType}
 import org.apache.spark.sql.types.{DataType, StructType}
 import spark.implicits._

 // Obtain the Hive-enabled Spark session (getOrCreate reuses an existing one if present).
 val spark: SparkSession = SparkSession.builder().enableHiveSupport().getOrCreate()
 // Schema inferred from the JSON input; it is never reassigned, so it is a val rather than a var.
 val sample_schema = spark.read.json("path").schema

 /** Returns a copy of `schema` in which every field name, at every nesting level, is lower-cased.
   *
   * Field names are rewritten inside nested structs, inside array element types (including
   * arrays of arrays) and inside map key/value types. Data types, nullability flags
   * (`nullable`, `containsNull`, `valueContainsNull`) and field metadata are carried over
   * unchanged.
   *
   * @param schema the schema whose field names should be lower-cased
   * @return a new [[StructType]] with the same shape and lower-cased field names
   */
 def columnsToLowercase(schema: StructType): StructType = {
    import java.util.Locale
    import org.apache.spark.sql.types.MapType

    // Lower-cases the field names of one struct and recurses into each field's type.
    def lowercaseStruct(struct: StructType): StructType =
       StructType(struct.fields.map { field =>
          field.copy(
             // Locale.ROOT keeps the result independent of the JVM default locale
             // (e.g. the Turkish dotless-i rule).
             name = field.name.toLowerCase(Locale.ROOT),
             dataType = lowercaseType(field.dataType)
          )
       })

    // Walks any data type, rewriting the structs found inside container types.
    def lowercaseType(dataType: DataType): DataType = dataType match {
       case struct: StructType =>
          lowercaseStruct(struct)
       case ArrayType(elementType, containsNull) =>
          ArrayType(lowercaseType(elementType), containsNull)
       case MapType(keyType, valueType, valueContainsNull) =>
          MapType(lowercaseType(keyType), lowercaseType(valueType), valueContainsNull)
       case other =>
          // Any other type has no nested field names to rewrite here.
          other
    }

    lowercaseStruct(schema)
 }

 // Read the JSON input again, this time applying the lower-cased schema instead of inferring one.
 val jsonDFrame: DataFrame = spark.read.schema(columnsToLowercase(sample_schema)).json("path")

Sample Schema:

root
 |-- id: string (nullable = true)
 |-- master: struct (nullable = true)
 |    |-- code: string (nullable = true)
 |    |-- provInfo: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- claimInfo: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- claimId: string (nullable = true)
 |    |    |    |-- demoInfo: struct (nullable = true)
 |    |    |    |    |-- family: struct (nullable = true)
 |    |    |    |    |    |-- outOrder: struct (nullable = true)
 |    |    |    |    |    |    |-- LocOut: boolean (nullable = true)
 |    |    |    |    |    |    |-- found: boolean (nullable = true)
 |    |-- claimAddr: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- address: string (nullable = true)
 |-- system: string (nullable = true)

CodePudding user response:

You should be able to lowercase fields nested in an ArrayType by adding another case clause. For array columns, you also need to check the type of the array's elements:

def columnsToLowercase(schema: StructType): StructType = {
     // ....
          // Array column: lower-case the field name, and also the nested field names when the
          // elements are structs. Arrays whose elements are themselves arrays are not descended into.
          case StructField(name, dtype: ArrayType, nullable, meta) => dtype.elementType match {
            // Keep the array's own containsNull flag instead of forcing it to true.
            case s: StructType => StructField(name.toLowerCase, ArrayType(StructType(recurRename(s)), dtype.containsNull), nullable, meta)
            // Non-struct elements: keep the whole ArrayType; passing the bare element type here
            // would turn e.g. array<string> into string.
            case _ => StructField(name.toLowerCase, dtype, nullable, meta)
          }          
    //.... 
}

Applying on your schema:

df.printSchema
//root
// |-- id: string (nullable = true)
// |-- master: struct (nullable = true)
// |    |-- provInfo: struct (nullable = true)
// |    |    |-- claimInfo: array (nullable = true)
// |    |    |    |-- element: struct (containsNull = true)
// |    |    |    |    |-- claimId: string (nullable = true)
// |    |    |-- demoInfo: struct (nullable = true)
// |    |    |    |-- family: struct (nullable = true)
// |    |    |    |    |-- outOrder: struct (nullable = true)
// |    |    |    |    |    |-- LocOut: boolean (nullable = false)
// |    |    |    |    |    |-- found: boolean (nullable = false)
// |    |-- claimAddr: array (nullable = true)
// |    |    |-- element: struct (containsNull = true)
// |    |    |    |-- address: string (nullable = true)
// |-- system: string (nullable = true)


columnsToLowercase(df.schema).printTreeString()
//root
// |-- id: string (nullable = true)
// |-- master: struct (nullable = true)
// |    |-- provinfo: struct (nullable = true)
// |    |    |-- claiminfo: array (nullable = true)
// |    |    |    |-- element: struct (containsNull = true)
// |    |    |    |    |-- claimid: string (nullable = true)
// |    |    |-- demoinfo: struct (nullable = true)
// |    |    |    |-- family: struct (nullable = true)
// |    |    |    |    |-- outorder: struct (nullable = true)
// |    |    |    |    |    |-- locout: boolean (nullable = false)
// |    |    |    |    |    |-- found: boolean (nullable = false)
// |    |-- claimaddr: array (nullable = true)
// |    |    |-- element: struct (containsNull = true)
// |    |    |    |-- address: string (nullable = true)
// |-- system: string (nullable = true)
  • Related