I have the following code:
/**
 * Question's draft: builds two DataFrames (df_66 and df_67) from one JSON file.
 *
 * The body is unfinished: the `*****` placeholder on the last line marks the
 * missing return expression, so the declared `DataFrame` result is never produced.
 *
 * @param path intended file location; currently unused because the local
 *             `val path` declared on the first line of the body shadows it
 */
def reader (path:String):DataFrame={
// Reads the hard-coded file line by line and concatenates every line into one string.
// This local `path` shadows the method parameter of the same name.
val path =sc.textFile("/FileStore/tables/Data_exo2.json").reduce((a,b)=>s"$a$b")
// Wraps that string in a one-row DataFrame; the column is referenced as `value` below.
val df = Seq((path)).toDF()
// Schema of a single element of the "Transaction" array.
val schema_tran = new StructType()
.add("Devis", StringType, false)
.add("IdTransaction",LongType, false)
.add("Pays", StringType, false)
.add("Prix",DoubleType, false)
.add("TypeProduit", StringType, false)
// Top-level schema used for the first parse: only the "Transaction" array is declared.
val schema = new StructType().add("Transaction", ArrayType(schema_tran),true)
// Parses `value` with `schema`, expands the parsed struct, explodes the
// "Transaction" array and then selects the fields of each exploded element.
val df_66 = df.select(from_json($"value",schema)as "struct")
.select($"struct.*")
.withColumn("Transaction", explode(col("Transaction")))
.select($"Transaction.*")
// Schema of a single element of the "Devis" array.
val schema_devis = new StructType()
.add("Devis", StringType, false)
.add("Taux",DoubleType, false)
// Top-level schema used for the second parse: only the "Devis" array is declared.
val schema_1 = new StructType()
.add("Devis", ArrayType(schema_devis),true)
// Same pipeline as df_66, applied to the "Devis" array.
val df_67 = df.select(from_json($"value",schema_1)as "struct")
.select($"struct.*")
.withColumn("Devis", explode(col("Devis")))
.select($"Devis.*")
// Placeholder left by the author: the return expression is what the question asks about.
*****}
So I have two DataFrames, df_66 and df_67. Basically, I want to define a function that returns those DataFrames one at a time, like this: reader(path)(0)
CodePudding user response:
Is this what you are looking for?
/**
 * Reads the JSON document at `path` and returns the two tables it contains,
 * keyed by index so that callers can write `reader(path)(0)`.
 *
 * The file is loaded as raw text into a single string column named `value`,
 * because `from_json` parses a string column against an explicit schema.
 * Loading with `format("json")` would yield already-parsed columns and no
 * `value` column, so the `from_json($"value", ...)` selections could not
 * be resolved.
 *
 * @param path location of the JSON file to read
 * @return a map where key `0` holds one row per element of the `Transaction`
 *         array and key `1` holds one row per element of the `Devis` array
 */
def reader(path: String): Map[Int, DataFrame] = {
  // `wholetext` keeps each file as one row instead of splitting it per line,
  // so a document spanning several lines stays parseable as a single value.
  val raw = spark.read.option("wholetext", true).text(path)

  // Element schema of the "Transaction" array.
  val transactionSchema = new StructType()
    .add("Devis", StringType, false)
    .add("IdTransaction", LongType, false)
    .add("Pays", StringType, false)
    .add("Prix", DoubleType, false)
    .add("TypeProduit", StringType, false)

  // Element schema of the "Devis" array.
  val rateSchema = new StructType()
    .add("Devis", StringType, false)
    .add("Taux", DoubleType, false)

  // Parses `value` as an object holding an array under `field`, then emits one
  // row per array element with the element's fields as top-level columns.
  def explodeArray(field: String, element: StructType): DataFrame = {
    val wrapper = new StructType().add(field, ArrayType(element), true)
    raw.select(from_json($"value", wrapper) as "struct")
      .select($"struct.*")
      .withColumn(field, explode(col(field)))
      .select(col(s"$field.*"))
  }

  Map(
    0 -> explodeArray("Transaction", transactionSchema), // df_66 in the question
    1 -> explodeArray("Devis", rateSchema)               // df_67 in the question
  )
}
You can get the DataFrame back using
// Each line below calls reader again; store the returned Map in a val to
// index it several times without rebuilding both DataFrames.
reader("some path")(0) // key 0: df_66, the exploded "Transaction" entries
reader("some path")(1) // key 1: df_67, the exploded "Devis" entries