I have a DF
# Load the source columns; `owner` and `support` are struct columns.
# The projection selects `user_group` so it matches the column name used
# by every later select on this DataFrame.
df = spark.sql("""select number,name,owner,support,user_group,business_unit from table""")
I want to rename owner.display_value as owner_display_value and support.display_value as support_display_value
The owner and support columns are structs, so I'm selecting only the display_value field from each of them.
df2 = df.select("number","name","owner.display_value" as owner_display_value,"support.display_value" as support_display_value, "user_group","business_unit")
But I get the error: 'DataFrame' object has no attribute 'rename'. Kindly suggest a fix.
CodePudding user response:
replace
df2 = df.select(
"number",
"name",
"owner.display_value" as owner_display_value,
"support.display_value" as support_display_value,
"user_group",
"business_unit"
)
with
# selectExpr() accepts SQL expression strings, so the SQL-style
# "<field> as <alias>" form can be used to rename the struct fields.
df2 = df.selectExpr(*[
    "number",
    "name",
    "owner.display_value as owner_display_value",
    "support.display_value as support_display_value",
    "user_group",
    "business_unit",
])
CodePudding user response:
Use F.col("column_name").alias("new_name") to rename a column inside select().
Full example:
import pyspark.sql.functions as F
from pyspark.sql.types import LongType, StringType, StructField, StructType

# NOTE: assumes an active SparkSession is already bound to `spark`
# (as in a notebook or the pyspark shell).

# `owner` and `support` are structs holding a single `display_value` field,
# mirroring the column layout described in the question.
schema = StructType([
    StructField("number", LongType()),
    StructField("name", StringType()),
    StructField("owner", StructType([StructField("display_value", StringType())])),
    StructField("support", StructType([StructField("display_value", StringType())])),
    StructField("user_group", StringType()),
    StructField("business_unit", StringType()),
])
df = spark.createDataFrame(
    data=[[123, "abc", ("onwr",), ("sprt",), "usr", "bu"]],
    schema=schema,
)

# Column.alias() gives each extracted struct field its new flat column name.
df2 = df.select(
    "number",
    "name",
    F.col("owner.display_value").alias("owner_display_value"),
    F.col("support.display_value").alias("support_display_value"),
    "user_group",
    "business_unit",
)

# Print the result without truncating cell contents.
df2.show(truncate=False)
[Out]:
+------+----+-------------------+---------------------+----------+-------------+
|number|name|owner_display_value|support_display_value|user_group|business_unit|
+------+----+-------------------+---------------------+----------+-------------+
|123   |abc |onwr               |sprt                 |usr       |bu           |
+------+----+-------------------+---------------------+----------+-------------+