Adding a new column to a PySpark DataFrame that contains the names of the existing columns whose value is null - CodePudding

I want to add a new column "NUll_Values" to a PySpark DataFrame that lists, for each row, the names of the columns whose value is null, as shown below:

=======================================================|
ID | Maths | Science | English | NUll_Values           |
=======================================================|
11 | 80    | NULL    | 89      | Science               |
12 | NULL  | NULL    | 89      | Maths,Science         |
13 | 90    | 95      | 85      | NULL                  |
14 | NULL  | NULL    | NULL    | Maths,Science,English |
=======================================================|

CodePudding user response：

# Build "NUll_Values": a comma-separated list of the names of the columns that
# are null in each row.
#   * F.when(cond, c) has no .otherwise(), so it evaluates to null whenever the
#     column is NOT null; when it is null, it yields the column name c.
#   * F.concat_ws joins only the non-null pieces, so rows without any null
#     column end up with an empty string (see row 13 in the output below).
# The output column itself is excluded from the scan so that re-running this on
# a DataFrame that already has "NUll_Values" cannot list "NUll_Values" in it.
df = df.withColumn(
    "NUll_Values",
    F.concat_ws(
        ",",
        *[
            F.when(F.col(c).isNull(), c)
            for c in df.columns
            if c != "NUll_Values"
        ]
    )
)

df.show(truncate=False)
#  +---+-----+-------+-------+---------------------+
#  |ID |Maths|Science|English|NUll_Values          |
#  +---+-----+-------+-------+---------------------+
#  |11 |80   |null   |89     |Science              |
#  |12 |null |null   |89     |Maths,Science        |
#  |13 |90   |95     |85     |                     |
#  |14 |null |null   |null   |Maths,Science,English|
#  +---+-----+-------+-------+---------------------+

# Variant that stores null (instead of an empty string) in "NUll_Values" for
# rows that have no null column.
#   * F.when(cond, c) without .otherwise() is null unless the column is null,
#     and F.concat_ws joins only the non-null pieces, producing "" when
#     nothing is null.
#   * .replace("", None, subset=[...]) then turns that empty string into null,
#     restricted to the "NUll_Values" column only.
# The output column is excluded from the scan: if "NUll_Values" already exists
# on df (and may hold nulls from a previous run), it must not be reported as
# one of the null columns.
df = df.withColumn(
    "NUll_Values",
    F.concat_ws(
        ",",
        *[
            F.when(F.col(c).isNull(), c)
            for c in df.columns
            if c != "NUll_Values"
        ]
    )
).replace("", None, subset=["NUll_Values"])

df.show(truncate=False)
#  +---+-----+-------+-------+---------------------+
#  |ID |Maths|Science|English|NUll_Values          |
#  +---+-----+-------+-------+---------------------+
#  |11 |80   |null   |89     |Science              |
#  |12 |null |null   |89     |Maths,Science        |
#  |13 |90   |95     |85     |null                 |
#  |14 |null |null   |null   |Maths,Science,English|
#  +---+-----+-------+-------+---------------------+