Home > Software design > Returning an error in PySpark 'when' condition
Returning an error in PySpark 'when' condition

Time:09-09

I have the following function:

@udf(DoubleType())
def rate_calc(periodo, valor_par, valor_ven, valor_cli):
    """Spark UDF returning a rate as a double.

    Returns 0.0 when ``periodo * valor_par`` equals ``valor_ven``; otherwise
    returns ``npf.rate(periodo, valor_par, valor_ven, valor_cli)`` converted
    to a Python float.

    The arguments are used as received, with no None check, so a row whose
    ``periodo`` and ``valor_par`` are both None raises ``TypeError`` on the
    multiplication.
    """
    total_parcelas = periodo * valor_par
    if total_parcelas == valor_ven:
        # The instalments add up exactly to the sale value.
        return 0.0
    taxa = npf.rate(periodo, valor_par, valor_ven, valor_cli)
    return float(taxa)

And I built this function:

def transformation_d1_gp(df_input: DataFrame) -> DataFrame:
    """Aggregate sales per GRUPO_PROMOCIONAL_NOVO and derive the group metrics.

    Steps, in order:
      1. Add the row-level product PARCELA_CLIENTE * VALOR_VENDA_PEDIDO.
      2. Group by GRUPO_PROMOCIONAL_NOVO and sum VALOR_VENDA_PEDIDO,
         VALOR_PG_CLIENTE and that product.
      3. Derive PLANO_MEDIO, NEGATIVE_SALES, PG_PERIODO, REBATE_R$, REBATE_%
         and MIX_VENDAS, each cast to double.
      4. Compute TAXA: 0 when PLANO_MEDIO, PG_PERIODO or NEGATIVE_SALES is
         null or zero, otherwise the result of the ``rate_calc`` UDF.

    Returns a DataFrame with the columns GRUPO_PROMOCIONAL_NOVO,
    SUM_VALOR_VENDA, MIX_VENDAS, REBATE_R$, PLANO_MEDIO, REBATE_%, TAXA and
    NEGATIVE_SALES.
    """

    def as_double(column):
        # Every derived metric is stored as a Spark double.
        return column.cast(DoubleType())

    # Row level: instalment count weighted by the order's sale value.
    weighted = df_input.withColumn(
        'PROD_PLANO_VLR',
        sf.col('PARCELA_CLIENTE') * sf.col('VALOR_VENDA_PEDIDO'),
    )

    # One row per promotional group with the three sums.
    per_group = weighted.groupBy('GRUPO_PROMOCIONAL_NOVO').agg(
        sum(sf.col('VALOR_VENDA_PEDIDO')).alias("SUM_VALOR_VENDA"),
        sum(sf.col('VALOR_PG_CLIENTE')).alias("SUM_PG_CLIENTE"),
        sum(sf.col('PROD_PLANO_VLR')).alias('PROD_PLANO_VLR'),
    )

    metrics = (
        per_group
        .withColumn('PLANO_MEDIO', as_double(sf.col('PROD_PLANO_VLR') / sf.col('SUM_VALOR_VENDA')))
        .withColumn('NEGATIVE_SALES', -as_double(sf.col('SUM_VALOR_VENDA')))
        .withColumn('PG_PERIODO', as_double(sf.col('SUM_PG_CLIENTE') / sf.col('PLANO_MEDIO')))
        .withColumn('REBATE_R$', as_double(sf.col('SUM_PG_CLIENTE') - sf.col('SUM_VALOR_VENDA')))
        .withColumn('REBATE_%', as_double(sf.col('REBATE_R$') / sf.col('SUM_PG_CLIENTE')))
    )

    # Window with no partition columns: the sum runs over every group row.
    total_vendas = sf.sum('SUM_VALOR_VENDA').over(Window.partitionBy())
    metrics = metrics.withColumn(
        'MIX_VENDAS',
        as_double(sf.col('SUM_VALOR_VENDA') / total_vendas),
    )

    # Rows for which the rate is reported as 0 instead of being computed.
    plano_vazio = (sf.col('PLANO_MEDIO').isNull()) | (sf.col('PLANO_MEDIO') == 0)
    periodo_vazio = (sf.col('PG_PERIODO').isNull()) | (sf.col('PG_PERIODO') == 0)
    vendas_vazias = (sf.col('NEGATIVE_SALES') == -0) | (sf.col('NEGATIVE_SALES').isNull())

    taxa = (
        when(plano_vazio, 0)
        .when(periodo_vazio, 0)
        .when(vendas_vazias, 0)
        .otherwise(rate_calc('PLANO_MEDIO', 'PG_PERIODO', 'NEGATIVE_SALES', sf.lit(0.0)))
    )

    df_output = metrics.withColumn('TAXA', taxa).select(
        'GRUPO_PROMOCIONAL_NOVO',
        'SUM_VALOR_VENDA',
        'MIX_VENDAS',
        'REBATE_R$',
        'PLANO_MEDIO',
        'REBATE_%',
        'TAXA',
        'NEGATIVE_SALES',
    )
    return df_output

Note that I call 'rate_calc' only in the 'otherwise' branch. For some rows one of the 'when' conditions is true, so the expression should return 0 without calling the function. Instead, I get this error:

PythonException: 
  An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
  File "<ipython-input-40-be527cb47980>", line 11, in rate_calc
TypeError: unsupported operand type(s) for *: 'NoneType' and 'NoneType'

PS: If I put 0 in the 'otherwise' branch just as a test, it runs without any error and respects the 'when' conditions. Can anyone help me?

CodePudding user response:

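Spark does not guarantee that a Python UDF is skipped for rows where an earlier 'when' condition matched, so the function itself has to cope with None inputs. You could test the types or catch the error inside your function, something like: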

@udf(DoubleType())
def rate_calc(periodo, valor_par, valor_ven, valor_cli):
    """Null-safe Spark UDF returning a rate as a double.

    Returns 0.0 when any argument is None, when ``periodo * valor_par``
    equals ``valor_ven``, or when ``npf.rate`` raises ``ValueError``.
    Otherwise returns ``npf.rate(periodo, valor_par, valor_ven, valor_cli)``
    converted to a Python float.
    """
    # Spark may evaluate a Python UDF for every row, including rows that a
    # surrounding when(...) branch already handles, so null columns reach
    # this function as None. Guard before doing any arithmetic; otherwise
    # the multiplication below raises
    # "TypeError: unsupported operand type(s) for *: 'NoneType' and 'NoneType'".
    if any(arg is None for arg in (periodo, valor_par, valor_ven, valor_cli)):
        return 0.0

    if (periodo * valor_par) == valor_ven:
        return 0.0

    try:
        return float(npf.rate(periodo, valor_par, valor_ven, valor_cli))
    except ValueError:
        # Keep the original fallback for inputs npf.rate rejects.
        return 0.0
  • Related