How to ignore an error raised while using a lambda function on dataframe rows?-CodePudding

I am applying a Pandas operation to all rows using a lambda function:

# Matches date-like text: 2-4 digits, a separator, 1-2 digits, a separator,
# 2-4 digits. NOTE(review): inside [...] the "|" is a literal character, so
# "[-|/]" also accepts "|" as a separator -- confirm that is intended.
match = re.compile(r"([\d]{2,4}[-|/][\d]{1,2}[-|/][\d]{2,4})")
# Replacement callback: parse the matched text as a date and return its full
# month name ("%B"); pd.to_datetime raises if the text is not a valid date.
date_to_month = lambda x: pd.to_datetime(x.group(0)).strftime("%B")
# Replace the matches in the "path" column with the callback's result.
data["path"] = data["path"].str.replace(match, date_to_month, regex=True)

The dataframe is very large, and for one particular row I get the following error:

DateParseError: Invalid date specified (17/25)

I tried wrapping the code in a try/except block, like below:

try:
    match = re.compile(r"([\d]{2,4}[-|/][\d]{1,2}[-|/][\d]{2,4})")
    date_to_month = lambda x: pd.to_datetime(x.group(0)).strftime("%B")
    # The whole-column replace is a single statement: if the callback raises
    # for any one row, the assignment to data["path"] never happens.
    data["path"] = data["path"].str.replace(match, date_to_month, regex=True)
except:
    # Bare except silently swallows every exception, not only the date error.
    pass

Now the error is suppressed. The problem is that just one row causes the error, yet all the other rows are affected too: the exception aborts the whole operation, so none of the rows are converted.

Is there a way to skip only the rows that raise an error, without affecting the operation on the other rows?

CodePudding user response：

So, to mimic your use case:

import re
import pandas as pd

# Toy frame: "path" holds two-digit values wrapped in dashes; "99" is not a
# valid month number.
df = pd.DataFrame({"col": ["a", "b", "c"], "path": ["--10--", "--99--", "--12--"]})

# Should convert '10' to 'October'
# NOTE(review): the pattern literal should be a raw string to avoid an
# invalid-escape warning on newer Python versions.
match = re.compile("(\d{2})")
date_to_month = lambda x: pd.to_datetime(x[0], format="%m").strftime("%B")

# Raises ValueError: unconverted data remains [99]
df["path"] = df["path"].str.replace(match, date_to_month, regex=True)

Here is one possible workaround:

# Compiled once at import time instead of on every call; raw string so the
# backslash escape is not interpreted by the Python string parser.
_TWO_DIGITS = re.compile(r"(\d{2})")


def _month_name(found):
    """Return the full month name for a regex match holding a month number."""
    return pd.to_datetime(found[0], format="%m").strftime("%B")


def convert_month(x):
    """Replace every two-digit group in *x* with the month name it denotes.

    Uses Python's ``re.sub`` on a single string rather than pandas'
    ``Series.str.replace``, so the conversion can be applied (and guarded
    against errors) one row at a time.

    Args:
        x: The string to transform, e.g. ``"--10--"``.

    Returns:
        A copy of *x* with each two-digit group replaced by a month name,
        e.g. ``"--October--"``. Strings without a two-digit group are
        returned unchanged.

    Raises:
        ValueError: If a two-digit group is not a valid month number
            (e.g. ``"99"``).
    """
    return _TWO_DIGITS.sub(_month_name, x)


def ignore_exception(func, x, *, exceptions=Exception):
    """Call ``func(x)``, falling back to ``x`` itself if the call fails.

    This is a deliberate best-effort helper: a value that cannot be
    converted is passed through unchanged instead of aborting the caller.

    Args:
        func: A callable taking one argument.
        x: The value to pass to *func*.
        exceptions: Exception class (or tuple of classes) to suppress.
            Defaults to ``Exception``, i.e. any ordinary error; pass a
            narrower type to let unexpected errors propagate.

    Returns:
        ``func(x)`` on success, otherwise *x* unchanged.
    """
    try:
        return func(x)
    except exceptions:
        return x


# Apply row by row so that a failure in one row only leaves that row unchanged.
df["path"] = df["path"].apply(lambda x: ignore_exception(convert_month, x))

print(df)
# Output with no error raised
  col          path
0   a   --October--
1   b        --99--
2   c  --December--

Same idea, with a decorator:

def ignore_exception(func):
    """Decorate *func* so a failing call returns its first argument unchanged.

    Args:
        func: The callable to guard. Its first positional argument is the
            value handed back when the call raises.

    Returns:
        A wrapper with the same name and docstring as *func* that returns
        ``func(x, ...)`` on success and ``x`` when any ``Exception`` is
        raised.
    """
    import functools  # local import keeps this snippet self-contained

    @functools.wraps(func)  # preserve func's name and docstring
    def wrapper(x, *args, **kwargs):
        try:
            return func(x, *args, **kwargs)
        except Exception:
            # Deliberate best-effort: pass the input through untouched
            # rather than abort the caller.
            return x

    return wrapper

@ignore_exception
def convert_month(x):
   # Body elided: same implementation as the convert_month function shown in
   # the earlier workaround.
   ...

# No helper lambda is needed here: the decorated function handles per-row
# errors itself.
df["path"] = df["path"].apply(convert_month)

print(df)
# Output with no error raised
  col          path
0   a   --October--
1   b        --99--
2   c  --December--