I would like to create a class that applies a certain pandas method based on the input entered by the user.
More specifically, the class can apply pandas methods sub(), mul() or add(), where a series is subtracted, multiplied or added to different variables of a dataframe. But the class should apply only the methods specified by the user in the init, and ideally in the order specified by the user.
For example:
# Names of the pandas arithmetic methods the transformer knows how to apply.
_PERMITTED_FUNCTIONS = ["add", "sub", "mul"]


class RelativeFeatures:
    """Combine a set of variables with one or more reference columns.

    For every column in ``reference`` and every column in ``variables`` a new
    column named ``<variable>_<operation>_<reference>`` is added to the
    dataframe, where the operation is one of ``add``, ``sub`` or ``mul``.

    Parameters
    ----------
    variables:
        Columns that receive the operation.
    reference:
        Columns that are added to, subtracted from or multiplied with
        ``variables``.
    func:
        Names of the operations to apply, in the order they should run.
    """

    def __init__(
        self,
        variables: List[Union[str, int]],
        reference: List[Union[str, int]],
        func: List[str] = _PERMITTED_FUNCTIONS,
    ) -> None:
        self.variables = variables
        self.reference = reference
        # Copy so that mutating ``self.func`` can never alter the shared
        # module-level default list.
        self.func = list(func)

    def _sub(self, X):
        """Add ``<variable>_sub_<reference>`` columns to ``X`` and return it."""
        for reference in self.reference:
            varname = [
                str(var) + "_sub_" + str(reference)
                for var in self.variables
            ]
            # axis=0 aligns the reference Series on the row index.
            X[varname] = X[self.variables].sub(X[reference], axis=0)
        return X

    def _add(self, X):
        """Add ``<variable>_add_<reference>`` columns to ``X`` and return it."""
        for reference in self.reference:
            varname = [
                str(var) + "_add_" + str(reference)
                for var in self.variables
            ]
            X[varname] = X[self.variables].add(X[reference], axis=0)
        return X

    def _mul(self, X):
        """Add ``<variable>_mul_<reference>`` columns to ``X`` and return it."""
        for reference in self.reference:
            varname = [
                str(var) + "_mul_" + str(reference)
                for var in self.variables
            ]
            X[varname] = X[self.variables].mul(X[reference], axis=0)
        return X

    def transform(self, X):
        """Stub: should apply each operation named in ``self.func``, in order."""
        for method in self.func:
            # apply the method that matches the string in the list
            ...
So that, if a user creates the following class:
# Multiply first, then add: operations run in the order given in ``func``.
tr = RelativeFeatures(
    variables=["var1", "var2"],
    reference=["var3"],
    func=["mul", "add"],
)
and then passes a dataframe X[["var1", "var2", "var3"]] to the transform() method, the result is
X[["var1", "var2", "var3", "var1_mul_var3", "var2_mul_var3", "var1_add_var3", "var2_add_var3"]]
Is there a way in which I can call the methods in a specified order?
Something along these lines:
for function in self.func:
apply corresponding method
Thank you!
CodePudding user response:
Furas's suggestion was great, thank you!
I'm posting the answer here:
# Names of the pandas arithmetic methods the transformer knows how to apply.
_PERMITTED_FUNCTIONS = ["add", "sub", "mul"]


class RelativeFeatures:
    """Combine a set of variables with one or more reference columns.

    For every requested operation, every column in ``reference`` and every
    column in ``variables``, a new column named
    ``<variable>_<operation>_<reference>`` is added to the dataframe.

    Parameters
    ----------
    variables:
        Columns that receive the operation.
    reference:
        Columns that are added to, subtracted from or multiplied with
        ``variables``.
    func:
        Names of the operations to apply (``"add"``, ``"sub"``, ``"mul"``),
        in the order they should run.
    """

    def __init__(
        self,
        variables: List[Union[str, int]],
        reference: List[Union[str, int]],
        func: List[str] = _PERMITTED_FUNCTIONS,
    ) -> None:
        self.variables = variables
        self.reference = reference
        # Copy so that mutating ``self.func`` can never alter the shared
        # module-level default list.
        self.func = list(func)

    def _combine(self, X, operation: str):
        """Apply the pandas method ``operation`` against each reference column."""
        for reference in self.reference:
            new_columns = [
                f"{var}_{operation}_{reference}" for var in self.variables
            ]
            arithmetic = getattr(X[self.variables], operation)
            # axis=0 aligns the reference Series on the row index; the new
            # columns are written straight into ``X``.
            X[new_columns] = arithmetic(X[reference], axis=0)
        return X

    def _sub(self, X):
        """Add ``<variable>_sub_<reference>`` columns to ``X`` and return it."""
        return self._combine(X, "sub")

    def _add(self, X):
        """Add ``<variable>_add_<reference>`` columns to ``X`` and return it."""
        return self._combine(X, "add")

    def _mul(self, X):
        """Add ``<variable>_mul_<reference>`` columns to ``X`` and return it."""
        return self._combine(X, "mul")

    def transform(self, X):
        """Apply the operations listed in ``self.func``, in that order.

        The new columns are added to ``X`` itself, which is also returned.

        Raises
        ------
        KeyError
            If ``self.func`` contains a name other than "add", "mul" or "sub".
        """
        # Dispatch table: maps each permitted name to its bound method.
        methods_dict = {
            "add": self._add,
            "mul": self._mul,
            "sub": self._sub,
        }
        for func in self.func:
            X = methods_dict[func](X)
        return X