I am trying to build a dataframe whose columns are included conditionally. My code:
from faker import Faker
import pandas as pd
import random
# Shared Faker instance; create_rows_faker calls its name/address/email methods.
fake = Faker()
def create_rows_faker(num=1):
    """Return a list of ``num`` fake records.

    Each record is a dict with the keys ``"name"``, ``"address"`` and
    ``"email"`` (in that order), whose values come from the module-level
    ``fake`` object.

    Args:
        num: Number of records to generate.

    Returns:
        A list of ``num`` dicts, suitable for passing to ``pd.DataFrame``.
    """
    # The original literal listed "name" twice; the second entry silently
    # overwrote the first, so one generated name per row was thrown away.
    return [
        {
            "name": fake.name(),
            "address": fake.address(),
            "email": fake.email(),
        }
        for _ in range(num)
    ]
produces
# Build a three-row DataFrame from the generated records and display it.
df = pd.DataFrame(create_rows_faker(3))
df
How can I change the definition of output
so that a column such as name is included only when a variable is set (e.g. name_column == '1')
and is left out otherwise, and similarly for address and email?
CodePudding user response:
Use a standard for loop instead of overcomplicating the comprehension.
def create_rows_faker(num=1, name_col=True, address_col=True, email_col=False):
    """Generate ``num`` fake records containing only the enabled columns.

    Args:
        num: Number of records to generate.
        name_col: Include a ``"name"`` entry when truthy.
        address_col: Include an ``"address"`` entry when truthy.
        email_col: Include an ``"email"`` entry when truthy.

    Returns:
        A list of ``num`` dicts. Keys appear in the order name, address,
        email, restricted to the enabled columns.
    """
    # Each key doubles as the name of the method on ``fake`` that fills it.
    switches = (("name", name_col), ("address", address_col), ("email", email_col))
    enabled = [key for key, flag in switches if flag]
    return [{key: getattr(fake, key)() for key in enabled} for _ in range(num)]
CodePudding user response:
Here is an option using a dictionary of functions and a list of the keys:
def create_rows_faker(num=1, use=('name', 'address', 'email')):
    """Generate ``num`` fake records limited to the columns named in ``use``.

    Args:
        num: Number of records to generate.
        use: Iterable of column names to include. Only ``"name"``,
            ``"address"`` and ``"email"`` are recognised; other names are
            ignored.

    Returns:
        A list of ``num`` dicts. Keys follow the fixed order name, address,
        email, regardless of the order given in ``use``.
    """
    wanted = set(use)
    available = (
        ("name", fake.name),
        ("address", fake.address),
        ("email", fake.email),
    )

    # Keep only the requested generators, preserving the table's order.
    generators = {}
    for column, maker in available:
        if column in wanted:
            generators[column] = maker

    rows = []
    for _ in range(num):
        rows.append({column: maker() for column, maker in generators.items()})
    return rows
# Request only the "name" column; address and email are omitted.
pd.DataFrame(create_rows_faker(3, use=['name']))
output:
name
0 Tracy Alexander MD
1 Mark Winters
2 Lori Edwards