I am mapping countries in my data, I do the following for 1 key:value
pair
from itertools import chain
countryDict = {
'Australia': 'ANZ',
'New Zealand': 'ANZ',
'China': 'CHINA PLUS',
'Hong Kong': 'CHINA PLUS',
'India': 'INDIA',
'Japan': 'JAPAN',
'Indonesia': 'SEA',
'Malaysia': 'SEA',
'Philippines': 'SEA',
'Singapore': 'SEA',
'Thailand': 'SEA',
'VietNam': 'SEA',
'South Korea': 'SOUTH KOREA',
'Taiwan': 'TAIWAN',
}
mapping_expr = F.create_map([F.lit(x) for x in chain(*countryDict.items())])
df = df.withColumn('Country_L1', mapping_expr[df['Customer_Country']])
I am not sure how to do it for multiple keys: 1 value
, I am currently using when and otherwise to achieve but I want something like a dictionary to make it look simple
bbo = bbo.withColumn("Country_L1", F.when(F.col('customer_country').isin(
'AU', 'CC', 'CK', 'CX', 'FJ', 'FM', 'GU', 'KI', 'MH', 'MP', 'NF', 'NR', 'NU', 'NZ', 'PG', 'PN', 'PW', 'TO', 'TV', 'WF', 'WS', 'SB', 'VU'), F.lit('ANZ')).otherwise(
F.when(F.col('customer_country').isin(
'CN', 'HK', 'MO'), F.lit('CHINA PLUS')).otherwise(
F.when(F.col('customer_country').isin(
'BD', 'BN', 'BT', 'ID', 'KH', 'KP', 'LA', 'LK', 'MM', 'MN', 'MV', 'MY', 'NP', 'PH', 'SG', 'TH', 'TL', 'VN'), F.lit('SEA')).otherwise(
F.when(F.col('customer_country') == 'JP', F.lit('JAPAN')).otherwise(
F.when(F.col('customer_country') == 'KR', F.lit('SOUTH KOREA')).otherwise(
F.when(F.col('customer_country') == 'TW', F.lit('TAIWAN')).otherwise(
F.lit('Non APAC'))
)
)
)
)
)
)
any help to make this simple would be appreciated, thanks in advance.
CodePudding user response:
You can have a dict like this:
mapping = {
'ANZ': ['AU', 'CC', 'CK', 'CX', 'FJ', 'FM', 'GU', 'KI', 'MH', 'MP', 'NF', 'NR', 'NU', 'NZ', 'PG', 'PN', 'PW', 'TO',
'TV', 'WF', 'WS', 'SB', 'VU'],
'CHINA PLUS': ['CN', 'HK', 'MO'],
'SEA': ['BD', 'BN', 'BT', 'ID', 'KH', 'KP', 'LA', 'LK', 'MM', 'MN', 'MV', 'MY', 'NP', 'PH', 'SG', 'TH', 'TL', 'VN'],
'JAPAN': ['JP'],
'SOUTH KOREA': ['KR'],
'TAIWAN': ['TW']
}
Then use python functools.reduce
function to dynamically generate when
expressions from the dict elements:
from functools import reduce
from pyspark.sql import functions as F
bbo = bbo.withColumn(
"Country_L1",
reduce(
lambda a, b: a.when(F.col("customer_country").isin(b[1]), b[0]),
mapping.items(),
F
).otherwise('Non APAC')
)
CodePudding user response:
look_up={ # Store the relations here
'SEA': ['Indonesia','Malaysia','Philippines'],
'JAPAN' :['Japan']
}
rev_lookup={value: key for (key, values) in look_up.items() for value in values } #Reverse key-value
rev_lookup
Result:
{'Indonesia': 'SEA', 'Malaysia': 'SEA', 'Philippines': 'SEA', 'Japan': 'JAPAN'}
Possible duplicate with Reverse / invert a dictionary mapping