I have a dataframe whose columns are populated using faker, this is the code
import pandas as pd
import numpy as np
import faker
fake = Faker()
def make_workers():
status_list = ['in', 'out']
room_list = ['FL1_RM1','FL1_RM2','FL1_RM3', 'FL1_RM4', 'FL2_RM1','FL2_RM2','FL2_RM3','FL2_RM4', 'FL3_RM1', 'FL3_RM2','FL3_RM3', 'FL3_RM4', 'FL4_RM1','FL4_RM2','FL4_RM3', 'FL4_RM4']
Property = ['B1','B2','B3','B4']
d1 = datetime.strptime(f'03/01/2022', '%m/%d/%Y')
d2 = datetime.strptime(f'08/08/2022', '%m/%d/%Y')
timestamps = pd.date_range(d1, d2, freq="1min")
fake_workers = [{'ID':fake.random_number(digits=10),
'Property num': np.random.choice(Property, p=[0.25, 0.25, 0.25, 0.25]),
'Temp':np.random.randint(low=35, high=50),
'noted Date':timestamps[x],
'Status':np.random.choice(status_list),
'Humidity': np.random.uniform(low=-35.09, high=70.00),
'Dust':np.random.randint(low=2, high=5),
'CO2 level':np.random.uniform(low= 350.09, high=450.00),
'room_id':np.random.choice(room_list)} for x in range(len(timestamps))]
return fake_workers
worker_df = pd.DataFrame(make_workers())
worker_df.head(30)
I want to add a new column called Floor_Number which takes the values from third letter from room_id column, for example if room_id -> FL1_RM2 then Floor_number -> is 1 and so on
whats the way for this
CodePudding user response:
One way is to nest the list comprehension you already have going:
from datetime import datetime
import numpy as np
import pandas as pd
from faker import Faker
fake = Faker()
def make_workers() -> list:
status_list = ['in', 'out']
room_list = ['FL1_RM1', 'FL1_RM2', 'FL1_RM3', 'FL1_RM4', 'FL2_RM1', 'FL2_RM2', 'FL2_RM3', 'FL2_RM4', 'FL3_RM1',
'FL3_RM2', 'FL3_RM3', 'FL3_RM4', 'FL4_RM1', 'FL4_RM2', 'FL4_RM3', 'FL4_RM4']
Property = ['B1', 'B2', 'B3', 'B4']
d1 = datetime.strptime('03/01/2022', '%m/%d/%Y')
d2 = datetime.strptime('08/08/2022', '%m/%d/%Y')
timestamps = pd.date_range(d1, d2, freq="1min")
return [{**elem, **{"Floor_Number": elem.get("room_id")[2]}} for elem in [
{'ID' : fake.random_number(digits=10),
'Property num': np.random.choice(Property, p=[0.25, 0.25, 0.25, 0.25]),
'Temp' : np.random.randint(low=35, high=50),
'noted Date' : timestamps[x],
'Status' : np.random.choice(status_list),
'Humidity' : np.random.uniform(low=-35.09, high=70.00),
'Dust' : np.random.randint(low=2, high=5),
'CO2 level' : np.random.uniform(low=350.09, high=450.00),
'room_id' : np.random.choice(room_list)} for x in range(len(timestamps))]]
worker_df = pd.DataFrame(make_workers())
print(worker_df.head(30))
Readability could be improved here, perhaps by creating the entire dataframe within make_workers()
or splitting this process/function into parts.
Output
ID | Property num | Temp | noted Date | Status | Humidity | Dust | CO2 level | room_id | Floor_Number |
---|---|---|---|---|---|---|---|---|---|
2012906991 | B2 | 49 | 2022-03-01 00:00:00 | in | 30.4534 | 3 | 383.193 | FL4_RM4 | 4 |
3491237933 | B4 | 46 | 2022-03-01 00:01:00 | out | -2.28172 | 2 | 414.814 | FL3_RM3 | 3 |
4774765663 | B1 | 40 | 2022-03-01 00:02:00 | out | -28.6572 | 2 | 449.09 | FL4_RM3 | 4 |
7897538907 | B2 | 49 | 2022-03-01 00:03:00 | out | 60.627 | 2 | 365.443 | FL1_RM1 | 1 |
6767676785 | B4 | 38 | 2022-03-01 00:04:00 | out | 4.66333 | 4 | 350.222 | FL4_RM3 | 4 |
1383598759 | B1 | 38 | 2022-03-01 00:05:00 | out | 28.5089 | 4 | 368.719 | FL3_RM4 | 3 |
9909123329 | B2 | 42 | 2022-03-01 00:06:00 | out | -25.8358 | 3 | 361.844 | FL1_RM3 | 1 |
2368808891 | B3 | 40 | 2022-03-01 00:07:00 | out | -34.4912 | 2 | 390.457 | FL3_RM4 | 3 |
2154969325 | B3 | 49 | 2022-03-01 00:08:00 | in | -9.7037 | 2 | 381.218 | FL4_RM2 | 4 |
4784061156 | B2 | 35 | 2022-03-01 00:09:00 | in | 9.28874 | 3 | 448.503 | FL1_RM2 | 1 |
1099496995 | B1 | 43 | 2022-03-01 00:10:00 | in | 26.6181 | 3 | 437.796 | FL1_RM1 | 1 |
4886335510 | B1 | 40 | 2022-03-01 00:11:00 | in | 4.54296 | 4 | 421.517 | FL4_RM3 | 4 |
1167417135 | B2 | 47 | 2022-03-01 00:12:00 | out | 9.91049 | 2 | 363.036 | FL2_RM1 | 2 |
8843281938 | B2 | 48 | 2022-03-01 00:13:00 | out | -21.9967 | 2 | 353.61 | FL2_RM3 | 2 |
1760220531 | B4 | 43 | 2022-03-01 00:14:00 | out | 21.9313 | 2 | 396.611 | FL4_RM4 | 4 |
6108731121 | B4 | 42 | 2022-03-01 00:15:00 | out | -15.861 | 4 | 377.379 | FL4_RM1 | 4 |
6240491738 | B3 | 49 | 2022-03-01 00:16:00 | out | -3.72436 | 4 | 397.546 | FL4_RM2 | 4 |
1487766621 | B2 | 49 | 2022-03-01 00:17:00 | out | 16.5894 | 3 | 389.542 | FL3_RM2 | 3 |
8797318432 | B2 | 42 | 2022-03-01 00:18:00 | out | 12.3838 | 4 | 379.292 | FL4_RM4 | 4 |
6681081800 | B4 | 40 | 2022-03-01 00:19:00 | out | -7.06711 | 3 | 351.191 | FL1_RM2 | 1 |
9273902981 | B4 | 39 | 2022-03-01 00:20:00 | out | 46.1884 | 4 | 432.785 | FL2_RM3 | 2 |
4708480330 | B2 | 43 | 2022-03-01 00:21:00 | in | 3.87004 | 2 | 428.494 | FL3_RM3 | 3 |
4618656781 | B2 | 39 | 2022-03-01 00:22:00 | out | -22.7715 | 2 | 351.438 | FL2_RM2 | 2 |
7637424523 | B2 | 40 | 2022-03-01 00:23:00 | in | 58.0428 | 2 | 358.484 | FL3_RM2 | 3 |
9554140160 | B3 | 45 | 2022-03-01 00:24:00 | in | -2.04843 | 3 | 404.48 | FL1_RM4 | 1 |
1886102987 | B3 | 38 | 2022-03-01 00:25:00 | in | 59.717 | 4 | 396.403 | FL2_RM1 | 2 |
8264475612 | B1 | 37 | 2022-03-01 00:26:00 | out | 5.8059 | 4 | 353.922 | FL4_RM1 | 4 |
8375042467 | B4 | 41 | 2022-03-01 00:27:00 | in | -22.43 | 3 | 423.655 | FL2_RM3 | 2 |
6580935111 | B1 | 48 | 2022-03-01 00:28:00 | out | 49.2072 | 4 | 362.047 | FL2_RM3 | 2 |
9222349838 | B4 | 48 | 2022-03-01 00:29:00 | in | 9.86473 | 3 | 423.386 | FL2_RM2 | 2 |