how to populate a column's value of a dataframe based on other column value inside a for loop?-CodePudding

I have a dataframe whose columns are populated using faker, this is the code

import pandas as pd
import numpy as np
import faker

fake = Faker()

def make_workers():
    status_list = ['in', 'out']
    room_list = ['FL1_RM1','FL1_RM2','FL1_RM3', 'FL1_RM4', 'FL2_RM1','FL2_RM2','FL2_RM3','FL2_RM4', 'FL3_RM1', 'FL3_RM2','FL3_RM3', 'FL3_RM4', 'FL4_RM1','FL4_RM2','FL4_RM3', 'FL4_RM4']
    Property = ['B1','B2','B3','B4']
    d1 = datetime.strptime(f'03/01/2022', '%m/%d/%Y')
    d2 = datetime.strptime(f'08/08/2022', '%m/%d/%Y')
    timestamps = pd.date_range(d1, d2, freq="1min")
    fake_workers = [{'ID':fake.random_number(digits=10),
                  'Property num': np.random.choice(Property, p=[0.25, 0.25, 0.25, 0.25]),
                  'Temp':np.random.randint(low=35, high=50), 
                  'noted Date':timestamps[x],
                  'Status':np.random.choice(status_list),
                  'Humidity': np.random.uniform(low=-35.09, high=70.00),
                   
                  'Dust':np.random.randint(low=2, high=5),
                  'CO2 level':np.random.uniform(low= 350.09, high=450.00),
                  'room_id':np.random.choice(room_list)} for x in range(len(timestamps))]
        
    return fake_workers

worker_df = pd.DataFrame(make_workers())
worker_df.head(30)

I want to add a new column called Floor_Number which takes the values from third letter from room_id column, for example if room_id -> FL1_RM2 then Floor_number -> is 1 and so on

whats the way for this

CodePudding user response：

One way is to nest the list comprehension you already have going:

from datetime import datetime

import numpy as np
import pandas as pd
from faker import Faker

fake = Faker()


def make_workers() -> list:
    status_list = ['in', 'out']
    room_list = ['FL1_RM1', 'FL1_RM2', 'FL1_RM3', 'FL1_RM4', 'FL2_RM1', 'FL2_RM2', 'FL2_RM3', 'FL2_RM4', 'FL3_RM1',
                 'FL3_RM2', 'FL3_RM3', 'FL3_RM4', 'FL4_RM1', 'FL4_RM2', 'FL4_RM3', 'FL4_RM4']
    Property = ['B1', 'B2', 'B3', 'B4']
    d1 = datetime.strptime('03/01/2022', '%m/%d/%Y')
    d2 = datetime.strptime('08/08/2022', '%m/%d/%Y')
    timestamps = pd.date_range(d1, d2, freq="1min")
    return [{**elem, **{"Floor_Number": elem.get("room_id")[2]}} for elem in [
        {'ID'          : fake.random_number(digits=10),
         'Property num': np.random.choice(Property, p=[0.25, 0.25, 0.25, 0.25]),
         'Temp'        : np.random.randint(low=35, high=50),
         'noted Date'  : timestamps[x],
         'Status'      : np.random.choice(status_list),
         'Humidity'    : np.random.uniform(low=-35.09, high=70.00),
         'Dust'        : np.random.randint(low=2, high=5),
         'CO2 level'   : np.random.uniform(low=350.09, high=450.00),
         'room_id'     : np.random.choice(room_list)} for x in range(len(timestamps))]]


worker_df = pd.DataFrame(make_workers())
print(worker_df.head(30))

Readability could be improved here, perhaps by creating the entire dataframe within make_workers() or splitting this process/function into parts.

Output

ID	Property num	Temp	noted Date	Status	Humidity	Dust	CO2 level	room_id	Floor_Number
2012906991	B2	49	2022-03-01 00:00:00	in	30.4534	3	383.193	FL4_RM4	4
3491237933	B4	46	2022-03-01 00:01:00	out	-2.28172	2	414.814	FL3_RM3	3
4774765663	B1	40	2022-03-01 00:02:00	out	-28.6572	2	449.09	FL4_RM3	4
7897538907	B2	49	2022-03-01 00:03:00	out	60.627	2	365.443	FL1_RM1	1
6767676785	B4	38	2022-03-01 00:04:00	out	4.66333	4	350.222	FL4_RM3	4
1383598759	B1	38	2022-03-01 00:05:00	out	28.5089	4	368.719	FL3_RM4	3
9909123329	B2	42	2022-03-01 00:06:00	out	-25.8358	3	361.844	FL1_RM3	1
2368808891	B3	40	2022-03-01 00:07:00	out	-34.4912	2	390.457	FL3_RM4	3
2154969325	B3	49	2022-03-01 00:08:00	in	-9.7037	2	381.218	FL4_RM2	4
4784061156	B2	35	2022-03-01 00:09:00	in	9.28874	3	448.503	FL1_RM2	1
1099496995	B1	43	2022-03-01 00:10:00	in	26.6181	3	437.796	FL1_RM1	1
4886335510	B1	40	2022-03-01 00:11:00	in	4.54296	4	421.517	FL4_RM3	4
1167417135	B2	47	2022-03-01 00:12:00	out	9.91049	2	363.036	FL2_RM1	2
8843281938	B2	48	2022-03-01 00:13:00	out	-21.9967	2	353.61	FL2_RM3	2
1760220531	B4	43	2022-03-01 00:14:00	out	21.9313	2	396.611	FL4_RM4	4
6108731121	B4	42	2022-03-01 00:15:00	out	-15.861	4	377.379	FL4_RM1	4
6240491738	B3	49	2022-03-01 00:16:00	out	-3.72436	4	397.546	FL4_RM2	4
1487766621	B2	49	2022-03-01 00:17:00	out	16.5894	3	389.542	FL3_RM2	3
8797318432	B2	42	2022-03-01 00:18:00	out	12.3838	4	379.292	FL4_RM4	4
6681081800	B4	40	2022-03-01 00:19:00	out	-7.06711	3	351.191	FL1_RM2	1
9273902981	B4	39	2022-03-01 00:20:00	out	46.1884	4	432.785	FL2_RM3	2
4708480330	B2	43	2022-03-01 00:21:00	in	3.87004	2	428.494	FL3_RM3	3
4618656781	B2	39	2022-03-01 00:22:00	out	-22.7715	2	351.438	FL2_RM2	2
7637424523	B2	40	2022-03-01 00:23:00	in	58.0428	2	358.484	FL3_RM2	3
9554140160	B3	45	2022-03-01 00:24:00	in	-2.04843	3	404.48	FL1_RM4	1
1886102987	B3	38	2022-03-01 00:25:00	in	59.717	4	396.403	FL2_RM1	2
8264475612	B1	37	2022-03-01 00:26:00	out	5.8059	4	353.922	FL4_RM1	4
8375042467	B4	41	2022-03-01 00:27:00	in	-22.43	3	423.655	FL2_RM3	2
6580935111	B1	48	2022-03-01 00:28:00	out	49.2072	4	362.047	FL2_RM3	2
9222349838	B4	48	2022-03-01 00:29:00	in	9.86473	3	423.386	FL2_RM2	2