I have the following dataframe:
data = {'unit': {59: 'unit1',
662: 'unit1',
680: 'unit1',
725: 'unit1',
709: 'unit1',
703: 'unit1',
653: 'unit1',
807: 'unit4',
825: 'unit4',
778: 'unit4',
816: 'unit4',
822: 'unit4',
849: 'unit4',
820: 'unit4',
754: 'unit4',
1031: 'unit3',
1094: 'unit2',
1008: 'unit2',
1089: 'unit2',
1044: 'unit5'},
'Date_job': {59: datetime.date(2021, 6, 7),
662: datetime.date(2021, 6, 14),
680: datetime.date(2021, 7, 5),
725: datetime.date(2021, 7, 26),
709: datetime.date(2021, 8, 30),
703: datetime.date(2021, 10, 11),
653: datetime.date(2021, 10, 18),
807: datetime.date(2021, 7, 19),
825: datetime.date(2021, 7, 26),
778: datetime.date(2021, 8, 23),
816: datetime.date(2021, 8, 30),
822: datetime.date(2021, 9, 6),
849: datetime.date(2021, 9, 27),
820: datetime.date(2021, 10, 4),
754: datetime.date(2021, 10, 18),
1031: datetime.date(2021, 9, 6),
1094: datetime.date(2021, 7, 26),
1008: datetime.date(2021, 8, 9),
1089: datetime.date(2021, 10, 4),
1044: datetime.date(2021, 6, 14)},
'Vector': {59: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
662: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|5:1/8.0',
'B|6:1/5.0',
'B|7:1/5.0'],
680: ['A|14:1/9.0',
'A|14:1/4.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
725: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
709: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/3.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/5.0',
'B|7:1/5.0'],
703: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/6.0',
'B|7:1/5.0'],
653: ['A|14:1/9.0',
'A|15:1/11.0',
'A|16:1/12.0',
'B|11:1/4.0',
'B|2:1/4.0',
'B|3:1/12.0',
'B|4:1/12.0',
'B|5:1/9.0',
'B|6:1/6.0',
'B|7:1/5.0'],
807: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|4:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
825: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
778: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0',
'A|8:1/7.0'],
816: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/4.0',
'A|7:1/10.0',
'A|7:1/10.0',
'A|8:1/7.0'],
822: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/2.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/4.0',
'A|7:1/10.0',
'A|7:1/10.0'],
849: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/3.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
820: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/5.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
754: ['A|10:1/13.0',
'A|10:1/13.0',
'A|3:1/6.0',
'A|3:1/6.0',
'A|5:1/3.0',
'A|5:1/2.0',
'A|6:1/5.0',
'A|6:1/5.0',
'A|7:1/10.0',
'A|7:1/10.0'],
1031: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/10.0',
'A|5:1/2.0',
'A|6:1/12.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/6.0',
'A|9:1/2.0'],
1094: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/4.0',
'A|8:1/4.0',
'A|8:1/3.0',
'A|9:1/2.0'],
1008: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|5:1/4.0',
'A|6:1/11.0',
'A|6:1/4.0',
'A|7:1/9.0',
'A|7:1/9.0',
'A|8:1/4.0',
'A|9:1/2.0'],
1089: ['A|10:1/7.0',
'A|12:1/2.0',
'A|5:1/9.0',
'A|5:1/2.0',
'A|6:1/11.0',
'A|6:1/6.0',
'A|7:1/9.0',
'A|7:1/3.0',
'A|8:1/4.0',
'A|9:1/2.0'],
1044: ['A|10:1/6.0',
'A|10:1/6.0',
'A|5:1/4.0',
'A|5:1/4.0',
'A|6:1/10.0',
'A|6:1/9.0',
'A|6:1/9.0',
'A|7:1/8.0',
'A|7:1/8.0',
'A|8:1/3.0']}}
which looks like this:
unit Date_job Vector
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0]
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0]
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0]
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0]
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0]
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0]
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0]
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0]
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0]
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0]
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0]
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0]
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0]
The last column is a vector, here written as a list. As you can see, this vector can change over time for the same unit. I want to be able to tag the change between rows for each unit. I know how to do this for the entire dataframe but I haven't been able to apply groupby
to this.
Here's what I know how to do:
# Flag rows whose Vector differs from the Vector of the row directly above.
# The shift runs over the whole frame and ignores `unit`, so the first row of
# a unit is compared with the last row of the preceding unit.
# NOTE(review): fill_value is a one-row Series (head(1)), not a scalar —
# presumably meant to make the first row compare equal to itself; confirm.
df2["VectorChanged"] = df2["Vector"].shift(1, fill_value=df2["Vector"].head(1)) != df2["Vector"]
But, as you can see, it will also flag a change of vector whenever the unit changes (which is technically true, but not what I want, since the two rows being compared belong to different units):
unit Date_job Vector VectorChanged
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0] True
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] True
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] False
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0] True
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0] True
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0] True
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0] True
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0] True
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0] True
The expected output is:
unit Date_job Vector VectorChanged
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|5:1/8.0, B|6:1/5.0, B|7:1/5.0] True
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] True
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/3.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/5.0, B|7:1/5.0] False
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] True
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/4.0, B|2:1/4.0, B|3:1/12.0, B|4:1/12.0, B|5:1/9.0, B|6:1/6.0, B|7:1/5.0] False
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|4:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] False
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0, A|8:1/7.0] True
822 unit4 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/2.0, A|5:1/2.0, A|6:1/5.0, A|6:1/4.0, A|7:1/10.0, A|7:1/10.0] True
849 unit4 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
820 unit4 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/5.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.0, A|5:1/3.0, A|5:1/2.0, A|6:1/5.0, A|6:1/5.0, A|7:1/10.0, A|7:1/10.0] True
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0, A|6:1/12.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/6.0, A|9:1/2.0] False
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/4.0, A|8:1/4.0, A|8:1/3.0, A|9:1/2.0] False
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0, A|6:1/11.0, A|6:1/4.0, A|7:1/9.0, A|7:1/9.0, A|8:1/4.0, A|9:1/2.0] True
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0, A|6:1/11.0, A|6:1/6.0, A|7:1/9.0, A|7:1/3.0, A|8:1/4.0, A|9:1/2.0] True
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0, A|6:1/10.0, A|6:1/9.0, A|6:1/9.0, A|7:1/8.0, A|7:1/8.0, A|8:1/3.0] False
That is, the first row of each group is False, since there is nothing to compare it to, i.e., no change.
Any help on this will be greatly appreciated.
CodePudding user response:
Compare each value with the shifted (previous) value, then set the result to False for the first row of each unit. Use this if performance is important; groupby is not necessary here:
# A row counts as changed when its Vector differs from the row directly above
# it AND its unit has already been seen (so each unit's first row is False).
previous_vector = df["Vector"].shift()
differs_from_previous = previous_vector != df['Vector']
unit_seen_before = df['unit'].duplicated()
df["Vector Changed"] = differs_from_previous & unit_seen_before
EDIT:
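If the same unit can appear in more than one separate run of consecutive rows (so that the unit column contains the same value in several non-adjacent blocks), use: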
# True where a row's Vector differs from the row directly above it.
differs_from_previous = df["Vector"].shift() != df['Vector']
# Give every run of consecutive identical units its own id, so a unit that
# reappears further down the frame starts a fresh run.
starts_new_run = df["unit"].shift() != df['unit']
run_id = starts_new_run.cumsum()
# The first row of each run has nothing to compare against, so force it False.
df["Vector Changed"] = differs_from_previous & run_id.duplicated()
print(df)
unit Date_job Vector \
59 unit1 2021-06-07 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
662 unit1 2021-06-14 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
680 unit1 2021-07-05 [A|14:1/9.0, A|14:1/4.0, A|15:1/11.0, A|16:1/1...
725 unit1 2021-07-26 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
709 unit1 2021-08-30 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
703 unit1 2021-10-11 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
653 unit1 2021-10-18 [A|14:1/9.0, A|15:1/11.0, A|16:1/12.0, B|11:1/...
807 unit4 2021-07-19 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
825 unit4 2021-07-26 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
778 unit4 2021-08-23 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
816 unit4 2021-08-30 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6....
822 unit1 2021-09-06 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
849 unit1 2021-09-27 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
820 unit1 2021-10-04 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 1 groups
754 unit4 2021-10-18 [A|10:1/13.0, A|10:1/13.0, A|3:1/6.0, A|3:1/6.... <- again 4 groups
1031 unit3 2021-09-06 [A|10:1/7.0, A|12:1/2.0, A|5:1/10.0, A|5:1/2.0...
1094 unit2 2021-07-26 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|6:1/11.0...
1008 unit2 2021-08-09 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/4.0,...
1089 unit2 2021-10-04 [A|10:1/7.0, A|12:1/2.0, A|5:1/9.0, A|5:1/2.0,...
1044 unit5 2021-06-14 [A|10:1/6.0, A|10:1/6.0, A|5:1/4.0, A|5:1/4.0,...
Vector Changed
59 False
662 True
680 True
725 True
709 False
703 True
653 False
807 False
825 True
778 True
816 True
822 False
849 True
820 True
754 False
1031 False
1094 False
1008 True
1089 True
1044 False
CodePudding user response:
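Try with groupby and transform, like so. Within each unit, every row is compared with the previous row; the first row is back-filled from the second, so it compares equal to itself. The second line then resets units that have only a single row to False, because in that case there is nothing to back-fill from: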
vectors_by_unit = df.groupby("unit")["Vector"]
# Within each unit compare every row with the one before it; back-filling the
# shifted values makes the first row of a unit compare against itself.
changed_in_unit = vectors_by_unit.transform(lambda vec: vec.ne(vec.shift().bfill()))
# A unit with a single row has nothing to back-fill from, so reset it to False.
single_row_unit = vectors_by_unit.transform("count").eq(1)
df["Vector Changed"] = changed_in_unit.mask(single_row_unit, False)
>>> df
unit Date_job Vector Vector Changed
59 unit1 2021-06-07 ... False
662 unit1 2021-06-14 ... True
680 unit1 2021-07-05 ... True
725 unit1 2021-07-26 ... True
709 unit1 2021-08-30 ... False
703 unit1 2021-10-11 ... True
653 unit1 2021-10-18 ... False
807 unit4 2021-07-19 ... False
825 unit4 2021-07-26 ... True
778 unit4 2021-08-23 ... True
816 unit4 2021-08-30 ... True
822 unit4 2021-09-06 ... True
849 unit4 2021-09-27 ... True
820 unit4 2021-10-04 ... True
754 unit4 2021-10-18 ... True
1031 unit3 2021-09-06 ... False
1094 unit2 2021-07-26 ... False
1008 unit2 2021-08-09 ... True
1089 unit2 2021-10-04 ... True
1044 unit5 2021-06-14 ... False