I have a list of lists (`path_list`; its items are actually `Path` objects) and another list (`y_path`).
Now, I want to split those two lists into a "removed" part and a "kept" part based on another list, `remove_all_list`.
The removal step (using `not in`) works fine. But when I use `in` to collect the matching items, I get an empty list.
The code is given below:
# File-name fragments identifying the images to separate out.
remove_all_list = ["Image_034_S_0735_Img_Na__Ma_F_", "Image_033_S_0733_Img_Na__Ma_F_"]

# Start the "test" side from shallow copies of the full lists.
X_test = path_list.copy()
y_test = y_path.copy()

for remove_item in remove_all_list:
    # Drop every path whose file name contains the current fragment.
    path_list = [[item for item in sub_list if remove_item not in item.name] for sub_list in path_list]
    y_path = [item for item in y_path if remove_item not in item.name]
    # NOTE(review): X_test / y_test are re-filtered on every pass, so an item
    # only survives if its name contains *every* fragment in remove_all_list.
    # With two different fragments nothing survives, which is the empty
    # result this question is about.
    X_test = [[item for item in sub_list if remove_item in item.name] for sub_list in X_test]
    y_test = [item for item in y_test if remove_item in item.name]

X_train_val = [x for x in path_list if x]  # Removing empty list
X_test_all = [x for x in X_test if x]  # Removing empty list
The shape I am getting from the loop
# Show the shape of each of the four resulting lists.
print(np.shape(y_path), np.shape(X_train_val), np.shape(y_test), np.shape(X_test_all))
(2,) (2, 2) (0,) (0,)
But the expected shape is `(2,) (2, 2) (2,) (2, 2)`. Any ideas are appreciated.
Reproducible data
import numpy as np
from pathlib import Path
# Each inner list holds the Folder_1 and Folder_2 path of one image name.
path_list = [
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0724_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_033_S_0724_Img_Na__Ma_F_.png'),
    ],
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0733_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_033_S_0733_Img_Na__Ma_F_.png'),
    ],
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0734_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_034_S_0734_Img_Na__Ma_F_.png'),
    ],
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0735_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_034_S_0735_Img_Na__Ma_F_.png'),
    ],
]

# Flat list with the Folder_1 path of each image name.
y_path = [
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0724_Img_Na__Ma_F_.png'),
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0733_Img_Na__Ma_F_.png'),
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0734_Img_Na__Ma_F_.png'),
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0735_Img_Na__Ma_F_.png'),
]
CodePudding user response:
I believe this accomplishes what you want. It's more sensible to iterate over `path_list` in the outer loop, then decide entry by entry whether it's a winner or a loser, sorting into two piles:
from pprint import pprint
from pathlib import Path
# Each inner list holds the Folder_1 and Folder_2 path of one image name.
path_list = [
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0724_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_033_S_0724_Img_Na__Ma_F_.png'),
    ],
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0733_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_033_S_0733_Img_Na__Ma_F_.png'),
    ],
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0734_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_034_S_0734_Img_Na__Ma_F_.png'),
    ],
    [
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0735_Img_Na__Ma_F_.png'),
        Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_034_S_0735_Img_Na__Ma_F_.png'),
    ],
]

# Flat list with the Folder_1 path of each image name.
y_path = [
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0724_Img_Na__Ma_F_.png'),
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0733_Img_Na__Ma_F_.png'),
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0734_Img_Na__Ma_F_.png'),
    Path('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0735_Img_Na__Ma_F_.png'),
]

# File-name fragments identifying the entries that go into the "test" pile.
remove_all_list = ["Image_034_S_0735_Img_Na__Ma_F_", "Image_033_S_0733_Img_Na__Ma_F_"]

# Partition the nested list in a single pass: a sub-list goes to X_test when
# some fragment occurs in the file name of *every* path in it, otherwise it
# goes to X_keep.
# NOTE: all() over an empty sub-list is vacuously True, so an empty sub-list
# would be placed in X_test.
X_keep = []
X_test = []
for sub_list in path_list:
    if any(all(remove in item.name for item in sub_list) for remove in remove_all_list):
        X_test.append(sub_list)
    else:
        X_keep.append(sub_list)

# Partition the flat list the same way: a path goes to y_test when its file
# name contains any of the fragments.
y_keep = []
y_test = []
for item in y_path:
    if any(remove in item.name for remove in remove_all_list):
        y_test.append(item)
    else:
        y_keep.append(item)

# Print each pile under the label used in the output listing.
print("PL")
pprint(X_keep)
print("X")
pprint(X_test)
print("yk")
pprint(y_keep)
print("y")
pprint(y_test)
Output:
PL
[[WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0724_Img_Na__Ma_F_.png'),
WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_033_S_0724_Img_Na__Ma_F_.png')],
[WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0734_Img_Na__Ma_F_.png'),
WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_034_S_0734_Img_Na__Ma_F_.png')]]
X
[[WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0733_Img_Na__Ma_F_.png'),
WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_033_S_0733_Img_Na__Ma_F_.png')],
[WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0735_Img_Na__Ma_F_.png'),
WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_2/Image_034_S_0735_Img_Na__Ma_F_.png')]]
yk
[WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0724_Img_Na__Ma_F_.png'),
WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0734_Img_Na__Ma_F_.png')]
y
[WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_033_S_0733_Img_Na__Ma_F_.png'),
WindowsPath('/home/0_knowle/Desktop/project/data/train/Image/Folder_1/Image_034_S_0735_Img_Na__Ma_F_.png')]