I'm processing an image: I capture a picture from the production line, extract the part I want from it, cut that part into small pieces, and run OCR to convert each piece to text.
Question: one of the images disappears. I know it is the one called "output_y:389_x:150.png", because I name each piece by its (x, y) position in the original image.
- the script
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
from PIL import Image
import pytesseract
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
# Pipeline: find the stickers in original.png, save each one as a tile named
# after its grid position, crop the text strip out of every tile, then OCR the
# strips in row-major order and report tiles whose text repeats.
#
# NOTE: the "+" operators and the indentation of this script were lost when it
# was pasted; both are restored here.

BASE_DIR = "/home/student_DC/desktop/test_11_8"
TILE_DIR = BASE_DIR + "/output01_test11_9"   # one PNG per detected sticker
CROP_DIR = BASE_DIR + "/output02_test11_9"   # text strip cropped from each tile

# Grid positions that bounding-box corners are snapped to. A coordinate that
# lies strictly within SNAP_TOLERANCE pixels of a centre is replaced by it.
X_CENTERS = (45, 150, 255, 360, 465, 570)
Y_CENTERS = (47, 159, 273, 389, 504)
SNAP_TOLERANCE = 20

# Rectangle (inside every tile) that is cropped out and handed to the OCR.
CROP_X, CROP_Y, CROP_W, CROP_H = 25, 51, 61, 16


def snap_to_grid(value, centers, tolerance=SNAP_TOLERANCE):
    """Return the centre strictly within ``tolerance`` of ``value``, else 0.

    0 is the "not on the grid" marker used by the rest of the script.
    """
    for center in centers:
        if center - tolerance < value < center + tolerance:
            return center
    return 0


def tile_name(x, y):
    """Return the file name used for the tile at grid position (x, y)."""
    return f"output_y:{y}_x:{x}.png"


def ordered_tile_names(xy_list):
    """Return tile file names for ``[x, y]`` pairs, sorted by y and then x."""
    row_major = sorted(xy_list, key=lambda k: (k[1], k[0]))
    return [tile_name(x, y) for x, y in row_major]


def main():
    """Run the whole detect -> crop -> OCR pipeline."""
    image = cv2.imread(BASE_DIR + "/original.png")
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(BASE_DIR + "/original.png")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    white_bg = 255 * np.ones_like(image)
    _, thresh = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY_INV)
    # NOTE(review): a kernel size of 1 presumably leaves the image unchanged.
    blur = cv2.medianBlur(thresh, 1)
    kernel = np.ones((10, 20), np.uint8)
    img_dilation = cv2.dilate(blur, kernel, iterations=1)
    # findContours returns (image, contours, hierarchy) on OpenCV 3 but
    # (contours, hierarchy) on OpenCV 4; index -2 is the contour list in both.
    ctrs = cv2.findContours(
        img_dilation.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    xy_list = []
    x = y = 0  # keeps the final_output file name defined when nothing is found
    for ctr in sorted_ctrs:
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        roi = image[y:y + h, x:x + w]
        if not (50 < h < 200 and w > 50):
            continue
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 1)
        print(x, y)
        x = snap_to_grid(x, X_CENTERS)
        y = snap_to_grid(y, Y_CENTERS)
        print("new number", x, y)
        if x != 0 and y != 0:
            cv2.imwrite(TILE_DIR + "/" + tile_name(x, y), roi)
            xy_list.append([x, y])

    print("len is : ", len(xy_list))
    aaa_list = sorted(xy_list, key=lambda k: [k[1], k[0]])
    print(aaa_list)
    print(" ")
    cv2.imwrite(TILE_DIR + f"/final_output_{x}_{y}.png", white_bg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Crop the text strip out of EVERY file in the tile directory. The loop
    # used to start at index 1, so the first name returned by listdir() was
    # never cropped and later failed to open during OCR.
    onlyfiles = [f for f in listdir(TILE_DIR) if isfile(join(TILE_DIR, f))]
    for name in onlyfiles:
        tile = cv2.imread(join(TILE_DIR, name))
        if tile is None:
            print("skipping unreadable file", name)
            continue
        crop = tile[CROP_Y:CROP_Y + CROP_H, CROP_X:CROP_X + CROP_W]
        cv2.imwrite(join(CROP_DIR, name), crop)

    ordered_png_name_list = ordered_tile_names(xy_list)
    print(ordered_png_name_list)

    # Iterate the ordered names themselves; bounding the loop by
    # len(onlyfiles) over-runs the list because the tile directory also
    # contains the final_output_*.png file.
    listOfElems = []
    listOfDuplicate = []
    for name in ordered_png_name_list:
        each_file_path = join(CROP_DIR, name)
        with Image.open(each_file_path) as strip:
            text = pytesseract.image_to_string(strip, lang='eng')
        print("= = = = = = = = =")
        print(name)
        print(text)
        if text not in listOfElems:
            listOfElems.append(text)
        else:
            print("Duplicate")
            listOfDuplicate.append(each_file_path)

    print(listOfElems)
    print(listOfDuplicate)
    print("= = = = ")
    print("each_Duplicate_file_path")
    for each_Duplicate_file_path in listOfDuplicate:
        print(each_Duplicate_file_path)


if __name__ == "__main__":
    main()
- the error:
Traceback (most recent call last):
File "/home/student_DC/desktop/test_11_8/locate_sticker_xy_order加1by1.py", line 154, in <module>
image = Image.open(each_file_path)
File "/home/student_DC/miniconda3/lib/python3.9/site-packages/PIL/Image.py", line 3131, in open
fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/student_DC/desktop/test_11_8/output02_test11_9/output_y:389_x:150.png'
If I process the image "output_y:389_x:150.png" on its own, it works fine:
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
from PIL import Image
import pytesseract
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
# Crop the text strip out of one tile and save it under the same name in the
# second output folder. (The "+" operators in the slice were lost when the
# script was pasted and are restored here.)
image = cv2.imread('/home/student_DC/desktop/test_11_8/output01_test11_9/output_y:389_x:150.png')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError('/home/student_DC/desktop/test_11_8/output01_test11_9/output_y:389_x:150.png')
# Top-left corner (x, y) and size (w, h) of the strip inside the tile.
y = 51
x = 25
h = 16
w = 61
crop = image[y:y + h, x:x + w]
cv2.imwrite("/home/student_DC/desktop/test_11_8/output02_test11_9/output_y:389_x:150.png", crop)
The image link: https://imgur.com/a/U8bn9DX
(pic 2) in the link: if I process only "output_y:389_x:150.png", it works fine,
but as in (pic 1), when the loop processes all 30 images from the "output 1" folder into the "output 2" folder, the "output_y:389_x:150.png" image goes missing. How come?
CodePudding user response:
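Here's a version of your code that makes the loops more consistent. Iterating directly over the list of file names, instead of driving a `while` loop with a manual index, also ensures that no file is skipped: a cropping loop whose index starts at 1 instead of 0 never processes the first name returned by `listdir`, which is how one image can go missing from the second output folder. In general, it's a very bad idea to include hard-coded paths in code like this (that is, "/home/student_DC/desktop/test_11_8", etc.). If you are going to run this code from the "test_11_8" directory, then you can remove that prefix from all of your paths.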
import os
import cv2
import numpy as np
from PIL import Image
import pytesseract
# Pipeline: locate the stickers in original.png, write one tile per sticker
# (named after its snapped grid position), crop the text strip from each tile,
# OCR the strips row by row and list the tiles whose text was already seen.
#
# NOTE: the "+" operators and the indentation were lost when this script was
# pasted; both are restored here.

ROOT = "/home/student_DC/desktop/test_11_8"
TILES = os.path.join(ROOT, "output01_test11_9")   # full sticker tiles
STRIPS = os.path.join(ROOT, "output02_test11_9")  # cropped text strips

# Grid centres for snapping; the y value 273 matches the tile names used in
# the question (the window 253 < y < 293).
GRID_X = (45, 150, 255, 360, 465, 570)
GRID_Y = (47, 159, 273, 389, 504)

# (x, y, w, h) of the text strip inside every tile.
STRIP_BOX = (25, 51, 61, 16)


def snap_to_grid(value, centers, tolerance=20):
    """Return the first centre with ``|value - centre| < tolerance``, else 0."""
    return next((c for c in centers if abs(value - c) < tolerance), 0)


def tile_name(x, y):
    """Return the PNG file name for the tile at snapped position (x, y)."""
    return 'output_y:' + str(y) + '_x:' + str(x) + '.png'


def ordered_tile_names(xy_list):
    """Return tile names for ``[x, y]`` pairs ordered by row (y), then x."""
    return [tile_name(x, y) for x, y in sorted(xy_list, key=lambda k: [k[1], k[0]])]


def main():
    """Detect stickers, write tiles and strips, then OCR and report repeats."""
    source_path = os.path.join(ROOT, "original.png")
    image = cv2.imread(source_path)
    if image is None:
        # cv2.imread returns None on failure rather than raising.
        raise FileNotFoundError(source_path)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    white_bg = 255 * np.ones_like(image)
    _, thresh = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY_INV)
    blur = cv2.medianBlur(thresh, 1)
    kernel = np.ones((10, 20), np.uint8)
    img_dilation = cv2.dilate(blur, kernel, iterations=1)
    # OpenCV 3 returns three values from findContours and OpenCV 4 returns
    # two; the contour list is the second-to-last element in both.
    found = cv2.findContours(img_dilation.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    ctrs = found[-2]
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    xy_list = []
    x = y = 0  # defined even if no contour is found (used in a file name below)
    for ctr in sorted_ctrs:
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        roi = image[y:y + h, x:x + w]
        if (h > 50 and w > 50) and h < 200:
            cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 1)
            print(x, y)
            x = snap_to_grid(x, GRID_X)
            y = snap_to_grid(y, GRID_Y)
            print("new number", x, y)
            if (x != 0) and (y != 0):
                cv2.imwrite(os.path.join(TILES, f"output_y:{y}_x:{x}.png"), roi)
                xy_list.append([x, y])

    print("len is : ", len(xy_list))
    aaa_list = sorted(xy_list, key=lambda k: [k[1], k[0]])
    print(aaa_list)
    print(" ")
    cv2.imwrite(os.path.join(TILES, f"final_output_{x}_{y}.png"), white_bg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Crop the strip out of every regular file in the tile folder.
    left, top, width, height = STRIP_BOX
    onlyfiles = [f for f in os.listdir(TILES) if os.path.isfile(os.path.join(TILES, f))]
    for name in onlyfiles:
        tile = cv2.imread(os.path.join(TILES, name))
        if tile is None:
            print("skipping unreadable file", name)
            continue
        crop = tile[top:top + height, left:left + width]
        cv2.imwrite(os.path.join(STRIPS, name), crop)

    ordered_png_name_list = ordered_tile_names(xy_list)
    print(ordered_png_name_list)

    listOfElems = []
    listOfDuplicate = []
    for name in ordered_png_name_list:
        each_file_path = os.path.join(STRIPS, name)
        # The context manager closes the file once the OCR call has finished.
        with Image.open(each_file_path) as strip:
            text = pytesseract.image_to_string(strip, lang='eng')
        print("= = = = = = = = =")
        print(name)
        print(text)
        if text not in listOfElems:
            listOfElems.append(text)
        else:
            print("Duplicate")
            listOfDuplicate.append(each_file_path)

    print(listOfElems)
    print(listOfDuplicate)
    print("= = = = ")
    print("each_Duplicate_file_path")
    for path in listOfDuplicate:
        print(path)


if __name__ == "__main__":
    main()