Here is my image:
I can recognize the words:
I need to check if there is a line with specific text in the image and highlight this line with a rectangle. For example. I check if there is "times, it was the worst". And then I expect to see:
How can I achieve this?
My code:
import cv2 as cv
from pytesseract import pytesseract, Output
unchanged_image = cv.imread("i1Abv.png", cv.IMREAD_UNCHANGED)
initial_image = cv.imread("i1Abv.png", 0)
cv.imshow('', initial_image)
cv.waitKey(0)
ret, image = cv.threshold(initial_image, 100, 255, cv.THRESH_BINARY)
cv.imshow('', image)
cv.waitKey(0)
results = pytesseract.image_to_data(image, output_type=Output.DICT, config="--psm 6")
for i in range(0, len(results["text"])):
# extract the bounding box coordinates of the text region from
# the current result
x = results["left"][i]
y = results["top"][i]
w = results["width"][i]
h = results["height"][i]
# extract the OCR text itself along with the confidence of the
# text localization
text = results["text"][i]
conf = float(results["conf"][i])
# filter out weak confidence text localizations
if conf > 10:
# strip out non-ASCII text, so we can draw the text on the image
# using OpenCV, then draw a bounding box around the text along
# with the text itself
cv.rectangle(unchanged_image, (x, y), (x w, y h), (0, 255, 0), 2)
cv.putText(img=unchanged_image, text=text, org=(x, y), fontFace=cv.FONT_HERSHEY_COMPLEX,
fontScale=0.3, color=(36, 0, 255), thickness=1)
cv.imshow('', unchanged_image)
cv.waitKey(0)
CodePudding user response:
pytesseract.image_to_data
provides line_num
for each recognized text block. You can group all the recognitions by line_num
and then concatenate words into a text line. Also you need to find a bounding box that includes all the text boxes of each word (you can get them using left
, top
, width
, height
). You can find it with
Code:
import cv2
import numpy as np
import pytesseract
original_image = cv2.imread("text_line.png")
gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
target_text = "times, it was the worst"
df = pytesseract.image_to_data(gray, lang="eng", config="--psm 6", output_type=pytesseract.Output.DATAFRAME)
# group recognized words by lines
for line_num, words_per_line in df.groupby("line_num"):
# filter out words with a low confidence
words_per_line = words_per_line[words_per_line["conf"] >= 5]
if not len(words_per_line):
continue
words = words_per_line["text"].values
line = " ".join(words)
print(f"{line_num} '{line}'")
if target_text in line:
print("Found a line with specified text")
word_boxes = []
for left, top, width, height in words_per_line[["left", "top", "width", "height"]].values:
word_boxes.append((left, top))
word_boxes.append((left width, top height))
x, y, w, h = cv2.boundingRect(np.array(word_boxes))
cv2.rectangle(original_image, (x, y), (x w, y h), color=(255, 0, 255), thickness=3)
cv2.imwrite("out.jpg", original_image)