I have an image of a mathematical formula and I need to extract its symbols while also recording where each one was (the center of each symbol). For example, an image like this needs to be transformed into 15 separate 75x75 images, one per symbol.
What I have tried is:
- Convert to grayscale and then to binary: pixels close to white (> 250) become 255 and the others become 0
- Use BFS (breadth-first search; the `_BNF` method below) to find all connected components and then transform them into images (with rescaling and everything else)
But I am sure this is not the best way to do it. Is there a standard approach for this problem?
Here is my code:
class Parser:
    """Split an image into per-symbol crops and remember where each one was.

    The pipeline is: read the image, binarise it with ``cv2.threshold``,
    flood-fill every connected group of foreground pixels, cut each group
    out of the colour-inverted source image and centre it on a fixed-size
    canvas.

    Axis convention used throughout this class: ``i`` / "width" / ``x`` index
    axis 0 of the arrays and ``j`` / "height" / ``y`` index axis 1. So
    ``targetSizes[0]`` bounds axis 0 and ``targetSizes[1]`` bounds axis 1.
    """

    # Neighbour offsets (d_i, d_j) visited by the flood fill, in visiting order.
    _EDGE_NEIGHBOURS = ((1, 0), (0, -1), (-1, 0), (0, 1))
    _DIAGONAL_NEIGHBOURS = ((1, 1), (1, -1), (-1, -1), (-1, 1))

    def __init__(self, targetSizes=(75, 75), binaryThreshold=cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU,
                 scaleFully=False, scaleFullyRate=0.9, whiteThreshold=249, blackThreshold=0,
                 rescalingInterpolation=cv2.INTER_AREA, pixelsInImageThreshold=20,
                 rescaleOriginalImage=True, rescaleToAtLeast=200, rescaleToAtMaximum=1000):
        """Store the configuration.

        :param targetSizes: (axis-0 size, axis-1 size) of every output canvas.
        :param binaryThreshold: ``type`` flag passed to ``cv2.threshold``.
        :param scaleFully: when true, crops smaller than the canvas are
            enlarged so they fill ``scaleFullyRate`` of it.
        :param scaleFullyRate: fraction of the canvas an enlarged crop may fill.
        :param whiteThreshold: ``maxval`` passed to ``cv2.threshold``; also the
            value a pixel must reach to start a new component in ``_BNF``.
        :param blackThreshold: ``thresh`` passed to ``cv2.threshold``.
        :param rescalingInterpolation: interpolation flag for ``cv2.resize``.
        :param pixelsInImageThreshold: stored, but not read by any method of
            this class.
        :param rescaleOriginalImage: whether ``scaleOriginal`` is applied to
            the input image before parsing.
        :param rescaleToAtLeast: stored, but not read by any method of this
            class (``scaleOriginal`` is still a stub).
        :param rescaleToAtMaximum: stored, but not read by any method of this
            class (``scaleOriginal`` is still a stub).
        """
        self.targetWidth = targetSizes[0]
        self.targetHeight = targetSizes[1]
        self.binaryThreshold = binaryThreshold
        self.scaleFully = scaleFully
        self.scaleFullyRate = scaleFullyRate
        self.whiteThreshold = whiteThreshold
        self.blackThreshold = blackThreshold
        self.rescalingInterpolation = rescalingInterpolation
        # Attribute name (capital "M") kept as-is so external readers of it
        # do not break.
        self.pixelsInIMageThreshold = pixelsInImageThreshold
        self.rescaleOriginalImage = rescaleOriginalImage
        self.rescaleOriginalMin = rescaleToAtLeast
        self.rescaleOriginalMax = rescaleToAtMaximum
        # 1 -> diagonal neighbours also join a component (8-connectivity);
        # any other value -> only edge neighbours (4-connectivity).
        self.parseMode = 1

    # Kept so existing code that calls ``parser.init(...)`` keeps working.
    init = __init__

    def _imageToBinary(self, image, zeroValueTrash=0, oneValueTrash=253):
        """Convert a BGR image to a single-channel thresholded image.

        :param image: BGR image as accepted by ``cv2.cvtColor``.
        :param zeroValueTrash: unused; kept for interface compatibility.
        :param oneValueTrash: unused; kept for interface compatibility.
        :return: the image returned by ``cv2.threshold``; pixels that pass the
            threshold carry the value ``self.whiteThreshold``.
        """
        grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(grayImage, self.blackThreshold,
                                  self.whiteThreshold, self.binaryThreshold)
        return binary

    def _BNF(self, binaryImage, debugDumpPath=None):
        """Collect connected components of foreground pixels (breadth-first).

        The image itself serves as the "visited" map: every pixel that is put
        on the queue is set to 0, so ``binaryImage`` is modified in place and
        is all-background for the found components when the method returns.

        :param binaryImage: 2-D image indexable as ``binaryImage[i][j]``;
            rows are assumed to have equal length.
        :param debugDumpPath: optional file path; when given, the partially
            consumed image is written there with ``cv2.imwrite`` after each
            component. Off by default so parsing has no file side effects.
        :return: list of components, each a list of ``(i, j)`` tuples, in the
            order their first pixel is met in a row-by-row scan.
        """
        from collections import deque

        offsets = self._EDGE_NEIGHBOURS
        if self.parseMode == 1:
            offsets = offsets + self._DIAGONAL_NEIGHBOURS

        rowCount = len(binaryImage)
        components = []
        for seedI in range(rowCount):
            colCount = len(binaryImage[seedI])
            for seedJ in range(colCount):
                if not binaryImage[seedI][seedJ] > self.whiteThreshold - 1:
                    continue

                pending = deque([(seedI, seedJ)])
                binaryImage[seedI][seedJ] = 0
                component = []
                while pending:
                    i, j = pending.popleft()
                    component.append((i, j))
                    for di, dj in offsets:
                        ni, nj = i + di, j + dj
                        # ">= 0" so that row 0 and column 0 are reachable.
                        if not (0 <= ni < rowCount and 0 <= nj < colCount):
                            continue
                        if binaryImage[ni][nj] != 0:
                            # Mark on enqueue so a pixel is queued only once.
                            binaryImage[ni][nj] = 0
                            pending.append((ni, nj))

                if debugDumpPath is not None:
                    cv2.imwrite(debugDumpPath, binaryImage)
                components.append(component)
        return components

    def parseImage(self, image_path: str) -> list:
        """Read an image file and return its connected components.

        :param image_path: path handed to ``cv2.imread``.
        :return: the list produced by ``_BNF`` for the thresholded image.
        """
        image = cv2.imread(image_path)
        if self.rescaleOriginalImage:
            image = self.scaleOriginal(image)
        return self._BNF(self._imageToBinary(image))

    def isScaleable(self, imageShape):
        """Placeholder: currently reports every shape as scaleable."""
        return True

    def scaleOriginal(self, image: np.ndarray):
        """Placeholder for input rescaling: currently returns ``image`` unchanged."""
        # To be created
        return image

    @staticmethod
    def _getImageAndCenterFromDotes(Dotes, originalImage=None):
        """Rebuild a tight crop of one component and compute its centre.

        :param Dotes: non-empty sequence of ``(i, j)`` pixel coordinates.
        :param originalImage: optional image to copy pixel values from,
            indexed as ``originalImage[i, j]``; the crop is allocated with 3
            channels in that case. When omitted, component pixels are painted
            with 255 on a single-channel crop.
        :return: ``(image, imageCenter)`` where ``image`` is a zero-initialised
            array covering the component's bounding box and ``imageCenter`` is
            a ``Point`` built from the integer midpoints of that box.
        :raises ValueError: if ``Dotes`` is empty.
        """
        if len(Dotes) == 0:
            raise ValueError("Dotes must contain at least one pixel")

        coords = np.asarray(Dotes)
        iAll, jAll = coords[:, 0], coords[:, 1]
        # Bounding box of the component.
        i_mn, i_mx = int(iAll.min()), int(iAll.max())
        j_mn, j_mx = int(jAll.min()), int(jAll.max())

        imageCenter = Point((i_mx + i_mn) // 2, (j_mx + j_mn) // 2)

        # "width" is the extent along axis 0, "height" along axis 1.
        width, height = i_mx - i_mn + 1, j_mx - j_mn + 1
        if originalImage is None:
            image = np.zeros((width, height))
            image[iAll - i_mn, jAll - j_mn] = 255
        else:
            image = np.zeros((width, height, 3))
            image[iAll - i_mn, jAll - j_mn] = originalImage[iAll, jAll]
        return image, imageCenter

    def scaleParsedImage(self, image: np.ndarray):
        """Fit a crop into the target canvas and centre it there.

        Each axis is shrunk to the target size only if it exceeds it; the two
        axes are clamped independently. With ``scaleFully`` enabled, a crop
        whose both axes are below ``scaleFullyRate`` of the canvas is enlarged
        by a single common factor.

        :param image: np.ndarray, 2-D or 3-D (3 channels assumed for 3-D).
        :return: scaledImage np.ndarray of shape
            ``(targetWidth, targetHeight[, 3])`` with the crop in the middle.
        """
        width, height = image.shape[:2]
        newWidth = min(width, self.targetWidth)
        newHeight = min(height, self.targetHeight)

        roomOnBothAxes = (newHeight < self.targetHeight * self.scaleFullyRate
                          and newWidth < self.targetWidth * self.scaleFullyRate)
        if self.scaleFully and roomOnBothAxes:
            scaleRate = min(self.targetWidth * self.scaleFullyRate / newWidth,
                            self.targetHeight * self.scaleFullyRate / newHeight)
            # Clamp so rounding up can never exceed the canvas.
            newWidth = min(math.ceil(newWidth * scaleRate), self.targetWidth)
            newHeight = min(math.ceil(newHeight * scaleRate), self.targetHeight)

        # cv2.resize takes dsize as (axis-1 size, axis-0 size).
        scaled = cv2.resize(image, (newHeight, newWidth),
                            interpolation=self.rescalingInterpolation)

        # Paste the scaled crop in the middle of an empty canvas.
        x_add = (self.targetWidth - newWidth) // 2
        y_add = (self.targetHeight - newHeight) // 2
        if len(image.shape) == 2:
            resized = np.zeros((self.targetWidth, self.targetHeight))
        else:
            resized = np.zeros((self.targetWidth, self.targetHeight, 3))
        resized[x_add:x_add + newWidth, y_add:y_add + newHeight] = scaled
        return resized

    def parseAndConvert(self, image_name: str) -> list:
        """Parse an image file into ``RawImage`` objects, one per component.

        :param image_name: path of the image file.
        :return: list of ``RawImage(scaledCrop, center)``; pixel values come
            from the colour-inverted source image (``255 - image``).
        """
        imagesInDotes = self.parseImage(image_name)

        original = cv2.imread(image_name)
        # Apply the same rescaling parseImage used, so component coordinates
        # index the same pixel grid.
        if self.rescaleOriginalImage:
            original = self.scaleOriginal(original)
        original = 255 - original

        rawImages = []
        for dotes in imagesInDotes:
            crop, center = self._getImageAndCenterFromDotes(dotes, original)
            rawImages.append(RawImage(self.scaleParsedImage(crop), center))
        return rawImages
CodePudding user response:
Have a look at the (imho not all too intuitively named) function cv.findContours(): https://docs.opencv.org/3.4/d4/d73/tutorial_py_contours_begin.html
It should do most of the things that you are doing by hand right now out of the box, namely extracting and measuring binary objects.
If you encounter problems where a single symbol is made up of several objects (like i, % or "), look into the morphological operations to merge them into a single one: erode(), dilate(), or open and close via morphologyEx() (Tutorial here: https://docs.opencv.org/4.x/d9/d61/tutorial_py_morphological_ops.html).