Image parsing in python (connected components analysis)


I have an image of a mathematical formula and I need to parse its symbols, but also save where they were (the center of each symbol). For example, an image like this needs to be transformed into 15 different 75x75 images, one per symbol.

What I have tried is:

  1. Convert to grayscale and then to binary: pixels close to white (> 250) become 255, all others become 0
  2. Run a BFS flood fill (the _BNF method below) over the binary image to find all connected components and then turn each one into an image (with rescaling and everything else)

But I am sure this is not the best way to do it; maybe a standard approach to this problem already exists?

Here is my code:


import math

import cv2
import numpy as np

# MyQueue, Point and RawImage are small helper classes defined elsewhere in the project.


class Parser:

    def __init__(self, targetSizes=(75, 75), binaryThreshold=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU,
                 scaleFully=False, scaleFullyRate=0.9, whiteThreshold=249, blackThreshold=0,
                 rescalingInterpolation=cv2.INTER_AREA, pixelsInImageThreshold=20,
                 rescaleOriginalImage=True, rescaleToAtLeast=200, rescaleToAtMaximum=1000):
        self.targetWidth = targetSizes[0]
        self.targetHeight = targetSizes[1]

        self.binaryThreshold = binaryThreshold

        self.scaleFully = scaleFully
        self.scaleFullyRate = scaleFullyRate

        self.whiteThreshold = whiteThreshold
        self.blackThreshold = blackThreshold

        self.rescalingInterpolation = rescalingInterpolation
        self.pixelsInImageThreshold = pixelsInImageThreshold

        self.rescaleOriginalImage = rescaleOriginalImage
        self.rescaleOriginalMin = rescaleToAtLeast
        self.rescaleOriginalMax = rescaleToAtMaximum

        self.parseMode = 1

    def _imageToBinary(self, image, zeroValueTrash=0, oneValueTrash=253):
        grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        ret, binary = cv2.threshold(grayImage, self.blackThreshold, self.whiteThreshold, self.binaryThreshold)
        # cv2.imwrite("Test.png", binary)
        return binary

    def _BNF(self, binaryImage):
        # BFS flood fill over the white pixels; every connected component
        # becomes one list of (row, col) coordinates.
        Q = MyQueue()
        whitePixels = []
        gg = 0
        for i in range(len(binaryImage)):
            for j in range(len(binaryImage[i])):
                if binaryImage[i][j] > self.whiteThreshold - 1:
                    Q.put((i, j))
                    binaryImage[i][j] = 0
                    obj = []
                    gg += 1
                    while not Q.empty():
                        i, j = Q.pop()

                        obj.append((i, j))

                        if i + 1 < len(binaryImage) and binaryImage[i + 1][j] != 0:
                            Q.put((i + 1, j))
                            binaryImage[i + 1][j] = 0

                        if j - 1 >= 0 and binaryImage[i][j - 1] != 0:
                            Q.put((i, j - 1))
                            binaryImage[i][j - 1] = 0

                        if i - 1 >= 0 and binaryImage[i - 1][j] != 0:
                            Q.put((i - 1, j))
                            binaryImage[i - 1][j] = 0

                        if j + 1 < len(binaryImage[i]) and binaryImage[i][j + 1] != 0:
                            Q.put((i, j + 1))
                            binaryImage[i][j + 1] = 0

                        # parseMode 1 additionally follows diagonal neighbours (8-connectivity)
                        if self.parseMode == 1:
                            if i + 1 < len(binaryImage) and j + 1 < len(binaryImage[i + 1]) and binaryImage[i + 1][j + 1] != 0:
                                Q.put((i + 1, j + 1))
                                binaryImage[i + 1][j + 1] = 0
                            if i + 1 < len(binaryImage) and j - 1 >= 0 and binaryImage[i + 1][j - 1] != 0:
                                Q.put((i + 1, j - 1))
                                binaryImage[i + 1][j - 1] = 0
                            if i - 1 >= 0 and j - 1 >= 0 and binaryImage[i - 1][j - 1] != 0:
                                Q.put((i - 1, j - 1))
                                binaryImage[i - 1][j - 1] = 0
                            if i - 1 >= 0 and j + 1 < len(binaryImage[i - 1]) and binaryImage[i - 1][j + 1] != 0:
                                Q.put((i - 1, j + 1))
                                binaryImage[i - 1][j + 1] = 0

                    cv2.imwrite("tmp/{}.png".format(gg), binaryImage)  # debug output
                    whitePixels.append(obj)
        return whitePixels

    def parseImage(self, image_path: str) -> list:

        image = cv2.imread(image_path)
        if self.rescaleOriginalImage:
            image = self.scaleOriginal(image)

        binary = self._imageToBinary(image)

        whitePixels = self._BNF(binary)

        return whitePixels

    def isScaleable(self, imageShape):
        return True

    def scaleOriginal(self, image: np.ndarray):
        # To be created
        return image

    @staticmethod
    def _getImageAndCenterFromDotes(Dotes, originalImage=None):
        i_mx, j_mx = -1, -1
        i_mn, j_mn = 100500, 100500  # just big numbers

        # finding upper right and lower left corner of image
        for el in Dotes:
            i, j = el

            if i_mx < i:
                i_mx = i
            if j_mx < j:
                j_mx = j
            if j_mn > j:
                j_mn = j
            if i_mn > i:
                i_mn = i

        # updating image center
        imageCenter = Point((i_mx + i_mn) // 2, (j_mx + j_mn) // 2)

        # finding out size of image
        width, height = i_mx - i_mn + 1, j_mx - j_mn + 1
        image = np.zeros((width, height)) if originalImage is None else np.zeros((width, height, 3))

        # recreating image from dotes
        if originalImage is not None:
            for el in Dotes:
                i, j = el
                image[i - i_mn][j - j_mn] = originalImage[i][j]
        else:
            for el in Dotes:
                i, j = el
                image[i - i_mn][j - j_mn] = 255

        return image, imageCenter

    def scaleParsedImage(self, image: np.ndarray):
        """
        :param image: np.ndarray
        :return: scaledImage np.ndarray
        """
        width, height = image.shape[:2]

        newWidth = self.targetWidth if width > self.targetWidth else width
        newHeight = self.targetHeight if height > self.targetHeight else height
        if self.scaleFully and newHeight < self.targetHeight * self.scaleFullyRate and newWidth < self.targetWidth * self.scaleFullyRate:
            scaleRate = min((self.targetWidth * self.scaleFullyRate / newWidth), (
                    self.targetHeight * self.scaleFullyRate / newHeight))

            newWidth = math.ceil(newWidth * scaleRate)
            newHeight = math.ceil(newHeight * scaleRate)



        scaled = cv2.resize(image, (newHeight, newWidth), interpolation=self.rescalingInterpolation)

        # pasting our scaled image in the middle
        x_add, y_add = (self.targetWidth - newWidth) // 2, (self.targetHeight - newHeight) // 2
        resized = np.zeros((self.targetWidth, self.targetHeight)) if len(image.shape) == 2 else np.zeros((self.targetWidth, self.targetHeight, 3))
        for x in range(newWidth):
            for y in range(newHeight):
                resized[x + x_add][y + y_add] = scaled[x][y]

        return resized

    def parseAndConvert(self, image_name: str) -> list:
        imagesInDotes = self.parseImage(image_name)
        original = 255 - cv2.imread(image_name)  # invert so the symbol pixels are the bright ones

        images = []

        for dotes in imagesInDotes:
            image = self._getImageAndCenterFromDotes(dotes, original)
            images.append([self.scaleParsedImage(image[0]), image[1]])
        rawImages = []
        for image, center in images:
            rawImages.append(RawImage(image, center))

        return rawImages
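
A minimal usage sketch for the class above (the file name is a placeholder, and MyQueue, Point and RawImage are the helper classes referenced but not shown here):

parser = Parser(targetSizes=(75, 75))
rawImages = parser.parseAndConvert("formula.png")
print(len(rawImages))  # expected: one RawImage (symbol image plus its center) per component found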

CodePudding user response:

Have a look at the (imho not all that intuitively named) function cv.findContours(): https://docs.opencv.org/3.4/d4/d73/tutorial_py_contours_begin.html

It should do most of the things you are doing by hand right now out of the box, namely extracting and measuring binary objects.
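
For reference, a minimal sketch of that route (assuming OpenCV 4.x and a placeholder file name; the padding of each crop to 75x75 is left out):

import cv2

image = cv2.imread("formula.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# invert so the symbols are white on black, which is what findContours expects
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# OpenCV 4.x returns (contours, hierarchy); 3.x returns an extra first value
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

symbols = []
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    center = (x + w // 2, y + h // 2)
    crop = binary[y:y + h, x:x + w]
    symbols.append((crop, center))  # pad/resize crop to 75x75 here if needed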

If you encounter problems where a single symbol is made up of several objects (like i, % or "), look into morphological operations to merge them into a single one: erode(), dilate(), or open and close via morphologyEx() (tutorial here: https://docs.opencv.org/4.x/d9/d61/tutorial_py_morphological_ops.html).
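
A hedged sketch of that merging step, building on the binary image from the previous snippet: a morphological close with a tall rectangular kernel bridges the vertical gap between, say, the dot of an "i" and its stem before the contours are extracted (the kernel size is a guess you would need to tune):

# close with a tall 1x9 kernel to bridge small vertical gaps within one symbol
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 9))
merged = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

contours, _ = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# use the contours from merged only for grouping; crop each symbol out of the
# original binary image so the strokes themselves are not thickened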
