I'm using python 3.9.6 and OpenCV 4.5.1
I'm trying to detect objects on a template. My template is a real-time feed of my monitor and my objects are jpg's.
The issue: When I crop my template to speed up detection my mouse starts clicking in the wrong location.
This only happens after I've cropped my template. I think it's because I'm cropping my template at the wrong time in my script. My full monitor is (0 , 0, 1920, 1080)
but I only want to capture [220:900, 270:1590]
I've followed the OpenCV documentation and a few online tutorials so far but I'm now stuck.
How do I click on img
(third code block) rather than at an incorrect offset caused by cropping my template incorrectly?
I'm using win32gui to grab my template:
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
    """Capture screenshots of a window (or the whole desktop) via the Win32 GDI API."""

    # properties
    w = 0            # capture width in pixels
    h = 0            # capture height in pixels
    hwnd = None      # Win32 window handle being captured
    cropped_x = 0    # pixels trimmed from the left (window border)
    cropped_y = 0    # pixels trimmed from the top (titlebar)
    offset_x = 0     # add to a screenshot x to get an actual screen x
    offset_y = 0     # add to a screenshot y to get an actual screen y

    # constructor
    def __init__(self, window_name=None):
        """Resolve the window handle and cache its size and screen offset.

        :param window_name: exact window title to capture; None captures
            the entire screen.
        :raises Exception: if no window with the given title exists.
        """
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
        if not self.hwnd:
            raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 0
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        # BUG FIX: the '+' operators were missing, making these lines
        # syntax errors
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_screenshot(self):
        """Return the current window contents as a (h, w, 3) BGR uint8 array."""
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                   (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        # np.fromstring is deprecated; np.frombuffer is the supported
        # replacement and behaves identically for raw bytes
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel, then make the array contiguous (and
        # writable, since frombuffer returns a read-only view) so OpenCV
        # drawing/matching calls accept it
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img

    @staticmethod
    def list_window_names():
        """Print the handle and title of every visible top-level window."""
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)
And OpenCV and numpy for my object detection:
import cv2 as cv
import numpy as np
class Vision:
    """Locate all occurrences of a needle image inside a haystack image
    using OpenCV template matching."""

    # properties
    needle_img = None   # loaded needle image (BGR[A] array)
    needle_w = 0        # needle width in pixels
    needle_h = 0        # needle height in pixels
    method = None       # cv.matchTemplate comparison method

    # constructor
    def __init__(self, needle_img_path, method=cv.TM_CCORR_NORMED):
        """Load the needle image and remember its dimensions.

        :param needle_img_path: path to the image to search for.
        :param method: one of the 6 template-matching methods:
            TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED,
            TM_SQDIFF, TM_SQDIFF_NORMED.
        """
        self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
        self.method = method

    def find(self, haystack_img, threshold=0.5, debug_mode=None):
        """Return the center points (x, y) of all matches above *threshold*.

        NOTE: coordinates are relative to *haystack_img*. If the haystack
        was cropped from a larger image, the caller must add the crop
        origin back before using the points as screen positions.

        :param haystack_img: image to search in.
        :param threshold: minimum match score to keep (0..1).
        :param debug_mode: None, 'rectangles', or 'points' — draws the
            matches onto haystack_img and shows them in a window.
        """
        # run the OpenCV algorithm
        result = cv.matchTemplate(haystack_img, self.needle_img, self.method)

        # Get all the positions from the match result that exceed our threshold
        locations = np.where(result >= threshold)
        locations = list(zip(*locations[::-1]))

        rectangles = []
        for loc in locations:
            rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
            # Add every box to the list twice in order to retain single
            # (non-overlapping) boxes after groupRectangles
            rectangles.append(rect)
            rectangles.append(rect)
        # Apply group rectangles
        rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)

        points = []
        if len(rectangles):
            line_color = (0, 255, 0)
            line_type = cv.LINE_4
            marker_color = (255, 0, 255)
            marker_type = cv.MARKER_CROSS

            # Loop over all the rectangles
            for (x, y, w, h) in rectangles:
                # Determine the center position
                # BUG FIX: the '+' operators were missing, making these
                # lines syntax errors
                center_x = x + int(w / 2)
                center_y = y + int(h / 2)
                # Save the points
                points.append((center_x, center_y))

                if debug_mode == 'rectangles':
                    # Determine the box position ('+' restored here too)
                    top_left = (x, y)
                    bottom_right = (x + w, y + h)
                    # Draw the box
                    cv.rectangle(haystack_img, top_left, bottom_right,
                                 color=line_color, lineType=line_type,
                                 thickness=2)
                elif debug_mode == 'points':
                    # Draw the center point
                    cv.drawMarker(haystack_img, (center_x, center_y),
                                  color=marker_color, markerType=marker_type,
                                  markerSize=40, thickness=2)

        ############ DISPLAYS MATCHES #############
        if debug_mode:
            cv.imshow('Matches', haystack_img)
            # NOTE(review): cv.imshow only renders once the GUI event loop
            # is pumped (cv.waitKey) — confirm the caller does this.

        return points
And then passing in both variables in a separate script here:
import cv2 as cv
import pyautogui as py
from windowcapture import WindowCapture
from vision import Vision
# initialize the WindowCapture class
# leave blank to capture the whole screen
haystack = WindowCapture()
# initialize the Vision class
needle = Vision('needle.jpg')

# crop region of the screenshot: rows y_0:y_1, columns x_0:x_1
x_0, x_1 = 270, 1590
y_0, y_1 = 220, 900

while True:
    # get an updated image of the game
    # BUG FIX: the capture instance is named 'haystack' above, not
    # 'template' — the original line raised NameError
    screenshot = haystack.get_screenshot()
    screenshotCropped = screenshot[y_0:y_1, x_0:x_1]
    img = needle.find(screenshotCropped, 0.85, 'points')
    if img:
        # BUG FIX: match points are relative to the cropped image, so the
        # crop origin must be added back to get real screen coordinates
        py.moveTo(img[0][0] + x_0, img[0][1] + y_0)
The line causing the issue is: screenshotCropped = screenshot[220:900, 270:1590]
If it's removed I click on the object correctly.
I also tried adding border_pixels
and titlebar_pixels
to allow me to crop directly from WindowCapture
but I run into the same issue detailed above.
CodePudding user response:
If I understand your code correctly, when you crop the image, you're not (yet) accounting for the X/Y offset introduced through that crop.
If I understand your example correctly, your code
screenshotCropped = screenshot[220:900, 270:1590]
is cropping from 220-900
along the y-axis (height) and 270-1590
along the x-axis (width), yes? If so, try
# Name the crop bounds once so the crop and the click stay in sync
x_0, x_1 = 270, 1590
y_0, y_1 = 220, 900
screenshotCropped = screenshot[y_0:y_1, x_0:x_1]
...
if img:
    # Translate the match from cropped-image space back to screen space
    # by adding the crop origin ('+' operators restored — they were
    # dropped in the original, leaving syntax errors)
    x_coord = img[0][0] + x_0
    y_coord = img[0][1] + y_0
    py.moveTo(x_coord, y_coord)
If your cropping region changes, update your (x_0, x_1, y_0, y_1)
values accordingly (in both the crop operation and the py.moveTo
operation)?