I'm using python 3.9.6 and OpenCV 4.5.1
I'm trying to detect objects on a template. My template is a real-time feed of my monitor and my objects are JPG images.
The issue: When I crop my template to speed up detection my mouse starts clicking in the wrong location.
This only happens after I've cropped my template. I think it's because I'm cropping my template at the wrong time in my script. My full monitor is (0 , 0, 1920, 1080)
but I only want to capture [220:900, 270:1590]
I've followed the OpenCV documentation and a few online tutorials so far but I'm now stuck.
How do I click on the point returned in img
(third code block), rather than at an incorrect offset caused by cropping my template incorrectly?
I'm using win32gui to grab my template:
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
    """Grab frames from a window (or the whole desktop) as OpenCV-ready arrays.

    Attributes:
        w, h: width and height in pixels of the captured area.
        hwnd: handle of the window being captured.
        cropped_x, cropped_y: top-left corner, inside the window, at which
            the capture starts (used to cut off the border and title bar).
        offset_x, offset_y: values to add to a screenshot coordinate to turn
            it into an absolute screen coordinate.
    """

    # properties
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        """Locate the target window and record its size and screen offset.

        Args:
            window_name: exact title of the window to capture. If None, the
                entire desktop is captured.

        Raises:
            Exception: if no window with the given title exists.
        """
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            # Only look the window up when a name was given; otherwise the
            # desktop handle obtained above would be overwritten.
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 0
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_screenshot(self):
        """Capture the current window contents.

        Returns:
            A contiguous uint8 numpy array of shape (h, w, 3): the captured
            pixels with the fourth (alpha) channel dropped.
        """
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        # The bitmap must be selected into the memory DC, otherwise BitBlt
        # has no surface to copy the pixels onto.
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                   (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        # frombuffer replaces the deprecated binary mode of np.fromstring
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img = img.reshape((self.h, self.w, 4))

        # free resources; this method runs once per frame, so every GDI
        # object created above has to be released here to avoid a leak
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel, then copy into a contiguous (and therefore
        # writable) array so OpenCV can match against it and draw on it
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img

    @staticmethod
    def list_window_names():
        """Print the handle and title of every visible top-level window."""
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)
And OpenCV and numpy for my object detection:
import cv2 as cv
import numpy as np
class Vision:
    """Find occurrences of a needle image inside a haystack image.

    Attributes:
        needle_img: the needle image as loaded by cv.imread.
        needle_w, needle_h: needle width and height in pixels.
        method: the cv.matchTemplate comparison method.
    """

    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0
    method = None

    # constructor
    def __init__(self, needle_img_path, method=cv.TM_CCORR_NORMED):
        """Load the needle image and remember its size.

        Args:
            needle_img_path: path of the image to search for.
            method: comparison method handed to cv.matchTemplate.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
        # cv.imread signals failure by returning None instead of raising
        if self.needle_img is None:
            raise FileNotFoundError(
                'Could not read needle image: {}'.format(needle_img_path))

        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]

        # There are 6 methods to choose from:
        # TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED,
        # TM_SQDIFF, TM_SQDIFF_NORMED
        # NOTE(review): find() keeps scores >= threshold, which looks wrong
        # for the TM_SQDIFF variants where lower is better; confirm before
        # using them.
        self.method = method

    def find(self, haystack_img, threshold=0.5, debug_mode=None):
        """Return the centre point of every match of the needle.

        Args:
            haystack_img: image to search in. When debug_mode is set the
                markers are drawn directly onto this array.
            threshold: minimum match score for a location to count.
            debug_mode: None, 'rectangles' or 'points'; selects what is
                drawn before the result is shown in a 'Matches' window.

        Returns:
            A list of (x, y) tuples of Python ints, relative to the top-left
            corner of haystack_img. If haystack_img is a crop of a larger
            image, the caller must add the crop's origin to get coordinates
            in the larger image.
        """
        # run the OpenCV algorithm
        result = cv.matchTemplate(haystack_img, self.needle_img, self.method)

        # Get the all the positions from the match result that exceed our threshold
        locations = np.where(result >= threshold)
        # np.where yields (rows, cols); reverse so each location is (x, y)
        locations = list(zip(*locations[::-1]))

        rectangles = []
        for loc in locations:
            rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
            # Add every box to the list twice in order to retain single (non-overlapping) boxes
            rectangles.append(rect)
            rectangles.append(rect)

        # Apply group rectangles
        rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)

        points = []
        # len() rather than truthiness: the grouped result may be a numpy array
        if len(rectangles):
            line_color = (0, 255, 0)
            line_type = cv.LINE_4
            marker_color = (255, 0, 255)
            marker_type = cv.MARKER_CROSS

            # Loop over all the rectangles
            for (x, y, w, h) in rectangles:
                x, y, w, h = int(x), int(y), int(w), int(h)

                # Determine the center position
                center_x = x + w // 2
                center_y = y + h // 2
                # Save the points
                points.append((center_x, center_y))

                if debug_mode == 'rectangles':
                    # Determine the box position
                    top_left = (x, y)
                    bottom_right = (x + w, y + h)
                    # Draw the box
                    cv.rectangle(haystack_img, top_left, bottom_right, color=line_color,
                                 lineType=line_type, thickness=2)
                elif debug_mode == 'points':
                    # Draw the center point
                    cv.drawMarker(haystack_img, (center_x, center_y),
                                  color=marker_color, markerType=marker_type,
                                  markerSize=40, thickness=2)

        ############ DISPLAYS MATCHES #############
        if debug_mode:
            cv.imshow('Matches', haystack_img)

        return points
And then passing in both variables in a separate script here:
import cv2 as cv
import pyautogui as py
from windowcapture import WindowCapture
from vision import Vision
# initialize the WindowCapture class
# leave blank to capture the whole screen
haystack = WindowCapture()
# initialize the Vision class
needle = Vision('needle.jpg')
# top-left corner (x, y) of the crop below; must match the slice start values
crop_x, crop_y = 270, 220
# get an updated image of the game
screenshot = haystack.get_screenshot()
screenshotCropped = screenshot[220:900, 270:1590]
img = needle.find(screenshotCropped, 0.85, 'points')
if img:
    # find() returns points relative to the cropped image, so add the crop
    # origin back to get full-screen coordinates before clicking
    target_x = int(img[0][0]) + crop_x
    target_y = int(img[0][1]) + crop_y
    py.click(x=target_x, y=target_y)
The line causing the issue is: screenshotCropped = screenshot[220:900, 270:1590]
If it's removed I click on the object correctly.
I also tried adding border_pixels
and titlebar_pixels
to allow me to crop directly from WindowCapture
but I run into the same issue detailed above.
CodePudding user response:
If I understand your code correctly, when you crop the image, you're not (yet) accounting for the X/Y offset introduced through that crop.
If I understand your example correctly, your code
screenshotCropped = screenshot[220:900, 270:1590]
is cropping from 220-900
along the y-axis (height) and 270-1590
along the x-axis (width), yes? If so, try
# bounds of the region to search, in full-screenshot coordinates
x_0, x_1 = 270, 1590
y_0, y_1 = 220, 900
# numpy images are indexed [rows (y), columns (x)]
screenshotCropped = screenshot[y_0:y_1, x_0:x_1]
img = needle.find(screenshotCropped, 0.85, 'points')
if img:
    # matches are relative to the crop; shift them by the crop's origin
    x_coord = img[0][0] + x_0
    y_coord = img[0][1] + y_0
    py.moveTo(x_coord, y_coord)
If your cropping region changes, update your (x_0, x_1, y_0, y_1)
values accordingly (in both the crop operation and the py.moveTo