ValueError: Could not broadcast when trying to resize and center image-CodePudding

I have written a simple function that creates a video frame from an image.

Basically, if the image is bigger than the frame, it is shrunk to fit: if the aspect ratio of the image is greater than or equal to the aspect ratio of the frame, the image is resized to fit the width while keeping its aspect ratio; otherwise it is resized to fit the height while maintaining its aspect ratio.

Then if the (resized) image has four channels (it has an alpha channel), the image will be converted to its alpha composite with a black image (the color channels will become the product of the original color channels and the alpha channel).

Then the image is moved to the center of frame. The frame is always a black three channel image.

Yep it is quite verbose to describe the process in English, but the function is actually quite simple in code:

import cv2
import numpy as np

def make_frame(img, fw, fh):
    """Centre ``img`` on a black ``fw`` x ``fh`` three-channel frame.

    An image wider or taller than the frame is shrunk first, keeping its
    aspect ratio. A three-channel image is copied as is; otherwise channel 3
    is treated as alpha and the first three channels are multiplied by
    ``alpha / 255`` (composited over black).

    img: array of shape (height, width, channels).
    fw, fh: frame width and height in pixels.
    Returns a ``uint8`` array of shape ``(fh, fw, 3)``.
    """
    h, w, c = img.shape
    if w > fw or h > fh:
        if w/h >= fw/fh:
            # Relatively wider than the frame: fit the width.
            h = round(fw * h / w)
            w = fw
        else:
            # Relatively taller than the frame: fit the height.
            w = round(fh * w / h)
            h = fh
        # NOTE(review): cv2.resize expects dsize as (width, height). The
        # (h, w) order below is the defect this question is asking about and
        # is kept so the traceback shown afterwards stays reproducible.
        img = cv2.resize(img, (h, w), interpolation = cv2.INTER_AREA)
        h, w, c = img.shape
    
    # Top-left corner that centres the image inside the frame.
    dw, dh = round((fw - w) / 2), round((fh - h) / 2)
    frame = np.zeros((fh, fw, 3), dtype=np.uint8)
    if c == 3:
        frame[dh:dh + h, dw:dw + w] = img
    else:
        alpha = img[..., 3]/255
        bgr = img[..., :3]
        # Broadcast the alpha plane over the three colour channels.
        frame[dh:dh + h, dw:dw + w] = bgr * alpha[...,np.newaxis]
    return frame

I don't understand how it can raise exceptions like this:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-128-0b3cf7332ed5> in <module>
----> 1 make_frame(img, 1280, 720)

<ipython-input-127-d57a541f7e04> in make_frame(img, fw, fh)
     18         alpha = img[..., 3]/255
     19         bgr = img[..., :3]
---> 20         frame[dh:dh + h, dw:dw + w] = bgr * alpha[...,np.newaxis]
     21     return frame

ValueError: could not broadcast input array from shape (721,720,3) into shape (720,720,3)

I have managed to make an image that when passed as argument to the above function, will perfectly reproduce the above error:

The image is a transparent PNG file with resolution of 1081*1080. Stack Overflow automatically converts the image so the image downloaded from Stack Overflow may fail to reproduce the exception.

Download the original version from Google Drive for perfect reproducibility.

What might have caused the exception? Is there a bug in my code?

Steps to reproduce:

# file_path is the location of the 1081*1080 transparent PNG described above.
# IMREAD_UNCHANGED keeps the alpha channel, so img has four channels.
img = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)
make_frame(img, 1280, 720)

Interestingly, if you crop the image to 1080*1080, and do the same steps above, it will work just fine...

CodePudding user response：

Indeed, the problem was the call that resizes the image: the dimension argument was flipped. `cv2.resize` expects the target size as `(width, height)`, but the code passed `(h, w)`. After fixing that, the problem was solved.

Anyways here is the whole script where the function is from:

import cv2
import numpy as np
import os
import skimage.exposure

# Path to the ffmpeg executable that huerotvid runs to encode the frames.
FFMPEG = 'D:/ffmpeg/ffmpeg.exe'

def imread_transparent(filename):
    """Load an image as BGRA, synthesising an alpha channel if it has none.

    A file that already has four channels is returned unchanged. Otherwise
    the image is thresholded (grey level above 11 counts as foreground),
    cleaned with open/close/erode using a 3x3 elliptical kernel, and the
    largest external contour is filled to form the mask. The mask is blurred
    and rescaled so its edge fades out, then stored as channel 3.

    filename: path handed to ``cv2.imread``.
    Returns a four-channel array.
    Raises OSError if ``cv2.imread`` cannot load the file.
    """
    img = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('could not read image: {}'.format(filename))
    if img.ndim == 2:
        # Single-channel file: expand to BGR so the steps below apply.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    if img.shape[2] == 4:
        return img

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    morph = cv2.threshold(gray, 11, 255, cv2.THRESH_BINARY)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
    for mode in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE, cv2.MORPH_ERODE):
        morph = cv2.morphologyEx(morph, mode, kernel, borderType=cv2.BORDER_CONSTANT, borderValue=0)

    contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    contours = contours[0] if len(contours) == 2 else contours[1]
    contour = np.zeros_like(gray)
    if len(contours) > 0:
        big_contour = max(contours, key=cv2.contourArea)
        cv2.drawContours(contour, [big_contour], 0, 255, -1)
    # With no contour at all the mask stays empty (fully transparent result).

    blur = cv2.GaussianBlur(contour, (5,5), sigmaX=0, sigmaY=0, borderType = cv2.BORDER_DEFAULT)
    # Map the upper half of the blurred range onto 0..255 for a soft edge.
    mask = skimage.exposure.rescale_intensity(blur, in_range=(127.5,255), out_range=(0,255))
    result = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
    result[:,:,3] = mask

    return result

def rotate_image(image, angle):
    """Rotate ``image`` about its centre without clipping the corners.

    The output canvas is enlarged to the bounding box of the rotated image
    and the transform is shifted so the result stays centred in it.

    image: array whose first two dimensions are (height, width).
    angle: rotation in degrees; it is negated before being passed to
        ``cv2.getRotationMatrix2D``.
    Returns the array produced by ``cv2.warpAffine``.
    """
    h, w = image.shape[:2]
    cX, cY = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])
    # Bounding-box size of the rotated rectangle; int() guarantees plain
    # Python ints for the dsize tuple.
    nW = int(round((h * sin) + (w * cos)))
    nH = int(round((h * cos) + (w * sin)))
    # Add to the existing translation so the old centre maps to the new one.
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY
    return cv2.warpAffine(image, M, (nW, nH))

def remove_border(image):
    """Crop ``image`` to the bounding box of its non-transparent pixels.

    image: array of shape (height, width, channels); channel 3 is alpha.
    Returns a view of ``image`` covering every pixel whose alpha is non-zero.
    An image without an alpha channel, or one that is fully transparent, is
    returned unchanged.
    """
    if image.ndim < 3 or image.shape[2] < 4:
        return image

    rows, cols = image[:, :, 3].nonzero()
    if rows.size == 0:
        # Nothing opaque to crop to; np.min/np.max would raise on empty input.
        return image

    # Slice ends are exclusive, so add 1 to keep the last opaque row/column.
    return image[rows.min():rows.max() + 1, cols.min():cols.max() + 1]

def make_frame(img, fw, fh):
    """Place ``img`` in the centre of a black ``fw`` x ``fh`` BGR frame.

    An image wider or taller than the frame is first shrunk with
    ``cv2.INTER_AREA`` so that it fits while keeping its aspect ratio.
    A three-channel image is copied as is; otherwise channel 3 is used as
    alpha and the first three channels are multiplied by ``alpha / 255``
    (composited over black).

    img: array of shape (height, width, channels).
    fw, fh: frame width and height in pixels.
    Returns a ``uint8`` array of shape ``(fh, fw, 3)``.
    """
    h, w, c = img.shape
    if w > fw or h > fh:
        if w / h >= fw / fh:
            # Relatively wider than the frame: fit the width.
            # max(1, ...) keeps a very thin image from rounding down to 0.
            h = max(1, round(fw * h / w))
            w = fw
        else:
            # Relatively taller than the frame: fit the height.
            w = max(1, round(fh * w / h))
            h = fh
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)

    # Top-left corner that centres the image inside the frame.
    dw, dh = round((fw - w) / 2), round((fh - h) / 2)
    frame = np.zeros((fh, fw, 3), dtype=np.uint8)
    if c == 3:
        frame[dh:dh + h, dw:dw + w] = img
    else:
        alpha = img[..., 3] / 255
        bgr = img[..., :3]
        # Broadcast the alpha plane over the three colour channels.
        frame[dh:dh + h, dw:dw + w] = bgr * alpha[..., np.newaxis]
    return frame

def huerotvid(imagefile, outfile, n=256, loops=1, fps=24, rotate=False, frame_width=None, frame_height=None, bitrate='5M'):
    """Render a hue-cycling video of ``imagefile`` with ffmpeg.

    ``n`` frames are produced; frame ``i`` has its hue shifted by
    ``round(i / n * 256)`` (HSV_FULL units, wrapping at 256). Each frame is
    written as a PNG into the temporary directory, the sequence is encoded
    with the executable named by ``FFMPEG``, and the PNGs are deleted again.

    imagefile: path of the source image (loaded via ``imread_transparent``).
    outfile: path of the video to write.
    n: number of frames, 1..256.
    loops: how often the sequence is played; passed as ``-stream_loop loops-1``.
    fps: input frame rate (``-framerate``).
    rotate: if true, frame ``i`` is also rotated by ``i * 360 / n`` degrees
        and cropped with ``remove_border``.
    frame_width, frame_height: output size; default to the image size.
    bitrate: video bitrate (``-b:v``).
    Raises OSError if a frame cannot be written and
    subprocess.CalledProcessError if ffmpeg exits with an error.
    """
    import subprocess
    import tempfile

    assert 0 < n <= 256
    img = imread_transparent(imagefile)
    height, width = img.shape[:2]
    if frame_width is None:
        frame_width = width
    if frame_height is None:
        frame_height = height
    assert frame_width > 0 and frame_height > 0

    alpha = img[..., 3]
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV_FULL)
    # Widen the hue so adding the shift and taking % 256 cannot overflow uint8.
    base_hue = hsv[..., 0].astype(np.uint16)
    shifted = hsv.copy()

    # splitext/basename cope with names without a dot and with either path
    # separator; gettempdir works where the 'tmp' variable is not set.
    file_name = os.path.splitext(os.path.basename(imagefile))[0]
    tmp_folder = tempfile.gettempdir()

    written = []
    try:
        for i in range(n):
            shifted[..., 0] = (base_hue + round(i / n * 256)) % 256
            frame = np.zeros((height, width, 4), dtype=np.uint8)
            frame[..., :3] = cv2.cvtColor(shifted, cv2.COLOR_HSV2BGR_FULL)
            frame[..., 3] = alpha
            if rotate:
                frame = remove_border(rotate_image(frame, i * 360 / n))
            frame = make_frame(frame, frame_width, frame_height)
            path = os.path.join(tmp_folder, '{}_{}.png'.format(file_name, i))
            # cv2.imwrite returns False on failure instead of raising.
            if not cv2.imwrite(path, frame):
                raise OSError('could not write frame: {}'.format(path))
            written.append(path)

        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters are passed through verbatim.
        command = [
            FFMPEG, '-y',
            '-stream_loop', str(loops - 1),
            '-framerate', str(fps),
            '-hwaccel', 'cuda',
            '-hwaccel_output_format', 'cuda',
            '-i', os.path.join(tmp_folder, '{}_%d.png'.format(file_name)),
            '-c:v', 'h264_nvenc',
            '-b:v', str(bitrate),
            '-vf', 'scale={}:{}'.format(frame_width, frame_height),
            outfile,
        ]
        subprocess.run(command, check=True)
    finally:
        # Always remove the intermediate frames, even if encoding failed.
        for path in written:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass