Combining foreground png image with jpg background-CodePudding

I have the function below:

def alphaMerge(small_foreground, background, top, left):
    """Alpha-blend a 4-channel overlay onto a copy of a 3-channel background.

    Args:
        small_foreground: 4-channel image; it is split into B, G, R and alpha.
        background: 3-channel image; it is copied, not modified.
        top: Row of the background where the overlay's top edge is placed.
        left: Column of the background where the overlay's left edge is placed.

    Returns:
        A copy of ``background`` with the overlay blended in at (top, left).

    Raises:
        ValueError: If the overlay does not fit inside the background at
            (top, left). The background slice is then smaller than the
            overlay, and the per-channel products cannot be broadcast.
    """
    result = background.copy()

    fg_b, fg_g, fg_r, fg_a = cv.split(small_foreground)
    print(fg_b, fg_g, fg_r, fg_a)

    # Scale alpha by 1/255 so it can be used as a per-pixel blend weight.
    fg_a = fg_a / 255.0

    # Overlay colour channels weighted by alpha.
    label_rgb = cv.merge([fg_b * fg_a, fg_g * fg_a, fg_r * fg_a])

    # Region of the background that the overlay is supposed to cover.
    height, width = small_foreground.shape[0], small_foreground.shape[1]
    part_of_bg = result[top:top + height, left:left + width, :]

    bg_b, bg_g, bg_r = cv.split(part_of_bg)

    # Background channels weighted by the remaining (1 - alpha).
    part_of_bg = cv.merge([bg_b * (1 - fg_a), bg_g * (1 - fg_a), bg_r * (1 - fg_a)])

    # Sum both weighted parts; the third argument is the destination array.
    cv.add(label_rgb, part_of_bg, part_of_bg)
    result[top:top + height, left:left + width, :] = part_of_bg
    return result

if __name__ == '__main__':
    # Stamp the logo onto every sufficiently large image in the dataset folder.
    folder_dir = r"C:\photo_datasets\products_small"
    logo = cv.imread(r"C:\Users\PiotrSnella\photo_datasets\discount.png", cv.IMREAD_UNCHANGED)
    max_bytes = 8 * 1024 * 1024  # 8388608

    for images in os.listdir(folder_dir):
        input_path = os.path.join(folder_dir, images)

        # Skip files of 8 MiB or more.
        if os.stat(input_path).st_size >= max_bytes:
            continue

        img = cv.imread(input_path, cv.IMREAD_UNCHANGED)
        height, width, channels = img.shape

        # Only images larger than 500 px in both dimensions get the logo.
        if height <= 500 or width <= 500:
            continue

        result = alphaMerge(logo, img, 0, 0)
        output_path = r'C:\photo_datasets\products_small_output_cv\{}.png'.format(images)
        cv.imwrite(output_path, result)

I want to combine two pictures: a logo and a product photo. I would like to apply the logo to every image in the products_small folder. I'm getting an error: part_of_bg = cv.merge([bg_b * (1 - fg_a), bg_g * (1 - fg_a), bg_r * (1 - fg_a)]) ValueError: operands could not be broadcast together with shapes (720,540) (766,827)

I tried other ways of combining the images and still get an error about the shapes. Could the photo be the problem, or is something wrong with the code?

Thank you for your help guys :)

CodePudding user response：

Here is one way to do that in Python/OpenCV. I will place a 20% resized logo onto the pants image at coordinates 660,660 on the right side pocket.

Read the background image (pants)
Read the foreground image (logo) unchanged to preserve the alpha channel
Resize the foreground (logo) to 20%
Create a transparent image the size of the background image
Insert the resized foreground (logo) into the transparent image at the desired location
Extract the alpha channel from the inserted, resized foreground image
Extract the base BGR channels from the inserted, resized foreground image
Blend the background image and the base BGR image using the alpha channel as a mask using np.where(). Note all images must be the same dimensions and 3 channels
Save the result

Background Image:

Foreground Image:

import cv2
import numpy as np

# read background image
bimg = cv2.imread('pants.jpg')
hh, ww = bimg.shape[:2]

# read foreground image unchanged to preserve the alpha channel
fimg = cv2.imread('flashsale.png', cv2.IMREAD_UNCHANGED)

# resize foreground image to 20% in both directions
fimg_small = cv2.resize(fimg, (0,0), fx=0.2, fy=0.2)
ht, wd = fimg_small.shape[:2]

# create fully transparent 4-channel image the size of the background
fimg_new = np.full((hh,ww,4), (0,0,0,0), dtype=np.uint8)

# insert resized image into transparent image at desired coordinates
# (top-left corner at row 660, column 660)
fimg_new[660:660+ht, 660:660+wd] = fimg_small

# extract alpha channel from foreground image as mask and make 3 channels
alpha = fimg_new[:,:,3]
alpha = cv2.merge([alpha,alpha,alpha])

# extract bgr channels from foreground image
base = fimg_new[:,:,0:3]

# blend the two images using the alpha channel as controlling mask:
# where alpha is 0 keep the background pixel, otherwise take the logo pixel
result = np.where(alpha==(0,0,0), bimg, base)

# save result
cv2.imwrite("pants_flashsale.png", result)

# show result
cv2.imshow("RESULT", result)
cv2.waitKey(0)

Result:

CodePudding user response：

This just requires some multiplication and subtraction.

Your overlay has an actual alpha channel, not just a boolean mask. You should use it. It makes edges look better than just a hard boolean mask.

I see one issue with your overlay: it doesn't have any "shadow" to give the white text contrast against a potentially white background.

When you resize RGBA data, it's not trivial. You'd better export the graphic from your vector graphics program in the desired resolution in the first place. Resizing after the fact requires operations to make sure partially transparent pixels (neither 100% opaque nor 100% transparent) are calculated properly so undefined "background" from the fully transparent areas of the overlay image is not mixed into those partially transparent pixels.

# read the background and the overlay (unchanged, to keep its alpha channel)
base = cv.imread("U3hRd.jpg")
overlay = cv.imread("OBxGQ.png", cv.IMREAD_UNCHANGED)
(bheight, bwidth) = base.shape[:2]
(oheight, owidth) = overlay.shape[:2]
print("base:", bheight, bwidth)
print("overlay:", oheight, owidth)

# place overlay in center
#ox = (bwidth - owidth) // 2
#oy = (bheight - oheight) // 2
# place overlay in top left
ox = 0
oy = 0

# split the overlay into its colour channels and an alpha weight scaled by 1/255
overlay_color = overlay[:,:,:3]
overlay_alpha = overlay[:,:,3] * np.float32(1/255)
# "unsqueeze" (insert 1-sized color dimension) so numpy broadcasting works
overlay_alpha = np.expand_dims(overlay_alpha, axis=2)

composite = base.copy()
base_roi = base[oy:oy+oheight, ox:ox+owidth]
# composite_roi is a slice of composite, so assigning into it updates composite
composite_roi = composite[oy:oy+oheight, ox:ox+owidth]
# per-pixel blend: overlay weighted by alpha plus background weighted by (1 - alpha)
composite_roi[:,:] = overlay_color * overlay_alpha + base_roi * (1 - overlay_alpha)

CodePudding user response：

This is what you wanted: the logo in the top-left corner. Note that a logo with a white foreground doesn't work on the pant.jpg background.

Just 17 lines of code:

import cv2
import numpy as np

background = cv2.imread('pant.jpg')  # background photo
canvas = np.full(background.shape, 255, np.uint8)  # all-white canvas, same shape as the background
logo = cv2.imread('logo3.png')  # read with default flags, so no alpha channel is kept
logo_h, logo_w = logo.shape[:2]
canvas[:logo_h, :logo_w] = logo  # paste the logo into the top-left corner
canvas_gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
_, logo_mask = cv2.threshold(canvas_gray, 220, 255, cv2.THRESH_BINARY_INV)  # 255 where gray <= 220
background_mask = cv2.bitwise_not(logo_mask)
kept_background = cv2.bitwise_and(background, background, mask=background_mask)
kept_logo = cv2.bitwise_and(canvas, canvas, mask=logo_mask)
result = cv2.add(kept_background, kept_logo)
cv2.imshow("Result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result:

Resized logo (320x296):