// Sum of squared differences between two byte buffers, using AVX2.
//
// Syntax: MSVC inline assembler (Intel operand order), 32-bit x86 only.
// ABI:    stack-passed arguments read directly from [esp + 4/8/12]; the
//         function is `naked`, so no prologue/epilogue is generated and the
//         offsets below are relative to the return address at [esp].
// In:     src_a, src_b = buffers to compare; count = number of bytes.
//         The loop consumes 32 bytes per iteration and always runs at least
//         once, so `count` must be a positive multiple of 32 or the loads
//         run past the end of the buffers.
// Out:    eax = sum over all bytes of (src_a[i] - src_b[i])^2.
// Clobb:  eax, ecx, edx, ymm0-ymm3, ymm5, flags.
//
// NOTE: __declspec(align(16)) was dropped from the declaration: MSVC rejects
// an alignment attribute on a function with error C2024.
__declspec(naked)
unsigned int SumSquareError_AVX2(const unsigned char* src_a,
                                 const unsigned char* src_b,
                                 int count) {
  __asm {
    mov        eax, [esp + 4]    // src_a
    mov        edx, [esp + 8]    // src_b
    mov        ecx, [esp + 12]   // count
    vpxor      ymm0, ymm0, ymm0  // sum (8 x u32 accumulators)
    vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
    sub        edx, eax          // edx = src_b - src_a, so [eax + edx] walks src_b

    align      4
  wloop:
    vmovdqu    ymm1, [eax]
    vmovdqu    ymm2, [eax + edx]
    lea        eax, [eax + 32]
    vpsubusb   ymm3, ymm1, ymm2  // abs difference trick: saturating a - b ...
    vpsubusb   ymm2, ymm2, ymm1  // ... and b - a; one of the two is always 0
    vpor       ymm1, ymm2, ymm3  // |a - b| per byte
    vpunpcklbw ymm2, ymm1, ymm5  // widen to u16. mutates order.
    vpunpckhbw ymm1, ymm1, ymm5
    vpmaddwd   ymm2, ymm2, ymm2  // square + hadd to u32.
    vpmaddwd   ymm1, ymm1, ymm1
    vpaddd     ymm0, ymm0, ymm1
    vpaddd     ymm0, ymm0, ymm2
    sub        ecx, 32           // bytes remaining
    jg         wloop

    vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 in both lanes.
    vpaddd     ymm0, ymm0, ymm1
    vpshufd    ymm1, ymm0, 0x01  // 1 + 0 in both lanes.
    vpaddd     ymm0, ymm0, ymm1
    vpermq     ymm1, ymm0, 0x02  // high + low lane.
    vpaddd     ymm0, ymm0, ymm1
    vmovd      eax, xmm0         // return value
    vzeroupper                   // avoid AVX/SSE transition penalty in the caller
    ret
  }
}
CodePudding user response:
Copy the compiler error message and paste it into the Baidu search box to look it up.
CodePudding user response:
Yuvlib\source\row_win.cc(4210) : error C2024: "alignas" attribute applies only to variables, data members and tag types
This probably means the attribute cannot be applied to a function, but I don't know how to deal with that in VS2015, and I couldn't find an answer on Baidu either.
CodePudding user response:
The `__declspec(align(16))` can be deleted.
CodePudding user response:
So show written assemblyCodePudding user response: