cosmopolitan/tool/viz/lib/convolve.h

43 lines
1.3 KiB
C

#ifndef COSMOPOLITAN_TOOL_VIZ_LIB_CONVOLVE_H_
#define COSMOPOLITAN_TOOL_VIZ_LIB_CONVOLVE_H_
#include "libc/bits/xmmintrin.internal.h"
#include "libc/str/str.h"
#include "tool/viz/lib/graphic.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
 * Convolves an image of four-lane float pixels with a square kernel.
 *
 * The kernel is flipped on both axes and every tap is divided by C1
 * before use. For each window position that fits entirely inside the
 * image, the products of the window and the flipped kernel are summed
 * and the sum is stored KW/2 cells into the window on both axes. The
 * result is staged in a scratch Graphic buffer and then copied over
 * img in full.
 *
 * If KW is not positive, or the kernel is larger than the image in
 * either dimension, no window exists and img is left untouched.
 *
 * NOTE(review): C2 is added once per kernel tap (KW*KW times per output
 * pixel), not once per pixel; kept as-is, confirm this is intended.
 *
 * NOTE(review): cells within the border that no window writes are
 * copied back from the scratch buffer exactly as resizegraphic() left
 * them; confirm that buffer starts out in a defined state.
 *
 * @param yn is number of rows in img
 * @param xn is number of columns in img
 * @param img is pixel matrix, overwritten with the result
 * @param KW is width and height of kernel
 * @param kernel holds the KW-by-KW taps
 * @param C1 is divisor applied to each tap
 * @param C2 is offset added to each tap product
 */
forceinline void convolve(unsigned yn, unsigned xn, __m128 img[yn][xn], int KW,
                          const float kernel[KW][KW], float C1, float C2) {
  /* TODO(jart): nontemporal herringbone strategy */
  float f;
  struct Graphic g;
  unsigned y, x, i, j, kw;
  /* Must run before the VLAs below are declared: a non-positive KW is
     an invalid array bound, and a kernel larger than the image would
     make the unsigned subtractions in the loop bounds wrap around. */
  if (KW <= 0 || (unsigned)KW > yn || (unsigned)KW > xn) return;
  kw = KW; /* unsigned copy avoids mixed-sign comparisons below */
  __v4sf p, kflip[kw][kw], (*tmp)[yn][xn];
  for (i = 0; i < kw; ++i) {
    for (j = 0; j < kw; ++j) {
      f = kernel[i][j] / C1;
      kflip[kw - i - 1][kw - j - 1] = (__v4sf){f, f, f, f};
    }
  }
  memset(&g, 0, sizeof(g));
  resizegraphic(&g, yn, xn);
  tmp = g.b.p;
  /* Bail out rather than write through a null scratch pointer.
     NOTE(review): assumes a failed resizegraphic() leaves g.b.p null
     with nothing to release; confirm against its contract. */
  if (!tmp) return;
  /* `<=` so the last window that still fits is included; there are
     yn-kw+1 by xn-kw+1 valid window positions. */
  for (y = 0; y <= yn - kw; ++y) {
    for (x = 0; x <= xn - kw; ++x) {
      memset(&p, 0, sizeof(p));
      for (i = 0; i < kw; ++i) {
        for (j = 0; j < kw; ++j) {
          p += img[y + i][x + j] * kflip[i][j] + C2;
        }
      }
      memcpy(&(*tmp)[y + kw / 2][x + kw / 2], &p, sizeof(p));
    }
  }
  /* sizeof comes first so the whole product is computed in size_t
     instead of overflowing in unsigned arithmetic. */
  memcpy(img, tmp, sizeof(img[0][0]) * yn * xn);
  bfree(&g.b);
}
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_TOOL_VIZ_LIB_CONVOLVE_H_ */