/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ │ Copyright 2020 Justine Alexandra Roberts Tunney │ │ │ │ This program is free software; you can redistribute it and/or modify │ │ it under the terms of the GNU General Public License as published by │ │ the Free Software Foundation; version 2 of the License. │ │ │ │ This program is distributed in the hope that it will be useful, but │ │ WITHOUT ANY WARRANTY; without even the implied warranty of │ │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ │ General Public License for more details. │ │ │ │ You should have received a copy of the GNU General Public License │ │ along with this program; if not, write to the Free Software │ │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ │ 02110-1301 USA │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "dsp/core/c11.h" #include "dsp/core/c1331.h" #include "dsp/core/c1331s.h" #include "dsp/core/c161.h" #include "dsp/core/core.h" #include "dsp/core/half.h" #include "dsp/core/illumination.h" #include "dsp/core/q.h" #include "dsp/scale/scale.h" #include "libc/bits/xmmintrin.internal.h" #include "libc/calls/calls.h" #include "libc/calls/sigbits.h" #include "libc/calls/struct/sigset.h" #include "libc/intrin/pmulhrsw.h" #include "libc/log/check.h" #include "libc/log/log.h" #include "libc/macros.h" #include "libc/math.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/gc.internal.h" #include "libc/nexgen32e/nexgen32e.h" #include "libc/nexgen32e/x86feature.h" #include "libc/runtime/gc.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" #include "libc/sysv/consts/sig.h" #include "libc/sysv/errfuns.h" #include "libc/time/time.h" #include "libc/x/x.h" #include "third_party/avir/lanczos.h" #include "tool/viz/lib/graphic.h" #include "tool/viz/lib/knobs.h" #include "tool/viz/lib/ycbcr.h" #define M 15 #define CLAMP(X) MIN(255, MAX(0, X)) const double kBt601Primaries[] = {.299, .587, .114}; const double kBt709Primaries[] = {871024 / 4096299., 8788810 / 12288897., 887015 / 12288897.}; const double kSrgbToXyz[3][3] = { {506752 / 1228815., 87881 / 245763., 12673 / 70218.}, {87098 / 409605., 175762 / 245763., 12673 / 175545.}, {7918 / 409605., 87881 / 737289., 1001167 / 1053270.}, }; long magikarp_latency_; long gyarados_latency_; long ycbcr2rgb_latency_; long double magikarp_start_; struct YCbCr { bool yonly; int magnums[8][4]; int lighting[6][4]; unsigned char transfer[2][256]; struct YCbCrSamplingSolution { long dyn, dxn; long syn, sxn; double ry, rx; double oy, ox; double py, px; struct SamplingSolution *cy, *cx; } luma, chroma; }; /** * Computes magnums for Y′CbCr decoding. * * @param swing should be 219 for TV, or 255 for JPEG * @param M is integer coefficient bits */ void YCbCrComputeCoefficients(int swing, double gamma, const double primaries[3], const double illuminant[3], int out_magnums[8][4], int out_lighting[6][4], unsigned char out_transfer[256]) { int i, j; double x; double f1[6][3]; long longs[6][6]; long bitlimit = roundup2pow(swing); long wordoffset = rounddown2pow((bitlimit - swing) / 2); long chromaswing = swing + 2 * (bitlimit / 2. - swing / 2. - wordoffset); long lumamin = wordoffset; long lumamax = wordoffset + swing; long diffmax = wordoffset + chromaswing - bitlimit / 2; long diffmin = -diffmax; double rEb = 1 - primaries[2] + primaries[0] + primaries[1]; double rEgEb = 1 / primaries[1] * primaries[2] * rEb; double rEr = 1 - primaries[0] + primaries[1] + primaries[2]; double rEgEr = 1 / primaries[1] * primaries[0] * rEr; double unswing = 1. / swing * bitlimit; double digital = 1. / swing * chromaswing; double reals[6][6] = { {rEr / digital}, {-rEgEb / digital, -rEgEr / digital}, {rEb / digital}, {0, 0, unswing}, }; for (i = 0; i < 4; ++i) { GetIntegerCoefficients(longs[i], reals[i], M, diffmin, diffmax); } for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { out_magnums[i][j] = longs[i][j]; } } out_magnums[3][0] = wordoffset; out_magnums[3][1] = bitlimit / 2; GetChromaticAdaptationMatrix(f1, kIlluminantD65, illuminant); for (i = 0; i < 3; ++i) { for (j = 0; j < 3; ++j) { reals[i][j] = f1[i][j]; } } for (i = 0; i < 6; ++i) { GetIntegerCoefficients(longs[i], reals[i], M, diffmin * 2, lumamax * 2); } for (i = 0; i < 6; ++i) { for (j = 0; j < 3; ++j) { out_lighting[i][j] = longs[i][j]; } } for (i = 0; i < 256; ++i) { x = i; x /= 255; x = tv2pcgamma(x, gamma); x *= 255; out_transfer[i] = CLAMP(x); } memset(out_transfer, out_transfer[lumamin], lumamin); memset(out_transfer + lumamax + 1, out_transfer[lumamax], bitlimit - lumamax); } void YCbCrInit(struct YCbCr **ycbcr, bool yonly, int swing, double gamma, const double gamut[3], const double illuminant[3]) { if (!*ycbcr) *ycbcr = xcalloc(1, sizeof(struct YCbCr)); (*ycbcr)->yonly = yonly; memset((*ycbcr)->magnums, 0, sizeof((*ycbcr)->magnums)); memset((*ycbcr)->lighting, 0, sizeof((*ycbcr)->lighting)); YCbCrComputeCoefficients(swing, gamma, gamut, illuminant, (*ycbcr)->magnums, (*ycbcr)->lighting, (*ycbcr)->transfer[0]); imapxlatab((*ycbcr)->transfer[1]); } void YCbCrFree(struct YCbCr **ycbcr) { if (*ycbcr) { FreeSamplingSolution((*ycbcr)->luma.cy), (*ycbcr)->luma.cy = NULL; FreeSamplingSolution((*ycbcr)->luma.cx), (*ycbcr)->luma.cx = NULL; FreeSamplingSolution((*ycbcr)->chroma.cy), (*ycbcr)->chroma.cy = NULL; FreeSamplingSolution((*ycbcr)->chroma.cx), (*ycbcr)->chroma.cx = NULL; free(*ycbcr), *ycbcr = NULL; } } void *YCbCrReallocPlane(long ys, long xs, const unsigned char p[ys][xs], long yn, long xn) { long y; unsigned char(*res)[yn][xn]; res = xmemalign(32, yn * xn); for (y = 0; y < yn; ++y) { memcpy((*res)[y], p[y], xn); } return res; } void YCbCrComputeSamplingSolution(struct YCbCrSamplingSolution *scale, long dyn, long dxn, long syn, long sxn, double ry, double rx, double oy, double ox, double py, double px) { if (scale->dyn != dyn || scale->dxn != dxn || scale->syn != syn || scale->sxn != sxn || fabs(scale->ry - ry) > .001 || fabs(scale->rx - rx) > .001 || fabs(scale->oy - oy) > .001 || fabs(scale->ox - ox) > .001 || fabs(scale->py - py) > .001 || fabs(scale->px - px) > .001) { LOGF("recomputing sampling solution"); FreeSamplingSolution(scale->cy), scale->cy = NULL; FreeSamplingSolution(scale->cx), scale->cx = NULL; scale->cy = ComputeSamplingSolution(dyn, syn, ry, oy, py); scale->cx = ComputeSamplingSolution(dxn, sxn, rx, ox, px); scale->dyn = dyn, scale->dxn = dxn; scale->syn = syn, scale->sxn = sxn; scale->ry = ry, scale->rx = rx; scale->oy = oy, scale->ox = ox; scale->py = py, scale->px = px; } } void Y2Rgb(long yn, long xn, unsigned char RGB[restrict 3][yn][xn], long yys, long yxs, const unsigned char Y[restrict yys][yxs], const int K[8][4], const unsigned char T[256]) { long i, j; for (i = 0; i < yn; ++i) { for (j = 0; j < xn; ++j) { RGB[0][i][j] = T[Y[i][j]]; } } memcpy(RGB[1], RGB[0], yn * xn); memcpy(RGB[2], RGB[0], yn * xn); } /** * Converts Y′CbCr samples to RGB. */ void YCbCr2Rgb(long yn, long xn, unsigned char RGB[restrict 3][yn][xn], long yys, long yxs, const unsigned char Y[restrict yys][yxs], long cys, long cxs, const unsigned char Cb[restrict cys][cxs], const unsigned char Cr[restrict cys][cxs], const int K[8][4], const int L[6][4], const unsigned char T[256]) { long i, j; short y, u, v, r, g, b, A, B, C; for (i = 0; i < yn; ++i) { for (j = 0; j < xn; ++j) { y = T[Y[i][j]]; u = Cb[i][j] - K[3][1]; v = Cr[i][j] - K[3][1]; r = y + QRS(M, v * K[0][0]); g = y + QRS(M, u * K[1][0] + v * K[1][1]); b = y + QRS(M, u * K[2][0]); r = QRS(M, (MIN(235, MAX(16, r)) - K[3][0]) * K[3][2]); g = QRS(M, (MIN(235, MAX(16, g)) - K[3][0]) * K[3][2]); b = QRS(M, (MIN(235, MAX(16, b)) - K[3][0]) * K[3][2]); RGB[0][i][j] = CLAMP(QRS(M, r * L[0][0] + g * L[0][1] + b * L[0][2])); RGB[1][i][j] = CLAMP(QRS(M, r * L[1][0] + g * L[1][1] + b * L[1][2])); RGB[2][i][j] = CLAMP(QRS(M, r * L[2][0] + g * L[2][1] + b * L[2][2])); } } } void YCbCrConvert(struct YCbCr *me, long yn, long xn, unsigned char RGB[restrict 3][yn][xn], long yys, long yxs, const unsigned char Y[restrict yys][yxs], long cys, long cxs, unsigned char Cb[restrict cys][cxs], unsigned char Cr[restrict cys][cxs]) { long double ts; ts = nowl(); if (!me->yonly) { YCbCr2Rgb(yn, xn, RGB, yys, yxs, Y, cys, cxs, Cb, Cr, me->magnums, me->lighting, me->transfer[pf10_]); } else { Y2Rgb(yn, xn, RGB, yys, yxs, Y, me->magnums, me->transfer[pf10_]); } ycbcr2rgb_latency_ = lroundl((nowl() - ts) * 1e6l); } void YCbCr2RgbScaler(struct YCbCr *me, long dyn, long dxn, unsigned char RGB[restrict 3][dyn][dxn], long yys, long yxs, unsigned char Y[restrict yys][yxs], long cys, long cxs, unsigned char Cb[restrict cys][cxs], unsigned char Cr[restrict cys][cxs], long yyn, long yxn, long cyn, long cxn, double syn, double sxn, double pry, double prx) { long double ts; long y, x, scyn, scxn; double yry, yrx, cry, crx, yoy, yox, coy, cox, err, oy; scyn = syn * cyn / yyn; scxn = sxn * cxn / yxn; if (HALF(yxn) > dxn && HALF(scxn) > dxn) { YCbCr2RgbScaler(me, dyn, dxn, RGB, yys, yxs, Magikarp2xX(yys, yxs, Y, syn, sxn), cys, cxs, Magkern2xX(cys, cxs, Cb, scyn, scxn), Magkern2xX(cys, cxs, Cr, scyn, scxn), yyn, HALF(yxn), cyn, HALF(cxn), syn, sxn / 2, pry, prx); } else if (HALF(yyn) > dyn && HALF(scyn) > dyn) { YCbCr2RgbScaler(me, dyn, dxn, RGB, yys, yxs, Magikarp2xY(yys, yxs, Y, syn, sxn), cys, cxs, Magkern2xY(cys, cxs, Cb, scyn, scxn), Magkern2xY(cys, cxs, Cr, scyn, scxn), HALF(yyn), yxn, HALF(cyn), scxn, syn / 2, sxn, pry, prx); } else { magikarp_latency_ = lroundl((nowl() - magikarp_start_) * 1e6l); ts = nowl(); yry = syn / dyn; yrx = sxn / dxn; cry = syn * cyn / yyn / dyn; crx = sxn * cxn / yxn / dxn; yoy = syn / scyn / 2 - pry * .5; yox = sxn / scxn / 2 - prx * .5; coy = syn / scyn / 2 - pry * .5; cox = sxn / scxn / 2 - prx * .5; LOGF("gyarados pry=%.3f prx=%.3f syn=%.3f sxn=%.3f dyn=%ld dxn=%ld " "yyn=%ld " "yxn=%ld cyn=%ld cxn=%ld yry=%.3f yrx=%.3f cry=%.3f crx=%.3f " "yoy=%.3f " "yox=%.3f coy=%.3f cox=%.3f", pry, prx, syn, sxn, dyn, dxn, yyn, yxn, cyn, cxn, yry, yrx, cry, crx, yoy, yox, coy, cox); YCbCrComputeSamplingSolution(&me->luma, dyn, dxn, syn, sxn, yry, yrx, yoy, yox, pry, prx); YCbCrComputeSamplingSolution(&me->chroma, dyn, dxn, scyn, scxn, cry, crx, coy, cox, pry, prx); if (pf8_) sharpen(1, yys, yxs, (void *)Y, yyn, yxn); if (pf9_) unsharp(1, yys, yxs, (void *)Y, yyn, yxn); GyaradosUint8(yys, yxs, Y, yys, yxs, Y, dyn, dxn, syn, sxn, 0, 255, me->luma.cy, me->luma.cx, true); GyaradosUint8(cys, cxs, Cb, cys, cxs, Cb, dyn, dxn, scyn, scxn, 0, 255, me->chroma.cy, me->chroma.cx, false); GyaradosUint8(cys, cxs, Cr, cys, cxs, Cr, dyn, dxn, scyn, scxn, 0, 255, me->chroma.cy, me->chroma.cx, false); gyarados_latency_ = lround((nowl() - ts) * 1e6l); YCbCrConvert(me, dyn, dxn, RGB, yys, yxs, Y, cys, cxs, Cb, Cr); LOGF("done"); } } /** * Converts Y′CbCr frame for PC display. * * "[The] experiments of Professor J. D. Forbes, which I * witnessed… [established] that blue and yellow do not * make green but a pinkish tint, when neither prevails * in the combination [and the] result of mixing yellow * and blue was, I believe, not previously known. * — James Clerk Maxwell * * This function converts TV to PC graphics. We do that by * * 1. decimating w/ facebook magikarp photoshop cubic sharpen * 2. upsampling color difference planes, to be as big as luma plane * 3. converting color format * 4. expanding dynamic range * 5. transferring gamma from TV to PC convention * 6. resampling again to exact requested display / pixel geometry * * @param dyn/dxn is display height/width after scaling/conversion * @param RGB points to memory for packed de-interlaced RGB output * @param Y′ ∈ [16,235] is the luminance plane a gamma-corrected RGB * weighted sum; a.k.a. black/white legacy component part of the * TV signal; which may be used independently of the chrominance * planes; and decodes to the range [0,1] * @param Cb/Cr ∈ [16,240] is blue/red chrominance difference planes * which (if sampled at a different rate) will get stretched out * over the luma plane appropriately * @param yys/yxs dimensions luma sample array * @param cys/cxs dimensions chroma sample arrays * @param yyn/yxn is number of samples in luma signal * @param cyn/cxn is number of samples in each chroma signal * @param syn/sxn is size of source signal * @param pry/prx is pixel aspect ratio, e.g. 1,1 * @return RGB */ void *YCbCr2RgbScale(long dyn, long dxn, unsigned char RGB[restrict 3][dyn][dxn], long yys, long yxs, unsigned char Y[restrict yys][yxs], long cys, long cxs, unsigned char Cb[restrict cys][cxs], unsigned char Cr[restrict cys][cxs], long yyn, long yxn, long cyn, long cxn, double syn, double sxn, double pry, double prx, struct YCbCr **ycbcr) { long minyys, minyxs, mincys, mincxs; CHECK_LE(yyn, yys); CHECK_LE(yxn, yxs); CHECK_LE(cyn, cys); CHECK_LE(cxn, cxs); LOGF("magikarp2x"); magikarp_start_ = nowl(); minyys = MAX(ceil(syn), MAX(yyn, ceil(dyn * pry))); minyxs = MAX(ceil(sxn), MAX(yxn, ceil(dxn * prx))); mincys = MAX(cyn, ceil(dyn * pry)); mincxs = MAX(cxn, ceil(dxn * prx)); YCbCr2RgbScaler(*ycbcr, dyn, dxn, RGB, MAX(yys, minyys), MAX(yxs, minyxs), (yys >= minyys && yxs >= minyxs ? Y : gc(YCbCrReallocPlane(yys, yxs, Y, minyys, minyxs))), MAX(cys, mincys), MAX(cxs, mincxs), (cys >= mincys && cxs >= mincxs ? Cb : gc(YCbCrReallocPlane(cys, cxs, Cb, mincys, mincxs))), (cys >= mincys && cxs >= mincxs ? Cr : gc(YCbCrReallocPlane(cys, cxs, Cr, mincys, mincxs))), yyn, yxn, cyn, cxn, syn, sxn, pry, prx); return RGB; }