// clang-format off //$ nobt //$ nocpp /** * @file lancir.h * * @brief The self-contained "lancir" inclusion file. * * This is the self-contained inclusion file for the "LANCIR" image resizer, * part of the AVIR library. * * AVIR Copyright (c) 2015-2019 Aleksey Vaneev * * @mainpage * * @section intro_sec Introduction * * Description is available at https://github.com/avaneev/avir * * @section license License * * AVIR License Agreement * * The MIT License (MIT) * * Copyright (c) 2015-2019 Aleksey Vaneev * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #ifndef AVIR_CLANCIR_INCLUDED #define AVIR_CLANCIR_INCLUDED #include "third_party/avir/notice.h" #include "libc/str/str.h" #include "libc/log/log.h" #include "libc/mem/mem.h" #include "libc/log/check.h" #include "libc/macros.h" #include "libc/math.h" namespace avir { /** * The macro equals to "pi" constant, fills 53-bit floating point mantissa. * Undefined at the end of file. */ #define LANCIR_PI 3.1415926535897932 /** * @brief LANCIR image resizer class. * * The object of this class can be used to resize 1-4 channel images to any * required size. Resizing is performed by utilizing Lanczos filters, with * 8-bit precision. This class offers a kind of "optimal" Lanczos resampling * implementation. * * Object of this class can be allocated on stack. * * Note that object of this class does not free temporary buffers and * variables after the resizeImage() call (until object's destruction), these * buffers are reused on subsequent calls making batch resizing of same-size * images faster. This means resizing is not thread-safe: a separate object * should be created for each thread. */ class CLancIR { private: CLancIR( const CLancIR& ) { // Unsupported. } CLancIR& operator = ( const CLancIR& ) { // Unsupported. return( *this ); } public: CLancIR() : FltBuf( NULL ) , FltBufLen( 0 ) , spv( NULL ) , spvlen( 0 ) { } ~CLancIR() { delete[] FltBuf; delete[] spv; } /** * Function resizes image. * * @param SrcBuf Source image buffer. * @param SrcWidth Source image width. * @param SrcHeight Source image height. * @param SrcScanlineSize Physical size of source scanline in elements * (not bytes). If this value is below 1, SrcWidth * ElCount will be * used as the physical source scanline size. * @param[out] NewBuf Buffer to accept the resized image. Can be equal to * SrcBuf if the size of the resized image is smaller or equal to source * image in size. * @param NewWidth New image width. * @param NewHeight New image height. * @param ElCount The number of elements (channels) used to store each * source and destination pixel (1-4). * @param kx0 Resizing step - horizontal (one output pixel corresponds to * "k" input pixels). A downsizing factor if > 1.0; upsizing factor * if <= 1.0. Multiply by -1 if you would like to bypass "ox" and "oy" * adjustment which is done by default to produce a centered image. If * step value equals 0, the step value will be chosen automatically. * @param ky0 Resizing step - vertical. Same as "kx". * @param ox Start X pixel offset within source image (can be negative). * Positive offset moves the image to the left. * @param oy Start Y pixel offset within source image (can be negative). * Positive offset moves the image to the top. * @tparam T Input and output buffer element's type. Can be uint8_t * (0-255 value range), uint16_t (0-65535 value range), float * (any value range), double (any value range). Larger integer types are * treated as uint16_t. Signed integer types are unsupported. */ template< class T > void resizeImage( const T* const SrcBuf, const int SrcWidth, const int SrcHeight, int SrcScanlineSize, T* const NewBuf, const int NewWidth, const int NewHeight, const int ElCount, const double kx0 = 0.0, const double ky0 = 0.0, double ox = 0.0, double oy = 0.0 ) { if( NewWidth <= 0 || NewHeight <= 0 ) { return; } if( SrcWidth <= 0 || SrcHeight <= 0 ) { handleEmptySrcCornerCase( NewBuf, (size_t) NewWidth * NewHeight * sizeof( T ) ); return; } const double la = 3.0; // Lanczos "a". double kx; double ky; if( kx0 == 0.0 ) { if( NewWidth > SrcWidth ) { kx = (double) ( SrcWidth - 1 ) / ( NewWidth - 1 ); } else { kx = (double) SrcWidth / NewWidth; ox += ( kx - 1.0 ) * 0.5; } } else if( kx0 > 0.0 ) { kx = kx0; if( kx0 > 1.0 ) { ox += ( kx0 - 1.0 ) * 0.5; } } else { kx = -kx0; } if( ky0 == 0.0 ) { if( NewHeight > SrcHeight ) { ky = (double) ( SrcHeight - 1 ) / ( NewHeight - 1 ); } else { ky = (double) SrcHeight / NewHeight; oy += ( ky - 1.0 ) * 0.5; } } else if( ky0 > 0.0 ) { ky = ky0; if( ky0 > 1.0 ) { oy += ( ky0 - 1.0 ) * 0.5; } } else { ky = -ky0; } if( rfh.update( la, kx )) { rsh.reset(); rsv.reset(); } CResizeFilters* rfv; // Pointer to resizing filters for vertical // resizing, may equal to "rfh" if the same stepping is in use. if( ky == kx ) { rfv = &rfh; } else { rfv = &rfv0; if( rfv0.update( la, ky )) { rsv.reset(); } } rsh.update( kx, ox, ElCount, SrcWidth, NewWidth, rfh ); rsv.update( ky, oy, ElCount, SrcHeight, NewHeight, *rfv ); const int NewWidthE = NewWidth * ElCount; if( SrcScanlineSize < 1 ) { SrcScanlineSize = SrcWidth * ElCount; } // Allocate/resize temporary buffer. const size_t FltBufLenNew = (size_t) NewWidthE * (size_t) SrcHeight; if( FltBufLenNew > FltBufLen ) { free( FltBuf ); FltBufLen = FltBufLenNew; FltBuf = (float *) memalign( 32, sizeof(float) * FltBufLen ); CHECK_NOTNULL(FltBuf); } // Perform horizontal resizing. const T* ips = SrcBuf; float* op = FltBuf; size_t i; if( ElCount == 3 ) { for( i = 0; i < SrcHeight; i++ ) { copyScanline3h( ips, rsh, SrcWidth ); resize3( op, NewWidth, rsh.pos, rfh.KernelLen ); ips += SrcScanlineSize; op += NewWidthE; } } else if( ElCount == 1 ) { for( i = 0; i < SrcHeight; i++ ) { copyScanline1h( ips, rsh, SrcWidth ); resize1( op, NewWidth, rsh.pos, rfh.KernelLen ); ips += SrcScanlineSize; op += NewWidthE; } } else if( ElCount == 4 ) { for( i = 0; i < SrcHeight; i++ ) { copyScanline4h( ips, rsh, SrcWidth ); resize4( op, NewWidth, rsh.pos, rfh.KernelLen ); ips += SrcScanlineSize; op += NewWidthE; } } else if( ElCount == 2 ) { for( i = 0; i < SrcHeight; i++ ) { copyScanline2h( ips, rsh, SrcWidth ); resize2( op, NewWidth, rsh.pos, rfh.KernelLen ); ips += SrcScanlineSize; op += NewWidthE; } } // Perform vertical resizing. const int spvlennew = NewHeight * ElCount; if( spvlennew > spvlen ) { free( spv ); spvlen = spvlennew; spv = (float *) memalign( 32, sizeof(float) * spvlen ); } const bool IsIOFloat = ( (T) 0.25 != 0 ); const int Clamp = ( sizeof( T ) == 1 ? 255 : 65535 ); const float* ip = FltBuf; T* opd = NewBuf; if( ElCount == 3 ) { for( i = 0; i < NewWidth; i++ ) { copyScanline3v( ip, rsv, SrcHeight, NewWidthE ); resize3( spv, NewHeight, rsv.pos, rfv -> KernelLen ); copyOutput3( spv, opd, NewHeight, NewWidthE, IsIOFloat, Clamp ); ip += 3; opd += 3; } } else if( ElCount == 1 ) { for( i = 0; i < NewWidth; i++ ) { copyScanline1v( ip, rsv, SrcHeight, NewWidthE ); resize1( spv, NewHeight, rsv.pos, rfv -> KernelLen ); copyOutput1( spv, opd, NewHeight, NewWidthE, IsIOFloat, Clamp ); ip++; opd++; } } else if( ElCount == 4 ) { for( i = 0; i < NewWidth; i++ ) { copyScanline4v( ip, rsv, SrcHeight, NewWidthE ); resize4( spv, NewHeight, rsv.pos, rfv -> KernelLen ); copyOutput4( spv, opd, NewHeight, NewWidthE, IsIOFloat, Clamp ); ip += 4; opd += 4; } } else if( ElCount == 2 ) { for( i = 0; i < NewWidth; i++ ) { copyScanline2v( ip, rsv, SrcHeight, NewWidthE ); resize2( spv, NewHeight, rsv.pos, rfv -> KernelLen ); copyOutput2( spv, opd, NewHeight, NewWidthE, IsIOFloat, Clamp ); ip += 2; opd += 2; } } } protected: float* FltBuf; ///< Intermediate resizing buffer. ///< size_t FltBufLen; ///< Intermediate resizing buffer length. ///< float* spv; ///< Scanline buffer for vertical resizing. ///< unsigned spvlen; ///< Length of "spv". ///< /** * Function rounds a value and applies clamping. * * @param v Value to round and clamp. * @param Clamp High clamp level, low level is 0. */ static int roundclamp( const float v, const int Clamp ) #define roundclamp(V, CLAMP) ((CLAMP)==255?MAX(0, MIN(255, lrintf(V))):(roundclamp)(V, CLAMP)) { if( Clamp == 255 ) { return MAX(0, MIN(255, lrintf(v))); } else { if( v <= 0.0f ) { return( 0 ); } const int vr = (int) ( v + 0.5f ); if( vr > Clamp ) { return( Clamp ); } return( vr ); } } /** * Function performs final output of the resized scanline data to the * destination image buffer. Variants for 1-4-channel image. * * @param ip Input resized scanline. * @param op Output image buffer. * @param l Pixel count. * @param opinc "op" increment, should account ElCount. * @param IsIOFloat "True" if float output and no clamping is necessary. * @param Clamp Clamp high level, used if IsIOFloat is "false". */ template< class T > static void copyOutput1( const float* ip, T* op, int l, const int opinc, const bool IsIOFloat, const int Clamp ) { if( IsIOFloat ) { while( l > 0 ) { op[ 0 ] = (T) ip[ 0 ]; ip++; op += opinc; l--; } } else { while( l > 0 ) { op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); ip++; op += opinc; l--; } } } template< class T > static void copyOutput2( const float* ip, T* op, int l, const int opinc, const bool IsIOFloat, const int Clamp ) { if( IsIOFloat ) { while( l > 0 ) { op[ 0 ] = (T) ip[ 0 ]; op[ 1 ] = (T) ip[ 1 ]; ip += 2; op += opinc; l--; } } else { while( l > 0 ) { op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp ); ip += 2; op += opinc; l--; } } } template< class T > static void copyOutput3( const float* ip, T* op, int l, const int opinc, const bool IsIOFloat, const int Clamp ) { if( IsIOFloat ) { while( l > 0 ) { op[ 0 ] = (T) ip[ 0 ]; op[ 1 ] = (T) ip[ 1 ]; op[ 2 ] = (T) ip[ 2 ]; ip += 3; op += opinc; l--; } } else { while( l > 0 ) { op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp ); op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp ); ip += 3; op += opinc; l--; } } } template< class T > static void copyOutput4( const float* ip, T* op, int l, const int opinc, const bool IsIOFloat, const int Clamp ) { if( IsIOFloat ) { while( l > 0 ) { op[ 0 ] = (T) ip[ 0 ]; op[ 1 ] = (T) ip[ 1 ]; op[ 2 ] = (T) ip[ 2 ]; op[ 3 ] = (T) ip[ 3 ]; ip += 4; op += opinc; l--; } } else { while( l > 0 ) { op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp ); op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp ); op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp ); op[ 3 ] = (T) roundclamp( ip[ 3 ], Clamp ); ip += 4; op += opinc; l--; } } } class CResizeScanline; /** * Class implements fractional delay filter bank calculation. */ class CResizeFilters { friend class CResizeScanline; public: int KernelLen; ///< Resampling filter kernel length, taps. Available ///< after the update() function call. ///< CResizeFilters() : FilterBuf( NULL ) , Filters( NULL ) , Prevla( -1.0 ) , Prevk( -1.0 ) , FilterBufLen( 0 ) , FiltersLen( 0 ) { } ~CResizeFilters() { free( FilterBuf ); delete[] Filters; } /** * Function updates the resizing filter bank. * * @param la Lanczos "a" parameter value. * @param k Resizing step. * @return "True" if update occured and resizing positions should be * updated unconditionally. */ bool update( const double la, const double k ) { if( la == Prevla && k == Prevk ) { return( false ); } Prevla = la; Prevk = k; NormFreq = ( k <= 1.0 ? 1.0 : 1.0 / k ); Freq = LANCIR_PI * NormFreq; if( Freq > LANCIR_PI ) { Freq = LANCIR_PI; } FreqA = LANCIR_PI * NormFreq / la; Len2 = la / NormFreq; fl2 = (int) ceil( Len2 ); KernelLen = fl2 + fl2; FracCount = 607; // For 8-bit precision. FracFill = 0; const int FilterBufLenNew = FracCount * KernelLen; if( FilterBufLenNew > FilterBufLen ) { free( FilterBuf ); FilterBufLen = FilterBufLenNew; FilterBuf = (float *) memalign( 32, sizeof(float) * FilterBufLen ); CHECK_NOTNULL(FilterBuf); } if( FracCount > FiltersLen ) { delete[] Filters; FiltersLen = FracCount; Filters = new float*[ FiltersLen ]; } memset( Filters, 0, FracCount * sizeof( float* )); return( true ); } /** * Function returns filter at the specified fractional offset. This * function can only be called before the prior update() function * call. * * @param x Fractional offset, [0; 1). */ float* getFilter( const double x ) { const int Frac = (int) floor( x * FracCount ); if( Filters[ Frac ] == NULL ) { Filters[ Frac ] = FilterBuf + FracFill * KernelLen; FracFill++; makeFilter( 1.0 - (double) Frac / FracCount, Filters[ Frac ]); normalizeFilter( Filters[ Frac ]); } return( Filters[ Frac ]); } protected: double NormFreq; ///< Normalized frequency of the filter. ///< double Freq; ///< Circular frequency of the filter. ///< double FreqA; ///< Circular frequency of the window function. ///< double Len2; ///< Half resampling filter length, unrounded. ///< int fl2; ///< Half resampling length, integer. ///< int FracCount; ///< The number of fractional positions for which ///< filters are created. ///< int FracFill; ///< The number of fractional positions filled in the ///< filter buffer. ///< float* FilterBuf; ///< Buffer that holds all filters. ///< float** Filters; ///< Fractional delay filters for all positions. ///< Filter pointers equal NULL if filter was not yet created. ///< double Prevla; ///< Previous "la". ///< double Prevk; ///< Previous "k". ///< int FilterBufLen; ///< Allocated length of FilterBuf in elements. ///< int FiltersLen; ///< Allocated length of Filters in elements. ///< /** * @brief Sine signal generator class. * * Class implements sine signal generator without biasing, with * constructor-based initalization only. This generator uses * oscillator instead of "sin" function. */ class CSinGen { public: /** * Constructor initializes *this sine signal generator. * * @param si Sine function increment, in radians. * @param ph Starting phase, in radians. Add 0.5 * LANCIR_PI for * cosine function. * @param g Gain value. */ CSinGen( const double si, const double ph, const double g = 1.0 ) : svalue1( sin( ph ) * g ) , svalue2( sin( ph - si ) * g ) , sincr( 2.0 * cos( si )) { } /** * @return The next value of the sine function, without biasing. */ double generate() { const double res = svalue1; svalue1 = sincr * res - svalue2; svalue2 = res; return( res ); } private: double svalue1; ///< Current sine value. ///< double svalue2; ///< Previous sine value. ///< double sincr; ///< Sine value increment. ///< }; /** * Function creates filter for the specified fractional delay. The * update() function should be called prior to calling this function. * * @param FracDelay Fractional delay, 0 to 1, inclusive. * @param[out] Output filter buffer. * @tparam T Output buffer type. */ template< class T > void makeFilter( const double FracDelay, T* op ) const { CSinGen f( Freq, Freq * ( FracDelay - fl2 )); CSinGen fw( FreqA, FreqA * ( FracDelay - fl2 ), Len2 ); int t = -fl2; if( t + FracDelay < -Len2 ) { f.generate(); fw.generate(); *op = (T) 0.0; op++; t++; } int mt = ( FracDelay >= 1.0 - 1e-13 && FracDelay <= 1.0 + 1e-13 ? -1 : 0 ); while( t < mt ) { double ut = ( t + FracDelay ) * LANCIR_PI; *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); op++; t++; } double ut = t + FracDelay; if( fabs( ut ) <= 1e-13 ) { *op = (T) NormFreq; f.generate(); fw.generate(); } else { ut *= LANCIR_PI; *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); } mt = fl2 - 2; while( t < mt ) { op++; t++; ut = ( t + FracDelay ) * LANCIR_PI; *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); } op++; t++; ut = t + FracDelay; if( ut > Len2 ) { *op = (T) 0.0; } else { ut *= LANCIR_PI; *op = (T) ( f.generate() * fw.generate() / ( ut * ut )); } } /** * Function normalizes the specified filter so that it has unity gain * at DC. * * @param p Filter buffer pointer. * @tparam T Filter buffer type. */ template< class T > void normalizeFilter( T* const p ) const { double s = 0.0; size_t i; for( i = 0; i < KernelLen; i++ ) { s += p[ i ]; } s = 1.0 / s; for( i = 0; i < KernelLen; i++ ) { p[ i ] = (T) ( p[ i ] * s ); } } }; /** * Structure defines source scanline positioning and filters for each * destination pixel. */ struct CResizePos { const float* ip; ///< Source image pixel pointer. ///< float* flt; ///< Fractional delay filter. ///< }; /** * Class contains resizing positioning and a temporary scanline buffer, * prepares source scanline positions for resize filtering. */ class CResizeScanline { public: int padl; ///< Left-padding (in pixels) required for source scanline. ///< Available after the update() function call. ///< int padr; ///< Right-padding (in pixels) required for source scanline. ///< Available after the update() function call. ///< float* sp; ///< Source scanline buffer, with "padl" and "padr" ///< padding. ///< CResizePos* pos; ///< Source scanline pointers (point to "sp") ///< and filters for each destination pixel position. Available ///< after the update() function call. ///< CResizeScanline() : sp( NULL ) , pos( NULL ) , PrevSrcLen( -1 ) , PrevDstLen( -1 ) , Prevk( 0.0 ) , Prevo( 0.0 ) , PrevElCount( 0 ) , splen( 0 ) , poslen( 0 ) { } ~CResizeScanline() { free( sp ); delete[] pos; } /** * Function "resets" *this object so that the next update() call fully * updates the position buffer. Reset is necessary if the filter * object was updated. */ void reset() { PrevSrcLen = -1; } /** * Function updates resizing positions, updates "padl", "padr" and * "pos" buffer. * * @param k Resizing step. * @param o0 Initial source image offset. * @param SrcLen Source image scanline length, used to create a * scanline buffer without length pre-calculation. * @param DstLen Destination image scanline length. * @param rf Resizing filters object. */ void update( const double k, const double o0, const int ElCount, const int SrcLen, const size_t DstLen, CResizeFilters& rf ) { if( SrcLen == PrevSrcLen && DstLen == PrevDstLen && k == Prevk && o0 == Prevo && ElCount == PrevElCount ) { return; } PrevSrcLen = SrcLen; PrevDstLen = DstLen; Prevk = k; Prevo = o0; PrevElCount = ElCount; const int fl2m1 = rf.fl2 - 1; padl = fl2m1 - (int) floor( o0 ); if( padl < 0 ) { padl = 0; } padr = (int) floor( o0 + k * ( DstLen - 1 )) + rf.fl2 + 1 - SrcLen; if( padr < 0 ) { padr = 0; } const int splennew = ( padl + SrcLen + padr ) * ElCount; if( splennew > splen ) { free( sp ); splen = splennew; sp = (float *) memalign( 32, sizeof(float) * splen ); CHECK_NOTNULL(sp); } if( DstLen > poslen ) { delete[] pos; poslen = DstLen; pos = new CResizePos[ poslen ]; } const float* const spo = sp + ( padl - fl2m1 ) * ElCount; size_t i; for( i = 0; i < DstLen; i++ ) { const double o = o0 + k * i; const int ix = (int) floor( o ); pos[ i ].ip = spo + ix * ElCount; pos[ i ].flt = rf.getFilter( o - ix ); } } protected: int PrevSrcLen; ///< Previous SrcLen. ///< int PrevDstLen; ///< Previous DstLen. ///< double Prevk; ///< Previous "k". ///< double Prevo; ///< Previous "o". ///< int PrevElCount; ///< Previous pixel element count. ///< int splen; ///< Allocated "sp" buffer length. ///< int poslen; ///< Allocated "pos" buffer length. ///< }; CResizeFilters rfh; ///< Resizing filters for horizontal resizing. ///< CResizeFilters rfv0; ///< Resizing filters for vertical resizing (may not ///< be in use). ///< CResizeScanline rsh; ///< Horizontal resize scanline. ///< CResizeScanline rsv; ///< Vertical resize scanline. ///< /** * Function copies scanline from the source buffer in its native format * to internal scanline buffer, in preparation for horizontal resizing. * Variants for 1-4-channel images. * * @param ip Source scanline buffer. * @param rs Scanline resizing positions object. * @param l Source scanline length, in pixels. * @param ipinc "ip" increment per pixel. */ template< class T > static void copyScanline1h( const T* ip, CResizeScanline& rs, const int l ) { float* op = rs.sp; int i; DCHECK_ALIGNED(32, op); for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op++; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; ip++; op++; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op++; } } template< class T > static void copyScanline2h( const T* ip, CResizeScanline& rs, const int l ) { float* op = rs.sp; int i; for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op += 2; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; ip += 2; op += 2; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op += 2; } } template< class T > static void copyScanline3h( const T* ip, CResizeScanline& rs, const int l ) { float* op = rs.sp; int i; for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op += 3; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; ip += 3; op += 3; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op += 3; } } template< class T > static void copyScanline4h( const T* ip, CResizeScanline& rs, const size_t l ) { float* op = rs.sp; size_t i; for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op[ 3 ] = ip[ 3 ]; op += 4; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op[ 3 ] = ip[ 3 ]; ip += 4; op += 4; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op[ 3 ] = ip[ 3 ]; op += 4; } } /** * Function copies scanline from the source buffer in its native format * to internal scanline buffer, in preparation for vertical resizing. * Variants for 1-4-channel images. * * @param ip Source scanline buffer. * @param rs Scanline resizing positions object. * @param l Source scanline length, in pixels. * @param ipinc "ip" increment per pixel. */ template< class T > static void copyScanline1v( const T* ip, CResizeScanline& rs, const int l, const int ipinc ) { float* op = rs.sp; int i; DCHECK_ALIGNED(32, op); for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op++; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; ip += ipinc; op++; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op++; } } template< class T > static void copyScanline2v( const T* ip, CResizeScanline& rs, const int l, const int ipinc ) { float* op = rs.sp; int i; for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op += 2; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; ip += ipinc; op += 2; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op += 2; } } template< class T > static void copyScanline3v( const T* ip, CResizeScanline& rs, const int l, const int ipinc ) { float* op = rs.sp; int i; for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op += 3; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; ip += ipinc; op += 3; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op += 3; } } template< class T > static void copyScanline4v( const T* ip, CResizeScanline& rs, const size_t l, const int ipinc ) { float* op = rs.sp; size_t i; for( i = 0; i < rs.padl; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op[ 3 ] = ip[ 3 ]; op += 4; } for( i = 0; i < l - 1; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op[ 3 ] = ip[ 3 ]; ip += ipinc; op += 4; } for( i = 0; i <= rs.padr; i++ ) { op[ 0 ] = ip[ 0 ]; op[ 1 ] = ip[ 1 ]; op[ 2 ] = ip[ 2 ]; op[ 3 ] = ip[ 3 ]; op += 4; } } #define LANCIR_LF_PRE \ CResizePos* const rpe = rp + DstLen; \ while( rp < rpe ) \ { \ const float* ip = rp -> ip; \ const float* const flt = rp -> flt; #define LANCIR_LF_POST \ rp++; \ } static void resize1_kl6( float* op, int DstLen, CResizePos* rp ) { LANCIR_LF_PRE op[ 0 ] = flt[ 0 ] * ip[ 0 ] + flt[ 1 ] * ip[ 1 ] + flt[ 2 ] * ip[ 2 ] + flt[ 3 ] * ip[ 3 ] + flt[ 4 ] * ip[ 4 ] + flt[ 5 ] * ip[ 5 ]; op++; LANCIR_LF_POST } static void resize1_kln( float* op, int DstLen, CResizePos* rp, const int kl ) { LANCIR_LF_PRE float sum = 0.0; int i; for( i = 0; i < kl; i++ ) { sum += flt[ i ] * ip[ i ]; } op[ 0 ] = sum; op++; LANCIR_LF_POST } /** * Function performs internal scanline resizing. Variants for 1-4-channel * images. * * @param op Destination buffer. * @param DstLen Destination length, in pixels. * @param rp Resizing positions and filters. * @param kl Filter kernel length, in taps. */ static void resize1( float* op, int DstLen, CResizePos* rp, const int kl ) { if( kl == 6 ) { resize1_kl6( op, DstLen, rp ); } else { resize1_kln( op, DstLen, rp, kl ); } } static void resize2( float* op, int DstLen, CResizePos* rp, const int kl ) { if( kl == 6 ) { LANCIR_LF_PRE op[ 0 ] = flt[ 0 ] * ip[ 0 ] + flt[ 1 ] * ip[ 2 ] + flt[ 2 ] * ip[ 4 ] + flt[ 3 ] * ip[ 6 ] + flt[ 4 ] * ip[ 8 ] + flt[ 5 ] * ip[ 10 ]; op[ 1 ] = flt[ 0 ] * ip[ 1 ] + flt[ 1 ] * ip[ 3 ] + flt[ 2 ] * ip[ 5 ] + flt[ 3 ] * ip[ 7 ] + flt[ 4 ] * ip[ 9 ] + flt[ 5 ] * ip[ 11 ]; op += 2; LANCIR_LF_POST } else { LANCIR_LF_PRE float sum[ 2 ]; sum[ 0 ] = 0.0; sum[ 1 ] = 0.0; int i; for( i = 0; i < kl; i++ ) { const float xx = flt[ i ]; sum[ 0 ] += xx * ip[ 0 ]; sum[ 1 ] += xx * ip[ 1 ]; ip += 2; } op[ 0 ] = sum[ 0 ]; op[ 1 ] = sum[ 1 ]; op += 2; LANCIR_LF_POST } } static void resize3( float* op, int DstLen, CResizePos* rp, const int kl ) { if( kl == 6 ) { LANCIR_LF_PRE op[ 0 ] = flt[ 0 ] * ip[ 0 ] + flt[ 1 ] * ip[ 3 ] + flt[ 2 ] * ip[ 6 ] + flt[ 3 ] * ip[ 9 ] + flt[ 4 ] * ip[ 12 ] + flt[ 5 ] * ip[ 15 ]; op[ 1 ] = flt[ 0 ] * ip[ 1 ] + flt[ 1 ] * ip[ 4 ] + flt[ 2 ] * ip[ 7 ] + flt[ 3 ] * ip[ 10 ] + flt[ 4 ] * ip[ 13 ] + flt[ 5 ] * ip[ 16 ]; op[ 2 ] = flt[ 0 ] * ip[ 2 ] + flt[ 1 ] * ip[ 5 ] + flt[ 2 ] * ip[ 8 ] + flt[ 3 ] * ip[ 11 ] + flt[ 4 ] * ip[ 14 ] + flt[ 5 ] * ip[ 17 ]; op += 3; LANCIR_LF_POST } else { LANCIR_LF_PRE float sum[ 3 ]; sum[ 0 ] = 0.0; sum[ 1 ] = 0.0; sum[ 2 ] = 0.0; int i; for( i = 0; i < kl; i++ ) { const float xx = flt[ i ]; sum[ 0 ] += xx * ip[ 0 ]; sum[ 1 ] += xx * ip[ 1 ]; sum[ 2 ] += xx * ip[ 2 ]; ip += 3; } op[ 0 ] = sum[ 0 ]; op[ 1 ] = sum[ 1 ]; op[ 2 ] = sum[ 2 ]; op += 3; LANCIR_LF_POST } } static optimizespeed void resize4( float* op, int DstLen, CResizePos* rp, const size_t kl ) { LANCIR_LF_PRE float sum[ 4 ]; sum[ 0 ] = 0.0; sum[ 1 ] = 0.0; sum[ 2 ] = 0.0; sum[ 3 ] = 0.0; size_t i; for( i = 0; i < kl; i++ ) { const float xx = flt[ i ]; sum[ 0 ] += xx * ip[ 0 ]; sum[ 1 ] += xx * ip[ 1 ]; sum[ 2 ] += xx * ip[ 2 ]; sum[ 3 ] += xx * ip[ 3 ]; ip += 4; } op[ 0 ] = sum[ 0 ]; op[ 1 ] = sum[ 1 ]; op[ 2 ] = sum[ 2 ]; op[ 3 ] = sum[ 3 ]; op += 4; LANCIR_LF_POST } #undef LANCIR_LF_PRE #undef LANCIR_LF_POST static relegated nooptimize noinline void handleEmptySrcCornerCase( void * const NewBuf, const size_t Size ) { memset( NewBuf, 0, Size ); } }; #undef LANCIR_PI } // namespace avir #endif // AVIR_CLANCIR_INCLUDED