cosmopolitan/third_party/avir/lancir.h

// clang-format off
//$ nobt
//$ nocpp

/**
 * @file lancir.h
 *
 * @brief The self-contained "lancir" inclusion file.
 *
 * This is the self-contained inclusion file for the "LANCIR" image resizer,
 * part of the AVIR library.
 *
 * AVIR Copyright (c) 2015-2019 Aleksey Vaneev
 *
 * @mainpage
 *
 * @section intro_sec Introduction
 *
 * Description is available at https://github.com/avaneev/avir
 *
 * @section license License
 *
 * AVIR License Agreement
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2019 Aleksey Vaneev
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef AVIR_CLANCIR_INCLUDED
#define AVIR_CLANCIR_INCLUDED

#include "third_party/avir/notice.h"
#include "libc/str/str.h"
#include "libc/log/log.h"
#include "libc/mem/mem.h"
#include "libc/log/check.h"
#include "libc/macros.h"
#include "libc/math.h"

namespace avir {

/**
 * The macro equals to "pi" constant, fills 53-bit floating point mantissa.
 * Undefined at the end of file.
 */

#define LANCIR_PI 3.1415926535897932

/**
 * @brief LANCIR image resizer class.
 *
 * The object of this class can be used to resize 1-4 channel images to any
 * required size. Resizing is performed by utilizing Lanczos filters, with
 * 8-bit precision. This class offers a kind of "optimal" Lanczos resampling
 * implementation.
 *
 * Object of this class can be allocated on stack.
 *
 * Note that object of this class does not free temporary buffers and
 * variables after the resizeImage() call (until object's destruction), these
 * buffers are reused on subsequent calls making batch resizing of same-size
 * images faster. This means resizing is not thread-safe: a separate object
 * should be created for each thread.
 */

class CLancIR
{
private:
	CLancIR( const CLancIR& )
	{
		// Unsupported.
	}

	CLancIR& operator = ( const CLancIR& )
	{
		// Unsupported: assignment is private and deliberately does nothing,
		// the right-hand side is ignored and *this is returned unchanged.
		CLancIR& Self = *this;

		return( Self );
	}

public:
	CLancIR()
		: FltBuf( NULL )
		, FltBufLen( 0 )
		, spv( NULL )
		, spvlen( 0 )
	{
	}

	~CLancIR()
	{
		// Both buffers are obtained from memalign() in resizeImage() (which
		// also releases them with free() when growing), so they have to be
		// released with free() here as well. Using delete[] on memory that
		// did not come from new[] is undefined behavior. free( NULL ) is a
		// no-op, so never-allocated buffers are handled too.

		free( FltBuf );
		free( spv );
	}

	/**
	 * Function resizes image.
	 *
	 * @param SrcBuf Source image buffer.
	 * @param SrcWidth Source image width.
	 * @param SrcHeight Source image height.
	 * @param SrcScanlineSize Physical size of source scanline in elements
	 * (not bytes). If this value is below 1, SrcWidth * ElCount will be
	 * used as the physical source scanline size.
	 * @param[out] NewBuf Buffer to accept the resized image. Can be equal to
	 * SrcBuf if the size of the resized image is smaller or equal to source
	 * image in size.
	 * @param NewWidth New image width.
	 * @param NewHeight New image height.
	 * @param ElCount The number of elements (channels) used to store each
	 * source and destination pixel (1-4).
	 * @param kx0 Resizing step - horizontal (one output pixel corresponds to
	 * "k" input pixels). A downsizing factor if > 1.0; upsizing factor
	 * if <= 1.0. Multiply by -1 if you would like to bypass "ox" and "oy"
	 * adjustment which is done by default to produce a centered image. If
	 * step value equals 0, the step value will be chosen automatically.
	 * @param ky0 Resizing step - vertical. Same as "kx0".
	 * @param ox Start X pixel offset within source image (can be negative).
	 * Positive offset moves the image to the left.
	 * @param oy Start Y pixel offset within source image (can be negative).
	 * Positive offset moves the image to the top.
	 * @tparam T Input and output buffer element's type. Can be uint8_t
	 * (0-255 value range), uint16_t (0-65535 value range), float
	 * (any value range), double (any value range). Larger integer types are
	 * treated as uint16_t. Signed integer types are unsupported.
	 */

	template< class T >
	void resizeImage( const T* const SrcBuf, const int SrcWidth,
		const int SrcHeight, int SrcScanlineSize, T* const NewBuf,
		const int NewWidth, const int NewHeight, const int ElCount,
		const double kx0 = 0.0, const double ky0 = 0.0, double ox = 0.0,
		double oy = 0.0 )
	{
		if( NewWidth <= 0 || NewHeight <= 0 )
		{
			return;
		}

		if( SrcWidth <= 0 || SrcHeight <= 0 )
		{
			handleEmptySrcCornerCase( NewBuf, (size_t) NewWidth * NewHeight * sizeof( T ) );
			return;
		}

		const double la = 3.0; // Lanczos "a".
		double kx;
		double ky;

		if( kx0 == 0.0 )
		{
			if( NewWidth > SrcWidth )
			{
				kx = (double) ( SrcWidth - 1 ) / ( NewWidth - 1 );
			}
			else
			{
				kx = (double) SrcWidth / NewWidth;
				ox += ( kx - 1.0 ) * 0.5;
			}
		}
		else
		if( kx0 > 0.0 )
		{
			kx = kx0;

			if( kx0 > 1.0 )
			{
				ox += ( kx0 - 1.0 ) * 0.5;
			}
		}
		else
		{
			kx = -kx0;
		}

		if( ky0 == 0.0 )
		{
			if( NewHeight > SrcHeight )
			{
				ky = (double) ( SrcHeight - 1 ) / ( NewHeight - 1 );
			}
			else
			{
				ky = (double) SrcHeight / NewHeight;
				oy += ( ky - 1.0 ) * 0.5;
			}
		}
		else
		if( ky0 > 0.0 )
		{
			ky = ky0;

			if( ky0 > 1.0 )
			{
				oy += ( ky0 - 1.0 ) * 0.5;
			}
		}
		else
		{
			ky = -ky0;
		}

		if( rfh.update( la, kx ))
		{
			rsh.reset();
			rsv.reset();
		}

		CResizeFilters* rfv; // Pointer to resizing filters for vertical
			// resizing, may equal to "rfh" if the same stepping is in use.

		if( ky == kx )
		{
			rfv = &rfh;
		}
		else
		{
			rfv = &rfv0;

			if( rfv0.update( la, ky ))
			{
				rsv.reset();
			}
		}

		rsh.update( kx, ox, ElCount, SrcWidth, NewWidth, rfh );
		rsv.update( ky, oy, ElCount, SrcHeight, NewHeight, *rfv );

		const int NewWidthE = NewWidth * ElCount;

		if( SrcScanlineSize < 1 )
		{
			SrcScanlineSize = SrcWidth * ElCount;
		}

		// Allocate/resize temporary buffer.

		const size_t FltBufLenNew = (size_t) NewWidthE * (size_t) SrcHeight;

		if( FltBufLenNew > FltBufLen )
		{
			free( FltBuf );
			FltBufLen = FltBufLenNew;
			FltBuf = (float *) memalign( 32, sizeof(float) * FltBufLen );
			CHECK_NOTNULL(FltBuf);
		}

		// Perform horizontal resizing.

		const T* ips = SrcBuf;
		float* op = FltBuf;
		size_t i;

		if( ElCount == 3 )
		{
			for( i = 0; i < SrcHeight; i++ )
			{
				copyScanline3h( ips, rsh, SrcWidth );
				resize3( op, NewWidth, rsh.pos, rfh.KernelLen );
				ips += SrcScanlineSize;
				op += NewWidthE;
			}
		}
		else
		if( ElCount == 1 )
		{
			for( i = 0; i < SrcHeight; i++ )
			{
				copyScanline1h( ips, rsh, SrcWidth );
				resize1( op, NewWidth, rsh.pos, rfh.KernelLen );
				ips += SrcScanlineSize;
				op += NewWidthE;
			}
		}
		else
		if( ElCount == 4 )
		{
			for( i = 0; i < SrcHeight; i++ )
			{
				copyScanline4h( ips, rsh, SrcWidth );
				resize4( op, NewWidth, rsh.pos, rfh.KernelLen );
				ips += SrcScanlineSize;
				op += NewWidthE;
			}
		}
		else
		if( ElCount == 2 )
		{
			for( i = 0; i < SrcHeight; i++ )
			{
				copyScanline2h( ips, rsh, SrcWidth );
				resize2( op, NewWidth, rsh.pos, rfh.KernelLen );
				ips += SrcScanlineSize;
				op += NewWidthE;
			}
		}

		// Perform vertical resizing.

		const int spvlennew = NewHeight * ElCount;

		if( spvlennew > spvlen )
		{
			free( spv );
			spvlen = spvlennew;
			spv = (float *) memalign( 32, sizeof(float) * spvlen );
		}

		const bool IsIOFloat = ( (T) 0.25 != 0 );
		const int Clamp = ( sizeof( T ) == 1 ? 255 : 65535 );
		const float* ip = FltBuf;
		T* opd = NewBuf;

		if( ElCount == 3 )
		{
			for( i = 0; i < NewWidth; i++ )
			{
				copyScanline3v( ip, rsv, SrcHeight, NewWidthE );
				resize3( spv, NewHeight, rsv.pos, rfv -> KernelLen );
				copyOutput3( spv, opd, NewHeight, NewWidthE, IsIOFloat,
					Clamp );

				ip += 3;
				opd += 3;
			}
		}
		else
		if( ElCount == 1 )
		{
			for( i = 0; i < NewWidth; i++ )
			{
				copyScanline1v( ip, rsv, SrcHeight, NewWidthE );
				resize1( spv, NewHeight, rsv.pos, rfv -> KernelLen );
				copyOutput1( spv, opd, NewHeight, NewWidthE, IsIOFloat,
					Clamp );

				ip++;
				opd++;
			}
		}
		else
		if( ElCount == 4 )
		{
			for( i = 0; i < NewWidth; i++ )
			{
				copyScanline4v( ip, rsv, SrcHeight, NewWidthE );
				resize4( spv, NewHeight, rsv.pos, rfv -> KernelLen );
				copyOutput4( spv, opd, NewHeight, NewWidthE, IsIOFloat,
					Clamp );

				ip += 4;
				opd += 4;
			}
		}
		else
		if( ElCount == 2 )
		{
			for( i = 0; i < NewWidth; i++ )
			{
				copyScanline2v( ip, rsv, SrcHeight, NewWidthE );
				resize2( spv, NewHeight, rsv.pos, rfv -> KernelLen );
				copyOutput2( spv, opd, NewHeight, NewWidthE, IsIOFloat,
					Clamp );

				ip += 2;
				opd += 2;
			}
		}
	}

protected:
	float* FltBuf; ///< Intermediate resizing buffer. Receives the
		///< horizontally-resized source rows (NewWidth * ElCount floats per
		///< source row). Allocated with memalign() in resizeImage() and
		///< only ever grown, so it is reused across calls.
		///<
	size_t FltBufLen; ///< Intermediate resizing buffer length, in float
		///< elements.
		///<
	float* spv; ///< Scanline buffer for vertical resizing. Receives one
		///< resized column (NewHeight * ElCount floats). Allocated with
		///< memalign() in resizeImage().
		///<
	unsigned spvlen; ///< Length of "spv", in float elements.
		///<

	/**
	 * Function rounds a value and applies clamping.
	 *
	 * @param v Value to round and clamp.
	 * @param Clamp High clamp level, low level is 0.
	 */
	static int roundclamp( const float v, const int Clamp )
#define roundclamp(V, CLAMP) ((CLAMP)==255?MAX(0, MIN(255, lrintf(V))):(roundclamp)(V, CLAMP))
	{
		// NOTE: the function-like macro of the same name defined just above
		// intercepts ordinary "roundclamp( v, c )" calls and only falls back
		// to this function, via "(roundclamp)( v, c )", when the clamp level
		// is not 255. The function still handles Clamp == 255 itself so that
		// it stays correct when called directly.

		// Zero, negative values and NaN all map to the low clamp level.
		// Rejecting NaN here keeps the float-to-integer conversions below
		// well-defined.

		if( !( v > 0.0f ))
		{
			return( 0 );
		}

		if( Clamp == 255 )
		{
			// Test the upper bound before converting: lrintf() has an
			// unspecified result for values that do not fit into "long".

			if( v >= 255.0f )
			{
				return( 255 );
			}

			return( (int) lrintf( v ));
		}

		const float r = v + 0.5f;

		// Casting a float at or above 2^31 to "int" is undefined behavior;
		// such values are far above any clamp level anyway.

		if( r >= 2147483648.0f )
		{
			return( Clamp );
		}

		const int vr = (int) r;

		return( vr > Clamp ? Clamp : vr );
	}

	/**
	 * Function performs final output of the resized scanline data to the
	 * destination image buffer. Variants for 1-4-channel image.
	 *
	 * @param ip Input resized scanline.
	 * @param op Output image buffer.
	 * @param l Pixel count.
	 * @param opinc "op" increment, should account ElCount.
	 * @param IsIOFloat "True" if float output and no clamping is necessary.
	 * @param Clamp Clamp high level, used if IsIOFloat is "false".
	 */

	template< class T >
	static void copyOutput1( const float* ip, T* op, int l, const int opinc,
		const bool IsIOFloat, const int Clamp )
	{
		// 1-channel variant: one input float per pixel, destination pointer
		// advances by "opinc" elements after every pixel.

		if( !IsIOFloat )
		{
			// Integer output: round and clamp to [0; Clamp].

			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp );
				ip++;
				op += opinc;
			}

			return;
		}

		// Floating point output: plain conversion, no clamping.

		for( ; l > 0; l-- )
		{
			op[ 0 ] = (T) ip[ 0 ];
			ip++;
			op += opinc;
		}
	}

	template< class T >
	static void copyOutput2( const float* ip, T* op, int l, const int opinc,
		const bool IsIOFloat, const int Clamp )
	{
		// 2-channel variant: two input floats per pixel, destination pointer
		// advances by "opinc" elements after every pixel.

		if( !IsIOFloat )
		{
			// Integer output: round and clamp to [0; Clamp].

			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp );
				op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp );
				ip += 2;
				op += opinc;
			}

			return;
		}

		// Floating point output: plain conversion, no clamping.

		for( ; l > 0; l-- )
		{
			op[ 0 ] = (T) ip[ 0 ];
			op[ 1 ] = (T) ip[ 1 ];
			ip += 2;
			op += opinc;
		}
	}

	template< class T >
	static void copyOutput3( const float* ip, T* op, int l, const int opinc,
		const bool IsIOFloat, const int Clamp )
	{
		// 3-channel variant: three input floats per pixel, destination
		// pointer advances by "opinc" elements after every pixel.

		if( !IsIOFloat )
		{
			// Integer output: round and clamp to [0; Clamp].

			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp );
				op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp );
				op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp );
				ip += 3;
				op += opinc;
			}

			return;
		}

		// Floating point output: plain conversion, no clamping.

		for( ; l > 0; l-- )
		{
			op[ 0 ] = (T) ip[ 0 ];
			op[ 1 ] = (T) ip[ 1 ];
			op[ 2 ] = (T) ip[ 2 ];
			ip += 3;
			op += opinc;
		}
	}

	template< class T >
	static void copyOutput4( const float* ip, T* op, int l, const int opinc,
		const bool IsIOFloat, const int Clamp )
	{
		// 4-channel variant: four input floats per pixel, destination
		// pointer advances by "opinc" elements after every pixel.

		if( !IsIOFloat )
		{
			// Integer output: round and clamp to [0; Clamp].

			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp );
				op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp );
				op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp );
				op[ 3 ] = (T) roundclamp( ip[ 3 ], Clamp );
				ip += 4;
				op += opinc;
			}

			return;
		}

		// Floating point output: plain conversion, no clamping.

		for( ; l > 0; l-- )
		{
			op[ 0 ] = (T) ip[ 0 ];
			op[ 1 ] = (T) ip[ 1 ];
			op[ 2 ] = (T) ip[ 2 ];
			op[ 3 ] = (T) ip[ 3 ];
			ip += 4;
			op += opinc;
		}
	}

	class CResizeScanline;

	/**
	 * Class implements fractional delay filter bank calculation.
	 */

	class CResizeFilters
	{
		friend class CResizeScanline;

	public:
		int KernelLen; ///< Resampling filter kernel length, taps. Available
			///< after the update() function call.
			///<

		CResizeFilters()
			: FilterBuf( NULL )
			, Filters( NULL )
			, Prevla( -1.0 )
			, Prevk( -1.0 )
			, FilterBufLen( 0 )
			, FiltersLen( 0 )
		{
		}

		~CResizeFilters()
		{
			// "Filters" comes from new[] and "FilterBuf" from memalign()
			// (see update()), hence the two different release calls.

			delete[] Filters;
			free( FilterBuf );
		}

		/**
		 * Function updates the resizing filter bank.
		 *
		 * @param la Lanczos "a" parameter value.
		 * @param k Resizing step.
		 * @return "True" if update occurred and resizing positions should be
		 * updated unconditionally.
		 */

		bool update( const double la, const double k )
		{
			if( la == Prevla && k == Prevk )
			{
				return( false );
			}

			Prevla = la;
			Prevk = k;

			NormFreq = ( k <= 1.0 ? 1.0 : 1.0 / k );
			Freq = LANCIR_PI * NormFreq;

			if( Freq > LANCIR_PI )
			{
				Freq = LANCIR_PI;
			}

			FreqA = LANCIR_PI * NormFreq / la;
			Len2 = la / NormFreq;
			fl2 = (int) ceil( Len2 );
			KernelLen = fl2 + fl2;

			FracCount = 607; // For 8-bit precision.
			FracFill = 0;

			const int FilterBufLenNew = FracCount * KernelLen;

			if( FilterBufLenNew > FilterBufLen )
			{
				free( FilterBuf );
				FilterBufLen = FilterBufLenNew;
				FilterBuf = (float *) memalign( 32, sizeof(float) * FilterBufLen );
				CHECK_NOTNULL(FilterBuf);
			}

			if( FracCount > FiltersLen )
			{
				delete[] Filters;
				FiltersLen = FracCount;
				Filters = new float*[ FiltersLen ];
			}

			memset( Filters, 0, FracCount * sizeof( float* ));

			return( true );
		}

		/**
		 * Function returns filter at the specified fractional offset. This
		 * function can only be called after a prior update() function
		 * call.
		 *
		 * @param x Fractional offset, [0; 1).
		 */

		float* getFilter( const double x )
		{
			// Quantize the offset to one of FracCount fractional positions.

			const int Frac = (int) floor( x * FracCount );
			float*& Slot = Filters[ Frac ];

			if( Slot != NULL )
			{
				return( Slot );
			}

			// First request for this position: take the next free
			// KernelLen-sized slice of FilterBuf and build the filter there.

			Slot = FilterBuf + FracFill * KernelLen;
			FracFill++;
			makeFilter( 1.0 - (double) Frac / FracCount, Slot );
			normalizeFilter( Slot );

			return( Slot );
		}

	protected:
		double NormFreq; ///< Normalized frequency of the filter.
			///<
		double Freq; ///< Circular frequency of the filter.
			///<
		double FreqA; ///< Circular frequency of the window function.
			///<
		double Len2; ///< Half resampling filter length, unrounded.
			///<
		int fl2; ///< Half resampling length, integer.
			///<
		int FracCount; ///< The number of fractional positions for which
			///< filters are created.
			///<
		int FracFill; ///< The number of fractional positions filled in the
			///< filter buffer.
			///<
		float* FilterBuf; ///< Buffer that holds all filters.
			///<
		float** Filters; ///< Fractional delay filters for all positions.
			///< Filter pointers equal NULL if filter was not yet created.
			///<
		double Prevla; ///< Previous "la".
			///<
		double Prevk; ///< Previous "k".
			///<
		int FilterBufLen; ///< Allocated length of FilterBuf in elements.
			///<
		int FiltersLen; ///< Allocated length of Filters in elements.
			///<

		/**
		 * @brief Sine signal generator class.
		 *
		 * Class implements sine signal generator without biasing, with
		 * constructor-based initialization only. This generator uses
		 * oscillator instead of "sin" function.
		 */

		class CSinGen
		{
		public:
			/**
			 * Constructor sets up the oscillator state so that successive
			 * generate() calls return g * sin( ph ), g * sin( ph + si ),
			 * g * sin( ph + 2 * si ), and so on.
			 *
			 * @param si Sine function increment, in radians.
			 * @param ph Starting phase, in radians. Add 0.5 * LANCIR_PI for
			 * cosine function.
			 * @param g Gain value.
			 */

			CSinGen( const double si, const double ph, const double g = 1.0 )
			{
				svalue1 = sin( ph ) * g;
				svalue2 = sin( ph - si ) * g;
				sincr = 2.0 * cos( si );
			}

			/**
			 * @return The next value of the sine function, without biasing.
			 */

			double generate()
			{
				// Two-term recurrence: next = 2 * cos( si ) * cur - prev.

				const double cur = svalue1;
				const double next = sincr * cur - svalue2;

				svalue2 = cur;
				svalue1 = next;

				return( cur );
			}

		private:
			double svalue1; ///< Current sine value.
				///<
			double svalue2; ///< Previous sine value.
				///<
			double sincr; ///< Sine value increment (2 * cos( si )).
				///<
		};

		/**
		 * Function creates filter for the specified fractional delay. The
		 * update() function should be called prior to calling this function.
		 *
		 * @param FracDelay Fractional delay, 0 to 1, inclusive.
		 * @param[out] op Output filter buffer.
		 * @tparam T Output buffer type.
		 */

		template< class T >
		void makeFilter( const double FracDelay, T* op ) const
		{
			// Writes exactly KernelLen (2 * fl2) taps to "op", for tap
			// positions t = -fl2 .. fl2 - 1. Each regular tap is the
			// product of two oscillator outputs divided by
			// ( pi * ( t + FracDelay )) ^ 2. The result is not normalized;
			// getFilter() calls normalizeFilter() afterwards.

			// "f" runs at the filter frequency, "fw" at the window frequency
			// (with gain Len2). Both start at the phase of the first tap
			// (t = -fl2) and advance by one tap per generate() call, so the
			// two generators have to be stepped exactly once per tap.
			CSinGen f( Freq, Freq * ( FracDelay - fl2 ));
			CSinGen fw( FreqA, FreqA * ( FracDelay - fl2 ), Len2 );

			int t = -fl2;

			// First tap lies beyond the unrounded half-length: output zero,
			// but still step the generators to keep them in phase.
			if( t + FracDelay < -Len2 )
			{
				f.generate();
				fw.generate();
				*op = (T) 0.0;
				op++;
				t++;
			}

			// Taps before the central one. When FracDelay is (numerically)
			// 1, the zero-argument tap is at t = -1 instead of t = 0, so
			// this loop stops one tap earlier.
			int mt = ( FracDelay >= 1.0 - 1e-13 && FracDelay <= 1.0 + 1e-13 ?
				-1 : 0 );

			while( t < mt )
			{
				double ut = ( t + FracDelay ) * LANCIR_PI;
				*op = (T) ( f.generate() * fw.generate() / ( ut * ut ));
				op++;
				t++;
			}

			// Central tap: with a (near) zero argument the general formula
			// would divide by zero, NormFreq is stored instead.
			double ut = t + FracDelay;

			if( fabs( ut ) <= 1e-13 )
			{
				*op = (T) NormFreq;
				f.generate();
				fw.generate();
			}
			else
			{
				ut *= LANCIR_PI;
				*op = (T) ( f.generate() * fw.generate() / ( ut * ut ));
			}

			// Taps after the central one, up to and including t = fl2 - 2.
			// Note the pre-increment of "op" and "t" in this loop.
			mt = fl2 - 2;

			while( t < mt )
			{
				op++;
				t++;
				ut = ( t + FracDelay ) * LANCIR_PI;
				*op = (T) ( f.generate() * fw.generate() / ( ut * ut ));
			}

			// Last tap (t = fl2 - 1): zero if it lies beyond the unrounded
			// half-length, otherwise the regular formula.
			op++;
			t++;
			ut = t + FracDelay;

			if( ut > Len2 )
			{
				*op = (T) 0.0;
			}
			else
			{
				ut *= LANCIR_PI;
				*op = (T) ( f.generate() * fw.generate() / ( ut * ut ));
			}
		}

		/**
		 * Function normalizes the specified filter so that it has unity gain
		 * at DC.
		 *
		 * @param p Filter buffer pointer.
		 * @tparam T Filter buffer type.
		 */

		template< class T >
		void normalizeFilter( T* const p ) const
		{
			const size_t TapCount = (size_t) KernelLen;
			size_t j;

			// Sum of all taps, accumulated in double precision.

			double Total = 0.0;

			for( j = 0; j < TapCount; j++ )
			{
				Total += p[ j ];
			}

			// Scale every tap so that the taps sum to 1 (unity gain at DC).

			const double Gain = 1.0 / Total;

			for( T* q = p; q != p + TapCount; q++ )
			{
				*q = (T) ( *q * Gain );
			}
		}
	};

	/**
	 * Structure defines source scanline positioning and filters for each
	 * destination pixel.
	 */

	struct CResizePos
	{
		const float* ip; ///< Source image pixel pointer: the first source
			///< sample the filter is applied to. Set by
			///< CResizeScanline::update() to point into its "sp" buffer.
			///<
		float* flt; ///< Fractional delay filter (KernelLen taps), as returned
			///< by CResizeFilters::getFilter().
			///<
	};

	/**
	 * Class contains resizing positioning and a temporary scanline buffer,
	 * prepares source scanline positions for resize filtering.
	 */

	class CResizeScanline
	{
	public:
		int padl; ///< Left-padding (in pixels) required for source scanline.
			///< Available after the update() function call.
			///<
		int padr; ///< Right-padding (in pixels) required for source scanline.
			///< Available after the update() function call.
			///<
		float* sp; ///< Source scanline buffer, with "padl" and "padr"
			///< padding.
			///<
		CResizePos* pos; ///< Source scanline pointers (point to "sp")
			///< and filters for each destination pixel position. Available
			///< after the update() function call.
			///<

		CResizeScanline()
			: sp( NULL )
			, pos( NULL )
			, PrevSrcLen( -1 )
			, PrevDstLen( -1 )
			, Prevk( 0.0 )
			, Prevo( 0.0 )
			, PrevElCount( 0 )
			, splen( 0 )
			, poslen( 0 )
		{
		}

		~CResizeScanline()
		{
			// "pos" comes from new[] and "sp" from memalign() (see
			// update()), hence the two different release calls.

			delete[] pos;
			free( sp );
		}

		/**
		 * Function "resets" *this object so that the next update() call fully
		 * updates the position buffer. Reset is necessary if the filter
		 * object was updated.
		 */

		void reset()
		{
			PrevSrcLen = -1;
		}

		/**
		 * Function updates resizing positions, updates "padl", "padr" and
		 * "pos" buffer.
		 *
		 * @param k Resizing step.
		 * @param o0 Initial source image offset.
		 * @param SrcLen Source image scanline length, used to create a
		 * scanline buffer without length pre-calculation.
		 * @param DstLen Destination image scanline length.
		 * @param rf Resizing filters object.
		 */

		void update( const double k, const double o0, const int ElCount,
			const int SrcLen, const size_t DstLen, CResizeFilters& rf )
		{
			if( SrcLen == PrevSrcLen && DstLen == PrevDstLen &&
				k == Prevk && o0 == Prevo && ElCount == PrevElCount )
			{
				return;
			}

			PrevSrcLen = SrcLen;
			PrevDstLen = DstLen;
			Prevk = k;
			Prevo = o0;
			PrevElCount = ElCount;

			const int fl2m1 = rf.fl2 - 1;
			padl = fl2m1 - (int) floor( o0 );

			if( padl < 0 )
			{
				padl = 0;
			}

			padr = (int) floor( o0 + k * ( DstLen - 1 )) + rf.fl2 + 1 -
				SrcLen;

			if( padr < 0 )
			{
				padr = 0;
			}

			const int splennew = ( padl + SrcLen + padr ) * ElCount;

			if( splennew > splen )
			{
				free( sp );
				splen = splennew;
				sp = (float *) memalign( 32, sizeof(float) * splen );
				CHECK_NOTNULL(sp);
			}

			if( DstLen > poslen )
			{
				delete[] pos;
				poslen = DstLen;
				pos = new CResizePos[ poslen ];
			}

			const float* const spo = sp + ( padl - fl2m1 ) * ElCount;
			size_t i;

			for( i = 0; i < DstLen; i++ )
			{
				const double o = o0 + k * i;
				const int ix = (int) floor( o );
				pos[ i ].ip = spo + ix * ElCount;
				pos[ i ].flt = rf.getFilter( o - ix );
			}
		}

	protected:
		int PrevSrcLen; ///< Previous SrcLen.
			///<
		int PrevDstLen; ///< Previous DstLen.
			///<
		double Prevk; ///< Previous "k".
			///<
		double Prevo; ///< Previous "o".
			///<
		int PrevElCount; ///< Previous pixel element count.
			///<
		int splen; ///< Allocated "sp" buffer length.
			///<
		int poslen; ///< Allocated "pos" buffer length.
			///<
	};

	CResizeFilters rfh; ///< Resizing filters for horizontal resizing. Also
		///< used for vertical resizing when the vertical step equals the
		///< horizontal one.
		///<
	CResizeFilters rfv0; ///< Resizing filters for vertical resizing (may not
		///< be in use: only updated and used when the vertical step differs
		///< from the horizontal one).
		///<
	CResizeScanline rsh; ///< Horizontal resize scanline.
		///<
	CResizeScanline rsv; ///< Vertical resize scanline.
		///<

	/**
	 * Function copies scanline from the source buffer in its native format
	 * to internal scanline buffer, in preparation for horizontal resizing.
	 * Variants for 1-4-channel images.
	 *
	 * @param ip Source scanline buffer.
	 * @param rs Scanline resizing positions object.
	 * @param l Source scanline length, in pixels.
	 * (The horizontal variants take no "ipinc" parameter: "ip" advances by
	 * the channel count per pixel.)
	 */

	template< class T >
	static void copyScanline1h( const T* ip, CResizeScanline& rs, const int l )
	{
		float* op = rs.sp;
		int n;

		DCHECK_ALIGNED(32, op);

		// Left padding: replicate the first source pixel.

		for( n = rs.padl; n > 0; n-- )
		{
			*op = ip[ 0 ];
			op++;
		}

		// All source pixels except the last one.

		for( n = l - 1; n > 0; n-- )
		{
			*op = ip[ 0 ];
			ip++;
			op++;
		}

		// The last source pixel, followed by its "padr" replicas.

		for( n = rs.padr; n >= 0; n-- )
		{
			*op = ip[ 0 ];
			op++;
		}
	}

	template< class T >
	static void copyScanline2h( const T* ip, CResizeScanline& rs, const int l )
	{
		float* dst = rs.sp;
		int n;

		// Left padding: replicate the first source pixel.

		for( n = rs.padl; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst += 2;
		}

		// All source pixels except the last one.

		for( n = l - 1; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			ip += 2;
			dst += 2;
		}

		// The last source pixel, followed by its "padr" replicas.

		for( n = rs.padr; n >= 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst += 2;
		}
	}

	template< class T >
	static void copyScanline3h( const T* ip, CResizeScanline& rs, const int l )
	{
		float* dst = rs.sp;
		int n;

		// Left padding: replicate the first source pixel.

		for( n = rs.padl; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst += 3;
		}

		// All source pixels except the last one.

		for( n = l - 1; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			ip += 3;
			dst += 3;
		}

		// The last source pixel, followed by its "padr" replicas.

		for( n = rs.padr; n >= 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst += 3;
		}
	}

	template< class T >
	static void copyScanline4h( const T* ip, CResizeScanline& rs, const size_t l )
	{
		float* dst = rs.sp;
		size_t n;

		// Left padding: replicate the first source pixel.

		n = 0;

		while( n < rs.padl )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst[ 3 ] = ip[ 3 ];
			dst += 4;
			n++;
		}

		// All source pixels except the last one.

		n = 0;

		while( n < l - 1 )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst[ 3 ] = ip[ 3 ];
			ip += 4;
			dst += 4;
			n++;
		}

		// The last source pixel, followed by its "padr" replicas.

		n = 0;

		while( n <= rs.padr )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst[ 3 ] = ip[ 3 ];
			dst += 4;
			n++;
		}
	}

	/**
	 * Function copies scanline from the source buffer in its native format
	 * to internal scanline buffer, in preparation for vertical resizing.
	 * Variants for 1-4-channel images.
	 *
	 * @param ip Source scanline buffer.
	 * @param rs Scanline resizing positions object.
	 * @param l Source scanline length, in pixels.
	 * @param ipinc "ip" increment per pixel.
	 */

	template< class T >
	static void copyScanline1v( const T* ip, CResizeScanline& rs, const int l,
		const int ipinc )
	{
		float* op = rs.sp;
		int n;

		DCHECK_ALIGNED(32, op);

		// Top padding: replicate the first source pixel.

		for( n = rs.padl; n > 0; n-- )
		{
			*op = ip[ 0 ];
			op++;
		}

		// All source pixels except the last one, stepping "ipinc" elements
		// between them.

		for( n = l - 1; n > 0; n-- )
		{
			*op = ip[ 0 ];
			ip += ipinc;
			op++;
		}

		// The last source pixel, followed by its "padr" replicas.

		for( n = rs.padr; n >= 0; n-- )
		{
			*op = ip[ 0 ];
			op++;
		}
	}

	template< class T >
	static void copyScanline2v( const T* ip, CResizeScanline& rs, const int l,
		const int ipinc )
	{
		float* dst = rs.sp;
		int n;

		// Top padding: replicate the first source pixel.

		for( n = rs.padl; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst += 2;
		}

		// All source pixels except the last one, stepping "ipinc" elements
		// between them.

		for( n = l - 1; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			ip += ipinc;
			dst += 2;
		}

		// The last source pixel, followed by its "padr" replicas.

		for( n = rs.padr; n >= 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst += 2;
		}
	}

	template< class T >
	static void copyScanline3v( const T* ip, CResizeScanline& rs, const int l,
		const int ipinc )
	{
		float* dst = rs.sp;
		int n;

		// Top padding: replicate the first source pixel.

		for( n = rs.padl; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst += 3;
		}

		// All source pixels except the last one, stepping "ipinc" elements
		// between them.

		for( n = l - 1; n > 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			ip += ipinc;
			dst += 3;
		}

		// The last source pixel, followed by its "padr" replicas.

		for( n = rs.padr; n >= 0; n-- )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst += 3;
		}
	}

	template< class T >
	static void copyScanline4v( const T* ip, CResizeScanline& rs, const size_t l,
		const int ipinc )
	{
		float* dst = rs.sp;
		size_t n;

		// Top padding: replicate the first source pixel.

		n = 0;

		while( n < rs.padl )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst[ 3 ] = ip[ 3 ];
			dst += 4;
			n++;
		}

		// All source pixels except the last one, stepping "ipinc" elements
		// between them.

		n = 0;

		while( n < l - 1 )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst[ 3 ] = ip[ 3 ];
			ip += ipinc;
			dst += 4;
			n++;
		}

		// The last source pixel, followed by its "padr" replicas.

		n = 0;

		while( n <= rs.padr )
		{
			dst[ 0 ] = ip[ 0 ];
			dst[ 1 ] = ip[ 1 ];
			dst[ 2 ] = ip[ 2 ];
			dst[ 3 ] = ip[ 3 ];
			dst += 4;
			n++;
		}
	}

	#define LANCIR_LF_PRE \
			CResizePos* const rpe = rp + DstLen; \
			while( rp < rpe ) \
			{ \
				const float* ip = rp -> ip; \
				const float* const flt = rp -> flt;

	#define LANCIR_LF_POST \
				rp++; \
			}

	static void resize1_kl6( float* op, int DstLen, CResizePos* rp )
	{
		LANCIR_LF_PRE
		op[ 0 ] =
			flt[ 0 ] * ip[ 0 ] +
			flt[ 1 ] * ip[ 1 ] +
			flt[ 2 ] * ip[ 2 ] +
			flt[ 3 ] * ip[ 3 ] +
			flt[ 4 ] * ip[ 4 ] +
			flt[ 5 ] * ip[ 5 ];
		op++;
		LANCIR_LF_POST
	}

	static void resize1_kln( float* op, int DstLen, CResizePos* rp, const int kl )
	{
		LANCIR_LF_PRE
		float sum = 0.0;
		int i;
		for( i = 0; i < kl; i++ )
		{
			sum += flt[ i ] * ip[ i ];
		}
		op[ 0 ] = sum;
		op++;
		LANCIR_LF_POST
	}

	/**
	 * Function performs internal scanline resizing. Variants for 1-4-channel
	 * images.
	 *
	 * @param op Destination buffer.
	 * @param DstLen Destination length, in pixels.
	 * @param rp Resizing positions and filters.
	 * @param kl Filter kernel length, in taps.
	 */
	static void resize1( float* op, int DstLen, CResizePos* rp, const int kl )
	{
		// Dispatch on kernel length: anything other than 6 taps goes to the
		// generic loop, 6 taps uses the unrolled variant.

		if( kl != 6 )
		{
			resize1_kln( op, DstLen, rp, kl );
			return;
		}

		resize1_kl6( op, DstLen, rp );
	}

	static void resize2( float* op, int DstLen, CResizePos* rp, const int kl )
	{
		if( kl == 6 )
		{
			LANCIR_LF_PRE
			op[ 0 ] =
				flt[ 0 ] * ip[ 0 ] +
				flt[ 1 ] * ip[ 2 ] +
				flt[ 2 ] * ip[ 4 ] +
				flt[ 3 ] * ip[ 6 ] +
				flt[ 4 ] * ip[ 8 ] +
				flt[ 5 ] * ip[ 10 ];

			op[ 1 ] =
				flt[ 0 ] * ip[ 1 ] +
				flt[ 1 ] * ip[ 3 ] +
				flt[ 2 ] * ip[ 5 ] +
				flt[ 3 ] * ip[ 7 ] +
				flt[ 4 ] * ip[ 9 ] +
				flt[ 5 ] * ip[ 11 ];

			op += 2;
			LANCIR_LF_POST
		}
		else
		{
			LANCIR_LF_PRE
			float sum[ 2 ];
			sum[ 0 ] = 0.0;
			sum[ 1 ] = 0.0;
			int i;

			for( i = 0; i < kl; i++ )
			{
				const float xx = flt[ i ];
				sum[ 0 ] += xx * ip[ 0 ];
				sum[ 1 ] += xx * ip[ 1 ];
				ip += 2;
			}

			op[ 0 ] = sum[ 0 ];
			op[ 1 ] = sum[ 1 ];
			op += 2;
			LANCIR_LF_POST
		}
	}

	static void resize3( float* op, int DstLen, CResizePos* rp, const int kl )
	{
		if( kl == 6 )
		{
			LANCIR_LF_PRE
			op[ 0 ] =
				flt[ 0 ] * ip[ 0 ] +
				flt[ 1 ] * ip[ 3 ] +
				flt[ 2 ] * ip[ 6 ] +
				flt[ 3 ] * ip[ 9 ] +
				flt[ 4 ] * ip[ 12 ] +
				flt[ 5 ] * ip[ 15 ];

			op[ 1 ] =
				flt[ 0 ] * ip[ 1 ] +
				flt[ 1 ] * ip[ 4 ] +
				flt[ 2 ] * ip[ 7 ] +
				flt[ 3 ] * ip[ 10 ] +
				flt[ 4 ] * ip[ 13 ] +
				flt[ 5 ] * ip[ 16 ];

			op[ 2 ] =
				flt[ 0 ] * ip[ 2 ] +
				flt[ 1 ] * ip[ 5 ] +
				flt[ 2 ] * ip[ 8 ] +
				flt[ 3 ] * ip[ 11 ] +
				flt[ 4 ] * ip[ 14 ] +
				flt[ 5 ] * ip[ 17 ];

			op += 3;
			LANCIR_LF_POST
		}
		else
		{
			LANCIR_LF_PRE
			float sum[ 3 ];
			sum[ 0 ] = 0.0;
			sum[ 1 ] = 0.0;
			sum[ 2 ] = 0.0;
			int i;

			for( i = 0; i < kl; i++ )
			{
				const float xx = flt[ i ];
				sum[ 0 ] += xx * ip[ 0 ];
				sum[ 1 ] += xx * ip[ 1 ];
				sum[ 2 ] += xx * ip[ 2 ];
				ip += 3;
			}

			op[ 0 ] = sum[ 0 ];
			op[ 1 ] = sum[ 1 ];
			op[ 2 ] = sum[ 2 ];
			op += 3;
			LANCIR_LF_POST
		}
	}

	static optimizespeed void resize4( float* op, int DstLen, CResizePos* rp, const size_t kl )
	{
		// 4-channel resize for any kernel length. Source samples are
		// interleaved, so every tap applies to four consecutive floats.

		for( CResizePos* const last = rp + DstLen; rp < last; rp++ )
		{
			const float* src = rp -> ip;
			const float* const taps = rp -> flt;
			float acc0 = 0.0;
			float acc1 = 0.0;
			float acc2 = 0.0;
			float acc3 = 0.0;

			for( size_t j = 0; j < kl; j++ )
			{
				const float w = taps[ j ];
				acc0 += w * src[ 0 ];
				acc1 += w * src[ 1 ];
				acc2 += w * src[ 2 ];
				acc3 += w * src[ 3 ];
				src += 4;
			}

			op[ 0 ] = acc0;
			op[ 1 ] = acc1;
			op[ 2 ] = acc2;
			op[ 3 ] = acc3;
			op += 4;
		}
	}

	#undef LANCIR_LF_PRE
	#undef LANCIR_LF_POST

	static relegated nooptimize noinline void handleEmptySrcCornerCase(
		void * const NewBuf, const size_t Size )
	{
		// Called by resizeImage() when the source image is empty: clears
		// "Size" bytes of the destination buffer.

		const int FillByte = 0;

		memset( NewBuf, FillByte, Size );
	}
};

#undef LANCIR_PI

} // namespace avir

#endif // AVIR_CLANCIR_INCLUDED