/* clang-format off */
//$ nobt
//$ nocpp

/**
 * @file avir.h
 *
 * @brief The "main" inclusion file with all required classes and functions.
 *
 * This is the "main" inclusion file for the "AVIR" image resizer. This
 * inclusion file contains implementation of the AVIR image resizing algorithm
 * in its entirety. Also includes several classes and functions that can be
 * useful elsewhere.
 *
 * AVIR Copyright (c) 2015-2019 Aleksey Vaneev
 *
 * @mainpage
 *
 * @section intro_sec Introduction
 *
 * Description is available at https://github.com/avaneev/avir
 *
 * AVIR is devoted to women. Your digital photos can look good at any size!
 *
 * @section license License
 *
 * AVIR License Agreement
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2019 Aleksey Vaneev
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Please credit the author of this library in your documentation in the
 * following way: "AVIR image resizing algorithm designed by Aleksey Vaneev"
 *
 * @version 2.4
 */
|
|
|
|
#ifndef AVIR_CIMAGERESIZER_INCLUDED
|
|
#define AVIR_CIMAGERESIZER_INCLUDED
|
|
|
|
#include "third_party/avir/notice.h"
|
|
#include "libc/bits/xmmintrin.internal.h"
|
|
#include "libc/str/str.h"
|
|
#include "libc/mem/mem.h"
|
|
#include "libc/bits/bits.h"
|
|
#include "libc/math.h"
|
|
|
|
namespace avir {
|
|
|
|
/**
 * The macro defines the AVIR version string.
 */

#define AVIR_VERSION "2.4"

/**
 * The macro equals the "pi" constant, fills the 53-bit floating point
 * mantissa. Undefined at the end of file.
 */

#define AVIR_PI 3.1415926535897932

/**
 * The macro equals the "pi divided by 2" constant, fills the 53-bit floating
 * point mantissa. Undefined at the end of file.
 */

#define AVIR_PId2 1.5707963267948966
|
|
|
|
/**
 * Rounding function, based on the (int) typecast. Biased result. Not suitable
 * for numbers >= 2^31.
 *
 * The function adds 0.5 to the magnitude of the value and truncates the sum
 * with the (int) typecast, so exact halves are rounded away from zero.
 *
 * @param d Value to round.
 * @return Rounded value. Some bias may be introduced.
 */

template <class T>
inline T round(const T d) {
  if (d < 0.0) {
    // Round the magnitude, then restore the sign.
    return (-(T)(int)((T)0.5 - d));
  }

  return ((T)(int)(d + (T)0.5));
}
|
|
|
|
/**
 * Template function "clamps" (clips) the specified value so that it is not
 * less than "minv", and not greater than "maxv".
 *
 * The lower bound is checked first: if "minv" is greater than "maxv" and the
 * value is below "minv", "minv" is returned.
 *
 * @param Value Value to clamp.
 * @param minv Minimal allowed value.
 * @param maxv Maximal allowed value.
 * @return The clamped value.
 */

template <class T>
inline T clamp(const T& Value, const T minv, const T maxv) {
  if (Value < minv) {
    return (minv);
  }

  if (Value > maxv) {
    return (maxv);
  }

  return (Value);
}
|
|
|
|
/**
 * Power 2.4 approximation function, designed for sRGB gamma correction.
 *
 * The approximation is a fixed rational/polynomial expression in "x"; all
 * intermediate values are evaluated in "double" precision regardless of T,
 * and the result is typecast to T.
 *
 * @param x Argument, in the range 0.09 to 1.
 * @return Value raised into power 2.4, approximate.
 */

template <class T>
inline T pow24_sRGB(const T x) {
  const double x2 = x * x;
  const double x3 = x2 * x;
  const double x4 = x2 * x2;

  return ((T)(0.0985766365536824 + 0.839474952656502 * x2 +
              0.363287814061725 * x3 -
              0.0125559718896615 / (0.12758338921578 + 0.290283465468235 * x) -
              0.231757513261358 * x - 0.0395365717969074 * x4));
}
|
|
|
|
/**
 * Power 1/2.4 approximation function, designed for sRGB gamma correction.
 *
 * The approximation combines "x" with its successive square roots (powers
 * 1/2, 1/4 and 1/8); all intermediate values are evaluated in "double"
 * precision regardless of T, and the result is typecast to T.
 *
 * @param x Argument, in the range 0.003 to 1.
 * @return Value raised into power 1/2.4, approximate.
 */

template <class T>
inline T pow24i_sRGB(const T x) {
  const double sx = sqrt(x);      // x^(1/2)
  const double ssx = sqrt(sx);    // x^(1/4)
  const double sssx = sqrt(ssx);  // x^(1/8)

  return ((T)(0.000213364515060263 + 0.0149409239419218 * x +
              0.433973412731747 * sx +
              ssx * (0.659628181609715 * sssx - 0.0380957908841466 -
                     0.0706476137208521 * sx)));
}
|
|
|
|
/**
 * Function approximately linearizes the sRGB gamma value.
 *
 * Values not greater than 0.04045 are divided by 12.92 (linear segment);
 * larger values go through the pow24_sRGB() power 2.4 approximation.
 *
 * @param s sRGB gamma value, in the range 0 to 1.
 * @return Linearized sRGB gamma value, approximated.
 */

template <class T>
inline T convertSRGB2Lin(const T s) {
  const T a = (T)0.055;

  if (s <= (T)0.04045) {
    return (s / (T)12.92);
  }

  return (pow24_sRGB((s + a) / ((T)1 + a)));
}
|
|
|
|
/**
 * Function approximately de-linearizes the linear gamma value.
 *
 * Values not greater than 0.0031308 are multiplied by 12.92 (linear
 * segment); larger values go through the pow24i_sRGB() power 1/2.4
 * approximation.
 *
 * @param s Linear gamma value, in the range 0 to 1.
 * @return sRGB gamma value, approximated.
 */

template <class T>
inline T convertLin2SRGB(const T s) {
  const T a = (T)0.055;

  if (s <= (T)0.0031308) {
    return ((T)12.92 * s);
  }

  return (((T)1 + a) * pow24i_sRGB(s) - a);
}
|
|
|
|
/**
 * Function converts (via typecast) specified array of type T1 values of
 * length l into array of type T2 values. If T1 is the same as T2, copy
 * operation is performed. When copying data at overlapping address spaces,
 * "op" should be lower than "ip".
 *
 * Elements are processed one at a time, from the first to the last. Nothing
 * is copied if "l" is not positive.
 *
 * @param ip Input buffer.
 * @param[out] op Output buffer.
 * @param l The number of elements to copy.
 * @param ipinc Input buffer pointer increment.
 * @param opinc Output buffer pointer increment.
 */

template <class T1, class T2>
inline void copyArray(const T1* ip, T2* op, int l, const int ipinc = 1,
                      const int opinc = 1) {
  for (; l > 0; l--) {
    *op = (T2)*ip;
    op += opinc;
    ip += ipinc;
  }
}
|
|
|
|
/**
 * Function adds values located in array "ip" to array "op".
 *
 * Nothing is added if "l" is not positive.
 *
 * @param ip Input buffer.
 * @param[out] op Output buffer.
 * @param l The number of elements to add.
 * @param ipinc Input buffer pointer increment.
 * @param opinc Output buffer pointer increment.
 */

template <class T1, class T2>
inline void addArray(const T1* ip, T2* op, int l, const int ipinc = 1,
                     const int opinc = 1) {
  for (; l > 0; l--) {
    *op += *ip;
    op += opinc;
    ip += ipinc;
  }
}
|
|
|
|
/**
 * Function that replicates a set of adjacent elements several times in a row.
 * This operation is usually used to replicate pixels at the start or end of
 * image's scanline.
 *
 * Source lengths of 1 to 4 elements are handled by dedicated unrolled loops;
 * any other length is handled by a generic per-element loop.
 *
 * @param ip Source array.
 * @param ipl Source array length (usually 1..4, but can be any number).
 * @param[out] op Destination buffer.
 * @param l Number of times the source array should be replicated (the
 * destination buffer should be able to hold ipl * l number of elements).
 * @param opinc Destination buffer position increment after replicating the
 * source array. This value should be equal to at least ipl.
 */

template <class T1, class T2>
inline void replicateArray(const T1* const ip, const int ipl, T2* op, int l,
                           const int opinc) {
  switch (ipl) {
    case 1:
      for (; l > 0; l--) {
        op[0] = ip[0];
        op += opinc;
      }
      break;

    case 2:
      for (; l > 0; l--) {
        op[0] = ip[0];
        op[1] = ip[1];
        op += opinc;
      }
      break;

    case 3:
      for (; l > 0; l--) {
        op[0] = ip[0];
        op[1] = ip[1];
        op[2] = ip[2];
        op += opinc;
      }
      break;

    case 4:
      for (; l > 0; l--) {
        op[0] = ip[0];
        op[1] = ip[1];
        op[2] = ip[2];
        op[3] = ip[3];
        op += opinc;
      }
      break;

    default:
      // Generic path for any other source length.
      for (; l > 0; l--) {
        int i;

        for (i = 0; i < ipl; i++) {
          op[i] = ip[i];
        }

        op += opinc;
      }
      break;
  }
}
|
|
|
|
/**
 * Function calculates frequency response of the specified FIR filter at the
 * specified circular frequency. Phase can be calculated as atan2( im, re ).
 * Function uses computationally-efficient oscillators instead of "cos" and
 * "sin" functions.
 *
 * For tap index k the function accumulates flt[ k ] multiplied by the cosine
 * (real part) and sine (imaginary part) of ( k - fltlat ) * th. The
 * oscillators are advanced with the recurrence
 * v[ k + 1 ] = 2 * cos( th ) * v[ k ] - v[ k - 1 ].
 *
 * @param flt FIR filter's coefficients.
 * @param fltlen Number of coefficients (taps) in the filter.
 * @param th Circular frequency [0; pi].
 * @param[out] re0 Resulting real part of the complex frequency response.
 * @param[out] im0 Resulting imaginary part of the complex frequency response.
 * @param fltlat Filter's latency in samples (taps).
 */

template <class T>
inline void calcFIRFilterResponse(const T* flt, int fltlen, const double th,
                                  double& re0, double& im0,
                                  const int fltlat = 0) {
  const double sincr = 2.0 * cos(th);
  double cvalue1;
  double svalue1;

  if (fltlat == 0) {
    cvalue1 = 1.0;
    svalue1 = 0.0;
  } else {
    cvalue1 = cos(-fltlat * th);
    svalue1 = sin(-fltlat * th);
  }

  // Oscillator values one step before the starting phase.
  double cvalue2 = cos(-(fltlat + 1) * th);
  double svalue2 = sin(-(fltlat + 1) * th);

  double re = 0.0;
  double im = 0.0;

  for (; fltlen > 0; fltlen--) {
    re += cvalue1 * flt[0];
    im += svalue1 * flt[0];
    flt++;

    // Advance the cosine oscillator.
    double tmp = cvalue1;
    cvalue1 = sincr * cvalue1 - cvalue2;
    cvalue2 = tmp;

    // Advance the sine oscillator.
    tmp = svalue1;
    svalue1 = sincr * svalue1 - svalue2;
    svalue2 = tmp;
  }

  re0 = re;
  im0 = im;
}
|
|
|
|
/**
 * Function normalizes FIR filter so that its frequency response at DC is
 * equal to DCGain.
 *
 * The coefficients are summed, and each coefficient is then multiplied by
 * DCGain divided by that sum. The sum is not checked for being zero.
 *
 * @param[in,out] p Filter coefficients.
 * @param l Filter length.
 * @param DCGain Filter's gain at DC.
 * @param pstep "p" array step.
 */

template <class T>
inline void normalizeFIRFilter(T* const p, const int l, const double DCGain,
                               const int pstep = 1) {
  double s = 0.0;
  T* pp = p;
  int i;

  for (i = l; i > 0; i--) {
    s += *pp;
    pp += pstep;
  }

  s = DCGain / s;  // Scaling factor applied to every coefficient.
  pp = p;

  for (i = l; i > 0; i--) {
    *pp = (T)(*pp * s);
    pp += pstep;
  }
}
|
|
|
|
/**
 * @brief Memory buffer class for element array storage, with capacity
 * tracking.
 *
 * Allows easier handling of memory blocks allocation and automatic
 * deallocation for arrays (buffers) consisting of elements of specified
 * class. Tracks buffer's capacity in "int" variable; unsuitable for
 * allocation of very large memory blocks (with more than 2 billion elements).
 *
 * This class manages memory space only - it does not perform element class
 * construction (initialization) operations. Buffer's required memory address
 * alignment specification is supported.
 *
 * Uses standard library to allocate and deallocate memory. Allocation
 * results are not checked for failure.
 *
 * @tparam T Buffer element's type.
 * @tparam capint Buffer capacity's type to use. Use size_t for large buffers.
 */

template <class T, typename capint = int>
class CBuffer {
 public:
  CBuffer() : Data(NULL), DataAligned(NULL), Capacity(0), Alignment(0) {}

  /**
   * Constructor creates the buffer with the specified capacity.
   *
   * @param aCapacity Buffer's capacity.
   * @param aAlignment Buffer's required memory address alignment. 0 - use
   * stdlib's default alignment.
   */

  CBuffer(const capint aCapacity, const int aAlignment = 0) {
    allocinit(aCapacity, aAlignment);
  }

  /**
   * Copy constructor allocates a buffer with the source's capacity and
   * alignment, and copies the source's contents byte-wise.
   *
   * @param Source Source buffer to copy.
   */

  CBuffer(const CBuffer& Source) {
    allocinit(Source.Capacity, Source.Alignment);
    copyFrom(Source.DataAligned, Capacity);
  }

  ~CBuffer() { freeData(); }

  /**
   * Assignment operator reallocates *this buffer with the source's capacity
   * and alignment, and copies the source's contents byte-wise.
   * Self-assignment leaves the buffer untouched.
   *
   * @param Source Source buffer to copy.
   * @return Reference to *this object.
   */

  CBuffer& operator=(const CBuffer& Source) {
    if (this == &Source) {
      // alloc() would release the very memory that is about to be copied.
      return (*this);
    }

    alloc(Source.Capacity, Source.Alignment);
    copyFrom(Source.DataAligned, Capacity);

    return (*this);
  }

  /**
   * Function allocates memory so that the specified number of elements
   * can be stored in *this buffer object. Any previously allocated memory
   * is released first; previous contents are not retained.
   *
   * @param aCapacity Storage for this number of elements to allocate.
   * @param aAlignment Buffer's required memory address alignment,
   * power-of-2 values only. 0 - use stdlib's default alignment.
   */

  void alloc(const capint aCapacity, const int aAlignment = 0) {
    freeData();
    allocinit(aCapacity, aAlignment);
  }

  /**
   * Function deallocates any previously allocated buffer, and resets *this
   * object to the empty state.
   */

  void free() {
    freeData();
    Data = NULL;
    DataAligned = NULL;
    Capacity = 0;
    Alignment = 0;
  }

  /**
   * @return The capacity of the element buffer.
   */

  capint getCapacity() const { return (Capacity); }

  /**
   * Function "forces" *this buffer to have an arbitrary capacity. Calling
   * this function invalidates all further operations except deleting *this
   * object. This function should not be usually used at all. Function can
   * be used to "model" certain buffer capacity without calling a costly
   * memory allocation function.
   *
   * @param NewCapacity A new "forced" capacity.
   */

  void forceCapacity(const capint NewCapacity) { Capacity = NewCapacity; }

  /**
   * Function reallocates *this buffer to a larger size so that it will be
   * able to hold the specified number of elements. Downsizing is not
   * performed. Alignment is not changed.
   *
   * @param NewCapacity New (increased) capacity.
   * @param DoDataCopy "True" if data in the buffer should be retained.
   */

  void increaseCapacity(const capint NewCapacity,
                        const bool DoDataCopy = true) {
    if (NewCapacity < Capacity) {
      return;
    }

    if (DoDataCopy) {
      const capint PrevCapacity = Capacity;
      T* const PrevData = Data;
      T* const PrevDataAligned = DataAligned;

      allocinit(NewCapacity, Alignment);
      copyFrom(PrevDataAligned, PrevCapacity);

      ::free(PrevData);
    } else {
      ::free(Data);
      allocinit(NewCapacity, Alignment);
    }
  }

  /**
   * Function "truncates" (reduces) capacity of the buffer without
   * reallocating it. Alignment is not changed.
   *
   * @param NewCapacity New required capacity.
   */

  void truncateCapacity(const capint NewCapacity) {
    if (NewCapacity >= Capacity) {
      return;
    }

    Capacity = NewCapacity;
  }

  /**
   * Function increases capacity so that the specified number of
   * elements can be stored. This function increases the previous capacity
   * value by third the current capacity value until space for the required
   * number of elements is available. Alignment is not changed.
   *
   * @param ReqCapacity Required capacity.
   */

  void updateCapacity(const capint ReqCapacity) {
    if (ReqCapacity <= Capacity) {
      return;
    }

    capint NewCapacity = Capacity;

    while (NewCapacity < ReqCapacity) {
      NewCapacity += NewCapacity / 3 + 1;
    }

    increaseCapacity(NewCapacity);
  }

  /**
   * @return Pointer to the first (address-aligned) element of the buffer.
   */

  operator T*() const { return (DataAligned); }

 private:
  T* Data;  ///< Element buffer pointer.
            ///<
  T* DataAligned;  ///< Memory address-aligned element buffer pointer.
                   ///<
  capint Capacity;  ///< Element buffer capacity.
                    ///<
  int Alignment;  ///< Memory address alignment in use. 0 - use stdlib's
                  ///< default alignment.
                  ///<

  /**
   * Internal element buffer allocation function used during object
   * construction. Does not release any previously allocated memory.
   *
   * @param aCapacity Storage for this number of elements to allocate.
   * @param aAlignment Buffer's required memory address alignment. 0 - use
   * stdlib's default alignment.
   */

  void allocinit(const capint aCapacity, const int aAlignment) {
    if (aAlignment == 0) {
      Data = (T*)::malloc(aCapacity * sizeof(T));
      DataAligned = Data;
      Alignment = 0;
    } else {
      // Over-allocate by "aAlignment" bytes to leave room for alignment.
      Data = (T*)::malloc(aCapacity * sizeof(T) + aAlignment);
      DataAligned = alignptr(Data, aAlignment);
      Alignment = aAlignment;
    }

    Capacity = aCapacity;
  }

  /**
   * Function copies the specified number of elements (byte-wise) into the
   * start of the aligned buffer. Performs no call to memcpy() when there is
   * nothing to copy, since the source pointer may be NULL in that case.
   *
   * @param Src Source element pointer.
   * @param Count The number of elements to copy.
   */

  void copyFrom(const T* const Src, const capint Count) {
    if (Count > 0) {
      memcpy(DataAligned, Src, Count * sizeof(T));
    }
  }

  /**
   * Function frees a previously allocated Data buffer.
   */

  void freeData() { ::free(Data); }

  /**
   * Function modifies the specified pointer so that it becomes memory
   * address-aligned.
   *
   * @param ptr Pointer to align.
   * @param align Alignment in bytes to apply.
   * @return Pointer aligned to align bytes. Works with power-of-2
   * alignments only. If no alignment is necessary, "align" bytes will be
   * added to the pointer value.
   */

  template <class Tp>
  inline Tp alignptr(const Tp ptr, const uintptr_t align) {
    return ((Tp)((uintptr_t)ptr + align - ((uintptr_t)ptr & (align - 1))));
  }
};
|
|
|
|
/**
|
|
* Function optimizes the length of the symmetric-odd FIR filter by removing
|
|
* left- and rightmost elements that are below specific threshold.
|
|
*
|
|
* Synthetic test shows that filter gets optimized in 2..3% of cases and in
|
|
* each such case optimization reduces filter length by 6..8%. Optimization,
|
|
* however, may skew the results of algorithm modeling and complexity
|
|
* calculation leading to a choice of a less optimal algorithm.
|
|
*
|
|
* @param[in,out] Flt Buffer that contains filter being optimized.
|
|
* @param[in,out] FltLatency Variable that holds the current latency of the
|
|
* filter. May be adjusted on function return.
|
|
* @param Threshold Threshold level.
|
|
*/
|
|
|
|
template <class T>
|
|
inline void optimizeFIRFilter(CBuffer<T>& Flt, int& FltLatency,
|
|
T const Threshold = (T)0.00001) {
|
|
int i;
|
|
|
|
// Optimize length.
|
|
|
|
for (i = 0; i <= FltLatency; i++) {
|
|
if (fabs(Flt[i]) >= Threshold || i == FltLatency) {
|
|
if (i > 0) {
|
|
const int NewCapacity = Flt.getCapacity() - i * 2;
|
|
copyArray(&Flt[i], &Flt[0], NewCapacity);
|
|
Flt.truncateCapacity(NewCapacity);
|
|
FltLatency -= i;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief Array of structured objects.
|
|
*
|
|
* Implements allocation of a linear array of objects of class T (which are
|
|
* initialized), addressable via operator[]. Each object is created via the
|
|
* "operator new". New object insertions are quick since implementation uses
|
|
* prior space allocation (capacity), thus not requiring frequent memory block
|
|
* reallocations.
|
|
*
|
|
* @tparam T Array element's type.
|
|
*/
|
|
|
|
template <class T>
|
|
class CStructArray {
|
|
public:
|
|
CStructArray() : ItemCount(0) {}
|
|
|
|
CStructArray(const CStructArray& Source)
|
|
: ItemCount(0), Items(Source.getItemCount()) {
|
|
while (ItemCount < Source.getItemCount()) {
|
|
Items[ItemCount] = new T(Source[ItemCount]);
|
|
ItemCount++;
|
|
}
|
|
}
|
|
|
|
~CStructArray() { clear(); }
|
|
|
|
CStructArray& operator=(const CStructArray& Source) {
|
|
clear();
|
|
|
|
const int NewCount = Source.ItemCount;
|
|
Items.updateCapacity(NewCount);
|
|
|
|
while (ItemCount < NewCount) {
|
|
Items[ItemCount] = new T(Source[ItemCount]);
|
|
ItemCount++;
|
|
}
|
|
|
|
return (*this);
|
|
}
|
|
|
|
T& operator[](const int Index) { return (*Items[Index]); }
|
|
|
|
const T& operator[](const int Index) const { return (*Items[Index]); }
|
|
|
|
/**
|
|
* Function creates a new object of type T with the default constructor
|
|
* and adds this object to the array.
|
|
*
|
|
* @return Reference to a newly added object.
|
|
*/
|
|
|
|
T& add() {
|
|
if (ItemCount == Items.getCapacity()) {
|
|
Items.increaseCapacity(ItemCount * 3 / 2 + 1);
|
|
}
|
|
|
|
Items[ItemCount] = new T();
|
|
ItemCount++;
|
|
|
|
return ((*this)[ItemCount - 1]);
|
|
}
|
|
|
|
/**
|
|
* Function changes number of allocated items. New items are created with
|
|
* the default constructor. If NewCount is below the current item count,
|
|
* items that are above NewCount range will be destructed.
|
|
*
|
|
* @param NewCount New requested item count.
|
|
*/
|
|
|
|
void setItemCount(const int NewCount) {
|
|
if (NewCount > ItemCount) {
|
|
Items.increaseCapacity(NewCount);
|
|
|
|
while (ItemCount < NewCount) {
|
|
Items[ItemCount] = new T();
|
|
ItemCount++;
|
|
}
|
|
} else {
|
|
while (ItemCount > NewCount) {
|
|
ItemCount--;
|
|
delete Items[ItemCount];
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function erases all items of *this array.
|
|
*/
|
|
|
|
void clear() {
|
|
while (ItemCount > 0) {
|
|
ItemCount--;
|
|
delete Items[ItemCount];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return The number of allocated items.
|
|
*/
|
|
|
|
int getItemCount() const { return (ItemCount); }
|
|
|
|
private:
|
|
int ItemCount; ///< The number of items available in the array.
|
|
///<
|
|
CBuffer<T*> Items; ///< Element buffer.
|
|
///<
|
|
};
|
|
|
|
/**
 * @brief Sine signal generator class.
 *
 * Class implements sine signal generator without biasing, with
 * constructor-based initialization only. This generator uses oscillator
 * instead of "sin" function: each next value is derived from the two
 * previous values as 2 * cos( si ) * v1 - v2.
 */

class CSineGen {
 public:
  /**
   * Constructor initializes *this sine signal generator.
   *
   * @param si Sine function increment, in radians.
   * @param ph Starting phase, in radians. Add 0.5 * AVIR_PI for cosine
   * function.
   */

  CSineGen(const double si, const double ph)
      : svalue1(sin(ph)), svalue2(sin(ph - si)), sincr(2.0 * cos(si)) {}

  /**
   * @return The next value of the sine function, without biasing.
   */

  double generate() {
    const double res = svalue1;

    // Advance the oscillator by one increment.
    svalue1 = sincr * res - svalue2;
    svalue2 = res;

    return (res);
  }

 private:
  double svalue1;  ///< Current sine value.
                   ///<
  double svalue2;  ///< Previous sine value.
                   ///<
  double sincr;  ///< Sine value increment.
                 ///<
};
|
|
|
|
/**
|
|
* @brief Peaked Cosine window function generator class.
|
|
*
|
|
* Class implements Peaked Cosine window function generator. Generates the
|
|
* right-handed half of the window function. The Alpha parameter of this
|
|
* window function offers the control of the balance between the early and
|
|
* later taps of the filter. E.g. at Alpha=1 both early and later taps are
|
|
* attenuated, but at Alpha=4 mostly later taps are attenuated. This offers a
|
|
* great control over ringing artifacts produced by a low-pass filter in image
|
|
* processing, without compromising achieved image sharpness.
|
|
*/
|
|
|
|
class CDSPWindowGenPeakedCosine {
|
|
public:
|
|
/**
|
|
* Constructor initializes *this window function generator.
|
|
*
|
|
* @param aAlpha Alpha parameter, affects the peak shape (peak
|
|
* augmentation) of the window function. Should be >= 1.0.
|
|
* @param aLen2 Half filter's length (non-truncated).
|
|
*/
|
|
|
|
CDSPWindowGenPeakedCosine(const double aAlpha, const double aLen2)
|
|
: Alpha(aAlpha),
|
|
Len2(aLen2),
|
|
wn(0),
|
|
w1(AVIR_PId2 / Len2, AVIR_PI * 0.5) {}
|
|
|
|
/**
|
|
* @return The next Peaked Cosine window function coefficient.
|
|
*/
|
|
|
|
double generate() {
|
|
const double h = pow(wn / Len2, Alpha);
|
|
wn++;
|
|
|
|
return (w1.generate() * (1.0 - h));
|
|
}
|
|
|
|
private:
|
|
double Alpha; ///< Alpha parameter, affects the peak shape of window.
|
|
///<
|
|
double Len2; ///< Half length of the window function.
|
|
///<
|
|
int wn; ///< Window function integer position. 0 - center of the
|
|
///< window function.
|
|
///<
|
|
CSineGen w1; ///< Sine-wave generator.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief FIR filter-based equalizer generator.
|
|
*
|
|
* Class implements an object used to generate symmetric-odd FIR filters with
|
|
* the specified frequency response (aka paragraphic equalizer). The
|
|
* calculated filter is windowed by the Peaked Cosine window function.
|
|
*
|
|
* In image processing, due to short length of filters being used (6-8 taps)
|
|
* the resulting frequency response of the filter is approximate and may be
|
|
* mathematically imperfect, but still adequate to the visual requirements.
|
|
*
|
|
* On a side note, this equalizer generator can be successfully used for audio
|
|
* signal equalization as well: for example, it is used in almost the same
|
|
* form in Voxengo Marvel GEQ equalizer plug-in.
|
|
*
|
|
* Filter generation is based on decomposition of frequency range into
|
|
* spectral bands, with each band represented by linear and ramp "kernels".
|
|
* When the filter is built, these kernels are combined together with
|
|
* different weights that approximate the required frequency response.
|
|
*/
|
|
|
|
class CDSPFIREQ {
|
|
public:
|
|
/**
|
|
* Function initializes *this object with the required parameters. The
|
|
* gain of frequencies beyond the MinFreq..MaxFreq range are controlled by
|
|
* the first and the last band's gain.
|
|
*
|
|
* @param SampleRate Processing sample rate (use 2 for image processing).
|
|
* @param aFilterLength Required filter length in samples (taps). The
|
|
* actual filter length is truncated to an integer value.
|
|
* @param aBandCount Number of band crossover points required to control,
|
|
* including bands at MinFreq and MaxFreq.
|
|
* @param MinFreq Minimal frequency that should be controlled.
|
|
* @param MaxFreq Maximal frequency that should be controlled.
|
|
* @param IsLogBands "True" if the bands should be spaced logarithmically.
|
|
* @param WFAlpha Peaked Cosine window function's Alpha parameter.
|
|
*/
|
|
|
|
void init(const double SampleRate, const double aFilterLength,
|
|
const int aBandCount, const double MinFreq, const double MaxFreq,
|
|
const bool IsLogBands, const double WFAlpha) {
|
|
FilterLength = aFilterLength;
|
|
BandCount = aBandCount;
|
|
|
|
CenterFreqs.alloc(BandCount);
|
|
|
|
z = (int)ceil(FilterLength * 0.5);
|
|
zi = z + (z & 1);
|
|
z2 = z * 2;
|
|
|
|
CBuffer<double> oscbuf(z2);
|
|
initOscBuf(oscbuf);
|
|
|
|
CBuffer<double> winbuf(z);
|
|
initWinBuf(winbuf, WFAlpha);
|
|
|
|
UseFirstVirtBand = (MinFreq > 0.0);
|
|
const int k = zi * (BandCount + (UseFirstVirtBand ? 1 : 0));
|
|
Kernels1.alloc(k);
|
|
Kernels2.alloc(k);
|
|
|
|
double m; // Frequency step multiplier.
|
|
double mo; // Frequency step offset (addition).
|
|
|
|
if (IsLogBands) {
|
|
m = exp(log(MaxFreq / MinFreq) / (BandCount - 1));
|
|
mo = 0.0;
|
|
} else {
|
|
m = 1.0;
|
|
mo = (MaxFreq - MinFreq) / (BandCount - 1);
|
|
}
|
|
|
|
double f = MinFreq;
|
|
double x1 = 0.0;
|
|
double x2;
|
|
int si;
|
|
|
|
if (UseFirstVirtBand) {
|
|
si = 0;
|
|
} else {
|
|
si = 1;
|
|
CenterFreqs[0] = 0.0;
|
|
f = f * m + mo;
|
|
}
|
|
|
|
double* kernbuf1 = &Kernels1[0];
|
|
double* kernbuf2 = &Kernels2[0];
|
|
int i;
|
|
|
|
for (i = si; i < BandCount; i++) {
|
|
x2 = f * 2.0 / SampleRate;
|
|
CenterFreqs[i] = x2;
|
|
|
|
fillBandKernel(x1, x2, kernbuf1, kernbuf2, oscbuf, winbuf);
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
f = f * m + mo;
|
|
}
|
|
|
|
if (x1 < 1.0) {
|
|
UseLastVirtBand = true;
|
|
fillBandKernel(x1, 1.0, kernbuf1, kernbuf2, oscbuf, winbuf);
|
|
} else {
|
|
UseLastVirtBand = false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return Filter's length, in samples (taps).
|
|
*/
|
|
|
|
int getFilterLength() const { return (z2 - 1); }
|
|
|
|
/**
|
|
* @return Filter's latency (group delay), in samples (taps).
|
|
*/
|
|
|
|
int getFilterLatency() const { return (z - 1); }
|
|
|
|
/**
|
|
* Function creates symmetric-odd FIR filter with the specified gain
|
|
* levels at band crossover points.
|
|
*
|
|
* @param BandGains Array of linear gain levels, count=BandCount specified
|
|
* in the init() function.
|
|
* @param[out] Filter Output filter buffer, length = getFilterLength().
|
|
*/
|
|
|
|
void buildFilter(const double* const BandGains, double* const Filter) {
|
|
const double* kernbuf1 = &Kernels1[0];
|
|
const double* kernbuf2 = &Kernels2[0];
|
|
double x1 = 0.0;
|
|
double y1 = BandGains[0];
|
|
double x2;
|
|
double y2;
|
|
|
|
int i;
|
|
int si;
|
|
|
|
if (UseFirstVirtBand) {
|
|
si = 1;
|
|
x2 = CenterFreqs[0];
|
|
y2 = y1;
|
|
} else {
|
|
si = 2;
|
|
x2 = CenterFreqs[1];
|
|
y2 = BandGains[1];
|
|
}
|
|
|
|
copyBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1);
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
y1 = y2;
|
|
|
|
for (i = si; i < BandCount; i++) {
|
|
x2 = CenterFreqs[i];
|
|
y2 = BandGains[i];
|
|
|
|
addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1);
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
y1 = y2;
|
|
}
|
|
|
|
if (UseLastVirtBand) {
|
|
addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - y1);
|
|
}
|
|
|
|
for (i = 0; i < z - 1; i++) {
|
|
Filter[z + i] = Filter[z - 2 - i];
|
|
}
|
|
}
|
|
|
|
/**
 * Function calculates filter's length (in samples) and latency depending
 * on the required non-truncated filter length.
 *
 * The half-length is the required length multiplied by 0.5 and rounded up;
 * the resulting filter length is always odd.
 *
 * @param aFilterLength Required filter length in samples (non-truncated).
 * @param[out] Latency Resulting latency (group delay) of the filter,
 * in samples (taps).
 * @return Filter length in samples (taps).
 */

static int calcFilterLength(const double aFilterLength, int& Latency) {
  const int l = (int)ceil(aFilterLength * 0.5);
  Latency = l - 1;

  return (l * 2 - 1);
}
|
|
|
|
private:
|
|
double FilterLength; ///< Length of filter.
|
|
///<
|
|
int z; ///< Equals (int) ceil( FilterLength * 0.5 ).
|
|
///<
|
|
int zi; ///< Equals "z" if z is even, or z + 1 if z is odd. Used as a
|
|
///< Kernels1 and Kernels2 size multiplier and kernel buffer
|
|
///< increment to make sure each kernel buffer is 16-byte aligned.
|
|
///<
|
|
int z2; ///< Equals z * 2.
|
|
///<
|
|
int BandCount; ///< Number of controllable bands.
|
|
///<
|
|
CBuffer<double> CenterFreqs; ///< Center frequencies for all bands,
|
|
///< normalized to 0.0-1.0 range.
|
|
///<
|
|
CBuffer<double> Kernels1; ///< Half-length kernel buffers for each
|
|
///< spectral band (linear part).
|
|
///<
|
|
CBuffer<double> Kernels2; ///< Half-length kernel buffers for each
|
|
///< spectral band (ramp part).
|
|
///<
|
|
bool UseFirstVirtBand; ///< "True" if the first virtual band
|
|
///< (between 0.0 and MinFreq) should be used. The
|
|
///< first virtual band won't be used if MinFreq
|
|
///< equals 0.0.
|
|
///<
|
|
bool UseLastVirtBand; ///< "True" if the last virtual band (between
|
|
///< MaxFreq and SampleRate * 0.5) should be used. The
|
|
///< last virtual band won't be used if MaxFreq * 2.0
|
|
///< equals SampleRate.
|
|
///<
|
|
|
|
/**
|
|
* Function initializes the "oscbuf" used in the fillBandKernel()
|
|
* function.
|
|
*
|
|
* @param oscbuf Oscillator buffer, length = z * 2.
|
|
*/
|
|
|
|
void initOscBuf(double* oscbuf) const {
|
|
int i = z;
|
|
|
|
while (i > 0) {
|
|
oscbuf[0] = 0.0;
|
|
oscbuf[1] = 1.0;
|
|
oscbuf += 2;
|
|
i--;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function initializes window function buffer. This function generates
|
|
* Peaked Cosine window function.
|
|
*
|
|
* @param winbuf Windowing buffer.
|
|
* @param Alpha Peaked Cosine alpha parameter.
|
|
*/
|
|
|
|
void initWinBuf(double* winbuf, const double Alpha) const {
|
|
CDSPWindowGenPeakedCosine wf(Alpha, FilterLength * 0.5);
|
|
int i;
|
|
|
|
for (i = 1; i <= z; i++) {
|
|
winbuf[z - i] = wf.generate();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function fills first half of symmetric-odd FIR kernel for the band.
|
|
* This function should be called successively for adjacent bands.
|
|
* Previous band's x2 should be equal to current band's x1. A band kernel
|
|
* consists of 2 elements: linear kernel and ramp kernel.
|
|
*
|
|
* @param x1 Band's left corner frequency (0..1).
|
|
* @param x2 Band's right corner frequency (0..1).
|
|
* @param kernbuf1 Band kernel buffer 1 (linear part), length = z.
|
|
* @param kernbuf2 Band kernel buffer 2 (ramp part), length = z.
|
|
* @param oscbuf Oscillation buffer. Before the first call of the
|
|
* fillBandKernel() should be initialized with the call of the
|
|
* initOscBuf() function.
|
|
* @param winbuf Buffer that contains windowing function.
|
|
*/
|
|
|
|
void fillBandKernel(const double x1, const double x2, double* kernbuf1,
                    double* kernbuf2, double* oscbuf,
                    const double* const winbuf) {
  // Phase step of the x2 oscillator and the multiplier of its two-term
  // recurrence: v[n + 1] = recur * v[n] - v[n - 1].
  const double step = AVIR_PI * x2;
  const double recur = 2.0 * cos(step);

  // Oscillator values at position (1 - z); the cosine is produced as a sine
  // shifted by pi/2. Slot 0 receives the values at position -z and only
  // serves as the seed of the recurrence.
  double sinCur = sin(step * (-z + 1));
  double cosCur = sin(step * (-z + 1) + AVIR_PI * 0.5);
  oscbuf[0] = sin(step * -z);
  oscbuf[1] = sin(step * -z + AVIR_PI * 0.5);

  for (int tap = 0; tap < z - 1; tap++) {
    double* const slot = oscbuf + (tap + 1) * 2;

    // Values left in the slot by the preceding call (or by initOscBuf());
    // they are paired with x1 below. The slot is then refreshed with the
    // current x2 values for use by the next call.
    const double sinPrev = slot[0];
    const double cosPrev = slot[1];
    slot[0] = sinCur;
    slot[1] = cosCur;

    const double x = AVIR_PI * (tap + 1 - z);
    const double v0 = winbuf[tap] / ((x1 - x2) * x);

    kernbuf1[tap] =
        (x2 * sinCur - x1 * sinPrev + (cosCur - cosPrev) / x) * v0;

    kernbuf2[tap] = (sinCur - sinPrev) * v0;

    // Advance the oscillator using the value stored one slot earlier.
    sinCur = recur * sinCur - slot[-2];
    cosCur = recur * cosCur - slot[-1];
  }

  // The last element (x == 0) is assigned in closed form, without
  // windowing.
  kernbuf1[z - 1] = (x2 * x2 - x1 * x1) / (x1 - x2) * 0.5;
  kernbuf2[z - 1] = -1.0;
}
|
|
|
|
/**
|
|
* Function copies band kernel's elements to the output buffer.
|
|
*
|
|
* @param outbuf Output buffer.
|
|
* @param kernbuf1 Kernel buffer 1 (linear part).
|
|
* @param kernbuf2 Kernel buffer 2 (ramp part).
|
|
* @param c Multiplier for linear kernel element.
|
|
* @param d Multiplier for ramp kernel element.
|
|
*/
|
|
|
|
void copyBandKernel(double* outbuf, const double* const kernbuf1,
|
|
const double* const kernbuf2, const double c,
|
|
const double d) const {
|
|
int ks;
|
|
|
|
for (ks = 0; ks < z; ks++) {
|
|
outbuf[ks] = c * kernbuf1[ks] + d * kernbuf2[ks];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function adds band kernel's elements to the output buffer.
|
|
*
|
|
* @param outbuf Output buffer.
|
|
* @param kernbuf1 Kernel buffer 1 (linear part).
|
|
* @param kernbuf2 Kernel buffer 2 (ramp part).
|
|
* @param c Multiplier for linear kernel element.
|
|
* @param d Multiplier for ramp kernel element.
|
|
*/
|
|
|
|
void addBandKernel(double* outbuf, const double* const kernbuf1,
|
|
const double* const kernbuf2, const double c,
|
|
const double d) const {
|
|
int ks;
|
|
|
|
for (ks = 0; ks < z; ks++) {
|
|
outbuf[ks] += c * kernbuf1[ks] + d * kernbuf2[ks];
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Low-pass filter windowed by Peaked Cosine window function.
|
|
*
|
|
* This class implements calculation of linear-phase symmetric-odd FIR
|
|
* low-pass filter windowed by the Peaked Cosine window function, for image
|
|
* processing applications.
|
|
*/
|
|
|
|
class CDSPPeakedCosineLPF {
|
|
public:
|
|
int fl2; ///< Half filter's length, excluding the peak value. This value
|
|
///< can be also used as filter's latency in samples (taps).
|
|
///<
|
|
int FilterLen; ///< Filter's length in samples (taps).
|
|
///<
|
|
|
|
/**
|
|
* Constructor initalizes *this object.
|
|
*
|
|
* @param aLen2 Half-length (non-truncated) of low-pass filter, in samples
|
|
* (taps).
|
|
* @param aFreq2 Low-pass filter's corner frequency [0; pi].
|
|
* @param aAlpha Peaked Cosine window function Alpha parameter.
|
|
*/
|
|
|
|
CDSPPeakedCosineLPF(const double aLen2, const double aFreq2,
|
|
const double aAlpha)
|
|
: fl2((int)ceil(aLen2) - 1),
|
|
FilterLen(fl2 + fl2 + 1),
|
|
Len2(aLen2),
|
|
Freq2(aFreq2),
|
|
Alpha(aAlpha) {}
|
|
|
|
/**
|
|
* Function generates a linear-phase low-pass filter windowed by Peaked
|
|
* Cosine window function.
|
|
*
|
|
* @param[out] op Output buffer, length = FilterLen (fl2 * 2 + 1).
|
|
* @param DCGain Required gain at DC. The resulting filter will be
|
|
* normalized to achieve this DC gain.
|
|
*/
|
|
|
|
template <class T>
|
|
void generateLPF(T* op, const double DCGain) {
|
|
CDSPWindowGenPeakedCosine wf(Alpha, Len2);
|
|
CSineGen f2(Freq2, 0.0);
|
|
|
|
op += fl2;
|
|
T* op2 = op;
|
|
f2.generate();
|
|
int t = 1;
|
|
|
|
*op = (T)(Freq2 * wf.generate() / AVIR_PI);
|
|
double s = *op;
|
|
|
|
while (t <= fl2) {
|
|
const double v = f2.generate() * wf.generate() / t / AVIR_PI;
|
|
op++;
|
|
op2--;
|
|
*op = (T)v;
|
|
*op2 = (T)v;
|
|
s += *op + *op2;
|
|
t++;
|
|
}
|
|
|
|
t = FilterLen;
|
|
s = DCGain / s;
|
|
|
|
while (t > 0) {
|
|
*op2 = (T)(*op2 * s);
|
|
op2++;
|
|
t--;
|
|
}
|
|
}
|
|
|
|
private:
|
|
double Len2; ///< Half-length (non-truncated) of low-pass filter, in
|
|
///< samples (taps).
|
|
///<
|
|
double Freq2; ///< Low-pass filter's corner frequency.
|
|
///<
|
|
double Alpha; ///< Peaked Cosine window function Alpha parameter.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Buffer class for parametrized low-pass filter.
|
|
*
|
|
* This class extends the CBuffer< double > class by adding several variables
|
|
* that define a symmetric-odd FIR low-pass filter windowed by Peaked Cosine
|
|
* window function. This class can be used to compare filters without
|
|
* comparing their buffer contents.
|
|
*/
|
|
|
|
class CFltBuffer : public CBuffer<double> {
 public:
  double Len2;  ///< Half-length (non-truncated) of low-pass filters, in
                ///< samples (taps).
                ///<
  double Freq;  ///< Low-pass filter's corner frequency.
                ///<
  double Alpha;  ///< Peaked Cosine window function Alpha parameter.
                 ///<
  double DCGain;  ///< DC gain applied to the filter.
                  ///<

  /**
   * Constructor creates a buffer with all filter parameters set to 0.
   */
  CFltBuffer()
      : CBuffer<double>(), Len2(0.0), Freq(0.0), Alpha(0.0), DCGain(0.0) {}

  /**
   * Operator compares the filter parameters only; buffer contents are not
   * examined.
   *
   * @param b2 Filter buffer to compare *this object to.
   * @return Operator returns "true" if both filters have same parameters.
   */
  bool operator==(const CFltBuffer& b2) const {
    if (Len2 != b2.Len2 || Freq != b2.Freq) {
      return (false);
    }

    return (Alpha == b2.Alpha && DCGain == b2.DCGain);
  }
};
|
|
|
|
/**
|
|
* @brief Sinc function-based fractional delay filter bank.
|
|
*
|
|
* Class implements storage and initialization of a bank of sinc
|
|
* function-based fractional delay filters, expressed as 1st order polynomial
|
|
* interpolation coefficients. The filters are produced from a single "long"
|
|
* windowed low-pass filter. Also supports 0th-order ("nearest neighbor")
|
|
* interpolation.
|
|
*
|
|
* This class also supports multiplication of each fractional delay filter by
|
|
* an external filter (usually a low-pass filter).
|
|
*
|
|
* @tparam fptype Specifies storage type of the filter coefficients bank. The
|
|
* filters are initially calculated using the "double" precision.
|
|
*/
|
|
|
|
template <class fptype>
|
|
class CDSPFracFilterBankLin {
|
|
public:
|
|
CDSPFracFilterBankLin() : Order(-1) {}
|
|
|
|
/**
|
|
* Copy constructor copies a limited set of parameters of the source
|
|
* filter bank. The actual filters are not copied. Such copying is used
|
|
* during filtering steps "modeling" stage. A further init() function
|
|
* call is required.
|
|
*
|
|
* @param s Source filter bank.
|
|
*/
|
|
|
|
void copyInitParams(const CDSPFracFilterBankLin& s) {
|
|
WFLen2 = s.WFLen2;
|
|
WFFreq = s.WFFreq;
|
|
WFAlpha = s.WFAlpha;
|
|
FracCount = s.FracCount;
|
|
Order = s.Order;
|
|
Alignment = s.Alignment;
|
|
SrcFilterLen = s.SrcFilterLen;
|
|
FilterLen = s.FilterLen;
|
|
FilterSize = s.FilterSize;
|
|
IsSrcTableBuilt = false;
|
|
ExtFilter = s.ExtFilter;
|
|
TableFillFlags.alloc(s.TableFillFlags.getCapacity());
|
|
int i;
|
|
|
|
// Copy table fill flags, but shifted so that further initialization
|
|
// is still possible (such feature should not be used, though).
|
|
|
|
for (i = 0; i < TableFillFlags.getCapacity(); i++) {
|
|
TableFillFlags[i] = (uint8_t)(s.TableFillFlags[i] << 2);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Operator compares *this filter bank and another filter bank and returns
|
|
* "true" if their parameters are equal. Alignment is not taken into
|
|
* account.
|
|
*
|
|
* @param s Filter bank to compare to.
|
|
* @return "True" if compared banks have equal parameters.
|
|
*/
|
|
|
|
bool operator==(const CDSPFracFilterBankLin& s) const {
|
|
return (Order == s.Order && WFLen2 == s.WFLen2 && WFFreq == s.WFFreq &&
|
|
WFAlpha == s.WFAlpha && FracCount == s.FracCount &&
|
|
ExtFilter == s.ExtFilter);
|
|
}
|
|
|
|
/**
|
|
* Function initializes (builds) the filter bank based on the supplied
|
|
* parameters. If the supplied parameters are equal to previously defined
|
|
* parameters, function does nothing (alignment is assumed to be never
|
|
* changing between the init() function calls).
|
|
*
|
|
* @param ReqFracCount Required number of fractional delays in the filter
|
|
* bank. The minimal value is 2.
|
|
* @param ReqOrder Required order of the interpolation polynomial
|
|
* (0 or 1).
|
|
* @param BaseLen Low-pass filter's base length, in samples (taps).
|
|
* Affects the actual length of the filter and its overall steepness.
|
|
* @param Cutoff Low-pass filter's normalized cutoff frequency [0; 1].
|
|
* @param aWFAlpha Peaked Cosine window function's Alpha parameter.
|
|
* @param aExtFilter External filter to apply to each fractional delay
|
|
* filter.
|
|
* @param aAlignment Memory alignment of the filter bank, power-of-2
|
|
* value. 0 - use default stdlib alignment.
|
|
* @param FltLenAlign Filter's length alignment, power-of-2 value.
|
|
*/
|
|
|
|
void init(const int ReqFracCount, const int ReqOrder, const double BaseLen,
|
|
const double Cutoff, const double aWFAlpha,
|
|
const CFltBuffer& aExtFilter, const int aAlignment = 0,
|
|
const int FltLenAlign = 1) {
|
|
double NewWFLen2 = 0.5 * BaseLen * ReqFracCount;
|
|
double NewWFFreq = AVIR_PI * Cutoff / ReqFracCount;
|
|
double NewWFAlpha = aWFAlpha;
|
|
|
|
if (ReqOrder == Order && NewWFLen2 == WFLen2 && NewWFFreq == WFFreq &&
|
|
NewWFAlpha == WFAlpha && ReqFracCount == FracCount &&
|
|
aExtFilter == ExtFilter) {
|
|
IsInitRequired = false;
|
|
return;
|
|
}
|
|
|
|
WFLen2 = NewWFLen2;
|
|
WFFreq = NewWFFreq;
|
|
WFAlpha = NewWFAlpha;
|
|
FracCount = ReqFracCount;
|
|
Order = ReqOrder;
|
|
Alignment = aAlignment;
|
|
ExtFilter = aExtFilter;
|
|
|
|
CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha);
|
|
SrcFilterLen = (p.fl2 / ReqFracCount + 1) * 2;
|
|
|
|
const int ElementSize = ReqOrder + 1;
|
|
FilterLen = SrcFilterLen;
|
|
|
|
if (ExtFilter.getCapacity() > 0) {
|
|
FilterLen += ExtFilter.getCapacity() - 1;
|
|
}
|
|
|
|
FilterLen = (FilterLen + FltLenAlign - 1) & ~(FltLenAlign - 1);
|
|
FilterSize = FilterLen * ElementSize;
|
|
IsSrcTableBuilt = false;
|
|
IsInitRequired = true;
|
|
}
|
|
|
|
/**
|
|
* @return The length of each fractional delay filter, in samples (taps).
|
|
* Always an even value.
|
|
*/
|
|
|
|
int getFilterLen() const { return (FilterLen); }
|
|
|
|
/**
|
|
* @return The number of fractional filters in use by *this bank.
|
|
*/
|
|
|
|
int getFracCount() const { return (FracCount); }
|
|
|
|
/**
|
|
* @return The order of the interpolation polynomial.
|
|
*/
|
|
|
|
int getOrder() const { return (Order); }
|
|
|
|
/**
|
|
* Function returns the pointer to the specified interpolation table
|
|
* filter.
|
|
*
|
|
* @param i Filter (fractional delay) index, in the range 0 to
|
|
* ReqFracCount - 1, inclusive.
|
|
* @return Pointer to filter. Higher order polynomial coefficients are
|
|
* stored after after previous order coefficients, separated by FilterLen
|
|
* elements.
|
|
*/
|
|
|
|
const fptype* getFilter(const int i) {
|
|
if (!IsSrcTableBuilt) {
|
|
buildSrcTable();
|
|
}
|
|
|
|
fptype* const Res = &Table[i * FilterSize];
|
|
|
|
if ((TableFillFlags[i] & 2) == 0) {
|
|
createFilter(i);
|
|
TableFillFlags[i] |= 2;
|
|
|
|
if (Order > 0) {
|
|
createFilter(i + 1);
|
|
const fptype* const Res2 = Res + FilterSize;
|
|
fptype* const op = Res + FilterLen;
|
|
int j;
|
|
|
|
// Create higher-order interpolation coefficients (linear
|
|
// interpolation).
|
|
|
|
for (j = 0; j < FilterLen; j++) {
|
|
op[j] = Res2[j] - Res[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
return (Res);
|
|
}
|
|
|
|
/**
|
|
* Function makes sure all fractional delay filters were created.
|
|
*/
|
|
|
|
void createAllFilters() {
|
|
int i;
|
|
|
|
for (i = 0; i < FracCount; i++) {
|
|
getFilter(i);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function returns an approximate initialization complexity, expressed in
|
|
* the number of multiply-add operations. This includes fractional delay
|
|
* filters calculation and multiplication by an external filter. This
|
|
* function can only be called after the init() function.
|
|
*
|
|
* @param FracUseMap Fractional delays use map, each element corresponds
|
|
* to a single fractional delay, will be compared to the internal table
|
|
* fill flags. This map should include 0 and 1 values only.
|
|
* @return The complexity of the initialization, expressed in the number
|
|
* of multiply-add operations.
|
|
*/
|
|
|
|
int calcInitComplexity(const CBuffer<uint8_t>& FracUseMap) const {
|
|
const int FltInitCost = 65; // Cost to initialize a single sample
|
|
// of the fractional delay filter.
|
|
const int FltUseCost =
|
|
FilterLen * Order +
|
|
SrcFilterLen * ExtFilter.getCapacity(); // Cost to use a single
|
|
// fractional delay filter.
|
|
const int ucb[2] = {0, FltUseCost};
|
|
int ic;
|
|
int i;
|
|
|
|
if (IsInitRequired) {
|
|
ic = FracCount * SrcFilterLen * FltInitCost;
|
|
|
|
for (i = 0; i < FracCount; i++) {
|
|
ic += ucb[FracUseMap[i]];
|
|
}
|
|
} else {
|
|
ic = 0;
|
|
|
|
for (i = 0; i < FracCount; i++) {
|
|
if (FracUseMap[i] != 0) {
|
|
ic += ucb[TableFillFlags[i] == 0 ? 1 : 0];
|
|
}
|
|
}
|
|
}
|
|
|
|
return (ic);
|
|
}
|
|
|
|
private:
|
|
static const int InterpPoints = 2; ///< The maximal number of points the
|
|
///< interpolation is based on.
|
|
///<
|
|
double WFLen2; ///< Window function's Len2 parameter.
|
|
///<
|
|
double WFFreq; ///< Window function's Freq parameter.
|
|
///<
|
|
double WFAlpha; ///< Window function's Alpha parameter.
|
|
///<
|
|
int FracCount; ///< The required number of fractional delay filters.
|
|
///<
|
|
int Order; ///< The order of the interpolation polynomial.
|
|
///<
|
|
int Alignment; ///< The required filter table alignment.
|
|
///<
|
|
int SrcFilterLen; ///< Length of the "source" filters. This is always an
|
|
///< even value.
|
|
///<
|
|
int FilterLen; ///< Specifies the number of samples (taps) each fractional
|
|
///< delay filter has. This is always an even value, adjusted
|
|
///< by the FltLenAlign.
|
|
///<
|
|
int FilterSize; ///< The size of a single filter element, equals
|
|
///< FilterLen * ElementSize.
|
|
///<
|
|
bool IsInitRequired; ///< "True" if SrcTable filter table initialization
|
|
///< is required. This value is available only after the
|
|
///< call to the init() function.
|
|
///<
|
|
CBuffer<fptype> Table; ///< Interpolation table, size equals to
|
|
///< ReqFracCount * FilterLen * ElementSize.
|
|
///<
|
|
CBuffer<uint8_t>
|
|
TableFillFlags; ///< Contains ReqFracCount + 1
|
|
///< elements. Bit 0 of every element is 1 if Table
|
|
///< already contains the filter from SrcTable filtered
|
|
///< by ExtFilter. Bit 1 of every element means higher
|
|
///< order coefficients were filled for the filter.
|
|
///<
|
|
CFltBuffer ExtFilter; ///< External filter that should be applied to every
|
|
///< fractional delay filter. Can be empty. Half of
|
|
///< this filter's capacity is used as latency (group
|
|
///< delay) value of the filter.
|
|
///<
|
|
CBuffer<double> SrcTable; ///< Source table of delay filters, contains
|
|
///< ReqFracCount + 1 elements. This table is used
|
|
///< to fill the Table with the actual filters,
|
|
///< filtered by an external filter.
|
|
///<
|
|
bool IsSrcTableBuilt; ///< "True" if the SrcTable was built already. This
|
|
///< variable is set to "false" in the init() function.
|
|
///<
|
|
|
|
/**
|
|
* Function builds source table used in the createFilter() function.
|
|
*/
|
|
|
|
void buildSrcTable() {
|
|
IsSrcTableBuilt = true;
|
|
IsInitRequired = false;
|
|
|
|
CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha);
|
|
|
|
const int BufLen = SrcFilterLen * FracCount + InterpPoints - 1;
|
|
const int BufOffs = InterpPoints / 2 - 1;
|
|
const int BufCenter = SrcFilterLen * FracCount / 2 + BufOffs;
|
|
|
|
CBuffer<double> Buf(BufLen);
|
|
memset(Buf, 0, (BufCenter - p.fl2) * sizeof(double));
|
|
int i = BufLen - BufCenter - p.fl2 - 1;
|
|
memset(&Buf[BufLen - i], 0, i * sizeof(double));
|
|
|
|
p.generateLPF(&Buf[BufCenter - p.fl2], FracCount);
|
|
|
|
SrcTable.alloc((FracCount + 1) * SrcFilterLen);
|
|
TableFillFlags.alloc(FracCount + 1);
|
|
int j;
|
|
double* op0 = SrcTable;
|
|
|
|
for (i = FracCount; i >= 0; i--) {
|
|
TableFillFlags[i] = 0;
|
|
double* p = Buf + BufOffs + i;
|
|
|
|
for (j = 0; j < SrcFilterLen; j++) {
|
|
op0[0] = p[0];
|
|
op0++;
|
|
p += FracCount;
|
|
}
|
|
}
|
|
|
|
Table.alloc((FracCount + 1) * FilterSize, Alignment);
|
|
}
|
|
|
|
/**
|
|
* Function creates the specified filter in the Table by copying it from
|
|
* the SrcTable and filtering by ExtFilter. Function does nothing if
|
|
* filter was already created.
|
|
*
|
|
* @param k Filter index to create, in the range 0 to FracCount,
|
|
* inclusive.
|
|
*/
|
|
|
|
void createFilter(const int k) {
|
|
if (TableFillFlags[k] != 0) {
|
|
return;
|
|
}
|
|
|
|
TableFillFlags[k] |= 1;
|
|
const int ExtFilterLatency = ExtFilter.getCapacity() / 2;
|
|
const int ResLatency = ExtFilterLatency + SrcFilterLen / 2;
|
|
int ResLen = SrcFilterLen;
|
|
|
|
if (ExtFilter.getCapacity() > 0) {
|
|
ResLen += ExtFilter.getCapacity() - 1;
|
|
}
|
|
|
|
const int ResOffs = FilterLen / 2 - ResLatency;
|
|
fptype* op = &Table[k * FilterSize];
|
|
int i;
|
|
|
|
for (i = 0; i < ResOffs; i++) {
|
|
op[i] = 0.0;
|
|
}
|
|
|
|
for (i = ResOffs + ResLen; i < FilterLen; i++) {
|
|
op[i] = 0.0;
|
|
}
|
|
|
|
op += ResOffs;
|
|
const double* const srcflt = &SrcTable[k * SrcFilterLen];
|
|
|
|
if (ExtFilter.getCapacity() == 0) {
|
|
for (i = 0; i < ResLen; i++) {
|
|
op[i] = (fptype)srcflt[i];
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// Perform convolution of extflt and srcflt.
|
|
|
|
const double* const extflt = &ExtFilter[0];
|
|
int j;
|
|
|
|
for (j = 0; j < ResLen; j++) {
|
|
int k = 0;
|
|
int l = j - ExtFilter.getCapacity() + 1;
|
|
int r = l + ExtFilter.getCapacity();
|
|
|
|
if (l < 0) {
|
|
k -= l;
|
|
l = 0;
|
|
}
|
|
|
|
if (r > SrcFilterLen) {
|
|
r = SrcFilterLen;
|
|
}
|
|
|
|
const double* const extfltb = extflt + k;
|
|
const double* const srcfltb = srcflt + l;
|
|
double s = 0.0;
|
|
l = r - l;
|
|
|
|
for (i = 0; i < l; i++) {
|
|
s += extfltb[i] * srcfltb[i];
|
|
}
|
|
|
|
op[j] = (fptype)s;
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Thread pool for multi-threaded image resizing operation.
|
|
*
|
|
* This base class is used to organize a multi-threaded image resizing
|
|
* operation. The thread pool should consist of threads that initially wait
|
|
* for a signal. Upon receiving a signal (via the startAllWorkloads()
|
|
* function) each previously added thread should execute its workload's
|
|
* process() function once, and return to the wait signal state again. The
|
|
* thread pool should be also able to efficiently wait for all workloads to
|
|
* finish via the waitAllWorkloadsToFinish() function.
|
|
*
|
|
* The image resizing algorithm makes calls to functions of this class.
|
|
*/
|
|
|
|
class CImageResizerThreadPool {
 public:
  CImageResizerThreadPool() {}

  virtual ~CImageResizerThreadPool() {}

  /**
   * @brief Thread pool's workload object class.
   *
   * Base class for objects that carry out the actual work which is spread
   * over several threads.
   */
  class CWorkload {
   public:
    virtual ~CWorkload() {}

    /**
     * Function that gets called from the thread when thread pool's
     * startAllWorkloads() function is called.
     */
    virtual void process() = 0;
  };

  /**
   * @return The suggested number of workloads (and their associated
   * threads) to add. The minimal value this function can return is 1. The
   * usual value may depend on the number of physical and virtual cores
   * present in the system, and on other considerations. This base
   * implementation always returns 1.
   */
  virtual int getSuggestedWorkloadCount() const { return 1; }

  /**
   * Function adds a new workload (and possibly thread) to the thread pool.
   * The caller decides how many parallel workloads (and threads) it
   * requires, but this number will not exceed the value returned by the
   * getSuggestedWorkloadCount() function. It is implementation-specific how
   * many workloads to associate with a single thread. But for efficiency
   * reasons each workload should be associated with its own thread.
   *
   * Note that the same set of workload objects will be processed each time
   * the startAllWorkloads() function is called. This means that workload
   * objects are added only once. The caller changes the state of the
   * workload objects and then calls the startAllWorkloads() function to
   * process them.
   *
   * This base implementation ignores the workload.
   *
   * @param Workload Workload object whose process() function will be called
   * from within the thread when the startAllWorkloads() function is called.
   */
  virtual void addWorkload(CWorkload* const Workload) { (void)Workload; }

  /**
   * Function starts all workloads associated with threads previously added
   * via the addWorkload() function. It is assumed that this function
   * performs the necessary "memory barrier" (or "cache sync") kind of
   * operation so that all threads catch up the prior changes made to the
   * workload objects during their wait state. This base implementation does
   * nothing.
   */
  virtual void startAllWorkloads() {}

  /**
   * Function waits for all workloads to finish. This base implementation
   * does nothing.
   */
  virtual void waitAllWorkloadsToFinish() {}

  /**
   * Function removes all workloads previously added via the addWorkload()
   * function. This function gets called only after the
   * waitAllWorkloadsToFinish() function call. This base implementation does
   * nothing.
   */
  virtual void removeAllWorkloads() {}
};
|
|
|
|
/**
|
|
* @brief Resizing algorithm parameters structure.
|
|
*
|
|
* This structure holds all selectable parameters used by the resizing
|
|
* algorithm at various stages, for both downsizing and upsizing. There are no
|
|
* other parameters that can optimize the performance of the resizing
|
|
* algorithm. Filter length parameters can take fractional values.
|
|
*
|
|
* Beside quality, these parameters (except Alpha parameters) directly affect
|
|
* the computational cost of the resizing algorithm. It is possible to trade
|
|
* the visual quality for computational cost.
|
|
*
|
|
* Anti-alias filtering during downsizing can be defined as a considerable
|
|
* reduction of contrast of smallest features of an image. Unfortunately, such
|
|
* de-contrasting partially affects features of all sizes thus producing a
|
|
* non-linearity of frequency response. All pre-defined parameter sets are
|
|
* described by 3 values separated by slashes. The first value is the
|
|
* de-contrasting factor of small features (which are being removed) while
|
|
* the second value is the de-contrasting factor of large features (which
|
|
* should remain intact), with value of 1 equating to "no contrast change".
|
|
* The third value is the optimization score (see below), with value of 0
|
|
* equating to the "perfect" linearity of frequency response.
|
|
*
|
|
* The pre-defined parameter sets offered by this library were auto-optimized
|
|
* for the given LPFltBaseLen, IntFltLen and CorrFltAlpha values. The
|
|
* optimization goal was to minimize the score: the sum of squares of the
|
|
* difference between original and processed images (which was not actually
|
|
* resized, k=1). The original image was a 0.5 megapixel uniformly-distributed
|
|
* white-noise image with pixel intensities in the 0-1 range. Such goal
|
|
* converges very well and produces filtering system with the flattest
|
|
* frequency response possible for the given constraints. With this goal,
|
|
* increasing the LPFltBaseLen value reduces the general amount of aliasing
|
|
* artifacts.
|
|
*/
|
|
|
|
struct CImageResizerParams {
  double CorrFltAlpha;  ///< Alpha parameter of the Peaked Cosine window
                        ///< function used on the correction filter. The
                        ///< "usable" values are in the narrow range 1.0 to
                        ///< 1.5.
                        ///<
  double CorrFltLen;  ///< Correction filter's length in samples (taps). The
                      ///< "usable" range is narrow, 5.5 to 8, as to
                      ///< minimize the "overcorrection" which is
                      ///< mathematically precise, but visually unacceptable.
                      ///<
  double IntFltAlpha;  ///< Alpha parameter of the Peaked Cosine window
                       ///< function used on the interpolation low-pass
                       ///< filter. The "usable" values are in the range 1.5
                       ///< to 2.5.
                       ///<
  double IntFltCutoff;  ///< Interpolation low-pass filter's cutoff frequency
                        ///< (normalized, [0; 1]). The "usable" range is 0.6
                        ///< to 0.8.
                        ///<
  double IntFltLen;  ///< Interpolation low-pass filter's length in samples
                     ///< (taps). The length value should be at least 18 or
                     ///< otherwise a "dark grid" artifact will be
                     ///< introduced if a further sharpening is applied.
                     ///< IntFltLen together with other IntFlt parameters
                     ///< should be tuned in a way that produces the
                     ///< flattest frequency response in 0-0.5 normalized
                     ///< frequency range (this range is due to 2X
                     ///< upsampling).
                     ///<
  double LPFltAlpha;  ///< Alpha parameter of the Peaked Cosine window
                      ///< function used on the low-pass filter. The
                      ///< "usable" values are in the range 1.5 to 6.5.
                      ///<
  double LPFltBaseLen;  ///< Base length of the low-pass (aka anti-aliasing
                        ///< or reconstruction) filter, in samples (taps),
                        ///< further adjusted by the actual cutoff
                        ///< frequency, upsampling and downsampling factors.
                        ///< The "usable" range is between 6 and 9.
                        ///<
  double LPFltCutoffMult;  ///< Low-pass filter's cutoff frequency
                           ///< multiplier. This value can be both below and
                           ///< above 1.0 as low-pass filters are inserted on
                           ///< downsampling and upsampling steps and always
                           ///< have corner frequency equal to or below
                           ///< 0.5pi. This multiplier shifts low-pass
                           ///< filter's corner frequency towards lower (if
                           ///< below 1.0) or higher (if above 1.0)
                           ///< frequencies. This multiplier can be way below
                           ///< 1.0 since any additional high-frequency
                           ///< damping will be partially corrected by the
                           ///< correction filter. The "usable" range is 0.3
                           ///< to 1.0.
                           ///<
  double HBFltAlpha;  ///< Half-band filter's Alpha. Assigned internally.
                      ///<
  double HBFltCutoff;  ///< Half-band filter's cutoff point [0; 1]. Assigned
                       ///< internally.
                       ///<
  double HBFltLen;  ///< Length of the half-band low-pass filter. Assigned
                    ///< internally. Internally used to perform 2X or higher
                    ///< downsampling. These filter parameters should be
                    ///< treated as "technical" and do not require
                    ///< adjustment as they were tuned to suit all
                    ///< combinations of other parameters. This half-band
                    ///< filter provides a wide transition band (for minimal
                    ///< ringing artifacts) and a high stop-band attenuation
                    ///< (for minimal aliasing).
                    ///<

  /**
   * Constructor assigns the half-band filter's parameters only. The
   * remaining parameters are not assigned here.
   */
  CImageResizerParams()
      : HBFltAlpha(1.75395), HBFltCutoff(0.40356), HBFltLen(22.00000) {}
};
|
|
|
|
/**
|
|
* @brief The default set of resizing algorithm parameters
|
|
* (10.01/1.029/0.019169).
|
|
*
|
|
* This is the default set of resizing parameters that was designed to deliver
|
|
* a sharp image while still providing a low amount of ringing artifacts, and
|
|
* having a reasonable computational cost.
|
|
*/
|
|
|
|
struct CImageResizerParamsDef : public CImageResizerParams {
|
|
CImageResizerParamsDef() {
|
|
CorrFltAlpha = 1.0; // 10.01/1.88/1.029(522.43)/0.019169:258648,446808
|
|
CorrFltLen = 6.30770;
|
|
IntFltAlpha = 2.27825;
|
|
IntFltCutoff = 0.75493;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 3.40127;
|
|
LPFltBaseLen = 7.78;
|
|
LPFltCutoffMult = 0.78797;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for ultra-low-ringing
|
|
* performance (7.69/1.069/0.000245).
|
|
*
|
|
* This set of resizing algorithm parameters offers the lowest amount of
|
|
* ringing this library is capable of providing while still offering a decent
|
|
* quality. Low ringing is attained at the expense of higher aliasing
|
|
* artifacts and a slightly reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsULR : public CImageResizerParams {
|
|
CImageResizerParamsULR() {
|
|
CorrFltAlpha = 1.0; // 7.69/1.97/1.069(31445.45)/0.000245:258627,436845
|
|
CorrFltLen = 5.83280;
|
|
IntFltAlpha = 2.11453;
|
|
IntFltCutoff = 0.73986;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 1.73455;
|
|
LPFltBaseLen = 6.40;
|
|
LPFltCutoffMult = 0.61314;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for low-ringing performance
|
|
* (7.86/1.065/0.000106).
|
|
*
|
|
* This set of resizing algorithm parameters offers a very low-ringing
|
|
* performance at the expense of higher aliasing artifacts and a slightly
|
|
* reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsLR : public CImageResizerParams {
|
|
CImageResizerParamsLR() {
|
|
CorrFltAlpha = 1.0; // 7.86/1.96/1.065(73865.02)/0.000106:258636,437381
|
|
CorrFltLen = 5.87671;
|
|
IntFltAlpha = 2.25322;
|
|
IntFltCutoff = 0.74090;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 1.79306;
|
|
LPFltBaseLen = 7.00;
|
|
LPFltCutoffMult = 0.68881;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for lower-ringing performance
|
|
* (8.86/1.046/0.010168).
|
|
*
|
|
* This set of resizing algorithm parameters offers a lower-ringing
|
|
* performance in comparison to the default setting, at the expense of higher
|
|
* aliasing artifacts and a slightly reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsLow : public CImageResizerParams {
|
|
CImageResizerParamsLow() {
|
|
CorrFltAlpha = 1.0; // 8.86/1.92/1.046(871.54)/0.010168:258647,442252
|
|
CorrFltLen = 6.09757;
|
|
IntFltAlpha = 2.36704;
|
|
IntFltCutoff = 0.74674;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 2.19427;
|
|
LPFltBaseLen = 7.66;
|
|
LPFltCutoffMult = 0.75380;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for low-aliasing
|
|
* resizing (11.81/1.012/0.038379).
|
|
*
|
|
* This set of resizing algorithm parameters offers a considerable
|
|
* anti-aliasing performance with a good frequency response linearity (and
|
|
* contrast). This is an intermediate setting between the default and Ultra
|
|
* parameters.
|
|
*/
|
|
|
|
struct CImageResizerParamsHigh : public CImageResizerParams {
|
|
CImageResizerParamsHigh() {
|
|
CorrFltAlpha = 1.0; // 11.81/1.83/1.012(307.84)/0.038379:258660,452719
|
|
CorrFltLen = 6.80909;
|
|
IntFltAlpha = 2.44917;
|
|
IntFltCutoff = 0.75856;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 4.39527;
|
|
LPFltBaseLen = 8.18;
|
|
LPFltCutoffMult = 0.79172;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for ultra low-aliasing
|
|
* resizing (13.65/1.001/0.000483).
|
|
*
|
|
* This set of resizing algorithm parameters offers a very considerable
|
|
* anti-aliasing performance with a good frequency response linearity (and
|
|
* contrast). This set of parameters is computationally expensive and may
|
|
* produce ringing artifacts on sharp features.
|
|
*/
|
|
|
|
struct CImageResizerParamsUltra : public CImageResizerParams {
|
|
CImageResizerParamsUltra() {
|
|
CorrFltAlpha = 1.0; // 13.65/1.79/1.001(28288.41)/0.000483:258658,457974
|
|
CorrFltLen = 7.48060;
|
|
IntFltAlpha = 1.93750;
|
|
IntFltCutoff = 0.75462;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 5.55209;
|
|
LPFltBaseLen = 8.34;
|
|
LPFltCutoffMult = 0.78002;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizing variables class.
|
|
*
|
|
* This is an utility "catch all" class that defines various variables used
|
|
* during image resizing. Several variables that are explicitly initialized in
|
|
* this class' constructor are also used as additional "input" variables to
|
|
* the image resizing function. These variables will not be changed by the
|
|
* avir::CImageResizer<>::resizeImage() function.
|
|
*/
|
|
|
|
class CImageResizerVars {
|
|
public:
|
|
int ElCount; ///< The number of "fptype" elements used to store 1 pixel.
|
|
///<
|
|
int ElCountIO; ///< The number of source and destination image's elements
|
|
///< used to store 1 pixel.
|
|
///<
|
|
int fppack; ///< The number of atomic types stored in a single "fptype"
|
|
///< element.
|
|
///<
|
|
int fpalign; ///< Suggested alignment size in bytes. This is not a
|
|
///< required alignment, because image resizing algorithm cannot
|
|
///< be made to have a strictly aligned data access in all cases
|
|
///< (e.g. de-interleaved interpolation cannot perform aligned
|
|
///< accesses).
|
|
///<
|
|
int elalign; ///< Length alignment of arrays of elements. This applies to
|
|
///< filters and intermediate buffers: this constant forces
|
|
///< filters and scanlines to have a length which is a multiple
|
|
///< of this value, for more efficient SIMD implementation.
|
|
///<
|
|
int packmode; ///< 0 if interleaved packing, 1 if de-interleaved.
|
|
///<
|
|
int BufLen[2]; ///< Intermediate buffers' lengths in "fptype" elements.
|
|
int BufOffs[2]; ///< Offsets into the intermediate buffers, used to
|
|
///< provide prefix elements required during processing so
|
|
///< that no "out of range" access happens. This offset is a
|
|
///< multiple of ElCount if pixels are stored in interleaved
|
|
///< form.
|
|
///<
|
|
double k; ///< Resizing step coefficient, updated to reflect the actually
|
|
///< used coefficient during resizing.
|
|
///<
|
|
double o; ///< Starting pixel offset inside the source image, updated to
|
|
///< reflect the actually used offset during resizing.
|
|
///<
|
|
int ResizeStep; ///< Index of the resizing step in the latest filtering
|
|
///< steps array.
|
|
///<
|
|
double InGammaMult; ///< Input gamma multiplier, used to convert input
|
|
///< data to 0 to 1 range. 0.0 if no gamma is in use.
|
|
///<
|
|
double OutGammaMult; ///< Output gamma multiplier, used to convert data to
|
|
///< 0 to 255/65535 range. 0.0 if no gamma is in use.
|
|
///<
|
|
|
|
double ox; ///< Start X pixel offset within source image (can be
|
|
///< negative). Positive offset moves image to the left.
|
|
///<
|
|
double oy; ///< Start Y pixel offset within source image (can be
|
|
///< negative). Positive offset moves image to the top.
|
|
///<
|
|
CImageResizerThreadPool*
|
|
ThreadPool; ///< Thread pool to be used by the
|
|
///< image resizing function. Set to NULL to use
|
|
///< single-threaded processing.
|
|
///<
|
|
bool UseSRGBGamma; ///< Perform sRGB gamma linearization (correction).
|
|
///<
|
|
int BuildMode; ///< The build mode to use, for debugging purposes. Set to
|
|
///< -1 to select a minimal-complexity mode automatically. All
|
|
///< build modes deliver similar results with minor
|
|
///< deviations.
|
|
///<
|
|
int RndSeed; ///< Random seed parameter. This parameter may be incremented
|
|
///< after each random generator initialization. The use of this
|
|
///< variable depends on the ditherer implementation.
|
|
///<
|
|
|
|
CImageResizerVars()
|
|
: ox(0.0),
|
|
oy(0.0),
|
|
ThreadPool(NULL),
|
|
UseSRGBGamma(false),
|
|
BuildMode(-1),
|
|
RndSeed(0) {}
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's filtering step class.
|
|
*
|
|
* Class defines data to perform a single filtering step over a whole
|
|
* horizontal or vertical scanline. Resizing consists of 1 or more steps that
|
|
* may be performed before the actual resizing takes place. Filtering may also
|
|
* follow a resizing step. Each step must ensure that scanline data contains
|
|
* enough pixels to perform the next step (which may be resizing) without
|
|
* exceeding scanline's bounds.
|
|
*
|
|
* A derived class must implement several "const" and "static" functions that
|
|
* are used to perform the actual filtering in interleaved or de-interleaved
|
|
* mode.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel elements. SIMD
|
|
* types can be used: in this case each element may hold a whole pixel.
|
|
* @tparam fptypeatom The atomic type the "fptype" consists of.
|
|
*/
|
|
|
|
template <class fptype, class fptypeatom>
|
|
class CImageResizerFilterStep {
|
|
public:
|
|
bool IsUpsample; ///< "True" if this step is an upsampling step, "false"
|
|
///< if downsampling step. Should be set to "false" if
|
|
///< ResampleFactor equals 0.
|
|
///<
|
|
int ResampleFactor; ///< Resample factor (>=1). If 0, this is a resizing
|
|
///< step. This value should be >1 if IsUpsample equals
|
|
///< "true".
|
|
///<
|
|
CBuffer<fptype> Flt; ///< Filter to use at this step.
|
|
///<
|
|
CFltBuffer FltOrig; ///< Originally-designed filter. This buffer may not
|
|
///< be assigned. Assigned by filters that precede the
|
|
///< resizing step if such filter is planned to be
|
|
///< embedded into the interpolation filter as "external"
|
|
///< filter. If IsUpsample=true and this filter buffer is
|
|
///< not empty, the upsampling step will not itself apply
|
|
///< any filtering over upsampled input scanline.
|
|
///<
|
|
double DCGain; ///< DC gain which was applied to the filter. Not defined
|
|
///< if ResampleFactor = 0.
|
|
///<
|
|
int FltLatency; ///< Filter's latency (group delay, shift) in pixels.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizing-related variables.
|
|
///<
|
|
int InLen; ///< Input scanline's length in pixels.
|
|
///<
|
|
int InBuf; ///< Input buffer index, 0 or 1.
|
|
///<
|
|
int InPrefix; ///< Required input prefix pixels. These prefix pixels will
|
|
///< be filled with source scanline's first pixel value. If
|
|
///< IsUpsample is "true", this is the additional number of
|
|
///< times the first pixel will be filtered before processing
|
|
///< scanline, this number is also reflected in the OutPrefix.
|
|
///<
|
|
int InSuffix; ///< Required input suffix pixels. These suffix pixels will
|
|
///< be filled with source scanline's last pixel value. If
|
|
///< IsUpsample is "true", this is the additional number of
|
|
///< times the last pixel will be filtered before processing
|
|
///< scanline, this number is also reflected in the OutSuffix.
|
|
///<
|
|
int InElIncr; ///< Pixel element increment within the input buffer, used
|
|
///< during de-interleaved processing: in this case each
|
|
///< image's channel is stored independently, InElIncr elements
|
|
///< apart.
|
|
///<
|
|
int OutLen; ///< Length of the resulting scanline.
|
|
///<
|
|
int OutBuf; ///< Output buffer index. 0 or 1; 2 for the last step.
|
|
///<
|
|
int OutPrefix; ///< Required output prefix pixels. These prefix pixels
|
|
///< will not be pre-filled with any values. Value is valid
|
|
///< only if IsUpsample equals "true".
|
|
///<
|
|
int OutSuffix; ///< Required input suffix pixels. These suffix pixels will
|
|
///< not be pre-filled with any values. Value is valid only if
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
int OutElIncr; ///< Pixel element increment within the output buffer, used
|
|
///< during de-interleaved processing. Equals to the
|
|
///< InBufElIncr of the next step.
|
|
///<
|
|
CBuffer<fptype> PrefixDC; ///< DC component fluctuations added at the
|
|
///< start of the resulting scanline, used when
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
CBuffer<fptype> SuffixDC; ///< DC component fluctuations added at the
|
|
///< end of the resulting scanline, used when
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
int EdgePixelCount; ///< The number of edge pixels added. Affects the
|
|
///< initial position within the input scanline, used to
|
|
///< produce edge pixels. This variable is used and
|
|
///< should be defined when IsUpsample=false and
|
|
///< ResampleFactor>0. When assigning this variable it is
|
|
///< also necessary to update InPrefix, OutLen and Vars.o
|
|
///< variables.
|
|
///<
|
|
static const int EdgePixelCountDef =
|
|
3; ///< The default number of pixels
|
|
///< additionally produced at scanline edges during filtering. This is
|
|
///< required to reduce edge artifacts.
|
|
///<
|
|
|
|
/**
|
|
* @brief Resizing position structure.
|
|
*
|
|
* Structure holds resizing position and pointer to fractional delay
|
|
* filter.
|
|
*/
|
|
|
|
struct CResizePos {
|
|
int SrcPosInt; ///< Source scanline position.
|
|
///<
|
|
int fti; ///< Fractional delay filter index.
|
|
///<
|
|
const fptype* ftp; ///< Fractional delay filter pointer.
|
|
///<
|
|
fptypeatom x; ///< Interpolation coefficient between delay filters.
|
|
///<
|
|
int SrcOffs; ///< Source scanline offset.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Resizing positions buffer class.
|
|
*
|
|
* This class combines buffer together with variables that define resizing
|
|
* stepping.
|
|
*/
|
|
|
|
class CRPosBuf : public CBuffer<CResizePos> {
|
|
public:
|
|
double k; ///< Resizing step.
|
|
///<
|
|
double o; ///< Resizing offset.
|
|
///<
|
|
int FracCount; ///< The number of fractional delay filters in a filter
|
|
///< bank used together with this buffer.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Resizing positions buffer array class.
|
|
*
|
|
* This class combines structure array of the CRPosBuf class objects with
|
|
* the function that locates or creates buffer with the required resizing
|
|
* stepping.
|
|
*/
|
|
|
|
class CRPosBufArray : public CStructArray<CRPosBuf> {
|
|
public:
|
|
using CStructArray<CRPosBuf>::add;
|
|
using CStructArray<CRPosBuf>::getItemCount;
|
|
|
|
/**
|
|
* Function returns the resizing positions buffer with the required
|
|
* stepping. If no such buffer exists, it is created.
|
|
*
|
|
* @param k Resizing step.
|
|
* @param o Resizing offset.
|
|
* @param FracCount The number of fractional delay filters in a filter
|
|
* bank used together with this buffer.
|
|
* @return Reference to the CRPosBuf object.
|
|
*/
|
|
|
|
CRPosBuf& getRPosBuf(const double k, const double o, const int FracCount) {
|
|
int i;
|
|
|
|
for (i = 0; i < getItemCount(); i++) {
|
|
CRPosBuf& Buf = (*this)[i];
|
|
|
|
if (Buf.k == k && Buf.o == o && Buf.FracCount == FracCount) {
|
|
return (Buf);
|
|
}
|
|
}
|
|
|
|
CRPosBuf& NewBuf = add();
|
|
NewBuf.k = k;
|
|
NewBuf.o = o;
|
|
NewBuf.FracCount = FracCount;
|
|
|
|
return (NewBuf);
|
|
}
|
|
};
|
|
|
|
CRPosBuf* RPosBuf; ///< Resizing positions buffer. Used when
|
|
///< ResampleFactor equals 0 (resizing step).
|
|
///<
|
|
CDSPFracFilterBankLin<fptype>* FltBank; ///< Filter bank in use by *this
|
|
///< resizing step.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Interleaved filtering steps implementation class.
|
|
*
|
|
* This class implements scanline filtering functions in interleaved mode.
|
|
* This means that each pixel is processed independently, not in groups.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel elements. SIMD
|
|
* types can be used: in this case each element may hold a whole pixel.
|
|
* @tparam fptypeatom The atomic type the "fptype" consists of.
|
|
*/
|
|
|
|
template <class fptype, class fptypeatom>
|
|
class CImageResizerFilterStepINL
|
|
: public CImageResizerFilterStep<fptype, fptypeatom> {
|
|
public:
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::IsUpsample;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::ResampleFactor;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::Flt;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::FltOrig;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::FltLatency;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::Vars;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::InLen;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::InPrefix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::InSuffix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::OutLen;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::OutPrefix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::OutSuffix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::PrefixDC;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::SuffixDC;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::RPosBuf;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::FltBank;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::EdgePixelCount;
|
|
|
|
/**
|
|
* Function performs "packing" of a scanline and type conversion.
|
|
* Scanline, depending on the "fptype" can be potentially stored as a
|
|
* packed SIMD values having a certain atomic type. If required, the sRGB
|
|
* gamma correction is applied.
|
|
*
|
|
* @param ip Input scanline.
|
|
* @param op0 Output scanline.
|
|
* @param l0 The number of pixels to "pack".
|
|
*/
|
|
|
|
template <class Tin>
|
|
void packScanline(const Tin* ip, fptype* const op0, const int l0) const {
|
|
const int ElCount = Vars->ElCount;
|
|
const int ElCountIO = Vars->ElCountIO;
|
|
fptype* op = op0;
|
|
int l = l0;
|
|
|
|
if (!Vars->UseSRGBGamma) {
|
|
if (ElCountIO == 1) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
op += ElCount;
|
|
ip++;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 4) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
v[1] = (fptypeatom)ip[1];
|
|
v[2] = (fptypeatom)ip[2];
|
|
v[3] = (fptypeatom)ip[3];
|
|
op += ElCount;
|
|
ip += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 3) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
v[1] = (fptypeatom)ip[1];
|
|
v[2] = (fptypeatom)ip[2];
|
|
op += ElCount;
|
|
ip += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 2) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
v[1] = (fptypeatom)ip[1];
|
|
op += ElCount;
|
|
ip += 2;
|
|
l--;
|
|
}
|
|
}
|
|
} else {
|
|
const fptypeatom gm = (fptypeatom)Vars->InGammaMult;
|
|
|
|
if (ElCountIO == 1) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
op += ElCount;
|
|
ip++;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 4) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm);
|
|
v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm);
|
|
v[3] = convertSRGB2Lin((fptypeatom)ip[3] * gm);
|
|
op += ElCount;
|
|
ip += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 3) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm);
|
|
v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm);
|
|
op += ElCount;
|
|
ip += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 2) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm);
|
|
op += ElCount;
|
|
ip += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
const int ZeroCount = ElCount * Vars->fppack - ElCountIO;
|
|
op = op0;
|
|
l = l0;
|
|
|
|
if (ZeroCount == 1) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op + ElCountIO;
|
|
v[0] = (fptypeatom)0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
} else if (ZeroCount == 2) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op + ElCountIO;
|
|
v[0] = (fptypeatom)0;
|
|
v[1] = (fptypeatom)0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
} else if (ZeroCount == 3) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op + ElCountIO;
|
|
v[0] = (fptypeatom)0;
|
|
v[1] = (fptypeatom)0;
|
|
v[2] = (fptypeatom)0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function applies Linear to sRGB gamma correction to the specified
|
|
* scanline.
|
|
*
|
|
* @param p Scanline.
|
|
* @param l The number of pixels to de-linearize.
|
|
* @param Vars0 Image resizing-related variables.
|
|
*/
|
|
|
|
static void applySRGBGamma(fptype* p, int l, const CImageResizerVars& Vars0) {
|
|
const int ElCount = Vars0.ElCount;
|
|
const int ElCountIO = Vars0.ElCountIO;
|
|
const fptypeatom gm = (fptypeatom)Vars0.OutGammaMult;
|
|
|
|
if (ElCountIO == 1) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)p;
|
|
v[0] = convertLin2SRGB(v[0]) * gm;
|
|
p += ElCount;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 4) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)p;
|
|
v[0] = convertLin2SRGB(v[0]) * gm;
|
|
v[1] = convertLin2SRGB(v[1]) * gm;
|
|
v[2] = convertLin2SRGB(v[2]) * gm;
|
|
v[3] = convertLin2SRGB(v[3]) * gm;
|
|
p += ElCount;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 3) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)p;
|
|
v[0] = convertLin2SRGB(v[0]) * gm;
|
|
v[1] = convertLin2SRGB(v[1]) * gm;
|
|
v[2] = convertLin2SRGB(v[2]) * gm;
|
|
p += ElCount;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 2) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)p;
|
|
v[0] = convertLin2SRGB(v[0]) * gm;
|
|
v[1] = convertLin2SRGB(v[1]) * gm;
|
|
p += ElCount;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function converts vertical scanline to horizontal scanline. This
|
|
* function is called by the image resizer when image is resized
|
|
* vertically. This means that the vertical scanline is stored in the
|
|
* same format produced by the packScanline() and maintained by other
|
|
* filtering functions.
|
|
*
|
|
* @param ip Input vertical scanline.
|
|
* @param op Output buffer (temporary buffer used during resizing).
|
|
* @param SrcLen The number of pixels in the input scanline, also used to
|
|
* calculate input buffer increment.
|
|
* @param SrcIncr Input buffer increment to the next vertical pixel.
|
|
*/
|
|
|
|
void convertVtoH(const fptype* ip, fptype* op, const int SrcLen,
|
|
const int SrcIncr) const {
|
|
const int ElCount = Vars->ElCount;
|
|
int j;
|
|
|
|
if (ElCount == 1) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
ip += SrcIncr;
|
|
op++;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
op[1] = ip[1];
|
|
op[2] = ip[2];
|
|
op[3] = ip[3];
|
|
ip += SrcIncr;
|
|
op += 4;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
op[1] = ip[1];
|
|
op[2] = ip[2];
|
|
ip += SrcIncr;
|
|
op += 3;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
op[1] = ip[1];
|
|
ip += SrcIncr;
|
|
op += 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs "unpacking" of a scanline and type conversion
|
|
* (truncation is used when floating point is converted to integer).
|
|
* Scanline, depending on the "fptype" can be potentially stored as a
|
|
* packed SIMD values having a certain atomic type. The unpacking function
|
|
* assumes that scanline is stored in the style produced by the
|
|
* packScanline() function.
|
|
*
|
|
* @param ip Input scanline.
|
|
* @param op Output scanline.
|
|
* @param l The number of pixels to "unpack".
|
|
* @param Vars0 Image resizing-related variables.
|
|
*/
|
|
|
|
template <class Tout>
|
|
static void unpackScanline(const fptype* ip, Tout* op, int l,
|
|
const CImageResizerVars& Vars0) {
|
|
const int ElCount = Vars0.ElCount;
|
|
const int ElCountIO = Vars0.ElCountIO;
|
|
|
|
if (ElCountIO == 1) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
ip += ElCount;
|
|
op++;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 4) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
op[1] = (Tout)v[1];
|
|
op[2] = (Tout)v[2];
|
|
op[3] = (Tout)v[3];
|
|
ip += ElCount;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 3) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
op[1] = (Tout)v[1];
|
|
op[2] = (Tout)v[2];
|
|
ip += ElCount;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 2) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
op[1] = (Tout)v[1];
|
|
ip += ElCount;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function prepares input scanline buffer for *this filtering step.
|
|
* Left- and right-most pixels are replicated to make sure no buffer
|
|
* overrun happens. Such approach also allows to bypass any pointer
|
|
* range checks.
|
|
*
|
|
* @param Src Source buffer.
|
|
*/
|
|
|
|
void prepareInBuf(fptype* Src) const {
  // Nothing to do for upsampling steps or when no padding is requested.
  if (!IsUpsample && InPrefix + InSuffix != 0) {
    const int ElCount = Vars->ElCount;
    fptype* const LastPx = Src + (InLen - 1) * ElCount;

    // Prefix: replicate the first pixel InPrefix times, stepping backwards
    // from the element run just before Src.
    replicateArray(Src, ElCount, Src - ElCount, InPrefix, -ElCount);

    // Suffix: replicate the last pixel InSuffix times, stepping forwards
    // from the element run just after it.
    replicateArray(LastPx, ElCount, LastPx + ElCount, InSuffix, ElCount);
  }
}
|
|
|
|
/**
|
|
* Function performs scanline upsampling with filtering.
|
|
*
|
|
* @param Src Source scanline buffer (length = this -> InLen). Source
|
|
* scanline increment will be equal to ElCount.
|
|
* @param Dst Destination scanline buffer.
|
|
*/
|
|
|
|
void doUpsample(const fptype* const Src, fptype* const Dst) const {
|
|
const int ElCount = Vars->ElCount;
|
|
fptype* op0 = &Dst[-OutPrefix * ElCount];
|
|
memset(&op0->value, 0, (OutPrefix + OutLen + OutSuffix) * ElCount * sizeof(op0->value));
|
|
|
|
const fptype* ip = Src;
|
|
const int opstep = ElCount * ResampleFactor;
|
|
int l;
|
|
|
|
if (FltOrig.getCapacity() > 0) {
|
|
// Do not perform filtering, only upsample.
|
|
|
|
op0 += (OutPrefix % ResampleFactor) * ElCount;
|
|
l = OutPrefix / ResampleFactor;
|
|
|
|
if (ElCount == 1) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0[3] = ip[3];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0[3] = ip[3];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0[3] = ip[3];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
const fptype* const f = Flt;
|
|
const int flen = Flt.getCapacity();
|
|
fptype* op;
|
|
int i;
|
|
|
|
if (ElCount == 1) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[i] += f[i] * ip[0];
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[i] += f[i] * ip[0];
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[i] += f[i] * ip[0];
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op[3] += f[i] * ip[3];
|
|
op += 4;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op[3] += f[i] * ip[3];
|
|
op += 4;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op[3] += f[i] * ip[3];
|
|
op += 4;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op += 3;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op += 3;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op += 3;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op += 2;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op += 2;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op += 2;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
op = op0;
|
|
const fptype* dc = SuffixDC;
|
|
l = SuffixDC.getCapacity();
|
|
|
|
if (ElCount == 1) {
|
|
for (i = 0; i < l; i++) {
|
|
op[i] += ip[0] * dc[i];
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
op[3] += ip[3] * dc[0];
|
|
dc++;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
dc++;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
dc++;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
ip = Src;
|
|
op = Dst - InPrefix * opstep;
|
|
dc = PrefixDC;
|
|
l = PrefixDC.getCapacity();
|
|
|
|
if (ElCount == 1) {
|
|
for (i = 0; i < l; i++) {
|
|
op[i] += ip[0] * dc[i];
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
op[3] += ip[3] * dc[0];
|
|
dc++;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
dc++;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
dc++;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs scanline filtering with optional downsampling.
|
|
* Function makes use of the symmetry of the filter.
|
|
*
|
|
* @param Src Source scanline buffer (length = this -> InLen). Source
|
|
* scanline increment will be equal to ElCount.
|
|
* @param Dst Destination scanline buffer.
|
|
* @param DstIncr Destination scanline buffer increment, used for
|
|
* horizontal or vertical scanline stepping.
|
|
*/
|
|
|
|
void doFilter(const fptype* const Src, fptype* Dst, const int DstIncr) const {
|
|
const int ElCount = Vars->ElCount;
|
|
const fptype* const f = &Flt[FltLatency];
|
|
const int flen = FltLatency + 1;
|
|
const int ipstep = ElCount * ResampleFactor;
|
|
const fptype* ip = Src - EdgePixelCount * ipstep;
|
|
const fptype* ip1;
|
|
const fptype* ip2;
|
|
int l = OutLen;
|
|
int i;
|
|
|
|
if (ElCount == 1) {
|
|
while (l > 0) {
|
|
fptype s = f[0] * ip[0];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1++;
|
|
ip2--;
|
|
s += f[i] * (ip1[0] + ip2[0]);
|
|
}
|
|
|
|
Dst[0] = s;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
fptype s1 = f[0] * ip[0];
|
|
fptype s2 = f[0] * ip[1];
|
|
fptype s3 = f[0] * ip[2];
|
|
fptype s4 = f[0] * ip[3];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1 += 4;
|
|
ip2 -= 4;
|
|
s1 += f[i] * (ip1[0] + ip2[0]);
|
|
s2 += f[i] * (ip1[1] + ip2[1]);
|
|
s3 += f[i] * (ip1[2] + ip2[2]);
|
|
s4 += f[i] * (ip1[3] + ip2[3]);
|
|
}
|
|
|
|
Dst[0] = s1;
|
|
Dst[1] = s2;
|
|
Dst[2] = s3;
|
|
Dst[3] = s4;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
fptype s1 = f[0] * ip[0];
|
|
fptype s2 = f[0] * ip[1];
|
|
fptype s3 = f[0] * ip[2];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1 += 3;
|
|
ip2 -= 3;
|
|
s1 += f[i] * (ip1[0] + ip2[0]);
|
|
s2 += f[i] * (ip1[1] + ip2[1]);
|
|
s3 += f[i] * (ip1[2] + ip2[2]);
|
|
}
|
|
|
|
Dst[0] = s1;
|
|
Dst[1] = s2;
|
|
Dst[2] = s3;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
fptype s1 = f[0] * ip[0];
|
|
fptype s2 = f[0] * ip[1];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1 += 2;
|
|
ip2 -= 2;
|
|
s1 += f[i] * (ip1[0] + ip2[0]);
|
|
s2 += f[i] * (ip1[1] + ip2[1]);
|
|
}
|
|
|
|
Dst[0] = s1;
|
|
Dst[1] = s2;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs resizing of a single scanline. This function does
|
|
* not "know" about the length of the source scanline buffer. This buffer
|
|
* should be padded with enough pixels so that ( SrcPos - FilterLenD2 ) is
|
|
* always >= 0 and ( SrcPos + ( DstLineLen - 1 ) * k + FilterLenD2 + 1 )
|
|
* does not exceed source scanline's buffer length. SrcLine's increment is
|
|
* assumed to be equal to ElCount.
|
|
*
|
|
* @param SrcLine Source scanline buffer.
|
|
* @param DstLine Destination (resized) scanline buffer.
|
|
* @param DstLineIncr Destination scanline position increment, used for
|
|
* horizontal or vertical scanline stepping.
|
|
* @param xx Temporary buffer, of size FltBank -> getFilterLen(), must be
|
|
* aligned by fpclass :: fpalign.
|
|
*/
|
|
|
|
void doResize(const fptype* SrcLine, fptype* DstLine, const int DstLineIncr,
|
|
fptype* const) const {
|
|
const int IntFltLen = FltBank->getFilterLen();
|
|
const int ElCount = Vars->ElCount;
|
|
const typename CImageResizerFilterStep<fptype, fptypeatom>::CResizePos*
|
|
rpos = &(*RPosBuf)[0];
|
|
|
|
const typename CImageResizerFilterStep<
|
|
fptype, fptypeatom>::CResizePos* const rpose = rpos + OutLen;
|
|
|
|
#define AVIR_RESIZE_PART1 \
|
|
while (rpos < rpose) { \
|
|
const fptype x = (fptype)rpos->x; \
|
|
const fptype* const ftp = rpos->ftp; \
|
|
const fptype* const ftp2 = ftp + IntFltLen; \
|
|
const fptype* Src = SrcLine + rpos->SrcOffs; \
|
|
int i;
|
|
|
|
#define AVIR_RESIZE_PART1nx \
|
|
while (rpos < rpose) { \
|
|
const fptype* const ftp = rpos->ftp; \
|
|
const fptype* Src = SrcLine + rpos->SrcOffs; \
|
|
int i;
|
|
|
|
#define AVIR_RESIZE_PART2 \
|
|
DstLine += DstLineIncr; \
|
|
rpos++; \
|
|
}
|
|
|
|
if (FltBank->getOrder() == 1) {
|
|
if (ElCount == 1) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
sum += (ftp[i] + ftp2[i] * x) * Src[i];
|
|
}
|
|
|
|
DstLine[0] = sum;
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 4) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[4];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
sum[3] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i] + ftp2[i] * x;
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
sum[3] += xx * Src[3];
|
|
Src += 4;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
DstLine[3] = sum[3];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 3) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[3];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i] + ftp2[i] * x;
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
Src += 3;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 2) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[2];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i] + ftp2[i] * x;
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
Src += 2;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
} else {
|
|
if (ElCount == 1) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
sum += ftp[i] * Src[i];
|
|
}
|
|
|
|
DstLine[0] = sum;
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 4) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[4];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
sum[3] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i];
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
sum[3] += xx * Src[3];
|
|
Src += 4;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
DstLine[3] = sum[3];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 3) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[3];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i];
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
Src += 3;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 2) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[2];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i];
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
Src += 2;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
}
|
|
}
|
|
#undef AVIR_RESIZE_PART2
|
|
#undef AVIR_RESIZE_PART1nx
|
|
#undef AVIR_RESIZE_PART1
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's default dithering class.
|
|
*
|
|
* This class defines an object that performs rounding, clipping and dithering
|
|
* operations over horizontal scanline pixels before scanline is stored in the
|
|
* output buffer.
|
|
*
|
|
* The ditherer should expect the same storage order of the pixels in a
|
|
* scanline as used in the "filtering step" class. So, a separate ditherer
|
|
* class should be defined for each scanline pixel storage style. The default
|
|
* ditherer implements a simple rounding without dithering: it can be used for
|
|
* an efficient dithering method which can be multi-threaded.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel data. SIMD
|
|
* types can be used.
|
|
*/
|
|
|
|
template <class fptype>
|
|
class CImageResizerDithererDefINL {
|
|
public:
|
|
/**
|
|
* Function initializes the ditherer object.
|
|
*
|
|
* @param aLen Scanline length in pixels to process.
|
|
* @param aVars Image resizing-related variables.
|
|
* @param aTrMul Bit-depth truncation multiplier. 1 - no additional
|
|
* truncation.
|
|
* @param aPkOut Peak output value allowed.
|
|
*/
|
|
|
|
void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul,
|
|
const double aPkOut) {
|
|
Len = aLen;
|
|
Vars = &aVars;
|
|
LenE = aLen * Vars->ElCount;
|
|
TrMul0 = aTrMul;
|
|
PkOut0 = aPkOut;
|
|
}
|
|
|
|
/**
|
|
* @return "True" if dithering is recursive relative to scanlines meaning
|
|
* multi-threaded execution is not supported by this dithering method.
|
|
*/
|
|
|
|
static bool isRecursive() { return (false); }
|
|
|
|
/**
|
|
* Function performs rounding and clipping operations.
|
|
*
|
|
* @param ResScanline The buffer containing the final scanline.
|
|
*/
|
|
|
|
void dither(fptype* const ResScanline) const {
|
|
const fptype c0 = 0.0;
|
|
const fptype PkOut = (fptype)PkOut0;
|
|
int j;
|
|
|
|
if (TrMul0 == 1.0) {
|
|
// Optimization - do not perform bit depth truncation.
|
|
|
|
for (j = 0; j < LenE; j++) {
|
|
ResScanline[j] = clamp(round(ResScanline[j]), c0, PkOut);
|
|
}
|
|
} else {
|
|
const fptype TrMul = (fptype)TrMul0;
|
|
|
|
for (j = 0; j < LenE; j++) {
|
|
const fptype z0 = round(ResScanline[j] / TrMul) * TrMul;
|
|
ResScanline[j] = clamp(z0, c0, PkOut);
|
|
}
|
|
}
|
|
}
|
|
|
|
protected:
|
|
int Len; ///< Scanline's length in pixels.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizing-related variables.
|
|
///<
|
|
int LenE; ///< = LenE * ElCount.
|
|
///<
|
|
double TrMul0; ///< Bit-depth truncation multiplier.
|
|
///<
|
|
double PkOut0; ///< Peak output value allowed.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's error-diffusion dithering class, interleaved mode.
|
|
*
|
|
* This ditherer implements error-diffusion dithering which looks good, and
|
|
* whose results are compressed by PNG well. This implementation uses
|
|
* weighting coefficients obtained via machine optimization and visual
|
|
* evaluation.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel data. SIMD
|
|
* types can be used.
|
|
*/
|
|
|
|
template <class fptype>
|
|
class CImageResizerDithererErrdINL
|
|
: public CImageResizerDithererDefINL<fptype> {
|
|
public:
|
|
/**
|
|
* Function initializes the ditherer object.
|
|
*
|
|
* @param aLen Scanline length in pixels to process.
|
|
* @param aVars Image resizing-related variables.
|
|
* @param aTrMul Bit-depth truncation multiplier. 1 - no additional
|
|
* truncation.
|
|
* @param aPkOut Peak output value allowed.
|
|
*/
|
|
|
|
void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul,
|
|
const double aPkOut) {
|
|
CImageResizerDithererDefINL<fptype>::init(aLen, aVars, aTrMul, aPkOut);
|
|
|
|
ResScanlineDith0.alloc(LenE + Vars->ElCount, sizeof(fptype));
|
|
ResScanlineDith = ResScanlineDith0 + Vars->ElCount;
|
|
int i;
|
|
|
|
for (i = 0; i < LenE + Vars->ElCount; i++) {
|
|
ResScanlineDith0[i] = 0.0;
|
|
}
|
|
}
|
|
|
|
static bool isRecursive() { return (true); }
|
|
|
|
void dither(fptype* const ResScanline) {
|
|
const int ElCount = Vars->ElCount;
|
|
const fptype c0 = 0.0;
|
|
const fptype TrMul = (fptype)TrMul0;
|
|
const fptype PkOut = (fptype)PkOut0;
|
|
int j;
|
|
|
|
for (j = 0; j < LenE; j++) {
|
|
ResScanline[j] += ResScanlineDith[j];
|
|
ResScanlineDith[j] = 0.0;
|
|
}
|
|
|
|
for (j = 0; j < LenE - ElCount; j++) {
|
|
// Perform rounding, noise estimation and saturation.
|
|
|
|
const fptype z0 = round(ResScanline[j] / TrMul) * TrMul;
|
|
const fptype Noise = ResScanline[j] - z0;
|
|
ResScanline[j] = clamp(z0, c0, PkOut);
|
|
|
|
ResScanline[j + ElCount] += Noise * (fptype)0.364842;
|
|
ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305;
|
|
ResScanlineDith[j] += Noise * (fptype)0.364842;
|
|
ResScanlineDith[j + ElCount] += Noise * (fptype)0.063011;
|
|
}
|
|
|
|
while (j < LenE) {
|
|
const fptype z0 = round(ResScanline[j] / TrMul) * TrMul;
|
|
const fptype Noise = ResScanline[j] - z0;
|
|
ResScanline[j] = clamp(z0, c0, PkOut);
|
|
|
|
ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305;
|
|
ResScanlineDith[j] += Noise * (fptype)0.364842;
|
|
j++;
|
|
}
|
|
}
|
|
|
|
protected:
|
|
using CImageResizerDithererDefINL<fptype>::Len;
|
|
using CImageResizerDithererDefINL<fptype>::Vars;
|
|
using CImageResizerDithererDefINL<fptype>::LenE;
|
|
using CImageResizerDithererDefINL<fptype>::TrMul0;
|
|
using CImageResizerDithererDefINL<fptype>::PkOut0;
|
|
|
|
CBuffer<fptype> ResScanlineDith0; ///< Error diffusion buffer.
|
|
///<
|
|
fptype* ResScanlineDith; ///< Error diffusion buffer pointer which skips
|
|
///< the first ElCount elements.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Floating-point processing definition and abstraction class.
|
|
*
|
|
* This class defines several constants and typedefs that point to classes
|
|
* that should be used by the image resizing algorithm. Such "definition
|
|
* class" can be used to define alternative scanline processing algorithms
|
|
* (e.g. SIMD) and image scanline packing styles used during processing. This
|
|
* class also offers an abstraction layer for dithering, rounding and
|
|
* clamping (saturation) operation.
|
|
*
|
|
* The fpclass_def class can be used to define processing using both SIMD and
|
|
* non-SIMD types, but using algorithms that are operate on interleaved pixels
|
|
* and non-SIMD optimized themselves.
|
|
*
|
|
* @tparam afptype Floating point type to use for storing intermediate data
|
|
* and variables. For variables that are not used in intensive calculations
|
|
* the "double" type is always used. On the latest Intel processors (like
|
|
* i7-4770K) there is almost no performance difference between "double" and
|
|
* "float". Image quality differences between "double" and "float" are not
|
|
* apparent on 8-bit images. At the same time the "float" uses half amount of
|
|
* working memory the "double" type uses. SIMD types can be used. The
|
|
* functions round() and clamp() in the "avir" or other visible namespace
|
|
* should be available for the specified type. SIMD types allow to perform
|
|
* resizing of images with more than 4 channels, to be exact 4 * SIMD element
|
|
* number (e.g. 16 for float4), without modification of the image resizing
|
|
* algorithm required.
|
|
* @tparam afptypeatom The atomic type the "afptype" consists of.
|
|
* @tparam adith Ditherer class to use during processing.
|
|
*/
|
|
|
|
template <class afptype, class afptypeatom = afptype,
|
|
class adith = CImageResizerDithererDefINL<afptype> >
|
|
class fpclass_def {
|
|
public:
|
|
typedef afptype fptype; ///< Floating-point type to use during processing.
|
|
///<
|
|
typedef afptypeatom fptypeatom; ///< Atomic type "fptype" consists of.
|
|
///<
|
|
static const int fppack =
|
|
sizeof(fptype) /
|
|
sizeof(fptypeatom); ///<
|
|
///< The number of atomic types stored in a single
|
|
///< "fptype" element.
|
|
///<
|
|
static const int fpalign =
|
|
sizeof(fptype); ///< Suggested alignment size
|
|
///< in bytes. This is not a required alignment, because
|
|
///< image resizing algorithm cannot be made to have a
|
|
///< strictly aligned data access at all steps (e.g.
|
|
///< interpolation cannot perform aligned accesses).
|
|
///<
|
|
static const int elalign =
|
|
1; ///< Length alignment of arrays of elements.
|
|
///< This applies to filters and intermediate buffers: this constant
|
|
///< forces filters and scanlines to have a length which is a multiple
|
|
///< of this value, for more efficient SIMD implementation.
|
|
///<
|
|
static const int packmode = 0; ///< 0 if interleaved packing, 1 if
|
|
///< de-interleaved.
|
|
///<
|
|
typedef CImageResizerFilterStepINL<fptype, fptypeatom>
|
|
CFilterStep; ///<
|
|
///< Filtering step class to use during processing.
|
|
///<
|
|
typedef adith CDitherer; ///< Ditherer class to use during processing.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer class.
|
|
*
|
|
* The object of this class can be used to resize 1-4 channel images to any
|
|
* required size. Resizing is performed by utilizing interpolated sinc
|
|
* fractional delay filters plus (if necessary) a cascade of built-in
|
|
* sinc function-based 2X upsampling or 2X downsampling stages, followed by a
|
|
* correction filtering.
|
|
*
|
|
* Object of this class can be allocated on stack.
|
|
*
|
|
* @tparam fpclass Floating-point processing definition class to use. See
|
|
* avir::fpclass_def for more details.
|
|
*/
|
|
|
|
template <class fpclass = fpclass_def<float> >
|
|
class CImageResizer {
|
|
public:
|
|
/**
|
|
* Constructor initializes the resizer.
|
|
*
|
|
* @param aResBitDepth Required bit depth of resulting image (1-16). If
|
|
* integer value output is used (e.g. uint8_t), the bit depth also affects
|
|
* rounding: for example, if aResBitDepth=6 and "Tout" is uint8_t, the
|
|
* result will be rounded to 6 most significant bits (2 least significant
|
|
* bits truncated, with dithering applied).
|
|
* @param aSrcBitDepth Source image's real bit-depth. Set to 0 to use
|
|
* aResBitDepth.
|
|
* @param aParams Resizing algorithm's parameters to use. Leave out for
|
|
* default values. Can be useful when performing automatic optimization of
|
|
* parameters.
|
|
*/
|
|
|
|
CImageResizer(const int aResBitDepth = 8, const int aSrcBitDepth = 0,
|
|
const CImageResizerParams& aParams = CImageResizerParamsDef())
|
|
: Params(aParams), ResBitDepth(aResBitDepth) {
|
|
SrcBitDepth = (aSrcBitDepth == 0 ? ResBitDepth : aSrcBitDepth);
|
|
|
|
initFilterBank(FixedFilterBank, 1.0, false, CFltBuffer());
|
|
FixedFilterBank.createAllFilters();
|
|
}
|
|
|
|
/**
|
|
* Function resizes image.
|
|
*
|
|
* @param SrcBuf Source image buffer.
|
|
* @param SrcWidth Source image width.
|
|
* @param SrcHeight Source image height.
|
|
* @param SrcScanlineSize Physical size of source scanline in elements
|
|
* (not bytes). If this value is below 1, SrcWidth * ElCountIO will be
|
|
* used as the physical source scanline size.
|
|
* @param[out] NewBuf Buffer to accept the resized image. Can be equal to
|
|
* SrcBuf if the size of the resized image is smaller or equal to source
|
|
* image in size.
|
|
* @param NewWidth New image width.
|
|
* @param NewHeight New image height.
|
|
* @param ElCountIO The number of elements (channels) used to store each
|
|
* source and destination pixel (1-4).
|
|
* @param k Resizing step (one output pixel corresponds to "k" input
|
|
* pixels). A downsizing factor if > 1.0; upsizing factor if <= 1.0.
|
|
* Multiply by -1 if you would like to bypass "ox" and "oy" adjustment
|
|
* which is done by default to produce a centered image. If step value
|
|
* equals 0, the step value will be chosen automatically and independently
|
|
* for horizontal and vertical resizing.
|
|
* @param[in,out] aVars Pointer to variables structure to be passed to the
|
|
* image resizing function. Can be NULL. Only variables that are
|
|
* initialized in default constructor of this structure are accepted by
|
|
* this function. These variables will not be changed by this function.
|
|
* All other variables can be modified by this function. The access to
|
|
* this object is not thread-safe, each concurrent instance of this
|
|
* function should use a separate aVars object.
|
|
* @tparam Tin Input buffer element's type. Can be uint8_t (0-255 value
|
|
* range), uint16_t (0-65535 value range), float (0.0-1.0 value range),
|
|
* double (0.0-1.0 value range). Larger integer types are treated as
|
|
* uint16_t. Signed integer types are unsupported.
|
|
* @tparam Tout Output buffer element's type. Can be uint8_t (0-255 value
|
|
* range), uint16_t (0-65535 value range), float (0.0-1.0 value range),
|
|
* double (0.0-1.0 value range). Larger integer types are treated as
|
|
* uint16_t. Signed integer types are unsupported.
|
|
*/
|
|
|
|
template <class Tin, class Tout>
|
|
void resizeImage(const Tin* const SrcBuf, const int SrcWidth,
|
|
const int SrcHeight, int SrcScanlineSize, Tout* const NewBuf,
|
|
const int NewWidth, const int NewHeight, const int ElCountIO,
|
|
const double k,
|
|
CImageResizerVars* const aVars = NULL) const {
|
|
if (SrcWidth == 0 || SrcHeight == 0) {
|
|
memset(NewBuf, 0, (size_t)NewWidth * NewHeight * sizeof(Tout));
|
|
|
|
return;
|
|
} else if (NewWidth == 0 || NewHeight == 0) {
|
|
return;
|
|
}
|
|
|
|
CImageResizerVars DefVars;
|
|
CImageResizerVars& Vars = (aVars == NULL ? DefVars : *aVars);
|
|
|
|
CImageResizerThreadPool DefThreadPool;
|
|
CImageResizerThreadPool& ThreadPool =
|
|
(Vars.ThreadPool == NULL ? DefThreadPool : *Vars.ThreadPool);
|
|
|
|
// Define resizing steps, also optionally modify offsets so that
|
|
// resizing produces a "centered" image.
|
|
|
|
double kx;
|
|
double ky;
|
|
double ox = Vars.ox;
|
|
double oy = Vars.oy;
|
|
|
|
if (k == 0.0) {
|
|
if (NewWidth > SrcWidth) {
|
|
kx = (double)(SrcWidth - 1) / (NewWidth - 1);
|
|
} else {
|
|
kx = (double)SrcWidth / NewWidth;
|
|
ox += (kx - 1.0) * 0.5;
|
|
}
|
|
|
|
if (NewHeight > SrcHeight) {
|
|
ky = (double)(SrcHeight - 1) / (NewHeight - 1);
|
|
} else {
|
|
ky = (double)SrcHeight / NewHeight;
|
|
oy += (ky - 1.0) * 0.5;
|
|
}
|
|
} else if (k > 0.0) {
|
|
kx = k;
|
|
ky = k;
|
|
|
|
if (k > 1.0) {
|
|
const double ko = (k - 1.0) * 0.5;
|
|
ox += ko;
|
|
oy += ko;
|
|
}
|
|
} else {
|
|
kx = -k;
|
|
ky = -k;
|
|
}
|
|
|
|
// Evaluate pre-multipliers used on the output stage.
|
|
|
|
const bool IsInFloat = ((Tin)0.4 != 0);
|
|
const bool IsOutFloat = ((Tout)0.4 != 0);
|
|
double OutMul; // Output multiplier.
|
|
|
|
if (Vars.UseSRGBGamma) {
|
|
if (IsInFloat) {
|
|
Vars.InGammaMult = 1.0;
|
|
} else {
|
|
Vars.InGammaMult = 1.0 / (sizeof(Tin) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
|
|
if (IsOutFloat) {
|
|
Vars.OutGammaMult = 1.0;
|
|
} else {
|
|
Vars.OutGammaMult = (sizeof(Tout) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
|
|
OutMul = 1.0;
|
|
} else {
|
|
if (IsOutFloat) {
|
|
OutMul = 1.0;
|
|
} else {
|
|
OutMul = (sizeof(Tout) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
|
|
if (!IsInFloat) {
|
|
OutMul /= (sizeof(Tin) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
}
|
|
|
|
// Fill widely-used variables.
|
|
|
|
const int ElCount = (ElCountIO + fpclass ::fppack - 1) / fpclass ::fppack;
|
|
|
|
const int NewWidthE = NewWidth * ElCount;
|
|
|
|
if (SrcScanlineSize < 1) {
|
|
SrcScanlineSize = SrcWidth * ElCountIO;
|
|
}
|
|
|
|
Vars.ElCount = ElCount;
|
|
Vars.ElCountIO = ElCountIO;
|
|
Vars.fppack = fpclass ::fppack;
|
|
Vars.fpalign = fpclass ::fpalign;
|
|
Vars.elalign = fpclass ::elalign;
|
|
Vars.packmode = fpclass ::packmode;
|
|
|
|
// Horizontal scanline filtering and resizing.
|
|
|
|
CDSPFracFilterBankLin<fptype> FltBank;
|
|
CFilterSteps FltSteps;
|
|
typename CFilterStep ::CRPosBufArray RPosBufArray;
|
|
CBuffer<uint8_t> UsedFracMap;
|
|
|
|
// Perform the filtering steps modeling at various modes, find the
|
|
// most efficient mode for both horizontal and vertical resizing.
|
|
|
|
int UseBuildMode = 1;
|
|
const int BuildModeCount = (FixedFilterBank.getOrder() == 0 ? 4 : 2);
|
|
|
|
int m;
|
|
|
|
if (Vars.BuildMode >= 0) {
|
|
UseBuildMode = Vars.BuildMode;
|
|
} else {
|
|
int BestScore = 0x7FFFFFFF;
|
|
|
|
for (m = 0; m < BuildModeCount; m++) {
|
|
CDSPFracFilterBankLin<fptype> TmpBank;
|
|
CFilterSteps TmpSteps;
|
|
Vars.k = kx;
|
|
Vars.o = ox;
|
|
buildFilterSteps(TmpSteps, Vars, TmpBank, OutMul, m, true);
|
|
updateFilterStepBuffers(TmpSteps, Vars, RPosBufArray, SrcWidth,
|
|
NewWidth);
|
|
|
|
fillUsedFracMap(TmpSteps[Vars.ResizeStep], UsedFracMap);
|
|
const int c = calcComplexity(TmpSteps, Vars, UsedFracMap, SrcHeight);
|
|
|
|
if (c < BestScore) {
|
|
UseBuildMode = m;
|
|
BestScore = c;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Perform the actual filtering steps building.
|
|
|
|
Vars.k = kx;
|
|
Vars.o = ox;
|
|
buildFilterSteps(FltSteps, Vars, FltBank, OutMul, UseBuildMode, false);
|
|
|
|
updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcWidth, NewWidth);
|
|
|
|
updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth);
|
|
|
|
const int ThreadCount = ThreadPool.getSuggestedWorkloadCount();
|
|
// Includes the current thread.
|
|
|
|
CStructArray<CThreadData<Tin, Tout> > td;
|
|
td.setItemCount(ThreadCount);
|
|
int i;
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
if (i > 0) {
|
|
ThreadPool.addWorkload(&td[i]);
|
|
}
|
|
|
|
td[i].init(i, ThreadCount, FltSteps, Vars);
|
|
|
|
td[i].initScanlineQueue(td[i].sopResizeH, SrcHeight, SrcWidth);
|
|
}
|
|
|
|
CBuffer<fptype, size_t> FltBuf(
|
|
(size_t)NewWidthE * SrcHeight,
|
|
fpclass ::fpalign); // Temporary buffer that receives
|
|
// horizontally-filtered and resized image.
|
|
|
|
for (i = 0; i < SrcHeight; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
(void*)&SrcBuf[(size_t)i * SrcScanlineSize],
|
|
&FltBuf[(size_t)i * NewWidthE]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
|
|
// Vertical scanline filtering and resizing, reuse previously defined
|
|
// filtering steps if possible.
|
|
|
|
const int PrevUseBuildMode = UseBuildMode;
|
|
|
|
if (Vars.BuildMode >= 0) {
|
|
UseBuildMode = Vars.BuildMode;
|
|
} else {
|
|
CImageResizerVars TmpVars(Vars);
|
|
int BestScore = 0x7FFFFFFF;
|
|
|
|
for (m = 0; m < BuildModeCount; m++) {
|
|
CDSPFracFilterBankLin<fptype> TmpBank;
|
|
TmpBank.copyInitParams(FltBank);
|
|
CFilterSteps TmpSteps;
|
|
TmpVars.k = ky;
|
|
TmpVars.o = oy;
|
|
buildFilterSteps(TmpSteps, TmpVars, TmpBank, 1.0, m, true);
|
|
updateFilterStepBuffers(TmpSteps, TmpVars, RPosBufArray, SrcHeight,
|
|
NewHeight);
|
|
|
|
fillUsedFracMap(TmpSteps[TmpVars.ResizeStep], UsedFracMap);
|
|
|
|
const int c = calcComplexity(TmpSteps, TmpVars, UsedFracMap, NewWidth);
|
|
|
|
if (c < BestScore) {
|
|
UseBuildMode = m;
|
|
BestScore = c;
|
|
}
|
|
}
|
|
}
|
|
|
|
Vars.k = ky;
|
|
Vars.o = oy;
|
|
|
|
if (UseBuildMode == PrevUseBuildMode && ky == kx) {
|
|
if (OutMul != 1.0) {
|
|
modifyCorrFilterDCGain(FltSteps, 1.0 / OutMul);
|
|
}
|
|
} else {
|
|
buildFilterSteps(FltSteps, Vars, FltBank, 1.0, UseBuildMode, false);
|
|
}
|
|
|
|
updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcHeight, NewHeight);
|
|
|
|
updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth);
|
|
|
|
if (IsOutFloat && sizeof(FltBuf[0]) == sizeof(Tout) &&
|
|
fpclass ::packmode == 0) {
|
|
// In-place output.
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight,
|
|
NewWidthE, NewWidthE);
|
|
}
|
|
|
|
for (i = 0; i < NewWidth; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
&FltBuf[(size_t)i * ElCount],
|
|
(fptype*)&NewBuf[(size_t)i * ElCount]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
ThreadPool.removeAllWorkloads();
|
|
|
|
return;
|
|
}
|
|
|
|
CBuffer<fptype, size_t> ResBuf((size_t)NewWidthE * NewHeight,
|
|
fpclass ::fpalign);
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight, NewWidthE,
|
|
NewWidthE);
|
|
}
|
|
|
|
const int im = (fpclass ::packmode == 0 ? ElCount : 1);
|
|
|
|
for (i = 0; i < NewWidth; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(&FltBuf[(size_t)i * im],
|
|
&ResBuf[(size_t)i * im]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
|
|
if (IsOutFloat) {
|
|
// Perform output, but skip dithering.
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopUnpackH, NewHeight, NewWidth);
|
|
}
|
|
|
|
for (i = 0; i < NewHeight; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
&ResBuf[(size_t)i * NewWidthE],
|
|
&NewBuf[(size_t)i * NewWidth * ElCountIO]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
ThreadPool.removeAllWorkloads();
|
|
|
|
return;
|
|
}
|
|
|
|
// Perform output with dithering (for integer output only).
|
|
|
|
int TruncBits; // The number of lower bits to truncate and dither.
|
|
int OutRange; // Output range.
|
|
|
|
if (sizeof(Tout) == 1) {
|
|
TruncBits = 8 - ResBitDepth;
|
|
OutRange = 255;
|
|
} else {
|
|
TruncBits = 16 - ResBitDepth;
|
|
OutRange = 65535;
|
|
}
|
|
|
|
const double PkOut = OutRange;
|
|
const double TrMul =
|
|
(TruncBits > 0 ? PkOut / (OutRange >> TruncBits) : 1.0);
|
|
|
|
if (CDitherer ::isRecursive()) {
|
|
td[0].getDitherer().init(NewWidth, Vars, TrMul, PkOut);
|
|
|
|
if (Vars.UseSRGBGamma) {
|
|
for (i = 0; i < NewHeight; i++) {
|
|
fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE];
|
|
|
|
CFilterStep ::applySRGBGamma(ResScanline, NewWidth, Vars);
|
|
|
|
td[0].getDitherer().dither(ResScanline);
|
|
|
|
CFilterStep ::unpackScanline(
|
|
ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth,
|
|
Vars);
|
|
}
|
|
} else {
|
|
for (i = 0; i < NewHeight; i++) {
|
|
fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE];
|
|
|
|
td[0].getDitherer().dither(ResScanline);
|
|
|
|
CFilterStep ::unpackScanline(
|
|
ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth,
|
|
Vars);
|
|
}
|
|
}
|
|
} else {
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopDitherAndUnpackH, NewHeight, NewWidth);
|
|
|
|
td[i].getDitherer().init(NewWidth, Vars, TrMul, PkOut);
|
|
}
|
|
|
|
for (i = 0; i < NewHeight; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
&ResBuf[(size_t)i * NewWidthE],
|
|
&NewBuf[(size_t)i * NewWidth * ElCountIO]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
}
|
|
|
|
ThreadPool.removeAllWorkloads();
|
|
}
|
|
|
|
private:
|
|
typedef typename fpclass ::fptype fptype; ///< Floating-point type to use
|
|
///< during processing.
|
|
///<
|
|
typedef typename fpclass ::CFilterStep
|
|
CFilterStep; ///< Filtering step
|
|
///< class to use during processing.
|
|
///<
|
|
typedef typename fpclass ::CDitherer CDitherer; ///< Ditherer class to
|
|
///< use during processing.
|
|
///<
|
|
CImageResizerParams Params; ///< Algorithm's parameters currently in use.
|
|
///<
|
|
int SrcBitDepth; ///< Bit resolution of the source image.
|
|
///<
|
|
int ResBitDepth; ///< Bit resolution of the resulting image.
|
|
///<
|
|
CDSPFracFilterBankLin<fptype>
|
|
FixedFilterBank; ///< Fractional delay
|
|
///< filter bank with fixed characteristics, mainly for
|
|
///< upsizing cases.
|
|
///<
|
|
|
|
/**
|
|
* @brief Filtering steps array.
|
|
*
|
|
* The object of this class stores filtering steps together.
|
|
*/
|
|
|
|
typedef CStructArray<CFilterStep> CFilterSteps;
|
|
|
|
/**
|
|
* Function initializes the filter bank in the specified resizing step
|
|
* according to the source and resulting image bit depths.
|
|
*
|
|
* @param FltBank Filter bank to initialize.
|
|
* @param CutoffMult Cutoff multiplier, 0 to 1. 1 corresponds to 0.5pi
|
|
* cutoff point.
|
|
* @param ForceHiOrder "True" if a high-order interpolation should be
|
|
* forced which requires considerably less resources for initialization.
|
|
* @param ExtFilter External filter to apply to interpolation filter.
|
|
*/
|
|
|
|
void initFilterBank(CDSPFracFilterBankLin<fptype>& FltBank,
|
|
const double CutoffMult, const bool ForceHiOrder,
|
|
const CFltBuffer& ExtFilter) const {
|
|
const int IntBitDepth =
|
|
(ResBitDepth > SrcBitDepth ? ResBitDepth : SrcBitDepth);
|
|
|
|
const double SNR = -6.02 * (IntBitDepth + 3);
|
|
int UseOrder;
|
|
int FracCount; // The number of fractional delay filters sampled by
|
|
// the filter bank. This variable affects the
|
|
// signal-to-noise ratio at interpolation stage.
|
|
// Theoretically, at UseOrder==1, 8-bit image resizing
|
|
// requires 66.2 dB SNR or 11. 16-bit resizing requires
|
|
// 114.4 dB SNR or 150. At UseOrder=0 the required number of
|
|
// filters is exponentially higher.
|
|
|
|
if (ForceHiOrder || IntBitDepth > 8) {
|
|
UseOrder = 1; // -146 dB max
|
|
FracCount = (int)ceil(0.23134052 * exp(-0.058062929 * SNR));
|
|
} else {
|
|
UseOrder = 0; // -72 dB max
|
|
FracCount = (int)ceil(0.33287686 * exp(-0.11334583 * SNR));
|
|
}
|
|
|
|
if (FracCount < 2) {
|
|
FracCount = 2;
|
|
}
|
|
|
|
FltBank.init(FracCount, UseOrder, Params.IntFltLen / CutoffMult,
|
|
Params.IntFltCutoff * CutoffMult, Params.IntFltAlpha,
|
|
ExtFilter, fpclass ::fpalign, fpclass ::elalign);
|
|
}
|
|
|
|
/**
|
|
* Function allocates filter buffer taking "fpclass" alignments into
|
|
* account. The allocated buffer may be larger than the requested size: in
|
|
* this case the additional elements will be zeroed by this function.
|
|
*
|
|
* @param Flt Filter buffer.
|
|
* @param ReqCapacity The required filter buffer's capacity.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter allocation.
|
|
* @param FltExt If non-NULL this variable will receive the number of
|
|
* elements the filter was extended by.
|
|
*/
|
|
|
|
static void allocFilter(CBuffer<fptype>& Flt, const int ReqCapacity,
|
|
const bool IsModel = false,
|
|
int* const FltExt = NULL) {
|
|
int UseCapacity =
|
|
(ReqCapacity + fpclass ::elalign - 1) & ~(fpclass ::elalign - 1);
|
|
|
|
int Ext = UseCapacity - ReqCapacity;
|
|
|
|
if (FltExt != NULL) {
|
|
*FltExt = Ext;
|
|
}
|
|
|
|
if (IsModel) {
|
|
Flt.forceCapacity(UseCapacity);
|
|
return;
|
|
}
|
|
|
|
Flt.alloc(UseCapacity, fpclass ::fpalign);
|
|
|
|
while (Ext > 0) {
|
|
Ext--;
|
|
Flt[ReqCapacity + Ext] = 0.0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function assigns filter parameters to the specified filtering step
|
|
* object.
|
|
*
|
|
* @param fs Filtering step to assign parameter to. This step cannot be
|
|
* the last step if ResampleFactor greater than 1 was specified.
|
|
* @param IsUpsample "True" if upsampling step. Should be set to "false"
|
|
* if FltCutoff is negative.
|
|
* @param ResampleFactor Resampling factor of this filter (>=1).
|
|
* @param FltCutoff Filter cutoff point. This value will be divided by the
|
|
* ResampleFactor if IsUpsample equals "true". If zero value was
|
|
* specified, the "half-band" predefined filter will be created. In this
|
|
* case the ResampleFactor will modify the filter cutoff point.
|
|
* @param DCGain DC gain to apply to the filter. Assigned to filtering
|
|
* step's DCGain variable.
|
|
* @param UseFltOrig "True" if the originally-designed filter should be
|
|
* left in filtering step's FltOrig buffer. Otherwise it will be freed.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
void assignFilterParams(CFilterStep& fs, const bool IsUpsample,
                        const int ResampleFactor, const double FltCutoff,
                        const double DCGain, const bool UseFltOrig,
                        const bool IsModel) const {
  // Pick the window parameter, half-length and angular cutoff. A zero
  // FltCutoff selects the predefined "half-band" parameters scaled by
  // 2/ResampleFactor; any other value selects the low-pass parameters.

  double fltAlpha;
  double halfLen;
  double cutoffFreq;

  if (FltCutoff != 0.0) {
    fltAlpha = Params.LPFltAlpha;
    halfLen = 0.25 * Params.LPFltBaseLen / FltCutoff;
    cutoffFreq = AVIR_PI * Params.LPFltCutoffMult * FltCutoff;
  } else {
    const double m = 2.0 / ResampleFactor;
    fltAlpha = Params.HBFltAlpha;
    halfLen = 0.5 * Params.HBFltLen / m;
    cutoffFreq = AVIR_PI * Params.HBFltCutoff * m;
  }

  // An upsampling filter is stretched by ResampleFactor: it becomes
  // longer, its cutoff lower, and its DC gain is multiplied by the factor.

  if (IsUpsample) {
    halfLen *= ResampleFactor;
    cutoffFreq /= ResampleFactor;
  }

  fs.DCGain = (IsUpsample ? DCGain * ResampleFactor : DCGain);

  // Record the design parameters alongside the original filter buffer.

  fs.FltOrig.Len2 = halfLen;
  fs.FltOrig.Freq = cutoffFreq;
  fs.FltOrig.Alpha = fltAlpha;
  fs.FltOrig.DCGain = fs.DCGain;

  CDSPPeakedCosineLPF designer(halfLen, cutoffFreq, fltAlpha);

  fs.IsUpsample = IsUpsample;
  fs.ResampleFactor = ResampleFactor;
  fs.FltLatency = designer.fl2;

  int extLen;  // Filter's extension due to fpclass :: elalign.

  if (!IsModel) {
    // Design, optimize and normalize the filter, then copy it into the
    // working buffer.

    fs.FltOrig.alloc(designer.FilterLen);

    designer.generateLPF(&fs.FltOrig[0], 1.0);
    optimizeFIRFilter(fs.FltOrig, fs.FltLatency);
    normalizeFIRFilter(&fs.FltOrig[0], fs.FltOrig.getCapacity(), fs.DCGain);

    allocFilter(fs.Flt, fs.FltOrig.getCapacity(), false, &extLen);
    copyArray(&fs.FltOrig[0], &fs.Flt[0], fs.FltOrig.getCapacity());

    if (!UseFltOrig) {
      fs.FltOrig.free();
    }
  } else {
    allocFilter(fs.Flt, designer.FilterLen, true, &extLen);

    if (UseFltOrig) {
      // Allocate a real (zero-filled) buffer even in modeling mode since
      // this filter may be copied by the filter bank.

      fs.FltOrig.alloc(designer.FilterLen);
      memset(&fs.FltOrig[0], 0, designer.FilterLen * sizeof(fs.FltOrig[0]));
    }
  }

  if (!IsUpsample) {
    if (!UseFltOrig) {
      fs.EdgePixelCount = fs.EdgePixelCountDef;
    }

    return;
  }

  int tailLen = fs.Flt.getCapacity() - fs.FltLatency - ResampleFactor - extLen;

  allocFilter(fs.PrefixDC, tailLen, IsModel);
  allocFilter(fs.SuffixDC, fs.FltLatency, IsModel);

  if (IsModel) {
    return;
  }

  // Create prefix and suffix "tails" used during upsampling.

  // PrefixDC: start from the filter part that follows the latency plus
  // one resampling period, then keep adding the same filter advanced by
  // further periods while any elements remain.

  const fptype* src = &fs.Flt[fs.FltLatency + ResampleFactor];
  copyArray(src, &fs.PrefixDC[0], tailLen);

  for (src += ResampleFactor, tailLen -= ResampleFactor; tailLen > 0;
       src += ResampleFactor, tailLen -= ResampleFactor) {
    addArray(src, &fs.PrefixDC[0], tailLen);
  }

  // SuffixDC: start from the first FltLatency filter elements, then keep
  // adding the filter's beginning at destination offsets advanced by one
  // resampling period each time.

  tailLen = fs.FltLatency;
  fptype* dst = &fs.SuffixDC[0];
  copyArray(&fs.Flt[0], dst, tailLen);

  for (dst += ResampleFactor, tailLen -= ResampleFactor; tailLen > 0;
       dst += ResampleFactor, tailLen -= ResampleFactor) {
    addArray(&fs.Flt[0], dst, tailLen);
  }
}
|
|
|
|
/**
|
|
* Function adds a correction filter that tries to achieve a linear
|
|
* frequency response at all frequencies. The actual resulting response
|
|
* may feature a slight damping of the highest frequencies since a
|
|
* suitably short correction filter cannot fix steep high-frequency
|
|
* damping.
|
|
*
|
|
* This function assumes that the resizing step is currently the last
|
|
* step, even if it was not inserted yet: this allows placement of the
|
|
* correction filter both before and after the resizing step.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param bw Resulting bandwidth relative to the original bandwidth (which
|
|
* is 1.0), usually 1/k. Should be <= 1.0.
|
|
* @param IsPreCorrection "True" if the filtering step was already created
|
|
* and it is first in the Steps array. "True" also adds edge pixels to
|
|
* reduce edge artifacts.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
void addCorrectionFilter(CFilterSteps& Steps, const double bw,
|
|
const bool IsPreCorrection,
|
|
const bool IsModel) const {
|
|
CFilterStep& fs = (IsPreCorrection ? Steps[0] : Steps.add());
|
|
fs.IsUpsample = false;
|
|
fs.ResampleFactor = 1;
|
|
fs.DCGain = 1.0;
|
|
fs.EdgePixelCount = (IsPreCorrection ? fs.EdgePixelCountDef : 0);
|
|
|
|
if (IsModel) {
|
|
allocFilter(
|
|
fs.Flt,
|
|
CDSPFIREQ ::calcFilterLength(Params.CorrFltLen, fs.FltLatency), true);
|
|
|
|
return;
|
|
}
|
|
|
|
const int BinCount = 65; // Frequency response bins to control.
|
|
const int BinCount1 = BinCount - 1;
|
|
double curbw = 1.0; // Bandwidth of the filter at the current step.
|
|
int i;
|
|
int j;
|
|
double re;
|
|
double im;
|
|
|
|
CBuffer<double> Bins(BinCount); // Adjustment introduced by all
|
|
// steps at all frequencies of interest.
|
|
|
|
for (j = 0; j < BinCount; j++) {
|
|
Bins[j] = 1.0;
|
|
}
|
|
|
|
const int si = (IsPreCorrection ? 1 : 0);
|
|
|
|
for (i = si; i < Steps.getItemCount() - (si ^ 1); i++) {
|
|
const CFilterStep& fs = Steps[i];
|
|
|
|
if (fs.IsUpsample) {
|
|
curbw *= fs.ResampleFactor;
|
|
|
|
if (fs.FltOrig.getCapacity() > 0) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
const double dcg = 1.0 / fs.DCGain; // DC gain correction.
|
|
const fptype* Flt;
|
|
int FltLen;
|
|
|
|
if (fs.ResampleFactor == 0) {
|
|
Flt = fs.FltBank->getFilter(0);
|
|
FltLen = fs.FltBank->getFilterLen();
|
|
} else {
|
|
Flt = &fs.Flt[0];
|
|
FltLen = fs.Flt.getCapacity();
|
|
}
|
|
|
|
// Calculate frequency response adjustment introduced by the
|
|
// filter at this step, within the bounds of bandwidth of
|
|
// interest.
|
|
|
|
for (j = 0; j < BinCount; j++) {
|
|
const double th = AVIR_PI * bw / curbw * j / BinCount1;
|
|
|
|
calcFIRFilterResponse(Flt, FltLen, th, re, im);
|
|
|
|
Bins[j] /= sqrt(re * re + im * im) * dcg;
|
|
}
|
|
|
|
if (!fs.IsUpsample && fs.ResampleFactor > 1) {
|
|
curbw /= fs.ResampleFactor;
|
|
}
|
|
}
|
|
|
|
// Calculate filter.
|
|
|
|
CDSPFIREQ EQ;
|
|
EQ.init(bw * 2.0, Params.CorrFltLen, BinCount, 0.0, bw, false,
|
|
Params.CorrFltAlpha);
|
|
|
|
fs.FltLatency = EQ.getFilterLatency();
|
|
|
|
CBuffer<double> Filter(EQ.getFilterLength());
|
|
EQ.buildFilter(Bins, &Filter[0]);
|
|
normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);
|
|
optimizeFIRFilter(Filter, fs.FltLatency);
|
|
normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);
|
|
|
|
allocFilter(fs.Flt, Filter.getCapacity());
|
|
copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity());
|
|
|
|
// Print a theoretically achieved final frequency response at various
|
|
// feature sizes (from DC to 1 pixel). Values above 255 means features
|
|
// become brighter, values below 255 means features become dimmer.
|
|
|
|
/* const double sbw = ( bw > 1.0 ? 1.0 / bw : 1.0 );
|
|
|
|
for( j = 0; j < BinCount; j++ )
|
|
{
|
|
const double th = AVIR_PI * sbw * j / BinCount1;
|
|
|
|
calcFIRFilterResponse( &fs.Flt[ 0 ],
|
|
fs.Flt.getCapacity(), th, re, im );
|
|
|
|
printf( "%f\n", sqrt( re * re + im * im ) / Bins[ j
|
|
] * 255 );
|
|
}
|
|
|
|
printf( "***\n" );*/
|
|
}
|
|
|
|
/**
|
|
* Function adds a sharpening filter if image is being upsized. Such
|
|
* sharpening allows to spot interpolation filter's stop-band attenuation:
|
|
* if attenuation is too weak, a "dark grid" and other artifacts may
|
|
* become visible.
|
|
*
|
|
* It is assumed that 40 decibel stop-band attenuation should be
|
|
* considered a required minimum: this allows application of (deliberately
|
|
* strong) 64X sharpening without spotting any artifacts.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param bw Resulting bandwidth relative to the original bandwidth (which
|
|
* is 1.0), usually 1/k.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
static void addSharpenTest(CFilterSteps& Steps, const double bw,
|
|
const bool IsModel) {
|
|
if (bw <= 1.0) {
|
|
return;
|
|
}
|
|
|
|
const double FltLen = 10.0 * bw;
|
|
|
|
CFilterStep& fs = Steps.add();
|
|
fs.IsUpsample = false;
|
|
fs.ResampleFactor = 1;
|
|
fs.DCGain = 1.0;
|
|
fs.EdgePixelCount = 0;
|
|
|
|
if (IsModel) {
|
|
allocFilter(fs.Flt, CDSPFIREQ ::calcFilterLength(FltLen, fs.FltLatency),
|
|
true);
|
|
|
|
return;
|
|
}
|
|
|
|
const int BinCount = 200;
|
|
CBuffer<double> Bins(BinCount);
|
|
int Thresh = (int)round(BinCount / bw * 1.75);
|
|
|
|
if (Thresh > BinCount) {
|
|
Thresh = BinCount;
|
|
}
|
|
|
|
int j;
|
|
|
|
for (j = 0; j < Thresh; j++) {
|
|
Bins[j] = 1.0;
|
|
}
|
|
|
|
for (j = Thresh; j < BinCount; j++) {
|
|
Bins[j] = 256.0;
|
|
}
|
|
|
|
CDSPFIREQ EQ;
|
|
EQ.init(bw * 2.0, FltLen, BinCount, 0.0, bw, false, 1.7);
|
|
|
|
fs.FltLatency = EQ.getFilterLatency();
|
|
|
|
CBuffer<double> Filter(EQ.getFilterLength());
|
|
EQ.buildFilter(Bins, &Filter[0]);
|
|
normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);
|
|
optimizeFIRFilter(Filter, fs.FltLatency);
|
|
normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);
|
|
|
|
allocFilter(fs.Flt, Filter.getCapacity());
|
|
copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity());
|
|
|
|
/* for( j = 0; j < BinCount; j++ )
|
|
{
|
|
const double th = AVIR_PI * j / ( BinCount - 1 );
|
|
double re;
|
|
double im;
|
|
|
|
calcFIRFilterResponse( &fs.Flt[ 0 ],
|
|
fs.Flt.getCapacity(), th, re, im );
|
|
|
|
printf( "%f\n", sqrt( re * re + im * im ));
|
|
}
|
|
|
|
printf( "***\n" );*/
|
|
}
|
|
|
|
/**
|
|
* Function builds sequence of filtering steps depending on the specified
|
|
* resizing coefficient. The last steps included are always the resizing
|
|
* step then (possibly) the correction step.
|
|
*
|
|
* @param Steps Array that receives filtering steps.
|
|
* @param[out] Vars Variables object.
|
|
* @param FltBank Filter bank to initialize and use.
|
|
* @param DCGain The overall DC gain to apply. This DC gain is applied to
|
|
* the first filtering step only (upsampling or filtering step).
|
|
* @param ModeFlags Build mode flags to use. This is a bitmap of switches
|
|
* that enable or disable certain algorithm features.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* the actual filter allocation and building.
|
|
*/
|
|
|
|
void buildFilterSteps(CFilterSteps& Steps, CImageResizerVars& Vars,
|
|
CDSPFracFilterBankLin<fptype>& FltBank,
|
|
const double DCGain, const int ModeFlags,
|
|
const bool IsModel) const {
|
|
Steps.clear();
|
|
|
|
const bool DoFltAndIntCombo =
|
|
((ModeFlags & 1) != 0); // Do filter
|
|
// and interpolator combining.
|
|
const bool ForceHiOrderInt =
|
|
((ModeFlags & 2) != 0); // Force use
|
|
// of a higher-order interpolation.
|
|
const bool UseHalfband = ((ModeFlags & 4) != 0); // Use half-band
|
|
// filter.
|
|
|
|
const double bw = 1.0 / Vars.k; // Resulting bandwidth.
|
|
const int UpsampleFactor = ((int)floor(Vars.k) < 2 ? 2 : 1);
|
|
double IntCutoffMult; // Interpolation filter cutoff multiplier.
|
|
CFilterStep* ReuseStep; // If not NULL, resizing step should use
|
|
// this step object instead of creating a new one.
|
|
CFilterStep* ExtFltStep; // Use FltOrig of this step as the external
|
|
// filter to applied to the interpolator.
|
|
bool IsPreCorrection; // "True" if the correction filter is applied
|
|
// first.
|
|
double FltCutoff; // Cutoff frequency of the first filtering step.
|
|
double corrbw; ///< Bandwidth at the correction step.
|
|
|
|
if (Vars.k <= 1.0) {
|
|
IsPreCorrection = true;
|
|
FltCutoff = 1.0;
|
|
corrbw = 1.0;
|
|
Steps.add();
|
|
} else {
|
|
IsPreCorrection = false;
|
|
FltCutoff = bw;
|
|
corrbw = bw;
|
|
}
|
|
|
|
// Add 1 upsampling or several downsampling filters.
|
|
|
|
if (UpsampleFactor > 1) {
|
|
CFilterStep& fs = Steps.add();
|
|
assignFilterParams(fs, true, UpsampleFactor, FltCutoff, DCGain,
|
|
DoFltAndIntCombo, IsModel);
|
|
|
|
IntCutoffMult = FltCutoff * 2.0 / UpsampleFactor;
|
|
ReuseStep = NULL;
|
|
ExtFltStep = (DoFltAndIntCombo ? &fs : NULL);
|
|
} else {
|
|
int DownsampleFactor;
|
|
|
|
while (true) {
|
|
DownsampleFactor = (int)floor(0.5 / FltCutoff);
|
|
bool DoHBFltAdd;
|
|
|
|
if (DownsampleFactor > 16) {
|
|
// Add half-band filter unconditionally in order to keep
|
|
// filter lengths lower for more precise frequency
|
|
// response and less edge artifacts.
|
|
|
|
DoHBFltAdd = true;
|
|
DownsampleFactor = 16;
|
|
} else {
|
|
DoHBFltAdd = (UseHalfband && DownsampleFactor > 1);
|
|
}
|
|
|
|
if (DoHBFltAdd) {
|
|
assignFilterParams(Steps.add(), false, DownsampleFactor, 0.0, 1.0,
|
|
false, IsModel);
|
|
|
|
FltCutoff *= DownsampleFactor;
|
|
} else {
|
|
if (DownsampleFactor < 1) {
|
|
DownsampleFactor = 1;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
CFilterStep& fs = Steps.add();
|
|
assignFilterParams(fs, false, DownsampleFactor, FltCutoff, DCGain,
|
|
DoFltAndIntCombo, IsModel);
|
|
|
|
IntCutoffMult = FltCutoff / 0.5;
|
|
|
|
if (DoFltAndIntCombo) {
|
|
ReuseStep = &fs;
|
|
ExtFltStep = &fs;
|
|
} else {
|
|
IntCutoffMult *= DownsampleFactor;
|
|
ReuseStep = NULL;
|
|
ExtFltStep = NULL;
|
|
}
|
|
}
|
|
|
|
// Insert resizing and correction steps.
|
|
|
|
CFilterStep& fs = (ReuseStep == NULL ? Steps.add() : *ReuseStep);
|
|
|
|
Vars.ResizeStep = Steps.getItemCount() - 1;
|
|
fs.IsUpsample = false;
|
|
fs.ResampleFactor = 0;
|
|
fs.DCGain = (ExtFltStep == NULL ? 1.0 : ExtFltStep->DCGain);
|
|
|
|
initFilterBank(FltBank, IntCutoffMult, ForceHiOrderInt,
|
|
(ExtFltStep == NULL ? fs.FltOrig : ExtFltStep->FltOrig));
|
|
|
|
if (FltBank == FixedFilterBank) {
|
|
fs.FltBank = (CDSPFracFilterBankLin<fptype>*)&FixedFilterBank;
|
|
} else {
|
|
fs.FltBank = &FltBank;
|
|
}
|
|
|
|
addCorrectionFilter(Steps, corrbw, IsPreCorrection, IsModel);
|
|
|
|
// addSharpenTest( Steps, bw, IsModel );
|
|
}
|
|
|
|
/**
|
|
* Function extends *this upsampling step so that it produces more
|
|
* upsampled pixels that cover the prefix and suffix needs of the next
|
|
* step. After the call to this function the InPrefix and InSuffix
|
|
* variables of the next step will be set to zero.
|
|
*
|
|
* @param fs Upsampling filtering step.
|
|
* @param NextStep The next step structure.
|
|
*/
|
|
|
|
static void extendUpsample(CFilterStep& fs, CFilterStep& NextStep) {
|
|
fs.InPrefix =
|
|
(NextStep.InPrefix + fs.ResampleFactor - 1) / fs.ResampleFactor;
|
|
|
|
fs.OutPrefix += fs.InPrefix * fs.ResampleFactor;
|
|
NextStep.InPrefix = 0;
|
|
|
|
fs.InSuffix =
|
|
(NextStep.InSuffix + fs.ResampleFactor - 1) / fs.ResampleFactor;
|
|
|
|
fs.OutSuffix += fs.InSuffix * fs.ResampleFactor;
|
|
NextStep.InSuffix = 0;
|
|
}
|
|
|
|
/**
|
|
* Function fills resizing step's RPosBuf array, excluding the actual
|
|
* "ftp" pointers and "SrcOffs" offsets.
|
|
*
|
|
* This array should be cleared if the resizing step or offset were
|
|
* changed. Otherwise this function only fills the elements required to
|
|
* cover resizing step's OutLen.
|
|
*
|
|
* This function is called by the updateFilterStepBuffers() function.
|
|
*
|
|
* @param fs Resizing step.
|
|
* @param Vars Variables object.
|
|
*/
|
|
|
|
static void fillRPosBuf(CFilterStep& fs, const CImageResizerVars& Vars) {
|
|
const int PrevLen = fs.RPosBuf->getCapacity();
|
|
|
|
if (fs.OutLen > PrevLen) {
|
|
fs.RPosBuf->increaseCapacity(fs.OutLen);
|
|
}
|
|
|
|
typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[PrevLen];
|
|
const int FracCount = fs.FltBank->getFracCount();
|
|
const double o = Vars.o;
|
|
const double k = Vars.k;
|
|
int i;
|
|
|
|
for (i = PrevLen; i < fs.OutLen; i++) {
|
|
const double SrcPos = o + k * i;
|
|
const int SrcPosInt = (int)floor(SrcPos);
|
|
const double x = (SrcPos - SrcPosInt) * FracCount;
|
|
const int fti = (int)x;
|
|
rpos->x = (typename fpclass ::fptypeatom)(x - fti);
|
|
rpos->fti = fti;
|
|
rpos->SrcPosInt = SrcPosInt;
|
|
rpos++;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function updates filtering step buffer lengths depending on the
|
|
* specified source and new scanline lengths. This function should be
|
|
* called after the buildFilterSteps() function.
|
|
*
|
|
* @param Steps Array that receives filtering steps.
|
|
* @param[out] Vars Variables object, will receive buffer size and length.
|
|
* This function expects "k" and "o" variable values that will be
|
|
* adjusted by this function.
|
|
* @param RPosBufArray Resizing position buffers array, used to obtain
|
|
* buffer to initialize and use (will be reused if it is already fully or
|
|
* partially filled).
|
|
* @param SrcLen Source scanline's length in pixels.
|
|
* @param NewLen New scanline's length in pixels.
|
|
*/
|
|
|
|
static void updateFilterStepBuffers(
|
|
CFilterSteps& Steps, CImageResizerVars& Vars,
|
|
typename CFilterStep ::CRPosBufArray& RPosBufArray, int SrcLen,
|
|
const int NewLen) {
|
|
int upstep = -1;
|
|
int InBuf = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < Steps.getItemCount(); i++) {
|
|
CFilterStep& fs = Steps[i];
|
|
|
|
fs.Vars = &Vars;
|
|
fs.InLen = SrcLen;
|
|
fs.InBuf = InBuf;
|
|
fs.OutBuf = (InBuf + 1) & 1;
|
|
|
|
if (fs.IsUpsample) {
|
|
upstep = i;
|
|
Vars.k *= fs.ResampleFactor;
|
|
Vars.o *= fs.ResampleFactor;
|
|
fs.InPrefix = 0;
|
|
fs.InSuffix = 0;
|
|
fs.OutLen = fs.InLen * fs.ResampleFactor;
|
|
fs.OutPrefix = fs.FltLatency;
|
|
fs.OutSuffix = fs.Flt.getCapacity() - fs.FltLatency - fs.ResampleFactor;
|
|
|
|
int l0 = fs.OutPrefix + fs.OutLen + fs.OutSuffix;
|
|
int l = fs.InLen * fs.ResampleFactor + fs.SuffixDC.getCapacity();
|
|
|
|
if (l > l0) {
|
|
fs.OutSuffix += l - l0;
|
|
}
|
|
|
|
l0 = fs.OutLen + fs.OutSuffix;
|
|
|
|
if (fs.PrefixDC.getCapacity() > l0) {
|
|
fs.OutSuffix += fs.PrefixDC.getCapacity() - l0;
|
|
}
|
|
} else if (fs.ResampleFactor == 0) {
|
|
const int FilterLenD2 = fs.FltBank->getFilterLen() / 2;
|
|
const int FilterLenD21 = FilterLenD2 - 1;
|
|
|
|
const int ResizeLPix = (int)floor(Vars.o) - FilterLenD21;
|
|
fs.InPrefix = (ResizeLPix < 0 ? -ResizeLPix : 0);
|
|
const int ResizeRPix =
|
|
(int)floor(Vars.o + (NewLen - 1) * Vars.k) + FilterLenD2 + 1;
|
|
|
|
fs.InSuffix = (ResizeRPix > fs.InLen ? ResizeRPix - fs.InLen : 0);
|
|
|
|
fs.OutLen = NewLen;
|
|
fs.RPosBuf = &RPosBufArray.getRPosBuf(Vars.k, Vars.o,
|
|
fs.FltBank->getFracCount());
|
|
|
|
fillRPosBuf(fs, Vars);
|
|
} else {
|
|
Vars.k /= fs.ResampleFactor;
|
|
Vars.o /= fs.ResampleFactor;
|
|
Vars.o += fs.EdgePixelCount;
|
|
|
|
fs.InPrefix = fs.FltLatency;
|
|
fs.InSuffix = fs.Flt.getCapacity() - fs.FltLatency - 1;
|
|
|
|
// Additionally extend OutLen to produce more precise edge
|
|
// pixels.
|
|
|
|
fs.OutLen = (fs.InLen + fs.ResampleFactor - 1) / fs.ResampleFactor +
|
|
fs.EdgePixelCount;
|
|
|
|
fs.InSuffix += (fs.OutLen - 1) * fs.ResampleFactor + 1 - fs.InLen;
|
|
|
|
fs.InPrefix += fs.EdgePixelCount * fs.ResampleFactor;
|
|
fs.OutLen += fs.EdgePixelCount;
|
|
}
|
|
|
|
InBuf = fs.OutBuf;
|
|
SrcLen = fs.OutLen;
|
|
}
|
|
|
|
Steps[Steps.getItemCount() - 1].OutBuf = 2;
|
|
|
|
if (upstep != -1) {
|
|
extendUpsample(Steps[upstep], Steps[upstep + 1]);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function calculates an optimal intermediate buffer length that will
|
|
* cover all needs of the specified filtering steps. This function should
|
|
* be called after the updateFilterStepBuffers() function.
|
|
*
|
|
* Function also updates resizing step's RPosBuf pointers to the filter
|
|
* bank and SrcOffs values.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param[out] Vars Variables object, will receive buffer size and length.
|
|
* @param ResElIncr Resulting (final) element increment, used to produce
|
|
* de-interleaved result. For horizontal processing this value is equal
|
|
* to last step's OutLen, for vertical processing this value is equal to
|
|
* resulting image's width.
|
|
*/
|
|
|
|
static void updateBufLenAndRPosPtrs(CFilterSteps& Steps,
|
|
CImageResizerVars& Vars,
|
|
const int ResElIncr) {
|
|
int MaxPrefix[2] = {0, 0};
|
|
int MaxLen[2] = {0, 0};
|
|
int i;
|
|
|
|
for (i = 0; i < Steps.getItemCount(); i++) {
|
|
CFilterStep& fs = Steps[i];
|
|
const int ib = fs.InBuf;
|
|
|
|
if (fs.InPrefix > MaxPrefix[ib]) {
|
|
MaxPrefix[ib] = fs.InPrefix;
|
|
}
|
|
|
|
int l = fs.InLen + fs.InSuffix;
|
|
|
|
if (l > MaxLen[ib]) {
|
|
MaxLen[ib] = l;
|
|
}
|
|
|
|
fs.InElIncr = fs.InPrefix + l;
|
|
|
|
if (fs.OutBuf == 2) {
|
|
break;
|
|
}
|
|
|
|
const int ob = fs.OutBuf;
|
|
|
|
if (fs.IsUpsample) {
|
|
if (fs.OutPrefix > MaxPrefix[ob]) {
|
|
MaxPrefix[ob] = fs.OutPrefix;
|
|
}
|
|
|
|
l = fs.OutLen + fs.OutSuffix;
|
|
|
|
if (l > MaxLen[ob]) {
|
|
MaxLen[ob] = l;
|
|
}
|
|
} else {
|
|
if (fs.OutLen > MaxLen[ob]) {
|
|
MaxLen[ob] = fs.OutLen;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Update OutElIncr values of all steps.
|
|
|
|
for (i = 0; i < Steps.getItemCount(); i++) {
|
|
CFilterStep& fs = Steps[i];
|
|
|
|
if (fs.OutBuf == 2) {
|
|
fs.OutElIncr = ResElIncr;
|
|
break;
|
|
}
|
|
|
|
CFilterStep& fs2 = Steps[i + 1];
|
|
|
|
if (fs.IsUpsample) {
|
|
fs.OutElIncr = fs.OutPrefix + fs.OutLen + fs.OutSuffix;
|
|
|
|
if (fs.OutElIncr > fs2.InElIncr) {
|
|
fs2.InElIncr = fs.OutElIncr;
|
|
} else {
|
|
fs.OutElIncr = fs2.InElIncr;
|
|
}
|
|
} else {
|
|
fs.OutElIncr = fs2.InElIncr;
|
|
}
|
|
}
|
|
|
|
// Update temporary buffer's length.
|
|
|
|
for (i = 0; i < 2; i++) {
|
|
Vars.BufLen[i] = MaxPrefix[i] + MaxLen[i];
|
|
Vars.BufOffs[i] = MaxPrefix[i];
|
|
|
|
if (Vars.packmode == 0) {
|
|
Vars.BufOffs[i] *= Vars.ElCount;
|
|
}
|
|
|
|
Vars.BufLen[i] *= Vars.ElCount;
|
|
}
|
|
|
|
// Update RPosBuf pointers and SrcOffs.
|
|
|
|
CFilterStep& fs = Steps[Vars.ResizeStep];
|
|
typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[0];
|
|
const int em = (fpclass ::packmode == 0 ? Vars.ElCount : 1);
|
|
const int FilterLenD21 = fs.FltBank->getFilterLen() / 2 - 1;
|
|
|
|
for (i = 0; i < fs.OutLen; i++) {
|
|
rpos->ftp = fs.FltBank->getFilter(rpos->fti);
|
|
rpos->SrcOffs = (rpos->SrcPosInt - FilterLenD21) * em;
|
|
rpos++;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function modifies the overall (DC) gain of the correction filter in the
|
|
* pre-built filtering steps array.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param m Multiplier to apply to the correction filter.
|
|
*/
|
|
|
|
void modifyCorrFilterDCGain(CFilterSteps& Steps, const double m) const {
|
|
CBuffer<fptype>* Flt;
|
|
const int z = Steps.getItemCount() - 1;
|
|
|
|
if (!Steps[z].IsUpsample && Steps[z].ResampleFactor == 1) {
|
|
Flt = &Steps[z].Flt;
|
|
} else {
|
|
Flt = &Steps[0].Flt;
|
|
}
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < Flt->getCapacity(); i++) {
|
|
(*Flt)[i] = (fptype)((double)(*Flt)[i] * m);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function builds a map of used fractional delay filters based on the
|
|
* resizing positions buffer.
|
|
*
|
|
* @param fs Resizing step.
|
|
* @param[out] UsedFracMap Map of used fractional delay filters.
|
|
*/
|
|
|
|
static void fillUsedFracMap(const CFilterStep& fs,
|
|
CBuffer<uint8_t>& UsedFracMap) {
|
|
const int FracCount = fs.FltBank->getFracCount();
|
|
UsedFracMap.increaseCapacity(FracCount, false);
|
|
memset(&UsedFracMap[0], 0, FracCount * sizeof(UsedFracMap[0]));
|
|
|
|
typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[0];
|
|
int i;
|
|
|
|
for (i = 0; i < fs.OutLen; i++) {
|
|
UsedFracMap[rpos->fti] |= 1;
|
|
rpos++;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function calculates the overall filtering steps complexity per
|
|
* scanline. Each complexity unit corresponds to a single multiply-add
|
|
* operation. Data copy and pointer math operations are not included in
|
|
* this calculation, it is assumed that they correlate to the multiply-add
|
|
* operations. Calculation also does not include final rounding, dithering
|
|
* and clamping operations since they cannot be optimized out anyway.
|
|
*
|
|
* Calculation of the CRPosBuf buffer is not included since it cannot be
|
|
* avoided.
|
|
*
|
|
* This function should be called after the updateFilterStepBuffers()
|
|
* function.
|
|
*
|
|
* @param Steps Filtering steps array.
|
|
* @param Vars Variables object.
|
|
* @param UsedFracMap The map of used fractional delay filters.
|
|
* @param ScanlineCount Scanline count.
|
|
*/
|
|
|
|
static int calcComplexity(const CFilterSteps& Steps,
|
|
const CImageResizerVars& Vars,
|
|
const CBuffer<uint8_t>& UsedFracMap,
|
|
const int ScanlineCount) {
|
|
int fcnum; // Filter complexity multiplier numerator.
|
|
int fcdenom; // Filter complexity multiplier denominator.
|
|
|
|
if (Vars.packmode != 0) {
|
|
fcnum = 1;
|
|
fcdenom = 1;
|
|
} else {
|
|
// In interleaved processing mode, filters require 1 less
|
|
// multiplication per 2 multiply-add instructions.
|
|
|
|
fcnum = 3;
|
|
fcdenom = 4;
|
|
}
|
|
|
|
int s = 0; // Complexity per one scanline.
|
|
int s2 = 0; // Complexity per all scanlines.
|
|
int i;
|
|
|
|
for (i = 0; i < Steps.getItemCount(); i++) {
|
|
const CFilterStep& fs = Steps[i];
|
|
|
|
s2 += 65 * fs.Flt.getCapacity(); // Filter creation complexity.
|
|
|
|
if (fs.IsUpsample) {
|
|
if (fs.FltOrig.getCapacity() > 0) {
|
|
continue;
|
|
}
|
|
|
|
s += (fs.Flt.getCapacity() * (fs.InPrefix + fs.InLen + fs.InSuffix) +
|
|
fs.SuffixDC.getCapacity() + fs.PrefixDC.getCapacity()) *
|
|
Vars.ElCount;
|
|
} else if (fs.ResampleFactor == 0) {
|
|
s += fs.FltBank->getFilterLen() *
|
|
(fs.FltBank->getOrder() + Vars.ElCount) * fs.OutLen;
|
|
|
|
s2 += fs.FltBank->calcInitComplexity(UsedFracMap);
|
|
} else {
|
|
s += fs.Flt.getCapacity() * Vars.ElCount * fs.OutLen * fcnum / fcdenom;
|
|
}
|
|
}
|
|
|
|
return (s + s2 / ScanlineCount);
|
|
}
|
|
|
|
/**
|
|
* @brief Thread-isolated data used for scanline processing.
|
|
*
|
|
* This structure holds data necessary for image's horizontal or vertical
|
|
* scanline processing, including scanline processing queue.
|
|
*
|
|
* @tparam Tin Source element data type. Intermediate buffers store data
|
|
* in floating point format.
|
|
* @tparam Tout Destination element data type. Intermediate buffers store
|
|
* data in floating point format.
|
|
*/
|
|
|
|
template <class Tin, class Tout>
|
|
class CThreadData : public CImageResizerThreadPool ::CWorkload {
|
|
public:
|
|
virtual void process() { processScanlineQueue(); }
|
|
|
|
/**
|
|
* This enumeration lists possible scanline operations.
|
|
*/
|
|
|
|
enum EScanlineOperation {
|
|
sopResizeH, ///< Resize horizontal scanline.
|
|
///<
|
|
sopResizeV, ///< Resize vertical scanline.
|
|
///<
|
|
sopDitherAndUnpackH, ///< Dither and unpack horizontal scanline.
|
|
///<
|
|
sopUnpackH ///< Unpack horizontal scanline.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* Function initializes *this thread data object and assigns certain
|
|
* variables provided by the higher level code.
|
|
*
|
|
* @param aThreadIndex Index of this thread data (0-based).
|
|
* @param aThreadCount Total number of threads used during processing.
|
|
* @param aSteps Filtering steps.
|
|
* @param aVars Image resizer variables.
|
|
*/
|
|
|
|
void init(const int aThreadIndex, const int aThreadCount,
|
|
const CFilterSteps& aSteps, const CImageResizerVars& aVars) {
|
|
ThreadIndex = aThreadIndex;
|
|
ThreadCount = aThreadCount;
|
|
Steps = &aSteps;
|
|
Vars = &aVars;
|
|
}
|
|
|
|
/**
|
|
* Function initializes scanline processing queue, and updates
|
|
* capacities of intermediate buffers.
|
|
*
|
|
* @param aOp Operation to perform over scanline.
|
|
* @param TotalLines The total number of scanlines that will be
|
|
* processed by all threads.
|
|
* @param aSrcLen Source scanline length in pixels.
|
|
* @param aSrcIncr Source scanline buffer increment. Ignored in
|
|
* horizontal scanline processing.
|
|
* @param aResIncr Resulting scanline buffer increment. Ignored in
|
|
* horizontal scanline processing.
|
|
*/
|
|
|
|
void initScanlineQueue(const EScanlineOperation aOp, const int TotalLines,
|
|
const int aSrcLen, const int aSrcIncr = 0,
|
|
const int aResIncr = 0) {
|
|
const int l = Vars->BufLen[0] + Vars->BufLen[1];
|
|
|
|
if (Bufs.getCapacity() < l) {
|
|
Bufs.alloc(l, fpclass ::fpalign);
|
|
}
|
|
|
|
BufPtrs[0] = Bufs + Vars->BufOffs[0];
|
|
BufPtrs[1] = Bufs + Vars->BufLen[0] + Vars->BufOffs[1];
|
|
|
|
int j;
|
|
int ml = 0;
|
|
|
|
for (j = 0; j < Steps->getItemCount(); j++) {
|
|
const CFilterStep& fs = (*Steps)[j];
|
|
|
|
if (fs.ResampleFactor == 0 && ml < fs.FltBank->getFilterLen()) {
|
|
ml = fs.FltBank->getFilterLen();
|
|
}
|
|
}
|
|
|
|
TmpFltBuf.alloc(ml, fpclass ::fpalign);
|
|
ScanlineOp = aOp;
|
|
SrcLen = aSrcLen;
|
|
SrcIncr = aSrcIncr;
|
|
ResIncr = aResIncr;
|
|
QueueLen = 0;
|
|
Queue.increaseCapacity((TotalLines + ThreadCount - 1) / ThreadCount,
|
|
false);
|
|
}
|
|
|
|
/**
|
|
* Function adds a scanline to the queue buffer. The
|
|
* initScanlineQueue() function should be called before calling this
|
|
* function. The number of calls to this add function should not
|
|
* exceed the TotalLines spread over all threads.
|
|
*
|
|
* @param SrcBuf Source scanline buffer.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void addScanlineToQueue(void* const SrcBuf, void* const ResBuf) {
|
|
Queue[QueueLen].SrcBuf = SrcBuf;
|
|
Queue[QueueLen].ResBuf = ResBuf;
|
|
QueueLen++;
|
|
}
|
|
|
|
/**
|
|
* Function processes all queued scanlines.
|
|
*/
|
|
|
|
void processScanlineQueue() {
|
|
int i;
|
|
|
|
switch (ScanlineOp) {
|
|
case sopResizeH: {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
resizeScanlineH((Tin*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopResizeV: {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
resizeScanlineV((fptype*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopDitherAndUnpackH: {
|
|
if (Vars->UseSRGBGamma) {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen,
|
|
*Vars);
|
|
|
|
Ditherer.dither((fptype*)Queue[i].SrcBuf);
|
|
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
} else {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
Ditherer.dither((fptype*)Queue[i].SrcBuf);
|
|
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopUnpackH: {
|
|
if (Vars->UseSRGBGamma) {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen,
|
|
*Vars);
|
|
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
} else {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function returns ditherer object associated with *this thread data
|
|
* object.
|
|
*/
|
|
|
|
CDitherer& getDitherer() { return (Ditherer); }
|
|
|
|
private:
|
|
int ThreadIndex; ///< Thread index.
|
|
///<
|
|
int ThreadCount; ///< Thread count.
|
|
///<
|
|
const CFilterSteps* Steps; ///< Filtering steps.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizer variables.
|
|
///<
|
|
CBuffer<fptype> Bufs; ///< Flip-flop intermediate buffers.
|
|
///<
|
|
fptype* BufPtrs[3]; ///< Flip-flop buffer pointers (referenced by
|
|
///< filtering step's InBuf and OutBuf indices).
|
|
///<
|
|
CBuffer<fptype>
|
|
TmpFltBuf; ///< Temporary buffer used in the
|
|
///< doResize() function, aligned by fpclass :: fpalign.
|
|
///<
|
|
EScanlineOperation ScanlineOp; ///< Operation to perform over
|
|
///< scanline.
|
|
///<
|
|
int SrcLen; ///< Source scanline length in the last queue.
|
|
///<
|
|
int SrcIncr; ///< Source scanline buffer increment in the last queue.
|
|
///<
|
|
int ResIncr; ///< Resulting scanline buffer increment in the last
|
|
///< queue.
|
|
///<
|
|
CDitherer Ditherer; ///< Ditherer object to use.
|
|
///<
|
|
|
|
/**
|
|
* @brief Scanline processing queue item.
|
|
*
|
|
* Scanline processing queue item.
|
|
*/
|
|
|
|
struct CQueueItem {
|
|
void* SrcBuf; ///< Source scanline buffer, will by typecasted to
|
|
///< Tin or fptype*.
|
|
///<
|
|
void* ResBuf; ///< Resulting scanline buffer, will by typecasted
|
|
///< to Tout or fptype*.
|
|
///<
|
|
};
|
|
|
|
CBuffer<CQueueItem> Queue; ///< Scanline processing queue.
|
|
///<
|
|
int QueueLen; ///< Queue length.
|
|
///<
|
|
|
|
/**
|
|
* Function resizes a single horizontal scanline.
|
|
*
|
|
* @param SrcBuf Source scanline buffer. Can be either horizontal or
|
|
* vertical.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void resizeScanlineH(const Tin* const SrcBuf, fptype* const ResBuf) {
|
|
(*Steps)[0].packScanline(SrcBuf, BufPtrs[0], SrcLen);
|
|
BufPtrs[2] = ResBuf;
|
|
int j;
|
|
|
|
for (j = 0; j < Steps->getItemCount(); j++) {
|
|
const CFilterStep& fs = (*Steps)[j];
|
|
fs.prepareInBuf(BufPtrs[fs.InBuf]);
|
|
const int DstIncr = (Vars->packmode == 0 ? Vars->ElCount : 1);
|
|
|
|
if (fs.ResampleFactor != 0) {
|
|
if (fs.IsUpsample) {
|
|
fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]);
|
|
} else {
|
|
fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr);
|
|
}
|
|
} else {
|
|
fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr,
|
|
TmpFltBuf);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function resizes a single vertical scanline.
|
|
*
|
|
* @param SrcBuf Source scanline buffer. Can be either horizontal or
|
|
* vertical.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void resizeScanlineV(const fptype* const SrcBuf, fptype* const ResBuf) {
|
|
(*Steps)[0].convertVtoH(SrcBuf, BufPtrs[0], SrcLen, SrcIncr);
|
|
|
|
BufPtrs[2] = ResBuf;
|
|
int j;
|
|
|
|
for (j = 0; j < Steps->getItemCount(); j++) {
|
|
const CFilterStep& fs = (*Steps)[j];
|
|
fs.prepareInBuf(BufPtrs[fs.InBuf]);
|
|
const int DstIncr =
|
|
(fs.OutBuf == 2 ? ResIncr
|
|
: (Vars->packmode == 0 ? Vars->ElCount : 1));
|
|
|
|
if (fs.ResampleFactor != 0) {
|
|
if (fs.IsUpsample) {
|
|
fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]);
|
|
} else {
|
|
fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr);
|
|
}
|
|
} else {
|
|
fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr,
|
|
TmpFltBuf);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
};
|
|
|
|
#undef AVIR_PI
|
|
#undef AVIR_PId2
|
|
|
|
} // namespace avir
|
|
|
|
#endif // AVIR_CIMAGERESIZER_INCLUDED
|
|
//$ nobt
|
|
//$ nocpp
|
|
|
|
/**
|
|
* @file avir.h
|
|
*
|
|
* @brief The "main" inclusion file with all required classes and functions.
|
|
*
|
|
* This is the "main" inclusion file for the "AVIR" image resizer. This
|
|
* inclusion file contains implementation of the AVIR image resizing algorithm
|
|
* in its entirety. Also includes several classes and functions that can be
|
|
* useful elsewhere.
|
|
*
|
|
* AVIR Copyright (c) 2015-2019 Aleksey Vaneev
|
|
*
|
|
* @mainpage
|
|
*
|
|
* @section intro_sec Introduction
|
|
*
|
|
* Description is available at https://github.com/avaneev/avir
|
|
*
|
|
* AVIR is devoted to women. Your digital photos can look good at any size!
|
|
*
|
|
* @section license License
|
|
*
|
|
* AVIR License Agreement
|
|
*
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (c) 2015-2019 Aleksey Vaneev
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* Please credit the author of this library in your documentation in the
|
|
* following way: "AVIR image resizing algorithm designed by Aleksey Vaneev"
|
|
*
|
|
* @version 2.4
|
|
*/
|
|
|
|
#ifndef AVIR_CIMAGERESIZER_INCLUDED
|
|
#define AVIR_CIMAGERESIZER_INCLUDED
|
|
|
|
#include <math.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
namespace avir {
|
|
|
|
/**
 * The macro defines the AVIR version string. The value matches the
 * "@version" tag of the file header comment.
 */
|
|
|
|
#define AVIR_VERSION "2.4"
|
|
|
|
/**
 * The macro equals to the "pi" constant, written with enough decimal
 * digits to fill the 53-bit floating point mantissa. This macro is
 * undefined (via #undef) at the end of file.
 */
|
|
|
|
#define AVIR_PI 3.1415926535897932
|
|
|
|
/**
 * The macro equals to the "pi divided by 2" constant, written with enough
 * decimal digits to fill the 53-bit floating point mantissa. This macro is
 * undefined (via #undef) at the end of file.
 */
|
|
|
|
#define AVIR_PId2 1.5707963267948966
|
|
|
|
/**
 * Rounding function that relies on the truncating (int) typecast: 0.5 is
 * added to the magnitude of the value, which is then truncated. The result
 * is biased, and the function is not suitable for numbers >= 2^31.
 *
 * @param d Value to round.
 * @return Rounded value. Some bias may be introduced.
 */
template <class T>
inline T round(const T d) {
    if (d < 0.0) {
        // Negative input: round the magnitude, then restore the sign.
        return (-(T)(int)((T)0.5 - d));
    }

    return ((T)(int)(d + (T)0.5));
}
|
|
|
|
/**
 * Template function limits the specified value to the [minv; maxv] range:
 * a value below "minv" yields "minv", a value above "maxv" yields "maxv",
 * any other value is returned as is. The lower bound is checked first.
 *
 * @param Value Value to clamp.
 * @param minv Minimal allowed value.
 * @param maxv Maximal allowed value.
 * @return The clamped value.
 */
template <class T>
inline T clamp(const T& Value, const T minv, const T maxv) {
    if (Value < minv) {
        return (minv);
    }

    return (Value > maxv ? maxv : Value);
}
|
|
|
|
/**
 * Approximation of the "x raised into power 2.4" function, designed for
 * sRGB gamma correction. Evaluated in "double" precision as a sum of a
 * polynomial in "x" and a rational term.
 *
 * @param x Argument, in the range 0.09 to 1.
 * @return Value raised into power 2.4, approximate.
 */
template <class T>
inline T pow24_sRGB(const T x) {
    const double Sqr = x * x;
    const double Cube = Sqr * x;
    const double Quad = Sqr * Sqr;

    const double Rational =
        0.0125559718896615 / (0.12758338921578 + 0.290283465468235 * x);

    // Terms are accumulated in the same order as in the closed-form
    // expression, so that rounding stays the same.
    double Sum = 0.0985766365536824 + 0.839474952656502 * Sqr;
    Sum = Sum + 0.363287814061725 * Cube;
    Sum = Sum - Rational;
    Sum = Sum - 0.231757513261358 * x;
    Sum = Sum - 0.0395365717969074 * Quad;

    return ((T)Sum);
}
|
|
|
|
/**
 * Approximation of the "x raised into power 1/2.4" function, designed for
 * sRGB gamma correction. Built from "x" and its successive square roots
 * (x^(1/2), x^(1/4), x^(1/8)).
 *
 * @param x Argument, in the range 0.003 to 1.
 * @return Value raised into power 1/2.4, approximate.
 */
template <class T>
inline T pow24i_sRGB(const T x) {
    const double Root2 = sqrt(x);
    const double Root4 = sqrt(Root2);
    const double Root8 = sqrt(Root4);

    const double Inner = 0.659628181609715 * Root8 - 0.0380957908841466 -
                         0.0706476137208521 * Root2;

    const double Outer = 0.000213364515060263 + 0.0149409239419218 * x +
                         0.433973412731747 * Root2;

    return ((T)(Outer + Root4 * Inner));
}
|
|
|
|
/**
 * Function approximately linearizes the sRGB gamma value. Values up to
 * 0.04045 are divided by 12.92; larger values are offset by 0.055, scaled
 * by 1 / 1.055 and passed to the pow24_sRGB() approximation.
 *
 * @param s sRGB gamma value, in the range 0 to 1.
 * @return Linearized sRGB gamma value, approximated.
 */
template <class T>
inline T convertSRGB2Lin(const T s) {
    const bool IsLinearPart = (s <= (T)0.04045);

    if (IsLinearPart) {
        return (s / (T)12.92);
    }

    const T Offs = (T)0.055;

    return (pow24_sRGB((s + Offs) / ((T)1 + Offs)));
}
|
|
|
|
/**
 * Function approximately de-linearizes the linear gamma value. Values up
 * to 0.0031308 are multiplied by 12.92; larger values are passed to the
 * pow24i_sRGB() approximation, scaled by 1.055 and offset by -0.055.
 *
 * @param s Linear gamma value, in the range 0 to 1.
 * @return sRGB gamma value, approximated.
 */
template <class T>
inline T convertLin2SRGB(const T s) {
    const bool IsLinearPart = (s <= (T)0.0031308);

    if (IsLinearPart) {
        return ((T)12.92 * s);
    }

    const T Offs = (T)0.055;

    return (((T)1 + Offs) * pow24i_sRGB(s) - Offs);
}
|
|
|
|
/**
 * Function converts (via typecast) the specified array of type T1 values
 * of length "l" into an array of type T2 values. If T1 is the same as T2,
 * a plain copy operation is performed. Elements are processed from the
 * first to the last one; when copying data at overlapping address spaces,
 * "op" should be lower than "ip".
 *
 * @param ip Input buffer.
 * @param[out] op Output buffer.
 * @param l The number of elements to copy.
 * @param ipinc Input buffer pointer increment.
 * @param opinc Output buffer pointer increment.
 */
template <class T1, class T2>
inline void copyArray(const T1* ip, T2* op, int l, const int ipinc = 1,
                      const int opinc = 1) {
    for (; l > 0; l--, ip += ipinc, op += opinc) {
        *op = (T2)*ip;
    }
}
|
|
|
|
/**
 * Function adds values located in the array "ip" to the corresponding
 * values of the array "op".
 *
 * @param ip Input buffer.
 * @param[out] op Output buffer.
 * @param l The number of elements to add.
 * @param ipinc Input buffer pointer increment.
 * @param opinc Output buffer pointer increment.
 */
template <class T1, class T2>
inline void addArray(const T1* ip, T2* op, int l, const int ipinc = 1,
                     const int opinc = 1) {
    for (; l > 0; l--, ip += ipinc, op += opinc) {
        *op += *ip;
    }
}
|
|
|
|
/**
 * Function that replicates a set of adjacent elements several times in a
 * row. This operation is usually used to replicate pixels at the start or
 * end of image's scanline. Source lengths of 1 to 4 elements are handled
 * by dedicated unrolled loops, any other length by a generic loop.
 *
 * @param ip Source array.
 * @param ipl Source array length (usually 1..4, but can be any number).
 * @param[out] op Destination buffer.
 * @param l Number of times the source array should be replicated (the
 * destination buffer should be able to hold ipl * l number of elements).
 * @param opinc Destination buffer position increment after replicating the
 * source array. This value should be equal to at least ipl.
 */
template <class T1, class T2>
inline void replicateArray(const T1* const ip, const int ipl, T2* op, int l,
                           const int opinc) {
    switch (ipl) {
        case 1:
            for (; l > 0; l--, op += opinc) {
                op[0] = ip[0];
            }
            break;

        case 2:
            for (; l > 0; l--, op += opinc) {
                op[0] = ip[0];
                op[1] = ip[1];
            }
            break;

        case 3:
            for (; l > 0; l--, op += opinc) {
                op[0] = ip[0];
                op[1] = ip[1];
                op[2] = ip[2];
            }
            break;

        case 4:
            for (; l > 0; l--, op += opinc) {
                op[0] = ip[0];
                op[1] = ip[1];
                op[2] = ip[2];
                op[3] = ip[3];
            }
            break;

        default:
            for (; l > 0; l--, op += opinc) {
                for (int k = 0; k < ipl; k++) {
                    op[k] = ip[k];
                }
            }
            break;
    }
}
|
|
|
|
/**
 * Function calculates frequency response of the specified FIR filter at
 * the specified circular frequency. Phase can be calculated as
 * atan2( im, re ). Instead of calling "cos" and "sin" functions for every
 * tap, the function runs two recursive oscillators that are stepped once
 * per tap.
 *
 * @param flt FIR filter's coefficients.
 * @param fltlen Number of coefficients (taps) in the filter.
 * @param th Circular frequency [0; pi].
 * @param[out] re0 Resulting real part of the complex frequency response.
 * @param[out] im0 Resulting imaginary part of the complex frequency
 * response.
 * @param fltlat Filter's latency in samples (taps).
 */
template <class T>
inline void calcFIRFilterResponse(const T* flt, int fltlen, const double th,
                                  double& re0, double& im0,
                                  const int fltlat = 0) {
    const double OscCoeff = 2.0 * cos(th);

    // Oscillator values that correspond to the first tap.
    double CosCur = 1.0;
    double SinCur = 0.0;

    if (fltlat != 0) {
        CosCur = cos(-fltlat * th);
        SinCur = sin(-fltlat * th);
    }

    // Oscillator values one step before the first tap.
    double CosPrev = cos(-(fltlat + 1) * th);
    double SinPrev = sin(-(fltlat + 1) * th);

    double SumRe = 0.0;
    double SumIm = 0.0;

    for (; fltlen > 0; fltlen--, flt++) {
        SumRe += CosCur * flt[0];
        SumIm += SinCur * flt[0];

        // Advance both oscillators by one tap.
        const double CosNext = OscCoeff * CosCur - CosPrev;
        CosPrev = CosCur;
        CosCur = CosNext;

        const double SinNext = OscCoeff * SinCur - SinPrev;
        SinPrev = SinCur;
        SinCur = SinNext;
    }

    re0 = SumRe;
    im0 = SumIm;
}
|
|
|
|
/**
 * Function normalizes FIR filter so that its frequency response at DC (the
 * sum of its coefficients) is equal to DCGain. All coefficients are
 * multiplied by "DCGain / sum"; a zero coefficient sum is not checked for.
 *
 * @param[in,out] p Filter coefficients.
 * @param l Filter length.
 * @param DCGain Filter's gain at DC.
 * @param pstep "p" array step.
 */
template <class T>
inline void normalizeFIRFilter(T* const p, const int l, const double DCGain,
                               const int pstep = 1) {
    double Sum = 0.0;
    T* Coeff = p;

    for (int Left = l; Left > 0; Left--, Coeff += pstep) {
        Sum += *Coeff;
    }

    const double Scale = DCGain / Sum;
    Coeff = p;

    for (int Left = l; Left > 0; Left--, Coeff += pstep) {
        *Coeff = (T)(*Coeff * Scale);
    }
}
|
|
|
|
/**
 * @brief Memory buffer class for element array storage, with capacity
 * tracking.
 *
 * Allows easier handling of memory blocks allocation and automatic
 * deallocation for arrays (buffers) consisting of elements of specified
 * class. Tracks buffer's capacity in "int" variable; unsuitable for
 * allocation of very large memory blocks (with more than 2 billion elements).
 *
 * This class manages memory space only - it does not perform element class
 * construction (initialization) operations. Buffer's required memory address
 * alignment specification is supported.
 *
 * Uses standard library to allocate and deallocate memory. The result of
 * the memory allocation function is not checked.
 *
 * @tparam T Buffer element's type.
 * @tparam capint Buffer capacity's type to use. Use size_t for large buffers.
 */
template <class T, typename capint = int>
class CBuffer {
public:
    CBuffer() : Data(NULL), DataAligned(NULL), Capacity(0), Alignment(0) {}

    /**
     * Constructor creates the buffer with the specified capacity.
     *
     * @param aCapacity Buffer's capacity.
     * @param aAlignment Buffer's required memory address alignment. 0 - use
     * stdlib's default alignment.
     */
    CBuffer(const capint aCapacity, const int aAlignment = 0) {
        allocinit(aCapacity, aAlignment);
    }

    /**
     * Copy constructor allocates a buffer with the source's capacity and
     * alignment, and copies source's elements bitwise.
     *
     * @param Source Source buffer.
     */
    CBuffer(const CBuffer& Source) {
        allocinit(Source.Capacity, Source.Alignment);
        copyElements(Source.DataAligned, Capacity);
    }

    ~CBuffer() { freeData(); }

    /**
     * Assignment operator reallocates *this buffer with the source's
     * capacity and alignment, and copies source's elements bitwise.
     * Self-assignment leaves the buffer intact.
     *
     * @param Source Source buffer.
     * @return Reference to *this buffer.
     */
    CBuffer& operator=(const CBuffer& Source) {
        // Without this check alloc() would release the very memory block
        // the elements are about to be copied from.
        if (this != &Source) {
            alloc(Source.Capacity, Source.Alignment);
            copyElements(Source.DataAligned, Capacity);
        }

        return (*this);
    }

    /**
     * Function allocates memory so that the specified number of elements
     * can be stored in *this buffer object. Any previously allocated buffer
     * is released first, its contents are not retained.
     *
     * @param aCapacity Storage for this number of elements to allocate.
     * @param aAlignment Buffer's required memory address alignment,
     * power-of-2 values only. 0 - use stdlib's default alignment.
     */
    void alloc(const capint aCapacity, const int aAlignment = 0) {
        freeData();
        allocinit(aCapacity, aAlignment);
    }

    /**
     * Function deallocates any previously allocated buffer.
     */
    void free() {
        freeData();
        Data = NULL;
        DataAligned = NULL;
        Capacity = 0;
        Alignment = 0;
    }

    /**
     * @return The capacity of the element buffer.
     */
    capint getCapacity() const { return (Capacity); }

    /**
     * Function "forces" *this buffer to have an arbitrary capacity. Calling
     * this function invalidates all further operations except deleting *this
     * object. This function should not be usually used at all. Function can
     * be used to "model" certain buffer capacity without calling a costly
     * memory allocation function.
     *
     * @param NewCapacity A new "forced" capacity.
     */
    void forceCapacity(const capint NewCapacity) { Capacity = NewCapacity; }

    /**
     * Function reallocates *this buffer to a larger size so that it will be
     * able to hold the specified number of elements. Downsizing is not
     * performed. Alignment is not changed.
     *
     * @param NewCapacity New (increased) capacity.
     * @param DoDataCopy "True" if data in the buffer should be retained.
     */
    void increaseCapacity(const capint NewCapacity,
                          const bool DoDataCopy = true) {
        if (NewCapacity < Capacity) {
            return;
        }

        if (DoDataCopy) {
            const capint PrevCapacity = Capacity;
            T* const PrevData = Data;
            T* const PrevDataAligned = DataAligned;

            allocinit(NewCapacity, Alignment);
            copyElements(PrevDataAligned, PrevCapacity);

            // The previous block is released only after its contents were
            // moved to the new block.
            ::free(PrevData);
        } else {
            ::free(Data);
            allocinit(NewCapacity, Alignment);
        }
    }

    /**
     * Function "truncates" (reduces) capacity of the buffer without
     * reallocating it. Alignment is not changed.
     *
     * @param NewCapacity New required capacity.
     */
    void truncateCapacity(const capint NewCapacity) {
        if (NewCapacity >= Capacity) {
            return;
        }

        Capacity = NewCapacity;
    }

    /**
     * Function increases capacity so that the specified number of
     * elements can be stored. This function increases the previous capacity
     * value by third the current capacity value until space for the required
     * number of elements is available. Alignment is not changed.
     *
     * @param ReqCapacity Required capacity.
     */
    void updateCapacity(const capint ReqCapacity) {
        if (ReqCapacity <= Capacity) {
            return;
        }

        capint NewCapacity = Capacity;

        while (NewCapacity < ReqCapacity) {
            NewCapacity += NewCapacity / 3 + 1;
        }

        increaseCapacity(NewCapacity);
    }

    operator T*() const { return (DataAligned); }

private:
    T* Data;  ///< Element buffer pointer.
              ///<
    T* DataAligned;  ///< Memory address-aligned element buffer pointer.
                     ///<
    capint Capacity;  ///< Element buffer capacity.
                      ///<
    int Alignment;  ///< Memory address alignment in use. 0 - use stdlib's
                    ///< default alignment.
                    ///<

    /**
     * Internal element buffer allocation function used during object
     * construction. Does not release any previously allocated buffer.
     *
     * @param aCapacity Storage for this number of elements to allocate.
     * @param aAlignment Buffer's required memory address alignment. 0 - use
     * stdlib's default alignment.
     */
    void allocinit(const capint aCapacity, const int aAlignment) {
        if (aAlignment == 0) {
            Data = (T*)::malloc(aCapacity * sizeof(T));
            DataAligned = Data;
            Alignment = 0;
        } else {
            // Extra "aAlignment" bytes leave room for the pointer shift
            // performed by the alignptr() function.
            Data = (T*)::malloc(aCapacity * sizeof(T) + aAlignment);
            DataAligned = alignptr(Data, aAlignment);
            Alignment = aAlignment;
        }

        Capacity = aCapacity;
    }

    /**
     * Function copies elements bitwise into the aligned element buffer.
     * Nothing is done when there are no elements to copy, so that the
     * memory copying function is never called with a NULL pointer of an
     * empty buffer.
     *
     * @param Src Source elements.
     * @param Count The number of elements to copy.
     */
    void copyElements(const T* const Src, const capint Count) {
        if (Count > 0) {
            memcpy(DataAligned, Src, Count * sizeof(T));
        }
    }

    /**
     * Function frees a previously allocated Data buffer.
     */
    void freeData() { ::free(Data); }

    /**
     * Function modifies the specified pointer so that it becomes memory
     * address-aligned.
     *
     * @param ptr Pointer to align.
     * @param align Alignment in bytes to apply.
     * @return Pointer aligned to align bytes. Works with power-of-2
     * alignments only. If no alignment is necessary, "align" bytes will be
     * added to the pointer value.
     */
    template <class Tp>
    inline Tp alignptr(const Tp ptr, const uintptr_t align) {
        return ((Tp)((uintptr_t)ptr + align - ((uintptr_t)ptr & (align - 1))));
    }
};
|
|
|
|
/**
|
|
* Function optimizes the length of the symmetric-odd FIR filter by removing
|
|
* left- and rightmost elements that are below specific threshold.
|
|
*
|
|
* Synthetic test shows that filter gets optimized in 2..3% of cases and in
|
|
* each such case optimization reduces filter length by 6..8%. Optimization,
|
|
* however, may skew the results of algorithm modeling and complexity
|
|
* calculation leading to a choice of a less optimal algorithm.
|
|
*
|
|
* @param[in,out] Flt Buffer that contains filter being optimized.
|
|
* @param[in,out] FltLatency Variable that holds the current latency of the
|
|
* filter. May be adjusted on function return.
|
|
* @param Threshold Threshold level.
|
|
*/
|
|
|
|
template <class T>
|
|
inline void optimizeFIRFilter(CBuffer<T>& Flt, int& FltLatency,
|
|
T const Threshold = (T)0.00001) {
|
|
int i;
|
|
|
|
// Optimize length.
|
|
|
|
for (i = 0; i <= FltLatency; i++) {
|
|
if (fabs(Flt[i]) >= Threshold || i == FltLatency) {
|
|
if (i > 0) {
|
|
const int NewCapacity = Flt.getCapacity() - i * 2;
|
|
copyArray(&Flt[i], &Flt[0], NewCapacity);
|
|
Flt.truncateCapacity(NewCapacity);
|
|
FltLatency -= i;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief Array of structured objects.
|
|
*
|
|
* Implements allocation of a linear array of objects of class T (which are
|
|
* initialized), addressable via operator[]. Each object is created via the
|
|
* "operator new". New object insertions are quick since implementation uses
|
|
* prior space allocation (capacity), thus not requiring frequent memory block
|
|
* reallocations.
|
|
*
|
|
* @tparam T Array element's type.
|
|
*/
|
|
|
|
template <class T>
|
|
class CStructArray {
|
|
public:
|
|
CStructArray() : ItemCount(0) {}
|
|
|
|
CStructArray(const CStructArray& Source)
|
|
: ItemCount(0), Items(Source.getItemCount()) {
|
|
while (ItemCount < Source.getItemCount()) {
|
|
Items[ItemCount] = new T(Source[ItemCount]);
|
|
ItemCount++;
|
|
}
|
|
}
|
|
|
|
~CStructArray() { clear(); }
|
|
|
|
CStructArray& operator=(const CStructArray& Source) {
|
|
clear();
|
|
|
|
const int NewCount = Source.ItemCount;
|
|
Items.updateCapacity(NewCount);
|
|
|
|
while (ItemCount < NewCount) {
|
|
Items[ItemCount] = new T(Source[ItemCount]);
|
|
ItemCount++;
|
|
}
|
|
|
|
return (*this);
|
|
}
|
|
|
|
T& operator[](const int Index) { return (*Items[Index]); }
|
|
|
|
const T& operator[](const int Index) const { return (*Items[Index]); }
|
|
|
|
/**
|
|
* Function creates a new object of type T with the default constructor
|
|
* and adds this object to the array.
|
|
*
|
|
* @return Reference to a newly added object.
|
|
*/
|
|
|
|
T& add() {
|
|
if (ItemCount == Items.getCapacity()) {
|
|
Items.increaseCapacity(ItemCount * 3 / 2 + 1);
|
|
}
|
|
|
|
Items[ItemCount] = new T();
|
|
ItemCount++;
|
|
|
|
return ((*this)[ItemCount - 1]);
|
|
}
|
|
|
|
/**
|
|
* Function changes number of allocated items. New items are created with
|
|
* the default constructor. If NewCount is below the current item count,
|
|
* items that are above NewCount range will be destructed.
|
|
*
|
|
* @param NewCount New requested item count.
|
|
*/
|
|
|
|
void setItemCount(const int NewCount) {
|
|
if (NewCount > ItemCount) {
|
|
Items.increaseCapacity(NewCount);
|
|
|
|
while (ItemCount < NewCount) {
|
|
Items[ItemCount] = new T();
|
|
ItemCount++;
|
|
}
|
|
} else {
|
|
while (ItemCount > NewCount) {
|
|
ItemCount--;
|
|
delete Items[ItemCount];
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function erases all items of *this array.
|
|
*/
|
|
|
|
void clear() {
|
|
while (ItemCount > 0) {
|
|
ItemCount--;
|
|
delete Items[ItemCount];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return The number of allocated items.
|
|
*/
|
|
|
|
int getItemCount() const { return (ItemCount); }
|
|
|
|
private:
|
|
int ItemCount; ///< The number of items available in the array.
|
|
///<
|
|
CBuffer<T*> Items; ///< Element buffer.
|
|
///<
|
|
};
|
|
|
|
/**
 * @brief Sine signal generator class.
 *
 * Class implements sine signal generator without biasing, with
 * constructor-based initialization only. Instead of calling the "sin"
 * function for every sample, this generator runs a recursive oscillator
 * that derives each new value from the two previous ones.
 */
class CSineGen {
public:
    /**
     * Constructor initializes *this sine signal generator.
     *
     * @param si Sine function increment, in radians.
     * @param ph Starting phase, in radians. Add 0.5 * AVIR_PI for cosine
     * function.
     */
    CSineGen(const double si, const double ph)
        : svalue1(sin(ph)), svalue2(sin(ph - si)), sincr(2.0 * cos(si)) {}

    /**
     * @return The next value of the sine function, without biasing.
     */
    double generate() {
        const double Current = svalue1;
        const double Next = sincr * Current - svalue2;

        svalue2 = Current;
        svalue1 = Next;

        return (Current);
    }

private:
    double svalue1;  ///< Current sine value.
                     ///<
    double svalue2;  ///< Previous sine value.
                     ///<
    double sincr;  ///< Sine value increment.
                   ///<
};
|
|
|
|
/**
|
|
* @brief Peaked Cosine window function generator class.
|
|
*
|
|
* Class implements Peaked Cosine window function generator. Generates the
|
|
* right-handed half of the window function. The Alpha parameter of this
|
|
* window function offers the control of the balance between the early and
|
|
* later taps of the filter. E.g. at Alpha=1 both early and later taps are
|
|
* attenuated, but at Alpha=4 mostly later taps are attenuated. This offers a
|
|
* great control over ringing artifacts produced by a low-pass filter in image
|
|
* processing, without compromising achieved image sharpness.
|
|
*/
|
|
|
|
class CDSPWindowGenPeakedCosine {
|
|
public:
|
|
/**
|
|
* Constructor initializes *this window function generator.
|
|
*
|
|
* @param aAlpha Alpha parameter, affects the peak shape (peak
|
|
* augmentation) of the window function. Should be >= 1.0.
|
|
* @param aLen2 Half filter's length (non-truncated).
|
|
*/
|
|
|
|
CDSPWindowGenPeakedCosine(const double aAlpha, const double aLen2)
|
|
: Alpha(aAlpha),
|
|
Len2(aLen2),
|
|
wn(0),
|
|
w1(AVIR_PId2 / Len2, AVIR_PI * 0.5) {}
|
|
|
|
/**
|
|
* @return The next Peaked Cosine window function coefficient.
|
|
*/
|
|
|
|
double generate() {
|
|
const double h = pow(wn / Len2, Alpha);
|
|
wn++;
|
|
|
|
return (w1.generate() * (1.0 - h));
|
|
}
|
|
|
|
private:
|
|
double Alpha; ///< Alpha parameter, affects the peak shape of window.
|
|
///<
|
|
double Len2; ///< Half length of the window function.
|
|
///<
|
|
int wn; ///< Window function integer position. 0 - center of the
|
|
///< window function.
|
|
///<
|
|
CSineGen w1; ///< Sine-wave generator.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief FIR filter-based equalizer generator.
|
|
*
|
|
* Class implements an object used to generate symmetric-odd FIR filters with
|
|
* the specified frequency response (aka paragraphic equalizer). The
|
|
* calculated filter is windowed by the Peaked Cosine window function.
|
|
*
|
|
* In image processing, due to short length of filters being used (6-8 taps)
|
|
* the resulting frequency response of the filter is approximate and may be
|
|
* mathematically imperfect, but still adequate to the visual requirements.
|
|
*
|
|
* On a side note, this equalizer generator can be successfully used for audio
|
|
* signal equalization as well: for example, it is used in almost the same
|
|
* form in Voxengo Marvel GEQ equalizer plug-in.
|
|
*
|
|
* Filter generation is based on decomposition of frequency range into
|
|
* spectral bands, with each band represented by linear and ramp "kernels".
|
|
* When the filter is built, these kernels are combined together with
|
|
* different weights that approximate the required frequency response.
|
|
*/
|
|
|
|
class CDSPFIREQ {
|
|
public:
|
|
/**
|
|
* Function initializes *this object with the required parameters. The
|
|
* gain of frequencies beyond the MinFreq..MaxFreq range are controlled by
|
|
* the first and the last band's gain.
|
|
*
|
|
* @param SampleRate Processing sample rate (use 2 for image processing).
|
|
* @param aFilterLength Required filter length in samples (taps). The
|
|
* actual filter length is truncated to an integer value.
|
|
* @param aBandCount Number of band crossover points required to control,
|
|
* including bands at MinFreq and MaxFreq.
|
|
* @param MinFreq Minimal frequency that should be controlled.
|
|
* @param MaxFreq Maximal frequency that should be controlled.
|
|
* @param IsLogBands "True" if the bands should be spaced logarithmically.
|
|
* @param WFAlpha Peaked Cosine window function's Alpha parameter.
|
|
*/
|
|
|
|
void init(const double SampleRate, const double aFilterLength,
|
|
const int aBandCount, const double MinFreq, const double MaxFreq,
|
|
const bool IsLogBands, const double WFAlpha) {
|
|
FilterLength = aFilterLength;
|
|
BandCount = aBandCount;
|
|
|
|
CenterFreqs.alloc(BandCount);
|
|
|
|
z = (int)ceil(FilterLength * 0.5);
|
|
zi = z + (z & 1);
|
|
z2 = z * 2;
|
|
|
|
CBuffer<double> oscbuf(z2);
|
|
initOscBuf(oscbuf);
|
|
|
|
CBuffer<double> winbuf(z);
|
|
initWinBuf(winbuf, WFAlpha);
|
|
|
|
UseFirstVirtBand = (MinFreq > 0.0);
|
|
const int k = zi * (BandCount + (UseFirstVirtBand ? 1 : 0));
|
|
Kernels1.alloc(k);
|
|
Kernels2.alloc(k);
|
|
|
|
double m; // Frequency step multiplier.
|
|
double mo; // Frequency step offset (addition).
|
|
|
|
if (IsLogBands) {
|
|
m = exp(log(MaxFreq / MinFreq) / (BandCount - 1));
|
|
mo = 0.0;
|
|
} else {
|
|
m = 1.0;
|
|
mo = (MaxFreq - MinFreq) / (BandCount - 1);
|
|
}
|
|
|
|
double f = MinFreq;
|
|
double x1 = 0.0;
|
|
double x2;
|
|
int si;
|
|
|
|
if (UseFirstVirtBand) {
|
|
si = 0;
|
|
} else {
|
|
si = 1;
|
|
CenterFreqs[0] = 0.0;
|
|
f = f * m + mo;
|
|
}
|
|
|
|
double* kernbuf1 = &Kernels1[0];
|
|
double* kernbuf2 = &Kernels2[0];
|
|
int i;
|
|
|
|
for (i = si; i < BandCount; i++) {
|
|
x2 = f * 2.0 / SampleRate;
|
|
CenterFreqs[i] = x2;
|
|
|
|
fillBandKernel(x1, x2, kernbuf1, kernbuf2, oscbuf, winbuf);
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
f = f * m + mo;
|
|
}
|
|
|
|
if (x1 < 1.0) {
|
|
UseLastVirtBand = true;
|
|
fillBandKernel(x1, 1.0, kernbuf1, kernbuf2, oscbuf, winbuf);
|
|
} else {
|
|
UseLastVirtBand = false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return Filter's length, in samples (taps).
|
|
*/
|
|
|
|
int getFilterLength() const { return (z2 - 1); }
|
|
|
|
/**
|
|
* @return Filter's latency (group delay), in samples (taps).
|
|
*/
|
|
|
|
int getFilterLatency() const { return (z - 1); }
|
|
|
|
/**
|
|
* Function creates symmetric-odd FIR filter with the specified gain
|
|
* levels at band crossover points.
|
|
*
|
|
* @param BandGains Array of linear gain levels, count=BandCount specified
|
|
* in the init() function.
|
|
* @param[out] Filter Output filter buffer, length = getFilterLength().
|
|
*/
|
|
|
|
void buildFilter(const double* const BandGains, double* const Filter) {
|
|
const double* kernbuf1 = &Kernels1[0];
|
|
const double* kernbuf2 = &Kernels2[0];
|
|
double x1 = 0.0;
|
|
double y1 = BandGains[0];
|
|
double x2;
|
|
double y2;
|
|
|
|
int i;
|
|
int si;
|
|
|
|
if (UseFirstVirtBand) {
|
|
si = 1;
|
|
x2 = CenterFreqs[0];
|
|
y2 = y1;
|
|
} else {
|
|
si = 2;
|
|
x2 = CenterFreqs[1];
|
|
y2 = BandGains[1];
|
|
}
|
|
|
|
copyBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1);
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
y1 = y2;
|
|
|
|
for (i = si; i < BandCount; i++) {
|
|
x2 = CenterFreqs[i];
|
|
y2 = BandGains[i];
|
|
|
|
addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - x2 * y1);
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
y1 = y2;
|
|
}
|
|
|
|
if (UseLastVirtBand) {
|
|
addBandKernel(Filter, kernbuf1, kernbuf2, y1 - y2, x1 * y2 - y1);
|
|
}
|
|
|
|
for (i = 0; i < z - 1; i++) {
|
|
Filter[z + i] = Filter[z - 2 - i];
|
|
}
|
|
}
|
|
|
|
/**
 * Function calculates filter's length (in samples) and latency depending
 * on the required non-truncated filter length. The half-length is the
 * required length divided by 2 and rounded up; the resulting filter
 * length is always odd.
 *
 * @param aFilterLength Required filter length in samples (non-truncated).
 * @param[out] Latency Resulting latency (group delay) of the filter,
 * in samples (taps).
 * @return Filter length in samples (taps).
 */
static int calcFilterLength(const double aFilterLength, int& Latency) {
    const int HalfLen = (int)ceil(aFilterLength * 0.5);
    const int TapCount = HalfLen * 2 - 1;

    Latency = HalfLen - 1;

    return (TapCount);
}
|
|
|
|
private:
|
|
double FilterLength; ///< Length of filter.
|
|
///<
|
|
int z; ///< Equals (int) ceil( FilterLength * 0.5 ).
|
|
///<
|
|
int zi; ///< Equals "z" if z is even, or z + 1 if z is odd. Used as a
|
|
///< Kernels1 and Kernels2 size multiplier and kernel buffer
|
|
///< increment to make sure each kernel buffer is 16-byte aligned.
|
|
///<
|
|
int z2; ///< Equals z * 2.
|
|
///<
|
|
int BandCount; ///< Number of controllable bands.
|
|
///<
|
|
CBuffer<double> CenterFreqs; ///< Center frequencies for all bands,
|
|
///< normalized to 0.0-1.0 range.
|
|
///<
|
|
CBuffer<double> Kernels1; ///< Half-length kernel buffers for each
|
|
///< spectral band (linear part).
|
|
///<
|
|
CBuffer<double> Kernels2; ///< Half-length kernel buffers for each
|
|
///< spectral band (ramp part).
|
|
///<
|
|
bool UseFirstVirtBand; ///< "True" if the first virtual band
|
|
///< (between 0.0 and MinFreq) should be used. The
|
|
///< first virtual band won't be used if MinFreq
|
|
///< equals 0.0.
|
|
///<
|
|
bool UseLastVirtBand; ///< "True" if the last virtual band (between
|
|
///< MaxFreq and SampleRate * 0.5) should be used. The
|
|
///< last virtual band won't be used if MaxFreq * 2.0
|
|
///< equals SampleRate.
|
|
///<
|
|
|
|
/**
|
|
* Function initializes the "oscbuf" used in the fillBandKernel()
|
|
* function.
|
|
*
|
|
* @param oscbuf Oscillator buffer, length = z * 2.
|
|
*/
|
|
|
|
void initOscBuf(double* oscbuf) const {
|
|
int i = z;
|
|
|
|
while (i > 0) {
|
|
oscbuf[0] = 0.0;
|
|
oscbuf[1] = 1.0;
|
|
oscbuf += 2;
|
|
i--;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function initializes window function buffer. This function generates
|
|
* Peaked Cosine window function.
|
|
*
|
|
* @param winbuf Windowing buffer.
|
|
* @param Alpha Peaked Cosine alpha parameter.
|
|
*/
|
|
|
|
void initWinBuf(double* winbuf, const double Alpha) const {
|
|
CDSPWindowGenPeakedCosine wf(Alpha, FilterLength * 0.5);
|
|
int i;
|
|
|
|
for (i = 1; i <= z; i++) {
|
|
winbuf[z - i] = wf.generate();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function fills first half of symmetric-odd FIR kernel for the band.
|
|
* This function should be called successively for adjacent bands.
|
|
* Previous band's x2 should be equal to current band's x1. A band kernel
|
|
* consists of 2 elements: linear kernel and ramp kernel.
|
|
*
|
|
* @param x1 Band's left corner frequency (0..1).
|
|
* @param x2 Band's right corner frequency (0..1).
|
|
* @param kernbuf1 Band kernel buffer 1 (linear part), length = z.
|
|
* @param kernbuf2 Band kernel buffer 2 (ramp part), length = z.
|
|
* @param oscbuf Oscillation buffer. Before the first call of the
|
|
* fillBandKernel() should be initialized with the call of the
|
|
* initOscBuf() function.
|
|
* @param winbuf Buffer that contains windowing function.
|
|
*/
|
|
|
|
void fillBandKernel(const double x1, const double x2, double* kernbuf1,
|
|
double* kernbuf2, double* oscbuf,
|
|
const double* const winbuf) {
|
|
const double s2_incr = AVIR_PI * x2;
|
|
const double s2_coeff = 2.0 * cos(s2_incr);
|
|
|
|
double s2_value1 = sin(s2_incr * (-z + 1));
|
|
double c2_value1 = sin(s2_incr * (-z + 1) + AVIR_PI * 0.5);
|
|
oscbuf[0] = sin(s2_incr * -z);
|
|
oscbuf[1] = sin(s2_incr * -z + AVIR_PI * 0.5);
|
|
|
|
int ks;
|
|
|
|
for (ks = 1; ks < z; ks++) {
|
|
const int ks2 = ks * 2;
|
|
const double s1_value1 = oscbuf[ks2];
|
|
const double c1_value1 = oscbuf[ks2 + 1];
|
|
oscbuf[ks2] = s2_value1;
|
|
oscbuf[ks2 + 1] = c2_value1;
|
|
|
|
const double x = AVIR_PI * (ks - z);
|
|
const double v0 = winbuf[ks - 1] / ((x1 - x2) * x);
|
|
|
|
kernbuf1[ks - 1] =
|
|
(x2 * s2_value1 - x1 * s1_value1 + (c2_value1 - c1_value1) / x) * v0;
|
|
|
|
kernbuf2[ks - 1] = (s2_value1 - s1_value1) * v0;
|
|
|
|
s2_value1 = s2_coeff * s2_value1 - oscbuf[ks2 - 2];
|
|
c2_value1 = s2_coeff * c2_value1 - oscbuf[ks2 - 1];
|
|
}
|
|
|
|
kernbuf1[z - 1] = (x2 * x2 - x1 * x1) / (x1 - x2) * 0.5;
|
|
kernbuf2[z - 1] = -1.0;
|
|
}
|
|
|
|
/**
|
|
* Function copies band kernel's elements to the output buffer.
|
|
*
|
|
* @param outbuf Output buffer.
|
|
* @param kernbuf1 Kernel buffer 1 (linear part).
|
|
* @param kernbuf2 Kernel buffer 2 (ramp part).
|
|
* @param c Multiplier for linear kernel element.
|
|
* @param d Multiplier for ramp kernel element.
|
|
*/
|
|
|
|
void copyBandKernel(double* outbuf, const double* const kernbuf1,
|
|
const double* const kernbuf2, const double c,
|
|
const double d) const {
|
|
int ks;
|
|
|
|
for (ks = 0; ks < z; ks++) {
|
|
outbuf[ks] = c * kernbuf1[ks] + d * kernbuf2[ks];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function adds band kernel's elements to the output buffer.
|
|
*
|
|
* @param outbuf Output buffer.
|
|
* @param kernbuf1 Kernel buffer 1 (linear part).
|
|
* @param kernbuf2 Kernel buffer 2 (ramp part).
|
|
* @param c Multiplier for linear kernel element.
|
|
* @param d Multiplier for ramp kernel element.
|
|
*/
|
|
|
|
void addBandKernel(double* outbuf, const double* const kernbuf1,
|
|
const double* const kernbuf2, const double c,
|
|
const double d) const {
|
|
int ks;
|
|
|
|
for (ks = 0; ks < z; ks++) {
|
|
outbuf[ks] += c * kernbuf1[ks] + d * kernbuf2[ks];
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Low-pass filter windowed by Peaked Cosine window function.
|
|
*
|
|
* This class implements calculation of linear-phase symmetric-odd FIR
|
|
* low-pass filter windowed by the Peaked Cosine window function, for image
|
|
* processing applications.
|
|
*/
|
|
|
|
class CDSPPeakedCosineLPF {
|
|
public:
|
|
int fl2; ///< Half filter's length, excluding the peak value. This value
|
|
///< can be also used as filter's latency in samples (taps).
|
|
///<
|
|
int FilterLen; ///< Filter's length in samples (taps).
|
|
///<
|
|
|
|
/**
|
|
* Constructor initalizes *this object.
|
|
*
|
|
* @param aLen2 Half-length (non-truncated) of low-pass filter, in samples
|
|
* (taps).
|
|
* @param aFreq2 Low-pass filter's corner frequency [0; pi].
|
|
* @param aAlpha Peaked Cosine window function Alpha parameter.
|
|
*/
|
|
|
|
CDSPPeakedCosineLPF(const double aLen2, const double aFreq2,
|
|
const double aAlpha)
|
|
: fl2((int)ceil(aLen2) - 1),
|
|
FilterLen(fl2 + fl2 + 1),
|
|
Len2(aLen2),
|
|
Freq2(aFreq2),
|
|
Alpha(aAlpha) {}
|
|
|
|
/**
|
|
* Function generates a linear-phase low-pass filter windowed by Peaked
|
|
* Cosine window function.
|
|
*
|
|
* @param[out] op Output buffer, length = FilterLen (fl2 * 2 + 1).
|
|
* @param DCGain Required gain at DC. The resulting filter will be
|
|
* normalized to achieve this DC gain.
|
|
*/
|
|
|
|
template <class T>
|
|
void generateLPF(T* op, const double DCGain) {
|
|
CDSPWindowGenPeakedCosine wf(Alpha, Len2);
|
|
CSineGen f2(Freq2, 0.0);
|
|
|
|
op += fl2;
|
|
T* op2 = op;
|
|
f2.generate();
|
|
int t = 1;
|
|
|
|
*op = (T)(Freq2 * wf.generate() / AVIR_PI);
|
|
double s = *op;
|
|
|
|
while (t <= fl2) {
|
|
const double v = f2.generate() * wf.generate() / t / AVIR_PI;
|
|
op++;
|
|
op2--;
|
|
*op = (T)v;
|
|
*op2 = (T)v;
|
|
s += *op + *op2;
|
|
t++;
|
|
}
|
|
|
|
t = FilterLen;
|
|
s = DCGain / s;
|
|
|
|
while (t > 0) {
|
|
*op2 = (T)(*op2 * s);
|
|
op2++;
|
|
t--;
|
|
}
|
|
}
|
|
|
|
private:
|
|
double Len2; ///< Half-length (non-truncated) of low-pass filter, in
|
|
///< samples (taps).
|
|
///<
|
|
double Freq2; ///< Low-pass filter's corner frequency.
|
|
///<
|
|
double Alpha; ///< Peaked Cosine window function Alpha parameter.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Buffer class for parametrized low-pass filter.
|
|
*
|
|
* This class extends the CBuffer< double > class by adding several variables
|
|
* that define a symmetric-odd FIR low-pass filter windowed by Peaked Cosine
|
|
* window function. This class can be used to compare filters without
|
|
* comparing their buffer contents.
|
|
*/
|
|
|
|
class CFltBuffer : public CBuffer<double> {
|
|
public:
|
|
double Len2; ///< Half-length (non-truncated) of low-pass filters, in
|
|
///< samples (taps).
|
|
///<
|
|
double Freq; ///< Low-pass filter's corner frequency.
|
|
///<
|
|
double Alpha; ///< Peaked Cosine window function Alpha parameter.
|
|
///<
|
|
double DCGain; ///< DC gain applied to the filter.
|
|
///<
|
|
|
|
CFltBuffer()
|
|
: CBuffer<double>(), Len2(0.0), Freq(0.0), Alpha(0.0), DCGain(0.0) {}
|
|
|
|
/**
|
|
* @param b2 Filter buffer to compare *this object to.
|
|
* @return Operator returns "true" if both filters have same parameters.
|
|
*/
|
|
|
|
bool operator==(const CFltBuffer& b2) const {
|
|
return (Len2 == b2.Len2 && Freq == b2.Freq && Alpha == b2.Alpha &&
|
|
DCGain == b2.DCGain);
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Sinc function-based fractional delay filter bank.
|
|
*
|
|
* Class implements storage and initialization of a bank of sinc
|
|
* function-based fractional delay filters, expressed as 1st order polynomial
|
|
* interpolation coefficients. The filters are produced from a single "long"
|
|
* windowed low-pass filter. Also supports 0th-order ("nearest neighbor")
|
|
* interpolation.
|
|
*
|
|
* This class also supports multiplication of each fractional delay filter by
|
|
* an external filter (usually a low-pass filter).
|
|
*
|
|
* @tparam fptype Specifies storage type of the filter coefficients bank. The
|
|
* filters are initially calculated using the "double" precision.
|
|
*/
|
|
|
|
template <class fptype>
|
|
class CDSPFracFilterBankLin {
|
|
public:
|
|
// Order == -1 marks a bank that was never initialized, so the first init()
// call always rebuilds. The remaining scalar members are given defined
// values so that calling an accessor before init() reads no indeterminate
// data.
CDSPFracFilterBankLin()
    : WFLen2(0.0),
      WFFreq(0.0),
      WFAlpha(0.0),
      FracCount(0),
      Order(-1),
      Alignment(0),
      SrcFilterLen(0),
      FilterLen(0),
      FilterSize(0),
      IsInitRequired(true),
      IsSrcTableBuilt(false) {}
|
|
|
|
/**
|
|
* Copy constructor copies a limited set of parameters of the source
|
|
* filter bank. The actual filters are not copied. Such copying is used
|
|
* during filtering steps "modeling" stage. A further init() function
|
|
* call is required.
|
|
*
|
|
* @param s Source filter bank.
|
|
*/
|
|
|
|
void copyInitParams(const CDSPFracFilterBankLin& s) {
|
|
WFLen2 = s.WFLen2;
|
|
WFFreq = s.WFFreq;
|
|
WFAlpha = s.WFAlpha;
|
|
FracCount = s.FracCount;
|
|
Order = s.Order;
|
|
Alignment = s.Alignment;
|
|
SrcFilterLen = s.SrcFilterLen;
|
|
FilterLen = s.FilterLen;
|
|
FilterSize = s.FilterSize;
|
|
IsSrcTableBuilt = false;
|
|
ExtFilter = s.ExtFilter;
|
|
TableFillFlags.alloc(s.TableFillFlags.getCapacity());
|
|
int i;
|
|
|
|
// Copy table fill flags, but shifted so that further initialization
|
|
// is still possible (such feature should not be used, though).
|
|
|
|
for (i = 0; i < TableFillFlags.getCapacity(); i++) {
|
|
TableFillFlags[i] = (uint8_t)(s.TableFillFlags[i] << 2);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Operator compares *this filter bank and another filter bank and returns
|
|
* "true" if their parameters are equal. Alignment is not taken into
|
|
* account.
|
|
*
|
|
* @param s Filter bank to compare to.
|
|
* @return "True" if compared banks have equal parameters.
|
|
*/
|
|
|
|
bool operator==(const CDSPFracFilterBankLin& s) const {
|
|
return (Order == s.Order && WFLen2 == s.WFLen2 && WFFreq == s.WFFreq &&
|
|
WFAlpha == s.WFAlpha && FracCount == s.FracCount &&
|
|
ExtFilter == s.ExtFilter);
|
|
}
|
|
|
|
/**
|
|
* Function initializes (builds) the filter bank based on the supplied
|
|
* parameters. If the supplied parameters are equal to previously defined
|
|
* parameters, function does nothing (alignment is assumed to be never
|
|
* changing between the init() function calls).
|
|
*
|
|
* @param ReqFracCount Required number of fractional delays in the filter
|
|
* bank. The minimal value is 2.
|
|
* @param ReqOrder Required order of the interpolation polynomial
|
|
* (0 or 1).
|
|
* @param BaseLen Low-pass filter's base length, in samples (taps).
|
|
* Affects the actual length of the filter and its overall steepness.
|
|
* @param Cutoff Low-pass filter's normalized cutoff frequency [0; 1].
|
|
* @param aWFAlpha Peaked Cosine window function's Alpha parameter.
|
|
* @param aExtFilter External filter to apply to each fractional delay
|
|
* filter.
|
|
* @param aAlignment Memory alignment of the filter bank, power-of-2
|
|
* value. 0 - use default stdlib alignment.
|
|
* @param FltLenAlign Filter's length alignment, power-of-2 value.
|
|
*/
|
|
|
|
void init(const int ReqFracCount, const int ReqOrder, const double BaseLen,
|
|
const double Cutoff, const double aWFAlpha,
|
|
const CFltBuffer& aExtFilter, const int aAlignment = 0,
|
|
const int FltLenAlign = 1) {
|
|
double NewWFLen2 = 0.5 * BaseLen * ReqFracCount;
|
|
double NewWFFreq = AVIR_PI * Cutoff / ReqFracCount;
|
|
double NewWFAlpha = aWFAlpha;
|
|
|
|
if (ReqOrder == Order && NewWFLen2 == WFLen2 && NewWFFreq == WFFreq &&
|
|
NewWFAlpha == WFAlpha && ReqFracCount == FracCount &&
|
|
aExtFilter == ExtFilter) {
|
|
IsInitRequired = false;
|
|
return;
|
|
}
|
|
|
|
WFLen2 = NewWFLen2;
|
|
WFFreq = NewWFFreq;
|
|
WFAlpha = NewWFAlpha;
|
|
FracCount = ReqFracCount;
|
|
Order = ReqOrder;
|
|
Alignment = aAlignment;
|
|
ExtFilter = aExtFilter;
|
|
|
|
CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha);
|
|
SrcFilterLen = (p.fl2 / ReqFracCount + 1) * 2;
|
|
|
|
const int ElementSize = ReqOrder + 1;
|
|
FilterLen = SrcFilterLen;
|
|
|
|
if (ExtFilter.getCapacity() > 0) {
|
|
FilterLen += ExtFilter.getCapacity() - 1;
|
|
}
|
|
|
|
FilterLen = (FilterLen + FltLenAlign - 1) & ~(FltLenAlign - 1);
|
|
FilterSize = FilterLen * ElementSize;
|
|
IsSrcTableBuilt = false;
|
|
IsInitRequired = true;
|
|
}
|
|
|
|
/**
|
|
* @return The length of each fractional delay filter, in samples (taps).
|
|
* Always an even value.
|
|
*/
|
|
|
|
int getFilterLen() const { return (FilterLen); }
|
|
|
|
/**
|
|
* @return The number of fractional filters in use by *this bank.
|
|
*/
|
|
|
|
int getFracCount() const { return (FracCount); }
|
|
|
|
/**
|
|
* @return The order of the interpolation polynomial.
|
|
*/
|
|
|
|
int getOrder() const { return (Order); }
|
|
|
|
/**
|
|
* Function returns the pointer to the specified interpolation table
|
|
* filter.
|
|
*
|
|
* @param i Filter (fractional delay) index, in the range 0 to
|
|
* ReqFracCount - 1, inclusive.
|
|
* @return Pointer to filter. Higher order polynomial coefficients are
|
|
* stored after after previous order coefficients, separated by FilterLen
|
|
* elements.
|
|
*/
|
|
|
|
const fptype* getFilter(const int i) {
|
|
if (!IsSrcTableBuilt) {
|
|
buildSrcTable();
|
|
}
|
|
|
|
fptype* const Res = &Table[i * FilterSize];
|
|
|
|
if ((TableFillFlags[i] & 2) == 0) {
|
|
createFilter(i);
|
|
TableFillFlags[i] |= 2;
|
|
|
|
if (Order > 0) {
|
|
createFilter(i + 1);
|
|
const fptype* const Res2 = Res + FilterSize;
|
|
fptype* const op = Res + FilterLen;
|
|
int j;
|
|
|
|
// Create higher-order interpolation coefficients (linear
|
|
// interpolation).
|
|
|
|
for (j = 0; j < FilterLen; j++) {
|
|
op[j] = Res2[j] - Res[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
return (Res);
|
|
}
|
|
|
|
/**
|
|
* Function makes sure all fractional delay filters were created.
|
|
*/
|
|
|
|
void createAllFilters() {
|
|
int i;
|
|
|
|
for (i = 0; i < FracCount; i++) {
|
|
getFilter(i);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function returns an approximate initialization complexity, expressed in
|
|
* the number of multiply-add operations. This includes fractional delay
|
|
* filters calculation and multiplication by an external filter. This
|
|
* function can only be called after the init() function.
|
|
*
|
|
* @param FracUseMap Fractional delays use map, each element corresponds
|
|
* to a single fractional delay, will be compared to the internal table
|
|
* fill flags. This map should include 0 and 1 values only.
|
|
* @return The complexity of the initialization, expressed in the number
|
|
* of multiply-add operations.
|
|
*/
|
|
|
|
int calcInitComplexity(const CBuffer<uint8_t>& FracUseMap) const {
|
|
const int FltInitCost = 65; // Cost to initialize a single sample
|
|
// of the fractional delay filter.
|
|
const int FltUseCost =
|
|
FilterLen * Order +
|
|
SrcFilterLen * ExtFilter.getCapacity(); // Cost to use a single
|
|
// fractional delay filter.
|
|
const int ucb[2] = {0, FltUseCost};
|
|
int ic;
|
|
int i;
|
|
|
|
if (IsInitRequired) {
|
|
ic = FracCount * SrcFilterLen * FltInitCost;
|
|
|
|
for (i = 0; i < FracCount; i++) {
|
|
ic += ucb[FracUseMap[i]];
|
|
}
|
|
} else {
|
|
ic = 0;
|
|
|
|
for (i = 0; i < FracCount; i++) {
|
|
if (FracUseMap[i] != 0) {
|
|
ic += ucb[TableFillFlags[i] == 0 ? 1 : 0];
|
|
}
|
|
}
|
|
}
|
|
|
|
return (ic);
|
|
}
|
|
|
|
private:
|
|
static const int InterpPoints = 2; ///< The maximal number of points the
|
|
///< interpolation is based on.
|
|
///<
|
|
double WFLen2; ///< Window function's Len2 parameter.
|
|
///<
|
|
double WFFreq; ///< Window function's Freq parameter.
|
|
///<
|
|
double WFAlpha; ///< Window function's Alpha parameter.
|
|
///<
|
|
int FracCount; ///< The required number of fractional delay filters.
|
|
///<
|
|
int Order; ///< The order of the interpolation polynomial.
|
|
///<
|
|
int Alignment; ///< The required filter table alignment.
|
|
///<
|
|
int SrcFilterLen; ///< Length of the "source" filters. This is always an
|
|
///< even value.
|
|
///<
|
|
int FilterLen; ///< Specifies the number of samples (taps) each fractional
|
|
///< delay filter has. This is always an even value, adjusted
|
|
///< by the FltLenAlign.
|
|
///<
|
|
int FilterSize; ///< The size of a single filter element, equals
|
|
///< FilterLen * ElementSize.
|
|
///<
|
|
bool IsInitRequired; ///< "True" if SrcTable filter table initialization
|
|
///< is required. This value is available only after the
|
|
///< call to the init() function.
|
|
///<
|
|
CBuffer<fptype> Table; ///< Interpolation table, size equals to
|
|
///< ReqFracCount * FilterLen * ElementSize.
|
|
///<
|
|
CBuffer<uint8_t>
|
|
TableFillFlags; ///< Contains ReqFracCount + 1
|
|
///< elements. Bit 0 of every element is 1 if Table
|
|
///< already contains the filter from SrcTable filtered
|
|
///< by ExtFilter. Bit 1 of every element means higher
|
|
///< order coefficients were filled for the filter.
|
|
///<
|
|
CFltBuffer ExtFilter; ///< External filter that should be applied to every
|
|
///< fractional delay filter. Can be empty. Half of
|
|
///< this filter's capacity is used as latency (group
|
|
///< delay) value of the filter.
|
|
///<
|
|
CBuffer<double> SrcTable; ///< Source table of delay filters, contains
|
|
///< ReqFracCount + 1 elements. This table is used
|
|
///< to fill the Table with the actual filters,
|
|
///< filtered by an external filter.
|
|
///<
|
|
bool IsSrcTableBuilt; ///< "True" if the SrcTable was built already. This
|
|
///< variable is set to "false" in the init() function.
|
|
///<
|
|
|
|
/**
|
|
* Function builds source table used in the createFilter() function.
|
|
*/
|
|
|
|
void buildSrcTable() {
|
|
IsSrcTableBuilt = true;
|
|
IsInitRequired = false;
|
|
|
|
CDSPPeakedCosineLPF p(WFLen2, WFFreq, WFAlpha);
|
|
|
|
const int BufLen = SrcFilterLen * FracCount + InterpPoints - 1;
|
|
const int BufOffs = InterpPoints / 2 - 1;
|
|
const int BufCenter = SrcFilterLen * FracCount / 2 + BufOffs;
|
|
|
|
CBuffer<double> Buf(BufLen);
|
|
memset(Buf, 0, (BufCenter - p.fl2) * sizeof(double));
|
|
int i = BufLen - BufCenter - p.fl2 - 1;
|
|
memset(&Buf[BufLen - i], 0, i * sizeof(double));
|
|
|
|
p.generateLPF(&Buf[BufCenter - p.fl2], FracCount);
|
|
abort();
|
|
|
|
SrcTable.alloc((FracCount + 1) * SrcFilterLen);
|
|
TableFillFlags.alloc(FracCount + 1);
|
|
int j;
|
|
double* op0 = SrcTable;
|
|
|
|
for (i = FracCount; i >= 0; i--) {
|
|
TableFillFlags[i] = 0;
|
|
double* p = Buf + BufOffs + i;
|
|
|
|
for (j = 0; j < SrcFilterLen; j++) {
|
|
op0[0] = p[0];
|
|
op0++;
|
|
p += FracCount;
|
|
}
|
|
}
|
|
|
|
Table.alloc((FracCount + 1) * FilterSize, Alignment);
|
|
}
|
|
|
|
/**
|
|
* Function creates the specified filter in the Table by copying it from
|
|
* the SrcTable and filtering by ExtFilter. Function does nothing if
|
|
* filter was already created.
|
|
*
|
|
* @param k Filter index to create, in the range 0 to FracCount,
|
|
* inclusive.
|
|
*/
|
|
|
|
void createFilter(const int k) {
|
|
if (TableFillFlags[k] != 0) {
|
|
return;
|
|
}
|
|
|
|
TableFillFlags[k] |= 1;
|
|
const int ExtFilterLatency = ExtFilter.getCapacity() / 2;
|
|
const int ResLatency = ExtFilterLatency + SrcFilterLen / 2;
|
|
int ResLen = SrcFilterLen;
|
|
|
|
if (ExtFilter.getCapacity() > 0) {
|
|
ResLen += ExtFilter.getCapacity() - 1;
|
|
}
|
|
|
|
const int ResOffs = FilterLen / 2 - ResLatency;
|
|
fptype* op = &Table[k * FilterSize];
|
|
int i;
|
|
|
|
for (i = 0; i < ResOffs; i++) {
|
|
op[i] = 0.0;
|
|
}
|
|
|
|
for (i = ResOffs + ResLen; i < FilterLen; i++) {
|
|
op[i] = 0.0;
|
|
}
|
|
|
|
op += ResOffs;
|
|
const double* const srcflt = &SrcTable[k * SrcFilterLen];
|
|
|
|
if (ExtFilter.getCapacity() == 0) {
|
|
for (i = 0; i < ResLen; i++) {
|
|
op[i] = (fptype)srcflt[i];
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// Perform convolution of extflt and srcflt.
|
|
|
|
const double* const extflt = &ExtFilter[0];
|
|
int j;
|
|
|
|
for (j = 0; j < ResLen; j++) {
|
|
int k = 0;
|
|
int l = j - ExtFilter.getCapacity() + 1;
|
|
int r = l + ExtFilter.getCapacity();
|
|
|
|
if (l < 0) {
|
|
k -= l;
|
|
l = 0;
|
|
}
|
|
|
|
if (r > SrcFilterLen) {
|
|
r = SrcFilterLen;
|
|
}
|
|
|
|
const double* const extfltb = extflt + k;
|
|
const double* const srcfltb = srcflt + l;
|
|
double s = 0.0;
|
|
l = r - l;
|
|
|
|
for (i = 0; i < l; i++) {
|
|
s += extfltb[i] * srcfltb[i];
|
|
}
|
|
|
|
op[j] = (fptype)s;
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
 * @brief Thread pool for multi-threaded image resizing operation.
 *
 * This base class is used to organize a multi-threaded image resizing
 * operation. The thread pool should consist of threads that initially wait
 * for a signal. Upon receiving a signal (via the startAllWorkloads()
 * function) each previously added thread should execute its workload's
 * process() function once, and return to the wait signal state again. The
 * thread pool should be also able to efficiently wait for all workloads to
 * finish via the waitAllWorkloadsToFinish() function.
 *
 * The image resizing algorithm makes calls to functions of this class.
 *
 * This base implementation is a stub: it suggests a single workload and all
 * of its other functions do nothing.
 */

class CImageResizerThreadPool {
 public:
  CImageResizerThreadPool() {}

  virtual ~CImageResizerThreadPool() {}

  /**
   * @brief Thread pool's workload object class.
   *
   * This class should be used as a base class for objects that perform the
   * actual work spread over several threads.
   */

  class CWorkload {
   public:
    virtual ~CWorkload() {}

    /**
     * Function that gets called from the thread when thread pool's
     * startAllWorkloads() function is called.
     */

    virtual void process() = 0;
  };

  /**
   * @return The suggested number of workloads (and their associated
   * threads) to add. The minimal value this function can return is 1. The
   * usual value may depend on the number of physical and virtual cores
   * present in the system, and on other considerations.
   */

  virtual int getSuggestedWorkloadCount() const { return (1); }

  /**
   * Function adds a new workload (and possibly thread) to the thread pool.
   * The caller decides how many parallel workloads (and threads) it
   * requires, but this number will not exceed the value returned by the
   * getSuggestedWorkloadCount() function. It is implementation-specific how
   * many workloads to associate with a single thread. But for efficiency
   * reasons each workload should be associated with its own thread.
   *
   * Note that the same set of workload objects will be processed each time
   * the startAllWorkloads() function is called. This means that workload
   * objects are added only once. The caller changes the state of the
   * workload objects and then calls the startAllWorkloads() function to
   * process them.
   *
   * @param Workload Workload object whose process() function will be called
   * from within the thread when the startAllWorkloads() function is called.
   */

  virtual void addWorkload(CWorkload* const Workload) {}

  /**
   * Function starts all workloads associated with threads previously added
   * via the addWorkload() function. It is assumed that this function
   * performs the necessary "memory barrier" (or "cache sync") kind of
   * operation so that all threads catch up the prior changes made to the
   * workload objects during their wait state.
   */

  virtual void startAllWorkloads() {}

  /**
   * Function waits for all workloads to finish.
   */

  virtual void waitAllWorkloadsToFinish() {}

  /**
   * Function removes all workloads previously added via the addWorkload()
   * function. This function gets called only after the
   * waitAllWorkloadsToFinish() function call.
   */

  virtual void removeAllWorkloads() {}
};
|
|
|
|
/**
 * @brief Resizing algorithm parameters structure.
 *
 * This structure holds all selectable parameters used by the resizing
 * algorithm at various stages, for both downsizing and upsizing. No other
 * parameters exist that can optimize the performance of the resizing
 * algorithm. Filter length parameters can take fractional values.
 *
 * Beside quality, these parameters (except Alpha parameters) directly affect
 * the computational cost of the resizing algorithm. It is possible to trade
 * the visual quality for computational cost.
 *
 * Anti-alias filtering during downsizing can be defined as a considerable
 * reduction of contrast of smallest features of an image. Unfortunately, such
 * de-contrasting partially affects features of all sizes thus producing a
 * non-linearity of frequency response. All pre-defined parameter sets are
 * described by 3 values separated by slashes. The first value is the
 * de-contrasting factor of small features (which are being removed) while
 * the second value is the de-contrasting factor of large features (which
 * should remain intact), with value of 1 equating to "no contrast change".
 * The third value is the optimization score (see below), with value of 0
 * equating to the "perfect" linearity of frequency response.
 *
 * The pre-defined parameter sets offered by this library were auto-optimized
 * for the given LPFltBaseLen, IntFltLen and CorrFltAlpha values. The
 * optimization goal was to minimize the score: the sum of squares of the
 * difference between original and processed images (which was not actually
 * resized, k=1). The original image was a 0.5 megapixel uniformly-distributed
 * white-noise image with pixel intensities in the 0-1 range. Such goal
 * converges very well and produces filtering system with the flattest
 * frequency response possible for the given constraints. With this goal,
 * increasing the LPFltBaseLen value reduces the general amount of aliasing
 * artifacts.
 *
 * Note that the constructor of this structure assigns the half-band filter
 * (HBFlt) parameters only; the remaining fields are assigned by the derived
 * parameter-set structures or by the user.
 */

struct CImageResizerParams {
  double CorrFltAlpha;  ///< Alpha parameter of the Peaked Cosine window
                        ///< function used on the correction filter. The
                        ///< "usable" values are in the narrow range 1.0 to 1.5.
                        ///<
  double CorrFltLen;  ///< Correction filter's length in samples (taps). The
                      ///< "usable" range is narrow, 5.5 to 8, as to minimize
                      ///< the "overcorrection" which is mathematically precise,
                      ///< but visually unacceptable.
                      ///<
  double IntFltAlpha;  ///< Alpha parameter of the Peaked Cosine window
                       ///< function used on the interpolation low-pass filter.
                       ///< The "usable" values are in the range 1.5 to 2.5.
                       ///<
  double IntFltCutoff;  ///< Interpolation low-pass filter's cutoff frequency
                        ///< (normalized, [0; 1]). The "usable" range is 0.6 to
                        ///< 0.8.
                        ///<
  double IntFltLen;  ///< Interpolation low-pass filter's length in samples
                     ///< (taps). The length value should be at least 18 or
                     ///< otherwise a "dark grid" artifact will be introduced if
                     ///< a further sharpening is applied. IntFltLen together
                     ///< with other IntFlt parameters should be tuned in a way
                     ///< that produces the flattest frequency response in 0-0.5
                     ///< normalized frequency range (this range is due to 2X
                     ///< upsampling).
                     ///<
  double LPFltAlpha;  ///< Alpha parameter of the Peaked Cosine window
                      ///< function used on the low-pass filter. The "usable"
                      ///< values are in the range 1.5 to 6.5.
                      ///<
  double LPFltBaseLen;  ///< Base length of the low-pass (aka anti-aliasing
                        ///< or reconstruction) filter, in samples (taps),
                        ///< further adjusted by the actual cutoff frequency,
                        ///< upsampling and downsampling factors. The "usable"
                        ///< range is between 6 and 9.
                        ///<
  double LPFltCutoffMult;  ///< Low-pass filter's cutoff frequency
                           ///< multiplier. This value can be both below and
                           ///< above 1.0 as low-pass filters are inserted on
                           ///< downsampling and upsampling steps and always
                           ///< have corner frequency equal to or below 0.5pi.
                           ///< This multiplier shifts low-pass filter's corner
                           ///< frequency towards lower (if below 1.0) or higher
                           ///< (if above 1.0) frequencies. This multiplier can
                           ///< be way below 1.0 since any additional
                           ///< high-frequency damping will be partially
                           ///< corrected by the correction filter. The "usable"
                           ///< range is 0.3 to 1.0.
                           ///<

  CImageResizerParams()
      : HBFltAlpha(1.75395), HBFltCutoff(0.40356), HBFltLen(22.00000) {}

  double HBFltAlpha;  ///< Half-band filter's Alpha. Assigned internally.
                      ///<
  double HBFltCutoff;  ///< Half-band filter's cutoff point [0; 1]. Assigned
                       ///< internally.
                       ///<
  double HBFltLen;  ///< Length of the half-band low-pass filter. Assigned
                    ///< internally. Internally used to perform 2X or higher
                    ///< downsampling. These filter parameters should be treated
                    ///< as "technical" and do not require adjustment as they
                    ///< were tuned to suit all combinations of other
                    ///< parameters. This half-band filter provides a wide
                    ///< transition band (for minimal ringing artifacts) and a
                    ///< high stop-band attenuation (for minimal aliasing).
                    ///<
};
|
|
|
|
/**
|
|
* @brief The default set of resizing algorithm parameters
|
|
* (10.01/1.029/0.019169).
|
|
*
|
|
* This is the default set of resizing parameters that was designed to deliver
|
|
* a sharp image while still providing a low amount of ringing artifacts, and
|
|
* having a reasonable computational cost.
|
|
*/
|
|
|
|
struct CImageResizerParamsDef : public CImageResizerParams {
|
|
CImageResizerParamsDef() {
|
|
CorrFltAlpha = 1.0; // 10.01/1.88/1.029(522.43)/0.019169:258648,446808
|
|
CorrFltLen = 6.30770;
|
|
IntFltAlpha = 2.27825;
|
|
IntFltCutoff = 0.75493;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 3.40127;
|
|
LPFltBaseLen = 7.78;
|
|
LPFltCutoffMult = 0.78797;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for ultra-low-ringing
|
|
* performance (7.69/1.069/0.000245).
|
|
*
|
|
* This set of resizing algorithm parameters offers the lowest amount of
|
|
* ringing this library is capable of providing while still offering a decent
|
|
* quality. Low ringing is attained at the expense of higher aliasing
|
|
* artifacts and a slightly reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsULR : public CImageResizerParams {
|
|
CImageResizerParamsULR() {
|
|
CorrFltAlpha = 1.0; // 7.69/1.97/1.069(31445.45)/0.000245:258627,436845
|
|
CorrFltLen = 5.83280;
|
|
IntFltAlpha = 2.11453;
|
|
IntFltCutoff = 0.73986;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 1.73455;
|
|
LPFltBaseLen = 6.40;
|
|
LPFltCutoffMult = 0.61314;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for low-ringing performance
|
|
* (7.86/1.065/0.000106).
|
|
*
|
|
* This set of resizing algorithm parameters offers a very low-ringing
|
|
* performance at the expense of higher aliasing artifacts and a slightly
|
|
* reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsLR : public CImageResizerParams {
|
|
CImageResizerParamsLR() {
|
|
CorrFltAlpha = 1.0; // 7.86/1.96/1.065(73865.02)/0.000106:258636,437381
|
|
CorrFltLen = 5.87671;
|
|
IntFltAlpha = 2.25322;
|
|
IntFltCutoff = 0.74090;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 1.79306;
|
|
LPFltBaseLen = 7.00;
|
|
LPFltCutoffMult = 0.68881;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for lower-ringing performance
|
|
* (8.86/1.046/0.010168).
|
|
*
|
|
* This set of resizing algorithm parameters offers a lower-ringing
|
|
* performance in comparison to the default setting, at the expense of higher
|
|
* aliasing artifacts and a slightly reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsLow : public CImageResizerParams {
|
|
CImageResizerParamsLow() {
|
|
CorrFltAlpha = 1.0; // 8.86/1.92/1.046(871.54)/0.010168:258647,442252
|
|
CorrFltLen = 6.09757;
|
|
IntFltAlpha = 2.36704;
|
|
IntFltCutoff = 0.74674;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 2.19427;
|
|
LPFltBaseLen = 7.66;
|
|
LPFltCutoffMult = 0.75380;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for low-aliasing
|
|
* resizing (11.81/1.012/0.038379).
|
|
*
|
|
* This set of resizing algorithm parameters offers a considerable
|
|
* anti-aliasing performance with a good frequency response linearity (and
|
|
* contrast). This is an intermediate setting between the default and Ultra
|
|
* parameters.
|
|
*/
|
|
|
|
struct CImageResizerParamsHigh : public CImageResizerParams {
|
|
CImageResizerParamsHigh() {
|
|
CorrFltAlpha = 1.0; // 11.81/1.83/1.012(307.84)/0.038379:258660,452719
|
|
CorrFltLen = 6.80909;
|
|
IntFltAlpha = 2.44917;
|
|
IntFltCutoff = 0.75856;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 4.39527;
|
|
LPFltBaseLen = 8.18;
|
|
LPFltCutoffMult = 0.79172;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for ultra low-aliasing
|
|
* resizing (13.65/1.001/0.000483).
|
|
*
|
|
* This set of resizing algorithm parameters offers a very considerable
|
|
* anti-aliasing performance with a good frequency response linearity (and
|
|
* contrast). This set of parameters is computationally expensive and may
|
|
* produce ringing artifacts on sharp features.
|
|
*/
|
|
|
|
struct CImageResizerParamsUltra : public CImageResizerParams {
|
|
CImageResizerParamsUltra() {
|
|
CorrFltAlpha = 1.0; // 13.65/1.79/1.001(28288.41)/0.000483:258658,457974
|
|
CorrFltLen = 7.48060;
|
|
IntFltAlpha = 1.93750;
|
|
IntFltCutoff = 0.75462;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 5.55209;
|
|
LPFltBaseLen = 8.34;
|
|
LPFltCutoffMult = 0.78002;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizing variables class.
|
|
*
|
|
* This is an utility "catch all" class that defines various variables used
|
|
* during image resizing. Several variables that are explicitly initialized in
|
|
* this class' constructor are also used as additional "input" variables to
|
|
* the image resizing function. These variables will not be changed by the
|
|
* avir::CImageResizer<>::resizeImage() function.
|
|
*/
|
|
|
|
class CImageResizerVars {
|
|
public:
|
|
int ElCount; ///< The number of "fptype" elements used to store 1 pixel.
|
|
///<
|
|
int ElCountIO; ///< The number of source and destination image's elements
|
|
///< used to store 1 pixel.
|
|
///<
|
|
int fppack; ///< The number of atomic types stored in a single "fptype"
|
|
///< element.
|
|
///<
|
|
int fpalign; ///< Suggested alignment size in bytes. This is not a
|
|
///< required alignment, because image resizing algorithm cannot
|
|
///< be made to have a strictly aligned data access in all cases
|
|
///< (e.g. de-interleaved interpolation cannot perform aligned
|
|
///< accesses).
|
|
///<
|
|
int elalign; ///< Length alignment of arrays of elements. This applies to
|
|
///< filters and intermediate buffers: this constant forces
|
|
///< filters and scanlines to have a length which is a multiple
|
|
///< of this value, for more efficient SIMD implementation.
|
|
///<
|
|
int packmode; ///< 0 if interleaved packing, 1 if de-interleaved.
|
|
///<
|
|
int BufLen[2]; ///< Intermediate buffers' lengths in "fptype" elements.
|
|
int BufOffs[2]; ///< Offsets into the intermediate buffers, used to
|
|
///< provide prefix elements required during processing so
|
|
///< that no "out of range" access happens. This offset is a
|
|
///< multiple of ElCount if pixels are stored in interleaved
|
|
///< form.
|
|
///<
|
|
double k; ///< Resizing step coefficient, updated to reflect the actually
|
|
///< used coefficient during resizing.
|
|
///<
|
|
double o; ///< Starting pixel offset inside the source image, updated to
|
|
///< reflect the actually used offset during resizing.
|
|
///<
|
|
int ResizeStep; ///< Index of the resizing step in the latest filtering
|
|
///< steps array.
|
|
///<
|
|
double InGammaMult; ///< Input gamma multiplier, used to convert input
|
|
///< data to 0 to 1 range. 0.0 if no gamma is in use.
|
|
///<
|
|
double OutGammaMult; ///< Output gamma multiplier, used to convert data to
|
|
///< 0 to 255/65535 range. 0.0 if no gamma is in use.
|
|
///<
|
|
|
|
double ox; ///< Start X pixel offset within source image (can be
|
|
///< negative). Positive offset moves image to the left.
|
|
///<
|
|
double oy; ///< Start Y pixel offset within source image (can be
|
|
///< negative). Positive offset moves image to the top.
|
|
///<
|
|
CImageResizerThreadPool*
|
|
ThreadPool; ///< Thread pool to be used by the
|
|
///< image resizing function. Set to NULL to use
|
|
///< single-threaded processing.
|
|
///<
|
|
bool UseSRGBGamma; ///< Perform sRGB gamma linearization (correction).
|
|
///<
|
|
int BuildMode; ///< The build mode to use, for debugging purposes. Set to
|
|
///< -1 to select a minimal-complexity mode automatically. All
|
|
///< build modes deliver similar results with minor
|
|
///< deviations.
|
|
///<
|
|
int RndSeed; ///< Random seed parameter. This parameter may be incremented
|
|
///< after each random generator initialization. The use of this
|
|
///< variable depends on the ditherer implementation.
|
|
///<
|
|
|
|
CImageResizerVars()
|
|
: ox(0.0),
|
|
oy(0.0),
|
|
ThreadPool(NULL),
|
|
UseSRGBGamma(false),
|
|
BuildMode(-1),
|
|
RndSeed(0) {}
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's filtering step class.
|
|
*
|
|
* Class defines data to perform a single filtering step over a whole
|
|
* horizontal or vertical scanline. Resizing consists of 1 or more steps that
|
|
* may be performed before the actual resizing takes place. Filtering may also
|
|
* follow a resizing step. Each step must ensure that scanline data contains
|
|
* enough pixels to perform the next step (which may be resizing) without
|
|
* exceeding scanline's bounds.
|
|
*
|
|
* A derived class must implement several "const" and "static" functions that
|
|
* are used to perform the actual filtering in interleaved or de-interleaved
|
|
* mode.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel elements. SIMD
|
|
* types can be used: in this case each element may hold a whole pixel.
|
|
* @tparam fptypeatom The atomic type the "fptype" consists of.
|
|
*/
|
|
|
|
template <class fptype, class fptypeatom>
|
|
class CImageResizerFilterStep {
|
|
public:
|
|
bool IsUpsample; ///< "True" if this step is an upsampling step, "false"
|
|
///< if downsampling step. Should be set to "false" if
|
|
///< ResampleFactor equals 0.
|
|
///<
|
|
int ResampleFactor; ///< Resample factor (>=1). If 0, this is a resizing
|
|
///< step. This value should be >1 if IsUpsample equals
|
|
///< "true".
|
|
///<
|
|
CBuffer<fptype> Flt; ///< Filter to use at this step.
|
|
///<
|
|
CFltBuffer FltOrig; ///< Originally-designed filter. This buffer may not
|
|
///< be assigned. Assigned by filters that precede the
|
|
///< resizing step if such filter is planned to be
|
|
///< embedded into the interpolation filter as "external"
|
|
///< filter. If IsUpsample=true and this filter buffer is
|
|
///< not empty, the upsampling step will not itself apply
|
|
///< any filtering over upsampled input scanline.
|
|
///<
|
|
double DCGain; ///< DC gain which was applied to the filter. Not defined
|
|
///< if ResampleFactor = 0.
|
|
///<
|
|
int FltLatency; ///< Filter's latency (group delay, shift) in pixels.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizing-related variables.
|
|
///<
|
|
int InLen; ///< Input scanline's length in pixels.
|
|
///<
|
|
int InBuf; ///< Input buffer index, 0 or 1.
|
|
///<
|
|
int InPrefix; ///< Required input prefix pixels. These prefix pixels will
|
|
///< be filled with source scanline's first pixel value. If
|
|
///< IsUpsample is "true", this is the additional number of
|
|
///< times the first pixel will be filtered before processing
|
|
///< scanline, this number is also reflected in the OutPrefix.
|
|
///<
|
|
int InSuffix; ///< Required input suffix pixels. These suffix pixels will
|
|
///< be filled with source scanline's last pixel value. If
|
|
///< IsUpsample is "true", this is the additional number of
|
|
///< times the last pixel will be filtered before processing
|
|
///< scanline, this number is also reflected in the OutSuffix.
|
|
///<
|
|
int InElIncr; ///< Pixel element increment within the input buffer, used
|
|
///< during de-interleaved processing: in this case each
|
|
///< image's channel is stored independently, InElIncr elements
|
|
///< apart.
|
|
///<
|
|
int OutLen; ///< Length of the resulting scanline.
|
|
///<
|
|
int OutBuf; ///< Output buffer index. 0 or 1; 2 for the last step.
|
|
///<
|
|
int OutPrefix; ///< Required output prefix pixels. These prefix pixels
|
|
///< will not be pre-filled with any values. Value is valid
|
|
///< only if IsUpsample equals "true".
|
|
///<
|
|
int OutSuffix; ///< Required input suffix pixels. These suffix pixels will
|
|
///< not be pre-filled with any values. Value is valid only if
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
int OutElIncr; ///< Pixel element increment within the output buffer, used
|
|
///< during de-interleaved processing. Equals to the
|
|
///< InBufElIncr of the next step.
|
|
///<
|
|
CBuffer<fptype> PrefixDC; ///< DC component fluctuations added at the
|
|
///< start of the resulting scanline, used when
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
CBuffer<fptype> SuffixDC; ///< DC component fluctuations added at the
|
|
///< end of the resulting scanline, used when
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
int EdgePixelCount; ///< The number of edge pixels added. Affects the
|
|
///< initial position within the input scanline, used to
|
|
///< produce edge pixels. This variable is used and
|
|
///< should be defined when IsUpsample=false and
|
|
///< ResampleFactor>0. When assigning this variable it is
|
|
///< also necessary to update InPrefix, OutLen and Vars.o
|
|
///< variables.
|
|
///<
|
|
static const int EdgePixelCountDef =
|
|
3; ///< The default number of pixels
|
|
///< additionally produced at scanline edges during filtering. This is
|
|
///< required to reduce edge artifacts.
|
|
///<
|
|
|
|
/**
|
|
* @brief Resizing position structure.
|
|
*
|
|
* Structure holds resizing position and pointer to fractional delay
|
|
* filter.
|
|
*/
|
|
|
|
struct CResizePos {
|
|
int SrcPosInt; ///< Source scanline position.
|
|
///<
|
|
int fti; ///< Fractional delay filter index.
|
|
///<
|
|
const fptype* ftp; ///< Fractional delay filter pointer.
|
|
///<
|
|
fptypeatom x; ///< Interpolation coefficient between delay filters.
|
|
///<
|
|
int SrcOffs; ///< Source scanline offset.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Resizing positions buffer class.
|
|
*
|
|
* This class combines buffer together with variables that define resizing
|
|
* stepping.
|
|
*/
|
|
|
|
class CRPosBuf : public CBuffer<CResizePos> {
|
|
public:
|
|
double k; ///< Resizing step.
|
|
///<
|
|
double o; ///< Resizing offset.
|
|
///<
|
|
int FracCount; ///< The number of fractional delay filters in a filter
|
|
///< bank used together with this buffer.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Resizing positions buffer array class.
|
|
*
|
|
* This class combines structure array of the CRPosBuf class objects with
|
|
* the function that locates or creates buffer with the required resizing
|
|
* stepping.
|
|
*/
|
|
|
|
class CRPosBufArray : public CStructArray<CRPosBuf> {
|
|
public:
|
|
using CStructArray<CRPosBuf>::add;
|
|
using CStructArray<CRPosBuf>::getItemCount;
|
|
|
|
/**
|
|
* Function returns the resizing positions buffer with the required
|
|
* stepping. If no such buffer exists, it is created.
|
|
*
|
|
* @param k Resizing step.
|
|
* @param o Resizing offset.
|
|
* @param FracCount The number of fractional delay filters in a filter
|
|
* bank used together with this buffer.
|
|
* @return Reference to the CRPosBuf object.
|
|
*/
|
|
|
|
CRPosBuf& getRPosBuf(const double k, const double o, const int FracCount) {
|
|
int i;
|
|
|
|
for (i = 0; i < getItemCount(); i++) {
|
|
CRPosBuf& Buf = (*this)[i];
|
|
|
|
if (Buf.k == k && Buf.o == o && Buf.FracCount == FracCount) {
|
|
return (Buf);
|
|
}
|
|
}
|
|
|
|
CRPosBuf& NewBuf = add();
|
|
NewBuf.k = k;
|
|
NewBuf.o = o;
|
|
NewBuf.FracCount = FracCount;
|
|
|
|
return (NewBuf);
|
|
}
|
|
};
|
|
|
|
CRPosBuf* RPosBuf; ///< Resizing positions buffer. Used when
|
|
///< ResampleFactor equals 0 (resizing step).
|
|
///<
|
|
CDSPFracFilterBankLin<fptype>* FltBank; ///< Filter bank in use by *this
|
|
///< resizing step.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Interleaved filtering steps implementation class.
|
|
*
|
|
* This class implements scanline filtering functions in interleaved mode.
|
|
* This means that each pixel is processed independently, not in groups.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel elements. SIMD
|
|
* types can be used: in this case each element may hold a whole pixel.
|
|
* @tparam fptypeatom The atomic type the "fptype" consists of.
|
|
*/
|
|
|
|
template <class fptype, class fptypeatom>
|
|
class CImageResizerFilterStepINL
|
|
: public CImageResizerFilterStep<fptype, fptypeatom> {
|
|
public:
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::IsUpsample;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::ResampleFactor;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::Flt;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::FltOrig;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::FltLatency;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::Vars;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::InLen;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::InPrefix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::InSuffix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::OutLen;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::OutPrefix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::OutSuffix;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::PrefixDC;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::SuffixDC;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::RPosBuf;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::FltBank;
|
|
using CImageResizerFilterStep<fptype, fptypeatom>::EdgePixelCount;
|
|
|
|
/**
|
|
* Function performs "packing" of a scanline and type conversion.
|
|
* Scanline, depending on the "fptype" can be potentially stored as a
|
|
* packed SIMD values having a certain atomic type. If required, the sRGB
|
|
* gamma correction is applied.
|
|
*
|
|
* @param ip Input scanline.
|
|
* @param op0 Output scanline.
|
|
* @param l0 The number of pixels to "pack".
|
|
*/
|
|
|
|
template <class Tin>
|
|
void packScanline(const Tin* ip, fptype* const op0, const int l0) const {
|
|
const int ElCount = Vars->ElCount;
|
|
const int ElCountIO = Vars->ElCountIO;
|
|
fptype* op = op0;
|
|
int l = l0;
|
|
|
|
if (!Vars->UseSRGBGamma) {
|
|
if (ElCountIO == 1) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
op += ElCount;
|
|
ip++;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 4) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
v[1] = (fptypeatom)ip[1];
|
|
v[2] = (fptypeatom)ip[2];
|
|
v[3] = (fptypeatom)ip[3];
|
|
op += ElCount;
|
|
ip += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 3) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
v[1] = (fptypeatom)ip[1];
|
|
v[2] = (fptypeatom)ip[2];
|
|
op += ElCount;
|
|
ip += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 2) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = (fptypeatom)ip[0];
|
|
v[1] = (fptypeatom)ip[1];
|
|
op += ElCount;
|
|
ip += 2;
|
|
l--;
|
|
}
|
|
}
|
|
} else {
|
|
const fptypeatom gm = (fptypeatom)Vars->InGammaMult;
|
|
|
|
if (ElCountIO == 1) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
op += ElCount;
|
|
ip++;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 4) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm);
|
|
v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm);
|
|
v[3] = convertSRGB2Lin((fptypeatom)ip[3] * gm);
|
|
op += ElCount;
|
|
ip += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 3) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm);
|
|
v[2] = convertSRGB2Lin((fptypeatom)ip[2] * gm);
|
|
op += ElCount;
|
|
ip += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 2) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op;
|
|
v[0] = convertSRGB2Lin((fptypeatom)ip[0] * gm);
|
|
v[1] = convertSRGB2Lin((fptypeatom)ip[1] * gm);
|
|
op += ElCount;
|
|
ip += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
const int ZeroCount = ElCount * Vars->fppack - ElCountIO;
|
|
op = op0;
|
|
l = l0;
|
|
|
|
if (ZeroCount == 1) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op + ElCountIO;
|
|
v[0] = (fptypeatom)0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
} else if (ZeroCount == 2) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op + ElCountIO;
|
|
v[0] = (fptypeatom)0;
|
|
v[1] = (fptypeatom)0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
} else if (ZeroCount == 3) {
|
|
while (l > 0) {
|
|
fptypeatom* v = (fptypeatom*)op + ElCountIO;
|
|
v[0] = (fptypeatom)0;
|
|
v[1] = (fptypeatom)0;
|
|
v[2] = (fptypeatom)0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function applies Linear to sRGB gamma correction to the specified
|
|
* scanline.
|
|
*
|
|
* @param p Scanline.
|
|
* @param l The number of pixels to de-linearize.
|
|
* @param Vars0 Image resizing-related variables.
|
|
*/
|
|
|
|
static void applySRGBGamma(fptype* p, int l, const CImageResizerVars& Vars0) {
  const int PixStride = Vars0.ElCount;
  const fptypeatom gm = (fptypeatom)Vars0.OutGammaMult;

  // Each stored element is de-linearized, then scaled by the output gamma
  // multiplier. Only 1 to 4 elements per pixel are handled.
  switch (Vars0.ElCountIO) {
    case 1:
      for (; l > 0; l--) {
        fptypeatom* const v = (fptypeatom*)p;
        v[0] = convertLin2SRGB(v[0]) * gm;
        p += PixStride;
      }
      break;

    case 4:
      for (; l > 0; l--) {
        fptypeatom* const v = (fptypeatom*)p;
        v[0] = convertLin2SRGB(v[0]) * gm;
        v[1] = convertLin2SRGB(v[1]) * gm;
        v[2] = convertLin2SRGB(v[2]) * gm;
        v[3] = convertLin2SRGB(v[3]) * gm;
        p += PixStride;
      }
      break;

    case 3:
      for (; l > 0; l--) {
        fptypeatom* const v = (fptypeatom*)p;
        v[0] = convertLin2SRGB(v[0]) * gm;
        v[1] = convertLin2SRGB(v[1]) * gm;
        v[2] = convertLin2SRGB(v[2]) * gm;
        p += PixStride;
      }
      break;

    case 2:
      for (; l > 0; l--) {
        fptypeatom* const v = (fptypeatom*)p;
        v[0] = convertLin2SRGB(v[0]) * gm;
        v[1] = convertLin2SRGB(v[1]) * gm;
        p += PixStride;
      }
      break;

    default:
      break;
  }
}
|
|
|
|
/**
|
|
* Function converts vertical scanline to horizontal scanline. This
|
|
* function is called by the image resizer when image is resized
|
|
* vertically. This means that the vertical scanline is stored in the
|
|
* same format produced by the packScanline() and maintained by other
|
|
* filtering functions.
|
|
*
|
|
* @param ip Input vertical scanline.
|
|
* @param op Output buffer (temporary buffer used during resizing).
|
|
* @param SrcLen The number of pixels in the input scanline, also used to
|
|
* calculate input buffer increment.
|
|
* @param SrcIncr Input buffer increment to the next vertical pixel.
|
|
*/
|
|
|
|
void convertVtoH(const fptype* ip, fptype* op, const int SrcLen,
|
|
const int SrcIncr) const {
|
|
const int ElCount = Vars->ElCount;
|
|
int j;
|
|
|
|
if (ElCount == 1) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
ip += SrcIncr;
|
|
op++;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
op[1] = ip[1];
|
|
op[2] = ip[2];
|
|
op[3] = ip[3];
|
|
ip += SrcIncr;
|
|
op += 4;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
op[1] = ip[1];
|
|
op[2] = ip[2];
|
|
ip += SrcIncr;
|
|
op += 3;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
for (j = 0; j < SrcLen; j++) {
|
|
op[0] = ip[0];
|
|
op[1] = ip[1];
|
|
ip += SrcIncr;
|
|
op += 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs "unpacking" of a scanline and type conversion
|
|
* (truncation is used when floating point is converted to integer).
|
|
* Scanline, depending on the "fptype" can be potentially stored as a
|
|
* packed SIMD values having a certain atomic type. The unpacking function
|
|
* assumes that scanline is stored in the style produced by the
|
|
* packScanline() function.
|
|
*
|
|
* @param ip Input scanline.
|
|
* @param op Output scanline.
|
|
* @param l The number of pixels to "unpack".
|
|
* @param Vars0 Image resizing-related variables.
|
|
*/
|
|
|
|
template <class Tout>
|
|
static void unpackScanline(const fptype* ip, Tout* op, int l,
|
|
const CImageResizerVars& Vars0) {
|
|
const int ElCount = Vars0.ElCount;
|
|
const int ElCountIO = Vars0.ElCountIO;
|
|
|
|
if (ElCountIO == 1) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
ip += ElCount;
|
|
op++;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 4) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
op[1] = (Tout)v[1];
|
|
op[2] = (Tout)v[2];
|
|
op[3] = (Tout)v[3];
|
|
ip += ElCount;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 3) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
op[1] = (Tout)v[1];
|
|
op[2] = (Tout)v[2];
|
|
ip += ElCount;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCountIO == 2) {
|
|
while (l > 0) {
|
|
const fptypeatom* v = (const fptypeatom*)ip;
|
|
op[0] = (Tout)v[0];
|
|
op[1] = (Tout)v[1];
|
|
ip += ElCount;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function prepares input scanline buffer for *this filtering step.
|
|
* Left- and right-most pixels are replicated to make sure no buffer
|
|
* overrun happens. This approach also makes it possible to bypass any pointer
|
|
* range checks.
|
|
*
|
|
* @param Src Source buffer.
|
|
*/
|
|
|
|
void prepareInBuf(fptype* Src) const {
  // Upsampling steps, and steps needing neither prefix nor suffix, leave the
  // buffer untouched.
  if (!IsUpsample && InPrefix + InSuffix != 0) {
    const int PixStride = Vars->ElCount;

    // Prefix: the first pixel is passed as the source, with the destination
    // starting one pixel before it and a negative increment.
    replicateArray(Src, PixStride, Src - PixStride, InPrefix, -PixStride);

    // Suffix: the last pixel is passed as the source, with the destination
    // starting one pixel after it and a positive increment.
    fptype* const LastPix = Src + (InLen - 1) * PixStride;
    replicateArray(LastPix, PixStride, LastPix + PixStride, InSuffix,
                   PixStride);
  }
}
|
|
|
|
/**
|
|
* Function performs scanline upsampling with filtering.
|
|
*
|
|
* @param Src Source scanline buffer (length = this -> InLen). Source
|
|
* scanline increment will be equal to ElCount.
|
|
* @param Dst Destination scanline buffer.
|
|
*/
|
|
|
|
void doUpsample(const fptype* const Src, fptype* const Dst) const {
|
|
const int ElCount = Vars->ElCount;
|
|
fptype* op0 = &Dst[-OutPrefix * ElCount];
|
|
memset(op0, 0, (OutPrefix + OutLen + OutSuffix) * ElCount * sizeof(fptype));
|
|
|
|
const fptype* ip = Src;
|
|
const int opstep = ElCount * ResampleFactor;
|
|
int l;
|
|
|
|
if (FltOrig.getCapacity() > 0) {
|
|
// Do not perform filtering, only upsample.
|
|
|
|
op0 += (OutPrefix % ResampleFactor) * ElCount;
|
|
l = OutPrefix / ResampleFactor;
|
|
|
|
if (ElCount == 1) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0[3] = ip[3];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0[3] = ip[3];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0[3] = ip[3];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0[2] = ip[2];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while (l >= 0) {
|
|
op0[0] = ip[0];
|
|
op0[1] = ip[1];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
const fptype* const f = Flt;
|
|
const int flen = Flt.getCapacity();
|
|
fptype* op;
|
|
int i;
|
|
|
|
if (ElCount == 1) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[i] += f[i] * ip[0];
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[i] += f[i] * ip[0];
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[i] += f[i] * ip[0];
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op[3] += f[i] * ip[3];
|
|
op += 4;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op[3] += f[i] * ip[3];
|
|
op += 4;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op[3] += f[i] * ip[3];
|
|
op += 4;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op += 3;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op += 3;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op[2] += f[i] * ip[2];
|
|
op += 3;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
l = InPrefix;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op += 2;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while (l > 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op += 2;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while (l >= 0) {
|
|
op = op0;
|
|
|
|
for (i = 0; i < flen; i++) {
|
|
op[0] += f[i] * ip[0];
|
|
op[1] += f[i] * ip[1];
|
|
op += 2;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
op = op0;
|
|
const fptype* dc = SuffixDC;
|
|
l = SuffixDC.getCapacity();
|
|
|
|
if (ElCount == 1) {
|
|
for (i = 0; i < l; i++) {
|
|
op[i] += ip[0] * dc[i];
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
op[3] += ip[3] * dc[0];
|
|
dc++;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
dc++;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
dc++;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
ip = Src;
|
|
op = Dst - InPrefix * opstep;
|
|
dc = PrefixDC;
|
|
l = PrefixDC.getCapacity();
|
|
|
|
if (ElCount == 1) {
|
|
for (i = 0; i < l; i++) {
|
|
op[i] += ip[0] * dc[i];
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
op[3] += ip[3] * dc[0];
|
|
dc++;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
op[2] += ip[2] * dc[0];
|
|
dc++;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
op[0] += ip[0] * dc[0];
|
|
op[1] += ip[1] * dc[0];
|
|
dc++;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs scanline filtering with optional downsampling.
|
|
* Function makes use of the symmetry of the filter.
|
|
*
|
|
* @param Src Source scanline buffer (length = this -> InLen). Source
|
|
* scanline increment will be equal to ElCount.
|
|
* @param Dst Destination scanline buffer.
|
|
* @param DstIncr Destination scanline buffer increment, used for
|
|
* horizontal or vertical scanline stepping.
|
|
*/
|
|
|
|
void doFilter(const fptype* const Src, fptype* Dst, const int DstIncr) const {
|
|
const int ElCount = Vars->ElCount;
|
|
const fptype* const f = &Flt[FltLatency];
|
|
const int flen = FltLatency + 1;
|
|
const int ipstep = ElCount * ResampleFactor;
|
|
const fptype* ip = Src - EdgePixelCount * ipstep;
|
|
const fptype* ip1;
|
|
const fptype* ip2;
|
|
int l = OutLen;
|
|
int i;
|
|
|
|
if (ElCount == 1) {
|
|
while (l > 0) {
|
|
fptype s = f[0] * ip[0];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1++;
|
|
ip2--;
|
|
s += f[i] * (ip1[0] + ip2[0]);
|
|
}
|
|
|
|
Dst[0] = s;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 4) {
|
|
while (l > 0) {
|
|
fptype s1 = f[0] * ip[0];
|
|
fptype s2 = f[0] * ip[1];
|
|
fptype s3 = f[0] * ip[2];
|
|
fptype s4 = f[0] * ip[3];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1 += 4;
|
|
ip2 -= 4;
|
|
s1 += f[i] * (ip1[0] + ip2[0]);
|
|
s2 += f[i] * (ip1[1] + ip2[1]);
|
|
s3 += f[i] * (ip1[2] + ip2[2]);
|
|
s4 += f[i] * (ip1[3] + ip2[3]);
|
|
}
|
|
|
|
Dst[0] = s1;
|
|
Dst[1] = s2;
|
|
Dst[2] = s3;
|
|
Dst[3] = s4;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 3) {
|
|
while (l > 0) {
|
|
fptype s1 = f[0] * ip[0];
|
|
fptype s2 = f[0] * ip[1];
|
|
fptype s3 = f[0] * ip[2];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1 += 3;
|
|
ip2 -= 3;
|
|
s1 += f[i] * (ip1[0] + ip2[0]);
|
|
s2 += f[i] * (ip1[1] + ip2[1]);
|
|
s3 += f[i] * (ip1[2] + ip2[2]);
|
|
}
|
|
|
|
Dst[0] = s1;
|
|
Dst[1] = s2;
|
|
Dst[2] = s3;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
} else if (ElCount == 2) {
|
|
while (l > 0) {
|
|
fptype s1 = f[0] * ip[0];
|
|
fptype s2 = f[0] * ip[1];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for (i = 1; i < flen; i++) {
|
|
ip1 += 2;
|
|
ip2 -= 2;
|
|
s1 += f[i] * (ip1[0] + ip2[0]);
|
|
s2 += f[i] * (ip1[1] + ip2[1]);
|
|
}
|
|
|
|
Dst[0] = s1;
|
|
Dst[1] = s2;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs resizing of a single scanline. This function does
|
|
* not "know" about the length of the source scanline buffer. This buffer
|
|
* should be padded with enough pixels so that ( SrcPos - FilterLenD2 ) is
|
|
* always >= 0 and ( SrcPos + ( DstLineLen - 1 ) * k + FilterLenD2 + 1 )
|
|
* does not exceed source scanline's buffer length. SrcLine's increment is
|
|
* assumed to be equal to ElCount.
|
|
*
|
|
* @param SrcLine Source scanline buffer.
|
|
* @param DstLine Destination (resized) scanline buffer.
|
|
* @param DstLineIncr Destination scanline position increment, used for
|
|
* horizontal or vertical scanline stepping.
|
|
* @param xx Temporary buffer, of size FltBank -> getFilterLen(), must be
|
|
* aligned by fpclass :: fpalign.
|
|
*/
|
|
|
|
void doResize(const fptype* SrcLine, fptype* DstLine, const int DstLineIncr,
|
|
fptype* const) const {
|
|
const int IntFltLen = FltBank->getFilterLen();
|
|
const int ElCount = Vars->ElCount;
|
|
const typename CImageResizerFilterStep<fptype, fptypeatom>::CResizePos*
|
|
rpos = &(*RPosBuf)[0];
|
|
|
|
const typename CImageResizerFilterStep<
|
|
fptype, fptypeatom>::CResizePos* const rpose = rpos + OutLen;
|
|
|
|
#define AVIR_RESIZE_PART1 \
|
|
while (rpos < rpose) { \
|
|
const fptype x = (fptype)rpos->x; \
|
|
const fptype* const ftp = rpos->ftp; \
|
|
const fptype* const ftp2 = ftp + IntFltLen; \
|
|
const fptype* Src = SrcLine + rpos->SrcOffs; \
|
|
int i;
|
|
|
|
#define AVIR_RESIZE_PART1nx \
|
|
while (rpos < rpose) { \
|
|
const fptype* const ftp = rpos->ftp; \
|
|
const fptype* Src = SrcLine + rpos->SrcOffs; \
|
|
int i;
|
|
|
|
#define AVIR_RESIZE_PART2 \
|
|
DstLine += DstLineIncr; \
|
|
rpos++; \
|
|
}
|
|
|
|
if (FltBank->getOrder() == 1) {
|
|
if (ElCount == 1) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
sum += (ftp[i] + ftp2[i] * x) * Src[i];
|
|
}
|
|
|
|
DstLine[0] = sum;
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 4) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[4];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
sum[3] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i] + ftp2[i] * x;
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
sum[3] += xx * Src[3];
|
|
Src += 4;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
DstLine[3] = sum[3];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 3) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[3];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i] + ftp2[i] * x;
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
Src += 3;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 2) {
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[2];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i] + ftp2[i] * x;
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
Src += 2;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
} else {
|
|
if (ElCount == 1) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
sum += ftp[i] * Src[i];
|
|
}
|
|
|
|
DstLine[0] = sum;
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 4) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[4];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
sum[3] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i];
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
sum[3] += xx * Src[3];
|
|
Src += 4;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
DstLine[3] = sum[3];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 3) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[3];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
sum[2] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i];
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
sum[2] += xx * Src[2];
|
|
Src += 3;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
DstLine[2] = sum[2];
|
|
|
|
AVIR_RESIZE_PART2
|
|
} else if (ElCount == 2) {
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[2];
|
|
sum[0] = 0.0;
|
|
sum[1] = 0.0;
|
|
|
|
for (i = 0; i < IntFltLen; i++) {
|
|
const fptype xx = ftp[i];
|
|
sum[0] += xx * Src[0];
|
|
sum[1] += xx * Src[1];
|
|
Src += 2;
|
|
}
|
|
|
|
DstLine[0] = sum[0];
|
|
DstLine[1] = sum[1];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
}
|
|
}
|
|
#undef AVIR_RESIZE_PART2
|
|
#undef AVIR_RESIZE_PART1nx
|
|
#undef AVIR_RESIZE_PART1
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's default dithering class.
|
|
*
|
|
* This class defines an object that performs rounding, clipping and dithering
|
|
* operations over horizontal scanline pixels before scanline is stored in the
|
|
* output buffer.
|
|
*
|
|
* The ditherer should expect the same storage order of the pixels in a
|
|
* scanline as used in the "filtering step" class. So, a separate ditherer
|
|
* class should be defined for each scanline pixel storage style. The default
|
|
* ditherer implements a simple rounding without dithering: it can be used for
|
|
* an efficient dithering method which can be multi-threaded.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel data. SIMD
|
|
* types can be used.
|
|
*/
|
|
|
|
template <class fptype>
|
|
class CImageResizerDithererDefINL {
|
|
public:
|
|
/**
|
|
* Function initializes the ditherer object.
|
|
*
|
|
* @param aLen Scanline length in pixels to process.
|
|
* @param aVars Image resizing-related variables.
|
|
* @param aTrMul Bit-depth truncation multiplier. 1 - no additional
|
|
* truncation.
|
|
* @param aPkOut Peak output value allowed.
|
|
*/
|
|
|
|
void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul,
|
|
const double aPkOut) {
|
|
Len = aLen;
|
|
Vars = &aVars;
|
|
LenE = aLen * Vars->ElCount;
|
|
TrMul0 = aTrMul;
|
|
PkOut0 = aPkOut;
|
|
}
|
|
|
|
/**
|
|
* @return "True" if dithering is recursive relative to scanlines meaning
|
|
* multi-threaded execution is not supported by this dithering method.
|
|
*/
|
|
|
|
static bool isRecursive() { return (false); }
|
|
|
|
/**
|
|
* Function performs rounding and clipping operations.
|
|
*
|
|
* @param ResScanline The buffer containing the final scanline.
|
|
*/
|
|
|
|
void dither(fptype* const ResScanline) const {
|
|
const fptype c0 = 0.0;
|
|
const fptype PkOut = (fptype)PkOut0;
|
|
int j;
|
|
|
|
if (TrMul0 == 1.0) {
|
|
// Optimization - do not perform bit depth truncation.
|
|
|
|
for (j = 0; j < LenE; j++) {
|
|
ResScanline[j] = clamp(round(ResScanline[j]), c0, PkOut);
|
|
}
|
|
} else {
|
|
const fptype TrMul = (fptype)TrMul0;
|
|
|
|
for (j = 0; j < LenE; j++) {
|
|
const fptype z0 = round(ResScanline[j] / TrMul) * TrMul;
|
|
ResScanline[j] = clamp(z0, c0, PkOut);
|
|
}
|
|
}
|
|
}
|
|
|
|
protected:
|
|
int Len; ///< Scanline's length in pixels.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizing-related variables.
|
|
///<
|
|
int LenE; ///< = LenE * ElCount.
|
|
///<
|
|
double TrMul0; ///< Bit-depth truncation multiplier.
|
|
///<
|
|
double PkOut0; ///< Peak output value allowed.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's error-diffusion dithering class, interleaved mode.
|
|
*
|
|
* This ditherer implements error-diffusion dithering which looks good, and
|
|
* whose results are compressed by PNG well. This implementation uses
|
|
* weighting coefficients obtained via machine optimization and visual
|
|
* evaluation.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel data. SIMD
|
|
* types can be used.
|
|
*/
|
|
|
|
template <class fptype>
|
|
class CImageResizerDithererErrdINL
|
|
: public CImageResizerDithererDefINL<fptype> {
|
|
public:
|
|
/**
|
|
* Function initializes the ditherer object.
|
|
*
|
|
* @param aLen Scanline length in pixels to process.
|
|
* @param aVars Image resizing-related variables.
|
|
* @param aTrMul Bit-depth truncation multiplier. 1 - no additional
|
|
* truncation.
|
|
* @param aPkOut Peak output value allowed.
|
|
*/
|
|
|
|
void init(const int aLen, const CImageResizerVars& aVars, const double aTrMul,
|
|
const double aPkOut) {
|
|
CImageResizerDithererDefINL<fptype>::init(aLen, aVars, aTrMul, aPkOut);
|
|
|
|
ResScanlineDith0.alloc(LenE + Vars->ElCount, sizeof(fptype));
|
|
ResScanlineDith = ResScanlineDith0 + Vars->ElCount;
|
|
int i;
|
|
|
|
for (i = 0; i < LenE + Vars->ElCount; i++) {
|
|
ResScanlineDith0[i] = 0.0;
|
|
}
|
|
}
|
|
|
|
static bool isRecursive() { return (true); }
|
|
|
|
void dither(fptype* const ResScanline) {
|
|
const int ElCount = Vars->ElCount;
|
|
const fptype c0 = 0.0;
|
|
const fptype TrMul = (fptype)TrMul0;
|
|
const fptype PkOut = (fptype)PkOut0;
|
|
int j;
|
|
|
|
for (j = 0; j < LenE; j++) {
|
|
ResScanline[j] += ResScanlineDith[j];
|
|
ResScanlineDith[j] = 0.0;
|
|
}
|
|
|
|
for (j = 0; j < LenE - ElCount; j++) {
|
|
// Perform rounding, noise estimation and saturation.
|
|
|
|
const fptype z0 = round(ResScanline[j] / TrMul) * TrMul;
|
|
const fptype Noise = ResScanline[j] - z0;
|
|
ResScanline[j] = clamp(z0, c0, PkOut);
|
|
|
|
ResScanline[j + ElCount] += Noise * (fptype)0.364842;
|
|
ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305;
|
|
ResScanlineDith[j] += Noise * (fptype)0.364842;
|
|
ResScanlineDith[j + ElCount] += Noise * (fptype)0.063011;
|
|
}
|
|
|
|
while (j < LenE) {
|
|
const fptype z0 = round(ResScanline[j] / TrMul) * TrMul;
|
|
const fptype Noise = ResScanline[j] - z0;
|
|
ResScanline[j] = clamp(z0, c0, PkOut);
|
|
|
|
ResScanlineDith[j - ElCount] += Noise * (fptype)0.207305;
|
|
ResScanlineDith[j] += Noise * (fptype)0.364842;
|
|
j++;
|
|
}
|
|
}
|
|
|
|
protected:
|
|
using CImageResizerDithererDefINL<fptype>::Len;
|
|
using CImageResizerDithererDefINL<fptype>::Vars;
|
|
using CImageResizerDithererDefINL<fptype>::LenE;
|
|
using CImageResizerDithererDefINL<fptype>::TrMul0;
|
|
using CImageResizerDithererDefINL<fptype>::PkOut0;
|
|
|
|
CBuffer<fptype> ResScanlineDith0; ///< Error diffusion buffer.
|
|
///<
|
|
fptype* ResScanlineDith; ///< Error diffusion buffer pointer which skips
|
|
///< the first ElCount elements.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Floating-point processing definition and abstraction class.
|
|
*
|
|
* This class defines several constants and typedefs that point to classes
|
|
* that should be used by the image resizing algorithm. Such "definition
|
|
* class" can be used to define alternative scanline processing algorithms
|
|
* (e.g. SIMD) and image scanline packing styles used during processing. This
|
|
* class also offers an abstraction layer for dithering, rounding and
|
|
* clamping (saturation) operation.
|
|
*
|
|
* The fpclass_def class can be used to define processing using both SIMD and
|
|
* non-SIMD types, but using algorithms that operate on interleaved pixels
|
|
* and are not SIMD-optimized themselves.
|
|
*
|
|
* @tparam afptype Floating point type to use for storing intermediate data
|
|
* and variables. For variables that are not used in intensive calculations
|
|
* the "double" type is always used. On the latest Intel processors (like
|
|
* i7-4770K) there is almost no performance difference between "double" and
|
|
* "float". Image quality differences between "double" and "float" are not
|
|
* apparent on 8-bit images. At the same time the "float" uses half the amount of
|
|
* working memory the "double" type uses. SIMD types can be used. The
|
|
* functions round() and clamp() in the "avir" or other visible namespace
|
|
* should be available for the specified type. SIMD types allow to perform
|
|
* resizing of images with more than 4 channels, to be exact 4 * SIMD element
|
|
* number (e.g. 16 for float4), without modification of the image resizing
|
|
* algorithm required.
|
|
* @tparam afptypeatom The atomic type the "afptype" consists of.
|
|
* @tparam adith Ditherer class to use during processing.
|
|
*/
|
|
|
|
template <class afptype, class afptypeatom = afptype,
|
|
class adith = CImageResizerDithererDefINL<afptype> >
|
|
class fpclass_def {
|
|
public:
|
|
typedef afptype fptype; ///< Floating-point type to use during processing.
|
|
///<
|
|
typedef afptypeatom fptypeatom; ///< Atomic type "fptype" consists of.
|
|
///<
|
|
static const int fppack =
|
|
sizeof(fptype) /
|
|
sizeof(fptypeatom); ///<
|
|
///< The number of atomic types stored in a single
|
|
///< "fptype" element.
|
|
///<
|
|
static const int fpalign =
|
|
sizeof(fptype); ///< Suggested alignment size
|
|
///< in bytes. This is not a required alignment, because
|
|
///< image resizing algorithm cannot be made to have a
|
|
///< strictly aligned data access at all steps (e.g.
|
|
///< interpolation cannot perform aligned accesses).
|
|
///<
|
|
static const int elalign =
|
|
1; ///< Length alignment of arrays of elements.
|
|
///< This applies to filters and intermediate buffers: this constant
|
|
///< forces filters and scanlines to have a length which is a multiple
|
|
///< of this value, for more efficient SIMD implementation.
|
|
///<
|
|
static const int packmode = 0; ///< 0 if interleaved packing, 1 if
|
|
///< de-interleaved.
|
|
///<
|
|
typedef CImageResizerFilterStepINL<fptype, fptypeatom>
|
|
CFilterStep; ///<
|
|
///< Filtering step class to use during processing.
|
|
///<
|
|
typedef adith CDitherer; ///< Ditherer class to use during processing.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer class.
|
|
*
|
|
* The object of this class can be used to resize 1-4 channel images to any
|
|
* required size. Resizing is performed by utilizing interpolated sinc
|
|
* fractional delay filters plus (if necessary) a cascade of built-in
|
|
* sinc function-based 2X upsampling or 2X downsampling stages, followed by a
|
|
* correction filtering.
|
|
*
|
|
* Object of this class can be allocated on stack.
|
|
*
|
|
* @tparam fpclass Floating-point processing definition class to use. See
|
|
* avir::fpclass_def for more details.
|
|
*/
|
|
|
|
template <class fpclass = fpclass_def<float> >
|
|
class CImageResizer {
|
|
public:
|
|
/**
|
|
* Constructor initializes the resizer.
|
|
*
|
|
* @param aResBitDepth Required bit depth of resulting image (1-16). If
|
|
* integer value output is used (e.g. uint8_t), the bit depth also affects
|
|
* rounding: for example, if aResBitDepth=6 and "Tout" is uint8_t, the
|
|
* result will be rounded to 6 most significant bits (2 least significant
|
|
* bits truncated, with dithering applied).
|
|
* @param aSrcBitDepth Source image's real bit-depth. Set to 0 to use
|
|
* aResBitDepth.
|
|
* @param aParams Resizing algorithm's parameters to use. Leave out for
|
|
* default values. Can be useful when performing automatic optimization of
|
|
* parameters.
|
|
*/
|
|
|
|
CImageResizer(const int aResBitDepth = 8, const int aSrcBitDepth = 0,
|
|
const CImageResizerParams& aParams = CImageResizerParamsDef())
|
|
: Params(aParams), ResBitDepth(aResBitDepth) {
|
|
SrcBitDepth = (aSrcBitDepth == 0 ? ResBitDepth : aSrcBitDepth);
|
|
|
|
initFilterBank(FixedFilterBank, 1.0, false, CFltBuffer());
|
|
FixedFilterBank.createAllFilters();
|
|
}
|
|
|
|
/**
|
|
* Function resizes image.
|
|
*
|
|
* @param SrcBuf Source image buffer.
|
|
* @param SrcWidth Source image width.
|
|
* @param SrcHeight Source image height.
|
|
* @param SrcScanlineSize Physical size of source scanline in elements
|
|
* (not bytes). If this value is below 1, SrcWidth * ElCountIO will be
|
|
* used as the physical source scanline size.
|
|
* @param[out] NewBuf Buffer to accept the resized image. Can be equal to
|
|
* SrcBuf if the size of the resized image is smaller or equal to source
|
|
* image in size.
|
|
* @param NewWidth New image width.
|
|
* @param NewHeight New image height.
|
|
* @param ElCountIO The number of elements (channels) used to store each
|
|
* source and destination pixel (1-4).
|
|
* @param k Resizing step (one output pixel corresponds to "k" input
|
|
* pixels). A downsizing factor if > 1.0; upsizing factor if <= 1.0.
|
|
* Multiply by -1 if you would like to bypass "ox" and "oy" adjustment
|
|
* which is done by default to produce a centered image. If step value
|
|
* equals 0, the step value will be chosen automatically and independently
|
|
* for horizontal and vertical resizing.
|
|
* @param[in,out] aVars Pointer to variables structure to be passed to the
|
|
* image resizing function. Can be NULL. Only variables that are
|
|
* initialized in default constructor of this structure are accepted by
|
|
* this function. These variables will not be changed by this function.
|
|
* All other variables can be modified by this function. The access to
|
|
* this object is not thread-safe, each concurrent instance of this
|
|
* function should use a separate aVars object.
|
|
* @tparam Tin Input buffer element's type. Can be uint8_t (0-255 value
|
|
* range), uint16_t (0-65535 value range), float (0.0-1.0 value range),
|
|
* double (0.0-1.0 value range). Larger integer types are treated as
|
|
* uint16_t. Signed integer types are unsupported.
|
|
* @tparam Tout Output buffer element's type. Can be uint8_t (0-255 value
|
|
* range), uint16_t (0-65535 value range), float (0.0-1.0 value range),
|
|
* double (0.0-1.0 value range). Larger integer types are treated as
|
|
* uint16_t. Signed integer types are unsupported.
|
|
*/
|
|
|
|
template <class Tin, class Tout>
|
|
void resizeImage(const Tin* const SrcBuf, const int SrcWidth,
|
|
const int SrcHeight, int SrcScanlineSize, Tout* const NewBuf,
|
|
const int NewWidth, const int NewHeight, const int ElCountIO,
|
|
const double k,
|
|
CImageResizerVars* const aVars = NULL) const {
|
|
if (SrcWidth == 0 || SrcHeight == 0) {
|
|
memset(NewBuf, 0, (size_t)NewWidth * NewHeight * sizeof(Tout));
|
|
|
|
return;
|
|
} else if (NewWidth == 0 || NewHeight == 0) {
|
|
return;
|
|
}
|
|
|
|
CImageResizerVars DefVars;
|
|
CImageResizerVars& Vars = (aVars == NULL ? DefVars : *aVars);
|
|
|
|
CImageResizerThreadPool DefThreadPool;
|
|
CImageResizerThreadPool& ThreadPool =
|
|
(Vars.ThreadPool == NULL ? DefThreadPool : *Vars.ThreadPool);
|
|
|
|
// Define resizing steps, also optionally modify offsets so that
|
|
// resizing produces a "centered" image.
|
|
|
|
double kx;
|
|
double ky;
|
|
double ox = Vars.ox;
|
|
double oy = Vars.oy;
|
|
|
|
if (k == 0.0) {
|
|
if (NewWidth > SrcWidth) {
|
|
kx = (double)(SrcWidth - 1) / (NewWidth - 1);
|
|
} else {
|
|
kx = (double)SrcWidth / NewWidth;
|
|
ox += (kx - 1.0) * 0.5;
|
|
}
|
|
|
|
if (NewHeight > SrcHeight) {
|
|
ky = (double)(SrcHeight - 1) / (NewHeight - 1);
|
|
} else {
|
|
ky = (double)SrcHeight / NewHeight;
|
|
oy += (ky - 1.0) * 0.5;
|
|
}
|
|
} else if (k > 0.0) {
|
|
kx = k;
|
|
ky = k;
|
|
|
|
if (k > 1.0) {
|
|
const double ko = (k - 1.0) * 0.5;
|
|
ox += ko;
|
|
oy += ko;
|
|
}
|
|
} else {
|
|
kx = -k;
|
|
ky = -k;
|
|
}
|
|
|
|
// Evaluate pre-multipliers used on the output stage.
|
|
|
|
const bool IsInFloat = ((Tin)0.4 != 0);
|
|
const bool IsOutFloat = ((Tout)0.4 != 0);
|
|
double OutMul; // Output multiplier.
|
|
|
|
if (Vars.UseSRGBGamma) {
|
|
if (IsInFloat) {
|
|
Vars.InGammaMult = 1.0;
|
|
} else {
|
|
Vars.InGammaMult = 1.0 / (sizeof(Tin) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
|
|
if (IsOutFloat) {
|
|
Vars.OutGammaMult = 1.0;
|
|
} else {
|
|
Vars.OutGammaMult = (sizeof(Tout) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
|
|
OutMul = 1.0;
|
|
} else {
|
|
if (IsOutFloat) {
|
|
OutMul = 1.0;
|
|
} else {
|
|
OutMul = (sizeof(Tout) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
|
|
if (!IsInFloat) {
|
|
OutMul /= (sizeof(Tin) == 1 ? 255.0 : 65535.0);
|
|
}
|
|
}
|
|
|
|
// Fill widely-used variables.
|
|
|
|
const int ElCount = (ElCountIO + fpclass ::fppack - 1) / fpclass ::fppack;
|
|
|
|
const int NewWidthE = NewWidth * ElCount;
|
|
|
|
if (SrcScanlineSize < 1) {
|
|
SrcScanlineSize = SrcWidth * ElCountIO;
|
|
}
|
|
|
|
Vars.ElCount = ElCount;
|
|
Vars.ElCountIO = ElCountIO;
|
|
Vars.fppack = fpclass ::fppack;
|
|
Vars.fpalign = fpclass ::fpalign;
|
|
Vars.elalign = fpclass ::elalign;
|
|
Vars.packmode = fpclass ::packmode;
|
|
|
|
// Horizontal scanline filtering and resizing.
|
|
|
|
CDSPFracFilterBankLin<fptype> FltBank;
|
|
CFilterSteps FltSteps;
|
|
typename CFilterStep ::CRPosBufArray RPosBufArray;
|
|
CBuffer<uint8_t> UsedFracMap;
|
|
|
|
// Perform the filtering steps modeling at various modes, find the
|
|
// most efficient mode for both horizontal and vertical resizing.
|
|
|
|
int UseBuildMode = 1;
|
|
const int BuildModeCount = (FixedFilterBank.getOrder() == 0 ? 4 : 2);
|
|
|
|
int m;
|
|
|
|
if (Vars.BuildMode >= 0) {
|
|
UseBuildMode = Vars.BuildMode;
|
|
} else {
|
|
int BestScore = 0x7FFFFFFF;
|
|
|
|
for (m = 0; m < BuildModeCount; m++) {
|
|
CDSPFracFilterBankLin<fptype> TmpBank;
|
|
CFilterSteps TmpSteps;
|
|
Vars.k = kx;
|
|
Vars.o = ox;
|
|
buildFilterSteps(TmpSteps, Vars, TmpBank, OutMul, m, true);
|
|
updateFilterStepBuffers(TmpSteps, Vars, RPosBufArray, SrcWidth,
|
|
NewWidth);
|
|
|
|
fillUsedFracMap(TmpSteps[Vars.ResizeStep], UsedFracMap);
|
|
const int c = calcComplexity(TmpSteps, Vars, UsedFracMap, SrcHeight);
|
|
|
|
if (c < BestScore) {
|
|
UseBuildMode = m;
|
|
BestScore = c;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Perform the actual filtering steps building.
|
|
|
|
Vars.k = kx;
|
|
Vars.o = ox;
|
|
buildFilterSteps(FltSteps, Vars, FltBank, OutMul, UseBuildMode, false);
|
|
|
|
updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcWidth, NewWidth);
|
|
|
|
updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth);
|
|
|
|
const int ThreadCount = ThreadPool.getSuggestedWorkloadCount();
|
|
// Includes the current thread.
|
|
|
|
CStructArray<CThreadData<Tin, Tout> > td;
|
|
td.setItemCount(ThreadCount);
|
|
int i;
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
if (i > 0) {
|
|
ThreadPool.addWorkload(&td[i]);
|
|
}
|
|
|
|
td[i].init(i, ThreadCount, FltSteps, Vars);
|
|
|
|
td[i].initScanlineQueue(td[i].sopResizeH, SrcHeight, SrcWidth);
|
|
}
|
|
|
|
CBuffer<fptype, size_t> FltBuf(
|
|
(size_t)NewWidthE * SrcHeight,
|
|
fpclass ::fpalign); // Temporary buffer that receives
|
|
// horizontally-filtered and resized image.
|
|
|
|
for (i = 0; i < SrcHeight; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
(void*)&SrcBuf[(size_t)i * SrcScanlineSize],
|
|
&FltBuf[(size_t)i * NewWidthE]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
|
|
// Vertical scanline filtering and resizing, reuse previously defined
|
|
// filtering steps if possible.
|
|
|
|
const int PrevUseBuildMode = UseBuildMode;
|
|
|
|
if (Vars.BuildMode >= 0) {
|
|
UseBuildMode = Vars.BuildMode;
|
|
} else {
|
|
CImageResizerVars TmpVars(Vars);
|
|
int BestScore = 0x7FFFFFFF;
|
|
|
|
for (m = 0; m < BuildModeCount; m++) {
|
|
CDSPFracFilterBankLin<fptype> TmpBank;
|
|
TmpBank.copyInitParams(FltBank);
|
|
CFilterSteps TmpSteps;
|
|
TmpVars.k = ky;
|
|
TmpVars.o = oy;
|
|
buildFilterSteps(TmpSteps, TmpVars, TmpBank, 1.0, m, true);
|
|
updateFilterStepBuffers(TmpSteps, TmpVars, RPosBufArray, SrcHeight,
|
|
NewHeight);
|
|
|
|
fillUsedFracMap(TmpSteps[TmpVars.ResizeStep], UsedFracMap);
|
|
|
|
const int c = calcComplexity(TmpSteps, TmpVars, UsedFracMap, NewWidth);
|
|
|
|
if (c < BestScore) {
|
|
UseBuildMode = m;
|
|
BestScore = c;
|
|
}
|
|
}
|
|
}
|
|
|
|
Vars.k = ky;
|
|
Vars.o = oy;
|
|
|
|
if (UseBuildMode == PrevUseBuildMode && ky == kx) {
|
|
if (OutMul != 1.0) {
|
|
modifyCorrFilterDCGain(FltSteps, 1.0 / OutMul);
|
|
}
|
|
} else {
|
|
buildFilterSteps(FltSteps, Vars, FltBank, 1.0, UseBuildMode, false);
|
|
}
|
|
|
|
updateFilterStepBuffers(FltSteps, Vars, RPosBufArray, SrcHeight, NewHeight);
|
|
|
|
updateBufLenAndRPosPtrs(FltSteps, Vars, NewWidth);
|
|
|
|
if (IsOutFloat && sizeof(FltBuf[0]) == sizeof(Tout) &&
|
|
fpclass ::packmode == 0) {
|
|
// In-place output.
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight,
|
|
NewWidthE, NewWidthE);
|
|
}
|
|
|
|
for (i = 0; i < NewWidth; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
&FltBuf[(size_t)i * ElCount],
|
|
(fptype*)&NewBuf[(size_t)i * ElCount]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
ThreadPool.removeAllWorkloads();
|
|
|
|
return;
|
|
}
|
|
|
|
CBuffer<fptype, size_t> ResBuf((size_t)NewWidthE * NewHeight,
|
|
fpclass ::fpalign);
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopResizeV, NewWidth, SrcHeight, NewWidthE,
|
|
NewWidthE);
|
|
}
|
|
|
|
const int im = (fpclass ::packmode == 0 ? ElCount : 1);
|
|
|
|
for (i = 0; i < NewWidth; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(&FltBuf[(size_t)i * im],
|
|
&ResBuf[(size_t)i * im]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
|
|
if (IsOutFloat) {
|
|
// Perform output, but skip dithering.
|
|
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopUnpackH, NewHeight, NewWidth);
|
|
}
|
|
|
|
for (i = 0; i < NewHeight; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
&ResBuf[(size_t)i * NewWidthE],
|
|
&NewBuf[(size_t)i * NewWidth * ElCountIO]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
ThreadPool.removeAllWorkloads();
|
|
|
|
return;
|
|
}
|
|
|
|
// Perform output with dithering (for integer output only).
|
|
|
|
int TruncBits; // The number of lower bits to truncate and dither.
|
|
int OutRange; // Output range.
|
|
|
|
if (sizeof(Tout) == 1) {
|
|
TruncBits = 8 - ResBitDepth;
|
|
OutRange = 255;
|
|
} else {
|
|
TruncBits = 16 - ResBitDepth;
|
|
OutRange = 65535;
|
|
}
|
|
|
|
const double PkOut = OutRange;
|
|
const double TrMul =
|
|
(TruncBits > 0 ? PkOut / (OutRange >> TruncBits) : 1.0);
|
|
|
|
if (CDitherer ::isRecursive()) {
|
|
td[0].getDitherer().init(NewWidth, Vars, TrMul, PkOut);
|
|
|
|
if (Vars.UseSRGBGamma) {
|
|
for (i = 0; i < NewHeight; i++) {
|
|
fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE];
|
|
|
|
CFilterStep ::applySRGBGamma(ResScanline, NewWidth, Vars);
|
|
|
|
td[0].getDitherer().dither(ResScanline);
|
|
|
|
CFilterStep ::unpackScanline(
|
|
ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth,
|
|
Vars);
|
|
}
|
|
} else {
|
|
for (i = 0; i < NewHeight; i++) {
|
|
fptype* const ResScanline = &ResBuf[(size_t)i * NewWidthE];
|
|
|
|
td[0].getDitherer().dither(ResScanline);
|
|
|
|
CFilterStep ::unpackScanline(
|
|
ResScanline, &NewBuf[(size_t)i * NewWidth * ElCountIO], NewWidth,
|
|
Vars);
|
|
}
|
|
}
|
|
} else {
|
|
for (i = 0; i < ThreadCount; i++) {
|
|
td[i].initScanlineQueue(td[i].sopDitherAndUnpackH, NewHeight, NewWidth);
|
|
|
|
td[i].getDitherer().init(NewWidth, Vars, TrMul, PkOut);
|
|
}
|
|
|
|
for (i = 0; i < NewHeight; i++) {
|
|
td[i % ThreadCount].addScanlineToQueue(
|
|
&ResBuf[(size_t)i * NewWidthE],
|
|
&NewBuf[(size_t)i * NewWidth * ElCountIO]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[0].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
}
|
|
|
|
ThreadPool.removeAllWorkloads();
|
|
}
|
|
|
|
private:
|
|
typedef typename fpclass ::fptype fptype; ///< Floating-point type to use
|
|
///< during processing, as defined by the "fpclass" template parameter.
|
|
///<
|
|
typedef typename fpclass ::CFilterStep
|
|
CFilterStep; ///< Filtering step
|
|
///< class to use during processing, as defined by "fpclass".
|
|
///<
|
|
typedef typename fpclass ::CDitherer CDitherer; ///< Ditherer class to
|
|
///< use during processing, as defined by "fpclass".
|
|
///<
|
|
CImageResizerParams Params; ///< Algorithm's parameters currently in use.
///< Copied from the constructor's "aParams" argument.
|
|
///<
|
|
int SrcBitDepth; ///< Bit resolution of the source image.
///< Equals ResBitDepth if the constructor received aSrcBitDepth = 0.
|
|
///<
|
|
int ResBitDepth; ///< Bit resolution of the resulting image.
///< Also defines the number of truncated bits on integer output.
|
|
///<
|
|
CDSPFracFilterBankLin<fptype>
|
|
FixedFilterBank; ///< Fractional delay
|
|
///< filter bank with fixed characteristics, mainly for
|
|
///< upsizing cases. Initialized and fully built by the constructor.
|
|
///<
|
|
|
|
/**
|
|
* @brief Filtering steps array.
|
|
*
|
|
* The object of this class stores filtering steps together.
|
|
*/
|
|
|
|
typedef CStructArray<CFilterStep> CFilterSteps;
|
|
|
|
/**
|
|
* Function initializes the filter bank in the specified resizing step
|
|
* according to the source and resulting image bit depths.
|
|
*
|
|
* @param FltBank Filter bank to initialize.
|
|
* @param CutoffMult Cutoff multiplier, 0 to 1. 1 corresponds to 0.5pi
|
|
* cutoff point.
|
|
* @param ForceHiOrder "True" if a high-order interpolation should be
|
|
* forced which requires considerably less resources for initialization.
|
|
* @param ExtFilter External filter to apply to interpolation filter.
|
|
*/
|
|
|
|
void initFilterBank(CDSPFracFilterBankLin<fptype>& FltBank,
                    const double CutoffMult, const bool ForceHiOrder,
                    const CFltBuffer& ExtFilter) const {
  // The larger of the two bit depths defines the required precision.

  int IntBitDepth = SrcBitDepth;

  if (ResBitDepth > SrcBitDepth) {
    IntBitDepth = ResBitDepth;
  }

  // Target signal-to-noise ratio in dB: 6.02 dB per bit, with a 3-bit
  // margin.

  const double SNR = -6.02 * (IntBitDepth + 3);

  // First-order interpolation (-146 dB max) is used when forced or when
  // more than 8 bits are processed, zero-order (-72 dB max) otherwise. The
  // number of fractional delay filters sampled by the bank is estimated
  // from the SNR with an order-specific exponential model.

  const bool HiOrder = (ForceHiOrder || IntBitDepth > 8);
  const int UseOrder = (HiOrder ? 1 : 0);
  const double CountScale = (HiOrder ? 0.23134052 : 0.33287686);
  const double CountRate = (HiOrder ? -0.058062929 : -0.11334583);

  int FracCount = (int)ceil(CountScale * exp(CountRate * SNR));

  if (FracCount < 2) {
    // Never use less than 2 fractional delay filters.

    FracCount = 2;
  }

  FltBank.init(FracCount, UseOrder, Params.IntFltLen / CutoffMult,
               Params.IntFltCutoff * CutoffMult, Params.IntFltAlpha,
               ExtFilter, fpclass ::fpalign, fpclass ::elalign);
}
|
|
|
|
/**
|
|
* Function allocates filter buffer taking "fpclass" alignments into
|
|
* account. The allocated buffer may be larger than the requested size: in
|
|
* this case the additional elements will be zeroed by this function.
|
|
*
|
|
* @param Flt Filter buffer.
|
|
* @param ReqCapacity The required filter buffer's capacity.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter allocation.
|
|
* @param FltExt If non-NULL this variable will receive the number of
|
|
* elements the filter was extended by.
|
|
*/
|
|
|
|
static void allocFilter(CBuffer<fptype>& Flt, const int ReqCapacity,
                        const bool IsModel = false,
                        int* const FltExt = NULL) {
  // Round the capacity up to a multiple of fpclass :: elalign (the mask
  // arithmetic expects elalign to be a power of 2).

  const int AlignMask = fpclass ::elalign - 1;
  const int PaddedCapacity = (ReqCapacity + AlignMask) & ~AlignMask;
  const int PadCount = PaddedCapacity - ReqCapacity;

  if (FltExt != NULL) {
    *FltExt = PadCount;
  }

  if (IsModel) {
    // Modeling only: set the capacity, skip allocation and zeroing.

    Flt.forceCapacity(PaddedCapacity);
    return;
  }

  Flt.alloc(PaddedCapacity, fpclass ::fpalign);

  // Zero the elements that were added by the alignment.

  int i;

  for (i = ReqCapacity; i < PaddedCapacity; i++) {
    Flt[i] = 0.0;
  }
}
|
|
|
|
/**
|
|
* Function assigns filter parameters to the specified filtering step
|
|
* object.
|
|
*
|
|
* @param fs Filtering step to assign parameter to. This step cannot be
|
|
* the last step if ResampleFactor greater than 1 was specified.
|
|
* @param IsUpsample "True" if upsampling step. Should be set to "false"
|
|
* if FltCutoff is negative.
|
|
* @param ResampleFactor Resampling factor of this filter (>=1).
|
|
* @param FltCutoff Filter cutoff point. This value will be divided by the
|
|
* ResampleFactor if IsUpsample equals "true". If zero value was
|
|
* specified, the "half-band" predefined filter will be created. In this
|
|
* case the ResampleFactor will modify the filter cutoff point.
|
|
* @param DCGain DC gain to apply to the filter. Assigned to filtering
|
|
* step's DCGain variable.
|
|
* @param UseFltOrig "True" if the originally-designed filter should be
|
|
* left in filtering step's FltOrig buffer. Otherwise it will be freed.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
void assignFilterParams(CFilterStep& fs, const bool IsUpsample,
                        const int ResampleFactor, const double FltCutoff,
                        const double DCGain, const bool UseFltOrig,
                        const bool IsModel) const {
  // Choose the design parameters: the predefined "half-band" set when
  // FltCutoff is exactly zero, the generic low-pass set otherwise.
  double Alpha;
  double HalfLen;
  double Omega;

  if (FltCutoff != 0.0) {
    Alpha = Params.LPFltAlpha;
    HalfLen = 0.25 * Params.LPFltBaseLen / FltCutoff;
    Omega = AVIR_PI * Params.LPFltCutoffMult * FltCutoff;
  } else {
    const double Scale = 2.0 / ResampleFactor;
    Alpha = Params.HBFltAlpha;
    HalfLen = 0.5 * Params.HBFltLen / Scale;
    Omega = AVIR_PI * Params.HBFltCutoff * Scale;
  }

  // For an upsampling step the filter length and the DC gain are
  // multiplied by the resampling factor, the frequency is divided by it.
  if (IsUpsample) {
    HalfLen *= ResampleFactor;
    Omega /= ResampleFactor;
  }

  fs.DCGain = (IsUpsample ? DCGain * ResampleFactor : DCGain);

  // Store the design parameters alongside the original filter.
  fs.FltOrig.Len2 = HalfLen;
  fs.FltOrig.Freq = Omega;
  fs.FltOrig.Alpha = Alpha;
  fs.FltOrig.DCGain = fs.DCGain;

  CDSPPeakedCosineLPF Design(HalfLen, Omega, Alpha);

  fs.IsUpsample = IsUpsample;
  fs.ResampleFactor = ResampleFactor;
  fs.FltLatency = Design.fl2;

  int FltExt;  // Filter's extension due to fpclass :: elalign.

  if (!IsModel) {
    // Build the actual filter: generate, optimize, normalize to the
    // step's DC gain, then copy it into the aligned working buffer.
    fs.FltOrig.alloc(Design.FilterLen);

    Design.generateLPF(&fs.FltOrig[0], 1.0);
    optimizeFIRFilter(fs.FltOrig, fs.FltLatency);
    normalizeFIRFilter(&fs.FltOrig[0], fs.FltOrig.getCapacity(), fs.DCGain);

    allocFilter(fs.Flt, fs.FltOrig.getCapacity(), false, &FltExt);
    copyArray(&fs.FltOrig[0], &fs.Flt[0], fs.FltOrig.getCapacity());

    if (!UseFltOrig) {
      fs.FltOrig.free();
    }
  } else {
    allocFilter(fs.Flt, Design.FilterLen, true, &FltExt);

    if (UseFltOrig) {
      // A real, zero-filled buffer is allocated even when modeling, since
      // this filter may be copied by the filter bank.
      fs.FltOrig.alloc(Design.FilterLen);
      memset(&fs.FltOrig[0], 0, Design.FilterLen * sizeof(fs.FltOrig[0]));
    }
  }

  if (!IsUpsample) {
    if (!UseFltOrig) {
      fs.EdgePixelCount = fs.EdgePixelCountDef;
    }

    return;
  }

  // Length of the prefix "tail": the filter part that follows the latency
  // point plus one resampling period, excluding the alignment padding.
  const int PrefixLen =
      fs.Flt.getCapacity() - fs.FltLatency - ResampleFactor - FltExt;

  allocFilter(fs.PrefixDC, PrefixLen, IsModel);
  allocFilter(fs.SuffixDC, fs.FltLatency, IsModel);

  if (IsModel) {
    return;
  }

  // Create prefix and suffix "tails" used during upsampling.

  // Prefix: start from the filter part beyond (latency + factor), then
  // accumulate copies of it, each advanced by one more resampling period
  // and shortened accordingly.
  const fptype* Src = &fs.Flt[fs.FltLatency + ResampleFactor];
  copyArray(Src, &fs.PrefixDC[0], PrefixLen);

  for (int Rem = PrefixLen - ResampleFactor; Rem > 0;
       Rem -= ResampleFactor) {
    Src += ResampleFactor;
    addArray(Src, &fs.PrefixDC[0], Rem);
  }

  // Suffix: start from the leading (latency) part of the filter, then
  // accumulate the filter start into positions shifted by successive
  // resampling periods.
  fptype* Dst = &fs.SuffixDC[0];
  copyArray(&fs.Flt[0], Dst, fs.FltLatency);

  for (int Rem = fs.FltLatency - ResampleFactor; Rem > 0;
       Rem -= ResampleFactor) {
    Dst += ResampleFactor;
    addArray(&fs.Flt[0], Dst, Rem);
  }
}
|
|
|
|
/**
|
|
* Function adds a correction filter that tries to achieve a linear
|
|
* frequency response at all frequencies. The actual resulting response
|
|
* may feature a slight damping of the highest frequencies since a
|
|
* suitably short correction filter cannot fix steep high-frequency
|
|
* damping.
|
|
*
|
|
* This function assumes that the resizing step is currently the last
|
|
* step, even if it was not inserted yet: this allows placement of the
|
|
* correction filter both before and after the resizing step.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param bw Resulting bandwidth relative to the original bandwidth (which
|
|
* is 1.0), usually 1/k. Should be <= 1.0.
|
|
* @param IsPreCorrection "True" if the filtering step was already created
|
|
* and it is first in the Steps array. "True" also adds edge pixels to
|
|
* reduce edge artifacts.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
void addCorrectionFilter(CFilterSteps& Steps, const double bw,
                         const bool IsPreCorrection,
                         const bool IsModel) const {
  // The correction step is either the pre-created first step or a new
  // step appended to the array.
  CFilterStep& fs = (IsPreCorrection ? Steps[0] : Steps.add());
  fs.IsUpsample = false;
  fs.ResampleFactor = 1;
  fs.DCGain = 1.0;
  fs.EdgePixelCount = (IsPreCorrection ? fs.EdgePixelCountDef : 0);

  if (IsModel) {
    // Modeling: only the filter length and latency are established.
    allocFilter(
        fs.Flt,
        CDSPFIREQ ::calcFilterLength(Params.CorrFltLen, fs.FltLatency), true);

    return;
  }

  const int BinCount = 65;  // Frequency response bins to control.
  const int BinCount1 = BinCount - 1;

  // Adjustment introduced by all steps at all frequencies of interest.
  CBuffer<double> Bins(BinCount);

  for (int b = 0; b < BinCount; b++) {
    Bins[b] = 1.0;
  }

  // Skip the correction step itself: it is the first element when
  // pre-correcting, the last (just added) element otherwise.
  const int FirstStep = (IsPreCorrection ? 1 : 0);
  const int EndStep = Steps.getItemCount() - (FirstStep ^ 1);

  double curbw = 1.0;  // Bandwidth of the filter at the current step.
  double re;
  double im;

  for (int n = FirstStep; n < EndStep; n++) {
    const CFilterStep& cur = Steps[n];

    if (cur.IsUpsample) {
      curbw *= cur.ResampleFactor;

      // An upsampling step that still holds its original filter is not
      // measured here.
      if (cur.FltOrig.getCapacity() > 0) {
        continue;
      }
    }

    const double dcg = 1.0 / cur.DCGain;  // DC gain correction.

    // The resizing step (ResampleFactor == 0) is represented by the first
    // filter of its filter bank; other steps by their own filter.
    const bool IsResizeStep = (cur.ResampleFactor == 0);
    const fptype* const Flt =
        (IsResizeStep ? cur.FltBank->getFilter(0) : &cur.Flt[0]);
    const int FltLen =
        (IsResizeStep ? cur.FltBank->getFilterLen() : cur.Flt.getCapacity());

    // Accumulate the inverse of this step's magnitude response, within the
    // bounds of the bandwidth of interest.
    for (int b = 0; b < BinCount; b++) {
      const double th = AVIR_PI * bw / curbw * b / BinCount1;

      calcFIRFilterResponse(Flt, FltLen, th, re, im);

      Bins[b] /= sqrt(re * re + im * im) * dcg;
    }

    if (!cur.IsUpsample && cur.ResampleFactor > 1) {
      curbw /= cur.ResampleFactor;
    }
  }

  // Design the equalizing filter from the collected bins.

  CDSPFIREQ EQ;
  EQ.init(bw * 2.0, Params.CorrFltLen, BinCount, 0.0, bw, false,
          Params.CorrFltAlpha);

  fs.FltLatency = EQ.getFilterLatency();

  CBuffer<double> Filter(EQ.getFilterLength());
  EQ.buildFilter(Bins, &Filter[0]);

  // Normalize to unity DC gain both before and after optimization.
  normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);
  optimizeFIRFilter(Filter, fs.FltLatency);
  normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);

  allocFilter(fs.Flt, Filter.getCapacity());
  copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity());

  // Disabled debug aid: prints a theoretically achieved final frequency
  // response at various feature sizes (from DC to 1 pixel). Values above
  // 255 mean features become brighter, values below 255 mean features
  // become dimmer.

  /* const double sbw = ( bw > 1.0 ? 1.0 / bw : 1.0 );

  for( j = 0; j < BinCount; j++ )
  {
    const double th = AVIR_PI * sbw * j / BinCount1;

    calcFIRFilterResponse( &fs.Flt[ 0 ],
      fs.Flt.getCapacity(), th, re, im );

    printf( "%f\n", sqrt( re * re + im * im ) / Bins[ j
      ] * 255 );
  }

  printf( "***\n" );*/
}
|
|
|
|
/**
|
|
* Function adds a sharpening filter if image is being upsized. Such
|
|
* sharpening allows to spot interpolation filter's stop-band attenuation:
|
|
* if attenuation is too weak, a "dark grid" and other artifacts may
|
|
* become visible.
|
|
*
|
|
* It is assumed that 40 decibel stop-band attenuation should be
|
|
* considered a required minimum: this allows application of (deliberately
|
|
* strong) 64X sharpening without spotting any artifacts.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param bw Resulting bandwidth relative to the original bandwidth (which
|
|
* is 1.0), usually 1/k.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
static void addSharpenTest(CFilterSteps& Steps, const double bw,
                           const bool IsModel) {
  // No sharpening step is added unless bw exceeds 1.0.
  if (!(bw > 1.0)) {
    return;
  }

  const double FltLen = 10.0 * bw;

  CFilterStep& fs = Steps.add();
  fs.IsUpsample = false;
  fs.ResampleFactor = 1;
  fs.DCGain = 1.0;
  fs.EdgePixelCount = 0;

  if (IsModel) {
    // Modeling: only the filter length and latency are established.
    allocFilter(fs.Flt, CDSPFIREQ ::calcFilterLength(FltLen, fs.FltLatency),
                true);

    return;
  }

  const int BinCount = 200;

  // First bin that receives the boosted gain, limited to BinCount.
  int Thresh = (int)round(BinCount / bw * 1.75);

  if (Thresh > BinCount) {
    Thresh = BinCount;
  }

  // Unity gain below the threshold bin, 256X gain from it upwards.
  CBuffer<double> Bins(BinCount);

  for (int b = 0; b < BinCount; b++) {
    Bins[b] = (b < Thresh ? 1.0 : 256.0);
  }

  CDSPFIREQ EQ;
  EQ.init(bw * 2.0, FltLen, BinCount, 0.0, bw, false, 1.7);

  fs.FltLatency = EQ.getFilterLatency();

  CBuffer<double> Filter(EQ.getFilterLength());
  EQ.buildFilter(Bins, &Filter[0]);

  // Normalize to unity DC gain both before and after optimization.
  normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);
  optimizeFIRFilter(Filter, fs.FltLatency);
  normalizeFIRFilter(&Filter[0], Filter.getCapacity(), 1.0);

  allocFilter(fs.Flt, Filter.getCapacity());
  copyArray(&Filter[0], &fs.Flt[0], Filter.getCapacity());

  // Disabled debug aid: prints the magnitude response of the built filter.

  /* for( j = 0; j < BinCount; j++ )
  {
    const double th = AVIR_PI * j / ( BinCount - 1 );
    double re;
    double im;

    calcFIRFilterResponse( &fs.Flt[ 0 ],
      fs.Flt.getCapacity(), th, re, im );

    printf( "%f\n", sqrt( re * re + im * im ));
  }

  printf( "***\n" );*/
}
|
|
|
|
/**
|
|
* Function builds sequence of filtering steps depending on the specified
|
|
* resizing coefficient. The last steps included are always the resizing
|
|
* step then (possibly) the correction step.
|
|
*
|
|
* @param Steps Array that receives filtering steps.
|
|
* @param[out] Vars Variables object.
|
|
* @param FltBank Filter bank to initialize and use.
|
|
* @param DCGain The overall DC gain to apply. This DC gain is applied to
|
|
* the first filtering step only (upsampling or filtering step).
|
|
* @param ModeFlags Build mode flags to use. This is a bitmap of switches
|
|
* that enable or disable certain algorithm features.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* the actual filter allocation and building.
|
|
*/
|
|
|
|
void buildFilterSteps(CFilterSteps& Steps, CImageResizerVars& Vars,
                      CDSPFracFilterBankLin<fptype>& FltBank,
                      const double DCGain, const int ModeFlags,
                      const bool IsModel) const {
  Steps.clear();

  // Decode the build mode bitmap.
  const bool DoFltAndIntCombo = ((ModeFlags & 1) != 0);  // Combine filter
                                                         // and interpolator.
  const bool ForceHiOrderInt = ((ModeFlags & 2) != 0);  // Force use of a
                                                        // higher-order
                                                        // interpolation.
  const bool UseHalfband = ((ModeFlags & 4) != 0);  // Use half-band filter.

  const double bw = 1.0 / Vars.k;  // Resulting bandwidth.
  const int UpsampleFactor = ((int)floor(Vars.k) < 2 ? 2 : 1);

  // With k <= 1 the correction filter is applied first, in a step that is
  // reserved here at index 0; the cutoff and correction bandwidth are 1.0.
  // Otherwise both equal the resulting bandwidth.
  const bool IsPreCorrection = (Vars.k <= 1.0);
  double FltCutoff = (IsPreCorrection ? 1.0 : bw);  // Cutoff frequency of
                                                    // the first filtering
                                                    // step.
  const double corrbw = FltCutoff;  // Bandwidth at the correction step.

  if (IsPreCorrection) {
    Steps.add();
  }

  double IntCutoffMult;  // Interpolation filter cutoff multiplier.
  CFilterStep* ReuseStep = NULL;  // If not NULL, the resizing step uses
                                  // this step object instead of a new one.
  CFilterStep* ExtFltStep = NULL;  // If not NULL, FltOrig of this step is
                                   // the external filter to be applied to
                                   // the interpolator.

  // Add 1 upsampling or several downsampling filters.

  if (UpsampleFactor > 1) {
    CFilterStep& UpStep = Steps.add();
    assignFilterParams(UpStep, true, UpsampleFactor, FltCutoff, DCGain,
                       DoFltAndIntCombo, IsModel);

    IntCutoffMult = FltCutoff * 2.0 / UpsampleFactor;

    if (DoFltAndIntCombo) {
      ExtFltStep = &UpStep;
    }
  } else {
    int DownsampleFactor;

    for (;;) {
      DownsampleFactor = (int)floor(0.5 / FltCutoff);

      // Factors above 16 always get a half-band filter (limited to 16) in
      // order to keep filter lengths lower for more precise frequency
      // response and less edge artifacts.
      const bool IsForced = (DownsampleFactor > 16);

      if (IsForced) {
        DownsampleFactor = 16;
      }

      if (!IsForced && !(UseHalfband && DownsampleFactor > 1)) {
        if (DownsampleFactor < 1) {
          DownsampleFactor = 1;
        }

        break;
      }

      assignFilterParams(Steps.add(), false, DownsampleFactor, 0.0, 1.0,
                         false, IsModel);

      FltCutoff *= DownsampleFactor;
    }

    CFilterStep& LPStep = Steps.add();
    assignFilterParams(LPStep, false, DownsampleFactor, FltCutoff, DCGain,
                       DoFltAndIntCombo, IsModel);

    IntCutoffMult = FltCutoff / 0.5;

    if (DoFltAndIntCombo) {
      ReuseStep = &LPStep;
      ExtFltStep = &LPStep;
    } else {
      IntCutoffMult *= DownsampleFactor;
    }
  }

  // Insert resizing and correction steps.

  CFilterStep& ResStep = (ReuseStep == NULL ? Steps.add() : *ReuseStep);

  Vars.ResizeStep = Steps.getItemCount() - 1;
  ResStep.IsUpsample = false;
  ResStep.ResampleFactor = 0;  // Zero factor marks the resizing step.
  ResStep.DCGain = (ExtFltStep == NULL ? 1.0 : ExtFltStep->DCGain);

  initFilterBank(FltBank, IntCutoffMult, ForceHiOrderInt,
                 (ExtFltStep == NULL ? ResStep.FltOrig : ExtFltStep->FltOrig));

  // Point the step at the fixed filter bank when it compares equal to the
  // one just initialized, otherwise at the caller's bank.
  if (FltBank == FixedFilterBank) {
    ResStep.FltBank = (CDSPFracFilterBankLin<fptype>*)&FixedFilterBank;
  } else {
    ResStep.FltBank = &FltBank;
  }

  addCorrectionFilter(Steps, corrbw, IsPreCorrection, IsModel);

  //	addSharpenTest( Steps, bw, IsModel );
}
|
|
|
|
/**
|
|
* Function extends *this upsampling step so that it produces more
|
|
* upsampled pixels that cover the prefix and suffix needs of the next
|
|
* step. After the call to this function the InPrefix and InSuffix
|
|
* variables of the next step will be set to zero.
|
|
*
|
|
* @param fs Upsampling filtering step.
|
|
* @param NextStep The next step structure.
|
|
*/
|
|
|
|
static void extendUpsample(CFilterStep& fs, CFilterStep& NextStep) {
  const int Factor = fs.ResampleFactor;

  // Input pixels needed to cover the next step's prefix, rounded up to
  // whole input pixels; the output prefix grows by the matching count.
  const int ExtraPrefix = (NextStep.InPrefix + Factor - 1) / Factor;
  fs.InPrefix = ExtraPrefix;
  fs.OutPrefix += ExtraPrefix * Factor;
  NextStep.InPrefix = 0;

  // Same for the suffix.
  const int ExtraSuffix = (NextStep.InSuffix + Factor - 1) / Factor;
  fs.InSuffix = ExtraSuffix;
  fs.OutSuffix += ExtraSuffix * Factor;
  NextStep.InSuffix = 0;
}
|
|
|
|
/**
|
|
* Function fills resizing step's RPosBuf array, excluding the actual
|
|
* "ftp" pointers and "SrcOffs" offsets.
|
|
*
|
|
* This array should be cleared if the resizing step or offset were
|
|
* changed. Otherwise this function only fills the elements required to
|
|
* cover resizing step's OutLen.
|
|
*
|
|
* This function is called by the updateFilterStepBuffers() function.
|
|
*
|
|
* @param fs Resizing step.
|
|
* @param Vars Variables object.
|
|
*/
|
|
|
|
static void fillRPosBuf(CFilterStep& fs, const CImageResizerVars& Vars) {
  // Elements below the previous capacity are left untouched; only the
  // range up to OutLen is appended.
  const int PrevLen = fs.RPosBuf->getCapacity();

  if (PrevLen < fs.OutLen) {
    fs.RPosBuf->increaseCapacity(fs.OutLen);
  }

  typename CFilterStep ::CResizePos* const NewPos = &(*fs.RPosBuf)[PrevLen];
  const int FracCount = fs.FltBank->getFracCount();
  const double Offs = Vars.o;
  const double Step = Vars.k;

  for (int OutIdx = PrevLen; OutIdx < fs.OutLen; OutIdx++) {
    typename CFilterStep ::CResizePos& rp = NewPos[OutIdx - PrevLen];

    // Split the source position into an integer pixel index, a fractional
    // filter index and the remaining fraction within that filter.
    const double SrcPos = Offs + Step * OutIdx;
    const int SrcPosInt = (int)floor(SrcPos);
    const double FracPos = (SrcPos - SrcPosInt) * FracCount;
    const int FracIdx = (int)FracPos;

    rp.x = (typename fpclass ::fptypeatom)(FracPos - FracIdx);
    rp.fti = FracIdx;
    rp.SrcPosInt = SrcPosInt;
  }
}
|
|
|
|
/**
|
|
* Function updates filtering step buffer lengths depending on the
|
|
* specified source and new scanline lengths. This function should be
|
|
* called after the buildFilterSteps() function.
|
|
*
|
|
* @param Steps Filtering steps array; each step's buffer layout variables are updated in place.
|
|
* @param[out] Vars Variables object, will receive buffer size and length.
|
|
* This function expects "k" and "o" variable values that will be
|
|
* adjusted by this function.
|
|
* @param RPosBufArray Resizing position buffers array, used to obtain
|
|
* buffer to initialize and use (will be reused if it is already fully or
|
|
* partially filled).
|
|
* @param SrcLen Source scanline's length in pixels.
|
|
* @param NewLen New scanline's length in pixels.
|
|
*/
|
|
|
|
static void updateFilterStepBuffers(
    CFilterSteps& Steps, CImageResizerVars& Vars,
    typename CFilterStep ::CRPosBufArray& RPosBufArray, int SrcLen,
    const int NewLen) {
  int UpsampleIdx = -1;  // Index of the last upsampling step seen, or -1.
  int CurBuf = 0;  // Buffer index that feeds the current step.

  for (int n = 0; n < Steps.getItemCount(); n++) {
    CFilterStep& st = Steps[n];

    st.Vars = &Vars;
    st.InLen = SrcLen;
    st.InBuf = CurBuf;
    st.OutBuf = (CurBuf + 1) & 1;  // Alternate between buffers 0 and 1.

    if (st.IsUpsample) {
      UpsampleIdx = n;

      // The resizing step and offset are scaled into the upsampled domain.
      Vars.k *= st.ResampleFactor;
      Vars.o *= st.ResampleFactor;

      st.InPrefix = 0;
      st.InSuffix = 0;
      st.OutLen = st.InLen * st.ResampleFactor;
      st.OutPrefix = st.FltLatency;
      st.OutSuffix = st.Flt.getCapacity() - st.FltLatency - st.ResampleFactor;

      // Enlarge the output suffix if the SuffixDC tail would extend past
      // the current output span.
      const int Covered = st.OutPrefix + st.OutLen + st.OutSuffix;
      const int Needed =
          st.InLen * st.ResampleFactor + st.SuffixDC.getCapacity();

      if (Needed > Covered) {
        st.OutSuffix += Needed - Covered;
      }

      // Likewise if the PrefixDC tail is longer than OutLen + OutSuffix.
      const int Tail = st.OutLen + st.OutSuffix;

      if (st.PrefixDC.getCapacity() > Tail) {
        st.OutSuffix += st.PrefixDC.getCapacity() - Tail;
      }
    } else if (st.ResampleFactor == 0) {
      // Resizing step: derive the input prefix and suffix from the
      // left-most and right-most source positions the filters touch.
      const int FilterLenD2 = st.FltBank->getFilterLen() / 2;
      const int FilterLenD21 = FilterLenD2 - 1;

      const int LeftPix = (int)floor(Vars.o) - FilterLenD21;
      st.InPrefix = (LeftPix < 0 ? -LeftPix : 0);

      const int RightPix =
          (int)floor(Vars.o + (NewLen - 1) * Vars.k) + FilterLenD2 + 1;

      st.InSuffix = (RightPix > st.InLen ? RightPix - st.InLen : 0);

      st.OutLen = NewLen;
      st.RPosBuf = &RPosBufArray.getRPosBuf(Vars.k, Vars.o,
                                            st.FltBank->getFracCount());

      fillRPosBuf(st, Vars);
    } else {
      // Filtering (and possibly downsampling) step.
      const int Factor = st.ResampleFactor;
      const int EdgeCount = st.EdgePixelCount;

      Vars.k /= Factor;
      Vars.o /= Factor;
      Vars.o += EdgeCount;

      st.InPrefix = st.FltLatency;
      st.InSuffix = st.Flt.getCapacity() - st.FltLatency - 1;

      // Additionally extend OutLen to produce more precise edge pixels.

      st.OutLen = (st.InLen + Factor - 1) / Factor + EdgeCount;

      st.InSuffix += (st.OutLen - 1) * Factor + 1 - st.InLen;

      st.InPrefix += EdgeCount * Factor;
      st.OutLen += EdgeCount;
    }

    // This step's output becomes the next step's input.
    CurBuf = st.OutBuf;
    SrcLen = st.OutLen;
  }

  // Output buffer index 2 is assigned to the last step.
  Steps[Steps.getItemCount() - 1].OutBuf = 2;

  if (UpsampleIdx != -1) {
    extendUpsample(Steps[UpsampleIdx], Steps[UpsampleIdx + 1]);
  }
}
|
|
|
|
/**
|
|
* Function calculates an optimal intermediate buffer length that will
|
|
* cover all needs of the specified filtering steps. This function should
|
|
* be called after the updateFilterStepBuffers() function.
|
|
*
|
|
* Function also updates resizing step's RPosBuf pointers to the filter
|
|
* bank and SrcOffs values.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param[out] Vars Variables object, will receive buffer size and length.
|
|
* @param ResElIncr Resulting (final) element increment, used to produce
|
|
* de-interleaved result. For horizontal processing this value is equal
|
|
* to last step's OutLen, for vertical processing this value is equal to
|
|
* resulting image's width.
|
|
*/
|
|
|
|
static void updateBufLenAndRPosPtrs(CFilterSteps& Steps,
                                    CImageResizerVars& Vars,
                                    const int ResElIncr) {
  int MaxPrefix[2] = {0, 0};  // Largest prefix per flip-flop buffer.
  int MaxLen[2] = {0, 0};  // Largest length (incl. suffix) per buffer.
  const int StepCount = Steps.getItemCount();
  int n;

  // Pass 1: collect per-buffer maxima and set InElIncr of every step.

  for (n = 0; n < StepCount; n++) {
    CFilterStep& st = Steps[n];
    const int ib = st.InBuf;
    const int InSpan = st.InLen + st.InSuffix;

    if (MaxPrefix[ib] < st.InPrefix) {
      MaxPrefix[ib] = st.InPrefix;
    }

    if (MaxLen[ib] < InSpan) {
      MaxLen[ib] = InSpan;
    }

    st.InElIncr = st.InPrefix + InSpan;

    // A step that outputs to buffer 2 ends the scan; its output is not
    // placed into the flip-flop buffers.
    if (st.OutBuf == 2) {
      break;
    }

    const int ob = st.OutBuf;

    // Only upsampling steps contribute an output prefix and suffix.
    const int OutSpan = (st.IsUpsample ? st.OutLen + st.OutSuffix : st.OutLen);

    if (st.IsUpsample && MaxPrefix[ob] < st.OutPrefix) {
      MaxPrefix[ob] = st.OutPrefix;
    }

    if (MaxLen[ob] < OutSpan) {
      MaxLen[ob] = OutSpan;
    }
  }

  // Pass 2: update OutElIncr values of all steps.

  for (n = 0; n < StepCount; n++) {
    CFilterStep& st = Steps[n];

    if (st.OutBuf == 2) {
      st.OutElIncr = ResElIncr;
      break;
    }

    CFilterStep& Next = Steps[n + 1];

    if (st.IsUpsample) {
      // The larger of this step's output span and the next step's input
      // increment is used by both.
      const int OutSpan = st.OutPrefix + st.OutLen + st.OutSuffix;

      if (OutSpan > Next.InElIncr) {
        Next.InElIncr = OutSpan;
      }
    }

    st.OutElIncr = Next.InElIncr;
  }

  // Pass 3: update temporary buffers' lengths and offsets.

  for (int b = 0; b < 2; b++) {
    Vars.BufLen[b] = MaxPrefix[b] + MaxLen[b];
    Vars.BufOffs[b] = MaxPrefix[b];

    // The offset is scaled by the element count only when packmode is 0.
    if (Vars.packmode == 0) {
      Vars.BufOffs[b] *= Vars.ElCount;
    }

    Vars.BufLen[b] *= Vars.ElCount;
  }

  // Pass 4: update RPosBuf filter pointers and SrcOffs.

  CFilterStep& rs = Steps[Vars.ResizeStep];
  typename CFilterStep ::CResizePos* const PosTable = &(*rs.RPosBuf)[0];
  const int ElMult = (fpclass ::packmode == 0 ? Vars.ElCount : 1);
  const int FilterLenD21 = rs.FltBank->getFilterLen() / 2 - 1;

  for (int p = 0; p < rs.OutLen; p++) {
    typename CFilterStep ::CResizePos& rp = PosTable[p];

    rp.ftp = rs.FltBank->getFilter(rp.fti);
    rp.SrcOffs = (rp.SrcPosInt - FilterLenD21) * ElMult;
  }
}
|
|
|
|
/**
|
|
* Function modifies the overall (DC) gain of the correction filter in the
|
|
* pre-built filtering steps array.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param m Multiplier to apply to the correction filter.
|
|
*/
|
|
|
|
void modifyCorrFilterDCGain(CFilterSteps& Steps, const double m) const {
  const int LastIdx = Steps.getItemCount() - 1;

  // The correction filter is taken from the last step when that step is a
  // plain (factor 1, non-upsampling) filter, otherwise from the first step.
  const bool IsLastPlain =
      (!Steps[LastIdx].IsUpsample && Steps[LastIdx].ResampleFactor == 1);

  CBuffer<fptype>& Flt = (IsLastPlain ? Steps[LastIdx].Flt : Steps[0].Flt);

  // Scale every filter element, multiplying in double precision.
  for (int t = 0; t < Flt.getCapacity(); t++) {
    Flt[t] = (fptype)((double)Flt[t] * m);
  }
}
|
|
|
|
/**
|
|
* Function builds a map of used fractional delay filters based on the
|
|
* resizing positions buffer.
|
|
*
|
|
* @param fs Resizing step.
|
|
* @param[out] UsedFracMap Map of used fractional delay filters.
|
|
*/
|
|
|
|
static void fillUsedFracMap(const CFilterStep& fs,
|
|
CBuffer<uint8_t>& UsedFracMap) {
|
|
const int FracCount = fs.FltBank->getFracCount();
|
|
UsedFracMap.increaseCapacity(FracCount, false);
|
|
memset(&UsedFracMap[0], 0, FracCount * sizeof(UsedFracMap[0]));
|
|
|
|
typename CFilterStep ::CResizePos* rpos = &(*fs.RPosBuf)[0];
|
|
int i;
|
|
|
|
for (i = 0; i < fs.OutLen; i++) {
|
|
UsedFracMap[rpos->fti] |= 1;
|
|
rpos++;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function calculates the overall filtering steps complexity per
|
|
* scanline. Each complexity unit corresponds to a single multiply-add
|
|
* operation. Data copy and pointer math operations are not included in
|
|
* this calculation, it is assumed that they correlate to the multiply-add
|
|
* operations. Calculation also does not include final rounding, dithering
|
|
* and clamping operations since they cannot be optimized out anyway.
|
|
*
|
|
* Calculation of the CRPosBuf buffer is not included since it cannot be
|
|
* avoided.
|
|
*
|
|
* This function should be called after the updateFilterStepBuffers()
|
|
* function.
|
|
*
|
|
* @param Steps Filtering steps array.
|
|
* @param Vars Variables object.
|
|
* @param UsedFracMap The map of used fractional delay filters.
|
|
* @param ScanlineCount Scanline count.
|
|
*/
|
|
|
|
static int calcComplexity(const CFilterSteps& Steps,
|
|
const CImageResizerVars& Vars,
|
|
const CBuffer<uint8_t>& UsedFracMap,
|
|
const int ScanlineCount) {
|
|
int fcnum; // Filter complexity multiplier numerator.
|
|
int fcdenom; // Filter complexity multiplier denominator.
|
|
|
|
if (Vars.packmode != 0) {
|
|
fcnum = 1;
|
|
fcdenom = 1;
|
|
} else {
|
|
// In interleaved processing mode, filters require 1 less
|
|
// multiplication per 2 multiply-add instructions.
|
|
|
|
fcnum = 3;
|
|
fcdenom = 4;
|
|
}
|
|
|
|
int s = 0; // Complexity per one scanline.
|
|
int s2 = 0; // Complexity per all scanlines.
|
|
int i;
|
|
|
|
for (i = 0; i < Steps.getItemCount(); i++) {
|
|
const CFilterStep& fs = Steps[i];
|
|
|
|
s2 += 65 * fs.Flt.getCapacity(); // Filter creation complexity.
|
|
|
|
if (fs.IsUpsample) {
|
|
if (fs.FltOrig.getCapacity() > 0) {
|
|
continue;
|
|
}
|
|
|
|
s += (fs.Flt.getCapacity() * (fs.InPrefix + fs.InLen + fs.InSuffix) +
|
|
fs.SuffixDC.getCapacity() + fs.PrefixDC.getCapacity()) *
|
|
Vars.ElCount;
|
|
} else if (fs.ResampleFactor == 0) {
|
|
s += fs.FltBank->getFilterLen() *
|
|
(fs.FltBank->getOrder() + Vars.ElCount) * fs.OutLen;
|
|
|
|
s2 += fs.FltBank->calcInitComplexity(UsedFracMap);
|
|
} else {
|
|
s += fs.Flt.getCapacity() * Vars.ElCount * fs.OutLen * fcnum / fcdenom;
|
|
}
|
|
}
|
|
|
|
return (s + s2 / ScanlineCount);
|
|
}
|
|
|
|
/**
|
|
* @brief Thread-isolated data used for scanline processing.
|
|
*
|
|
* This structure holds data necessary for image's horizontal or vertical
|
|
* scanline processing, including scanline processing queue.
|
|
*
|
|
* @tparam Tin Source element data type. Intermediate buffers store data
|
|
* in floating point format.
|
|
* @tparam Tout Destination element data type. Intermediate buffers store
|
|
* data in floating point format.
|
|
*/
|
|
|
|
template <class Tin, class Tout>
|
|
class CThreadData : public CImageResizerThreadPool ::CWorkload {
|
|
public:
|
|
virtual void process() { processScanlineQueue(); }
|
|
|
|
/**
|
|
* This enumeration lists possible scanline operations.
|
|
*/
|
|
|
|
enum EScanlineOperation {
|
|
sopResizeH, ///< Resize horizontal scanline.
|
|
///<
|
|
sopResizeV, ///< Resize vertical scanline.
|
|
///<
|
|
sopDitherAndUnpackH, ///< Dither and unpack horizontal scanline.
|
|
///<
|
|
sopUnpackH ///< Unpack horizontal scanline.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* Function initializes *this thread data object and assigns certain
|
|
* variables provided by the higher level code.
|
|
*
|
|
* @param aThreadIndex Index of this thread data (0-based).
|
|
* @param aThreadCount Total number of threads used during processing.
|
|
* @param aSteps Filtering steps.
|
|
* @param aVars Image resizer variables.
|
|
*/
|
|
|
|
void init(const int aThreadIndex, const int aThreadCount,
|
|
const CFilterSteps& aSteps, const CImageResizerVars& aVars) {
|
|
ThreadIndex = aThreadIndex;
|
|
ThreadCount = aThreadCount;
|
|
Steps = &aSteps;
|
|
Vars = &aVars;
|
|
}
|
|
|
|
/**
|
|
* Function initializes scanline processing queue, and updates
|
|
* capacities of intermediate buffers.
|
|
*
|
|
* @param aOp Operation to perform over scanline.
|
|
* @param TotalLines The total number of scanlines that will be
|
|
* processed by all threads.
|
|
* @param aSrcLen Source scanline length in pixels.
|
|
* @param aSrcIncr Source scanline buffer increment. Ignored in
|
|
* horizontal scanline processing.
|
|
* @param aResIncr Resulting scanline buffer increment. Ignored in
|
|
* horizontal scanline processing.
|
|
*/
|
|
|
|
void initScanlineQueue(const EScanlineOperation aOp, const int TotalLines,
|
|
const int aSrcLen, const int aSrcIncr = 0,
|
|
const int aResIncr = 0) {
|
|
const int l = Vars->BufLen[0] + Vars->BufLen[1];
|
|
|
|
if (Bufs.getCapacity() < l) {
|
|
Bufs.alloc(l, fpclass ::fpalign);
|
|
}
|
|
|
|
BufPtrs[0] = Bufs + Vars->BufOffs[0];
|
|
BufPtrs[1] = Bufs + Vars->BufLen[0] + Vars->BufOffs[1];
|
|
|
|
int j;
|
|
int ml = 0;
|
|
|
|
for (j = 0; j < Steps->getItemCount(); j++) {
|
|
const CFilterStep& fs = (*Steps)[j];
|
|
|
|
if (fs.ResampleFactor == 0 && ml < fs.FltBank->getFilterLen()) {
|
|
ml = fs.FltBank->getFilterLen();
|
|
}
|
|
}
|
|
|
|
TmpFltBuf.alloc(ml, fpclass ::fpalign);
|
|
ScanlineOp = aOp;
|
|
SrcLen = aSrcLen;
|
|
SrcIncr = aSrcIncr;
|
|
ResIncr = aResIncr;
|
|
QueueLen = 0;
|
|
Queue.increaseCapacity((TotalLines + ThreadCount - 1) / ThreadCount,
|
|
false);
|
|
}
|
|
|
|
/**
|
|
* Function adds a scanline to the queue buffer. The
|
|
* initScanlineQueue() function should be called before calling this
|
|
* function. The number of calls to this add function should not
|
|
* exceed the TotalLines spread over all threads.
|
|
*
|
|
* @param SrcBuf Source scanline buffer.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void addScanlineToQueue(void* const SrcBuf, void* const ResBuf) {
|
|
Queue[QueueLen].SrcBuf = SrcBuf;
|
|
Queue[QueueLen].ResBuf = ResBuf;
|
|
QueueLen++;
|
|
}
|
|
|
|
/**
|
|
* Function processes all queued scanlines.
|
|
*/
|
|
|
|
void processScanlineQueue() {
|
|
int i;
|
|
|
|
switch (ScanlineOp) {
|
|
case sopResizeH: {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
resizeScanlineH((Tin*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopResizeV: {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
resizeScanlineV((fptype*)Queue[i].SrcBuf, (fptype*)Queue[i].ResBuf);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopDitherAndUnpackH: {
|
|
if (Vars->UseSRGBGamma) {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen,
|
|
*Vars);
|
|
|
|
Ditherer.dither((fptype*)Queue[i].SrcBuf);
|
|
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
} else {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
Ditherer.dither((fptype*)Queue[i].SrcBuf);
|
|
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopUnpackH: {
|
|
if (Vars->UseSRGBGamma) {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
CFilterStep ::applySRGBGamma((fptype*)Queue[i].SrcBuf, SrcLen,
|
|
*Vars);
|
|
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
} else {
|
|
for (i = 0; i < QueueLen; i++) {
|
|
CFilterStep ::unpackScanline((fptype*)Queue[i].SrcBuf,
|
|
(Tout*)Queue[i].ResBuf, SrcLen,
|
|
*Vars);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function returns ditherer object associated with *this thread data
|
|
* object.
|
|
*/
|
|
|
|
CDitherer& getDitherer() { return (Ditherer); }
|
|
|
|
private:
|
|
int ThreadIndex; ///< Thread index.
|
|
///<
|
|
int ThreadCount; ///< Thread count.
|
|
///<
|
|
const CFilterSteps* Steps; ///< Filtering steps.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizer variables.
|
|
///<
|
|
CBuffer<fptype> Bufs; ///< Flip-flop intermediate buffers.
|
|
///<
|
|
fptype* BufPtrs[3]; ///< Flip-flop buffer pointers (referenced by
|
|
///< filtering step's InBuf and OutBuf indices).
|
|
///<
|
|
CBuffer<fptype>
|
|
TmpFltBuf; ///< Temporary buffer used in the
|
|
///< doResize() function, aligned by fpclass :: fpalign.
|
|
///<
|
|
EScanlineOperation ScanlineOp; ///< Operation to perform over
|
|
///< scanline.
|
|
///<
|
|
int SrcLen; ///< Source scanline length in the last queue.
|
|
///<
|
|
int SrcIncr; ///< Source scanline buffer increment in the last queue.
|
|
///<
|
|
int ResIncr; ///< Resulting scanline buffer increment in the last
|
|
///< queue.
|
|
///<
|
|
CDitherer Ditherer; ///< Ditherer object to use.
|
|
///<
|
|
|
|
/**
|
|
* @brief Scanline processing queue item.
|
|
*
|
|
* Scanline processing queue item.
|
|
*/
|
|
|
|
struct CQueueItem {
|
|
void* SrcBuf; ///< Source scanline buffer, will by typecasted to
|
|
///< Tin or fptype*.
|
|
///<
|
|
void* ResBuf; ///< Resulting scanline buffer, will by typecasted
|
|
///< to Tout or fptype*.
|
|
///<
|
|
};
|
|
|
|
CBuffer<CQueueItem> Queue; ///< Scanline processing queue.
|
|
///<
|
|
int QueueLen; ///< Queue length.
|
|
///<
|
|
|
|
/**
|
|
* Function resizes a single horizontal scanline.
|
|
*
|
|
* @param SrcBuf Source scanline buffer. Can be either horizontal or
|
|
* vertical.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void resizeScanlineH(const Tin* const SrcBuf, fptype* const ResBuf) {
|
|
(*Steps)[0].packScanline(SrcBuf, BufPtrs[0], SrcLen);
|
|
BufPtrs[2] = ResBuf;
|
|
int j;
|
|
|
|
for (j = 0; j < Steps->getItemCount(); j++) {
|
|
const CFilterStep& fs = (*Steps)[j];
|
|
fs.prepareInBuf(BufPtrs[fs.InBuf]);
|
|
const int DstIncr = (Vars->packmode == 0 ? Vars->ElCount : 1);
|
|
|
|
if (fs.ResampleFactor != 0) {
|
|
if (fs.IsUpsample) {
|
|
fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]);
|
|
} else {
|
|
fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr);
|
|
}
|
|
} else {
|
|
fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr,
|
|
TmpFltBuf);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function resizes a single vertical scanline.
|
|
*
|
|
* @param SrcBuf Source scanline buffer. Can be either horizontal or
|
|
* vertical.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void resizeScanlineV(const fptype* const SrcBuf, fptype* const ResBuf) {
|
|
(*Steps)[0].convertVtoH(SrcBuf, BufPtrs[0], SrcLen, SrcIncr);
|
|
|
|
BufPtrs[2] = ResBuf;
|
|
int j;
|
|
|
|
for (j = 0; j < Steps->getItemCount(); j++) {
|
|
const CFilterStep& fs = (*Steps)[j];
|
|
fs.prepareInBuf(BufPtrs[fs.InBuf]);
|
|
const int DstIncr =
|
|
(fs.OutBuf == 2 ? ResIncr
|
|
: (Vars->packmode == 0 ? Vars->ElCount : 1));
|
|
|
|
if (fs.ResampleFactor != 0) {
|
|
if (fs.IsUpsample) {
|
|
fs.doUpsample(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf]);
|
|
} else {
|
|
fs.doFilter(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr);
|
|
}
|
|
} else {
|
|
fs.doResize(BufPtrs[fs.InBuf], BufPtrs[fs.OutBuf], DstIncr,
|
|
TmpFltBuf);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
};
|
|
|
|
#undef AVIR_PI
|
|
#undef AVIR_PId2
|
|
|
|
} // namespace avir
|
|
|
|
#endif // AVIR_CIMAGERESIZER_INCLUDED
|
|
/* clang-format off */
|
|
//$ nobt
|
|
//$ nocpp
|
|
#include "libc/calls/calls.h"
|
|
|
|
/**
|
|
* @file avir.h
|
|
*
|
|
* @brief The "main" inclusion file with all required classes and functions.
|
|
*
|
|
* This is the "main" inclusion file for the "AVIR" image resizer. This
|
|
* inclusion file contains implementation of the AVIR image resizing algorithm
|
|
* in its entirety. Also includes several classes and functions that can be
|
|
* useful elsewhere.
|
|
*
|
|
* AVIR Copyright (c) 2015-2019 Aleksey Vaneev
|
|
*
|
|
* @mainpage
|
|
*
|
|
* @section intro_sec Introduction
|
|
*
|
|
* Description is available at https://github.com/avaneev/avir
|
|
*
|
|
* AVIR is devoted to women. Your digital photos can look good at any size!
|
|
*
|
|
* @section license License
|
|
*
|
|
* AVIR License Agreement
|
|
*
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (c) 2015-2019 Aleksey Vaneev
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* Please credit the author of this library in your documentation in the
|
|
* following way: "AVIR image resizing algorithm designed by Aleksey Vaneev"
|
|
*
|
|
* @version 2.4
|
|
*/
|
|
|
|
#ifndef AVIR_CIMAGERESIZER_INCLUDED
|
|
#define AVIR_CIMAGERESIZER_INCLUDED
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
|
|
namespace avir {
|
|
|
|
/**
|
|
* The macro defines AVIR version string.
|
|
*/
|
|
|
|
#define AVIR_VERSION "2.4"
|
|
|
|
/**
|
|
* The macro equals to "pi" constant, fills 53-bit floating point mantissa.
|
|
* Undefined at the end of file.
|
|
*/
|
|
|
|
#define AVIR_PI 3.1415926535897932
|
|
|
|
/**
|
|
* The macro equals to "pi divided by 2" constant, fills 53-bit floating
|
|
* point mantissa. Undefined at the end of file.
|
|
*/
|
|
|
|
#define AVIR_PId2 1.5707963267948966
|
|
|
|
/**
|
|
* Rounding function, based on the (int) typecast. Biased result. Not suitable
|
|
* for numbers >= 2^31.
|
|
*
|
|
* @param d Value to round.
|
|
* @return Rounded value. Some bias may be introduced.
|
|
*/
|
|
|
|
template< class T >
inline T round( const T d )
{
	// Negative inputs are mirrored so that the (int) truncation rounds
	// half-way cases away from zero on both sides.
	if( d < 0.0 )
	{
		return( -(T) (int) ( (T) 0.5 - d ));
	}

	return( (T) (int) ( d + (T) 0.5 ));
}
|
|
|
|
/**
|
|
* Template function "clamps" (clips) the specified value so that it is not
|
|
* lesser than "minv", and not greater than "maxv".
|
|
*
|
|
* @param Value Value to clamp.
|
|
* @param minv Minimal allowed value.
|
|
* @param maxv Maximal allowed value.
|
|
* @return The clamped value.
|
|
*/
|
|
|
|
template< class T >
inline T clamp( const T& Value, const T minv, const T maxv )
{
	// The lower bound is tested first, so it takes precedence when both
	// comparisons would succeed.
	if( Value < minv )
	{
		return( minv );
	}

	return( Value > maxv ? maxv : Value );
}
|
|
|
|
/**
|
|
* Power 2.4 approximation function, designed for sRGB gamma correction.
|
|
*
|
|
* @param x Argument, in the range 0.09 to 1.
|
|
* @return Value raised into power 2.4, approximate.
|
|
*/
|
|
|
|
template< class T >
inline T pow24_sRGB( const T x )
{
	// Powers of the argument used by the approximation below.
	const double sq = x * x;
	const double cube = sq * x;
	const double quad = sq * sq;

	// The terms are kept in their original order so that the
	// floating-point evaluation sequence is unchanged.
	const double approx = 0.0985766365536824 + 0.839474952656502 * sq +
		0.363287814061725 * cube - 0.0125559718896615 /
		( 0.12758338921578 + 0.290283465468235 * x ) -
		0.231757513261358 * x - 0.0395365717969074 * quad;

	return( (T) approx );
}
|
|
|
|
/**
|
|
* Power 1/2.4 approximation function, designed for sRGB gamma correction.
|
|
*
|
|
* @param x Argument, in the range 0.003 to 1.
|
|
* @return Value raised into power 1/2.4, approximate.
|
|
*/
|
|
|
|
template< class T >
inline T pow24i_sRGB( const T x )
{
	// Successive square roots: x^(1/2), x^(1/4) and x^(1/8).
	const double root2 = sqrt( x );
	const double root4 = sqrt( root2 );
	const double root8 = sqrt( root4 );

	// The terms are kept in their original order so that the
	// floating-point evaluation sequence is unchanged.
	const double approx = 0.000213364515060263 + 0.0149409239419218 * x +
		0.433973412731747 * root2 + root4 * ( 0.659628181609715 * root8 -
		0.0380957908841466 - 0.0706476137208521 * root2 );

	return( (T) approx );
}
|
|
|
|
/**
|
|
* Function approximately linearizes the sRGB gamma value.
|
|
*
|
|
* @param s sRGB gamma value, in the range 0 to 1.
|
|
* @return Linearized sRGB gamma value, approximated.
|
|
*/
|
|
|
|
template< class T >
inline T convertSRGB2Lin( const T s )
{
	const T a = (T) 0.055;

	// Values up to 0.04045 use the linear segment; everything else goes
	// through the 2.4-power approximation.
	return( s <= (T) 0.04045 ?
		s / (T) 12.92 :
		pow24_sRGB(( s + a ) / ( (T) 1 + a )));
}
|
|
|
|
/**
|
|
* Function approximately de-linearizes the linear gamma value.
|
|
*
|
|
* @param s Linear gamma value, in the range 0 to 1.
|
|
* @return sRGB gamma value, approximated.
|
|
*/
|
|
|
|
template< class T >
inline T convertLin2SRGB( const T s )
{
	const T a = (T) 0.055;

	// Values up to 0.0031308 use the linear segment; everything else goes
	// through the 1/2.4-power approximation.
	return( s <= (T) 0.0031308 ?
		(T) 12.92 * s :
		( (T) 1 + a ) * pow24i_sRGB( s ) - a );
}
|
|
|
|
/**
|
|
* Function converts (via typecast) specified array of type T1 values of
|
|
* length l into array of type T2 values. If T1 is the same as T2, copy
|
|
* operation is performed. When copying data at overlapping address spaces,
|
|
* "op" should be lower than "ip".
|
|
*
|
|
* @param ip Input buffer.
|
|
* @param[out] op Output buffer.
|
|
* @param l The number of elements to copy.
|
|
* @param ipinc Input buffer pointer increment.
|
|
* @param opinc Output buffer pointer increment.
|
|
*/
|
|
|
|
template< class T1, class T2 >
inline void copyArray( const T1* ip, T2* op, int l,
	const int ipinc = 1, const int opinc = 1 )
{
	// Elements are converted one at a time, front to back; a non-positive
	// "l" copies nothing.
	for( ; l > 0; l-- )
	{
		*op = (T2) *ip;
		op += opinc;
		ip += ipinc;
	}
}
|
|
|
|
/**
|
|
* Function adds values located in array "ip" to array "op".
|
|
*
|
|
* @param ip Input buffer.
|
|
* @param[out] op Output buffer.
|
|
* @param l The number of elements to add.
|
|
* @param ipinc Input buffer pointer increment.
|
|
* @param opinc Output buffer pointer increment.
|
|
*/
|
|
|
|
template< class T1, class T2 >
inline void addArray( const T1* ip, T2* op, int l,
	const int ipinc = 1, const int opinc = 1 )
{
	// Accumulates "l" strided input elements into the strided output; a
	// non-positive "l" changes nothing.
	for( ; l > 0; l-- )
	{
		*op += *ip;
		op += opinc;
		ip += ipinc;
	}
}
|
|
|
|
/**
|
|
* Function that replicates a set of adjacent elements several times in a row.
|
|
* This operation is usually used to replicate pixels at the start or end of
|
|
* image's scanline.
|
|
*
|
|
* @param ip Source array.
|
|
* @param ipl Source array length (usually 1..4, but can be any number).
|
|
* @param[out] op Destination buffer.
|
|
* @param l Number of times the source array should be replicated (the
|
|
* destination buffer should be able to hold ipl * l number of elements).
|
|
* @param opinc Destination buffer position increment after replicating the
|
|
* source array. This value should be equal to at least ipl.
|
|
*/
|
|
|
|
template< class T1, class T2 >
inline void replicateArray( const T1* const ip, const int ipl, T2* op, int l,
	const int opinc )
{
	// Source lengths 1..4 get unrolled copies; every other length goes
	// through the generic element loop.
	switch( ipl )
	{
		case 1:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = ip[ 0 ];
				op += opinc;
			}
			break;

		case 2:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = ip[ 0 ];
				op[ 1 ] = ip[ 1 ];
				op += opinc;
			}
			break;

		case 3:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = ip[ 0 ];
				op[ 1 ] = ip[ 1 ];
				op[ 2 ] = ip[ 2 ];
				op += opinc;
			}
			break;

		case 4:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = ip[ 0 ];
				op[ 1 ] = ip[ 1 ];
				op[ 2 ] = ip[ 2 ];
				op[ 3 ] = ip[ 3 ];
				op += opinc;
			}
			break;

		default:
			for( ; l > 0; l-- )
			{
				for( int k = 0; k < ipl; k++ )
				{
					op[ k ] = ip[ k ];
				}

				op += opinc;
			}
			break;
	}
}
|
|
|
|
/**
|
|
* Function calculates frequency response of the specified FIR filter at the
|
|
* specified circular frequency. Phase can be calculated as atan2( im, re ).
|
|
* Function uses computationally-efficient oscillators instead of "cos" and
|
|
* "sin" functions.
|
|
*
|
|
* @param flt FIR filter's coefficients.
|
|
* @param fltlen Number of coefficients (taps) in the filter.
|
|
* @param th Circular frequency [0; pi].
|
|
* @param[out] re0 Resulting real part of the complex frequency response.
|
|
* @param[out] im0 Resulting imaginary part of the complex frequency response.
|
|
* @param fltlat Filter's latency in samples (taps).
|
|
*/
|
|
|
|
template< class T >
inline void calcFIRFilterResponse( const T* flt, int fltlen,
	const double th, double& re0, double& im0, const int fltlat = 0 )
{
	// Recurrence multiplier shared by the cosine and sine oscillators.
	const double oscMul = 2.0 * cos( th );

	// Current oscillator values; with zero latency they start at exactly
	// (1, 0) without calling cos() or sin().
	double cosCur = 1.0;
	double sinCur = 0.0;

	if( fltlat != 0 )
	{
		cosCur = cos( -fltlat * th );
		sinCur = sin( -fltlat * th );
	}

	// Oscillator values one step before the current ones.
	double cosPrev = cos( -( fltlat + 1 ) * th );
	double sinPrev = sin( -( fltlat + 1 ) * th );

	double accRe = 0.0;
	double accIm = 0.0;

	for( int i = 0; i < fltlen; i++ )
	{
		accRe += cosCur * flt[ i ];
		accIm += sinCur * flt[ i ];

		// Advance both oscillators by one tap.
		const double cosNext = oscMul * cosCur - cosPrev;
		cosPrev = cosCur;
		cosCur = cosNext;

		const double sinNext = oscMul * sinCur - sinPrev;
		sinPrev = sinCur;
		sinCur = sinNext;
	}

	re0 = accRe;
	im0 = accIm;
}
|
|
|
|
/**
|
|
* Function normalizes FIR filter so that its frequency response at DC is
|
|
* equal to DCGain.
|
|
*
|
|
* @param[in,out] p Filter coefficients.
|
|
* @param l Filter length.
|
|
* @param DCGain Filter's gain at DC.
|
|
* @param pstep "p" array step.
|
|
*/
|
|
|
|
template< class T >
inline void normalizeFIRFilter( T* const p, const int l, const double DCGain,
	const int pstep = 1 )
{
	// First pass: sum the "l" strided coefficients.
	double total = 0.0;
	const T* rd = p;

	for( int n = l; n > 0; n-- )
	{
		total += *rd;
		rd += pstep;
	}

	// Second pass: scale every coefficient so that the sum becomes DCGain.
	const double scale = DCGain / total;
	T* wr = p;

	for( int n = l; n > 0; n-- )
	{
		*wr = (T) ( *wr * scale );
		wr += pstep;
	}
}
|
|
|
|
/**
|
|
* @brief Memory buffer class for element array storage, with capacity
|
|
* tracking.
|
|
*
|
|
* Allows easier handling of memory blocks allocation and automatic
|
|
* deallocation for arrays (buffers) consisting of elements of specified
|
|
* class. Tracks buffer's capacity in "int" variable; unsuitable for
|
|
* allocation of very large memory blocks (with more than 2 billion elements).
|
|
*
|
|
* This class manages memory space only - it does not perform element class
|
|
* construction (initialization) operations. Buffer's required memory address
|
|
* alignment specification is supported.
|
|
*
|
|
* Uses standard library to allocate and deallocate memory.
|
|
*
|
|
* @tparam T Buffer element's type.
|
|
* @tparam capint Buffer capacity's type to use. Use size_t for large buffers.
|
|
*/
|
|
|
|
template< class T, typename capint = int >
class CBuffer
{
public:
	/**
	 * Default constructor creates an empty buffer: no storage, zero
	 * capacity and default alignment.
	 */

	CBuffer()
		: Data( NULL )
		, DataAligned( NULL )
		, Capacity( 0 )
		, Alignment( 0 )
	{
	}

	/**
	 * Constructor creates the buffer with the specified capacity.
	 *
	 * @param aCapacity Buffer's capacity.
	 * @param aAlignment Buffer's required memory address alignment. 0 - use
	 * stdlib's default alignment.
	 */

	CBuffer( const capint aCapacity, const int aAlignment = 0 )
	{
		allocinit( aCapacity, aAlignment );
	}

	/**
	 * Copy constructor allocates storage with the source's capacity and
	 * alignment, and copies the source's elements bytewise.
	 *
	 * @param Source Buffer to copy.
	 */

	CBuffer( const CBuffer& Source )
	{
		allocinit( Source.Capacity, Source.Alignment );
		copyElements( Source.DataAligned, Capacity );
	}

	~CBuffer()
	{
		freeData();
	}

	/**
	 * Assignment reallocates *this buffer with the source's capacity and
	 * alignment, and copies the source's elements bytewise.
	 *
	 * @param Source Buffer to copy.
	 * @return Reference to *this.
	 */

	CBuffer& operator = ( const CBuffer& Source )
	{
		// Self-assignment must be a no-op: alloc() releases the current
		// storage, which is also the source of the copy in that case.
		if( this != &Source )
		{
			alloc( Source.Capacity, Source.Alignment );
			copyElements( Source.DataAligned, Capacity );
		}

		return( *this );
	}

	/**
	 * Function allocates memory so that the specified number of elements
	 * can be stored in *this buffer object. Previous content is discarded.
	 *
	 * @param aCapacity Storage for this number of elements to allocate.
	 * @param aAlignment Buffer's required memory address alignment,
	 * power-of-2 values only. 0 - use stdlib's default alignment.
	 */

	void alloc( const capint aCapacity, const int aAlignment = 0 )
	{
		freeData();
		allocinit( aCapacity, aAlignment );
	}

	/**
	 * Function deallocates any previously allocated buffer and resets
	 * *this object to the empty state.
	 */

	void free()
	{
		freeData();
		Data = NULL;
		DataAligned = NULL;
		Capacity = 0;
		Alignment = 0;
	}

	/**
	 * @return The capacity of the element buffer.
	 */

	capint getCapacity() const
	{
		return( Capacity );
	}

	/**
	 * Function "forces" *this buffer to have an arbitrary capacity. Calling
	 * this function invalidates all further operations except deleting *this
	 * object. This function should not be usually used at all. Function can
	 * be used to "model" certain buffer capacity without calling a costly
	 * memory allocation function.
	 *
	 * @param NewCapacity A new "forced" capacity.
	 */

	void forceCapacity( const capint NewCapacity )
	{
		Capacity = NewCapacity;
	}

	/**
	 * Function reallocates *this buffer to a larger size so that it will be
	 * able to hold the specified number of elements. Downsizing is not
	 * performed. Alignment is not changed.
	 *
	 * @param NewCapacity New (increased) capacity.
	 * @param DoDataCopy "True" if data in the buffer should be retained.
	 */

	void increaseCapacity( const capint NewCapacity,
		const bool DoDataCopy = true )
	{
		if( NewCapacity < Capacity )
		{
			return;
		}

		if( DoDataCopy )
		{
			// Remember the old block, allocate the new one, then move the
			// old elements over before releasing the old block.
			const capint PrevCapacity = Capacity;
			T* const PrevData = Data;
			T* const PrevDataAligned = DataAligned;

			allocinit( NewCapacity, Alignment );
			copyElements( PrevDataAligned, PrevCapacity );

			:: free( PrevData );
		}
		else
		{
			:: free( Data );
			allocinit( NewCapacity, Alignment );
		}
	}

	/**
	 * Function "truncates" (reduces) capacity of the buffer without
	 * reallocating it. Alignment is not changed.
	 *
	 * @param NewCapacity New required capacity.
	 */

	void truncateCapacity( const capint NewCapacity )
	{
		if( NewCapacity >= Capacity )
		{
			return;
		}

		Capacity = NewCapacity;
	}

	/**
	 * Function increases capacity so that the specified number of
	 * elements can be stored. The capacity candidate is repeatedly grown by
	 * one third of its value plus one until it reaches the required
	 * capacity. Alignment is not changed.
	 *
	 * @param ReqCapacity Required capacity.
	 */

	void updateCapacity( const capint ReqCapacity )
	{
		if( ReqCapacity <= Capacity )
		{
			return;
		}

		capint NewCapacity = Capacity;

		while( NewCapacity < ReqCapacity )
		{
			NewCapacity += NewCapacity / 3 + 1;
		}

		increaseCapacity( NewCapacity );
	}

	/**
	 * @return Pointer to the first (address-aligned) element of the buffer.
	 */

	operator T* () const
	{
		return( DataAligned );
	}

private:
	T* Data; ///< Element buffer pointer, as returned by malloc().
		///<
	T* DataAligned; ///< Memory address-aligned element buffer pointer.
		///<
	capint Capacity; ///< Element buffer capacity.
		///<
	int Alignment; ///< Memory address alignment in use. 0 - use stdlib's
		///< default alignment.
		///<

	/**
	 * Internal element buffer allocation function used during object
	 * construction. Does not release any previously held storage.
	 *
	 * @param aCapacity Storage for this number of elements to allocate.
	 * @param aAlignment Buffer's required memory address alignment. 0 - use
	 * stdlib's default alignment.
	 */

	void allocinit( const capint aCapacity, const int aAlignment )
	{
		if( aAlignment == 0 )
		{
			Data = (T*) :: malloc( aCapacity * sizeof( T ));
			DataAligned = Data;
			Alignment = 0;
		}
		else
		{
			// Over-allocate by "aAlignment" bytes so that an aligned
			// address always fits inside the block.
			Data = (T*) :: malloc( aCapacity * sizeof( T ) + aAlignment );
			DataAligned = alignptr( Data, aAlignment );
			Alignment = aAlignment;
		}

		Capacity = aCapacity;
	}

	/**
	 * Function copies elements bytewise into the aligned buffer. Nothing is
	 * done for a zero count, so memcpy() is never called with the NULL
	 * pointers of an empty buffer.
	 *
	 * @param Src Source elements.
	 * @param Count The number of elements to copy.
	 */

	void copyElements( const T* const Src, const capint Count )
	{
		if( Count > 0 )
		{
			memcpy( DataAligned, Src, Count * sizeof( T ));
		}
	}

	/**
	 * Function frees a previously allocated Data buffer.
	 */

	void freeData()
	{
		:: free( Data );
	}

	/**
	 * Function modifies the specified pointer so that it becomes memory
	 * address-aligned.
	 *
	 * @param ptr Pointer to align.
	 * @param align Alignment in bytes to apply.
	 * @return Pointer aligned to align bytes. Works with power-of-2
	 * alignments only. If no alignment is necessary, "align" bytes will be
	 * added to the pointer value.
	 */

	template< class Tp >
	inline Tp alignptr( const Tp ptr, const uintptr_t align )
	{
		return( (Tp) ( (uintptr_t) ptr + align -
			( (uintptr_t) ptr & ( align - 1 ))) );
	}
};
|
|
|
|
/**
|
|
* Function optimizes the length of the symmetric-odd FIR filter by removing
|
|
* left- and rightmost elements that are below specific threshold.
|
|
*
|
|
* Synthetic test shows that filter gets optimized in 2..3% of cases and in
|
|
* each such case optimization reduces filter length by 6..8%. Optimization,
|
|
* however, may skew the results of algorithm modeling and complexity
|
|
* calculation leading to a choice of a less optimal algorithm.
|
|
*
|
|
* @param[in,out] Flt Buffer that contains filter being optimized.
|
|
* @param[in,out] FltLatency Variable that holds the current latency of the
|
|
* filter. May be adjusted on function return.
|
|
* @param Threshold Threshold level.
|
|
*/
|
|
|
|
template< class T >
|
|
inline void optimizeFIRFilter( CBuffer< T >& Flt, int& FltLatency,
|
|
T const Threshold = (T) 0.00001 )
|
|
{
|
|
int i;
|
|
|
|
// Optimize length.
|
|
|
|
for( i = 0; i <= FltLatency; i++ )
|
|
{
|
|
if( fabs( Flt[ i ]) >= Threshold || i == FltLatency )
|
|
{
|
|
if( i > 0 )
|
|
{
|
|
const int NewCapacity = Flt.getCapacity() - i * 2;
|
|
copyArray( &Flt[ i ], &Flt[ 0 ], NewCapacity );
|
|
Flt.truncateCapacity( NewCapacity );
|
|
FltLatency -= i;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief Array of structured objects.
|
|
*
|
|
* Implements allocation of a linear array of objects of class T (which are
|
|
* initialized), addressable via operator[]. Each object is created via the
|
|
* "operator new". New object insertions are quick since implementation uses
|
|
* prior space allocation (capacity), thus not requiring frequent memory block
|
|
* reallocations.
|
|
*
|
|
* @tparam T Array element's type.
|
|
*/
|
|
|
|
template< class T >
|
|
class CStructArray
|
|
{
|
|
public:
|
|
CStructArray()
|
|
: ItemCount( 0 )
|
|
{
|
|
}
|
|
|
|
CStructArray( const CStructArray& Source )
|
|
: ItemCount( 0 )
|
|
, Items( Source.getItemCount() )
|
|
{
|
|
while( ItemCount < Source.getItemCount() )
|
|
{
|
|
Items[ ItemCount ] = new T( Source[ ItemCount ]);
|
|
ItemCount++;
|
|
}
|
|
}
|
|
|
|
~CStructArray()
|
|
{
|
|
clear();
|
|
}
|
|
|
|
CStructArray& operator = ( const CStructArray& Source )
|
|
{
|
|
clear();
|
|
|
|
const int NewCount = Source.ItemCount;
|
|
Items.updateCapacity( NewCount );
|
|
|
|
while( ItemCount < NewCount )
|
|
{
|
|
Items[ ItemCount ] = new T( Source[ ItemCount ]);
|
|
ItemCount++;
|
|
}
|
|
|
|
return( *this );
|
|
}
|
|
|
|
T& operator []( const int Index )
|
|
{
|
|
return( *Items[ Index ]);
|
|
}
|
|
|
|
const T& operator []( const int Index ) const
|
|
{
|
|
return( *Items[ Index ]);
|
|
}
|
|
|
|
/**
|
|
* Function creates a new object of type T with the default constructor
|
|
* and adds this object to the array.
|
|
*
|
|
* @return Reference to a newly added object.
|
|
*/
|
|
|
|
T& add()
|
|
{
|
|
if( ItemCount == Items.getCapacity() )
|
|
{
|
|
Items.increaseCapacity( ItemCount * 3 / 2 + 1 );
|
|
}
|
|
|
|
Items[ ItemCount ] = new T();
|
|
ItemCount++;
|
|
|
|
return( (*this)[ ItemCount - 1 ]);
|
|
}
|
|
|
|
/**
|
|
* Function changes number of allocated items. New items are created with
|
|
* the default constructor. If NewCount is below the current item count,
|
|
* items that are above NewCount range will be destructed.
|
|
*
|
|
* @param NewCount New requested item count.
|
|
*/
|
|
|
|
void setItemCount( const int NewCount )
|
|
{
|
|
if( NewCount > ItemCount )
|
|
{
|
|
Items.increaseCapacity( NewCount );
|
|
|
|
while( ItemCount < NewCount )
|
|
{
|
|
Items[ ItemCount ] = new T();
|
|
ItemCount++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while( ItemCount > NewCount )
|
|
{
|
|
ItemCount--;
|
|
delete Items[ ItemCount ];
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function erases all items of *this array.
|
|
*/
|
|
|
|
void clear()
|
|
{
|
|
while( ItemCount > 0 )
|
|
{
|
|
ItemCount--;
|
|
delete Items[ ItemCount ];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return The number of allocated items.
|
|
*/
|
|
|
|
int getItemCount() const
|
|
{
|
|
return( ItemCount );
|
|
}
|
|
|
|
private:
|
|
int ItemCount; ///< The number of items available in the array.
|
|
///<
|
|
CBuffer< T* > Items; ///< Element buffer.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Sine signal generator class.
|
|
*
|
|
* Class implements sine signal generator without biasing, with
|
|
* constructor-based initialization only. This generator uses oscillator
|
|
* instead of "sin" function.
|
|
*/
|
|
|
|
class CSineGen
{
public:
	/**
	 * Constructor initializes *this sine signal generator.
	 *
	 * @param si Sine function increment, in radians.
	 * @param ph Starting phase, in radians. Add 0.5 * AVIR_PI for cosine
	 * function.
	 */

	CSineGen( const double si, const double ph )
		: svalue1( sin( ph ))
		, svalue2( sin( ph - si ))
		, sincr( 2.0 * cos( si ))
	{
	}

	/**
	 * @return The next value of the sine function, without biasing.
	 */

	double generate()
	{
		// The value returned now is the stored "current" sample; the
		// recurrence then derives the following sample from the two
		// latest ones.
		const double current = svalue1;
		const double following = sincr * current - svalue2;

		svalue2 = current;
		svalue1 = following;

		return( current );
	}

private:
	double svalue1; ///< Current sine value.
		///<
	double svalue2; ///< Previous sine value.
		///<
	double sincr; ///< Oscillator multiplier, 2 * cos( si ).
		///<
};
|
|
|
|
/**
|
|
* @brief Peaked Cosine window function generator class.
|
|
*
|
|
* Class implements Peaked Cosine window function generator. Generates the
|
|
* right-handed half of the window function. The Alpha parameter of this
|
|
* window function offers the control of the balance between the early and
|
|
* later taps of the filter. E.g. at Alpha=1 both early and later taps are
|
|
* attenuated, but at Alpha=4 mostly later taps are attenuated. This offers a
|
|
* great control over ringing artifacts produced by a low-pass filter in image
|
|
* processing, without compromising achieved image sharpness.
|
|
*/
|
|
|
|
class CDSPWindowGenPeakedCosine
|
|
{
|
|
public:
|
|
/**
|
|
* Constructor initializes *this window function generator.
|
|
*
|
|
* @param aAlpha Alpha parameter, affects the peak shape (peak
|
|
* augmentation) of the window function. Should be >= 1.0.
|
|
* @param aLen2 Half filter's length (non-truncated).
|
|
*/
|
|
|
|
CDSPWindowGenPeakedCosine( const double aAlpha, const double aLen2 )
|
|
: Alpha( aAlpha )
|
|
, Len2( aLen2 )
|
|
, wn( 0 )
|
|
, w1( AVIR_PId2 / Len2, AVIR_PI * 0.5 )
|
|
{
|
|
}
|
|
|
|
/**
|
|
* @return The next Peaked Cosine window function coefficient.
|
|
*/
|
|
|
|
double generate()
|
|
{
|
|
const double h = pow( wn / Len2, Alpha );
|
|
wn++;
|
|
|
|
return( w1.generate() * ( 1.0 - h ));
|
|
}
|
|
|
|
private:
|
|
double Alpha; ///< Alpha parameter, affects the peak shape of window.
|
|
///<
|
|
double Len2; ///< Half length of the window function.
|
|
///<
|
|
int wn; ///< Window function integer position. 0 - center of the
|
|
///< window function.
|
|
///<
|
|
CSineGen w1; ///< Sine-wave generator.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief FIR filter-based equalizer generator.
|
|
*
|
|
* Class implements an object used to generate symmetric-odd FIR filters with
|
|
* the specified frequency response (aka paragraphic equalizer). The
|
|
* calculated filter is windowed by the Peaked Cosine window function.
|
|
*
|
|
* In image processing, due to short length of filters being used (6-8 taps)
|
|
* the resulting frequency response of the filter is approximate and may be
|
|
* mathematically imperfect, but still adequate to the visual requirements.
|
|
*
|
|
* On a side note, this equalizer generator can be successfully used for audio
|
|
* signal equalization as well: for example, it is used in almost the same
|
|
* form in Voxengo Marvel GEQ equalizer plug-in.
|
|
*
|
|
* Filter generation is based on decomposition of frequency range into
|
|
* spectral bands, with each band represented by linear and ramp "kernels".
|
|
* When the filter is built, these kernels are combined together with
|
|
* different weights that approximate the required frequency response.
|
|
*/
|
|
|
|
class CDSPFIREQ
|
|
{
|
|
public:
|
|
/**
|
|
* Function initializes *this object with the required parameters. The
|
|
* gain of frequencies beyond the MinFreq..MaxFreq range are controlled by
|
|
* the first and the last band's gain.
|
|
*
|
|
* @param SampleRate Processing sample rate (use 2 for image processing).
|
|
* @param aFilterLength Required filter length in samples (taps). The
|
|
* actual filter length is truncated to an integer value.
|
|
* @param aBandCount Number of band crossover points required to control,
|
|
* including bands at MinFreq and MaxFreq.
|
|
* @param MinFreq Minimal frequency that should be controlled.
|
|
* @param MaxFreq Maximal frequency that should be controlled.
|
|
* @param IsLogBands "True" if the bands should be spaced logarithmically.
|
|
* @param WFAlpha Peaked Cosine window function's Alpha parameter.
|
|
*/
|
|
|
|
void init( const double SampleRate, const double aFilterLength,
|
|
const int aBandCount, const double MinFreq, const double MaxFreq,
|
|
const bool IsLogBands, const double WFAlpha )
|
|
{
|
|
FilterLength = aFilterLength;
|
|
BandCount = aBandCount;
|
|
|
|
CenterFreqs.alloc( BandCount );
|
|
|
|
z = (int) ceil( FilterLength * 0.5 );
|
|
zi = z + ( z & 1 );
|
|
z2 = z * 2;
|
|
|
|
CBuffer< double > oscbuf( z2 );
|
|
initOscBuf( oscbuf );
|
|
|
|
CBuffer< double > winbuf( z );
|
|
initWinBuf( winbuf, WFAlpha );
|
|
|
|
UseFirstVirtBand = ( MinFreq > 0.0 );
|
|
const int k = zi * ( BandCount + ( UseFirstVirtBand ? 1 : 0 ));
|
|
Kernels1.alloc( k );
|
|
Kernels2.alloc( k );
|
|
|
|
double m; // Frequency step multiplier.
|
|
double mo; // Frequency step offset (addition).
|
|
|
|
if( IsLogBands )
|
|
{
|
|
m = exp( log( MaxFreq / MinFreq ) / ( BandCount - 1 ));
|
|
mo = 0.0;
|
|
}
|
|
else
|
|
{
|
|
m = 1.0;
|
|
mo = ( MaxFreq - MinFreq ) / ( BandCount - 1 );
|
|
}
|
|
|
|
double f = MinFreq;
|
|
double x1 = 0.0;
|
|
double x2;
|
|
int si;
|
|
|
|
if( UseFirstVirtBand )
|
|
{
|
|
si = 0;
|
|
}
|
|
else
|
|
{
|
|
si = 1;
|
|
CenterFreqs[ 0 ] = 0.0;
|
|
f = f * m + mo;
|
|
}
|
|
|
|
double* kernbuf1 = &Kernels1[ 0 ];
|
|
double* kernbuf2 = &Kernels2[ 0 ];
|
|
int i;
|
|
|
|
for( i = si; i < BandCount; i++ )
|
|
{
|
|
x2 = f * 2.0 / SampleRate;
|
|
CenterFreqs[ i ] = x2;
|
|
|
|
fillBandKernel( x1, x2, kernbuf1, kernbuf2, oscbuf, winbuf );
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
f = f * m + mo;
|
|
}
|
|
|
|
if( x1 < 1.0 )
|
|
{
|
|
UseLastVirtBand = true;
|
|
fillBandKernel( x1, 1.0, kernbuf1, kernbuf2, oscbuf, winbuf );
|
|
}
|
|
else
|
|
{
|
|
UseLastVirtBand = false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return Filter's length, in samples (taps).
|
|
*/
|
|
|
|
int getFilterLength() const
|
|
{
|
|
return( z2 - 1 );
|
|
}
|
|
|
|
/**
|
|
* @return Filter's latency (group delay), in samples (taps).
|
|
*/
|
|
|
|
int getFilterLatency() const
|
|
{
|
|
return( z - 1 );
|
|
}
|
|
|
|
/**
|
|
* Function creates symmetric-odd FIR filter with the specified gain
|
|
* levels at band crossover points.
|
|
*
|
|
* @param BandGains Array of linear gain levels, count=BandCount specified
|
|
* in the init() function.
|
|
* @param[out] Filter Output filter buffer, length = getFilterLength().
|
|
*/
|
|
|
|
void buildFilter( const double* const BandGains, double* const Filter )
|
|
{
|
|
const double* kernbuf1 = &Kernels1[ 0 ];
|
|
const double* kernbuf2 = &Kernels2[ 0 ];
|
|
double x1 = 0.0;
|
|
double y1 = BandGains[ 0 ];
|
|
double x2;
|
|
double y2;
|
|
|
|
int i;
|
|
int si;
|
|
|
|
if( UseFirstVirtBand )
|
|
{
|
|
si = 1;
|
|
x2 = CenterFreqs[ 0 ];
|
|
y2 = y1;
|
|
}
|
|
else
|
|
{
|
|
si = 2;
|
|
x2 = CenterFreqs[ 1 ];
|
|
y2 = BandGains[ 1 ];
|
|
}
|
|
|
|
copyBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2,
|
|
x1 * y2 - x2 * y1 );
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
y1 = y2;
|
|
|
|
for( i = si; i < BandCount; i++ )
|
|
{
|
|
x2 = CenterFreqs[ i ];
|
|
y2 = BandGains[ i ];
|
|
|
|
addBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2,
|
|
x1 * y2 - x2 * y1 );
|
|
|
|
kernbuf1 += zi;
|
|
kernbuf2 += zi;
|
|
x1 = x2;
|
|
y1 = y2;
|
|
}
|
|
|
|
if( UseLastVirtBand )
|
|
{
|
|
addBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2,
|
|
x1 * y2 - y1 );
|
|
}
|
|
|
|
for( i = 0; i < z - 1; i++ )
|
|
{
|
|
Filter[ z + i ] = Filter[ z - 2 - i ];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function calculates filter's length (in samples) and latency depending
|
|
* on the required non-truncated filter length.
|
|
*
|
|
* @param aFilterLength Required filter length in samples (non-truncated).
|
|
* @param[out] Latency Resulting latency (group delay) of the filter,
|
|
* in samples (taps).
|
|
* @return Filter length in samples (taps).
|
|
*/
|
|
|
|
static int calcFilterLength( const double aFilterLength, int& Latency )
{
	// Half of the requested length, rounded up to a whole number of taps.
	const int HalfLen = (int) ceil( aFilterLength * 0.5 );

	Latency = HalfLen - 1;

	// Resulting length is always odd: two halves sharing one tap.
	return( 2 * HalfLen - 1 );
}
|
|
|
|
private:
|
|
double FilterLength; ///< Length of filter.
|
|
///<
|
|
int z; ///< Equals (int) ceil( FilterLength * 0.5 ).
|
|
///<
|
|
int zi; ///< Equals "z" if z is even, or z + 1 if z is odd. Used as a
|
|
///< Kernels1 and Kernels2 size multiplier and kernel buffer increment
|
|
///< to make sure each kernel buffer is 16-byte aligned.
|
|
///<
|
|
int z2; ///< Equals z * 2.
|
|
///<
|
|
int BandCount; ///< Number of controllable bands.
|
|
///<
|
|
CBuffer< double > CenterFreqs; ///< Center frequencies for all bands,
|
|
///< normalized to 0.0-1.0 range.
|
|
///<
|
|
CBuffer< double > Kernels1; ///< Half-length kernel buffers for each
|
|
///< spectral band (linear part).
|
|
///<
|
|
CBuffer< double > Kernels2; ///< Half-length kernel buffers for each
|
|
///< spectral band (ramp part).
|
|
///<
|
|
bool UseFirstVirtBand; ///< "True" if the first virtual band
|
|
///< (between 0.0 and MinFreq) should be used. The first virtual band
|
|
///< won't be used if MinFreq equals 0.0.
|
|
///<
|
|
bool UseLastVirtBand; ///< "True" if the last virtual band (between
|
|
///< MaxFreq and SampleRate * 0.5) should be used. The last virtual
|
|
///< band won't be used if MaxFreq * 2.0 equals SampleRate.
|
|
///<
|
|
|
|
/**
|
|
* Function initializes the "oscbuf" used in the fillBandKernel()
|
|
* function.
|
|
*
|
|
* @param oscbuf Oscillator buffer, length = z * 2.
|
|
*/
|
|
|
|
void initOscBuf( double* oscbuf ) const
|
|
{
|
|
int i = z;
|
|
|
|
while( i > 0 )
|
|
{
|
|
oscbuf[ 0 ] = 0.0;
|
|
oscbuf[ 1 ] = 1.0;
|
|
oscbuf += 2;
|
|
i--;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function initializes window function buffer. This function generates
|
|
* Peaked Cosine window function.
|
|
*
|
|
* @param winbuf Windowing buffer.
|
|
* @param Alpha Peaked Cosine alpha parameter.
|
|
*/
|
|
|
|
void initWinBuf( double* winbuf, const double Alpha ) const
|
|
{
|
|
CDSPWindowGenPeakedCosine wf( Alpha, FilterLength * 0.5 );
|
|
int i;
|
|
|
|
for( i = 1; i <= z; i++ )
|
|
{
|
|
winbuf[ z - i ] = wf.generate();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function fills first half of symmetric-odd FIR kernel for the band.
|
|
* This function should be called successively for adjacent bands.
|
|
* Previous band's x2 should be equal to current band's x1. A band kernel
|
|
* consists of 2 elements: linear kernel and ramp kernel.
|
|
*
|
|
* @param x1 Band's left corner frequency (0..1).
|
|
* @param x2 Band's right corner frequency (0..1).
|
|
* @param kernbuf1 Band kernel buffer 1 (linear part), length = z.
|
|
* @param kernbuf2 Band kernel buffer 2 (ramp part), length = z.
|
|
* @param oscbuf Oscillation buffer. Before the first call to
|
|
* fillBandKernel(), it should be initialized by calling the
|
|
* initOscBuf() function.
|
|
* @param winbuf Buffer that contains windowing function.
|
|
*/
|
|
|
|
void fillBandKernel( const double x1, const double x2, double* kernbuf1,
|
|
double* kernbuf2, double* oscbuf, const double* const winbuf )
|
|
{
|
|
const double s2_incr = AVIR_PI * x2;
|
|
const double s2_coeff = 2.0 * cos( s2_incr );
|
|
|
|
double s2_value1 = sin( s2_incr * ( -z + 1 ));
|
|
double c2_value1 = sin( s2_incr * ( -z + 1 ) + AVIR_PI * 0.5 );
|
|
oscbuf[ 0 ] = sin( s2_incr * -z );
|
|
oscbuf[ 1 ] = sin( s2_incr * -z + AVIR_PI * 0.5 );
|
|
|
|
int ks;
|
|
|
|
for( ks = 1; ks < z; ks++ )
|
|
{
|
|
const int ks2 = ks * 2;
|
|
const double s1_value1 = oscbuf[ ks2 ];
|
|
const double c1_value1 = oscbuf[ ks2 + 1 ];
|
|
oscbuf[ ks2 ] = s2_value1;
|
|
oscbuf[ ks2 + 1 ] = c2_value1;
|
|
|
|
const double x = AVIR_PI * ( ks - z );
|
|
const double v0 = winbuf[ ks - 1 ] / (( x1 - x2 ) * x );
|
|
|
|
kernbuf1[ ks - 1 ] = ( x2 * s2_value1 - x1 * s1_value1 +
|
|
( c2_value1 - c1_value1 ) / x ) * v0;
|
|
|
|
kernbuf2[ ks - 1 ] = ( s2_value1 - s1_value1 ) * v0;
|
|
|
|
s2_value1 = s2_coeff * s2_value1 - oscbuf[ ks2 - 2 ];
|
|
c2_value1 = s2_coeff * c2_value1 - oscbuf[ ks2 - 1 ];
|
|
}
|
|
|
|
kernbuf1[ z - 1 ] = ( x2 * x2 - x1 * x1 ) / ( x1 - x2 ) * 0.5;
|
|
kernbuf2[ z - 1 ] = -1.0;
|
|
}
|
|
|
|
/**
|
|
* Function copies band kernel's elements to the output buffer.
|
|
*
|
|
* @param outbuf Output buffer.
|
|
* @param kernbuf1 Kernel buffer 1 (linear part).
|
|
* @param kernbuf2 Kernel buffer 2 (ramp part).
|
|
* @param c Multiplier for linear kernel element.
|
|
* @param d Multiplier for ramp kernel element.
|
|
*/
|
|
|
|
void copyBandKernel( double* outbuf, const double* const kernbuf1,
|
|
const double* const kernbuf2, const double c, const double d ) const
|
|
{
|
|
int ks;
|
|
|
|
for( ks = 0; ks < z; ks++ )
|
|
{
|
|
outbuf[ ks ] = c * kernbuf1[ ks ] + d * kernbuf2[ ks ];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function adds band kernel's elements to the output buffer.
|
|
*
|
|
* @param outbuf Output buffer.
|
|
* @param kernbuf1 Kernel buffer 1 (linear part).
|
|
* @param kernbuf2 Kernel buffer 2 (ramp part).
|
|
* @param c Multiplier for linear kernel element.
|
|
* @param d Multiplier for ramp kernel element.
|
|
*/
|
|
|
|
void addBandKernel( double* outbuf, const double* const kernbuf1,
|
|
const double* const kernbuf2, const double c, const double d ) const
|
|
{
|
|
int ks;
|
|
|
|
for( ks = 0; ks < z; ks++ )
|
|
{
|
|
outbuf[ ks ] += c * kernbuf1[ ks ] + d * kernbuf2[ ks ];
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Low-pass filter windowed by Peaked Cosine window function.
|
|
*
|
|
* This class implements calculation of linear-phase symmetric-odd FIR
|
|
* low-pass filter windowed by the Peaked Cosine window function, for image
|
|
* processing applications.
|
|
*/
|
|
|
|
class CDSPPeakedCosineLPF
|
|
{
|
|
public:
|
|
int fl2; ///< Half filter's length, excluding the peak value. This value
|
|
///< can be also used as filter's latency in samples (taps).
|
|
///<
|
|
int FilterLen; ///< Filter's length in samples (taps).
|
|
///<
|
|
|
|
/**
|
|
* Constructor initalizes *this object.
|
|
*
|
|
* @param aLen2 Half-length (non-truncated) of low-pass filter, in samples
|
|
* (taps).
|
|
* @param aFreq2 Low-pass filter's corner frequency [0; pi].
|
|
* @param aAlpha Peaked Cosine window function Alpha parameter.
|
|
*/
|
|
|
|
CDSPPeakedCosineLPF( const double aLen2, const double aFreq2,
|
|
const double aAlpha )
|
|
: fl2( (int) ceil( aLen2 ) - 1 )
|
|
, FilterLen( fl2 + fl2 + 1 )
|
|
, Len2( aLen2 )
|
|
, Freq2( aFreq2 )
|
|
, Alpha( aAlpha )
|
|
{
|
|
}
|
|
|
|
/**
|
|
* Function generates a linear-phase low-pass filter windowed by Peaked
|
|
* Cosine window function.
|
|
*
|
|
* @param[out] op Output buffer, length = FilterLen (fl2 * 2 + 1).
|
|
* @param DCGain Required gain at DC. The resulting filter will be
|
|
* normalized to achieve this DC gain.
|
|
*/
|
|
|
|
template< class T >
|
|
void generateLPF( T* op, const double DCGain )
|
|
{
|
|
CDSPWindowGenPeakedCosine wf( Alpha, Len2 );
|
|
CSineGen f2( Freq2, 0.0 );
|
|
|
|
op += fl2;
|
|
T* op2 = op;
|
|
f2.generate();
|
|
int t = 1;
|
|
|
|
*op = (T) ( Freq2 * wf.generate() / AVIR_PI );
|
|
double s = *op;
|
|
|
|
while( t <= fl2 )
|
|
{
|
|
const double v = f2.generate() * wf.generate() / t / AVIR_PI;
|
|
op++;
|
|
op2--;
|
|
*op = (T) v;
|
|
*op2 = (T) v;
|
|
s += *op + *op2;
|
|
t++;
|
|
}
|
|
|
|
t = FilterLen;
|
|
s = DCGain / s;
|
|
|
|
while( t > 0 )
|
|
{
|
|
*op2 = (T) ( *op2 * s );
|
|
op2++;
|
|
t--;
|
|
}
|
|
}
|
|
|
|
private:
|
|
double Len2; ///< Half-length (non-truncated) of low-pass filter, in
|
|
///< samples (taps).
|
|
///<
|
|
double Freq2; ///< Low-pass filter's corner frequency.
|
|
///<
|
|
double Alpha; ///< Peaked Cosine window function Alpha parameter.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Buffer class for parametrized low-pass filter.
|
|
*
|
|
* This class extends the CBuffer< double > class by adding several variables
|
|
* that define a symmetric-odd FIR low-pass filter windowed by Peaked Cosine
|
|
* window function. This class can be used to compare filters without
|
|
* comparing their buffer contents.
|
|
*/
|
|
|
|
class CFltBuffer : public CBuffer< double >
{
public:
	double Len2; ///< Half-length (non-truncated) of low-pass filters, in
		///< samples (taps).
		///<
	double Freq; ///< Low-pass filter's corner frequency.
		///<
	double Alpha; ///< Peaked Cosine window function Alpha parameter.
		///<
	double DCGain; ///< DC gain applied to the filter.
		///<

	/**
	 * Constructor creates a buffer with all filter parameters set to zero.
	 */

	CFltBuffer()
		: CBuffer< double >()
		, Len2( 0.0 )
		, Freq( 0.0 )
		, Alpha( 0.0 )
		, DCGain( 0.0 )
	{
	}

	/**
	 * Operator compares filter parameters only, buffer contents are not
	 * examined.
	 *
	 * @param b2 Filter buffer to compare *this object to.
	 * @return "True" if both filters have the same Len2, Freq, Alpha and
	 * DCGain values.
	 */

	bool operator == ( const CFltBuffer& b2 ) const
	{
		if( Len2 != b2.Len2 )
		{
			return( false );
		}

		if( Freq != b2.Freq )
		{
			return( false );
		}

		if( Alpha != b2.Alpha )
		{
			return( false );
		}

		return( DCGain == b2.DCGain );
	}
};
|
|
|
|
/**
|
|
* @brief Sinc function-based fractional delay filter bank.
|
|
*
|
|
* Class implements storage and initialization of a bank of sinc
|
|
* function-based fractional delay filters, expressed as 1st order polynomial
|
|
* interpolation coefficients. The filters are produced from a single "long"
|
|
* windowed low-pass filter. Also supports 0th-order ("nearest neighbor")
|
|
* interpolation.
|
|
*
|
|
* This class also supports multiplication of each fractional delay filter by
|
|
* an external filter (usually a low-pass filter).
|
|
*
|
|
* @tparam fptype Specifies storage type of the filter coefficients bank. The
|
|
* filters are initially calculated using the "double" precision.
|
|
*/
|
|
|
|
template< class fptype >
|
|
class CDSPFracFilterBankLin
|
|
{
|
|
public:
|
|
CDSPFracFilterBankLin()
|
|
: Order( -1 )
|
|
{
|
|
}
|
|
|
|
/**
|
|
* Copy constructor copies a limited set of parameters of the source
|
|
* filter bank. The actual filters are not copied. Such copying is used
|
|
* during filtering steps "modeling" stage. A further init() function
|
|
* call is required.
|
|
*
|
|
* @param s Source filter bank.
|
|
*/
|
|
|
|
void copyInitParams( const CDSPFracFilterBankLin& s )
|
|
{
|
|
WFLen2 = s.WFLen2;
|
|
WFFreq = s.WFFreq;
|
|
WFAlpha = s.WFAlpha;
|
|
FracCount = s.FracCount;
|
|
Order = s.Order;
|
|
Alignment = s.Alignment;
|
|
SrcFilterLen = s.SrcFilterLen;
|
|
FilterLen = s.FilterLen;
|
|
FilterSize = s.FilterSize;
|
|
IsSrcTableBuilt = false;
|
|
ExtFilter = s.ExtFilter;
|
|
TableFillFlags.alloc( s.TableFillFlags.getCapacity() );
|
|
int i;
|
|
|
|
// Copy table fill flags, but shifted so that further initialization
|
|
// is still possible (such feature should not be used, though).
|
|
|
|
for( i = 0; i < TableFillFlags.getCapacity(); i++ )
|
|
{
|
|
TableFillFlags[ i ] = (uint8_t) ( s.TableFillFlags[ i ] << 2 );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Operator compares *this filter bank and another filter bank and returns
|
|
* "true" if their parameters are equal. Alignment is not taken into
|
|
* account.
|
|
*
|
|
* @param s Filter bank to compare to.
|
|
* @return "True" if compared banks have equal parameters.
|
|
*/
|
|
|
|
bool operator == ( const CDSPFracFilterBankLin& s ) const
|
|
{
|
|
return( Order == s.Order && WFLen2 == s.WFLen2 &&
|
|
WFFreq == s.WFFreq && WFAlpha == s.WFAlpha &&
|
|
FracCount == s.FracCount && ExtFilter == s.ExtFilter );
|
|
}
|
|
|
|
/**
|
|
* Function initializes (builds) the filter bank based on the supplied
|
|
* parameters. If the supplied parameters are equal to previously defined
|
|
* parameters, function does nothing (alignment is assumed to be never
|
|
* changing between the init() function calls).
|
|
*
|
|
* @param ReqFracCount Required number of fractional delays in the filter
|
|
* bank. The minimal value is 2.
|
|
* @param ReqOrder Required order of the interpolation polynomial
|
|
* (0 or 1).
|
|
* @param BaseLen Low-pass filter's base length, in samples (taps).
|
|
* Affects the actual length of the filter and its overall steepness.
|
|
* @param Cutoff Low-pass filter's normalized cutoff frequency [0; 1].
|
|
* @param aWFAlpha Peaked Cosine window function's Alpha parameter.
|
|
* @param aExtFilter External filter to apply to each fractional delay
|
|
* filter.
|
|
* @param aAlignment Memory alignment of the filter bank, power-of-2
|
|
* value. 0 - use default stdlib alignment.
|
|
* @param FltLenAlign Filter's length alignment, power-of-2 value.
|
|
*/
|
|
|
|
void init( const int ReqFracCount, const int ReqOrder,
|
|
const double BaseLen, const double Cutoff, const double aWFAlpha,
|
|
const CFltBuffer& aExtFilter, const int aAlignment = 0,
|
|
const int FltLenAlign = 1 )
|
|
{
|
|
double NewWFLen2 = 0.5 * BaseLen * ReqFracCount;
|
|
double NewWFFreq = AVIR_PI * Cutoff / ReqFracCount;
|
|
double NewWFAlpha = aWFAlpha;
|
|
|
|
if( ReqOrder == Order && NewWFLen2 == WFLen2 && NewWFFreq == WFFreq &&
|
|
NewWFAlpha == WFAlpha && ReqFracCount == FracCount &&
|
|
aExtFilter == ExtFilter )
|
|
{
|
|
IsInitRequired = false;
|
|
return;
|
|
}
|
|
|
|
WFLen2 = NewWFLen2;
|
|
WFFreq = NewWFFreq;
|
|
WFAlpha = NewWFAlpha;
|
|
FracCount = ReqFracCount;
|
|
Order = ReqOrder;
|
|
Alignment = aAlignment;
|
|
ExtFilter = aExtFilter;
|
|
|
|
CDSPPeakedCosineLPF p( WFLen2, WFFreq, WFAlpha );
|
|
SrcFilterLen = ( p.fl2 / ReqFracCount + 1 ) * 2;
|
|
|
|
const int ElementSize = ReqOrder + 1;
|
|
FilterLen = SrcFilterLen;
|
|
|
|
if( ExtFilter.getCapacity() > 0 )
|
|
{
|
|
FilterLen += ExtFilter.getCapacity() - 1;
|
|
}
|
|
|
|
FilterLen = ( FilterLen + FltLenAlign - 1 ) & ~( FltLenAlign - 1 );
|
|
FilterSize = FilterLen * ElementSize;
|
|
IsSrcTableBuilt = false;
|
|
IsInitRequired = true;
|
|
}
|
|
|
|
/**
|
|
* @return The length of each fractional delay filter, in samples (taps).
|
|
* Always an even value.
|
|
*/
|
|
|
|
int getFilterLen() const
|
|
{
|
|
return( FilterLen );
|
|
}
|
|
|
|
/**
|
|
* @return The number of fractional filters in use by *this bank.
|
|
*/
|
|
|
|
int getFracCount() const
|
|
{
|
|
return( FracCount );
|
|
}
|
|
|
|
/**
|
|
* @return The order of the interpolation polynomial.
|
|
*/
|
|
|
|
int getOrder() const
|
|
{
|
|
return( Order );
|
|
}
|
|
|
|
/**
|
|
* Function returns the pointer to the specified interpolation table
|
|
* filter.
|
|
*
|
|
* @param i Filter (fractional delay) index, in the range 0 to
|
|
* ReqFracCount - 1, inclusive.
|
|
* @return Pointer to filter. Higher order polynomial coefficients are
|
|
* stored after after previous order coefficients, separated by FilterLen
|
|
* elements.
|
|
*/
|
|
|
|
const fptype* getFilter( const int i )
|
|
{
|
|
if( !IsSrcTableBuilt )
|
|
{
|
|
buildSrcTable();
|
|
}
|
|
|
|
fptype* const Res = &Table[ i * FilterSize ];
|
|
|
|
if(( TableFillFlags[ i ] & 2 ) == 0 )
|
|
{
|
|
createFilter( i );
|
|
TableFillFlags[ i ] |= 2;
|
|
|
|
if( Order > 0 )
|
|
{
|
|
createFilter( i + 1 );
|
|
const fptype* const Res2 = Res + FilterSize;
|
|
fptype* const op = Res + FilterLen;
|
|
int j;
|
|
|
|
// Create higher-order interpolation coefficients (linear
|
|
// interpolation).
|
|
|
|
for( j = 0; j < FilterLen; j++ )
|
|
{
|
|
op[ j ] = Res2[ j ] - Res[ j ];
|
|
}
|
|
}
|
|
}
|
|
|
|
return( Res );
|
|
}
|
|
|
|
/**
|
|
* Function makes sure all fractional delay filters were created.
|
|
*/
|
|
|
|
void createAllFilters()
|
|
{
|
|
int i;
|
|
|
|
for( i = 0; i < FracCount; i++ )
|
|
{
|
|
getFilter( i );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function returns an approximate initialization complexity, expressed in
|
|
* the number of multiply-add operations. This includes fractional delay
|
|
* filters calculation and multiplication by an external filter. This
|
|
* function can only be called after the init() function.
|
|
*
|
|
* @param FracUseMap Fractional delays use map, each element corresponds
|
|
* to a single fractional delay, will be compared to the internal table
|
|
* fill flags. This map should include 0 and 1 values only.
|
|
* @return The complexity of the initialization, expressed in the number
|
|
* of multiply-add operations.
|
|
*/
|
|
|
|
int calcInitComplexity( const CBuffer< uint8_t >& FracUseMap ) const
|
|
{
|
|
const int FltInitCost = 65; // Cost to initialize a single sample
|
|
// of the fractional delay filter.
|
|
const int FltUseCost = FilterLen * Order +
|
|
SrcFilterLen * ExtFilter.getCapacity(); // Cost to use a single
|
|
// fractional delay filter.
|
|
const int ucb[ 2 ] = { 0, FltUseCost };
|
|
int ic;
|
|
int i;
|
|
|
|
if( IsInitRequired )
|
|
{
|
|
ic = FracCount * SrcFilterLen * FltInitCost;
|
|
|
|
for( i = 0; i < FracCount; i++ )
|
|
{
|
|
ic += ucb[ FracUseMap[ i ]];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ic = 0;
|
|
|
|
for( i = 0; i < FracCount; i++ )
|
|
{
|
|
if( FracUseMap[ i ] != 0 )
|
|
{
|
|
ic += ucb[ TableFillFlags[ i ] == 0 ? 1 : 0 ];
|
|
}
|
|
}
|
|
}
|
|
|
|
return( ic );
|
|
}
|
|
|
|
private:
|
|
static const int InterpPoints = 2; ///< The maximal number of points the
|
|
///< interpolation is based on.
|
|
///<
|
|
double WFLen2; ///< Window function's Len2 parameter.
|
|
///<
|
|
double WFFreq; ///< Window function's Freq parameter.
|
|
///<
|
|
double WFAlpha; ///< Window function's Alpha parameter.
|
|
///<
|
|
int FracCount; ///< The required number of fractional delay filters.
|
|
///<
|
|
int Order; ///< The order of the interpolation polynomial.
|
|
///<
|
|
int Alignment; ///< The required filter table alignment.
|
|
///<
|
|
int SrcFilterLen; ///< Length of the "source" filters. This is always an
|
|
///< even value.
|
|
///<
|
|
int FilterLen; ///< Specifies the number of samples (taps) each fractional
|
|
///< delay filter has. This is always an even value, adjusted by the
|
|
///< FltLenAlign.
|
|
///<
|
|
int FilterSize; ///< The size of a single filter element, equals
|
|
///< FilterLen * ElementSize.
|
|
///<
|
|
bool IsInitRequired; ///< "True" if SrcTable filter table initialization
|
|
///< is required. This value is available only after the call to the
|
|
///< init() function.
|
|
///<
|
|
CBuffer< fptype > Table; ///< Interpolation table, size equals to
|
|
///< ReqFracCount * FilterLen * ElementSize.
|
|
///<
|
|
CBuffer< uint8_t > TableFillFlags; ///< Contains ReqFracCount + 1
|
|
///< elements. Bit 0 of every element is 1 if Table already contains
|
|
///< the filter from SrcTable filtered by ExtFilter. Bit 1 of every
|
|
///< element means higher order coefficients were filled for the
|
|
///< filter.
|
|
///<
|
|
CFltBuffer ExtFilter; ///< External filter that should be applied to every
|
|
///< fractional delay filter. Can be empty. Half of this filter's
|
|
///< capacity is used as latency (group delay) value of the filter.
|
|
///<
|
|
CBuffer< double > SrcTable; ///< Source table of delay filters, contains
|
|
///< ReqFracCount + 1 elements. This table is used to fill the Table
|
|
///< with the actual filters, filtered by an external filter.
|
|
///<
|
|
bool IsSrcTableBuilt; ///< "True" if the SrcTable was built already. This
|
|
///< variable is set to "false" in the init() function.
|
|
///<
|
|
|
|
/**
|
|
* Function builds source table used in the createFilter() function.
|
|
*/
|
|
|
|
void buildSrcTable()
|
|
{
|
|
IsSrcTableBuilt = true;
|
|
IsInitRequired = false;
|
|
|
|
CDSPPeakedCosineLPF p( WFLen2, WFFreq, WFAlpha );
|
|
|
|
const int BufLen = SrcFilterLen * FracCount + InterpPoints - 1;
|
|
const int BufOffs = InterpPoints / 2 - 1;
|
|
const int BufCenter = SrcFilterLen * FracCount / 2 + BufOffs;
|
|
|
|
CBuffer< double > Buf( BufLen );
|
|
memset( Buf, 0, ( BufCenter - p.fl2 ) * sizeof( double ));
|
|
int i = BufLen - BufCenter - p.fl2 - 1;
|
|
memset( &Buf[ BufLen - i ], 0, i * sizeof( double ));
|
|
|
|
p.generateLPF( &Buf[ BufCenter - p.fl2 ], FracCount ); asm("int3");
|
|
|
|
SrcTable.alloc(( FracCount + 1 ) * SrcFilterLen );
|
|
TableFillFlags.alloc( FracCount + 1 );
|
|
int j;
|
|
double* op0 = SrcTable;
|
|
|
|
for( i = FracCount; i >= 0; i-- )
|
|
{
|
|
TableFillFlags[ i ] = 0;
|
|
double* p = Buf + BufOffs + i;
|
|
|
|
for( j = 0; j < SrcFilterLen; j++ )
|
|
{
|
|
op0[ 0 ] = p[ 0 ];
|
|
op0++;
|
|
p += FracCount;
|
|
}
|
|
}
|
|
|
|
Table.alloc(( FracCount + 1 ) * FilterSize, Alignment );
|
|
}
|
|
|
|
/**
|
|
* Function creates the specified filter in the Table by copying it from
|
|
* the SrcTable and filtering by ExtFilter. Function does nothing if
|
|
* filter was already created.
|
|
*
|
|
* @param k Filter index to create, in the range 0 to FracCount,
|
|
* inclusive.
|
|
*/
|
|
|
|
void createFilter( const int k )
|
|
{
|
|
if( TableFillFlags[ k ] != 0 )
|
|
{
|
|
return;
|
|
}
|
|
|
|
TableFillFlags[ k ] |= 1;
|
|
const int ExtFilterLatency = ExtFilter.getCapacity() / 2;
|
|
const int ResLatency = ExtFilterLatency + SrcFilterLen / 2;
|
|
int ResLen = SrcFilterLen;
|
|
|
|
if( ExtFilter.getCapacity() > 0 )
|
|
{
|
|
ResLen += ExtFilter.getCapacity() - 1;
|
|
}
|
|
|
|
const int ResOffs = FilterLen / 2 - ResLatency;
|
|
fptype* op = &Table[ k * FilterSize ];
|
|
int i;
|
|
|
|
for( i = 0; i < ResOffs; i++ )
|
|
{
|
|
op[ i ] = 0.0;
|
|
}
|
|
|
|
for( i = ResOffs + ResLen; i < FilterLen; i++ )
|
|
{
|
|
op[ i ] = 0.0;
|
|
}
|
|
|
|
op += ResOffs;
|
|
const double* const srcflt = &SrcTable[ k * SrcFilterLen ];
|
|
|
|
if( ExtFilter.getCapacity() == 0 )
|
|
{
|
|
for( i = 0; i < ResLen; i++ )
|
|
{
|
|
op[ i ] = (fptype) srcflt[ i ];
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// Perform convolution of extflt and srcflt.
|
|
|
|
const double* const extflt = &ExtFilter[ 0 ];
|
|
int j;
|
|
|
|
for( j = 0; j < ResLen; j++ )
|
|
{
|
|
int k = 0;
|
|
int l = j - ExtFilter.getCapacity() + 1;
|
|
int r = l + ExtFilter.getCapacity();
|
|
|
|
if( l < 0 )
|
|
{
|
|
k -= l;
|
|
l = 0;
|
|
}
|
|
|
|
if( r > SrcFilterLen )
|
|
{
|
|
r = SrcFilterLen;
|
|
}
|
|
|
|
const double* const extfltb = extflt + k;
|
|
const double* const srcfltb = srcflt + l;
|
|
double s = 0.0;
|
|
l = r - l;
|
|
|
|
for( i = 0; i < l; i++ )
|
|
{
|
|
s += extfltb[ i ] * srcfltb[ i ];
|
|
}
|
|
|
|
op[ j ] = (fptype) s;
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Thread pool for multi-threaded image resizing operation.
|
|
*
|
|
* This base class is used to organize a multi-threaded image resizing
|
|
* operation. The thread pool should consist of threads that initially wait
|
|
* for a signal. Upon receiving a signal (via the startAllWorkloads()
|
|
* function) each previously added thread should execute its workload's
|
|
* process() function once, and return to the wait signal state again. The
|
|
* thread pool should be also able to efficiently wait for all workloads to
|
|
* finish via the waitAllWorkloadsToFinish() function.
|
|
*
|
|
* The image resizing algorithm makes calls to functions of this class.
|
|
*/
|
|
|
|
class CImageResizerThreadPool
{
public:
	CImageResizerThreadPool()
	{
	}

	virtual ~CImageResizerThreadPool()
	{
	}

	/**
	 * @brief Thread pool's workload object class.
	 *
	 * Base class for objects that carry out the actual work, which is
	 * spread over several threads.
	 */

	class CWorkload
	{
	public:
		virtual ~CWorkload()
		{
		}

		/**
		 * Function is called from within the thread when thread pool's
		 * startAllWorkloads() function is called.
		 */

		virtual void process() = 0;
	};

	/**
	 * @return The suggested number of workloads (and their associated
	 * threads) to add, never below 1. An implementation may derive this
	 * value from the number of physical and virtual cores present in the
	 * system, and from other considerations. This base implementation
	 * always returns 1.
	 */

	virtual int getSuggestedWorkloadCount() const
	{
		return( 1 );
	}

	/**
	 * Function adds a new workload (and possibly thread) to the thread
	 * pool. The caller decides how many parallel workloads (and threads) it
	 * requires, but this number will not exceed the value returned by the
	 * getSuggestedWorkloadCount() function. How many workloads are
	 * associated with a single thread is implementation-specific, but for
	 * efficiency reasons each workload should have its own thread.
	 *
	 * Workload objects are added only once: the same set of objects is
	 * processed on every startAllWorkloads() function call. The caller
	 * changes the state of the workload objects and then calls the
	 * startAllWorkloads() function to process them.
	 *
	 * This base implementation does nothing.
	 *
	 * @param Workload Workload object whose process() function will be
	 * called from within the thread when the startAllWorkloads() function
	 * is called.
	 */

	virtual void addWorkload( CWorkload* const Workload )
	{
		(void) Workload; // Unused in the base implementation.
	}

	/**
	 * Function starts all workloads associated with threads previously
	 * added via the addWorkload() function. It is assumed that this
	 * function performs the necessary "memory barrier" (or "cache sync")
	 * kind of operation so that all threads catch up the prior changes made
	 * to the workload objects during their wait state.
	 *
	 * This base implementation does nothing.
	 */

	virtual void startAllWorkloads()
	{
	}

	/**
	 * Function waits for all workloads to finish. This base implementation
	 * returns immediately.
	 */

	virtual void waitAllWorkloadsToFinish()
	{
	}

	/**
	 * Function removes all workloads previously added via the addWorkload()
	 * function. This function gets called only after the
	 * waitAllWorkloadsToFinish() function call.
	 *
	 * This base implementation does nothing.
	 */

	virtual void removeAllWorkloads()
	{
	}
};
|
|
|
|
/**
|
|
* @brief Resizing algorithm parameters structure.
|
|
*
|
|
* This structure holds all selectable parameters used by the resizing
|
|
* algorithm at various stages, for both downsizing and upsizing. No
|
|
* other parameters exist that can optimize the performance of the resizing
|
|
* algorithm. Filter length parameters can take fractional values.
|
|
*
|
|
* Besides quality, these parameters (except Alpha parameters) directly affect
|
|
* the computational cost of the resizing algorithm. It is possible to trade
|
|
* the visual quality for computational cost.
|
|
*
|
|
* Anti-alias filtering during downsizing can be defined as a considerable
|
|
* reduction of contrast of smallest features of an image. Unfortunately, such
|
|
* de-contrasting partially affects features of all sizes thus producing a
|
|
* non-linearity of frequency response. All pre-defined parameter sets are
|
|
* described by 3 values separated by slashes. The first value is the
|
|
* de-contrasting factor of small features (which are being removed) while
|
|
* the second value is the de-contrasting factor of large features (which
|
|
* should remain intact), with value of 1 equating to "no contrast change".
|
|
* The third value is the optimization score (see below), with value of 0
|
|
* equating to the "perfect" linearity of frequency response.
|
|
*
|
|
* The pre-defined parameter sets offered by this library were auto-optimized
|
|
* for the given LPFltBaseLen, IntFltLen and CorrFltAlpha values. The
|
|
* optimization goal was to minimize the score: the sum of squares of the
|
|
* difference between original and processed images (which was not actually
|
|
* resized, k=1). The original image was a 0.5 megapixel uniformly-distributed
|
|
* white-noise image with pixel intensities in the 0-1 range. Such goal
|
|
* converges very well and produces filtering system with the flattest
|
|
* frequency response possible for the given constraints. With this goal,
|
|
* increasing the LPFltBaseLen value reduces the general amount of aliasing
|
|
* artifacts.
|
|
*/
|
|
|
|
struct CImageResizerParams
{
	double CorrFltAlpha; ///< Correction filter: Alpha parameter of the
		///< Peaked Cosine window function. The "usable" values are in the
		///< narrow range 1.0 to 1.5.
		///<
	double CorrFltLen; ///< Correction filter: length in samples (taps).
		///< The "usable" range is narrow, 5.5 to 8, as to minimize the
		///< "overcorrection" which is mathematically precise, but visually
		///< unacceptable.
		///<
	double IntFltAlpha; ///< Interpolation low-pass filter: Alpha parameter
		///< of the Peaked Cosine window function. The "usable" values are
		///< in the range 1.5 to 2.5.
		///<
	double IntFltCutoff; ///< Interpolation low-pass filter: cutoff
		///< frequency (normalized, [0; 1]). The "usable" range is 0.6 to
		///< 0.8.
		///<
	double IntFltLen; ///< Interpolation low-pass filter: length in samples
		///< (taps). The length value should be at least 18 or otherwise a
		///< "dark grid" artifact will be introduced if a further sharpening
		///< is applied. IntFltLen together with other IntFlt parameters
		///< should be tuned in a way that produces the flattest frequency
		///< response in 0-0.5 normalized frequency range (this range is due
		///< to 2X upsampling).
		///<
	double LPFltAlpha; ///< Low-pass filter: Alpha parameter of the Peaked
		///< Cosine window function. The "usable" values are in the range
		///< 1.5 to 6.5.
		///<
	double LPFltBaseLen; ///< Low-pass (aka anti-aliasing or
		///< reconstruction) filter: base length in samples (taps), further
		///< adjusted by the actual cutoff frequency, upsampling and
		///< downsampling factors. The "usable" range is between 6 and 9.
		///<
	double LPFltCutoffMult; ///< Low-pass filter: cutoff frequency
		///< multiplier. This value can be both below and above 1.0 as
		///< low-pass filters are inserted on downsampling and upsampling
		///< steps and always have corner frequency equal to or below 0.5pi.
		///< This multiplier shifts low-pass filter's corner frequency
		///< towards lower (if below 1.0) or higher (if above 1.0)
		///< frequencies. This multiplier can be way below 1.0 since any
		///< additional high-frequency damping will be partially corrected
		///< by the correction filter. The "usable" range is 0.3 to 1.0.
		///<
	double HBFltAlpha; ///< Half-band filter's Alpha. Assigned internally.
		///<
	double HBFltCutoff; ///< Half-band filter's cutoff point [0; 1].
		///< Assigned internally.
		///<
	double HBFltLen; ///< Length of the half-band low-pass filter. Assigned
		///< internally. Internally used to perform 2X or higher
		///< downsampling. These filter parameters should be treated as
		///< "technical" and do not require adjustment as they were tuned to
		///< suit all combinations of other parameters. This half-band
		///< filter provides a wide transition band (for minimal ringing
		///< artifacts) and a high stop-band attenuation (for minimal
		///< aliasing).
		///<

	/**
	 * Constructor assigns the half-band filter parameters only. All other
	 * parameters are left unassigned here: the parameter set structures
	 * derived from this structure assign them in their constructors.
	 */

	CImageResizerParams()
		: HBFltAlpha( 1.75395 )
		, HBFltCutoff( 0.40356 )
		, HBFltLen( 22.00000 )
	{
	}
};
|
|
|
|
/**
|
|
* @brief The default set of resizing algorithm parameters
|
|
* (10.01/1.029/0.019169).
|
|
*
|
|
* This is the default set of resizing parameters that was designed to deliver
|
|
* a sharp image while still providing a low amount of ringing artifacts, and
|
|
* having a reasonable computational cost.
|
|
*/
|
|
|
|
struct CImageResizerParamsDef : public CImageResizerParams
|
|
{
|
|
CImageResizerParamsDef()
|
|
{
|
|
CorrFltAlpha = 1.0;//10.01/1.88/1.029(522.43)/0.019169:258648,446808
|
|
CorrFltLen = 6.30770;
|
|
IntFltAlpha = 2.27825;
|
|
IntFltCutoff = 0.75493;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 3.40127;
|
|
LPFltBaseLen = 7.78;
|
|
LPFltCutoffMult = 0.78797;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for ultra-low-ringing
|
|
* performance (7.69/1.069/0.000245).
|
|
*
|
|
* This set of resizing algorithm parameters offers the lowest amount of
|
|
* ringing this library is capable of providing while still offering a decent
|
|
* quality. Low ringing is attained at the expense of higher aliasing
|
|
* artifacts and a slightly reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsULR : public CImageResizerParams
|
|
{
|
|
CImageResizerParamsULR()
|
|
{
|
|
CorrFltAlpha = 1.0;//7.69/1.97/1.069(31445.45)/0.000245:258627,436845
|
|
CorrFltLen = 5.83280;
|
|
IntFltAlpha = 2.11453;
|
|
IntFltCutoff = 0.73986;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 1.73455;
|
|
LPFltBaseLen = 6.40;
|
|
LPFltCutoffMult = 0.61314;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for low-ringing performance
|
|
* (7.86/1.065/0.000106).
|
|
*
|
|
* This set of resizing algorithm parameters offers a very low-ringing
|
|
* performance at the expense of higher aliasing artifacts and a slightly
|
|
* reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsLR : public CImageResizerParams
|
|
{
|
|
CImageResizerParamsLR()
|
|
{
|
|
CorrFltAlpha = 1.0;//7.86/1.96/1.065(73865.02)/0.000106:258636,437381
|
|
CorrFltLen = 5.87671;
|
|
IntFltAlpha = 2.25322;
|
|
IntFltCutoff = 0.74090;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 1.79306;
|
|
LPFltBaseLen = 7.00;
|
|
LPFltCutoffMult = 0.68881;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for lower-ringing performance
|
|
* (8.86/1.046/0.010168).
|
|
*
|
|
* This set of resizing algorithm parameters offers a lower-ringing
|
|
* performance in comparison to the default setting, at the expense of higher
|
|
* aliasing artifacts and a slightly reduced contrast.
|
|
*/
|
|
|
|
struct CImageResizerParamsLow : public CImageResizerParams
|
|
{
|
|
CImageResizerParamsLow()
|
|
{
|
|
CorrFltAlpha = 1.0;//8.86/1.92/1.046(871.54)/0.010168:258647,442252
|
|
CorrFltLen = 6.09757;
|
|
IntFltAlpha = 2.36704;
|
|
IntFltCutoff = 0.74674;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 2.19427;
|
|
LPFltBaseLen = 7.66;
|
|
LPFltCutoffMult = 0.75380;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for low-aliasing
|
|
* resizing (11.81/1.012/0.038379).
|
|
*
|
|
* This set of resizing algorithm parameters offers a considerable
|
|
* anti-aliasing performance with a good frequency response linearity (and
|
|
* contrast). This is an intermediate setting between the default and Ultra
|
|
* parameters.
|
|
*/
|
|
|
|
struct CImageResizerParamsHigh : public CImageResizerParams
|
|
{
|
|
CImageResizerParamsHigh()
|
|
{
|
|
CorrFltAlpha = 1.0;//11.81/1.83/1.012(307.84)/0.038379:258660,452719
|
|
CorrFltLen = 6.80909;
|
|
IntFltAlpha = 2.44917;
|
|
IntFltCutoff = 0.75856;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 4.39527;
|
|
LPFltBaseLen = 8.18;
|
|
LPFltCutoffMult = 0.79172;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Set of resizing algorithm parameters for ultra low-aliasing
|
|
* resizing (13.65/1.001/0.000483).
|
|
*
|
|
* This set of resizing algorithm parameters offers a very considerable
|
|
* anti-aliasing performance with a good frequency response linearity (and
|
|
* contrast). This set of parameters is computationally expensive and may
|
|
* produce ringing artifacts on sharp features.
|
|
*/
|
|
|
|
struct CImageResizerParamsUltra : public CImageResizerParams
|
|
{
|
|
CImageResizerParamsUltra()
|
|
{
|
|
CorrFltAlpha = 1.0;//13.65/1.79/1.001(28288.41)/0.000483:258658,457974
|
|
CorrFltLen = 7.48060;
|
|
IntFltAlpha = 1.93750;
|
|
IntFltCutoff = 0.75462;
|
|
IntFltLen = 18.0;
|
|
LPFltAlpha = 5.55209;
|
|
LPFltBaseLen = 8.34;
|
|
LPFltCutoffMult = 0.78002;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizing variables class.
|
|
*
|
|
* This is a utility "catch all" class that defines various variables used
|
|
* during image resizing. Several variables that are explicitly initialized in
|
|
* this class' constructor are also used as additional "input" variables to
|
|
* the image resizing function. These variables will not be changed by the
|
|
* avir::CImageResizer<>::resizeImage() function.
|
|
*/
|
|
|
|
class CImageResizerVars
|
|
{
|
|
public:
|
|
int ElCount; ///< The number of "fptype" elements used to store 1 pixel.
|
|
///<
|
|
int ElCountIO; ///< The number of source and destination image's elements
|
|
///< used to store 1 pixel.
|
|
///<
|
|
int fppack; ///< The number of atomic types stored in a single "fptype"
|
|
///< element.
|
|
///<
|
|
int fpalign; ///< Suggested alignment size in bytes. This is not a
|
|
///< required alignment, because image resizing algorithm cannot be
|
|
///< made to have a strictly aligned data access in all cases (e.g.
|
|
///< de-interleaved interpolation cannot perform aligned accesses).
|
|
///<
|
|
int elalign; ///< Length alignment of arrays of elements. This applies to
|
|
///< filters and intermediate buffers: this constant forces filters
|
|
///< and scanlines to have a length which is a multiple of this value,
|
|
///< for more efficient SIMD implementation.
|
|
///<
|
|
int packmode; ///< 0 if interleaved packing, 1 if de-interleaved.
|
|
///<
|
|
int BufLen[ 2 ]; ///< Intermediate buffers' lengths in "fptype" elements.
|
|
int BufOffs[ 2 ]; ///< Offsets into the intermediate buffers, used to
|
|
///< provide prefix elements required during processing so that no
|
|
///< "out of range" access happens. This offset is a multiple of
|
|
///< ElCount if pixels are stored in interleaved form.
|
|
///<
|
|
double k; ///< Resizing step coefficient, updated to reflect the actually
|
|
///< used coefficient during resizing.
|
|
///<
|
|
double o; ///< Starting pixel offset inside the source image, updated to
|
|
///< reflect the actually used offset during resizing.
|
|
///<
|
|
int ResizeStep; ///< Index of the resizing step in the latest filtering
|
|
///< steps array.
|
|
///<
|
|
double InGammaMult; ///< Input gamma multiplier, used to convert input
|
|
///< data to 0 to 1 range. 0.0 if no gamma is in use.
|
|
///<
|
|
double OutGammaMult; ///< Output gamma multiplier, used to convert data to
|
|
///< 0 to 255/65535 range. 0.0 if no gamma is in use.
|
|
///<
|
|
|
|
double ox; ///< Start X pixel offset within source image (can be
|
|
///< negative). Positive offset moves image to the left.
|
|
///<
|
|
double oy; ///< Start Y pixel offset within source image (can be
|
|
///< negative). Positive offset moves image to the top.
|
|
///<
|
|
CImageResizerThreadPool* ThreadPool; ///< Thread pool to be used by the
|
|
///< image resizing function. Set to NULL to use single-threaded
|
|
///< processing.
|
|
///<
|
|
bool UseSRGBGamma; ///< Perform sRGB gamma linearization (correction).
|
|
///<
|
|
int BuildMode; ///< The build mode to use, for debugging purposes. Set to
|
|
///< -1 to select a minimal-complexity mode automatically. All build
|
|
///< modes deliver similar results with minor deviations.
|
|
///<
|
|
int RndSeed; ///< Random seed parameter. This parameter may be incremented
|
|
///< after each random generator initialization. The use of this
|
|
///< variable depends on the ditherer implementation.
|
|
///<
|
|
|
|
CImageResizerVars()
|
|
: ox( 0.0 )
|
|
, oy( 0.0 )
|
|
, ThreadPool( NULL )
|
|
, UseSRGBGamma( false )
|
|
, BuildMode( -1 )
|
|
, RndSeed( 0 )
|
|
{
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's filtering step class.
|
|
*
|
|
* Class defines data to perform a single filtering step over a whole
|
|
* horizontal or vertical scanline. Resizing consists of 1 or more steps that
|
|
* may be performed before the actual resizing takes place. Filtering may also
|
|
* follow a resizing step. Each step must ensure that scanline data contains
|
|
* enough pixels to perform the next step (which may be resizing) without
|
|
* exceeding scanline's bounds.
|
|
*
|
|
* A derived class must implement several "const" and "static" functions that
|
|
* are used to perform the actual filtering in interleaved or de-interleaved
|
|
* mode.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel elements. SIMD
|
|
* types can be used: in this case each element may hold a whole pixel.
|
|
* @tparam fptypeatom The atomic type the "fptype" consists of.
|
|
*/
|
|
|
|
template< class fptype, class fptypeatom >
|
|
class CImageResizerFilterStep
|
|
{
|
|
public:
|
|
bool IsUpsample; ///< "True" if this step is an upsampling step, "false"
|
|
///< if downsampling step. Should be set to "false" if ResampleFactor
|
|
///< equals 0.
|
|
///<
|
|
int ResampleFactor; ///< Resample factor (>=1). If 0, this is a resizing
|
|
///< step. This value should be >1 if IsUpsample equals "true".
|
|
///<
|
|
CBuffer< fptype > Flt; ///< Filter to use at this step.
|
|
///<
|
|
CFltBuffer FltOrig; ///< Originally-designed filter. This buffer may not
|
|
///< be assigned. Assigned by filters that precede the resizing step
|
|
///< if such filter is planned to be embedded into the interpolation
|
|
///< filter as "external" filter. If IsUpsample=true and this filter
|
|
///< buffer is not empty, the upsampling step will not itself apply
|
|
///< any filtering over upsampled input scanline.
|
|
///<
|
|
double DCGain; ///< DC gain which was applied to the filter. Not defined
|
|
///< if ResampleFactor = 0.
|
|
///<
|
|
int FltLatency; ///< Filter's latency (group delay, shift) in pixels.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizing-related variables.
|
|
///<
|
|
int InLen; ///< Input scanline's length in pixels.
|
|
///<
|
|
int InBuf; ///< Input buffer index, 0 or 1.
|
|
///<
|
|
int InPrefix; ///< Required input prefix pixels. These prefix pixels will
|
|
///< be filled with source scanline's first pixel value. If IsUpsample
|
|
///< is "true", this is the additional number of times the first pixel
|
|
///< will be filtered before processing scanline, this number is also
|
|
///< reflected in the OutPrefix.
|
|
///<
|
|
int InSuffix; ///< Required input suffix pixels. These suffix pixels will
|
|
///< be filled with source scanline's last pixel value. If IsUpsample
|
|
///< is "true", this is the additional number of times the last pixel
|
|
///< will be filtered before processing scanline, this number is also
|
|
///< reflected in the OutSuffix.
|
|
///<
|
|
int InElIncr; ///< Pixel element increment within the input buffer, used
|
|
///< during de-interleaved processing: in this case each image's
|
|
///< channel is stored independently, InElIncr elements apart.
|
|
///<
|
|
int OutLen; ///< Length of the resulting scanline.
|
|
///<
|
|
int OutBuf; ///< Output buffer index. 0 or 1; 2 for the last step.
|
|
///<
|
|
int OutPrefix; ///< Required output prefix pixels. These prefix pixels
|
|
///< will not be pre-filled with any values. Value is valid only if
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
int OutSuffix; ///< Required input suffix pixels. These suffix pixels will
|
|
///< not be pre-filled with any values. Value is valid only if
|
|
///< IsUpsample equals "true".
|
|
///<
|
|
int OutElIncr; ///< Pixel element increment within the output buffer, used
|
|
///< during de-interleaved processing. Equals to the InBufElIncr of
|
|
///< the next step.
|
|
///<
|
|
CBuffer< fptype > PrefixDC; ///< DC component fluctuations added at the
|
|
///< start of the resulting scanline, used when IsUpsample equals
|
|
///< "true".
|
|
///<
|
|
CBuffer< fptype > SuffixDC; ///< DC component fluctuations added at the
|
|
///< end of the resulting scanline, used when IsUpsample equals
|
|
///< "true".
|
|
///<
|
|
int EdgePixelCount; ///< The number of edge pixels added. Affects the
|
|
///< initial position within the input scanline, used to produce edge
|
|
///< pixels. This variable is used and should be defined when
|
|
///< IsUpsample=false and ResampleFactor>0. When assigning this
|
|
///< variable it is also necessary to update InPrefix, OutLen and
|
|
///< Vars.o variables.
|
|
///<
|
|
static const int EdgePixelCountDef = 3; ///< The default number of pixels
|
|
///< additionally produced at scanline edges during filtering. This is
|
|
///< required to reduce edge artifacts.
|
|
///<
|
|
|
|
/**
|
|
* @brief Resizing position structure.
|
|
*
|
|
* Structure holds resizing position and pointer to fractional delay
|
|
* filter.
|
|
*/
|
|
|
|
struct CResizePos
|
|
{
|
|
int SrcPosInt; ///< Source scanline position.
|
|
///<
|
|
int fti; ///< Fractional delay filter index.
|
|
///<
|
|
const fptype* ftp; ///< Fractional delay filter pointer.
|
|
///<
|
|
fptypeatom x; ///< Interpolation coefficient between delay filters.
|
|
///<
|
|
int SrcOffs; ///< Source scanline offset.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Resizing positions buffer class.
|
|
*
|
|
* This class combines buffer together with variables that define resizing
|
|
* stepping.
|
|
*/
|
|
|
|
class CRPosBuf : public CBuffer< CResizePos >
|
|
{
|
|
public:
|
|
double k; ///< Resizing step.
|
|
///<
|
|
double o; ///< Resizing offset.
|
|
///<
|
|
int FracCount; ///< The number of fractional delay filters in a filter
|
|
///< bank used together with this buffer.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Resizing positions buffer array class.
|
|
*
|
|
* This class combines structure array of the CRPosBuf class objects with
|
|
* the function that locates or creates buffer with the required resizing
|
|
* stepping.
|
|
*/
|
|
|
|
class CRPosBufArray : public CStructArray< CRPosBuf >
|
|
{
|
|
public:
|
|
using CStructArray< CRPosBuf > :: add;
|
|
using CStructArray< CRPosBuf > :: getItemCount;
|
|
|
|
/**
|
|
* Function returns the resizing positions buffer with the required
|
|
* stepping. If no such buffer exists, it is created.
|
|
*
|
|
* @param k Resizing step.
|
|
* @param o Resizing offset.
|
|
* @param FracCount The number of fractional delay filters in a filter
|
|
* bank used together with this buffer.
|
|
* @return Reference to the CRPosBuf object.
|
|
*/
|
|
|
|
CRPosBuf& getRPosBuf( const double k, const double o,
|
|
const int FracCount )
|
|
{
|
|
int i;
|
|
|
|
for( i = 0; i < getItemCount(); i++ )
|
|
{
|
|
CRPosBuf& Buf = (*this)[ i ];
|
|
|
|
if( Buf.k == k && Buf.o == o && Buf.FracCount == FracCount )
|
|
{
|
|
return( Buf );
|
|
}
|
|
}
|
|
|
|
CRPosBuf& NewBuf = add();
|
|
NewBuf.k = k;
|
|
NewBuf.o = o;
|
|
NewBuf.FracCount = FracCount;
|
|
|
|
return( NewBuf );
|
|
}
|
|
};
|
|
|
|
CRPosBuf* RPosBuf; ///< Resizing positions buffer. Used when
|
|
///< ResampleFactor equals 0 (resizing step).
|
|
///<
|
|
CDSPFracFilterBankLin< fptype >* FltBank; ///< Filter bank in use by *this
|
|
///< resizing step.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Interleaved filtering steps implementation class.
|
|
*
|
|
* This class implements scanline filtering functions in interleaved mode.
|
|
* This means that each pixel is processed independently, not in groups.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel elements. SIMD
|
|
* types can be used: in this case each element may hold a whole pixel.
|
|
* @tparam fptypeatom The atomic type the "fptype" consists of.
|
|
*/
|
|
|
|
template< class fptype, class fptypeatom >
|
|
class CImageResizerFilterStepINL :
|
|
public CImageResizerFilterStep< fptype, fptypeatom >
|
|
{
|
|
public:
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: IsUpsample;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: ResampleFactor;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: Flt;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: FltOrig;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: FltLatency;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: Vars;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: InLen;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: InPrefix;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: InSuffix;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: OutLen;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: OutPrefix;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: OutSuffix;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: PrefixDC;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: SuffixDC;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: RPosBuf;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: FltBank;
|
|
using CImageResizerFilterStep< fptype, fptypeatom > :: EdgePixelCount;
|
|
|
|
/**
|
|
* Function performs "packing" of a scanline and type conversion.
|
|
* Scanline, depending on the "fptype" can be potentially stored as a
|
|
* packed SIMD values having a certain atomic type. If required, the sRGB
|
|
* gamma correction is applied.
|
|
*
|
|
* @param ip Input scanline.
|
|
* @param op0 Output scanline.
|
|
* @param l0 The number of pixels to "pack".
|
|
*/
|
|
|
|
template< class Tin >
|
|
void packScanline( const Tin* ip, fptype* const op0, const int l0 ) const
|
|
{
|
|
const int ElCount = Vars -> ElCount;
|
|
const int ElCountIO = Vars -> ElCountIO;
|
|
fptype* op = op0;
|
|
int l = l0;
|
|
|
|
if( !Vars -> UseSRGBGamma )
|
|
{
|
|
if( ElCountIO == 1 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = (fptypeatom) ip[ 0 ];
|
|
op += ElCount;
|
|
ip++;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 4 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = (fptypeatom) ip[ 0 ];
|
|
v[ 1 ] = (fptypeatom) ip[ 1 ];
|
|
v[ 2 ] = (fptypeatom) ip[ 2 ];
|
|
v[ 3 ] = (fptypeatom) ip[ 3 ];
|
|
op += ElCount;
|
|
ip += 4;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = (fptypeatom) ip[ 0 ];
|
|
v[ 1 ] = (fptypeatom) ip[ 1 ];
|
|
v[ 2 ] = (fptypeatom) ip[ 2 ];
|
|
op += ElCount;
|
|
ip += 3;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = (fptypeatom) ip[ 0 ];
|
|
v[ 1 ] = (fptypeatom) ip[ 1 ];
|
|
op += ElCount;
|
|
ip += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const fptypeatom gm = (fptypeatom) Vars -> InGammaMult;
|
|
|
|
if( ElCountIO == 1 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
|
|
op += ElCount;
|
|
ip++;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 4 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
|
|
v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm );
|
|
v[ 2 ] = convertSRGB2Lin( (fptypeatom) ip[ 2 ] * gm );
|
|
v[ 3 ] = convertSRGB2Lin( (fptypeatom) ip[ 3 ] * gm );
|
|
op += ElCount;
|
|
ip += 4;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
|
|
v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm );
|
|
v[ 2 ] = convertSRGB2Lin( (fptypeatom) ip[ 2 ] * gm );
|
|
op += ElCount;
|
|
ip += 3;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op;
|
|
v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
|
|
v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm );
|
|
op += ElCount;
|
|
ip += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
const int ZeroCount = ElCount * Vars -> fppack - ElCountIO;
|
|
op = op0;
|
|
l = l0;
|
|
|
|
if( ZeroCount == 1 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op + ElCountIO;
|
|
v[ 0 ] = (fptypeatom) 0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ZeroCount == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op + ElCountIO;
|
|
v[ 0 ] = (fptypeatom) 0;
|
|
v[ 1 ] = (fptypeatom) 0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ZeroCount == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptypeatom* v = (fptypeatom*) op + ElCountIO;
|
|
v[ 0 ] = (fptypeatom) 0;
|
|
v[ 1 ] = (fptypeatom) 0;
|
|
v[ 2 ] = (fptypeatom) 0;
|
|
op += ElCount;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function applies Linear to sRGB gamma correction to the specified
|
|
* scanline.
|
|
*
|
|
* @param p Scanline.
|
|
* @param l The number of pixels to de-linearize.
|
|
* @param Vars0 Image resizing-related variables.
|
|
*/
|
|
|
|
static void applySRGBGamma( fptype* p, int l,
	const CImageResizerVars& Vars0 )
{
	// In-place conversion of "l" pixels: each of the ElCountIO used atoms
	// of a pixel is passed through convertLin2SRGB() and then scaled by
	// OutGammaMult. Pixels are ElCount "fptype" elements apart. Only
	// element counts 1 to 4 are handled.

	const int ElCount = Vars0.ElCount;
	const fptypeatom gm = (fptypeatom) Vars0.OutGammaMult;

	switch( Vars0.ElCountIO )
	{
		case 1:
			for( ; l > 0; l-- )
			{
				fptypeatom* const v = (fptypeatom*) p;
				v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
				p += ElCount;
			}

			break;

		case 4:
			for( ; l > 0; l-- )
			{
				fptypeatom* const v = (fptypeatom*) p;
				v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
				v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm;
				v[ 2 ] = convertLin2SRGB( v[ 2 ]) * gm;
				v[ 3 ] = convertLin2SRGB( v[ 3 ]) * gm;
				p += ElCount;
			}

			break;

		case 3:
			for( ; l > 0; l-- )
			{
				fptypeatom* const v = (fptypeatom*) p;
				v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
				v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm;
				v[ 2 ] = convertLin2SRGB( v[ 2 ]) * gm;
				p += ElCount;
			}

			break;

		case 2:
			for( ; l > 0; l-- )
			{
				fptypeatom* const v = (fptypeatom*) p;
				v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
				v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm;
				p += ElCount;
			}

			break;
	}
}
|
|
|
|
/**
|
|
* Function converts vertical scanline to horizontal scanline. This
|
|
* function is called by the image resizer when image is resized
|
|
* vertically. This means that the vertical scanline is stored in the
|
|
* same format produced by the packScanline() and maintained by other
|
|
* filtering functions.
|
|
*
|
|
* @param ip Input vertical scanline.
|
|
* @param op Output buffer (temporary buffer used during resizing).
|
|
* @param SrcLen The number of pixels in the input scanline, also used to
|
|
* calculate input buffer increment.
|
|
* @param SrcIncr Input buffer increment to the next vertical pixel.
|
|
*/
|
|
|
|
void convertVtoH( const fptype* ip, fptype* op, const int SrcLen,
|
|
const int SrcIncr ) const
|
|
{
|
|
const int ElCount = Vars -> ElCount;
|
|
int j;
|
|
|
|
if( ElCount == 1 )
|
|
{
|
|
for( j = 0; j < SrcLen; j++ )
|
|
{
|
|
op[ 0 ] = ip[ 0 ];
|
|
ip += SrcIncr;
|
|
op++;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
for( j = 0; j < SrcLen; j++ )
|
|
{
|
|
op[ 0 ] = ip[ 0 ];
|
|
op[ 1 ] = ip[ 1 ];
|
|
op[ 2 ] = ip[ 2 ];
|
|
op[ 3 ] = ip[ 3 ];
|
|
ip += SrcIncr;
|
|
op += 4;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
for( j = 0; j < SrcLen; j++ )
|
|
{
|
|
op[ 0 ] = ip[ 0 ];
|
|
op[ 1 ] = ip[ 1 ];
|
|
op[ 2 ] = ip[ 2 ];
|
|
ip += SrcIncr;
|
|
op += 3;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
for( j = 0; j < SrcLen; j++ )
|
|
{
|
|
op[ 0 ] = ip[ 0 ];
|
|
op[ 1 ] = ip[ 1 ];
|
|
ip += SrcIncr;
|
|
op += 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs "unpacking" of a scanline and type conversion
|
|
* (truncation is used when floating point is converted to integer).
|
|
* Scanline, depending on the "fptype" can be potentially stored as a
|
|
* packed SIMD values having a certain atomic type. The unpacking function
|
|
* assumes that scanline is stored in the style produced by the
|
|
* packScanline() function.
|
|
*
|
|
* @param ip Input scanline.
|
|
* @param op Output scanline.
|
|
* @param l The number of pixels to "unpack".
|
|
* @param Vars0 Image resizing-related variables.
|
|
*/
|
|
|
|
template< class Tout >
|
|
static void unpackScanline( const fptype* ip, Tout* op, int l,
|
|
const CImageResizerVars& Vars0 )
|
|
{
|
|
const int ElCount = Vars0.ElCount;
|
|
const int ElCountIO = Vars0.ElCountIO;
|
|
/* dprintf(2, "BOOP ElCount=%d ElCountIO=%d l=%d op=%p\n", ElCount, ElCountIO, l, op); */
|
|
|
|
if( ElCountIO == 1 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
const fptypeatom* v = (const fptypeatom*) ip;
|
|
op[ 0 ] = (Tout) v[ 0 ];
|
|
ip += ElCount;
|
|
op++;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 4 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
const fptypeatom* v = (const fptypeatom*) ip;
|
|
op[ 0 ] = (Tout) v[ 0 ];
|
|
op[ 1 ] = (Tout) v[ 1 ];
|
|
op[ 2 ] = (Tout) v[ 2 ];
|
|
op[ 3 ] = (Tout) v[ 3 ];
|
|
ip += ElCount;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
const fptypeatom* v = (const fptypeatom*) ip;
|
|
/* DebugBreak(); */
|
|
/* dprintf(2, "BOOP ElCount=%d ElCountIO=%d l=%d op=%p v=%p v[0]=%d\n", ElCount, ElCountIO, l, op, v, (Tout)v[0]); */
|
|
op[ 0 ] = (Tout) v[ 0 ];
|
|
op[ 1 ] = (Tout) v[ 1 ];
|
|
op[ 2 ] = (Tout) v[ 2 ];
|
|
ip += ElCount;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCountIO == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
const fptypeatom* v = (const fptypeatom*) ip;
|
|
op[ 0 ] = (Tout) v[ 0 ];
|
|
op[ 1 ] = (Tout) v[ 1 ];
|
|
ip += ElCount;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function prepares input scanline buffer for *this filtering step.
|
|
* Left- and right-most pixels are replicated to make sure no buffer
|
|
* overrun happens. Such approach also allows to bypass any pointer
|
|
* range checks.
|
|
*
|
|
* @param Src Source buffer.
|
|
*/
|
|
|
|
void prepareInBuf( fptype* Src ) const
{
	// Nothing is replicated for upsampling steps, nor when neither a
	// prefix nor a suffix is required.

	if( !IsUpsample && InPrefix + InSuffix != 0 )
	{
		const int ElCount = Vars -> ElCount;
		fptype* const FirstPix = Src;
		fptype* const LastPix = Src + ( InLen - 1 ) * ElCount;

		// Copy the first pixel backwards into the InPrefix slots that
		// precede the scanline.

		replicateArray( FirstPix, ElCount, FirstPix - ElCount, InPrefix,
			-ElCount );

		// Copy the last pixel forwards into the InSuffix slots that follow
		// the scanline.

		replicateArray( LastPix, ElCount, LastPix + ElCount, InSuffix,
			ElCount );
	}
}
|
|
|
|
/**
|
|
* Function performs scanline upsampling with filtering.
|
|
*
|
|
* @param Src Source scanline buffer (length = this -> InLen). Source
|
|
* scanline increment will be equal to ElCount.
|
|
* @param Dst Destination scanline buffer.
|
|
*/
|
|
|
|
void doUpsample( const fptype* const Src, fptype* const Dst ) const
|
|
{
|
|
const int ElCount = Vars -> ElCount;
|
|
fptype* op0 = &Dst[ -OutPrefix * ElCount ];
|
|
memset( op0, 0, ( OutPrefix + OutLen + OutSuffix ) * ElCount *
|
|
sizeof( fptype ));
|
|
|
|
const fptype* ip = Src;
|
|
const int opstep = ElCount * ResampleFactor;
|
|
int l;
|
|
|
|
if( FltOrig.getCapacity() > 0 )
|
|
{
|
|
// Do not perform filtering, only upsample.
|
|
|
|
op0 += ( OutPrefix % ResampleFactor ) * ElCount;
|
|
l = OutPrefix / ResampleFactor;
|
|
|
|
if( ElCount == 1 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0[ 2 ] = ip[ 2 ];
|
|
op0[ 3 ] = ip[ 3 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0[ 2 ] = ip[ 2 ];
|
|
op0[ 3 ] = ip[ 3 ];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0[ 2 ] = ip[ 2 ];
|
|
op0[ 3 ] = ip[ 3 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0[ 2 ] = ip[ 2 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0[ 2 ] = ip[ 2 ];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0[ 2 ] = ip[ 2 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0 += opstep;
|
|
ip += ElCount;
|
|
l--;
|
|
}
|
|
|
|
l = OutSuffix / ResampleFactor;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op0[ 0 ] = ip[ 0 ];
|
|
op0[ 1 ] = ip[ 1 ];
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
const fptype* const f = Flt;
|
|
const int flen = Flt.getCapacity();
|
|
fptype* op;
|
|
int i;
|
|
|
|
if( ElCount == 1 )
|
|
{
|
|
l = InPrefix;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ i ] += f[ i ] * ip[ 0 ];
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ i ] += f[ i ] * ip[ 0 ];
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ i ] += f[ i ] * ip[ 0 ];
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
l = InPrefix;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op[ 2 ] += f[ i ] * ip[ 2 ];
|
|
op[ 3 ] += f[ i ] * ip[ 3 ];
|
|
op += 4;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op[ 2 ] += f[ i ] * ip[ 2 ];
|
|
op[ 3 ] += f[ i ] * ip[ 3 ];
|
|
op += 4;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op[ 2 ] += f[ i ] * ip[ 2 ];
|
|
op[ 3 ] += f[ i ] * ip[ 3 ];
|
|
op += 4;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
l = InPrefix;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op[ 2 ] += f[ i ] * ip[ 2 ];
|
|
op += 3;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op[ 2 ] += f[ i ] * ip[ 2 ];
|
|
op += 3;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op[ 2 ] += f[ i ] * ip[ 2 ];
|
|
op += 3;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
l = InPrefix;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op += 2;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InLen - 1;
|
|
|
|
while( l > 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op += 2;
|
|
}
|
|
|
|
ip += ElCount;
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
|
|
l = InSuffix;
|
|
|
|
while( l >= 0 )
|
|
{
|
|
op = op0;
|
|
|
|
for( i = 0; i < flen; i++ )
|
|
{
|
|
op[ 0 ] += f[ i ] * ip[ 0 ];
|
|
op[ 1 ] += f[ i ] * ip[ 1 ];
|
|
op += 2;
|
|
}
|
|
|
|
op0 += opstep;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
op = op0;
|
|
const fptype* dc = SuffixDC;
|
|
l = SuffixDC.getCapacity();
|
|
|
|
if( ElCount == 1 )
|
|
{
|
|
for( i = 0; i < l; i++ )
|
|
{
|
|
op[ i ] += ip[ 0 ] * dc[ i ];
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op[ 0 ] += ip[ 0 ] * dc[ 0 ];
|
|
op[ 1 ] += ip[ 1 ] * dc[ 0 ];
|
|
op[ 2 ] += ip[ 2 ] * dc[ 0 ];
|
|
op[ 3 ] += ip[ 3 ] * dc[ 0 ];
|
|
dc++;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op[ 0 ] += ip[ 0 ] * dc[ 0 ];
|
|
op[ 1 ] += ip[ 1 ] * dc[ 0 ];
|
|
op[ 2 ] += ip[ 2 ] * dc[ 0 ];
|
|
dc++;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op[ 0 ] += ip[ 0 ] * dc[ 0 ];
|
|
op[ 1 ] += ip[ 1 ] * dc[ 0 ];
|
|
dc++;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
ip = Src;
|
|
op = Dst - InPrefix * opstep;
|
|
dc = PrefixDC;
|
|
l = PrefixDC.getCapacity();
|
|
|
|
if( ElCount == 1 )
|
|
{
|
|
for( i = 0; i < l; i++ )
|
|
{
|
|
op[ i ] += ip[ 0 ] * dc[ i ];
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op[ 0 ] += ip[ 0 ] * dc[ 0 ];
|
|
op[ 1 ] += ip[ 1 ] * dc[ 0 ];
|
|
op[ 2 ] += ip[ 2 ] * dc[ 0 ];
|
|
op[ 3 ] += ip[ 3 ] * dc[ 0 ];
|
|
dc++;
|
|
op += 4;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op[ 0 ] += ip[ 0 ] * dc[ 0 ];
|
|
op[ 1 ] += ip[ 1 ] * dc[ 0 ];
|
|
op[ 2 ] += ip[ 2 ] * dc[ 0 ];
|
|
dc++;
|
|
op += 3;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
op[ 0 ] += ip[ 0 ] * dc[ 0 ];
|
|
op[ 1 ] += ip[ 1 ] * dc[ 0 ];
|
|
dc++;
|
|
op += 2;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs scanline filtering with optional downsampling.
|
|
* Function makes use of the symmetry of the filter.
|
|
*
|
|
* @param Src Source scanline buffer (length = this -> InLen). Source
|
|
* scanline increment will be equal to ElCount.
|
|
* @param Dst Destination scanline buffer.
|
|
* @param DstIncr Destination scanline buffer increment, used for
|
|
* horizontal or vertical scanline stepping.
|
|
*/
|
|
|
|
void doFilter( const fptype* const Src, fptype* Dst,
|
|
const int DstIncr ) const
|
|
{
|
|
const int ElCount = Vars -> ElCount;
|
|
const fptype* const f = &Flt[ FltLatency ];
|
|
const int flen = FltLatency + 1;
|
|
const int ipstep = ElCount * ResampleFactor;
|
|
const fptype* ip = Src - EdgePixelCount * ipstep;
|
|
const fptype* ip1;
|
|
const fptype* ip2;
|
|
int l = OutLen;
|
|
int i;
|
|
|
|
if( ElCount == 1 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptype s = f[ 0 ] * ip[ 0 ];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for( i = 1; i < flen; i++ )
|
|
{
|
|
ip1++;
|
|
ip2--;
|
|
s += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
|
|
}
|
|
|
|
Dst[ 0 ] = s;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptype s1 = f[ 0 ] * ip[ 0 ];
|
|
fptype s2 = f[ 0 ] * ip[ 1 ];
|
|
fptype s3 = f[ 0 ] * ip[ 2 ];
|
|
fptype s4 = f[ 0 ] * ip[ 3 ];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for( i = 1; i < flen; i++ )
|
|
{
|
|
ip1 += 4;
|
|
ip2 -= 4;
|
|
s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
|
|
s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]);
|
|
s3 += f[ i ] * ( ip1[ 2 ] + ip2[ 2 ]);
|
|
s4 += f[ i ] * ( ip1[ 3 ] + ip2[ 3 ]);
|
|
}
|
|
|
|
Dst[ 0 ] = s1;
|
|
Dst[ 1 ] = s2;
|
|
Dst[ 2 ] = s3;
|
|
Dst[ 3 ] = s4;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptype s1 = f[ 0 ] * ip[ 0 ];
|
|
fptype s2 = f[ 0 ] * ip[ 1 ];
|
|
fptype s3 = f[ 0 ] * ip[ 2 ];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for( i = 1; i < flen; i++ )
|
|
{
|
|
ip1 += 3;
|
|
ip2 -= 3;
|
|
s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
|
|
s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]);
|
|
s3 += f[ i ] * ( ip1[ 2 ] + ip2[ 2 ]);
|
|
}
|
|
|
|
Dst[ 0 ] = s1;
|
|
Dst[ 1 ] = s2;
|
|
Dst[ 2 ] = s3;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
while( l > 0 )
|
|
{
|
|
fptype s1 = f[ 0 ] * ip[ 0 ];
|
|
fptype s2 = f[ 0 ] * ip[ 1 ];
|
|
ip1 = ip;
|
|
ip2 = ip;
|
|
|
|
for( i = 1; i < flen; i++ )
|
|
{
|
|
ip1 += 2;
|
|
ip2 -= 2;
|
|
s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
|
|
s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]);
|
|
}
|
|
|
|
Dst[ 0 ] = s1;
|
|
Dst[ 1 ] = s2;
|
|
Dst += DstIncr;
|
|
ip += ipstep;
|
|
l--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs resizing of a single scanline. This function does
|
|
* not "know" about the length of the source scanline buffer. This buffer
|
|
* should be padded with enough pixels so that ( SrcPos - FilterLenD2 ) is
|
|
* always >= 0 and ( SrcPos + ( DstLineLen - 1 ) * k + FilterLenD2 + 1 )
|
|
* does not exceed source scanline's buffer length. SrcLine's increment is
|
|
* assumed to be equal to ElCount.
|
|
*
|
|
* @param SrcLine Source scanline buffer.
|
|
* @param DstLine Destination (resized) scanline buffer.
|
|
* @param DstLineIncr Destination scanline position increment, used for
|
|
* horizontal or vertical scanline stepping.
|
|
* @param xx Temporary buffer, of size FltBank -> getFilterLen(), must be
|
|
* aligned by fpclass :: fpalign.
|
|
*/
|
|
|
|
void doResize( const fptype* SrcLine, fptype* DstLine,
|
|
const int DstLineIncr, fptype* const ) const
|
|
{
|
|
const int IntFltLen = FltBank -> getFilterLen();
|
|
const int ElCount = Vars -> ElCount;
|
|
const typename CImageResizerFilterStep< fptype, fptypeatom > ::
|
|
CResizePos* rpos = &(*RPosBuf)[ 0 ];
|
|
|
|
const typename CImageResizerFilterStep< fptype, fptypeatom > ::
|
|
CResizePos* const rpose = rpos + OutLen;
|
|
|
|
#define AVIR_RESIZE_PART1 \
|
|
while( rpos < rpose ) \
|
|
{ \
|
|
const fptype x = (fptype) rpos -> x; \
|
|
const fptype* const ftp = rpos -> ftp; \
|
|
const fptype* const ftp2 = ftp + IntFltLen; \
|
|
const fptype* Src = SrcLine + rpos -> SrcOffs; \
|
|
int i;
|
|
|
|
#define AVIR_RESIZE_PART1nx \
|
|
while( rpos < rpose ) \
|
|
{ \
|
|
const fptype* const ftp = rpos -> ftp; \
|
|
const fptype* Src = SrcLine + rpos -> SrcOffs; \
|
|
int i;
|
|
|
|
#define AVIR_RESIZE_PART2 \
|
|
DstLine += DstLineIncr; \
|
|
rpos++; \
|
|
}
|
|
|
|
if( FltBank -> getOrder() == 1 )
|
|
{
|
|
if( ElCount == 1 )
|
|
{
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
sum += ( ftp[ i ] + ftp2[ i ] * x ) * Src[ i ];
|
|
}
|
|
|
|
DstLine[ 0 ] = sum;
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[ 4 ];
|
|
sum[ 0 ] = 0.0;
|
|
sum[ 1 ] = 0.0;
|
|
sum[ 2 ] = 0.0;
|
|
sum[ 3 ] = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
const fptype xx = ftp[ i ] + ftp2[ i ] * x;
|
|
sum[ 0 ] += xx * Src[ 0 ];
|
|
sum[ 1 ] += xx * Src[ 1 ];
|
|
sum[ 2 ] += xx * Src[ 2 ];
|
|
sum[ 3 ] += xx * Src[ 3 ];
|
|
Src += 4;
|
|
}
|
|
|
|
DstLine[ 0 ] = sum[ 0 ];
|
|
DstLine[ 1 ] = sum[ 1 ];
|
|
DstLine[ 2 ] = sum[ 2 ];
|
|
DstLine[ 3 ] = sum[ 3 ];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[ 3 ];
|
|
sum[ 0 ] = 0.0;
|
|
sum[ 1 ] = 0.0;
|
|
sum[ 2 ] = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
const fptype xx = ftp[ i ] + ftp2[ i ] * x;
|
|
sum[ 0 ] += xx * Src[ 0 ];
|
|
sum[ 1 ] += xx * Src[ 1 ];
|
|
sum[ 2 ] += xx * Src[ 2 ];
|
|
Src += 3;
|
|
}
|
|
|
|
DstLine[ 0 ] = sum[ 0 ];
|
|
DstLine[ 1 ] = sum[ 1 ];
|
|
DstLine[ 2 ] = sum[ 2 ];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
AVIR_RESIZE_PART1
|
|
|
|
fptype sum[ 2 ];
|
|
sum[ 0 ] = 0.0;
|
|
sum[ 1 ] = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
const fptype xx = ftp[ i ] + ftp2[ i ] * x;
|
|
sum[ 0 ] += xx * Src[ 0 ];
|
|
sum[ 1 ] += xx * Src[ 1 ];
|
|
Src += 2;
|
|
}
|
|
|
|
DstLine[ 0 ] = sum[ 0 ];
|
|
DstLine[ 1 ] = sum[ 1 ];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( ElCount == 1 )
|
|
{
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
sum += ftp[ i ] * Src[ i ];
|
|
}
|
|
|
|
DstLine[ 0 ] = sum;
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
else
|
|
if( ElCount == 4 )
|
|
{
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[ 4 ];
|
|
sum[ 0 ] = 0.0;
|
|
sum[ 1 ] = 0.0;
|
|
sum[ 2 ] = 0.0;
|
|
sum[ 3 ] = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
const fptype xx = ftp[ i ];
|
|
sum[ 0 ] += xx * Src[ 0 ];
|
|
sum[ 1 ] += xx * Src[ 1 ];
|
|
sum[ 2 ] += xx * Src[ 2 ];
|
|
sum[ 3 ] += xx * Src[ 3 ];
|
|
Src += 4;
|
|
}
|
|
|
|
DstLine[ 0 ] = sum[ 0 ];
|
|
DstLine[ 1 ] = sum[ 1 ];
|
|
DstLine[ 2 ] = sum[ 2 ];
|
|
DstLine[ 3 ] = sum[ 3 ];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
else
|
|
if( ElCount == 3 )
|
|
{
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[ 3 ];
|
|
sum[ 0 ] = 0.0;
|
|
sum[ 1 ] = 0.0;
|
|
sum[ 2 ] = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
const fptype xx = ftp[ i ];
|
|
sum[ 0 ] += xx * Src[ 0 ];
|
|
sum[ 1 ] += xx * Src[ 1 ];
|
|
sum[ 2 ] += xx * Src[ 2 ];
|
|
Src += 3;
|
|
}
|
|
|
|
DstLine[ 0 ] = sum[ 0 ];
|
|
DstLine[ 1 ] = sum[ 1 ];
|
|
DstLine[ 2 ] = sum[ 2 ];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
else
|
|
if( ElCount == 2 )
|
|
{
|
|
AVIR_RESIZE_PART1nx
|
|
|
|
fptype sum[ 2 ];
|
|
sum[ 0 ] = 0.0;
|
|
sum[ 1 ] = 0.0;
|
|
|
|
for( i = 0; i < IntFltLen; i++ )
|
|
{
|
|
const fptype xx = ftp[ i ];
|
|
sum[ 0 ] += xx * Src[ 0 ];
|
|
sum[ 1 ] += xx * Src[ 1 ];
|
|
Src += 2;
|
|
}
|
|
|
|
DstLine[ 0 ] = sum[ 0 ];
|
|
DstLine[ 1 ] = sum[ 1 ];
|
|
|
|
AVIR_RESIZE_PART2
|
|
}
|
|
}
|
|
}
|
|
#undef AVIR_RESIZE_PART2
|
|
#undef AVIR_RESIZE_PART1nx
|
|
#undef AVIR_RESIZE_PART1
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's default dithering class.
|
|
*
|
|
* This class defines an object that performs rounding, clipping and dithering
|
|
* operations over horizontal scanline pixels before scanline is stored in the
|
|
* output buffer.
|
|
*
|
|
* The ditherer should expect the same storage order of the pixels in a
|
|
* scanline as used in the "filtering step" class. So, a separate ditherer
|
|
* class should be defined for each scanline pixel storage style. The default
|
|
* ditherer implements a simple rounding without dithering: it can be used for
|
|
* an efficient dithering method which can be multi-threaded.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel data. SIMD
|
|
* types can be used.
|
|
*/
|
|
|
|
template< class fptype >
|
|
class CImageResizerDithererDefINL
|
|
{
|
|
public:
|
|
/**
|
|
* Function initializes the ditherer object.
|
|
*
|
|
* @param aLen Scanline length in pixels to process.
|
|
* @param aVars Image resizing-related variables.
|
|
* @param aTrMul Bit-depth truncation multiplier. 1 - no additional
|
|
* truncation.
|
|
* @param aPkOut Peak output value allowed.
|
|
*/
|
|
|
|
void init( const int aLen, const CImageResizerVars& aVars,
|
|
const double aTrMul, const double aPkOut )
|
|
{
|
|
Len = aLen;
|
|
Vars = &aVars;
|
|
LenE = aLen * Vars -> ElCount;
|
|
TrMul0 = aTrMul;
|
|
PkOut0 = aPkOut;
|
|
}
|
|
|
|
/**
|
|
* @return "True" if dithering is recursive relative to scanlines meaning
|
|
* multi-threaded execution is not supported by this dithering method.
|
|
*/
|
|
|
|
static bool isRecursive()
|
|
{
|
|
return( false );
|
|
}
|
|
|
|
/**
|
|
* Function performs rounding and clipping operations.
|
|
*
|
|
* @param ResScanline The buffer containing the final scanline.
|
|
*/
|
|
|
|
void dither( fptype* const ResScanline ) const
|
|
{
|
|
const fptype c0 = 0.0;
|
|
const fptype PkOut = (fptype) PkOut0;
|
|
int j;
|
|
|
|
if( TrMul0 == 1.0 )
|
|
{
|
|
// Optimization - do not perform bit depth truncation.
|
|
|
|
for( j = 0; j < LenE; j++ )
|
|
{
|
|
ResScanline[ j ] = clamp( round( ResScanline[ j ]), c0,
|
|
PkOut );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const fptype TrMul = (fptype) TrMul0;
|
|
|
|
for( j = 0; j < LenE; j++ )
|
|
{
|
|
const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul;
|
|
ResScanline[ j ] = clamp( z0, c0, PkOut );
|
|
}
|
|
}
|
|
}
|
|
|
|
protected:
|
|
int Len; ///< Scanline's length in pixels.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizing-related variables.
|
|
///<
|
|
int LenE; ///< = LenE * ElCount.
|
|
///<
|
|
double TrMul0; ///< Bit-depth truncation multiplier.
|
|
///<
|
|
double PkOut0; ///< Peak output value allowed.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer's error-diffusion dithering class, interleaved mode.
|
|
*
|
|
* This ditherer implements error-diffusion dithering which looks good, and
|
|
* whose results are compressed by PNG well. This implementation uses
|
|
* weighting coefficients obtained via machine optimization and visual
|
|
* evaluation.
|
|
*
|
|
* @tparam fptype Floating point type to use for storing pixel data. SIMD
|
|
* types can be used.
|
|
*/
|
|
|
|
template< class fptype >
|
|
class CImageResizerDithererErrdINL :
|
|
public CImageResizerDithererDefINL< fptype >
|
|
{
|
|
public:
|
|
/**
|
|
* Function initializes the ditherer object.
|
|
*
|
|
* @param aLen Scanline length in pixels to process.
|
|
* @param aVars Image resizing-related variables.
|
|
* @param aTrMul Bit-depth truncation multiplier. 1 - no additional
|
|
* truncation.
|
|
* @param aPkOut Peak output value allowed.
|
|
*/
|
|
|
|
void init( const int aLen, const CImageResizerVars& aVars,
|
|
const double aTrMul, const double aPkOut )
|
|
{
|
|
CImageResizerDithererDefINL< fptype > :: init( aLen, aVars, aTrMul,
|
|
aPkOut );
|
|
|
|
ResScanlineDith0.alloc( LenE + Vars -> ElCount, sizeof( fptype ));
|
|
ResScanlineDith = ResScanlineDith0 + Vars -> ElCount;
|
|
int i;
|
|
|
|
for( i = 0; i < LenE + Vars -> ElCount; i++ )
|
|
{
|
|
ResScanlineDith0[ i ] = 0.0;
|
|
}
|
|
}
|
|
|
|
static bool isRecursive()
|
|
{
|
|
return( true );
|
|
}
|
|
|
|
void dither( fptype* const ResScanline )
|
|
{
|
|
const int ElCount = Vars -> ElCount;
|
|
const fptype c0 = 0.0;
|
|
const fptype TrMul = (fptype) TrMul0;
|
|
const fptype PkOut = (fptype) PkOut0;
|
|
int j;
|
|
|
|
for( j = 0; j < LenE; j++ )
|
|
{
|
|
ResScanline[ j ] += ResScanlineDith[ j ];
|
|
ResScanlineDith[ j ] = 0.0;
|
|
}
|
|
|
|
for( j = 0; j < LenE - ElCount; j++ )
|
|
{
|
|
// Perform rounding, noise estimation and saturation.
|
|
|
|
const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul;
|
|
const fptype Noise = ResScanline[ j ] - z0;
|
|
ResScanline[ j ] = clamp( z0, c0, PkOut );
|
|
|
|
ResScanline[ j + ElCount ] += Noise * (fptype) 0.364842;
|
|
ResScanlineDith[ j - ElCount ] += Noise * (fptype) 0.207305;
|
|
ResScanlineDith[ j ] += Noise * (fptype) 0.364842;
|
|
ResScanlineDith[ j + ElCount ] += Noise * (fptype) 0.063011;
|
|
}
|
|
|
|
while( j < LenE )
|
|
{
|
|
const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul;
|
|
const fptype Noise = ResScanline[ j ] - z0;
|
|
ResScanline[ j ] = clamp( z0, c0, PkOut );
|
|
|
|
ResScanlineDith[ j - ElCount ] += Noise * (fptype) 0.207305;
|
|
ResScanlineDith[ j ] += Noise * (fptype) 0.364842;
|
|
j++;
|
|
}
|
|
}
|
|
|
|
protected:
|
|
using CImageResizerDithererDefINL< fptype > :: Len;
|
|
using CImageResizerDithererDefINL< fptype > :: Vars;
|
|
using CImageResizerDithererDefINL< fptype > :: LenE;
|
|
using CImageResizerDithererDefINL< fptype > :: TrMul0;
|
|
using CImageResizerDithererDefINL< fptype > :: PkOut0;
|
|
|
|
CBuffer< fptype > ResScanlineDith0; ///< Error diffusion buffer.
|
|
///<
|
|
fptype* ResScanlineDith; ///< Error diffusion buffer pointer which skips
|
|
///< the first ElCount elements.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Floating-point processing definition and abstraction class.
|
|
*
|
|
* This class defines several constants and typedefs that point to classes
|
|
* that should be used by the image resizing algorithm. Such "definition
|
|
* class" can be used to define alternative scanline processing algorithms
|
|
* (e.g. SIMD) and image scanline packing styles used during processing. This
|
|
* class also offers an abstraction layer for dithering, rounding and
|
|
* clamping (saturation) operation.
|
|
*
|
|
* The fpclass_def class can be used to define processing using both SIMD and
|
|
* non-SIMD types, but using algorithms that operate on interleaved pixels
|
|
* and non-SIMD optimized themselves.
|
|
*
|
|
* @tparam afptype Floating point type to use for storing intermediate data
|
|
* and variables. For variables that are not used in intensive calculations
|
|
* the "double" type is always used. On the latest Intel processors (like
|
|
* i7-4770K) there is almost no performance difference between "double" and
|
|
* "float". Image quality differences between "double" and "float" are not
|
|
* apparent on 8-bit images. At the same time the "float" uses half amount of
|
|
* working memory the "double" type uses. SIMD types can be used. The
|
|
* functions round() and clamp() in the "avir" or other visible namespace
|
|
* should be available for the specified type. SIMD types allow to perform
|
|
* resizing of images with more than 4 channels, to be exact 4 * SIMD element
|
|
* number (e.g. 16 for float4), without modification of the image resizing
|
|
* algorithm required.
|
|
* @tparam afptypeatom The atomic type the "afptype" consists of.
|
|
* @tparam adith Ditherer class to use during processing.
|
|
*/
|
|
|
|
template< class afptype, class afptypeatom = afptype,
|
|
class adith = CImageResizerDithererDefINL< afptype > >
|
|
class fpclass_def
|
|
{
|
|
public:
|
|
typedef afptype fptype; ///< Floating-point type to use during processing.
|
|
///<
|
|
typedef afptypeatom fptypeatom; ///< Atomic type "fptype" consists of.
|
|
///<
|
|
static const int fppack = sizeof( fptype ) / sizeof( fptypeatom ); ///<
|
|
///< The number of atomic types stored in a single "fptype" element.
|
|
///<
|
|
static const int fpalign = sizeof( fptype ); ///< Suggested alignment size
|
|
///< in bytes. This is not a required alignment, because image
|
|
///< resizing algorithm cannot be made to have a strictly aligned data
|
|
///< access at all steps (e.g. interpolation cannot perform aligned
|
|
///< accesses).
|
|
///<
|
|
static const int elalign = 1; ///< Length alignment of arrays of elements.
|
|
///< This applies to filters and intermediate buffers: this constant
|
|
///< forces filters and scanlines to have a length which is a multiple
|
|
///< of this value, for more efficient SIMD implementation.
|
|
///<
|
|
static const int packmode = 0; ///< 0 if interleaved packing, 1 if
|
|
///< de-interleaved.
|
|
///<
|
|
typedef CImageResizerFilterStepINL< fptype, fptypeatom > CFilterStep; ///<
|
|
///< Filtering step class to use during processing.
|
|
///<
|
|
typedef adith CDitherer; ///< Ditherer class to use during processing.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* @brief Image resizer class.
|
|
*
|
|
* The object of this class can be used to resize 1-4 channel images to any
|
|
* required size. Resizing is performed by utilizing interpolated sinc
|
|
* fractional delay filters plus (if necessary) a cascade of built-in
|
|
* sinc function-based 2X upsampling or 2X downsampling stages, followed by a
|
|
* correction filtering.
|
|
*
|
|
* Object of this class can be allocated on stack.
|
|
*
|
|
* @tparam fpclass Floating-point processing definition class to use. See
|
|
* avir::fpclass_def for more details.
|
|
*/
|
|
|
|
template< class fpclass = fpclass_def< float > >
|
|
class CImageResizer
|
|
{
|
|
public:
|
|
/**
|
|
* Constructor initializes the resizer.
|
|
*
|
|
* @param aResBitDepth Required bit depth of resulting image (1-16). If
|
|
* integer value output is used (e.g. uint8_t), the bit depth also affects
|
|
* rounding: for example, if aResBitDepth=6 and "Tout" is uint8_t, the
|
|
* result will be rounded to 6 most significant bits (2 least significant
|
|
* bits truncated, with dithering applied).
|
|
* @param aSrcBitDepth Source image's real bit-depth. Set to 0 to use
|
|
* aResBitDepth.
|
|
* @param aParams Resizing algorithm's parameters to use. Leave out for
|
|
* default values. Can be useful when performing automatic optimization of
|
|
* parameters.
|
|
*/
|
|
|
|
CImageResizer( const int aResBitDepth = 8, const int aSrcBitDepth = 0,
|
|
const CImageResizerParams& aParams = CImageResizerParamsDef() )
|
|
: Params( aParams )
|
|
, ResBitDepth( aResBitDepth )
|
|
{
|
|
SrcBitDepth = ( aSrcBitDepth == 0 ? ResBitDepth : aSrcBitDepth );
|
|
|
|
initFilterBank( FixedFilterBank, 1.0, false, CFltBuffer() );
|
|
FixedFilterBank.createAllFilters();
|
|
}
|
|
|
|
/**
|
|
* Function resizes image.
|
|
*
|
|
* @param SrcBuf Source image buffer.
|
|
* @param SrcWidth Source image width.
|
|
* @param SrcHeight Source image height.
|
|
* @param SrcScanlineSize Physical size of source scanline in elements
|
|
* (not bytes). If this value is below 1, SrcWidth * ElCountIO will be
|
|
* used as the physical source scanline size.
|
|
* @param[out] NewBuf Buffer to accept the resized image. Can be equal to
|
|
* SrcBuf if the size of the resized image is smaller or equal to source
|
|
* image in size.
|
|
* @param NewWidth New image width.
|
|
* @param NewHeight New image height.
|
|
* @param ElCountIO The number of elements (channels) used to store each
|
|
* source and destination pixel (1-4).
|
|
* @param k Resizing step (one output pixel corresponds to "k" input
|
|
* pixels). A downsizing factor if > 1.0; upsizing factor if <= 1.0.
|
|
* Multiply by -1 if you would like to bypass "ox" and "oy" adjustment
|
|
* which is done by default to produce a centered image. If step value
|
|
* equals 0, the step value will be chosen automatically and independently
|
|
* for horizontal and vertical resizing.
|
|
* @param[in,out] aVars Pointer to variables structure to be passed to the
|
|
* image resizing function. Can be NULL. Only variables that are
|
|
* initialized in default constructor of this structure are accepted by
|
|
* this function. These variables will not be changed by this function.
|
|
* All other variables can be modified by this function. The access to
|
|
* this object is not thread-safe, each concurrent instance of this
|
|
* function should use a separate aVars object.
|
|
* @tparam Tin Input buffer element's type. Can be uint8_t (0-255 value
|
|
* range), uint16_t (0-65535 value range), float (0.0-1.0 value range),
|
|
* double (0.0-1.0 value range). Larger integer types are treated as
|
|
* uint16_t. Signed integer types are unsupported.
|
|
* @tparam Tout Output buffer element's type. Can be uint8_t (0-255 value
|
|
* range), uint16_t (0-65535 value range), float (0.0-1.0 value range),
|
|
* double (0.0-1.0 value range). Larger integer types are treated as
|
|
* uint16_t. Signed integer types are unsupported.
|
|
*/
|
|
|
|
template< class Tin, class Tout >
|
|
void resizeImage( const Tin* const SrcBuf, const int SrcWidth,
|
|
const int SrcHeight, int SrcScanlineSize, Tout* const NewBuf,
|
|
const int NewWidth, const int NewHeight, const int ElCountIO,
|
|
const double k, CImageResizerVars* const aVars = NULL ) const
|
|
{
|
|
if( SrcWidth == 0 || SrcHeight == 0 )
|
|
{
|
|
memset( NewBuf, 0, (size_t) NewWidth * NewHeight *
|
|
sizeof( Tout ));
|
|
|
|
return;
|
|
}
|
|
else
|
|
if( NewWidth == 0 || NewHeight == 0 )
|
|
{
|
|
return;
|
|
}
|
|
|
|
CImageResizerVars DefVars;
|
|
CImageResizerVars& Vars = ( aVars == NULL ? DefVars : *aVars );
|
|
|
|
CImageResizerThreadPool DefThreadPool;
|
|
CImageResizerThreadPool& ThreadPool = ( Vars.ThreadPool == NULL ?
|
|
DefThreadPool : *Vars.ThreadPool );
|
|
|
|
// Define resizing steps, also optionally modify offsets so that
|
|
// resizing produces a "centered" image.
|
|
|
|
double kx;
|
|
double ky;
|
|
double ox = Vars.ox;
|
|
double oy = Vars.oy;
|
|
|
|
if( k == 0.0 )
|
|
{
|
|
if( NewWidth > SrcWidth )
|
|
{
|
|
kx = (double) ( SrcWidth - 1 ) / ( NewWidth - 1 );
|
|
}
|
|
else
|
|
{
|
|
kx = (double) SrcWidth / NewWidth;
|
|
ox += ( kx - 1.0 ) * 0.5;
|
|
}
|
|
|
|
if( NewHeight > SrcHeight )
|
|
{
|
|
ky = (double) ( SrcHeight - 1 ) / ( NewHeight - 1 );
|
|
}
|
|
else
|
|
{
|
|
ky = (double) SrcHeight / NewHeight;
|
|
oy += ( ky - 1.0 ) * 0.5;
|
|
}
|
|
}
|
|
else
|
|
if( k > 0.0 )
|
|
{
|
|
kx = k;
|
|
ky = k;
|
|
|
|
if( k > 1.0 )
|
|
{
|
|
const double ko = ( k - 1.0 ) * 0.5;
|
|
ox += ko;
|
|
oy += ko;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
kx = -k;
|
|
ky = -k;
|
|
}
|
|
|
|
// Evaluate pre-multipliers used on the output stage.
|
|
|
|
const bool IsInFloat = ( (Tin) 0.4 != 0 );
|
|
const bool IsOutFloat = ( (Tout) 0.4 != 0 );
|
|
double OutMul; // Output multiplier.
|
|
|
|
if( Vars.UseSRGBGamma )
|
|
{
|
|
if( IsInFloat )
|
|
{
|
|
Vars.InGammaMult = 1.0;
|
|
}
|
|
else
|
|
{
|
|
Vars.InGammaMult =
|
|
1.0 / ( sizeof( Tin ) == 1 ? 255.0 : 65535.0 );
|
|
}
|
|
|
|
if( IsOutFloat )
|
|
{
|
|
Vars.OutGammaMult = 1.0;
|
|
}
|
|
else
|
|
{
|
|
Vars.OutGammaMult = ( sizeof( Tout ) == 1 ? 255.0 : 65535.0 );
|
|
}
|
|
|
|
OutMul = 1.0;
|
|
}
|
|
else
|
|
{
|
|
if( IsOutFloat )
|
|
{
|
|
OutMul = 1.0;
|
|
}
|
|
else
|
|
{
|
|
OutMul = ( sizeof( Tout ) == 1 ? 255.0 : 65535.0 );
|
|
}
|
|
|
|
if( !IsInFloat )
|
|
{
|
|
OutMul /= ( sizeof( Tin ) == 1 ? 255.0 : 65535.0 );
|
|
}
|
|
}
|
|
|
|
// Fill widely-used variables.
|
|
|
|
const int ElCount = ( ElCountIO + fpclass :: fppack - 1 ) /
|
|
fpclass :: fppack;
|
|
|
|
const int NewWidthE = NewWidth * ElCount;
|
|
|
|
if( SrcScanlineSize < 1 )
|
|
{
|
|
SrcScanlineSize = SrcWidth * ElCountIO;
|
|
}
|
|
|
|
Vars.ElCount = ElCount;
|
|
Vars.ElCountIO = ElCountIO;
|
|
Vars.fppack = fpclass :: fppack;
|
|
Vars.fpalign = fpclass :: fpalign;
|
|
Vars.elalign = fpclass :: elalign;
|
|
Vars.packmode = fpclass :: packmode;
|
|
|
|
// Horizontal scanline filtering and resizing.
|
|
|
|
CDSPFracFilterBankLin< fptype > FltBank;
|
|
CFilterSteps FltSteps;
|
|
typename CFilterStep :: CRPosBufArray RPosBufArray;
|
|
CBuffer< uint8_t > UsedFracMap;
|
|
|
|
// Perform the filtering steps modeling at various modes, find the
|
|
// most efficient mode for both horizontal and vertical resizing.
|
|
|
|
int UseBuildMode = 1;
|
|
const int BuildModeCount =
|
|
( FixedFilterBank.getOrder() == 0 ? 4 : 2 );
|
|
|
|
int m;
|
|
|
|
if( Vars.BuildMode >= 0 )
|
|
{
|
|
UseBuildMode = Vars.BuildMode;
|
|
}
|
|
else
|
|
{
|
|
int BestScore = 0x7FFFFFFF;
|
|
|
|
for( m = 0; m < BuildModeCount; m++ )
|
|
{
|
|
CDSPFracFilterBankLin< fptype > TmpBank;
|
|
CFilterSteps TmpSteps;
|
|
Vars.k = kx;
|
|
Vars.o = ox;
|
|
buildFilterSteps( TmpSteps, Vars, TmpBank, OutMul, m, true );
|
|
updateFilterStepBuffers( TmpSteps, Vars, RPosBufArray,
|
|
SrcWidth, NewWidth );
|
|
|
|
fillUsedFracMap( TmpSteps[ Vars.ResizeStep ], UsedFracMap );
|
|
const int c = calcComplexity( TmpSteps, Vars, UsedFracMap,
|
|
SrcHeight );
|
|
|
|
if( c < BestScore )
|
|
{
|
|
UseBuildMode = m;
|
|
BestScore = c;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Perform the actual filtering steps building.
|
|
|
|
Vars.k = kx;
|
|
Vars.o = ox;
|
|
buildFilterSteps( FltSteps, Vars, FltBank, OutMul, UseBuildMode,
|
|
false );
|
|
|
|
updateFilterStepBuffers( FltSteps, Vars, RPosBufArray, SrcWidth,
|
|
NewWidth );
|
|
|
|
updateBufLenAndRPosPtrs( FltSteps, Vars, NewWidth );
|
|
|
|
const int ThreadCount = ThreadPool.getSuggestedWorkloadCount();
|
|
// Includes the current thread.
|
|
|
|
CStructArray< CThreadData< Tin, Tout > > td;
|
|
td.setItemCount( ThreadCount );
|
|
int i;
|
|
|
|
for( i = 0; i < ThreadCount; i++ )
|
|
{
|
|
if( i > 0 )
|
|
{
|
|
ThreadPool.addWorkload( &td[ i ]);
|
|
}
|
|
|
|
td[ i ].init( i, ThreadCount, FltSteps, Vars );
|
|
|
|
td[ i ].initScanlineQueue( td[ i ].sopResizeH, SrcHeight,
|
|
SrcWidth );
|
|
}
|
|
|
|
CBuffer< fptype, size_t > FltBuf( (size_t) NewWidthE * SrcHeight,
|
|
fpclass :: fpalign ); // Temporary buffer that receives
|
|
// horizontally-filtered and resized image.
|
|
|
|
for( i = 0; i < SrcHeight; i++ )
|
|
{
|
|
td[ i % ThreadCount ].addScanlineToQueue(
|
|
(void*) &SrcBuf[ (size_t) i * SrcScanlineSize ],
|
|
&FltBuf[ (size_t) i * NewWidthE ]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[ 0 ].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
|
|
// Vertical scanline filtering and resizing, reuse previously defined
|
|
// filtering steps if possible.
|
|
|
|
const int PrevUseBuildMode = UseBuildMode;
|
|
|
|
if( Vars.BuildMode >= 0 )
|
|
{
|
|
UseBuildMode = Vars.BuildMode;
|
|
}
|
|
else
|
|
{
|
|
CImageResizerVars TmpVars( Vars );
|
|
int BestScore = 0x7FFFFFFF;
|
|
|
|
for( m = 0; m < BuildModeCount; m++ )
|
|
{
|
|
CDSPFracFilterBankLin< fptype > TmpBank;
|
|
TmpBank.copyInitParams( FltBank );
|
|
CFilterSteps TmpSteps;
|
|
TmpVars.k = ky;
|
|
TmpVars.o = oy;
|
|
buildFilterSteps( TmpSteps, TmpVars, TmpBank, 1.0, m, true );
|
|
updateFilterStepBuffers( TmpSteps, TmpVars, RPosBufArray,
|
|
SrcHeight, NewHeight );
|
|
|
|
fillUsedFracMap( TmpSteps[ TmpVars.ResizeStep ],
|
|
UsedFracMap );
|
|
|
|
const int c = calcComplexity( TmpSteps, TmpVars, UsedFracMap,
|
|
NewWidth );
|
|
|
|
if( c < BestScore )
|
|
{
|
|
UseBuildMode = m;
|
|
BestScore = c;
|
|
}
|
|
}
|
|
}
|
|
|
|
Vars.k = ky;
|
|
Vars.o = oy;
|
|
|
|
if( UseBuildMode == PrevUseBuildMode && ky == kx )
|
|
{
|
|
if( OutMul != 1.0 )
|
|
{
|
|
modifyCorrFilterDCGain( FltSteps, 1.0 / OutMul );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
buildFilterSteps( FltSteps, Vars, FltBank, 1.0, UseBuildMode,
|
|
false );
|
|
}
|
|
|
|
updateFilterStepBuffers( FltSteps, Vars, RPosBufArray, SrcHeight,
|
|
NewHeight );
|
|
|
|
updateBufLenAndRPosPtrs( FltSteps, Vars, NewWidth );
|
|
|
|
if( IsOutFloat && sizeof( FltBuf[ 0 ]) == sizeof( Tout ) &&
|
|
fpclass :: packmode == 0 )
|
|
{
|
|
// In-place output.
|
|
|
|
for( i = 0; i < ThreadCount; i++ )
|
|
{
|
|
td[ i ].initScanlineQueue( td[ i ].sopResizeV, NewWidth,
|
|
SrcHeight, NewWidthE, NewWidthE );
|
|
}
|
|
|
|
for( i = 0; i < NewWidth; i++ )
|
|
{
|
|
td[ i % ThreadCount ].addScanlineToQueue(
|
|
&FltBuf[ (size_t) i * ElCount ],
|
|
(fptype*) &NewBuf[ (size_t) i * ElCount ]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[ 0 ].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
ThreadPool.removeAllWorkloads();
|
|
|
|
return;
|
|
}
|
|
|
|
CBuffer< fptype, size_t > ResBuf( (size_t) NewWidthE * NewHeight,
|
|
fpclass :: fpalign );
|
|
|
|
for( i = 0; i < ThreadCount; i++ )
|
|
{
|
|
td[ i ].initScanlineQueue( td[ i ].sopResizeV, NewWidth,
|
|
SrcHeight, NewWidthE, NewWidthE );
|
|
}
|
|
|
|
const int im = ( fpclass :: packmode == 0 ? ElCount : 1 );
|
|
|
|
for( i = 0; i < NewWidth; i++ )
|
|
{
|
|
td[ i % ThreadCount ].addScanlineToQueue(
|
|
&FltBuf[ (size_t) i * im ], &ResBuf[ (size_t) i * im ]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[ 0 ].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
|
|
if( IsOutFloat )
|
|
{
|
|
// Perform output, but skip dithering.
|
|
|
|
for( i = 0; i < ThreadCount; i++ )
|
|
{
|
|
td[ i ].initScanlineQueue( td[ i ].sopUnpackH,
|
|
NewHeight, NewWidth );
|
|
}
|
|
|
|
for( i = 0; i < NewHeight; i++ )
|
|
{
|
|
td[ i % ThreadCount ].addScanlineToQueue(
|
|
&ResBuf[ (size_t) i * NewWidthE ],
|
|
&NewBuf[ (size_t) i * NewWidth * ElCountIO ]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[ 0 ].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
ThreadPool.removeAllWorkloads();
|
|
|
|
return;
|
|
}
|
|
|
|
// Perform output with dithering (for integer output only).
|
|
|
|
int TruncBits; // The number of lower bits to truncate and dither.
|
|
int OutRange; // Output range.
|
|
|
|
if( sizeof( Tout ) == 1 )
|
|
{
|
|
TruncBits = 8 - ResBitDepth;
|
|
OutRange = 255;
|
|
}
|
|
else
|
|
{
|
|
TruncBits = 16 - ResBitDepth;
|
|
OutRange = 65535;
|
|
}
|
|
|
|
const double PkOut = OutRange;
|
|
const double TrMul = ( TruncBits > 0 ?
|
|
PkOut / ( OutRange >> TruncBits ) : 1.0 );
|
|
|
|
if( CDitherer :: isRecursive() )
|
|
{
|
|
td[ 0 ].getDitherer().init( NewWidth, Vars, TrMul, PkOut );
|
|
|
|
if( Vars.UseSRGBGamma )
|
|
{
|
|
for( i = 0; i < NewHeight; i++ )
|
|
{
|
|
fptype* const ResScanline =
|
|
&ResBuf[ (size_t) i * NewWidthE ];
|
|
|
|
CFilterStep :: applySRGBGamma( ResScanline, NewWidth,
|
|
Vars );
|
|
|
|
td[ 0 ].getDitherer().dither( ResScanline );
|
|
|
|
CFilterStep :: unpackScanline( ResScanline,
|
|
&NewBuf[ (size_t) i * NewWidth * ElCountIO ],
|
|
NewWidth, Vars );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( i = 0; i < NewHeight; i++ )
|
|
{
|
|
fptype* const ResScanline =
|
|
&ResBuf[ (size_t) i * NewWidthE ];
|
|
|
|
td[ 0 ].getDitherer().dither( ResScanline );
|
|
|
|
CFilterStep :: unpackScanline( ResScanline,
|
|
&NewBuf[ (size_t) i * NewWidth * ElCountIO ],
|
|
NewWidth, Vars );
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( i = 0; i < ThreadCount; i++ )
|
|
{
|
|
td[ i ].initScanlineQueue( td[ i ].sopDitherAndUnpackH,
|
|
NewHeight, NewWidth );
|
|
|
|
td[ i ].getDitherer().init( NewWidth, Vars, TrMul, PkOut );
|
|
}
|
|
|
|
for( i = 0; i < NewHeight; i++ )
|
|
{
|
|
td[ i % ThreadCount ].addScanlineToQueue(
|
|
&ResBuf[ (size_t) i * NewWidthE ],
|
|
&NewBuf[ (size_t) i * NewWidth * ElCountIO ]);
|
|
}
|
|
|
|
ThreadPool.startAllWorkloads();
|
|
td[ 0 ].processScanlineQueue();
|
|
ThreadPool.waitAllWorkloadsToFinish();
|
|
}
|
|
|
|
ThreadPool.removeAllWorkloads();
|
|
}
|
|
|
|
private:
|
|
typedef typename fpclass :: fptype fptype; ///< Floating-point type to use
|
|
///< during processing.
|
|
///<
|
|
typedef typename fpclass :: CFilterStep CFilterStep; ///< Filtering step
|
|
///< class to use during processing.
|
|
///<
|
|
typedef typename fpclass :: CDitherer CDitherer; ///< Ditherer class to
|
|
///< use during processing.
|
|
///<
|
|
CImageResizerParams Params; ///< Algorithm's parameters currently in use.
|
|
///<
|
|
int SrcBitDepth; ///< Bit resolution of the source image.
|
|
///<
|
|
int ResBitDepth; ///< Bit resolution of the resulting image.
|
|
///<
|
|
CDSPFracFilterBankLin< fptype > FixedFilterBank; ///< Fractional delay
|
|
///< filter bank with fixed characteristics, mainly for upsizing
|
|
///< cases.
|
|
///<
|
|
|
|
/**
|
|
* @brief Filtering steps array.
|
|
*
|
|
* The object of this class stores filtering steps together.
|
|
*/
|
|
|
|
typedef CStructArray< CFilterStep > CFilterSteps;
|
|
|
|
/**
|
|
* Function initializes the filter bank in the specified resizing step
|
|
* according to the source and resulting image bit depths.
|
|
*
|
|
* @param FltBank Filter bank to initialize.
|
|
* @param CutoffMult Cutoff multiplier, 0 to 1. 1 corresponds to 0.5pi
|
|
* cutoff point.
|
|
* @param ForceHiOrder "True" if a high-order interpolation should be
|
|
* forced which requires considerably less resources for initialization.
|
|
* @param ExtFilter External filter to apply to interpolation filter.
|
|
*/
|
|
|
|
void initFilterBank( CDSPFracFilterBankLin< fptype >& FltBank,
	const double CutoffMult, const bool ForceHiOrder,
	const CFltBuffer& ExtFilter ) const
{
	// The interpolator works at the larger of the source and resulting
	// bit depths.

	int WorkBitDepth = SrcBitDepth;

	if( ResBitDepth > SrcBitDepth )
	{
		WorkBitDepth = ResBitDepth;
	}

	const double SNR = -6.02 * ( WorkBitDepth + 3 );

	// Order 1 (-146 dB max) is used when forced or when more than 8 bits
	// are processed, order 0 (-72 dB max) otherwise. The number of
	// fractional delay filters sampled by the bank follows from the SNR
	// via an exponential fit whose coefficients depend on the order: at
	// order 1, 8-bit resizing needs 66.2 dB SNR (11 filters) and 16-bit
	// resizing needs 114.4 dB SNR (150 filters); at order 0 the required
	// count is exponentially higher.

	const bool IsHiOrder = ( ForceHiOrder || WorkBitDepth > 8 );
	const int UseOrder = ( IsHiOrder ? 1 : 0 );
	int FracCount;

	if( IsHiOrder )
	{
		FracCount = (int) ceil( 0.23134052 * exp( -0.058062929 * SNR ));
	}
	else
	{
		FracCount = (int) ceil( 0.33287686 * exp( -0.11334583 * SNR ));
	}

	// At least two fractional filters are always requested.

	if( FracCount < 2 )
	{
		FracCount = 2;
	}

	FltBank.init( FracCount, UseOrder, Params.IntFltLen / CutoffMult,
		Params.IntFltCutoff * CutoffMult, Params.IntFltAlpha, ExtFilter,
		fpclass :: fpalign, fpclass :: elalign );
}
|
|
|
|
/**
|
|
* Function allocates filter buffer taking "fpclass" alignments into
|
|
* account. The allocated buffer may be larger than the requested size: in
|
|
* this case the additional elements will be zeroed by this function.
|
|
*
|
|
* @param Flt Filter buffer.
|
|
* @param ReqCapacity The required filter buffer's capacity.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter allocation.
|
|
* @param FltExt If non-NULL this variable will receive the number of
|
|
* elements the filter was extended by.
|
|
*/
|
|
|
|
static void allocFilter( CBuffer< fptype >& Flt, const int ReqCapacity,
	const bool IsModel = false, int* const FltExt = NULL )
{
	// Round the requested capacity up to a multiple of
	// "fpclass :: elalign".

	const int PaddedCapacity =
		( ReqCapacity + fpclass :: elalign - 1 ) &
		~( fpclass :: elalign - 1 );

	const int PadCount = PaddedCapacity - ReqCapacity;

	if( FltExt != NULL )
	{
		*FltExt = PadCount;
	}

	if( IsModel )
	{
		// Modeling: only the capacity is recorded via forceCapacity().

		Flt.forceCapacity( PaddedCapacity );
		return;
	}

	Flt.alloc( PaddedCapacity, fpclass :: fpalign );

	// Zero the padding elements that follow the requested capacity.

	int i;

	for( i = PaddedCapacity - 1; i >= ReqCapacity; i-- )
	{
		Flt[ i ] = 0.0;
	}
}
|
|
|
|
/**
|
|
* Function assigns filter parameters to the specified filtering step
|
|
* object.
|
|
*
|
|
* @param fs Filtering step to assign parameter to. This step cannot be
|
|
* the last step if ResampleFactor greater than 1 was specified.
|
|
* @param IsUpsample "True" if upsampling step. Should be set to "false"
|
|
* if FltCutoff is negative.
|
|
* @param ResampleFactor Resampling factor of this filter (>=1).
|
|
* @param FltCutoff Filter cutoff point. This value will be divided by the
|
|
* ResampleFactor if IsUpsample equals "true". If zero value was
|
|
* specified, the "half-band" predefined filter will be created. In this
|
|
* case the ResampleFactor will modify the filter cutoff point.
|
|
* @param DCGain DC gain to apply to the filter. Assigned to filtering
|
|
* step's DCGain variable.
|
|
* @param UseFltOrig "True" if the originally-designed filter should be
|
|
* left in filtering step's FltOrig buffer. Otherwise it will be freed.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
void assignFilterParams( CFilterStep& fs, const bool IsUpsample,
	const int ResampleFactor, const double FltCutoff, const double DCGain,
	const bool UseFltOrig, const bool IsModel ) const
{
	// Select the window parameters: a zero cutoff requests the predefined
	// "half-band" filter, any other value requests the low-pass filter.

	double Alpha;
	double HalfLen;
	double Theta;

	if( FltCutoff == 0.0 )
	{
		const double m = 2.0 / ResampleFactor;
		Alpha = Params.HBFltAlpha;
		HalfLen = 0.5 * Params.HBFltLen / m;
		Theta = AVIR_PI * Params.HBFltCutoff * m;
	}
	else
	{
		Alpha = Params.LPFltAlpha;
		HalfLen = 0.25 * Params.LPFltBaseLen / FltCutoff;
		Theta = AVIR_PI * Params.LPFltCutoffMult * FltCutoff;
	}

	// An upsampling filter is stretched by the resampling factor, and its
	// gain is multiplied by the same factor.

	if( IsUpsample )
	{
		HalfLen *= ResampleFactor;
		Theta /= ResampleFactor;
	}

	fs.DCGain = ( IsUpsample ? DCGain * ResampleFactor : DCGain );

	fs.FltOrig.Len2 = HalfLen;
	fs.FltOrig.Freq = Theta;
	fs.FltOrig.Alpha = Alpha;
	fs.FltOrig.DCGain = fs.DCGain;

	CDSPPeakedCosineLPF w( HalfLen, Theta, Alpha );

	fs.IsUpsample = IsUpsample;
	fs.ResampleFactor = ResampleFactor;
	fs.FltLatency = w.fl2;

	int FltExt; // Filter's extension due to fpclass :: elalign.

	if( !IsModel )
	{
		// Design the filter in FltOrig, then copy it into the aligned
		// working buffer.

		fs.FltOrig.alloc( w.FilterLen );

		w.generateLPF( &fs.FltOrig[ 0 ], 1.0 );
		optimizeFIRFilter( fs.FltOrig, fs.FltLatency );
		normalizeFIRFilter( &fs.FltOrig[ 0 ], fs.FltOrig.getCapacity(),
			fs.DCGain );

		allocFilter( fs.Flt, fs.FltOrig.getCapacity(), false, &FltExt );
		copyArray( &fs.FltOrig[ 0 ], &fs.Flt[ 0 ],
			fs.FltOrig.getCapacity() );

		if( !UseFltOrig )
		{
			fs.FltOrig.free();
		}
	}
	else
	{
		allocFilter( fs.Flt, w.FilterLen, true, &FltExt );

		if( UseFltOrig )
		{
			// Allocate a real (zeroed) buffer even in modeling mode
			// since this filter may be copied by the filter bank.

			fs.FltOrig.alloc( w.FilterLen );
			memset( &fs.FltOrig[ 0 ], 0,
				w.FilterLen * sizeof( fs.FltOrig[ 0 ]));
		}
	}

	if( !IsUpsample )
	{
		if( !UseFltOrig )
		{
			fs.EdgePixelCount = fs.EdgePixelCountDef;
		}

		return;
	}

	const int PrefixLen = fs.Flt.getCapacity() - fs.FltLatency -
		ResampleFactor - FltExt;

	allocFilter( fs.PrefixDC, PrefixLen, IsModel );
	allocFilter( fs.SuffixDC, fs.FltLatency, IsModel );

	if( IsModel )
	{
		return;
	}

	// Create prefix and suffix "tails" used during upsampling: each tail
	// is a sum of copies of the filter taken at increasing multiples of
	// the resampling factor.

	int Rem;

	const fptype* ip = &fs.Flt[ fs.FltLatency + ResampleFactor ];
	copyArray( ip, &fs.PrefixDC[ 0 ], PrefixLen );

	for( Rem = PrefixLen - ResampleFactor; Rem > 0;
		Rem -= ResampleFactor )
	{
		ip += ResampleFactor;
		addArray( ip, &fs.PrefixDC[ 0 ], Rem );
	}

	fptype* op = &fs.SuffixDC[ 0 ];
	copyArray( &fs.Flt[ 0 ], op, fs.FltLatency );

	for( Rem = fs.FltLatency - ResampleFactor; Rem > 0;
		Rem -= ResampleFactor )
	{
		op += ResampleFactor;
		addArray( &fs.Flt[ 0 ], op, Rem );
	}
}
|
|
|
|
/**
|
|
* Function adds a correction filter that tries to achieve a linear
|
|
* frequency response at all frequencies. The actual resulting response
|
|
* may feature a slight damping of the highest frequencies since a
|
|
* suitably short correction filter cannot fix steep high-frequency
|
|
* damping.
|
|
*
|
|
* This function assumes that the resizing step is currently the last
|
|
* step, even if it was not inserted yet: this allows placement of the
|
|
* correction filter both before and after the resizing step.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param bw Resulting bandwidth relative to the original bandwidth (which
|
|
* is 1.0), usually 1/k. Should be <= 1.0.
|
|
* @param IsPreCorrection "True" if the filtering step was already created
|
|
* and it is first in the Steps array. "True" also adds edge pixels to
|
|
* reduce edge artifacts.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
void addCorrectionFilter( CFilterSteps& Steps, const double bw,
	const bool IsPreCorrection, const bool IsModel ) const
{
	// The correction step is either the pre-created first step, or a new
	// step appended to the array.

	CFilterStep& CorrStep = ( IsPreCorrection ? Steps[ 0 ] : Steps.add() );
	CorrStep.IsUpsample = false;
	CorrStep.ResampleFactor = 1;
	CorrStep.DCGain = 1.0;
	CorrStep.EdgePixelCount = ( IsPreCorrection ?
		CorrStep.EdgePixelCountDef : 0 );

	if( IsModel )
	{
		allocFilter( CorrStep.Flt, CDSPFIREQ :: calcFilterLength(
			Params.CorrFltLen, CorrStep.FltLatency ), true );

		return;
	}

	const int BinCount = 65; // Frequency response bins to control.
	const int BinCount1 = BinCount - 1;
	int i;
	int j;
	double re;
	double im;

	// Adjustment introduced by all steps at all frequencies of interest.

	CBuffer< double > Bins( BinCount );

	for( j = 0; j < BinCount; j++ )
	{
		Bins[ j ] = 1.0;
	}

	// With pre-correction the first array item is the correction step
	// itself and is skipped; otherwise the last (just added) item is.

	const int FirstStep = ( IsPreCorrection ? 1 : 0 );
	const int StepEnd = Steps.getItemCount() - ( IsPreCorrection ? 0 : 1 );
	double curbw = 1.0; // Bandwidth of the filter at the current step.

	for( i = FirstStep; i < StepEnd; i++ )
	{
		const CFilterStep& Step = Steps[ i ];

		if( Step.IsUpsample )
		{
			curbw *= Step.ResampleFactor;

			if( Step.FltOrig.getCapacity() > 0 )
			{
				continue;
			}
		}

		const double dcg = 1.0 / Step.DCGain; // DC gain correction.
		const bool IsResizeStep = ( Step.ResampleFactor == 0 );

		const fptype* const Flt = ( IsResizeStep ?
			Step.FltBank -> getFilter( 0 ) : &Step.Flt[ 0 ]);

		const int FltLen = ( IsResizeStep ?
			Step.FltBank -> getFilterLen() : Step.Flt.getCapacity() );

		// Accumulate the inverse of this step's magnitude response,
		// within the bounds of bandwidth of interest.

		for( j = 0; j < BinCount; j++ )
		{
			const double th = AVIR_PI * bw / curbw * j / BinCount1;

			calcFIRFilterResponse( Flt, FltLen, th, re, im );

			Bins[ j ] /= sqrt( re * re + im * im ) * dcg;
		}

		if( !Step.IsUpsample && Step.ResampleFactor > 1 )
		{
			curbw /= Step.ResampleFactor;
		}
	}

	// Build the equalizing filter from the accumulated bins.

	CDSPFIREQ EQ;
	EQ.init( bw * 2.0, Params.CorrFltLen, BinCount, 0.0, bw, false,
		Params.CorrFltAlpha );

	CorrStep.FltLatency = EQ.getFilterLatency();

	CBuffer< double > Filter( EQ.getFilterLength() );
	EQ.buildFilter( Bins, &Filter[ 0 ]);
	normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 );
	optimizeFIRFilter( Filter, CorrStep.FltLatency );
	normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 );

	allocFilter( CorrStep.Flt, Filter.getCapacity() );
	copyArray( &Filter[ 0 ], &CorrStep.Flt[ 0 ], Filter.getCapacity() );

	// Debug aid (disabled): prints a theoretically achieved final
	// frequency response at various feature sizes (from DC to 1 pixel).
	// Values above 255 mean features become brighter, values below 255
	// mean features become dimmer.

/*	const double sbw = ( bw > 1.0 ? 1.0 / bw : 1.0 );

	for( j = 0; j < BinCount; j++ )
	{
		const double th = AVIR_PI * sbw * j / BinCount1;

		calcFIRFilterResponse( &CorrStep.Flt[ 0 ],
			CorrStep.Flt.getCapacity(), th, re, im );

		printf( "%f\n", sqrt( re * re + im * im ) / Bins[ j ] * 255 );
	}

	printf( "***\n" );*/
}
|
|
|
|
/**
|
|
* Function adds a sharpening filter if image is being upsized. Such
|
|
* sharpening allows to spot interpolation filter's stop-band attenuation:
|
|
* if attenuation is too weak, a "dark grid" and other artifacts may
|
|
* become visible.
|
|
*
|
|
* It is assumed that 40 decibel stop-band attenuation should be
|
|
* considered a required minimum: this allows application of (deliberately
|
|
* strong) 64X sharpening without spotting any artifacts.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param bw Resulting bandwidth relative to the original bandwidth (which
|
|
* is 1.0), usually 1/k.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* actual filter building.
|
|
*/
|
|
|
|
static void addSharpenTest( CFilterSteps& Steps, const double bw,
	const bool IsModel )
{
	// Nothing is added unless the bandwidth exceeds 1.0.

	if( bw <= 1.0 )
	{
		return;
	}

	const double FltLen = 10.0 * bw;

	CFilterStep& SharpStep = Steps.add();
	SharpStep.IsUpsample = false;
	SharpStep.ResampleFactor = 1;
	SharpStep.DCGain = 1.0;
	SharpStep.EdgePixelCount = 0;

	if( IsModel )
	{
		allocFilter( SharpStep.Flt, CDSPFIREQ :: calcFilterLength( FltLen,
			SharpStep.FltLatency ), true );

		return;
	}

	const int BinCount = 200;
	CBuffer< double > Bins( BinCount );

	// Bins below the threshold keep unity gain, bins at and above it are
	// boosted 256 times.

	int Thresh = (int) round( BinCount / bw * 1.75 );

	if( Thresh > BinCount )
	{
		Thresh = BinCount;
	}

	int j;

	for( j = 0; j < BinCount; j++ )
	{
		Bins[ j ] = ( j < Thresh ? 1.0 : 256.0 );
	}

	CDSPFIREQ EQ;
	EQ.init( bw * 2.0, FltLen, BinCount, 0.0, bw, false, 1.7 );

	SharpStep.FltLatency = EQ.getFilterLatency();

	CBuffer< double > Filter( EQ.getFilterLength() );
	EQ.buildFilter( Bins, &Filter[ 0 ]);
	normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 );
	optimizeFIRFilter( Filter, SharpStep.FltLatency );
	normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 );

	allocFilter( SharpStep.Flt, Filter.getCapacity() );
	copyArray( &Filter[ 0 ], &SharpStep.Flt[ 0 ], Filter.getCapacity() );

	// Debug aid (disabled): prints the magnitude response of the filter.

/*	for( j = 0; j < BinCount; j++ )
	{
		const double th = AVIR_PI * j / ( BinCount - 1 );
		double re;
		double im;

		calcFIRFilterResponse( &SharpStep.Flt[ 0 ],
			SharpStep.Flt.getCapacity(), th, re, im );

		printf( "%f\n", sqrt( re * re + im * im ));
	}

	printf( "***\n" );*/
}
|
|
|
|
/**
|
|
* Function builds sequence of filtering steps depending on the specified
|
|
* resizing coefficient. The last steps included are always the resizing
|
|
* step then (possibly) the correction step.
|
|
*
|
|
* @param Steps Array that receives filtering steps.
|
|
* @param[out] Vars Variables object.
|
|
* @param FltBank Filter bank to initialize and use.
|
|
* @param DCGain The overall DC gain to apply. This DC gain is applied to
|
|
* the first filtering step only (upsampling or filtering step).
|
|
* @param ModeFlags Build mode flags to use. This is a bitmap of switches
|
|
* that enable or disable certain algorithm features.
|
|
* @param IsModel "True" if filtering steps modeling is performed without
|
|
* the actual filter allocation and building.
|
|
*/
|
|
|
|
void buildFilterSteps( CFilterSteps& Steps, CImageResizerVars& Vars,
	CDSPFracFilterBankLin< fptype >& FltBank, const double DCGain,
	const int ModeFlags, const bool IsModel ) const
{
	Steps.clear();

	// Decode the build mode switches.

	const bool DoFltAndIntCombo = (( ModeFlags & 1 ) != 0 ); // Combine
		// the filter and the interpolator.
	const bool ForceHiOrderInt = (( ModeFlags & 2 ) != 0 ); // Force use
		// of a higher-order interpolation.
	const bool UseHalfband = (( ModeFlags & 4 ) != 0 ); // Use half-band
		// filter.

	const double bw = 1.0 / Vars.k; // Resulting bandwidth.
	const int UpsampleFactor = ( (int) floor( Vars.k ) < 2 ? 2 : 1 );

	// When k <= 1 the correction filter is applied first: a placeholder
	// step is reserved for it at index 0.

	const bool IsPreCorrection = ( Vars.k <= 1.0 );
	double FltCutoff = ( IsPreCorrection ? 1.0 : bw ); // Cutoff frequency
		// of the first filtering step.
	const double corrbw = ( IsPreCorrection ? 1.0 : bw ); // Bandwidth at
		// the correction step.

	if( IsPreCorrection )
	{
		Steps.add();
	}

	double IntCutoffMult; // Interpolation filter cutoff multiplier.
	CFilterStep* ReuseStep = NULL; // If not NULL, resizing step should
		// use this step object instead of creating a new one.
	CFilterStep* ExtFltStep = NULL; // Use FltOrig of this step as the
		// external filter to be applied to the interpolator.

	// Add 1 upsampling or several downsampling filters.

	if( UpsampleFactor > 1 )
	{
		CFilterStep& UpStep = Steps.add();
		assignFilterParams( UpStep, true, UpsampleFactor, FltCutoff,
			DCGain, DoFltAndIntCombo, IsModel );

		IntCutoffMult = FltCutoff * 2.0 / UpsampleFactor;

		if( DoFltAndIntCombo )
		{
			ExtFltStep = &UpStep;
		}
	}
	else
	{
		int DownsampleFactor;

		for( ; ; )
		{
			DownsampleFactor = (int) floor( 0.5 / FltCutoff );
			const bool IsOverLimit = ( DownsampleFactor > 16 );

			if( IsOverLimit )
			{
				// A half-band filter is added unconditionally in order
				// to keep filter lengths lower for more precise
				// frequency response and less edge artifacts.

				DownsampleFactor = 16;
			}
			else
			if( !( UseHalfband && DownsampleFactor > 1 ))
			{
				if( DownsampleFactor < 1 )
				{
					DownsampleFactor = 1;
				}

				break;
			}

			assignFilterParams( Steps.add(), false, DownsampleFactor,
				0.0, 1.0, false, IsModel );

			FltCutoff *= DownsampleFactor;
		}

		CFilterStep& DownStep = Steps.add();
		assignFilterParams( DownStep, false, DownsampleFactor, FltCutoff,
			DCGain, DoFltAndIntCombo, IsModel );

		IntCutoffMult = FltCutoff / 0.5;

		if( DoFltAndIntCombo )
		{
			ReuseStep = &DownStep;
			ExtFltStep = &DownStep;
		}
		else
		{
			IntCutoffMult *= DownsampleFactor;
		}
	}

	// Insert resizing and correction steps.

	CFilterStep& ResizeStep = ( ReuseStep == NULL ?
		Steps.add() : *ReuseStep );

	Vars.ResizeStep = Steps.getItemCount() - 1;
	ResizeStep.IsUpsample = false;
	ResizeStep.ResampleFactor = 0;
	ResizeStep.DCGain = ( ExtFltStep == NULL ? 1.0 :
		ExtFltStep -> DCGain );

	initFilterBank( FltBank, IntCutoffMult, ForceHiOrderInt,
		( ExtFltStep == NULL ? ResizeStep.FltOrig :
		ExtFltStep -> FltOrig ));

	// Point the step at the fixed bank when the initialized bank compares
	// equal to it, otherwise at the supplied bank.

	if( FltBank == FixedFilterBank )
	{
		ResizeStep.FltBank =
			(CDSPFracFilterBankLin< fptype >*) &FixedFilterBank;
	}
	else
	{
		ResizeStep.FltBank = &FltBank;
	}

	addCorrectionFilter( Steps, corrbw, IsPreCorrection, IsModel );

	//addSharpenTest( Steps, bw, IsModel );
}
|
|
|
|
/**
|
|
* Function extends *this upsampling step so that it produces more
|
|
* upsampled pixels that cover the prefix and suffix needs of the next
|
|
* step. After the call to this function the InPrefix and InSuffix
|
|
* variables of the next step will be set to zero.
|
|
*
|
|
* @param fs Upsampling filtering step.
|
|
* @param NextStep The next step structure.
|
|
*/
|
|
|
|
static void extendUpsample( CFilterStep& fs, CFilterStep& NextStep )
{
	const int rf = fs.ResampleFactor;

	// Take over the next step's prefix: the required number of upsampled
	// pixels is rounded up to whole input pixels of this step.

	fs.InPrefix = ( NextStep.InPrefix + rf - 1 ) / rf;
	fs.OutPrefix += fs.InPrefix * rf;
	NextStep.InPrefix = 0;

	// Same for the suffix.

	fs.InSuffix = ( NextStep.InSuffix + rf - 1 ) / rf;
	fs.OutSuffix += fs.InSuffix * rf;
	NextStep.InSuffix = 0;
}
|
|
|
|
/**
|
|
* Function fills resizing step's RPosBuf array, excluding the actual
|
|
* "ftp" pointers and "SrcOffs" offsets.
|
|
*
|
|
* This array should be cleared if the resizing step or offset were
|
|
* changed. Otherwise this function only fills the elements required to
|
|
* cover resizing step's OutLen.
|
|
*
|
|
* This function is called by the updateFilterStepBuffers() function.
|
|
*
|
|
* @param fs Resizing step.
|
|
* @param Vars Variables object.
|
|
*/
|
|
|
|
static void fillRPosBuf( CFilterStep& fs, const CImageResizerVars& Vars )
{
	// Positions below the buffer's current capacity are considered to be
	// already filled; only the elements from PrevLen up to OutLen are
	// produced here.

	const int PrevLen = fs.RPosBuf -> getCapacity();

	if( fs.OutLen <= PrevLen )
	{
		// Nothing to fill. Returning here also avoids forming a reference
		// to the element at index PrevLen, which does not exist when the
		// buffer is not enlarged.

		return;
	}

	fs.RPosBuf -> increaseCapacity( fs.OutLen );

	typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ PrevLen ];
	const int FracCount = fs.FltBank -> getFracCount();
	const double o = Vars.o;
	const double k = Vars.k;
	int i;

	for( i = PrevLen; i < fs.OutLen; i++ )
	{
		const double SrcPos = o + k * i;
		const int SrcPosInt = (int) floor( SrcPos );
		const double x = ( SrcPos - SrcPosInt ) * FracCount;
		int fti = (int) x;

		if( fti >= FracCount )
		{
			// For a tiny negative SrcPos the difference
			// ( SrcPos - SrcPosInt ) rounds up to exactly 1.0, which
			// yields fti == FracCount. Such index is out of range for
			// per-fraction arrays sized to FracCount (for example, the
			// used fractions map), so the last valid filter index is
			// used instead; the stored fractional part then becomes 1.0.

			fti = FracCount - 1;
		}

		rpos -> x = (typename fpclass :: fptypeatom) ( x - fti );
		rpos -> fti = fti;
		rpos -> SrcPosInt = SrcPosInt;
		rpos++;
	}
}
|
|
|
|
/**
|
|
* Function updates filtering step buffer lengths depending on the
|
|
* specified source and new scanline lengths. This function should be
|
|
* called after the buildFilterSteps() function.
|
|
*
|
|
* @param Steps Array that receives filtering steps.
|
|
* @param[out] Vars Variables object, will receive buffer size and length.
|
|
* This function expects "k" and "o" variable values that will be
|
|
* adjusted by this function.
|
|
* @param RPosBufArray Resizing position buffers array, used to obtain
|
|
* buffer to initialize and use (will be reused if it is already fully or
|
|
* partially filled).
|
|
* @param SrcLen Source scanline's length in pixels.
|
|
* @param NewLen New scanline's length in pixels.
|
|
*/
|
|
|
|
static void updateFilterStepBuffers( CFilterSteps& Steps,
	CImageResizerVars& Vars,
	typename CFilterStep :: CRPosBufArray& RPosBufArray, int SrcLen,
	const int NewLen )
{
	const int StepCount = Steps.getItemCount();
	int UpsampleIndex = -1; // Index of the upsampling step, if any.
	int CurBuf = 0; // Buffer index (0 or 1) the current step reads from.
	int i;

	for( i = 0; i < StepCount; i++ )
	{
		CFilterStep& fs = Steps[ i ];

		// Each step reads from one buffer and writes to the other.

		fs.Vars = &Vars;
		fs.InLen = SrcLen;
		fs.InBuf = CurBuf;
		fs.OutBuf = ( CurBuf + 1 ) & 1;

		if( fs.IsUpsample )
		{
			// Upsampling step: "k" and "o" are scaled up by the factor.

			UpsampleIndex = i;
			Vars.k *= fs.ResampleFactor;
			Vars.o *= fs.ResampleFactor;
			fs.InPrefix = 0;
			fs.InSuffix = 0;
			fs.OutLen = fs.InLen * fs.ResampleFactor;
			fs.OutPrefix = fs.FltLatency;
			fs.OutSuffix = fs.Flt.getCapacity() - fs.FltLatency -
				fs.ResampleFactor;

			// Enlarge the output suffix so that the output span also
			// covers the SuffixDC and PrefixDC tails.

			const int FullSpan = fs.OutPrefix + fs.OutLen + fs.OutSuffix;
			const int SuffixSpan = fs.InLen * fs.ResampleFactor +
				fs.SuffixDC.getCapacity();

			if( SuffixSpan > FullSpan )
			{
				fs.OutSuffix += SuffixSpan - FullSpan;
			}

			const int TailSpan = fs.OutLen + fs.OutSuffix;

			if( fs.PrefixDC.getCapacity() > TailSpan )
			{
				fs.OutSuffix += fs.PrefixDC.getCapacity() - TailSpan;
			}
		}
		else
		if( fs.ResampleFactor == 0 )
		{
			// Resizing step: prefix and suffix follow from the leftmost
			// and rightmost source pixels touched by the interpolator.

			const int FilterLenD2 = fs.FltBank -> getFilterLen() / 2;
			const int FilterLenD21 = FilterLenD2 - 1;

			const int ResizeLPix = (int) floor( Vars.o ) - FilterLenD21;
			const int ResizeRPix = (int) floor( Vars.o +
				( NewLen - 1 ) * Vars.k ) + FilterLenD2 + 1;

			fs.InPrefix = ( ResizeLPix < 0 ? -ResizeLPix : 0 );
			fs.InSuffix = ( ResizeRPix > fs.InLen ?
				ResizeRPix - fs.InLen : 0 );

			fs.OutLen = NewLen;
			fs.RPosBuf = &RPosBufArray.getRPosBuf( Vars.k, Vars.o,
				fs.FltBank -> getFracCount() );

			fillRPosBuf( fs, Vars );
		}
		else
		{
			// Filtering or downsampling step: "k" and "o" are scaled
			// down by the factor, "o" is then shifted by the edge pixel
			// count.

			Vars.k /= fs.ResampleFactor;
			Vars.o /= fs.ResampleFactor;
			Vars.o += fs.EdgePixelCount;

			// OutLen is additionally extended to produce more precise
			// edge pixels.

			const int ExtOutLen = ( fs.InLen + fs.ResampleFactor - 1 ) /
				fs.ResampleFactor + fs.EdgePixelCount;

			fs.InPrefix = fs.FltLatency;
			fs.InSuffix = fs.Flt.getCapacity() - fs.FltLatency - 1;
			fs.InSuffix += ( ExtOutLen - 1 ) * fs.ResampleFactor + 1 -
				fs.InLen;

			fs.InPrefix += fs.EdgePixelCount * fs.ResampleFactor;
			fs.OutLen = ExtOutLen;
			fs.OutLen += fs.EdgePixelCount;
		}

		CurBuf = fs.OutBuf;
		SrcLen = fs.OutLen;
	}

	// The last step writes to output buffer index 2.

	Steps[ Steps.getItemCount() - 1 ].OutBuf = 2;

	if( UpsampleIndex != -1 )
	{
		extendUpsample( Steps[ UpsampleIndex ],
			Steps[ UpsampleIndex + 1 ]);
	}
}
|
|
|
|
/**
|
|
* Function calculates an optimal intermediate buffer length that will
|
|
* cover all needs of the specified filtering steps. This function should
|
|
* be called after the updateFilterStepBuffers() function.
|
|
*
|
|
* Function also updates resizing step's RPosBuf pointers to the filter
|
|
* bank and SrcOffs values.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param[out] Vars Variables object, will receive buffer size and length.
|
|
* @param ResElIncr Resulting (final) element increment, used to produce
|
|
* de-interleaved result. For horizontal processing this value is equal
|
|
* to last step's OutLen, for vertical processing this value is equal to
|
|
* resulting image's width.
|
|
*/
|
|
|
|
static void updateBufLenAndRPosPtrs( CFilterSteps& Steps,
	CImageResizerVars& Vars, const int ResElIncr )
{
	int MaxPrefix[ 2 ] = { 0, 0 }; // Largest prefix per buffer.
	int MaxLen[ 2 ] = { 0, 0 }; // Largest length per buffer.
	int i;

	// Pass 1: collect the largest prefix and length requirements of both
	// intermediate buffers, and set InElIncr of every step.

	for( i = 0; i < Steps.getItemCount(); i++ )
	{
		CFilterStep& fs = Steps[ i ];
		const int ib = fs.InBuf;
		const int InSpan = fs.InLen + fs.InSuffix;

		if( fs.InPrefix > MaxPrefix[ ib ])
		{
			MaxPrefix[ ib ] = fs.InPrefix;
		}

		if( InSpan > MaxLen[ ib ])
		{
			MaxLen[ ib ] = InSpan;
		}

		fs.InElIncr = fs.InPrefix + InSpan;

		if( fs.OutBuf == 2 )
		{
			// The last step's output goes to buffer index 2, which is
			// not tracked here.

			break;
		}

		const int ob = fs.OutBuf;
		int OutSpan = fs.OutLen;

		if( fs.IsUpsample )
		{
			// Only upsampling steps have output prefix and suffix.

			if( fs.OutPrefix > MaxPrefix[ ob ])
			{
				MaxPrefix[ ob ] = fs.OutPrefix;
			}

			OutSpan = fs.OutLen + fs.OutSuffix;
		}

		if( OutSpan > MaxLen[ ob ])
		{
			MaxLen[ ob ] = OutSpan;
		}
	}

	// Pass 2: update OutElIncr values of all steps so that each step's
	// output increment matches the next step's input increment.

	for( i = 0; i < Steps.getItemCount(); i++ )
	{
		CFilterStep& fs = Steps[ i ];

		if( fs.OutBuf == 2 )
		{
			fs.OutElIncr = ResElIncr;
			break;
		}

		CFilterStep& Next = Steps[ i + 1 ];

		if( fs.IsUpsample )
		{
			// The larger of the two increments wins.

			const int UpSpan = fs.OutPrefix + fs.OutLen + fs.OutSuffix;

			if( UpSpan > Next.InElIncr )
			{
				Next.InElIncr = UpSpan;
			}
		}

		fs.OutElIncr = Next.InElIncr;
	}

	// Update temporary buffers' lengths and offsets.

	for( i = 0; i < 2; i++ )
	{
		Vars.BufOffs[ i ] = MaxPrefix[ i ];
		Vars.BufLen[ i ] = MaxPrefix[ i ] + MaxLen[ i ];

		if( Vars.packmode == 0 )
		{
			Vars.BufOffs[ i ] *= Vars.ElCount;
		}

		Vars.BufLen[ i ] *= Vars.ElCount;
	}

	// Update RPosBuf filter pointers and SrcOffs of the resizing step.

	CFilterStep& rs = Steps[ Vars.ResizeStep ];
	typename CFilterStep :: CResizePos* const Positions =
		&(*rs.RPosBuf)[ 0 ];

	const int em = ( fpclass :: packmode == 0 ? Vars.ElCount : 1 );
	const int FilterLenD21 = rs.FltBank -> getFilterLen() / 2 - 1;

	for( i = 0; i < rs.OutLen; i++ )
	{
		typename CFilterStep :: CResizePos& p = Positions[ i ];

		p.ftp = rs.FltBank -> getFilter( p.fti );
		p.SrcOffs = ( p.SrcPosInt - FilterLenD21 ) * em;
	}
}
|
|
|
|
/**
|
|
* Function modifies the overall (DC) gain of the correction filter in the
|
|
* pre-built filtering steps array.
|
|
*
|
|
* @param Steps Filtering steps.
|
|
* @param m Multiplier to apply to the correction filter.
|
|
*/
|
|
|
|
void modifyCorrFilterDCGain( CFilterSteps& Steps, const double m ) const
{
	// The filter of the last step is scaled if that step is a plain
	// filtering step (not upsampling, resampling factor 1); otherwise the
	// filter of the first step is scaled.

	const int LastIndex = Steps.getItemCount() - 1;

	const bool IsLastPlainFilter = ( !Steps[ LastIndex ].IsUpsample &&
		Steps[ LastIndex ].ResampleFactor == 1 );

	CBuffer< fptype >& CorrFlt = ( IsLastPlainFilter ?
		Steps[ LastIndex ].Flt : Steps[ 0 ].Flt );

	const int Len = CorrFlt.getCapacity();
	int i;

	for( i = 0; i < Len; i++ )
	{
		CorrFlt[ i ] = (fptype) ( (double) CorrFlt[ i ] * m );
	}
}
|
|
|
|
/**
|
|
* Function builds a map of used fractional delay filters based on the
|
|
* resizing positions buffer.
|
|
*
|
|
* @param fs Resizing step.
|
|
* @param[out] UsedFracMap Map of used fractional delay filters.
|
|
*/
|
|
|
|
static void fillUsedFracMap( const CFilterStep& fs,
|
|
CBuffer< uint8_t >& UsedFracMap )
|
|
{
|
|
const int FracCount = fs.FltBank -> getFracCount();
|
|
UsedFracMap.increaseCapacity( FracCount, false );
|
|
memset( &UsedFracMap[ 0 ], 0, FracCount * sizeof( UsedFracMap[ 0 ]));
|
|
|
|
typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ 0 ];
|
|
int i;
|
|
|
|
for( i = 0; i < fs.OutLen; i++ )
|
|
{
|
|
UsedFracMap[ rpos -> fti ] |= 1;
|
|
rpos++;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function calculates the overall filtering steps complexity per
|
|
* scanline. Each complexity unit corresponds to a single multiply-add
|
|
* operation. Data copy and pointer math operations are not included in
|
|
* this calculation, it is assumed that they correlate to the multiply-add
|
|
* operations. Calculation also does not include final rounding, dithering
|
|
* and clamping operations since they cannot be optimized out anyway.
|
|
*
|
|
* Calculation of the CRPosBuf buffer is not included since it cannot be
|
|
* avoided.
|
|
*
|
|
* This function should be called after the updateFilterStepBuffers()
|
|
* function.
|
|
*
|
|
* @param Steps Filtering steps array.
|
|
* @param Vars Variables object.
|
|
* @param UsedFracMap The map of used fractional delay filters.
|
|
* @param ScanlineCount Scanline count.
|
|
*/
|
|
|
|
static int calcComplexity( const CFilterSteps& Steps,
|
|
const CImageResizerVars& Vars, const CBuffer< uint8_t >& UsedFracMap,
|
|
const int ScanlineCount )
|
|
{
|
|
int fcnum; // Filter complexity multiplier numerator.
|
|
int fcdenom; // Filter complexity multiplier denominator.
|
|
|
|
if( Vars.packmode != 0 )
|
|
{
|
|
fcnum = 1;
|
|
fcdenom = 1;
|
|
}
|
|
else
|
|
{
|
|
// In interleaved processing mode, filters require 1 less
|
|
// multiplication per 2 multiply-add instructions.
|
|
|
|
fcnum = 3;
|
|
fcdenom = 4;
|
|
}
|
|
|
|
int s = 0; // Complexity per one scanline.
|
|
int s2 = 0; // Complexity per all scanlines.
|
|
int i;
|
|
|
|
for( i = 0; i < Steps.getItemCount(); i++ )
|
|
{
|
|
const CFilterStep& fs = Steps[ i ];
|
|
|
|
s2 += 65 * fs.Flt.getCapacity(); // Filter creation complexity.
|
|
|
|
if( fs.IsUpsample )
|
|
{
|
|
if( fs.FltOrig.getCapacity() > 0 )
|
|
{
|
|
continue;
|
|
}
|
|
|
|
s += ( fs.Flt.getCapacity() *
|
|
( fs.InPrefix + fs.InLen + fs.InSuffix ) +
|
|
fs.SuffixDC.getCapacity() + fs.PrefixDC.getCapacity() ) *
|
|
Vars.ElCount;
|
|
}
|
|
else
|
|
if( fs.ResampleFactor == 0 )
|
|
{
|
|
s += fs.FltBank -> getFilterLen() *
|
|
( fs.FltBank -> getOrder() + Vars.ElCount ) * fs.OutLen;
|
|
|
|
s2 += fs.FltBank -> calcInitComplexity( UsedFracMap );
|
|
}
|
|
else
|
|
{
|
|
s += fs.Flt.getCapacity() * Vars.ElCount * fs.OutLen *
|
|
fcnum / fcdenom;
|
|
}
|
|
}
|
|
|
|
return( s + s2 / ScanlineCount );
|
|
}
|
|
|
|
/**
|
|
* @brief Thread-isolated data used for scanline processing.
|
|
*
|
|
* This structure holds data necessary for image's horizontal or vertical
|
|
* scanline processing, including scanline processing queue.
|
|
*
|
|
* @tparam Tin Source element data type. Intermediate buffers store data
|
|
* in floating point format.
|
|
* @tparam Tout Destination element data type. Intermediate buffers store
|
|
* data in floating point format.
|
|
*/
|
|
|
|
template< class Tin, class Tout >
|
|
class CThreadData : public CImageResizerThreadPool :: CWorkload
|
|
{
|
|
public:
|
|
virtual void process()
|
|
{
|
|
processScanlineQueue();
|
|
}
|
|
|
|
/**
|
|
* This enumeration lists possible scanline operations.
|
|
*/
|
|
|
|
enum EScanlineOperation
|
|
{
|
|
sopResizeH, ///< Resize horizontal scanline.
|
|
///<
|
|
sopResizeV, ///< Resize vertical scanline.
|
|
///<
|
|
sopDitherAndUnpackH, ///< Dither and unpack horizontal scanline.
|
|
///<
|
|
sopUnpackH ///< Unpack horizontal scanline.
|
|
///<
|
|
};
|
|
|
|
/**
|
|
* Function initializes *this thread data object and assigns certain
|
|
* variables provided by the higher level code.
|
|
*
|
|
* @param aThreadIndex Index of this thread data (0-based).
|
|
* @param aThreadCount Total number of threads used during processing.
|
|
* @param aSteps Filtering steps.
|
|
* @param aVars Image resizer variables.
|
|
*/
|
|
|
|
void init( const int aThreadIndex, const int aThreadCount,
|
|
const CFilterSteps& aSteps, const CImageResizerVars& aVars )
|
|
{
|
|
ThreadIndex = aThreadIndex;
|
|
ThreadCount = aThreadCount;
|
|
Steps = &aSteps;
|
|
Vars = &aVars;
|
|
}
|
|
|
|
/**
|
|
* Function initializes scanline processing queue, and updates
|
|
* capacities of intermediate buffers.
|
|
*
|
|
* @param aOp Operation to perform over scanline.
|
|
* @param TotalLines The total number of scanlines that will be
|
|
* processed by all threads.
|
|
* @param aSrcLen Source scanline length in pixels.
|
|
* @param aSrcIncr Source scanline buffer increment. Ignored in
|
|
* horizontal scanline processing.
|
|
* @param aResIncr Resulting scanline buffer increment. Ignored in
|
|
* horizontal scanline processing.
|
|
*/
|
|
|
|
void initScanlineQueue( const EScanlineOperation aOp,
|
|
const int TotalLines, const int aSrcLen, const int aSrcIncr = 0,
|
|
const int aResIncr = 0 )
|
|
{
|
|
const int l = Vars -> BufLen[ 0 ] + Vars -> BufLen[ 1 ];
|
|
|
|
if( Bufs.getCapacity() < l )
|
|
{
|
|
Bufs.alloc( l, fpclass :: fpalign );
|
|
}
|
|
|
|
BufPtrs[ 0 ] = Bufs + Vars -> BufOffs[ 0 ];
|
|
BufPtrs[ 1 ] = Bufs + Vars -> BufLen[ 0 ] + Vars -> BufOffs[ 1 ];
|
|
|
|
int j;
|
|
int ml = 0;
|
|
|
|
for( j = 0; j < Steps -> getItemCount(); j++ )
|
|
{
|
|
const CFilterStep& fs = (*Steps)[ j ];
|
|
|
|
if( fs.ResampleFactor == 0 &&
|
|
ml < fs.FltBank -> getFilterLen() )
|
|
{
|
|
ml = fs.FltBank -> getFilterLen();
|
|
}
|
|
}
|
|
|
|
TmpFltBuf.alloc( ml, fpclass :: fpalign );
|
|
ScanlineOp = aOp;
|
|
SrcLen = aSrcLen;
|
|
SrcIncr = aSrcIncr;
|
|
ResIncr = aResIncr;
|
|
QueueLen = 0;
|
|
Queue.increaseCapacity(( TotalLines + ThreadCount - 1 ) /
|
|
ThreadCount, false );
|
|
}
|
|
|
|
/**
|
|
* Function adds a scanline to the queue buffer. The
|
|
* initScanlineQueue() function should be called before calling this
|
|
* function. The number of calls to this add function should not
|
|
* exceed the TotalLines spread over all threads.
|
|
*
|
|
* @param SrcBuf Source scanline buffer.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void addScanlineToQueue( void* const SrcBuf, void* const ResBuf )
|
|
{
|
|
Queue[ QueueLen ].SrcBuf = SrcBuf;
|
|
Queue[ QueueLen ].ResBuf = ResBuf;
|
|
QueueLen++;
|
|
}
|
|
|
|
/**
|
|
* Function processes all queued scanlines.
|
|
*/
|
|
|
|
void processScanlineQueue()
|
|
{
|
|
int i;
|
|
|
|
switch( ScanlineOp )
|
|
{
|
|
case sopResizeH:
|
|
{
|
|
for( i = 0; i < QueueLen; i++ )
|
|
{
|
|
resizeScanlineH( (Tin*) Queue[ i ].SrcBuf,
|
|
(fptype*) Queue[ i ].ResBuf );
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopResizeV:
|
|
{
|
|
for( i = 0; i < QueueLen; i++ )
|
|
{
|
|
resizeScanlineV( (fptype*) Queue[ i ].SrcBuf,
|
|
(fptype*) Queue[ i ].ResBuf );
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopDitherAndUnpackH:
|
|
{
|
|
if( Vars -> UseSRGBGamma )
|
|
{
|
|
for( i = 0; i < QueueLen; i++ )
|
|
{
|
|
CFilterStep :: applySRGBGamma(
|
|
(fptype*) Queue[ i ].SrcBuf, SrcLen, *Vars );
|
|
|
|
Ditherer.dither( (fptype*) Queue[ i ].SrcBuf );
|
|
|
|
CFilterStep :: unpackScanline(
|
|
(fptype*) Queue[ i ].SrcBuf,
|
|
(Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( i = 0; i < QueueLen; i++ )
|
|
{
|
|
Ditherer.dither( (fptype*) Queue[ i ].SrcBuf );
|
|
|
|
CFilterStep :: unpackScanline(
|
|
(fptype*) Queue[ i ].SrcBuf,
|
|
(Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case sopUnpackH:
|
|
{
|
|
if( Vars -> UseSRGBGamma )
|
|
{
|
|
for( i = 0; i < QueueLen; i++ )
|
|
{
|
|
CFilterStep :: applySRGBGamma(
|
|
(fptype*) Queue[ i ].SrcBuf, SrcLen, *Vars );
|
|
|
|
CFilterStep :: unpackScanline(
|
|
(fptype*) Queue[ i ].SrcBuf,
|
|
(Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( i = 0; i < QueueLen; i++ )
|
|
{
|
|
CFilterStep :: unpackScanline(
|
|
(fptype*) Queue[ i ].SrcBuf,
|
|
(Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function returns ditherer object associated with *this thread data
|
|
* object.
|
|
*/
|
|
|
|
CDitherer& getDitherer()
|
|
{
|
|
return( Ditherer );
|
|
}
|
|
|
|
private:
|
|
int ThreadIndex; ///< Thread index.
|
|
///<
|
|
int ThreadCount; ///< Thread count.
|
|
///<
|
|
const CFilterSteps* Steps; ///< Filtering steps.
|
|
///<
|
|
const CImageResizerVars* Vars; ///< Image resizer variables.
|
|
///<
|
|
CBuffer< fptype > Bufs; ///< Flip-flop intermediate buffers.
|
|
///<
|
|
fptype* BufPtrs[ 3 ]; ///< Flip-flop buffer pointers (referenced by
|
|
///< filtering step's InBuf and OutBuf indices).
|
|
///<
|
|
CBuffer< fptype > TmpFltBuf; ///< Temporary buffer used in the
|
|
///< doResize() function, aligned by fpclass :: fpalign.
|
|
///<
|
|
EScanlineOperation ScanlineOp; ///< Operation to perform over
|
|
///< scanline.
|
|
///<
|
|
int SrcLen; ///< Source scanline length in the last queue.
|
|
///<
|
|
int SrcIncr; ///< Source scanline buffer increment in the last queue.
|
|
///<
|
|
int ResIncr; ///< Resulting scanline buffer increment in the last
|
|
///< queue.
|
|
///<
|
|
CDitherer Ditherer; ///< Ditherer object to use.
|
|
///<
|
|
|
|
/**
|
|
* @brief Scanline processing queue item.
|
|
*
|
|
* Scanline processing queue item.
|
|
*/
|
|
|
|
struct CQueueItem
|
|
{
|
|
void* SrcBuf; ///< Source scanline buffer, will by typecasted to
|
|
///< Tin or fptype*.
|
|
///<
|
|
void* ResBuf; ///< Resulting scanline buffer, will by typecasted
|
|
///< to Tout or fptype*.
|
|
///<
|
|
};
|
|
|
|
CBuffer< CQueueItem > Queue; ///< Scanline processing queue.
|
|
///<
|
|
int QueueLen; ///< Queue length.
|
|
///<
|
|
|
|
/**
|
|
* Function resizes a single horizontal scanline.
|
|
*
|
|
* @param SrcBuf Source scanline buffer. Can be either horizontal or
|
|
* vertical.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void resizeScanlineH( const Tin* const SrcBuf, fptype* const ResBuf )
|
|
{
|
|
(*Steps)[ 0 ].packScanline( SrcBuf, BufPtrs[ 0 ], SrcLen );
|
|
BufPtrs[ 2 ] = ResBuf;
|
|
int j;
|
|
|
|
for( j = 0; j < Steps -> getItemCount(); j++ )
|
|
{
|
|
const CFilterStep& fs = (*Steps)[ j ];
|
|
fs.prepareInBuf( BufPtrs[ fs.InBuf ]);
|
|
const int DstIncr =
|
|
( Vars -> packmode == 0 ? Vars -> ElCount : 1 );
|
|
|
|
if( fs.ResampleFactor != 0 )
|
|
{
|
|
if( fs.IsUpsample )
|
|
{
|
|
fs.doUpsample( BufPtrs[ fs.InBuf ],
|
|
BufPtrs[ fs.OutBuf ]);
|
|
}
|
|
else
|
|
{
|
|
fs.doFilter( BufPtrs[ fs.InBuf ],
|
|
BufPtrs[ fs.OutBuf ], DstIncr );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fs.doResize( BufPtrs[ fs.InBuf ], BufPtrs[ fs.OutBuf ],
|
|
DstIncr, TmpFltBuf );
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function resizes a single vertical scanline.
|
|
*
|
|
* @param SrcBuf Source scanline buffer. Can be either horizontal or
|
|
* vertical.
|
|
* @param ResBuf Resulting scanline buffer.
|
|
*/
|
|
|
|
void resizeScanlineV( const fptype* const SrcBuf,
|
|
fptype* const ResBuf )
|
|
{
|
|
(*Steps)[ 0 ].convertVtoH( SrcBuf, BufPtrs[ 0 ], SrcLen,
|
|
SrcIncr );
|
|
|
|
BufPtrs[ 2 ] = ResBuf;
|
|
int j;
|
|
|
|
for( j = 0; j < Steps -> getItemCount(); j++ )
|
|
{
|
|
const CFilterStep& fs = (*Steps)[ j ];
|
|
fs.prepareInBuf( BufPtrs[ fs.InBuf ]);
|
|
const int DstIncr = ( fs.OutBuf == 2 ? ResIncr :
|
|
( Vars -> packmode == 0 ? Vars -> ElCount : 1 ));
|
|
|
|
if( fs.ResampleFactor != 0 )
|
|
{
|
|
if( fs.IsUpsample )
|
|
{
|
|
fs.doUpsample( BufPtrs[ fs.InBuf ],
|
|
BufPtrs[ fs.OutBuf ]);
|
|
}
|
|
else
|
|
{
|
|
fs.doFilter( BufPtrs[ fs.InBuf ],
|
|
BufPtrs[ fs.OutBuf ], DstIncr );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fs.doResize( BufPtrs[ fs.InBuf ], BufPtrs[ fs.OutBuf ],
|
|
DstIncr, TmpFltBuf );
|
|
}
|
|
}
|
|
}
|
|
};
|
|
};
|
|
|
|
#undef AVIR_PI
|
|
#undef AVIR_PId2
|
|
|
|
} // namespace avir
|
|
|
|
#endif // AVIR_CIMAGERESIZER_INCLUDED
|