AVIR
High-quality pro image resizing library
 All Classes Files Functions Variables Typedefs Macros
avir.h
Go to the documentation of this file.
1 //$ nobt
2 //$ nocpp
3 
54 #ifndef AVIR_CIMAGERESIZER_INCLUDED
55 #define AVIR_CIMAGERESIZER_INCLUDED
56 
57 #include <stdint.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <math.h>
61 
62 namespace avir {
63 
68 #define AVIR_VERSION "3.0"
69 
75 #define AVIR_PI 3.1415926535897932
76 
82 #define AVIR_PId2 1.5707963267948966
83 
90 #define AVIR_NOCTOR( ClassName ) \
91  private: \
92  ClassName( const ClassName& ) { } \
93  ClassName& operator = ( const ClassName& ) { return( *this ); }
94 
/**
 * Rounds a value to the nearest integer, with halfway cases rounded away
 * from zero. The intermediate result passes through an "int" cast, so the
 * input magnitude must fit the "int" range.
 *
 * @param d Value to round.
 * @return Rounded value, in the same type as the input.
 */

template< class T >
inline T round( const T d )
{
	if( d < (T) 0 )
	{
		// Negative input: round the magnitude, then restore the sign.

		return( -(T) (int) ( (T) 0.5 - d ));
	}

	return( (T) (int) ( d + (T) 0.5 ));
}
109 
/**
 * Limits a value to the [minv; maxv] range. The lower bound is tested
 * first, so "minv" wins if the bounds are given in reverse order and the
 * value lies below "minv".
 *
 * @param Value Value to limit.
 * @param minv Lower bound.
 * @param maxv Upper bound.
 * @return The limited value.
 */

template< class T >
inline T clamp( const T& Value, const T minv, const T maxv )
{
	if( Value < minv )
	{
		return( minv );
	}

	return( Value > maxv ? maxv : Value );
}
137 
/**
 * Approximates pow( x, 2.4 ) with a fixed polynomial plus one rational
 * term. Used by convertSRGB2Lin() in place of a pow() call. All arithmetic
 * is performed in "double" and the result is cast back to T.
 *
 * @param x Argument; the sRGB conversion passes values in the 0 to 1 range.
 * @return Approximate value of "x" raised to the power of 2.4.
 */

template< class T >
inline T pow24_sRGB( const T x )
{
	const double sq = (double) x * x;
	const double cube = sq * x;
	const double quad = sq * sq;

	// The expression below is kept as a single statement so that the
	// order of floating-point operations stays fixed.

	return( (T) ( 0.0985766365536824 + 0.839474952656502 * sq +
		0.363287814061725 * cube - 0.0125559718896615 /
		( 0.12758338921578 + 0.290283465468235 * x ) -
		0.231757513261358 * x - 0.0395365717969074 * quad ));
}
157 
/**
 * Approximates pow( x, 1.0 / 2.4 ) using three nested square roots and a
 * fixed set of coefficients. Used by convertLin2SRGB() in place of a pow()
 * call. All arithmetic is performed in "double" and the result is cast
 * back to T.
 *
 * @param x Argument; must be non-negative since its square root is taken.
 * @return Approximate value of "x" raised to the power of 1/2.4.
 */

template< class T >
inline T pow24i_sRGB( const T x )
{
	const double r2 = sqrt( (double) x ); // x^(1/2)
	const double r4 = sqrt( r2 ); // x^(1/4)
	const double r8 = sqrt( r4 ); // x^(1/8)

	return( (T) ( 0.000213364515060263 + 0.0149409239419218 * x +
		0.433973412731747 * r2 + r4 * ( 0.659628181609715 * r8 -
		0.0380957908841466 - 0.0706476137208521 * r2 )));
}
176 
/**
 * Converts an sRGB gamma-encoded value to linear scale. Values at or below
 * 0.04045 use the linear segment (division by 12.92); values above it use
 * the pow24_sRGB() approximation.
 *
 * @param s sRGB-encoded value; the thresholds imply a 0 to 1 scale.
 * @return Linear-scale value.
 */

template< class T >
inline T convertSRGB2Lin( const T s )
{
	const T a = (T) 0.055;

	return( s <= (T) 0.04045 ? s / (T) 12.92 :
		pow24_sRGB(( s + a ) / ( (T) 1 + a )));
}
196 
/**
 * Converts a linear-scale value to sRGB gamma encoding. Values at or below
 * 0.0031308 use the linear segment (multiplication by 12.92); values above
 * it use the pow24i_sRGB() approximation.
 *
 * @param s Linear-scale value; the thresholds imply a 0 to 1 scale.
 * @return sRGB-encoded value.
 */

template< class T >
inline T convertLin2SRGB( const T s )
{
	const T a = (T) 0.055;

	return( s <= (T) 0.0031308 ? (T) 12.92 * s :
		( (T) 1 + a ) * pow24i_sRGB( s ) - a );
}
216 
/**
 * Copies elements from one strided array to another, converting each
 * element to the destination type.
 *
 * @param ip Input array.
 * @param[out] op Output array.
 * @param l Number of elements to copy; nothing is done if not positive.
 * @param ipinc Input pointer increment per element.
 * @param opinc Output pointer increment per element.
 */

template< class T1, class T2 >
inline void copyArray( const T1* ip, T2* op, int l,
	const int ipinc = 1, const int opinc = 1 )
{
	for( ; l > 0; l-- )
	{
		*op = (T2) *ip;
		ip += ipinc;
		op += opinc;
	}
}
242 
/**
 * Adds the elements of one strided array to the elements of another
 * (op[ i ] += ip[ i ]).
 *
 * @param ip Input array.
 * @param[out] op Array that receives the sums.
 * @param l Number of elements to add; nothing is done if not positive.
 * @param ipinc Input pointer increment per element.
 * @param opinc Output pointer increment per element.
 */

template< class T1, class T2 >
inline void addArray( const T1* ip, T2* op, int l,
	const int ipinc = 1, const int opinc = 1 )
{
	for( ; l > 0; l-- )
	{
		*op += *ip;
		ip += ipinc;
		op += opinc;
	}
}
265 
/**
 * Writes the same short run of input elements repeatedly into the output
 * array, converting each element to the destination type. Run lengths of
 * 1 to 4 use unrolled code; any other length uses a generic inner loop.
 *
 * @param ip Input run.
 * @param ipl Number of elements in the input run.
 * @param[out] op Output array.
 * @param l Number of times the run is written.
 * @param opinc Output pointer increment applied after each written run.
 */

template< class T1, class T2 >
inline void replicateArray( const T1* const ip, const int ipl, T2* op, int l,
	const int opinc )
{
	switch( ipl )
	{
		case 1:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T2) ip[ 0 ];
				op += opinc;
			}

			break;

		case 2:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T2) ip[ 0 ];
				op[ 1 ] = (T2) ip[ 1 ];
				op += opinc;
			}

			break;

		case 3:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T2) ip[ 0 ];
				op[ 1 ] = (T2) ip[ 1 ];
				op[ 2 ] = (T2) ip[ 2 ];
				op += opinc;
			}

			break;

		case 4:
			for( ; l > 0; l-- )
			{
				op[ 0 ] = (T2) ip[ 0 ];
				op[ 1 ] = (T2) ip[ 1 ];
				op[ 2 ] = (T2) ip[ 2 ];
				op[ 3 ] = (T2) ip[ 3 ];
				op += opinc;
			}

			break;

		default:
			for( ; l > 0; l-- )
			{
				int i;

				for( i = 0; i < ipl; i++ )
				{
					op[ i ] = (T2) ip[ i ];
				}

				op += opinc;
			}

			break;
	}
}
345 
/**
 * Calculates the complex frequency response of a FIR filter at a single
 * circular frequency. The cosine and sine sequences are produced by a
 * two-term recurrence instead of per-tap cos()/sin() calls.
 *
 * @param flt Filter taps.
 * @param fltlen Number of filter taps.
 * @param th Circular frequency, in radians per sample.
 * @param[out] re0 Receives the real part of the response.
 * @param[out] im0 Receives the imaginary part of the response.
 * @param fltlat Filter latency in samples; shifts the phase of the first
 * tap to -fltlat * th.
 */

template< class T >
inline void calcFIRFilterResponse( const T* flt, int fltlen,
	const double th, double& re0, double& im0, const int fltlat = 0 )
{
	const double sincr = 2.0 * cos( th );
	double cvalue1 = 1.0;
	double svalue1 = 0.0;

	if( fltlat != 0 )
	{
		cvalue1 = cos( -fltlat * th );
		svalue1 = sin( -fltlat * th );
	}

	// Values of the sequences one step before the first tap.

	double cvalue2 = cos( -( fltlat + 1 ) * th );
	double svalue2 = sin( -( fltlat + 1 ) * th );

	double re = 0.0;
	double im = 0.0;

	for( ; fltlen > 0; fltlen--, flt++ )
	{
		re += cvalue1 * flt[ 0 ];
		im += svalue1 * flt[ 0 ];

		const double cnext = sincr * cvalue1 - cvalue2;
		cvalue2 = cvalue1;
		cvalue1 = cnext;

		const double snext = sincr * svalue1 - svalue2;
		svalue2 = svalue1;
		svalue1 = snext;
	}

	re0 = re;
	im0 = im;
}
404 
/**
 * Scales the taps of a FIR filter so that their sum equals the requested
 * DC gain. The taps must not sum to zero, since the scale factor is
 * DCGain divided by that sum.
 *
 * @param[in,out] p Filter taps.
 * @param l Number of filter taps.
 * @param DCGain Required sum of the taps after scaling.
 * @param pstep Pointer increment between successive taps.
 */

template< class T >
inline void normalizeFIRFilter( T* const p, const int l, const double DCGain,
	const int pstep = 1 )
{
	double s = 0.0;
	T* pp = p;
	int i;

	for( i = 0; i < l; i++ )
	{
		s += *pp;
		pp += pstep;
	}

	s = DCGain / s;
	pp = p;

	for( i = 0; i < l; i++ )
	{
		*pp = (T) ( *pp * s );
		pp += pstep;
	}
}
441 
/**
 * Memory buffer class for element array storage, with optional address
 * alignment.
 *
 * Storage is obtained with malloc() and released with free(), and contents
 * are copied with memcpy(): element constructors and destructors are never
 * called, so T should be a type that can be handled that way. Allocation
 * results are not checked.
 *
 * @tparam T Buffer element type.
 * @tparam capint Integer type used to hold the capacity value.
 */

template< class T, typename capint = int >
class CBuffer
{
public:
	/**
	 * Creates an empty buffer that owns no storage.
	 */

	CBuffer()
		: Data( NULL )
		, DataAligned( NULL )
		, Capacity( 0 )
		, Alignment( 0 )
	{
	}

	/**
	 * Creates a buffer with the given capacity. Contents are left
	 * uninitialized.
	 *
	 * @param aCapacity Capacity, in elements.
	 * @param aAlignment Address alignment in bytes, 0 for the default
	 * malloc() alignment. Should be a power of 2, see alignptr().
	 */

	CBuffer( const capint aCapacity, const int aAlignment = 0 )
	{
		allocinit( aCapacity, aAlignment );
	}

	/**
	 * Creates a copy of another buffer: same capacity, same alignment,
	 * same contents.
	 *
	 * @param Source Buffer to copy.
	 */

	CBuffer( const CBuffer& Source )
	{
		allocinit( Source.Capacity, Source.Alignment );

		if( Capacity > 0 )
		{
			memcpy( DataAligned, Source.DataAligned, Capacity * sizeof( T ));
		}
	}

	~CBuffer()
	{
		freeData();
	}

	/**
	 * Replaces this buffer with a copy of another buffer. Assigning a
	 * buffer to itself leaves it unchanged.
	 *
	 * @param Source Buffer to copy.
	 * @return Reference to this buffer.
	 */

	CBuffer& operator = ( const CBuffer& Source )
	{
		// Without this check alloc() would release the storage that the
		// memcpy() below is about to read from.

		if( this != &Source )
		{
			alloc( Source.Capacity, Source.Alignment );

			if( Capacity > 0 )
			{
				memcpy( DataAligned, Source.DataAligned,
					Capacity * sizeof( T ));
			}
		}

		return( *this );
	}

	/**
	 * Releases the current storage and allocates new storage. Previous
	 * contents are lost.
	 *
	 * @param aCapacity New capacity, in elements.
	 * @param aAlignment Address alignment in bytes, 0 for the default
	 * malloc() alignment.
	 */

	void alloc( const capint aCapacity, const int aAlignment = 0 )
	{
		freeData();
		allocinit( aCapacity, aAlignment );
	}

	/**
	 * Releases the storage and returns the buffer to the empty state.
	 */

	void free()
	{
		freeData();
		Data = NULL;
		DataAligned = NULL;
		Capacity = 0;
		Alignment = 0;
	}

	/**
	 * @return Current capacity, in elements.
	 */

	capint getCapacity() const
	{
		return( Capacity );
	}

	/**
	 * Overwrites the stored capacity value. No storage is reallocated, so
	 * the caller is responsible for the value matching the actual storage.
	 *
	 * @param NewCapacity Capacity value to store.
	 */

	void forceCapacity( const capint NewCapacity )
	{
		Capacity = NewCapacity;
	}

	/**
	 * Reallocates the buffer to a larger capacity, keeping the current
	 * alignment. Does nothing if NewCapacity is below the current
	 * capacity.
	 *
	 * @param NewCapacity New capacity, in elements.
	 * @param DoDataCopy "True" to carry the existing contents over to the
	 * new storage, "false" to discard them.
	 */

	void increaseCapacity( const capint NewCapacity,
		const bool DoDataCopy = true )
	{
		if( NewCapacity < Capacity )
		{
			return;
		}

		if( DoDataCopy )
		{
			const capint PrevCapacity = Capacity;
			T* const PrevData = Data;
			T* const PrevDataAligned = DataAligned;

			allocinit( NewCapacity, Alignment );

			if( PrevCapacity > 0 )
			{
				memcpy( DataAligned, PrevDataAligned,
					PrevCapacity * sizeof( T ));
			}

			:: free( PrevData );
		}
		else
		{
			:: free( Data );
			allocinit( NewCapacity, Alignment );
		}
	}

	/**
	 * Lowers the stored capacity value without reallocating. Does nothing
	 * if NewCapacity is not below the current capacity.
	 *
	 * @param NewCapacity New capacity value, in elements.
	 */

	void truncateCapacity( const capint NewCapacity )
	{
		if( NewCapacity >= Capacity )
		{
			return;
		}

		Capacity = NewCapacity;
	}

	/**
	 * Makes sure the capacity is at least ReqCapacity. When growth is
	 * needed the capacity is enlarged in steps of about one third until it
	 * reaches the requested value, and existing contents are kept.
	 *
	 * @param ReqCapacity Required capacity, in elements.
	 */

	void updateCapacity( const capint ReqCapacity )
	{
		if( ReqCapacity <= Capacity )
		{
			return;
		}

		capint NewCapacity = Capacity;

		while( NewCapacity < ReqCapacity )
		{
			NewCapacity += NewCapacity / 3 + 1;
		}

		increaseCapacity( NewCapacity );
	}

	/**
	 * @return Pointer to the first (aligned) element of the buffer.
	 */

	operator T* () const
	{
		return( DataAligned );
	}

private:
	T* Data; ///< Pointer returned by malloc(); the one passed to free().
	T* DataAligned; ///< Address-aligned pointer into Data.
	capint Capacity; ///< Capacity, in elements.
	int Alignment; ///< Address alignment in use, in bytes; 0 if none.

	/**
	 * Allocates storage and sets all member variables. Does not release
	 * any storage held before the call.
	 *
	 * @param aCapacity Capacity, in elements.
	 * @param aAlignment Address alignment in bytes, 0 for none.
	 */

	void allocinit( const capint aCapacity, const int aAlignment )
	{
		if( aAlignment == 0 )
		{
			Data = (T*) :: malloc( aCapacity * sizeof( T ));
			DataAligned = Data;
			Alignment = 0;
		}
		else
		{
			// Extra bytes leave room for moving the start address
			// forward to an aligned position.

			Data = (T*) :: malloc( aCapacity * sizeof( T ) + aAlignment );
			DataAligned = alignptr( Data, aAlignment );
			Alignment = aAlignment;
		}

		Capacity = aCapacity;
	}

	/**
	 * Releases the storage. Member variables are left unchanged.
	 */

	void freeData()
	{
		:: free( Data );
	}

	/**
	 * Moves a pointer forward to the next address that is a multiple of
	 * "align". A pointer that is already aligned is moved forward by a
	 * full "align" bytes. The masking assumes "align" is a power of 2.
	 *
	 * @param ptr Pointer to align.
	 * @param align Alignment, in bytes.
	 * @return Aligned pointer.
	 */

	template< class Tp >
	inline Tp alignptr( const Tp ptr, const uintptr_t align )
	{
		return( (Tp) ( (uintptr_t) ptr + align -
			( (uintptr_t) ptr & ( align - 1 ))) );
	}
};
718 
731 template< class T >
733 {
734 public:
735  CStructArray()
736  : ItemCount( 0 )
737  {
738  }
739 
740  CStructArray( const CStructArray& Source )
741  : ItemCount( 0 )
742  , Items( Source.getItemCount() )
743  {
744  while( ItemCount < Source.getItemCount() )
745  {
746  Items[ ItemCount ] = new T( Source[ ItemCount ]);
747  ItemCount++;
748  }
749  }
750 
751  ~CStructArray()
752  {
753  clear();
754  }
755 
756  CStructArray& operator = ( const CStructArray& Source )
757  {
758  clear();
759 
760  const int NewCount = Source.ItemCount;
761  Items.updateCapacity( NewCount );
762 
763  while( ItemCount < NewCount )
764  {
765  Items[ ItemCount ] = new T( Source[ ItemCount ]);
766  ItemCount++;
767  }
768 
769  return( *this );
770  }
771 
772  T& operator []( const int Index )
773  {
774  return( *Items[ Index ]);
775  }
776 
777  const T& operator []( const int Index ) const
778  {
779  return( *Items[ Index ]);
780  }
781 
789  T& add()
790  {
791  if( ItemCount == Items.getCapacity() )
792  {
793  Items.increaseCapacity( ItemCount * 3 / 2 + 1 );
794  }
795 
796  Items[ ItemCount ] = new T();
797  ItemCount++;
798 
799  return( (*this)[ ItemCount - 1 ]);
800  }
801 
810  void setItemCount( const int NewCount )
811  {
812  if( NewCount > ItemCount )
813  {
814  Items.increaseCapacity( NewCount );
815 
816  while( ItemCount < NewCount )
817  {
818  Items[ ItemCount ] = new T();
819  ItemCount++;
820  }
821  }
822  else
823  {
824  while( ItemCount > NewCount )
825  {
826  ItemCount--;
827  delete Items[ ItemCount ];
828  }
829  }
830  }
831 
836  void clear()
837  {
838  while( ItemCount > 0 )
839  {
840  ItemCount--;
841  delete Items[ ItemCount ];
842  }
843  }
844 
849  int getItemCount() const
850  {
851  return( ItemCount );
852  }
853 
854 private:
855  int ItemCount;
856  CBuffer< T* > Items;
858 };
860 
/**
 * Sine signal generator. Produces successive samples of a sine wave with
 * a two-term recurrence, so that sin() is called only during construction.
 */

class CSineGen
{
public:
	/**
	 * @param si Phase increment per sample, in radians.
	 * @param ph Phase of the first generated sample, in radians.
	 */

	CSineGen( const double si, const double ph )
		: svalue1( sin( ph ))
		, svalue2( sin( ph - si ))
		, sincr( 2.0 * cos( si ))
	{
	}

	/**
	 * @return The next sample of the sine wave.
	 */

	double generate()
	{
		const double cur = svalue1;
		const double next = sincr * cur - svalue2;

		svalue2 = cur;
		svalue1 = next;

		return( cur );
	}

private:
	double svalue1; ///< Sample that the next generate() call returns.
	double svalue2; ///< Sample one step before svalue1.
	double sincr; ///< Recurrence coefficient, 2 * cos( si ).
};
909 
923 {
924 public:
933  CDSPWindowGenPeakedCosine( const double aAlpha, const double aLen2 )
934  : Alpha( aAlpha )
935  , Len2( aLen2 )
936  , Len2i( 1.0 / aLen2 )
937  , wn( 0.0 )
938  , w1( AVIR_PId2 / Len2, AVIR_PI * 0.5 )
939  {
940  }
941 
946  double generate()
947  {
948  const double h = pow( wn * Len2i, Alpha );
949  wn += 1.0;
950 
951  return( w1.generate() * ( 1.0 - h ));
952  }
953 
954 private:
955  double Alpha;
956  double Len2;
958  double Len2i;
960  double wn;
962  CSineGen w1;
965 };
967 
990 {
991 public:
1008  void init( const double SampleRate, const double aFilterLength,
1009  const int aBandCount, const double MinFreq, const double MaxFreq,
1010  const bool IsLogBands, const double WFAlpha )
1011  {
1012  FilterLength = aFilterLength;
1013  BandCount = aBandCount;
1014 
1015  CenterFreqs.alloc( BandCount );
1016 
1017  z = (int) ceil( FilterLength * 0.5 );
1018  zi = z + ( z & 1 );
1019  z2 = z * 2;
1020 
1021  CBuffer< double > oscbuf( z2 );
1022  initOscBuf( oscbuf );
1023 
1024  CBuffer< double > winbuf( z );
1025  initWinBuf( winbuf, WFAlpha );
1026 
1027  UseFirstVirtBand = ( MinFreq > 0.0 );
1028  const int k = zi * ( BandCount + ( UseFirstVirtBand ? 1 : 0 ));
1029  Kernels1.alloc( k );
1030  Kernels2.alloc( k );
1031 
1032  double m; // Frequency step multiplier.
1033  double mo; // Frequency step offset (addition).
1034 
1035  if( IsLogBands )
1036  {
1037  m = exp( log( MaxFreq / MinFreq ) / ( BandCount - 1 ));
1038  mo = 0.0;
1039  }
1040  else
1041  {
1042  m = 1.0;
1043  mo = ( MaxFreq - MinFreq ) / ( BandCount - 1 );
1044  }
1045 
1046  double f = MinFreq;
1047  double x1 = 0.0;
1048  double x2;
1049  int si;
1050 
1051  if( UseFirstVirtBand )
1052  {
1053  si = 0;
1054  }
1055  else
1056  {
1057  si = 1;
1058  CenterFreqs[ 0 ] = 0.0;
1059  f = f * m + mo;
1060  }
1061 
1062  double* kernbuf1 = &Kernels1[ 0 ];
1063  double* kernbuf2 = &Kernels2[ 0 ];
1064  int i;
1065 
1066  for( i = si; i < BandCount; i++ )
1067  {
1068  x2 = f * 2.0 / SampleRate;
1069  CenterFreqs[ i ] = x2;
1070 
1071  fillBandKernel( x1, x2, kernbuf1, kernbuf2, oscbuf, winbuf );
1072 
1073  kernbuf1 += zi;
1074  kernbuf2 += zi;
1075  x1 = x2;
1076  f = f * m + mo;
1077  }
1078 
1079  if( x1 < 1.0 )
1080  {
1081  UseLastVirtBand = true;
1082  fillBandKernel( x1, 1.0, kernbuf1, kernbuf2, oscbuf, winbuf );
1083  }
1084  else
1085  {
1086  UseLastVirtBand = false;
1087  }
1088  }
1089 
1094  int getFilterLength() const
1095  {
1096  return( z2 - 1 );
1097  }
1098 
1103  int getFilterLatency() const
1104  {
1105  return( z - 1 );
1106  }
1107 
1117  void buildFilter( const double* const BandGains, double* const Filter )
1118  {
1119  const double* kernbuf1 = &Kernels1[ 0 ];
1120  const double* kernbuf2 = &Kernels2[ 0 ];
1121  double x1 = 0.0;
1122  double y1 = BandGains[ 0 ];
1123  double x2;
1124  double y2;
1125 
1126  int i;
1127  int si;
1128 
1129  if( UseFirstVirtBand )
1130  {
1131  si = 1;
1132  x2 = CenterFreqs[ 0 ];
1133  y2 = y1;
1134  }
1135  else
1136  {
1137  si = 2;
1138  x2 = CenterFreqs[ 1 ];
1139  y2 = BandGains[ 1 ];
1140  }
1141 
1142  copyBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2,
1143  x1 * y2 - x2 * y1 );
1144 
1145  kernbuf1 += zi;
1146  kernbuf2 += zi;
1147  x1 = x2;
1148  y1 = y2;
1149 
1150  for( i = si; i < BandCount; i++ )
1151  {
1152  x2 = CenterFreqs[ i ];
1153  y2 = BandGains[ i ];
1154 
1155  addBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2,
1156  x1 * y2 - x2 * y1 );
1157 
1158  kernbuf1 += zi;
1159  kernbuf2 += zi;
1160  x1 = x2;
1161  y1 = y2;
1162  }
1163 
1164  if( UseLastVirtBand )
1165  {
1166  addBandKernel( Filter, kernbuf1, kernbuf2, y1 - y2,
1167  x1 * y2 - y1 );
1168  }
1169 
1170  for( i = 0; i < z - 1; i++ )
1171  {
1172  Filter[ z + i ] = Filter[ z - 2 - i ];
1173  }
1174  }
1175 
1186  static int calcFilterLength( const double aFilterLength, int& Latency )
1187  {
1188  const int l = (int) ceil( aFilterLength * 0.5 );
1189  Latency = l - 1;
1190 
1191  return( l * 2 - 1 );
1192  }
1193 
1194 private:
1195  double FilterLength;
1196  int z;
1198  int zi;
1200  int z2;
1204  int BandCount;
1206  CBuffer< double > CenterFreqs;
1208  CBuffer< double > Kernels1;
1211  CBuffer< double > Kernels2;
1214  bool UseFirstVirtBand;
1217  bool UseLastVirtBand;
1221 
1232  void initOscBuf( double* oscbuf ) const
1233  {
1234  int i = z;
1235 
1236  while( i > 0 )
1237  {
1238  oscbuf[ 0 ] = 0.0;
1239  oscbuf[ 1 ] = 1.0;
1240  oscbuf += 2;
1241  i--;
1242  }
1243  }
1244 
1253  void initWinBuf( double* winbuf, const double Alpha ) const
1254  {
1255  CDSPWindowGenPeakedCosine wf( Alpha, FilterLength * 0.5 );
1256  int i;
1257 
1258  for( i = 1; i <= z; i++ )
1259  {
1260  winbuf[ z - i ] = wf.generate();
1261  }
1262  }
1263 
1280  void fillBandKernel( const double x1, const double x2, double* kernbuf1,
1281  double* kernbuf2, double* oscbuf, const double* const winbuf )
1282  {
1283  const double s2_incr = AVIR_PI * x2;
1284  const double s2_coeff = 2.0 * cos( s2_incr );
1285 
1286  double s2_value1 = sin( s2_incr * ( -z + 1 ));
1287  double c2_value1 = sin( s2_incr * ( -z + 1 ) + AVIR_PI * 0.5 );
1288  oscbuf[ 0 ] = sin( s2_incr * -z );
1289  oscbuf[ 1 ] = sin( s2_incr * -z + AVIR_PI * 0.5 );
1290 
1291  int ks;
1292 
1293  for( ks = 1; ks < z; ks++ )
1294  {
1295  const int ks2 = ks * 2;
1296  const double s1_value1 = oscbuf[ ks2 ];
1297  const double c1_value1 = oscbuf[ ks2 + 1 ];
1298  oscbuf[ ks2 ] = s2_value1;
1299  oscbuf[ ks2 + 1 ] = c2_value1;
1300 
1301  const double x = AVIR_PI * ( ks - z );
1302  const double v0 = winbuf[ ks - 1 ] / (( x1 - x2 ) * x );
1303 
1304  kernbuf1[ ks - 1 ] = ( x2 * s2_value1 - x1 * s1_value1 +
1305  ( c2_value1 - c1_value1 ) / x ) * v0;
1306 
1307  kernbuf2[ ks - 1 ] = ( s2_value1 - s1_value1 ) * v0;
1308 
1309  s2_value1 = s2_coeff * s2_value1 - oscbuf[ ks2 - 2 ];
1310  c2_value1 = s2_coeff * c2_value1 - oscbuf[ ks2 - 1 ];
1311  }
1312 
1313  kernbuf1[ z - 1 ] = ( x2 * x2 - x1 * x1 ) / ( x1 - x2 ) * 0.5;
1314  kernbuf2[ z - 1 ] = -1.0;
1315  }
1316 
1327  void copyBandKernel( double* outbuf, const double* const kernbuf1,
1328  const double* const kernbuf2, const double c, const double d ) const
1329  {
1330  int ks;
1331 
1332  for( ks = 0; ks < z; ks++ )
1333  {
1334  outbuf[ ks ] = c * kernbuf1[ ks ] + d * kernbuf2[ ks ];
1335  }
1336  }
1337 
1348  void addBandKernel( double* outbuf, const double* const kernbuf1,
1349  const double* const kernbuf2, const double c, const double d ) const
1350  {
1351  int ks;
1352 
1353  for( ks = 0; ks < z; ks++ )
1354  {
1355  outbuf[ ks ] += c * kernbuf1[ ks ] + d * kernbuf2[ ks ];
1356  }
1357  }
1358 };
1359 
1369 {
1370 public:
1371  int fl2;
1372  int FilterLen;
1375 
1386  CDSPPeakedCosineLPF( const double aLen2, const double aFreq2,
1387  const double aAlpha )
1388  : fl2( (int) ceil( aLen2 ) - 1 )
1389  , FilterLen( fl2 + fl2 + 1 )
1390  , Len2( aLen2 )
1391  , Freq2( aFreq2 )
1392  , Alpha( aAlpha )
1393  {
1394  }
1395 
1406  template< class T >
1407  void generateLPF( T* op, const double DCGain )
1408  {
1409  CDSPWindowGenPeakedCosine wf( Alpha, Len2 );
1410  CSineGen f2( Freq2, 0.0 );
1411 
1412  op += fl2;
1413  T* op2 = op;
1414  f2.generate();
1415 
1416  if( DCGain > 0.0 )
1417  {
1418  int t = 1;
1419 
1420  *op = (T) ( Freq2 * wf.generate() );
1421  double s = *op;
1422 
1423  while( t <= fl2 )
1424  {
1425  const T v = (T) ( f2.generate() * wf.generate() / t );
1426  op++;
1427  op2--;
1428  *op = v;
1429  *op2 = v;
1430  s += v + v;
1431  t++;
1432  }
1433 
1434  t = FilterLen;
1435  s = DCGain / s;
1436 
1437  while( t > 0 )
1438  {
1439  *op2 = (T) ( *op2 * s );
1440  op2++;
1441  t--;
1442  }
1443  }
1444  else
1445  {
1446  int t = 1;
1447 
1448  *op = (T) ( Freq2 * wf.generate() );
1449 
1450  while( t <= fl2 )
1451  {
1452  const T v = (T) ( f2.generate() * wf.generate() / t );
1453  op++;
1454  op2--;
1455  *op = v;
1456  *op2 = v;
1457  t++;
1458  }
1459  }
1460  }
1461 
1462 private:
1463  double Len2;
1464  double Freq2;
1467  double Alpha;
1469 };
1471 
1481 class CFltBuffer : public CBuffer< double >
1482 {
1483 public:
1484  double Len2;
1485  double Freq;
1488  double Alpha;
1490  double DCGain;
1492 
1494  CFltBuffer()
1495  : CBuffer< double >()
1496  , Len2( 0.0 )
1497  , Freq( 0.0 )
1498  , Alpha( 0.0 )
1499  , DCGain( 0.0 )
1500  {
1501  }
1502 
1508  bool operator == ( const CFltBuffer& b2 ) const
1509  {
1510  return( Len2 == b2.Len2 && Freq == b2.Freq && Alpha == b2.Alpha &&
1511  DCGain == b2.DCGain );
1512  }
1513 };
1514 
1531 template< class fptype >
1533 {
1534  AVIR_NOCTOR( CDSPFracFilterBankLin );
1535 
1536 public:
1538  : Order( -1 )
1539  {
1540  }
1541 
1552  {
1553  WFLen2 = s.WFLen2;
1554  WFFreq = s.WFFreq;
1555  WFAlpha = s.WFAlpha;
1556  FracCount = s.FracCount;
1557  Order = s.Order;
1558  Alignment = s.Alignment;
1559  SrcFilterLen = s.SrcFilterLen;
1560  FilterLen = s.FilterLen;
1561  FilterSize = s.FilterSize;
1562  IsSrcTableBuilt = false;
1563  ExtFilter = s.ExtFilter;
1564  TableFillFlags.alloc( s.TableFillFlags.getCapacity() );
1565  int i;
1566 
1567  // Copy table fill flags, but shifted so that further initialization
1568  // is still possible (such feature should not be used, though).
1569 
1570  for( i = 0; i < TableFillFlags.getCapacity(); i++ )
1571  {
1572  TableFillFlags[ i ] = (uint8_t) ( s.TableFillFlags[ i ] << 2 );
1573  }
1574  }
1575 
1585  bool operator == ( const CDSPFracFilterBankLin& s ) const
1586  {
1587  return( Order == s.Order && WFLen2 == s.WFLen2 &&
1588  WFFreq == s.WFFreq && WFAlpha == s.WFAlpha &&
1589  FracCount == s.FracCount && ExtFilter == s.ExtFilter );
1590  }
1591 
1613  void init( const int ReqFracCount, const int ReqOrder,
1614  const double BaseLen, const double Cutoff, const double aWFAlpha,
1615  const CFltBuffer& aExtFilter, const int aAlignment = 0,
1616  const int FltLenAlign = 1 )
1617  {
1618  double NewWFLen2 = 0.5 * BaseLen * ReqFracCount;
1619  double NewWFFreq = AVIR_PI * Cutoff / ReqFracCount;
1620  double NewWFAlpha = aWFAlpha;
1621 
1622  if( ReqOrder == Order && NewWFLen2 == WFLen2 && NewWFFreq == WFFreq &&
1623  NewWFAlpha == WFAlpha && ReqFracCount == FracCount &&
1624  aExtFilter == ExtFilter )
1625  {
1626  IsInitRequired = false;
1627  return;
1628  }
1629 
1630  WFLen2 = NewWFLen2;
1631  WFFreq = NewWFFreq;
1632  WFAlpha = NewWFAlpha;
1633  FracCount = ReqFracCount;
1634  Order = ReqOrder;
1635  Alignment = aAlignment;
1636  ExtFilter = aExtFilter;
1637 
1638  CDSPPeakedCosineLPF p( WFLen2, WFFreq, WFAlpha );
1639  SrcFilterLen = ( p.fl2 / ReqFracCount + 1 ) * 2;
1640 
1641  const int ElementSize = ReqOrder + 1;
1642  FilterLen = SrcFilterLen;
1643 
1644  if( ExtFilter.getCapacity() > 0 )
1645  {
1646  FilterLen += ExtFilter.getCapacity() - 1;
1647  }
1648 
1649  FilterLen = ( FilterLen + FltLenAlign - 1 ) & ~( FltLenAlign - 1 );
1650  FilterSize = FilterLen * ElementSize;
1651  IsSrcTableBuilt = false;
1652  IsInitRequired = true;
1653  }
1654 
1660  int getFilterLen() const
1661  {
1662  return( FilterLen );
1663  }
1664 
1669  int getFracCount() const
1670  {
1671  return( FracCount );
1672  }
1673 
1678  int getOrder() const
1679  {
1680  return( Order );
1681  }
1682 
1694  const fptype* getFilter( const int i )
1695  {
1696  if( !IsSrcTableBuilt )
1697  {
1698  buildSrcTable();
1699  }
1700 
1701  fptype* const Res = &Table[ i * FilterSize ];
1702 
1703  if(( TableFillFlags[ i ] & 2 ) == 0 )
1704  {
1705  createFilter( i );
1706  TableFillFlags[ i ] |= 2;
1707 
1708  if( Order > 0 )
1709  {
1710  createFilter( i + 1 );
1711  const fptype* const Res2 = Res + FilterSize;
1712  fptype* const op = Res + FilterLen;
1713  int j;
1714 
1715  // Create higher-order interpolation coefficients (linear
1716  // interpolation).
1717 
1718  for( j = 0; j < FilterLen; j++ )
1719  {
1720  op[ j ] = Res2[ j ] - Res[ j ];
1721  }
1722  }
1723  }
1724 
1725  return( Res );
1726  }
1727 
1733  {
1734  int i;
1735 
1736  for( i = 0; i < FracCount; i++ )
1737  {
1738  getFilter( i );
1739  }
1740  }
1741 
1755  int calcInitComplexity( const CBuffer< uint8_t >& FracUseMap ) const
1756  {
1757  const int FltInitCost = 65; // Cost to initialize a single sample
1758  // of the fractional delay filter.
1759  const int FltUseCost = FilterLen * Order +
1760  SrcFilterLen * ExtFilter.getCapacity(); // Cost to use a single
1761  // fractional delay filter.
1762  const int ucb[ 2 ] = { 0, FltUseCost };
1763  int ic;
1764  int i;
1765 
1766  if( IsInitRequired )
1767  {
1768  ic = FracCount * SrcFilterLen * FltInitCost;
1769 
1770  for( i = 0; i < FracCount; i++ )
1771  {
1772  ic += ucb[ FracUseMap[ i ]];
1773  }
1774  }
1775  else
1776  {
1777  ic = 0;
1778 
1779  for( i = 0; i < FracCount; i++ )
1780  {
1781  if( FracUseMap[ i ] != 0 )
1782  {
1783  ic += ucb[ TableFillFlags[ i ] == 0 ? 1 : 0 ];
1784  }
1785  }
1786  }
1787 
1788  return( ic );
1789  }
1790 
1791 private:
1792  static const int InterpPoints = 2;
1793  double WFLen2;
1796  double WFFreq;
1798  double WFAlpha;
1800  int FracCount;
1802  int Order;
1804  int Alignment;
1806  int SrcFilterLen;
1808  int FilterLen;
1811  int FilterSize;
1815  bool IsInitRequired;
1818  CBuffer< fptype > Table;
1822  CBuffer< uint8_t > TableFillFlags;
1825  CFltBuffer ExtFilter;
1831  CBuffer< double > SrcTable;
1835  bool IsSrcTableBuilt;
1839 
1846  void buildSrcTable()
1847  {
1848  IsSrcTableBuilt = true;
1849  IsInitRequired = false;
1850 
1851  CDSPPeakedCosineLPF p( WFLen2, WFFreq, WFAlpha );
1852 
1853  const int BufLen = SrcFilterLen * FracCount + InterpPoints - 1;
1854  const int BufOffs = InterpPoints / 2 - 1;
1855  const int BufCenter = SrcFilterLen * FracCount / 2 + BufOffs;
1856 
1857  CBuffer< double > Buf( BufLen );
1858  memset( Buf, 0, ( BufCenter - p.fl2 ) * sizeof( double ));
1859  int i = BufLen - BufCenter - p.fl2 - 1;
1860  memset( &Buf[ BufLen - i ], 0, i * sizeof( double ));
1861 
1862  p.generateLPF( &Buf[ BufCenter - p.fl2 ], 0.0 );
1863 
1864  SrcTable.alloc(( FracCount + 1 ) * SrcFilterLen );
1865  TableFillFlags.alloc( FracCount + 1 );
1866  int j;
1867  double* op0 = SrcTable;
1868 
1869  for( i = FracCount; i >= 0; i-- )
1870  {
1871  TableFillFlags[ i ] = 0;
1872  double* p = Buf + BufOffs + i;
1873 
1874  for( j = 0; j < SrcFilterLen; j++ )
1875  {
1876  op0[ 0 ] = p[ 0 ];
1877  op0++;
1878  p += FracCount;
1879  }
1880 
1881  normalizeFIRFilter( op0 - SrcFilterLen, SrcFilterLen, 1.0 );
1882  }
1883 
1884  Table.alloc(( FracCount + 1 ) * FilterSize, Alignment );
1885  }
1886 
1896  void createFilter( const int k )
1897  {
1898  if( TableFillFlags[ k ] != 0 )
1899  {
1900  return;
1901  }
1902 
1903  TableFillFlags[ k ] |= 1;
1904  const int ExtFilterLatency = ExtFilter.getCapacity() / 2;
1905  const int ResLatency = ExtFilterLatency + SrcFilterLen / 2;
1906  int ResLen = SrcFilterLen;
1907 
1908  if( ExtFilter.getCapacity() > 0 )
1909  {
1910  ResLen += ExtFilter.getCapacity() - 1;
1911  }
1912 
1913  const int ResOffs = FilterLen / 2 - ResLatency;
1914  fptype* op = &Table[ k * FilterSize ];
1915  int i;
1916 
1917  for( i = 0; i < ResOffs; i++ )
1918  {
1919  op[ i ] = (fptype) 0;
1920  }
1921 
1922  for( i = ResOffs + ResLen; i < FilterLen; i++ )
1923  {
1924  op[ i ] = (fptype) 0;
1925  }
1926 
1927  op += ResOffs;
1928  const double* const srcflt = &SrcTable[ k * SrcFilterLen ];
1929 
1930  if( ExtFilter.getCapacity() == 0 )
1931  {
1932  for( i = 0; i < ResLen; i++ )
1933  {
1934  op[ i ] = (fptype) srcflt[ i ];
1935  }
1936 
1937  return;
1938  }
1939 
1940  // Perform convolution of extflt and srcflt.
1941 
1942  const double* const extflt = &ExtFilter[ 0 ];
1943  int j;
1944 
1945  for( j = 0; j < ResLen; j++ )
1946  {
1947  int k = 0;
1948  int l = j - ExtFilter.getCapacity() + 1;
1949  int r = l + ExtFilter.getCapacity();
1950 
1951  if( l < 0 )
1952  {
1953  k -= l;
1954  l = 0;
1955  }
1956 
1957  if( r > SrcFilterLen )
1958  {
1959  r = SrcFilterLen;
1960  }
1961 
1962  const double* const extfltb = extflt + k;
1963  const double* const srcfltb = srcflt + l;
1964  double s = 0.0;
1965  l = r - l;
1966 
1967  for( i = 0; i < l; i++ )
1968  {
1969  s += extfltb[ i ] * srcfltb[ i ];
1970  }
1971 
1972  op[ j ] = (fptype) s;
1973  }
1974  }
1975 };
1976 
1992 {
1993 public:
1995  {
1996  }
1997 
1998  virtual ~CImageResizerThreadPool()
1999  {
2000  }
2001 
2010  {
2011  public:
2012  virtual ~CWorkload()
2013  {
2014  }
2015 
2021  virtual void process() = 0;
2022  };
2023 
2031  virtual int getSuggestedWorkloadCount() const
2032  {
2033  return( 1 );
2034  }
2035 
2054  virtual void addWorkload( CWorkload* const Workload )
2055  {
2056  }
2057 
2066  virtual void startAllWorkloads()
2067  {
2068  }
2069 
2075  {
2076  }
2077 
2084  virtual void removeAllWorkloads()
2085  {
2086  }
2087 };
2088 
2125 {
2126  double CorrFltAlpha;
2127  double CorrFltLen;
2131  double IntFltAlpha;
2136  double IntFltCutoff;
2140  double IntFltLen;
2143  double LPFltAlpha;
2151  double LPFltBaseLen;
2155  double LPFltCutoffMult;
2160 
2171  : HBFltAlpha( 1.94609 )
2172  , HBFltCutoff( 0.46437 )
2173  , HBFltLen( 24 )
2174  {
2175  }
2176 
2177  double HBFltAlpha;
2178  double HBFltCutoff;
2180  double HBFltLen;
2183 };
2191 
2202 {
2204  {
2205  CorrFltAlpha = 0.97946;//10.06/1.88/1.029(256064.90)/0.000039:258649,447179
2206  CorrFltLen = 6.4262;
2207  IntFltAlpha = 6.41341;
2208  IntFltCutoff = 0.7372;
2209  IntFltLen = 18;
2210  LPFltAlpha = 4.76449;
2211  LPFltBaseLen = 7.55999999999998;
2212  LPFltCutoffMult = 0.79285;
2213  }
2214 };
2215 
2227 {
2229  {
2230  CorrFltAlpha = 0.95521;//7.50/2.01/1.083(11568559.86)/0.000001:258649,434609
2231  CorrFltLen = 5.70774;
2232  IntFltAlpha = 1.00766;
2233  IntFltCutoff = 0.74202;
2234  IntFltLen = 18;
2235  LPFltAlpha = 1.6801;
2236  LPFltBaseLen = 6.62;
2237  LPFltCutoffMult = 0.67821;
2238  }
2239 };
2240 
2251 {
2253  {
2254  CorrFltAlpha = 1;//7.91/1.96/1.065(1980857.66)/0.000004:258649,437578
2255  CorrFltLen = 5.865;
2256  IntFltAlpha = 1.79529;
2257  IntFltCutoff = 0.74325;
2258  IntFltLen = 18;
2259  LPFltAlpha = 1.87597;
2260  LPFltBaseLen = 6.89999999999999;
2261  LPFltCutoffMult = 0.69326;
2262  }
2263 };
2264 
2275 {
2277  {
2278  CorrFltAlpha = 0.99739;//9.21/1.91/1.040(391960.71)/0.000023:258649,444105
2279  CorrFltLen = 6.20326;
2280  IntFltAlpha = 4.6836;
2281  IntFltCutoff = 0.73879;
2282  IntFltLen = 18;
2283  LPFltAlpha = 7.86565;
2284  LPFltBaseLen = 6.91999999999999;
2285  LPFltCutoffMult = 0.78379;
2286  }
2287 };
2288 
2300 {
2302  {
2303  CorrFltAlpha = 0.97433;//11.59/1.84/1.015(73054.59)/0.000159:258649,451830
2304  CorrFltLen = 6.87893;
2305  IntFltAlpha = 7.74731;
2306  IntFltCutoff = 0.73844;
2307  IntFltLen = 18;
2308  LPFltAlpha = 4.8149;
2309  LPFltBaseLen = 8.07999999999996;
2310  LPFltCutoffMult = 0.79335;
2311  }
2312 };
2313 
2325 {
2327  {
2328  CorrFltAlpha = 0.99705;//13.68/1.79/1.000(521792.07)/0.000026:258649,457973
2329  CorrFltLen = 7.42695;
2330  IntFltAlpha = 1.71985;
2331  IntFltCutoff = 0.7571;
2332  IntFltLen = 18;
2333  LPFltAlpha = 6.71313;
2334  LPFltBaseLen = 8.27999999999996;
2335  LPFltCutoffMult = 0.78413;
2336  }
2337 };
2338 
2350 {
2351 public:
2352  int ElCount;
2353  int ElCountIO;
2355  int fppack;
2358  int fpalign;
2361  int elalign;
2366  int packmode;
2371  int BufLen[ 2 ];
2373  int BufOffs[ 2 ];
2374  double k;
2379  double o;
2382  int ResizeStep;
2385  bool IsResize2;
2388  double InGammaMult;
2390  double OutGammaMult;
2393 
2396  double ox;
2397  double oy;
2403  bool UseSRGBGamma;
2407  int BuildMode;
2409  int RndSeed;
2413 
2418  : ox( 0.0 )
2419  , oy( 0.0 )
2420  , ThreadPool( NULL )
2421  , UseSRGBGamma( false )
2422  , BuildMode( -1 )
2423  , RndSeed( 0 )
2424  {
2425  }
2426 };
2427 
// Description of a single filtering step (upsampling, filtering or
// resizing); the processing functions that use it follow in the "INL" class
// derived from this one.
// NOTE(review): the class-name line, the constructor's declarator line and
// the declarations of several members used by the derived class (`Flt`,
// `FltOrig`, `PrefixDC`, `SuffixDC`, `FltBank`) are not present in this
// listing; they have to be taken from the original header.
2447 template< class fptype, class fptypeatom >
2449 {
2450  AVIR_NOCTOR( CImageResizerFilterStep );
2451 
2452 public:
// prepareInBuf() does nothing when this flag is set.
2453  bool IsUpsample;
// Multiplied by the pixel stride to obtain the output step of doUpsample()
// and the input step of doFilter().
2454  int ResampleFactor;
// Not referenced in the visible part of the file; TODO document.
2463  double DCGain;
// Index of the filter tap doFilter() applies to the center pixel; the
// `FltLatency` taps after it are applied to the pixel pairs located
// symmetrically around the center.
2470  int FltLatency;
// Session variables; the processing functions read `ElCount` and related
// fields through this pointer.
2473  const CImageResizerVars* Vars;
// Input length in pixels (prepareInBuf() locates the last input pixel at
// `( InLen - 1 ) * ElCount`).
2475  int InLen;
// Not referenced in the visible part of the file - presumably an
// intermediate buffer index; TODO confirm.
2477  int InBuf;
// Numbers of pixels prepareInBuf() replicates before the first and after
// the last input pixel; doUpsample() uses them as edge repeat counts.
2479  int InPrefix;
2481  int InSuffix;
// Not referenced in the visible part of the file; TODO document.
2487  int InElIncr;
// Number of output pixels produced by doFilter(), doResize() and
// doResize2().
2493  int OutLen;
// Not referenced in the visible part of the file - presumably an
// intermediate buffer index; TODO confirm.
2497  int OutBuf;
// Numbers of additional output pixels located before and after the OutLen
// pixels; doUpsample() clears and fills all of them.
2499  int OutPrefix;
2501  int OutSuffix;
// Not referenced in the visible part of the file; TODO document.
2505  int OutElIncr;
// doFilter() starts reading `EdgePixelCount` input steps before its `Src`
// pointer.
2517  int EdgePixelCount;
// Presumably the default value of EdgePixelCount; it is not assigned in the
// visible part of the file.
2521  static const int EdgePixelCountDef = 3;
2528 
// Per-output-pixel resizing position, consumed by doResize() and
// doResize2().
2539  struct CResizePos
2540  {
// Not referenced in the visible part of the file - presumably the index of
// the filter within the filter bank; TODO confirm.
2542  int fti;
// Filter taps applied at this position.
2544  const fptype* ftp;
// Multiplier of the second set of taps (located one filter length after
// `ftp`), used when the filter bank's order is 1.
2546  fptypeatom x;
// Offset, in `fptype` elements, added to the source scanline pointer.
2548  int SrcOffs;
// Number of taps doResize2() iterates over at this position.
2550  int fl;
2552  };
2554 
// Buffer of resizing positions tagged with the parameters it belongs to.
2562  class CRPosBuf : public CBuffer< CResizePos >
2563  {
2564  public:
// Together with FracCount these values form the lookup key used by
// CRPosBufArray::getRPosBuf().
2565  double k;
2566  double o;
2568  int FracCount;
2570  };
2573 
// Array of position buffers that can be looked up by their parameters.
2582  class CRPosBufArray : public CStructArray< CRPosBuf >
2583  {
2584  public:
2587 
// Returns the stored buffer whose `k`, `o` and `FracCount` are exactly
// equal to the arguments (doubles are compared with `==`). When there is no
// such buffer, appends a new one via add(), assigns the three key fields and
// returns it.
2599  CRPosBuf& getRPosBuf( const double k, const double o,
2600  const int FracCount )
2601  {
2602  int i;
2603 
// Linear search over the existing buffers.
2604  for( i = 0; i < getItemCount(); i++ )
2605  {
2606  CRPosBuf& Buf = (*this)[ i ];
2607 
2608  if( Buf.k == k && Buf.o == o && Buf.FracCount == FracCount )
2609  {
2610  return( Buf );
2611  }
2612  }
2613 
// No match: create a buffer; only its key fields are set here.
2614  CRPosBuf& NewBuf = add();
2615  NewBuf.k = k;
2616  NewBuf.o = o;
2617  NewBuf.FracCount = FracCount;
2618 
2619  return( NewBuf );
2620  }
2621  };
2622 
// Position buffer read by doResize() and doResize2(); it is not assigned in
// the visible part of the file.
2623  CRPosBuf* RPosBuf;
2627 
// Empty constructor body; no member is initialized here.
2631  {
2632  }
2633 };
2634 
2646 template< class fptype, class fptypeatom >
2648  public CImageResizerFilterStep< fptype, fptypeatom >
2649 {
2650 public:
2668 
2680  template< class Tin >
2681  void packScanline( const Tin* ip, fptype* const op0, const int l0 ) const
2682  {
2683  const int ElCount = Vars -> ElCount;
2684  const int ElCountIO = Vars -> ElCountIO;
2685  fptype* op = op0;
2686  int l = l0;
2687 
2688  if( !Vars -> UseSRGBGamma )
2689  {
2690  if( ElCountIO == 1 )
2691  {
2692  while( l > 0 )
2693  {
2694  fptypeatom* v = (fptypeatom*) op;
2695  v[ 0 ] = (fptypeatom) ip[ 0 ];
2696  op += ElCount;
2697  ip++;
2698  l--;
2699  }
2700  }
2701  else
2702  if( ElCountIO == 4 )
2703  {
2704  while( l > 0 )
2705  {
2706  fptypeatom* v = (fptypeatom*) op;
2707  v[ 0 ] = (fptypeatom) ip[ 0 ];
2708  v[ 1 ] = (fptypeatom) ip[ 1 ];
2709  v[ 2 ] = (fptypeatom) ip[ 2 ];
2710  v[ 3 ] = (fptypeatom) ip[ 3 ];
2711  op += ElCount;
2712  ip += 4;
2713  l--;
2714  }
2715  }
2716  else
2717  if( ElCountIO == 3 )
2718  {
2719  while( l > 0 )
2720  {
2721  fptypeatom* v = (fptypeatom*) op;
2722  v[ 0 ] = (fptypeatom) ip[ 0 ];
2723  v[ 1 ] = (fptypeatom) ip[ 1 ];
2724  v[ 2 ] = (fptypeatom) ip[ 2 ];
2725  op += ElCount;
2726  ip += 3;
2727  l--;
2728  }
2729  }
2730  else
2731  if( ElCountIO == 2 )
2732  {
2733  while( l > 0 )
2734  {
2735  fptypeatom* v = (fptypeatom*) op;
2736  v[ 0 ] = (fptypeatom) ip[ 0 ];
2737  v[ 1 ] = (fptypeatom) ip[ 1 ];
2738  op += ElCount;
2739  ip += 2;
2740  l--;
2741  }
2742  }
2743  }
2744  else
2745  {
2746  const fptypeatom gm = (fptypeatom) Vars -> InGammaMult;
2747 
2748  if( ElCountIO == 1 )
2749  {
2750  while( l > 0 )
2751  {
2752  fptypeatom* v = (fptypeatom*) op;
2753  v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
2754  op += ElCount;
2755  ip++;
2756  l--;
2757  }
2758  }
2759  else
2760  if( ElCountIO == 4 )
2761  {
2762  while( l > 0 )
2763  {
2764  fptypeatom* v = (fptypeatom*) op;
2765  v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
2766  v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm );
2767  v[ 2 ] = convertSRGB2Lin( (fptypeatom) ip[ 2 ] * gm );
2768  v[ 3 ] = convertSRGB2Lin( (fptypeatom) ip[ 3 ] * gm );
2769  op += ElCount;
2770  ip += 4;
2771  l--;
2772  }
2773  }
2774  else
2775  if( ElCountIO == 3 )
2776  {
2777  while( l > 0 )
2778  {
2779  fptypeatom* v = (fptypeatom*) op;
2780  v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
2781  v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm );
2782  v[ 2 ] = convertSRGB2Lin( (fptypeatom) ip[ 2 ] * gm );
2783  op += ElCount;
2784  ip += 3;
2785  l--;
2786  }
2787  }
2788  else
2789  if( ElCountIO == 2 )
2790  {
2791  while( l > 0 )
2792  {
2793  fptypeatom* v = (fptypeatom*) op;
2794  v[ 0 ] = convertSRGB2Lin( (fptypeatom) ip[ 0 ] * gm );
2795  v[ 1 ] = convertSRGB2Lin( (fptypeatom) ip[ 1 ] * gm );
2796  op += ElCount;
2797  ip += 2;
2798  l--;
2799  }
2800  }
2801  }
2802 
2803  const int ZeroCount = ElCount * Vars -> fppack - ElCountIO;
2804  op = (fptype*) ( (fptypeatom*) op0 + ElCountIO );
2805  l = l0;
2806 
2807  if( ZeroCount == 1 )
2808  {
2809  while( l > 0 )
2810  {
2811  fptypeatom* v = (fptypeatom*) op;
2812  v[ 0 ] = (fptypeatom) 0;
2813  op += ElCount;
2814  l--;
2815  }
2816  }
2817  else
2818  if( ZeroCount == 2 )
2819  {
2820  while( l > 0 )
2821  {
2822  fptypeatom* v = (fptypeatom*) op;
2823  v[ 0 ] = (fptypeatom) 0;
2824  v[ 1 ] = (fptypeatom) 0;
2825  op += ElCount;
2826  l--;
2827  }
2828  }
2829  else
2830  if( ZeroCount == 3 )
2831  {
2832  while( l > 0 )
2833  {
2834  fptypeatom* v = (fptypeatom*) op;
2835  v[ 0 ] = (fptypeatom) 0;
2836  v[ 1 ] = (fptypeatom) 0;
2837  v[ 2 ] = (fptypeatom) 0;
2838  op += ElCount;
2839  l--;
2840  }
2841  }
2842  }
2843 
2853  static void applySRGBGamma( fptype* p, int l,
2854  const CImageResizerVars& Vars0 )
2855  {
2856  const int ElCount = Vars0.ElCount;
2857  const int ElCountIO = Vars0.ElCountIO;
2858  const fptypeatom gm = (fptypeatom) Vars0.OutGammaMult;
2859 
2860  if( ElCountIO == 1 )
2861  {
2862  while( l > 0 )
2863  {
2864  fptypeatom* v = (fptypeatom*) p;
2865  v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
2866  p += ElCount;
2867  l--;
2868  }
2869  }
2870  else
2871  if( ElCountIO == 4 )
2872  {
2873  while( l > 0 )
2874  {
2875  fptypeatom* v = (fptypeatom*) p;
2876  v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
2877  v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm;
2878  v[ 2 ] = convertLin2SRGB( v[ 2 ]) * gm;
2879  v[ 3 ] = convertLin2SRGB( v[ 3 ]) * gm;
2880  p += ElCount;
2881  l--;
2882  }
2883  }
2884  else
2885  if( ElCountIO == 3 )
2886  {
2887  while( l > 0 )
2888  {
2889  fptypeatom* v = (fptypeatom*) p;
2890  v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
2891  v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm;
2892  v[ 2 ] = convertLin2SRGB( v[ 2 ]) * gm;
2893  p += ElCount;
2894  l--;
2895  }
2896  }
2897  else
2898  if( ElCountIO == 2 )
2899  {
2900  while( l > 0 )
2901  {
2902  fptypeatom* v = (fptypeatom*) p;
2903  v[ 0 ] = convertLin2SRGB( v[ 0 ]) * gm;
2904  v[ 1 ] = convertLin2SRGB( v[ 1 ]) * gm;
2905  p += ElCount;
2906  l--;
2907  }
2908  }
2909  }
2910 
2925  void convertVtoH( const fptype* ip, fptype* op, const int SrcLen,
2926  const int SrcIncr ) const
2927  {
2928  const int ElCount = Vars -> ElCount;
2929  int j;
2930 
2931  if( ElCount == 1 )
2932  {
2933  for( j = 0; j < SrcLen; j++ )
2934  {
2935  op[ 0 ] = ip[ 0 ];
2936  ip += SrcIncr;
2937  op++;
2938  }
2939  }
2940  else
2941  if( ElCount == 4 )
2942  {
2943  for( j = 0; j < SrcLen; j++ )
2944  {
2945  op[ 0 ] = ip[ 0 ];
2946  op[ 1 ] = ip[ 1 ];
2947  op[ 2 ] = ip[ 2 ];
2948  op[ 3 ] = ip[ 3 ];
2949  ip += SrcIncr;
2950  op += 4;
2951  }
2952  }
2953  else
2954  if( ElCount == 3 )
2955  {
2956  for( j = 0; j < SrcLen; j++ )
2957  {
2958  op[ 0 ] = ip[ 0 ];
2959  op[ 1 ] = ip[ 1 ];
2960  op[ 2 ] = ip[ 2 ];
2961  ip += SrcIncr;
2962  op += 3;
2963  }
2964  }
2965  else
2966  if( ElCount == 2 )
2967  {
2968  for( j = 0; j < SrcLen; j++ )
2969  {
2970  op[ 0 ] = ip[ 0 ];
2971  op[ 1 ] = ip[ 1 ];
2972  ip += SrcIncr;
2973  op += 2;
2974  }
2975  }
2976  }
2977 
2992  template< class Tout >
2993  static void unpackScanline( const fptype* ip, Tout* op, int l,
2994  const CImageResizerVars& Vars0 )
2995  {
2996  const int ElCount = Vars0.ElCount;
2997  const int ElCountIO = Vars0.ElCountIO;
2998 
2999  if( ElCountIO == 1 )
3000  {
3001  while( l > 0 )
3002  {
3003  const fptypeatom* v = (const fptypeatom*) ip;
3004  op[ 0 ] = (Tout) v[ 0 ];
3005  ip += ElCount;
3006  op++;
3007  l--;
3008  }
3009  }
3010  else
3011  if( ElCountIO == 4 )
3012  {
3013  while( l > 0 )
3014  {
3015  const fptypeatom* v = (const fptypeatom*) ip;
3016  op[ 0 ] = (Tout) v[ 0 ];
3017  op[ 1 ] = (Tout) v[ 1 ];
3018  op[ 2 ] = (Tout) v[ 2 ];
3019  op[ 3 ] = (Tout) v[ 3 ];
3020  ip += ElCount;
3021  op += 4;
3022  l--;
3023  }
3024  }
3025  else
3026  if( ElCountIO == 3 )
3027  {
3028  while( l > 0 )
3029  {
3030  const fptypeatom* v = (const fptypeatom*) ip;
3031  op[ 0 ] = (Tout) v[ 0 ];
3032  op[ 1 ] = (Tout) v[ 1 ];
3033  op[ 2 ] = (Tout) v[ 2 ];
3034  ip += ElCount;
3035  op += 3;
3036  l--;
3037  }
3038  }
3039  else
3040  if( ElCountIO == 2 )
3041  {
3042  while( l > 0 )
3043  {
3044  const fptypeatom* v = (const fptypeatom*) ip;
3045  op[ 0 ] = (Tout) v[ 0 ];
3046  op[ 1 ] = (Tout) v[ 1 ];
3047  ip += ElCount;
3048  op += 2;
3049  l--;
3050  }
3051  }
3052  }
3053 
3063  void calcScanlineBias( const fptype* p, const int SrcLen,
3064  fptype* const ElBiases ) const
3065  {
3066  const int ElCount = Vars -> ElCount;
3067  int l = SrcLen;
3068 
3069  if( ElCount == 1 )
3070  {
3071  fptype b0 = (fptype) 0;
3072 
3073  while( l > 0 )
3074  {
3075  b0 += p[ 0 ];
3076  p++;
3077  l--;
3078  }
3079 
3080  ElBiases[ 0 ] = b0 / (fptype) SrcLen;
3081  }
3082  else
3083  if( ElCount == 4 )
3084  {
3085  fptype b0 = (fptype) 0;
3086  fptype b1 = (fptype) 0;
3087  fptype b2 = (fptype) 0;
3088  fptype b3 = (fptype) 0;
3089 
3090  while( l > 0 )
3091  {
3092  b0 += p[ 0 ];
3093  b1 += p[ 1 ];
3094  b2 += p[ 2 ];
3095  b3 += p[ 3 ];
3096  p += 4;
3097  l--;
3098  }
3099 
3100  ElBiases[ 0 ] = b0 / (fptype) SrcLen;
3101  ElBiases[ 1 ] = b1 / (fptype) SrcLen;
3102  ElBiases[ 2 ] = b2 / (fptype) SrcLen;
3103  ElBiases[ 3 ] = b3 / (fptype) SrcLen;
3104  }
3105  else
3106  if( ElCount == 3 )
3107  {
3108  fptype b0 = (fptype) 0;
3109  fptype b1 = (fptype) 0;
3110  fptype b2 = (fptype) 0;
3111 
3112  while( l > 0 )
3113  {
3114  b0 += p[ 0 ];
3115  b1 += p[ 1 ];
3116  b2 += p[ 2 ];
3117  p += 3;
3118  l--;
3119  }
3120 
3121  ElBiases[ 0 ] = b0 / (fptype) SrcLen;
3122  ElBiases[ 1 ] = b1 / (fptype) SrcLen;
3123  ElBiases[ 2 ] = b2 / (fptype) SrcLen;
3124  }
3125  else
3126  if( ElCount == 2 )
3127  {
3128  fptype b0 = (fptype) 0;
3129  fptype b1 = (fptype) 0;
3130 
3131  while( l > 0 )
3132  {
3133  b0 += p[ 0 ];
3134  b1 += p[ 1 ];
3135  p += 2;
3136  l--;
3137  }
3138 
3139  ElBiases[ 0 ] = b0 / (fptype) SrcLen;
3140  ElBiases[ 1 ] = b1 / (fptype) SrcLen;
3141  }
3142  }
3143 
3153  void unbiasScanline( fptype* p, int l,
3154  const fptype* const ElBiases ) const
3155  {
3156  const int ElCount = Vars -> ElCount;
3157 
3158  if( ElCount == 1 )
3159  {
3160  const fptype b0 = ElBiases[ 0 ];
3161 
3162  while( l > 0 )
3163  {
3164  p[ 0 ] -= b0;
3165  p++;
3166  l--;
3167  }
3168  }
3169  else
3170  if( ElCount == 4 )
3171  {
3172  const fptype b0 = ElBiases[ 0 ];
3173  const fptype b1 = ElBiases[ 1 ];
3174  const fptype b2 = ElBiases[ 2 ];
3175  const fptype b3 = ElBiases[ 3 ];
3176 
3177  while( l > 0 )
3178  {
3179  p[ 0 ] -= b0;
3180  p[ 1 ] -= b1;
3181  p[ 2 ] -= b2;
3182  p[ 3 ] -= b3;
3183  p += 4;
3184  l--;
3185  }
3186  }
3187  else
3188  if( ElCount == 3 )
3189  {
3190  const fptype b0 = ElBiases[ 0 ];
3191  const fptype b1 = ElBiases[ 1 ];
3192  const fptype b2 = ElBiases[ 2 ];
3193 
3194  while( l > 0 )
3195  {
3196  p[ 0 ] -= b0;
3197  p[ 1 ] -= b1;
3198  p[ 2 ] -= b2;
3199  p += 3;
3200  l--;
3201  }
3202  }
3203  else
3204  if( ElCount == 2 )
3205  {
3206  const fptype b0 = ElBiases[ 0 ];
3207  const fptype b1 = ElBiases[ 1 ];
3208 
3209  while( l > 0 )
3210  {
3211  p[ 0 ] -= b0;
3212  p[ 1 ] -= b1;
3213  p += 2;
3214  l--;
3215  }
3216  }
3217  }
3218 
3228  void prepareInBuf( fptype* Src ) const
3229  {
3230  if( IsUpsample || InPrefix + InSuffix == 0 )
3231  {
3232  return;
3233  }
3234 
3235  const int ElCount = Vars -> ElCount;
3236  replicateArray( Src, ElCount, Src - ElCount, InPrefix, -ElCount );
3237 
3238  Src += ( InLen - 1 ) * ElCount;
3239  replicateArray( Src, ElCount, Src + ElCount, InSuffix, ElCount );
3240  }
3241 
3250  void doUpsample( const fptype* const Src, fptype* const Dst ) const
3251  {
3252  const int ElCount = Vars -> ElCount;
3253  fptype* op0 = &Dst[ -OutPrefix * ElCount ];
3254  memset( op0, 0, ( OutPrefix + OutLen + OutSuffix ) * ElCount *
3255  sizeof( fptype ));
3256 
3257  const fptype* ip = Src;
3258  const int opstep = ElCount * ResampleFactor;
3259  int l;
3260 
3261  if( FltOrig.getCapacity() > 0 )
3262  {
3263  // Do not perform filtering, only upsample.
3264 
3265  op0 += ( OutPrefix % ResampleFactor ) * ElCount;
3266  l = OutPrefix / ResampleFactor;
3267 
3268  if( ElCount == 1 )
3269  {
3270  while( l > 0 )
3271  {
3272  op0[ 0 ] = ip[ 0 ];
3273  op0 += opstep;
3274  l--;
3275  }
3276 
3277  l = InLen - 1;
3278 
3279  while( l > 0 )
3280  {
3281  op0[ 0 ] = ip[ 0 ];
3282  op0 += opstep;
3283  ip += ElCount;
3284  l--;
3285  }
3286 
3287  l = OutSuffix / ResampleFactor;
3288 
3289  while( l >= 0 )
3290  {
3291  op0[ 0 ] = ip[ 0 ];
3292  op0 += opstep;
3293  l--;
3294  }
3295  }
3296  else
3297  if( ElCount == 4 )
3298  {
3299  while( l > 0 )
3300  {
3301  op0[ 0 ] = ip[ 0 ];
3302  op0[ 1 ] = ip[ 1 ];
3303  op0[ 2 ] = ip[ 2 ];
3304  op0[ 3 ] = ip[ 3 ];
3305  op0 += opstep;
3306  l--;
3307  }
3308 
3309  l = InLen - 1;
3310 
3311  while( l > 0 )
3312  {
3313  op0[ 0 ] = ip[ 0 ];
3314  op0[ 1 ] = ip[ 1 ];
3315  op0[ 2 ] = ip[ 2 ];
3316  op0[ 3 ] = ip[ 3 ];
3317  op0 += opstep;
3318  ip += ElCount;
3319  l--;
3320  }
3321 
3322  l = OutSuffix / ResampleFactor;
3323 
3324  while( l >= 0 )
3325  {
3326  op0[ 0 ] = ip[ 0 ];
3327  op0[ 1 ] = ip[ 1 ];
3328  op0[ 2 ] = ip[ 2 ];
3329  op0[ 3 ] = ip[ 3 ];
3330  op0 += opstep;
3331  l--;
3332  }
3333  }
3334  else
3335  if( ElCount == 3 )
3336  {
3337  while( l > 0 )
3338  {
3339  op0[ 0 ] = ip[ 0 ];
3340  op0[ 1 ] = ip[ 1 ];
3341  op0[ 2 ] = ip[ 2 ];
3342  op0 += opstep;
3343  l--;
3344  }
3345 
3346  l = InLen - 1;
3347 
3348  while( l > 0 )
3349  {
3350  op0[ 0 ] = ip[ 0 ];
3351  op0[ 1 ] = ip[ 1 ];
3352  op0[ 2 ] = ip[ 2 ];
3353  op0 += opstep;
3354  ip += ElCount;
3355  l--;
3356  }
3357 
3358  l = OutSuffix / ResampleFactor;
3359 
3360  while( l >= 0 )
3361  {
3362  op0[ 0 ] = ip[ 0 ];
3363  op0[ 1 ] = ip[ 1 ];
3364  op0[ 2 ] = ip[ 2 ];
3365  op0 += opstep;
3366  l--;
3367  }
3368  }
3369  else
3370  if( ElCount == 2 )
3371  {
3372  while( l > 0 )
3373  {
3374  op0[ 0 ] = ip[ 0 ];
3375  op0[ 1 ] = ip[ 1 ];
3376  op0 += opstep;
3377  l--;
3378  }
3379 
3380  l = InLen - 1;
3381 
3382  while( l > 0 )
3383  {
3384  op0[ 0 ] = ip[ 0 ];
3385  op0[ 1 ] = ip[ 1 ];
3386  op0 += opstep;
3387  ip += ElCount;
3388  l--;
3389  }
3390 
3391  l = OutSuffix / ResampleFactor;
3392 
3393  while( l >= 0 )
3394  {
3395  op0[ 0 ] = ip[ 0 ];
3396  op0[ 1 ] = ip[ 1 ];
3397  op0 += opstep;
3398  l--;
3399  }
3400  }
3401 
3402  return;
3403  }
3404 
3405  const fptype* const f = Flt;
3406  const int flen = Flt.getCapacity();
3407  fptype* op;
3408  int i;
3409 
3410  if( ElCount == 1 )
3411  {
3412  l = InPrefix;
3413 
3414  while( l > 0 )
3415  {
3416  op = op0;
3417 
3418  for( i = 0; i < flen; i++ )
3419  {
3420  op[ i ] += f[ i ] * ip[ 0 ];
3421  }
3422 
3423  op0 += opstep;
3424  l--;
3425  }
3426 
3427  l = InLen - 1;
3428 
3429  while( l > 0 )
3430  {
3431  op = op0;
3432 
3433  for( i = 0; i < flen; i++ )
3434  {
3435  op[ i ] += f[ i ] * ip[ 0 ];
3436  }
3437 
3438  ip += ElCount;
3439  op0 += opstep;
3440  l--;
3441  }
3442 
3443  l = InSuffix;
3444 
3445  while( l >= 0 )
3446  {
3447  op = op0;
3448 
3449  for( i = 0; i < flen; i++ )
3450  {
3451  op[ i ] += f[ i ] * ip[ 0 ];
3452  }
3453 
3454  op0 += opstep;
3455  l--;
3456  }
3457  }
3458  else
3459  if( ElCount == 4 )
3460  {
3461  l = InPrefix;
3462 
3463  while( l > 0 )
3464  {
3465  op = op0;
3466 
3467  for( i = 0; i < flen; i++ )
3468  {
3469  op[ 0 ] += f[ i ] * ip[ 0 ];
3470  op[ 1 ] += f[ i ] * ip[ 1 ];
3471  op[ 2 ] += f[ i ] * ip[ 2 ];
3472  op[ 3 ] += f[ i ] * ip[ 3 ];
3473  op += 4;
3474  }
3475 
3476  op0 += opstep;
3477  l--;
3478  }
3479 
3480  l = InLen - 1;
3481 
3482  while( l > 0 )
3483  {
3484  op = op0;
3485 
3486  for( i = 0; i < flen; i++ )
3487  {
3488  op[ 0 ] += f[ i ] * ip[ 0 ];
3489  op[ 1 ] += f[ i ] * ip[ 1 ];
3490  op[ 2 ] += f[ i ] * ip[ 2 ];
3491  op[ 3 ] += f[ i ] * ip[ 3 ];
3492  op += 4;
3493  }
3494 
3495  ip += ElCount;
3496  op0 += opstep;
3497  l--;
3498  }
3499 
3500  l = InSuffix;
3501 
3502  while( l >= 0 )
3503  {
3504  op = op0;
3505 
3506  for( i = 0; i < flen; i++ )
3507  {
3508  op[ 0 ] += f[ i ] * ip[ 0 ];
3509  op[ 1 ] += f[ i ] * ip[ 1 ];
3510  op[ 2 ] += f[ i ] * ip[ 2 ];
3511  op[ 3 ] += f[ i ] * ip[ 3 ];
3512  op += 4;
3513  }
3514 
3515  op0 += opstep;
3516  l--;
3517  }
3518  }
3519  else
3520  if( ElCount == 3 )
3521  {
3522  l = InPrefix;
3523 
3524  while( l > 0 )
3525  {
3526  op = op0;
3527 
3528  for( i = 0; i < flen; i++ )
3529  {
3530  op[ 0 ] += f[ i ] * ip[ 0 ];
3531  op[ 1 ] += f[ i ] * ip[ 1 ];
3532  op[ 2 ] += f[ i ] * ip[ 2 ];
3533  op += 3;
3534  }
3535 
3536  op0 += opstep;
3537  l--;
3538  }
3539 
3540  l = InLen - 1;
3541 
3542  while( l > 0 )
3543  {
3544  op = op0;
3545 
3546  for( i = 0; i < flen; i++ )
3547  {
3548  op[ 0 ] += f[ i ] * ip[ 0 ];
3549  op[ 1 ] += f[ i ] * ip[ 1 ];
3550  op[ 2 ] += f[ i ] * ip[ 2 ];
3551  op += 3;
3552  }
3553 
3554  ip += ElCount;
3555  op0 += opstep;
3556  l--;
3557  }
3558 
3559  l = InSuffix;
3560 
3561  while( l >= 0 )
3562  {
3563  op = op0;
3564 
3565  for( i = 0; i < flen; i++ )
3566  {
3567  op[ 0 ] += f[ i ] * ip[ 0 ];
3568  op[ 1 ] += f[ i ] * ip[ 1 ];
3569  op[ 2 ] += f[ i ] * ip[ 2 ];
3570  op += 3;
3571  }
3572 
3573  op0 += opstep;
3574  l--;
3575  }
3576  }
3577  else
3578  if( ElCount == 2 )
3579  {
3580  l = InPrefix;
3581 
3582  while( l > 0 )
3583  {
3584  op = op0;
3585 
3586  for( i = 0; i < flen; i++ )
3587  {
3588  op[ 0 ] += f[ i ] * ip[ 0 ];
3589  op[ 1 ] += f[ i ] * ip[ 1 ];
3590  op += 2;
3591  }
3592 
3593  op0 += opstep;
3594  l--;
3595  }
3596 
3597  l = InLen - 1;
3598 
3599  while( l > 0 )
3600  {
3601  op = op0;
3602 
3603  for( i = 0; i < flen; i++ )
3604  {
3605  op[ 0 ] += f[ i ] * ip[ 0 ];
3606  op[ 1 ] += f[ i ] * ip[ 1 ];
3607  op += 2;
3608  }
3609 
3610  ip += ElCount;
3611  op0 += opstep;
3612  l--;
3613  }
3614 
3615  l = InSuffix;
3616 
3617  while( l >= 0 )
3618  {
3619  op = op0;
3620 
3621  for( i = 0; i < flen; i++ )
3622  {
3623  op[ 0 ] += f[ i ] * ip[ 0 ];
3624  op[ 1 ] += f[ i ] * ip[ 1 ];
3625  op += 2;
3626  }
3627 
3628  op0 += opstep;
3629  l--;
3630  }
3631  }
3632 
3633  op = op0;
3634  const fptype* dc = SuffixDC;
3635  l = SuffixDC.getCapacity();
3636 
3637  if( ElCount == 1 )
3638  {
3639  for( i = 0; i < l; i++ )
3640  {
3641  op[ i ] += ip[ 0 ] * dc[ i ];
3642  }
3643  }
3644  else
3645  if( ElCount == 4 )
3646  {
3647  while( l > 0 )
3648  {
3649  op[ 0 ] += ip[ 0 ] * dc[ 0 ];
3650  op[ 1 ] += ip[ 1 ] * dc[ 0 ];
3651  op[ 2 ] += ip[ 2 ] * dc[ 0 ];
3652  op[ 3 ] += ip[ 3 ] * dc[ 0 ];
3653  dc++;
3654  op += 4;
3655  l--;
3656  }
3657  }
3658  else
3659  if( ElCount == 3 )
3660  {
3661  while( l > 0 )
3662  {
3663  op[ 0 ] += ip[ 0 ] * dc[ 0 ];
3664  op[ 1 ] += ip[ 1 ] * dc[ 0 ];
3665  op[ 2 ] += ip[ 2 ] * dc[ 0 ];
3666  dc++;
3667  op += 3;
3668  l--;
3669  }
3670  }
3671  else
3672  if( ElCount == 2 )
3673  {
3674  while( l > 0 )
3675  {
3676  op[ 0 ] += ip[ 0 ] * dc[ 0 ];
3677  op[ 1 ] += ip[ 1 ] * dc[ 0 ];
3678  dc++;
3679  op += 2;
3680  l--;
3681  }
3682  }
3683 
3684  ip = Src;
3685  op = Dst - InPrefix * opstep;
3686  dc = PrefixDC;
3687  l = PrefixDC.getCapacity();
3688 
3689  if( ElCount == 1 )
3690  {
3691  for( i = 0; i < l; i++ )
3692  {
3693  op[ i ] += ip[ 0 ] * dc[ i ];
3694  }
3695  }
3696  else
3697  if( ElCount == 4 )
3698  {
3699  while( l > 0 )
3700  {
3701  op[ 0 ] += ip[ 0 ] * dc[ 0 ];
3702  op[ 1 ] += ip[ 1 ] * dc[ 0 ];
3703  op[ 2 ] += ip[ 2 ] * dc[ 0 ];
3704  op[ 3 ] += ip[ 3 ] * dc[ 0 ];
3705  dc++;
3706  op += 4;
3707  l--;
3708  }
3709  }
3710  else
3711  if( ElCount == 3 )
3712  {
3713  while( l > 0 )
3714  {
3715  op[ 0 ] += ip[ 0 ] * dc[ 0 ];
3716  op[ 1 ] += ip[ 1 ] * dc[ 0 ];
3717  op[ 2 ] += ip[ 2 ] * dc[ 0 ];
3718  dc++;
3719  op += 3;
3720  l--;
3721  }
3722  }
3723  else
3724  if( ElCount == 2 )
3725  {
3726  while( l > 0 )
3727  {
3728  op[ 0 ] += ip[ 0 ] * dc[ 0 ];
3729  op[ 1 ] += ip[ 1 ] * dc[ 0 ];
3730  dc++;
3731  op += 2;
3732  l--;
3733  }
3734  }
3735  }
3736 
3748  void doFilter( const fptype* const Src, fptype* Dst,
3749  const int DstIncr ) const
3750  {
3751  const int ElCount = Vars -> ElCount;
3752  const fptype* const f = &Flt[ FltLatency ];
3753  const int flen = FltLatency + 1;
3754  const int ipstep = ElCount * ResampleFactor;
3755  const fptype* ip = Src - EdgePixelCount * ipstep;
3756  const fptype* ip1;
3757  const fptype* ip2;
3758  int l = OutLen;
3759  int i;
3760 
3761  if( ElCount == 1 )
3762  {
3763  while( l > 0 )
3764  {
3765  fptype s = f[ 0 ] * ip[ 0 ];
3766  ip1 = ip;
3767  ip2 = ip;
3768 
3769  for( i = 1; i < flen; i++ )
3770  {
3771  ip1++;
3772  ip2--;
3773  s += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
3774  }
3775 
3776  Dst[ 0 ] = s;
3777  Dst += DstIncr;
3778  ip += ipstep;
3779  l--;
3780  }
3781  }
3782  else
3783  if( ElCount == 4 )
3784  {
3785  while( l > 0 )
3786  {
3787  fptype s1 = f[ 0 ] * ip[ 0 ];
3788  fptype s2 = f[ 0 ] * ip[ 1 ];
3789  fptype s3 = f[ 0 ] * ip[ 2 ];
3790  fptype s4 = f[ 0 ] * ip[ 3 ];
3791  ip1 = ip;
3792  ip2 = ip;
3793 
3794  for( i = 1; i < flen; i++ )
3795  {
3796  ip1 += 4;
3797  ip2 -= 4;
3798  s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
3799  s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]);
3800  s3 += f[ i ] * ( ip1[ 2 ] + ip2[ 2 ]);
3801  s4 += f[ i ] * ( ip1[ 3 ] + ip2[ 3 ]);
3802  }
3803 
3804  Dst[ 0 ] = s1;
3805  Dst[ 1 ] = s2;
3806  Dst[ 2 ] = s3;
3807  Dst[ 3 ] = s4;
3808  Dst += DstIncr;
3809  ip += ipstep;
3810  l--;
3811  }
3812  }
3813  else
3814  if( ElCount == 3 )
3815  {
3816  while( l > 0 )
3817  {
3818  fptype s1 = f[ 0 ] * ip[ 0 ];
3819  fptype s2 = f[ 0 ] * ip[ 1 ];
3820  fptype s3 = f[ 0 ] * ip[ 2 ];
3821  ip1 = ip;
3822  ip2 = ip;
3823 
3824  for( i = 1; i < flen; i++ )
3825  {
3826  ip1 += 3;
3827  ip2 -= 3;
3828  s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
3829  s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]);
3830  s3 += f[ i ] * ( ip1[ 2 ] + ip2[ 2 ]);
3831  }
3832 
3833  Dst[ 0 ] = s1;
3834  Dst[ 1 ] = s2;
3835  Dst[ 2 ] = s3;
3836  Dst += DstIncr;
3837  ip += ipstep;
3838  l--;
3839  }
3840  }
3841  else
3842  if( ElCount == 2 )
3843  {
3844  while( l > 0 )
3845  {
3846  fptype s1 = f[ 0 ] * ip[ 0 ];
3847  fptype s2 = f[ 0 ] * ip[ 1 ];
3848  ip1 = ip;
3849  ip2 = ip;
3850 
3851  for( i = 1; i < flen; i++ )
3852  {
3853  ip1 += 2;
3854  ip2 -= 2;
3855  s1 += f[ i ] * ( ip1[ 0 ] + ip2[ 0 ]);
3856  s2 += f[ i ] * ( ip1[ 1 ] + ip2[ 1 ]);
3857  }
3858 
3859  Dst[ 0 ] = s1;
3860  Dst[ 1 ] = s2;
3861  Dst += DstIncr;
3862  ip += ipstep;
3863  l--;
3864  }
3865  }
3866  }
3867 
3885  void doResize( const fptype* SrcLine, fptype* DstLine,
3886  const int DstLineIncr, const fptype* const ElBiases,
3887  fptype* const ) const
3888  {
3889  const int IntFltLen = FltBank -> getFilterLen();
3890  const int ElCount = Vars -> ElCount;
3892  CResizePos* rpos = &(*RPosBuf)[ 0 ];
3893 
3895  CResizePos* const rpose = rpos + OutLen;
3896 
3897 #define AVIR_RESIZE_PART1 \
3898  while( rpos < rpose ) \
3899  { \
3900  const fptype x = (fptype) rpos -> x; \
3901  const fptype* const ftp = rpos -> ftp; \
3902  const fptype* const ftp2 = ftp + IntFltLen; \
3903  const fptype* Src = SrcLine + rpos -> SrcOffs; \
3904  int i;
3905 
3906 #define AVIR_RESIZE_PART1nx \
3907  while( rpos < rpose ) \
3908  { \
3909  const fptype* const ftp = rpos -> ftp; \
3910  const fptype* Src = SrcLine + rpos -> SrcOffs; \
3911  int i;
3912 
3913 #define AVIR_RESIZE_PART2 \
3914  DstLine += DstLineIncr; \
3915  rpos++; \
3916  }
3917 
3918  if( FltBank -> getOrder() == 1 )
3919  {
3920  if( ElCount == 1 )
3921  {
3922  AVIR_RESIZE_PART1
3923 
3924  fptype sum0 = ElBiases[ 0 ];
3925 
3926  for( i = 0; i < IntFltLen; i++ )
3927  {
3928  sum0 += ( ftp[ i ] + ftp2[ i ] * x ) * Src[ i ];
3929  }
3930 
3931  DstLine[ 0 ] = sum0;
3932 
3933  AVIR_RESIZE_PART2
3934  }
3935  else
3936  if( ElCount == 4 )
3937  {
3938  AVIR_RESIZE_PART1
3939 
3940  fptype sum0 = ElBiases[ 0 ];
3941  fptype sum1 = ElBiases[ 1 ];
3942  fptype sum2 = ElBiases[ 2 ];
3943  fptype sum3 = ElBiases[ 3 ];
3944 
3945  for( i = 0; i < IntFltLen; i++ )
3946  {
3947  const fptype xx = ftp[ i ] + ftp2[ i ] * x;
3948  sum0 += xx * Src[ 0 ];
3949  sum1 += xx * Src[ 1 ];
3950  sum2 += xx * Src[ 2 ];
3951  sum3 += xx * Src[ 3 ];
3952  Src += 4;
3953  }
3954 
3955  DstLine[ 0 ] = sum0;
3956  DstLine[ 1 ] = sum1;
3957  DstLine[ 2 ] = sum2;
3958  DstLine[ 3 ] = sum3;
3959 
3960  AVIR_RESIZE_PART2
3961  }
3962  else
3963  if( ElCount == 3 )
3964  {
3965  AVIR_RESIZE_PART1
3966 
3967  fptype sum0 = ElBiases[ 0 ];
3968  fptype sum1 = ElBiases[ 1 ];
3969  fptype sum2 = ElBiases[ 2 ];
3970 
3971  for( i = 0; i < IntFltLen; i++ )
3972  {
3973  const fptype xx = ftp[ i ] + ftp2[ i ] * x;
3974  sum0 += xx * Src[ 0 ];
3975  sum1 += xx * Src[ 1 ];
3976  sum2 += xx * Src[ 2 ];
3977  Src += 3;
3978  }
3979 
3980  DstLine[ 0 ] = sum0;
3981  DstLine[ 1 ] = sum1;
3982  DstLine[ 2 ] = sum2;
3983 
3984  AVIR_RESIZE_PART2
3985  }
3986  else
3987  if( ElCount == 2 )
3988  {
3989  AVIR_RESIZE_PART1
3990 
3991  fptype sum0 = ElBiases[ 0 ];
3992  fptype sum1 = ElBiases[ 1 ];
3993 
3994  for( i = 0; i < IntFltLen; i++ )
3995  {
3996  const fptype xx = ftp[ i ] + ftp2[ i ] * x;
3997  sum0 += xx * Src[ 0 ];
3998  sum1 += xx * Src[ 1 ];
3999  Src += 2;
4000  }
4001 
4002  DstLine[ 0 ] = sum0;
4003  DstLine[ 1 ] = sum1;
4004 
4005  AVIR_RESIZE_PART2
4006  }
4007  }
4008  else
4009  {
4010  if( ElCount == 1 )
4011  {
4012  AVIR_RESIZE_PART1nx
4013 
4014  fptype sum0 = ElBiases[ 0 ];
4015 
4016  for( i = 0; i < IntFltLen; i++ )
4017  {
4018  sum0 += ftp[ i ] * Src[ i ];
4019  }
4020 
4021  DstLine[ 0 ] = sum0;
4022 
4023  AVIR_RESIZE_PART2
4024  }
4025  else
4026  if( ElCount == 4 )
4027  {
4028  AVIR_RESIZE_PART1nx
4029 
4030  fptype sum0 = ElBiases[ 0 ];
4031  fptype sum1 = ElBiases[ 1 ];
4032  fptype sum2 = ElBiases[ 2 ];
4033  fptype sum3 = ElBiases[ 3 ];
4034 
4035  for( i = 0; i < IntFltLen; i++ )
4036  {
4037  const fptype xx = ftp[ i ];
4038  sum0 += xx * Src[ 0 ];
4039  sum1 += xx * Src[ 1 ];
4040  sum2 += xx * Src[ 2 ];
4041  sum3 += xx * Src[ 3 ];
4042  Src += 4;
4043  }
4044 
4045  DstLine[ 0 ] = sum0;
4046  DstLine[ 1 ] = sum1;
4047  DstLine[ 2 ] = sum2;
4048  DstLine[ 3 ] = sum3;
4049 
4050  AVIR_RESIZE_PART2
4051  }
4052  else
4053  if( ElCount == 3 )
4054  {
4055  AVIR_RESIZE_PART1nx
4056 
4057  fptype sum0 = ElBiases[ 0 ];
4058  fptype sum1 = ElBiases[ 1 ];
4059  fptype sum2 = ElBiases[ 2 ];
4060 
4061  for( i = 0; i < IntFltLen; i++ )
4062  {
4063  const fptype xx = ftp[ i ];
4064  sum0 += xx * Src[ 0 ];
4065  sum1 += xx * Src[ 1 ];
4066  sum2 += xx * Src[ 2 ];
4067  Src += 3;
4068  }
4069 
4070  DstLine[ 0 ] = sum0;
4071  DstLine[ 1 ] = sum1;
4072  DstLine[ 2 ] = sum2;
4073 
4074  AVIR_RESIZE_PART2
4075  }
4076  else
4077  if( ElCount == 2 )
4078  {
4079  AVIR_RESIZE_PART1nx
4080 
4081  fptype sum0 = ElBiases[ 0 ];
4082  fptype sum1 = ElBiases[ 1 ];
4083 
4084  for( i = 0; i < IntFltLen; i++ )
4085  {
4086  const fptype xx = ftp[ i ];
4087  sum0 += xx * Src[ 0 ];
4088  sum1 += xx * Src[ 1 ];
4089  Src += 2;
4090  }
4091 
4092  DstLine[ 0 ] = sum0;
4093  DstLine[ 1 ] = sum1;
4094 
4095  AVIR_RESIZE_PART2
4096  }
4097  }
4098  }
4099 #undef AVIR_RESIZE_PART2
4100 #undef AVIR_RESIZE_PART1nx
4101 #undef AVIR_RESIZE_PART1
4102 
4117  void doResize2( const fptype* SrcLine, fptype* DstLine,
4118  const int DstLineIncr, const fptype* const ElBiases,
4119  fptype* const ) const
4120  {
4121  const int IntFltLen0 = FltBank -> getFilterLen();
4122  const int ElCount = Vars -> ElCount;
4124  CResizePos* rpos = &(*RPosBuf)[ 0 ];
4125 
4127  CResizePos* const rpose = rpos + OutLen;
4128 
4129 #define AVIR_RESIZE_PART1 \
4130  while( rpos < rpose ) \
4131  { \
4132  const fptype x = (fptype) rpos -> x; \
4133  const fptype* const ftp = rpos -> ftp; \
4134  const fptype* const ftp2 = ftp + IntFltLen0; \
4135  const fptype* Src = SrcLine + rpos -> SrcOffs; \
4136  const int IntFltLen = rpos -> fl; \
4137  int i;
4138 
4139 #define AVIR_RESIZE_PART1nx \
4140  while( rpos < rpose ) \
4141  { \
4142  const fptype* const ftp = rpos -> ftp; \
4143  const fptype* Src = SrcLine + rpos -> SrcOffs; \
4144  const int IntFltLen = rpos -> fl; \
4145  int i;
4146 
4147 #define AVIR_RESIZE_PART2 \
4148  DstLine += DstLineIncr; \
4149  rpos++; \
4150  }
4151 
4152  if( FltBank -> getOrder() == 1 )
4153  {
4154  if( ElCount == 1 )
4155  {
4156  AVIR_RESIZE_PART1
4157 
4158  fptype sum0 = ElBiases[ 0 ];
4159 
4160  for( i = 0; i < IntFltLen; i += 2 )
4161  {
4162  sum0 += ( ftp[ i ] + ftp2[ i ] * x ) * Src[ i ];
4163  }
4164 
4165  DstLine[ 0 ] = sum0;
4166 
4167  AVIR_RESIZE_PART2
4168  }
4169  else
4170  if( ElCount == 4 )
4171  {
4172  AVIR_RESIZE_PART1
4173 
4174  fptype sum0 = ElBiases[ 0 ];
4175  fptype sum1 = ElBiases[ 1 ];
4176  fptype sum2 = ElBiases[ 2 ];
4177  fptype sum3 = ElBiases[ 3 ];
4178 
4179  for( i = 0; i < IntFltLen; i += 2 )
4180  {
4181  const fptype xx = ftp[ i ] + ftp2[ i ] * x;
4182  sum0 += xx * Src[ 0 ];
4183  sum1 += xx * Src[ 1 ];
4184  sum2 += xx * Src[ 2 ];
4185  sum3 += xx * Src[ 3 ];
4186  Src += 4 * 2;
4187  }
4188 
4189  DstLine[ 0 ] = sum0;
4190  DstLine[ 1 ] = sum1;
4191  DstLine[ 2 ] = sum2;
4192  DstLine[ 3 ] = sum3;
4193 
4194  AVIR_RESIZE_PART2
4195  }
4196  else
4197  if( ElCount == 3 )
4198  {
4199  AVIR_RESIZE_PART1
4200 
4201  fptype sum0 = ElBiases[ 0 ];
4202  fptype sum1 = ElBiases[ 1 ];
4203  fptype sum2 = ElBiases[ 2 ];
4204 
4205  for( i = 0; i < IntFltLen; i += 2 )
4206  {
4207  const fptype xx = ftp[ i ] + ftp2[ i ] * x;
4208  sum0 += xx * Src[ 0 ];
4209  sum1 += xx * Src[ 1 ];
4210  sum2 += xx * Src[ 2 ];
4211  Src += 3 * 2;
4212  }
4213 
4214  DstLine[ 0 ] = sum0;
4215  DstLine[ 1 ] = sum1;
4216  DstLine[ 2 ] = sum2;
4217 
4218  AVIR_RESIZE_PART2
4219  }
4220  else
4221  if( ElCount == 2 )
4222  {
4223  AVIR_RESIZE_PART1
4224 
4225  fptype sum0 = ElBiases[ 0 ];
4226  fptype sum1 = ElBiases[ 1 ];
4227 
4228  for( i = 0; i < IntFltLen; i += 2 )
4229  {
4230  const fptype xx = ftp[ i ] + ftp2[ i ] * x;
4231  sum0 += xx * Src[ 0 ];
4232  sum1 += xx * Src[ 1 ];
4233  Src += 2 * 2;
4234  }
4235 
4236  DstLine[ 0 ] = sum0;
4237  DstLine[ 1 ] = sum1;
4238 
4239  AVIR_RESIZE_PART2
4240  }
4241  }
4242  else
4243  {
4244  if( ElCount == 1 )
4245  {
4246  AVIR_RESIZE_PART1nx
4247 
4248  fptype sum0 = ElBiases[ 0 ];
4249 
4250  for( i = 0; i < IntFltLen; i += 2 )
4251  {
4252  sum0 += ftp[ i ] * Src[ i ];
4253  }
4254 
4255  DstLine[ 0 ] = sum0;
4256 
4257  AVIR_RESIZE_PART2
4258  }
4259  else
4260  if( ElCount == 4 )
4261  {
4262  AVIR_RESIZE_PART1nx
4263 
4264  fptype sum0 = ElBiases[ 0 ];
4265  fptype sum1 = ElBiases[ 1 ];
4266  fptype sum2 = ElBiases[ 2 ];
4267  fptype sum3 = ElBiases[ 3 ];
4268 
4269  for( i = 0; i < IntFltLen; i += 2 )
4270  {
4271  const fptype xx = ftp[ i ];
4272  sum0 += xx * Src[ 0 ];
4273  sum1 += xx * Src[ 1 ];
4274  sum2 += xx * Src[ 2 ];
4275  sum3 += xx * Src[ 3 ];
4276  Src += 4 * 2;
4277  }
4278 
4279  DstLine[ 0 ] = sum0;
4280  DstLine[ 1 ] = sum1;
4281  DstLine[ 2 ] = sum2;
4282  DstLine[ 3 ] = sum3;
4283 
4284  AVIR_RESIZE_PART2
4285  }
4286  else
4287  if( ElCount == 3 )
4288  {
4289  AVIR_RESIZE_PART1nx
4290 
4291  fptype sum0 = ElBiases[ 0 ];
4292  fptype sum1 = ElBiases[ 1 ];
4293  fptype sum2 = ElBiases[ 2 ];
4294 
4295  for( i = 0; i < IntFltLen; i += 2 )
4296  {
4297  const fptype xx = ftp[ i ];
4298  sum0 += xx * Src[ 0 ];
4299  sum1 += xx * Src[ 1 ];
4300  sum2 += xx * Src[ 2 ];
4301  Src += 3 * 2;
4302  }
4303 
4304  DstLine[ 0 ] = sum0;
4305  DstLine[ 1 ] = sum1;
4306  DstLine[ 2 ] = sum2;
4307 
4308  AVIR_RESIZE_PART2
4309  }
4310  else
4311  if( ElCount == 2 )
4312  {
4313  AVIR_RESIZE_PART1nx
4314 
4315  fptype sum0 = ElBiases[ 0 ];
4316  fptype sum1 = ElBiases[ 1 ];
4317 
4318  for( i = 0; i < IntFltLen; i += 2 )
4319  {
4320  const fptype xx = ftp[ i ];
4321  sum0 += xx * Src[ 0 ];
4322  sum1 += xx * Src[ 1 ];
4323  Src += 2 * 2;
4324  }
4325 
4326  DstLine[ 0 ] = sum0;
4327  DstLine[ 1 ] = sum1;
4328 
4329  AVIR_RESIZE_PART2
4330  }
4331  }
4332  }
4333 #undef AVIR_RESIZE_PART2
4334 #undef AVIR_RESIZE_PART1nx
4335 #undef AVIR_RESIZE_PART1
4336 };
4337 
4355 template< class fptype >
4357 {
4358 public:
4369  void init( const int aLen, const CImageResizerVars& aVars,
4370  const double aTrMul, const double aPkOut )
4371  {
4372  Len = aLen;
4373  Vars = &aVars;
4374  LenE = aLen * Vars -> ElCount;
4375  TrMul0 = aTrMul;
4376  PkOut0 = aPkOut;
4377  }
4378 
/**
 * @return Always "false": dither() of this class is a "const" function that
 * processes every element of the scanline independently.
 */

static bool isRecursive()
{
	return false;
}
4388 
4395  void dither( fptype* const ResScanline ) const
4396  {
4397  const fptype c0 = (fptype) 0;
4398  const fptype PkOut = (fptype) PkOut0;
4399  int j;
4400 
4401  if( TrMul0 == 1.0 )
4402  {
4403  // Optimization - do not perform bit depth truncation.
4404 
4405  for( j = 0; j < LenE; j++ )
4406  {
4407  ResScanline[ j ] = clamp( round( ResScanline[ j ]), c0,
4408  PkOut );
4409  }
4410  }
4411  else
4412  {
4413  const fptype TrMul = (fptype) TrMul0;
4414 
4415  for( j = 0; j < LenE; j++ )
4416  {
4417  const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul;
4418  ResScanline[ j ] = clamp( z0, c0, PkOut );
4419  }
4420  }
4421  }
4422 
4423 protected:
4424  int Len;
4425  const CImageResizerVars* Vars;
4427  int LenE;
4429  double TrMul0;
4431  double PkOut0;
4433 };
4435 
4448 template< class fptype >
4450  public CImageResizerDithererDefINL< fptype >
4451 {
4452 public:
4463  void init( const int aLen, const CImageResizerVars& aVars,
4464  const double aTrMul, const double aPkOut )
4465  {
4466  CImageResizerDithererDefINL< fptype > :: init( aLen, aVars, aTrMul,
4467  aPkOut );
4468 
4469  ResScanlineDith0.alloc( LenE + Vars -> ElCount, sizeof( fptype ));
4470  ResScanlineDith = ResScanlineDith0 + Vars -> ElCount;
4471  int i;
4472 
4473  for( i = 0; i < LenE + Vars -> ElCount; i++ )
4474  {
4475  ResScanlineDith0[ i ] = (fptype) 0;
4476  }
4477  }
4478 
4479  static bool isRecursive()
4480  {
4481  return( true );
4482  }
4483 
4484  void dither( fptype* const ResScanline )
4485  {
4486  const int ElCount = Vars -> ElCount;
4487  const fptype c0 = (fptype) 0;
4488  const fptype TrMul = (fptype) TrMul0;
4489  const fptype PkOut = (fptype) PkOut0;
4490  int j;
4491 
4492  for( j = 0; j < LenE; j++ )
4493  {
4494  ResScanline[ j ] += ResScanlineDith[ j ];
4495  ResScanlineDith[ j ] = (fptype) 0;
4496  }
4497 
4498  for( j = 0; j < LenE - ElCount; j++ )
4499  {
4500  // Perform rounding, noise estimation and saturation.
4501 
4502  const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul;
4503  const fptype Noise = ResScanline[ j ] - z0;
4504  ResScanline[ j ] = clamp( z0, c0, PkOut );
4505 
4506  ResScanline[ j + ElCount ] += Noise * (fptype) 0.364842;
4507  ResScanlineDith[ j - ElCount ] += Noise * (fptype) 0.207305;
4508  ResScanlineDith[ j ] += Noise * (fptype) 0.364842;
4509  ResScanlineDith[ j + ElCount ] += Noise * (fptype) 0.063011;
4510  }
4511 
4512  while( j < LenE )
4513  {
4514  const fptype z0 = round( ResScanline[ j ] / TrMul ) * TrMul;
4515  const fptype Noise = ResScanline[ j ] - z0;
4516  ResScanline[ j ] = clamp( z0, c0, PkOut );
4517 
4518  ResScanlineDith[ j - ElCount ] += Noise * (fptype) 0.207305;
4519  ResScanlineDith[ j ] += Noise * (fptype) 0.364842;
4520  j++;
4521  }
4522  }
4523 
4524 protected:
4525  using CImageResizerDithererDefINL< fptype > :: Len;
4526  using CImageResizerDithererDefINL< fptype > :: Vars;
4527  using CImageResizerDithererDefINL< fptype > :: LenE;
4528  using CImageResizerDithererDefINL< fptype > :: TrMul0;
4529  using CImageResizerDithererDefINL< fptype > :: PkOut0;
4530 
4532  fptype* ResScanlineDith;
4534 };
4537 
4568 template< class afptype, class afptypeatom = afptype,
4571 {
4572 public:
4573  typedef afptype fptype;
4574  typedef afptypeatom fptypeatom;
4576  static const int fppack = sizeof( fptype ) / sizeof( fptypeatom );
4578  static const int fpalign = sizeof( fptype );
4581  static const int elalign = 1;
4587  static const int packmode = 0;
4595  typedef adith CDitherer;
4598 };
4600 
4616 template< class fpclass = fpclass_def< float > >
4618 {
4619  AVIR_NOCTOR( CImageResizer );
4620 
4621 public:
4637  CImageResizer( const int aResBitDepth = 8, const int aSrcBitDepth = 0,
4638  const CImageResizerParams& aParams = CImageResizerParamsDef() )
4639  : Params( aParams )
4640  , ResBitDepth( aResBitDepth )
4641  {
4642  SrcBitDepth = ( aSrcBitDepth == 0 ? ResBitDepth : aSrcBitDepth );
4643 
4644  initFilterBank( FixedFilterBank, 1.0, false, CFltBuffer() );
4645  FixedFilterBank.createAllFilters();
4646  }
4647 
4687  template< class Tin, class Tout >
4688  void resizeImage( const Tin* const SrcBuf, const int SrcWidth,
4689  const int SrcHeight, int SrcScanlineSize, Tout* const NewBuf,
4690  const int NewWidth, const int NewHeight, const int ElCountIO,
4691  const double k, CImageResizerVars* const aVars = NULL ) const
4692  {
4693  if( SrcWidth == 0 || SrcHeight == 0 )
4694  {
4695  memset( NewBuf, 0, (size_t) NewWidth * NewHeight *
4696  sizeof( Tout ));
4697 
4698  return;
4699  }
4700  else
4701  if( NewWidth == 0 || NewHeight == 0 )
4702  {
4703  return;
4704  }
4705 
4706  CImageResizerVars DefVars;
4707  CImageResizerVars& Vars = ( aVars == NULL ? DefVars : *aVars );
4708 
4709  CImageResizerThreadPool DefThreadPool;
4710  CImageResizerThreadPool& ThreadPool = ( Vars.ThreadPool == NULL ?
4711  DefThreadPool : *Vars.ThreadPool );
4712 
4713  // Define resizing steps, also optionally modify offsets so that
4714  // resizing produces a "centered" image.
4715 
4716  double kx;
4717  double ky;
4718  double ox = Vars.ox;
4719  double oy = Vars.oy;
4720 
4721  if( k == 0.0 )
4722  {
4723  kx = (double) SrcWidth / NewWidth;
4724  ox += ( kx - 1.0 ) * 0.5;
4725 
4726  ky = (double) SrcHeight / NewHeight;
4727  oy += ( ky - 1.0 ) * 0.5;
4728  }
4729  else
4730  if( k > 0.0 )
4731  {
4732  kx = k;
4733  ky = k;
4734 
4735  const double ko = ( k - 1.0 ) * 0.5;
4736  ox += ko;
4737  oy += ko;
4738  }
4739  else
4740  {
4741  kx = -k;
4742  ky = -k;
4743  }
4744 
4745  // Evaluate pre-multipliers used on the output stage.
4746 
4747  const bool IsInFloat = ( (Tin) 0.25 != 0 );
4748  const bool IsOutFloat = ( (Tout) 0.25 != 0 );
4749  double OutMul; // Output multiplier.
4750 
4751  if( Vars.UseSRGBGamma )
4752  {
4753  if( IsInFloat )
4754  {
4755  Vars.InGammaMult = 1.0;
4756  }
4757  else
4758  {
4759  Vars.InGammaMult =
4760  1.0 / ( sizeof( Tin ) == 1 ? 255.0 : 65535.0 );
4761  }
4762 
4763  if( IsOutFloat )
4764  {
4765  Vars.OutGammaMult = 1.0;
4766  }
4767  else
4768  {
4769  Vars.OutGammaMult = ( sizeof( Tout ) == 1 ? 255.0 : 65535.0 );
4770  }
4771 
4772  OutMul = 1.0;
4773  }
4774  else
4775  {
4776  if( IsOutFloat )
4777  {
4778  OutMul = 1.0;
4779  }
4780  else
4781  {
4782  OutMul = ( sizeof( Tout ) == 1 ? 255.0 : 65535.0 );
4783  }
4784 
4785  if( !IsInFloat )
4786  {
4787  OutMul /= ( sizeof( Tin ) == 1 ? 255.0 : 65535.0 );
4788  }
4789  }
4790 
4791  // Fill widely-used variables.
4792 
4793  const int ElCount = ( ElCountIO + fpclass :: fppack - 1 ) /
4794  fpclass :: fppack;
4795 
4796  const int NewWidthE = NewWidth * ElCount;
4797 
4798  if( SrcScanlineSize < 1 )
4799  {
4800  SrcScanlineSize = SrcWidth * ElCountIO;
4801  }
4802 
4803  Vars.ElCount = ElCount;
4804  Vars.ElCountIO = ElCountIO;
4805  Vars.fppack = fpclass :: fppack;
4806  Vars.fpalign = fpclass :: fpalign;
4807  Vars.elalign = fpclass :: elalign;
4808  Vars.packmode = fpclass :: packmode;
4809 
4810  // Horizontal scanline filtering and resizing.
4811 
4813  CFilterSteps FltSteps;
4814  typename CFilterStep :: CRPosBufArray RPosBufArray;
4815  CBuffer< uint8_t > UsedFracMap;
4816 
4817  // Perform the filtering steps modeling at various modes, find the
4818  // most efficient mode for both horizontal and vertical resizing.
4819 
4820  int UseBuildMode = 1;
4821  const int BuildModeCount =
4822  ( FixedFilterBank.getOrder() == 0 ? 4 : 2 );
4823 
4824  int m;
4825 
4826  if( Vars.BuildMode >= 0 )
4827  {
4828  UseBuildMode = Vars.BuildMode;
4829  }
4830  else
4831  {
4832  int BestScore = 0x7FFFFFFF;
4833 
4834  for( m = 0; m < BuildModeCount; m++ )
4835  {
4837  CFilterSteps TmpSteps;
4838  Vars.k = kx;
4839  Vars.o = ox;
4840  buildFilterSteps( TmpSteps, Vars, TmpBank, OutMul, m, true );
4841  updateFilterStepBuffers( TmpSteps, Vars, RPosBufArray,
4842  SrcWidth, NewWidth );
4843 
4844  fillUsedFracMap( TmpSteps[ Vars.ResizeStep ], UsedFracMap );
4845  const int c = calcComplexity( TmpSteps, Vars, UsedFracMap,
4846  SrcHeight );
4847 
4848  if( c < BestScore )
4849  {
4850  UseBuildMode = m;
4851  BestScore = c;
4852  }
4853  }
4854  }
4855 
4856  // Perform the actual filtering steps building.
4857 
4858  Vars.k = kx;
4859  Vars.o = ox;
4860  buildFilterSteps( FltSteps, Vars, FltBank, OutMul, UseBuildMode,
4861  false );
4862 
4863  updateFilterStepBuffers( FltSteps, Vars, RPosBufArray, SrcWidth,
4864  NewWidth );
4865 
4866  updateBufLenAndRPosPtrs( FltSteps, Vars, NewWidth );
4867 
4868  const int ThreadCount = ThreadPool.getSuggestedWorkloadCount();
4869  // Includes the current thread.
4870 
4872  td.setItemCount( ThreadCount );
4873  int i;
4874 
4875  for( i = 0; i < ThreadCount; i++ )
4876  {
4877  if( i > 0 )
4878  {
4879  ThreadPool.addWorkload( &td[ i ]);
4880  }
4881 
4882  td[ i ].init( i, ThreadCount, FltSteps, Vars );
4883 
4884  td[ i ].initScanlineQueue( td[ i ].sopResizeH, SrcHeight,
4885  SrcWidth );
4886  }
4887 
4888  CBuffer< fptype, size_t > FltBuf( (size_t) NewWidthE * SrcHeight,
4889  fpclass :: fpalign ); // Temporary buffer that receives
4890  // horizontally-filtered and resized image.
4891 
4892  for( i = 0; i < SrcHeight; i++ )
4893  {
4894  td[ i % ThreadCount ].addScanlineToQueue(
4895  (void*) &SrcBuf[ (size_t) i * SrcScanlineSize ],
4896  &FltBuf[ (size_t) i * NewWidthE ]);
4897  }
4898 
4899  ThreadPool.startAllWorkloads();
4900  td[ 0 ].processScanlineQueue();
4901  ThreadPool.waitAllWorkloadsToFinish();
4902 
4903  // Vertical scanline filtering and resizing, reuse previously defined
4904  // filtering steps if possible.
4905 
4906  const int PrevUseBuildMode = UseBuildMode;
4907 
4908  if( Vars.BuildMode >= 0 )
4909  {
4910  UseBuildMode = Vars.BuildMode;
4911  }
4912  else
4913  {
4914  CImageResizerVars TmpVars( Vars );
4915  int BestScore = 0x7FFFFFFF;
4916 
4917  for( m = 0; m < BuildModeCount; m++ )
4918  {
4920  TmpBank.copyInitParams( FltBank );
4921  CFilterSteps TmpSteps;
4922  TmpVars.k = ky;
4923  TmpVars.o = oy;
4924  buildFilterSteps( TmpSteps, TmpVars, TmpBank, 1.0, m, true );
4925  updateFilterStepBuffers( TmpSteps, TmpVars, RPosBufArray,
4926  SrcHeight, NewHeight );
4927 
4928  fillUsedFracMap( TmpSteps[ TmpVars.ResizeStep ],
4929  UsedFracMap );
4930 
4931  const int c = calcComplexity( TmpSteps, TmpVars, UsedFracMap,
4932  NewWidth );
4933 
4934  if( c < BestScore )
4935  {
4936  UseBuildMode = m;
4937  BestScore = c;
4938  }
4939  }
4940  }
4941 
4942  Vars.k = ky;
4943  Vars.o = oy;
4944 
4945  if( UseBuildMode == PrevUseBuildMode && ky == kx )
4946  {
4947  if( OutMul != 1.0 )
4948  {
4949  modifyCorrFilterDCGain( FltSteps, 1.0 / OutMul );
4950  }
4951  }
4952  else
4953  {
4954  buildFilterSteps( FltSteps, Vars, FltBank, 1.0, UseBuildMode,
4955  false );
4956  }
4957 
4958  updateFilterStepBuffers( FltSteps, Vars, RPosBufArray, SrcHeight,
4959  NewHeight );
4960 
4961  updateBufLenAndRPosPtrs( FltSteps, Vars, NewWidth );
4962 
4963  if( IsOutFloat && sizeof( FltBuf[ 0 ]) == sizeof( Tout ) &&
4964  fpclass :: packmode == 0 )
4965  {
4966  // In-place output.
4967 
4968  for( i = 0; i < ThreadCount; i++ )
4969  {
4970  td[ i ].initScanlineQueue( td[ i ].sopResizeV, NewWidth,
4971  SrcHeight, NewWidthE, NewWidthE );
4972  }
4973 
4974  for( i = 0; i < NewWidth; i++ )
4975  {
4976  td[ i % ThreadCount ].addScanlineToQueue(
4977  &FltBuf[ (size_t) i * ElCount ],
4978  (fptype*) &NewBuf[ (size_t) i * ElCount ]);
4979  }
4980 
4981  ThreadPool.startAllWorkloads();
4982  td[ 0 ].processScanlineQueue();
4983  ThreadPool.waitAllWorkloadsToFinish();
4984  ThreadPool.removeAllWorkloads();
4985 
4986  return;
4987  }
4988 
4989  CBuffer< fptype, size_t > ResBuf( (size_t) NewWidthE * NewHeight,
4990  fpclass :: fpalign );
4991 
4992  for( i = 0; i < ThreadCount; i++ )
4993  {
4994  td[ i ].initScanlineQueue( td[ i ].sopResizeV, NewWidth,
4995  SrcHeight, NewWidthE, NewWidthE );
4996  }
4997 
4998  const int im = ( fpclass :: packmode == 0 ? ElCount : 1 );
4999 
5000  for( i = 0; i < NewWidth; i++ )
5001  {
5002  td[ i % ThreadCount ].addScanlineToQueue(
5003  &FltBuf[ (size_t) i * im ], &ResBuf[ (size_t) i * im ]);
5004  }
5005 
5006  ThreadPool.startAllWorkloads();
5007  td[ 0 ].processScanlineQueue();
5008  ThreadPool.waitAllWorkloadsToFinish();
5009 
5010  if( IsOutFloat )
5011  {
5012  // Perform output, but skip dithering.
5013 
5014  for( i = 0; i < ThreadCount; i++ )
5015  {
5016  td[ i ].initScanlineQueue( td[ i ].sopUnpackH,
5017  NewHeight, NewWidth );
5018  }
5019 
5020  for( i = 0; i < NewHeight; i++ )
5021  {
5022  td[ i % ThreadCount ].addScanlineToQueue(
5023  &ResBuf[ (size_t) i * NewWidthE ],
5024  &NewBuf[ (size_t) i * NewWidth * ElCountIO ]);
5025  }
5026 
5027  ThreadPool.startAllWorkloads();
5028  td[ 0 ].processScanlineQueue();
5029  ThreadPool.waitAllWorkloadsToFinish();
5030  ThreadPool.removeAllWorkloads();
5031 
5032  return;
5033  }
5034 
5035  // Perform output with dithering (for integer output only).
5036 
5037  int TruncBits; // The number of lower bits to truncate and dither.
5038  int OutRange; // Output range.
5039 
5040  if( sizeof( Tout ) == 1 )
5041  {
5042  TruncBits = 8 - ResBitDepth;
5043  OutRange = 255;
5044  }
5045  else
5046  {
5047  TruncBits = 16 - ResBitDepth;
5048  OutRange = 65535;
5049  }
5050 
5051  const double PkOut = OutRange;
5052  const double TrMul = ( TruncBits > 0 ?
5053  PkOut / ( OutRange >> TruncBits ) : 1.0 );
5054 
5055  if( CDitherer :: isRecursive() )
5056  {
5057  td[ 0 ].getDitherer().init( NewWidth, Vars, TrMul, PkOut );
5058 
5059  if( Vars.UseSRGBGamma )
5060  {
5061  for( i = 0; i < NewHeight; i++ )
5062  {
5063  fptype* const ResScanline =
5064  &ResBuf[ (size_t) i * NewWidthE ];
5065 
5066  CFilterStep :: applySRGBGamma( ResScanline, NewWidth,
5067  Vars );
5068 
5069  td[ 0 ].getDitherer().dither( ResScanline );
5070 
5071  CFilterStep :: unpackScanline( ResScanline,
5072  &NewBuf[ (size_t) i * NewWidth * ElCountIO ],
5073  NewWidth, Vars );
5074  }
5075  }
5076  else
5077  {
5078  for( i = 0; i < NewHeight; i++ )
5079  {
5080  fptype* const ResScanline =
5081  &ResBuf[ (size_t) i * NewWidthE ];
5082 
5083  td[ 0 ].getDitherer().dither( ResScanline );
5084 
5085  CFilterStep :: unpackScanline( ResScanline,
5086  &NewBuf[ (size_t) i * NewWidth * ElCountIO ],
5087  NewWidth, Vars );
5088  }
5089  }
5090  }
5091  else
5092  {
5093  for( i = 0; i < ThreadCount; i++ )
5094  {
5095  td[ i ].initScanlineQueue( td[ i ].sopDitherAndUnpackH,
5096  NewHeight, NewWidth );
5097 
5098  td[ i ].getDitherer().init( NewWidth, Vars, TrMul, PkOut );
5099  }
5100 
5101  for( i = 0; i < NewHeight; i++ )
5102  {
5103  td[ i % ThreadCount ].addScanlineToQueue(
5104  &ResBuf[ (size_t) i * NewWidthE ],
5105  &NewBuf[ (size_t) i * NewWidth * ElCountIO ]);
5106  }
5107 
5108  ThreadPool.startAllWorkloads();
5109  td[ 0 ].processScanlineQueue();
5110  ThreadPool.waitAllWorkloadsToFinish();
5111  }
5112 
5113  ThreadPool.removeAllWorkloads();
5114  }
5115 
5116 private:
 // Processing element type supplied by the "fpclass" template argument.
5117  typedef typename fpclass :: fptype fptype;
 // Filtering step class supplied by "fpclass".
5118  typedef typename fpclass :: CFilterStep CFilterStep;
 // Ditherer class supplied by "fpclass".
5121  typedef typename fpclass :: CDitherer CDitherer;
 // Algorithm parameters, copied from the constructor's argument.
5124  CImageResizerParams Params;
 // Source image bit depth; the constructor substitutes ResBitDepth when 0
 // was passed.
5127  int SrcBitDepth;
 // Resulting image bit depth, set by the constructor.
5129  int ResBitDepth;
 // Filter bank initialized once in the constructor (cutoff multiplier 1.0,
 // no external filter); buildFilterSteps() points a step at it when the
 // requested bank compares equal to it.
5131  CDSPFracFilterBankLin< fptype > FixedFilterBank;
5133 
 // Array of filtering steps.
5143  typedef CStructArray< CFilterStep > CFilterSteps;
5144 
5157  void initFilterBank( CDSPFracFilterBankLin< fptype >& FltBank,
5158  const double CutoffMult, const bool ForceHiOrder,
5159  const CFltBuffer& ExtFilter ) const
5160  {
5161  const int IntBitDepth = ( ResBitDepth > SrcBitDepth ? ResBitDepth :
5162  SrcBitDepth );
5163 
5164  const double SNR = -6.02 * ( IntBitDepth + 3 );
5165  int UseOrder;
5166  int FracCount; // The number of fractional delay filters sampled by
5167  // the filter bank. This variable affects the signal-to-noise
5168  // ratio at interpolation stage. Theoretically, at UseOrder==1,
5169  // 8-bit image resizing requires 66.2 dB SNR or 11. 16-bit
5170  // resizing requires 114.4 dB SNR or 150. At UseOrder=0 the
5171  // required number of filters is exponentially higher.
5172 
5173  if( ForceHiOrder || IntBitDepth > 8 )
5174  {
5175  UseOrder = 1; // -146 dB max
5176  FracCount = (int) ceil( 0.23134052 * exp( -0.058062929 * SNR ));
5177  }
5178  else
5179  {
5180  UseOrder = 0; // -72 dB max
5181  FracCount = (int) ceil( 0.33287686 * exp( -0.11334583 * SNR ));
5182  }
5183 
5184  if( FracCount < 2 )
5185  {
5186  FracCount = 2;
5187  }
5188 
5189  FltBank.init( FracCount, UseOrder, Params.IntFltLen / CutoffMult,
5190  Params.IntFltCutoff * CutoffMult, Params.IntFltAlpha, ExtFilter,
5191  fpclass :: fpalign, fpclass :: elalign );
5192  }
5193 
5207  static void allocFilter( CBuffer< fptype >& Flt, const int ReqCapacity,
5208  const bool IsModel = false, int* const FltExt = NULL )
5209  {
5210  int UseCapacity = ( ReqCapacity + fpclass :: elalign - 1 ) &
5211  ~( fpclass :: elalign - 1 );
5212 
5213  int Ext = UseCapacity - ReqCapacity;
5214 
5215  if( FltExt != NULL )
5216  {
5217  *FltExt = Ext;
5218  }
5219 
5220  if( IsModel )
5221  {
5222  Flt.forceCapacity( UseCapacity );
5223  return;
5224  }
5225 
5226  Flt.alloc( UseCapacity, fpclass :: fpalign );
5227 
5228  while( Ext > 0 )
5229  {
5230  Ext--;
5231  Flt[ ReqCapacity + Ext ] = (fptype) 0;
5232  }
5233  }
5234 
5256  void assignFilterParams( CFilterStep& fs, const bool IsUpsample,
5257  const int ResampleFactor, const double FltCutoff, const double DCGain,
5258  const bool UseFltOrig, const bool IsModel ) const
5259  {
5260  double FltAlpha;
5261  double Len2;
5262  double Freq;
5263 
5264  if( FltCutoff == 0.0 )
5265  {
5266  const double m = 2.0 / ResampleFactor;
5267  FltAlpha = Params.HBFltAlpha;
5268  Len2 = 0.5 * Params.HBFltLen / m;
5269  Freq = AVIR_PI * Params.HBFltCutoff * m;
5270  }
5271  else
5272  {
5273  FltAlpha = Params.LPFltAlpha;
5274  Len2 = 0.25 * Params.LPFltBaseLen / FltCutoff;
5275  Freq = AVIR_PI * Params.LPFltCutoffMult * FltCutoff;
5276  }
5277 
5278  if( IsUpsample )
5279  {
5280  Len2 *= ResampleFactor;
5281  Freq /= ResampleFactor;
5282  fs.DCGain = DCGain * ResampleFactor;
5283  }
5284  else
5285  {
5286  fs.DCGain = DCGain;
5287  }
5288 
5289  fs.FltOrig.Len2 = Len2;
5290  fs.FltOrig.Freq = Freq;
5291  fs.FltOrig.Alpha = FltAlpha;
5292  fs.FltOrig.DCGain = fs.DCGain;
5293 
5294  CDSPPeakedCosineLPF w( Len2, Freq, FltAlpha );
5295 
5296  fs.IsUpsample = IsUpsample;
5297  fs.ResampleFactor = ResampleFactor;
5298  fs.FltLatency = w.fl2;
5299 
5300  int FltExt; // Filter's extension due to fpclass :: elalign.
5301 
5302  if( IsModel )
5303  {
5304  allocFilter( fs.Flt, w.FilterLen, true, &FltExt );
5305 
5306  if( UseFltOrig )
5307  {
5308  // Allocate a real buffer even in modeling mode since this
5309  // filter may be copied by the filter bank.
5310 
5311  fs.FltOrig.alloc( w.FilterLen );
5312  memset( &fs.FltOrig[ 0 ], 0,
5313  w.FilterLen * sizeof( fs.FltOrig[ 0 ]));
5314  }
5315  }
5316  else
5317  {
5318  fs.FltOrig.alloc( w.FilterLen );
5319 
5320  w.generateLPF( &fs.FltOrig[ 0 ], fs.DCGain );
5321 
5322  allocFilter( fs.Flt, fs.FltOrig.getCapacity(), false, &FltExt );
5323  copyArray( &fs.FltOrig[ 0 ], &fs.Flt[ 0 ],
5324  fs.FltOrig.getCapacity() );
5325 
5326  if( !UseFltOrig )
5327  {
5328  fs.FltOrig.free();
5329  }
5330  }
5331 
5332  if( IsUpsample )
5333  {
5334  int l = fs.Flt.getCapacity() - fs.FltLatency - ResampleFactor -
5335  FltExt;
5336 
5337  allocFilter( fs.PrefixDC, l, IsModel );
5338  allocFilter( fs.SuffixDC, fs.FltLatency, IsModel );
5339 
5340  if( IsModel )
5341  {
5342  return;
5343  }
5344 
5345  // Create prefix and suffix "tails" used during upsampling.
5346 
5347  const fptype* ip = &fs.Flt[ fs.FltLatency + ResampleFactor ];
5348  copyArray( ip, &fs.PrefixDC[ 0 ], l );
5349 
5350  while( true )
5351  {
5352  ip += ResampleFactor;
5353  l -= ResampleFactor;
5354 
5355  if( l <= 0 )
5356  {
5357  break;
5358  }
5359 
5360  addArray( ip, &fs.PrefixDC[ 0 ], l );
5361  }
5362 
5363  l = fs.FltLatency;
5364  fptype* op = &fs.SuffixDC[ 0 ];
5365  copyArray( &fs.Flt[ 0 ], op, l );
5366 
5367  while( true )
5368  {
5369  op += ResampleFactor;
5370  l -= ResampleFactor;
5371 
5372  if( l <= 0 )
5373  {
5374  break;
5375  }
5376 
5377  addArray( &fs.Flt[ 0 ], op, l );
5378  }
5379  }
5380  else
5381  if( !UseFltOrig )
5382  {
5383  fs.EdgePixelCount = fs.EdgePixelCountDef;
5384  }
5385  }
5386 
5408  void addCorrectionFilter( CFilterSteps& Steps, const double bw,
5409  const bool IsPreCorrection, const bool IsModel ) const
5410  {
5411  CFilterStep& fs = ( IsPreCorrection ? Steps[ 0 ] : Steps.add() );
5412  fs.IsUpsample = false;
5413  fs.ResampleFactor = 1;
5414  fs.DCGain = 1.0;
5415  fs.EdgePixelCount = ( IsPreCorrection ? fs.EdgePixelCountDef : 0 );
5416 
5417  if( IsModel )
5418  {
5419  allocFilter( fs.Flt, CDSPFIREQ :: calcFilterLength(
5420  Params.CorrFltLen, fs.FltLatency ), true );
5421 
5422  return;
5423  }
5424 
5425  const int BinCount = 65; // Frequency response bins to control.
5426  const int BinCount1 = BinCount - 1;
5427  double curbw = 1.0; // Bandwidth of the filter at the current step.
5428  int i;
5429  int j;
5430  double re;
5431  double im;
5432 
5433  CBuffer< double > Bins( BinCount ); // Adjustment introduced by all
5434  // steps at all frequencies of interest.
5435 
5436  for( j = 0; j < BinCount; j++ )
5437  {
5438  Bins[ j ] = 1.0;
5439  }
5440 
5441  const int si = ( IsPreCorrection ? 1 : 0 );
5442 
5443  for( i = si; i < Steps.getItemCount() - ( si ^ 1 ); i++ )
5444  {
5445  const CFilterStep& fs = Steps[ i ];
5446 
5447  if( fs.IsUpsample )
5448  {
5449  curbw *= fs.ResampleFactor;
5450 
5451  if( fs.FltOrig.getCapacity() > 0 )
5452  {
5453  continue;
5454  }
5455  }
5456 
5457  const fptype* Flt;
5458  int FltLen;
5459 
5460  if( fs.ResampleFactor == 0 )
5461  {
5462  Flt = fs.FltBank -> getFilter( 0 );
5463  FltLen = fs.FltBank -> getFilterLen();
5464  }
5465  else
5466  {
5467  Flt = &fs.Flt[ 0 ];
5468  FltLen = fs.Flt.getCapacity();
5469  }
5470 
5471  // Calculate frequency response adjustment introduced by the
5472  // filter at this step, within the bounds of bandwidth of
5473  // interest.
5474 
5475  const double thm = AVIR_PI * bw / ( curbw * BinCount1 );
5476 
5477  for( j = 0; j < BinCount; j++ )
5478  {
5479  calcFIRFilterResponse( Flt, FltLen, j * thm, re, im );
5480 
5481  Bins[ j ] *= fs.DCGain / sqrt( re * re + im * im );
5482  }
5483 
5484  if( !fs.IsUpsample && fs.ResampleFactor > 1 )
5485  {
5486  curbw /= fs.ResampleFactor;
5487  }
5488  }
5489 
5490  // Calculate filter.
5491 
5492  CDSPFIREQ EQ;
5493  EQ.init( bw * 2.0, Params.CorrFltLen, BinCount, 0.0, bw, false,
5494  Params.CorrFltAlpha );
5495 
5496  fs.FltLatency = EQ.getFilterLatency();
5497 
5498  CBuffer< double > Filter( EQ.getFilterLength() );
5499  EQ.buildFilter( Bins, &Filter[ 0 ]);
5500  normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 );
5501 
5502  allocFilter( fs.Flt, Filter.getCapacity() );
5503  copyArray( &Filter[ 0 ], &fs.Flt[ 0 ], Filter.getCapacity() );
5504 
5505  // Print a theoretically achieved final frequency response at various
5506  // feature sizes (from DC to 1 pixel). Values above 255 means features
5507  // become brighter, values below 255 means features become dimmer.
5508 
5509 /* const double sbw = ( bw > 1.0 ? 1.0 / bw : 1.0 );
5510 
5511  for( j = 0; j < BinCount; j++ )
5512  {
5513  const double th = AVIR_PI * sbw * j / BinCount1;
5514 
5515  calcFIRFilterResponse( &fs.Flt[ 0 ], fs.Flt.getCapacity(),
5516  th, re, im );
5517 
5518  printf( "%f\n", sqrt( re * re + im * im ) / Bins[ j ] * 255 );
5519  }
5520 
5521  printf( "***\n" );*/
5522  }
5523 
5541  static void addSharpenTest( CFilterSteps& Steps, const double bw,
5542  const bool IsModel )
5543  {
5544  if( bw <= 1.0 )
5545  {
5546  return;
5547  }
5548 
5549  const double FltLen = 10.0 * bw;
5550 
5551  CFilterStep& fs = Steps.add();
5552  fs.IsUpsample = false;
5553  fs.ResampleFactor = 1;
5554  fs.DCGain = 1.0;
5555  fs.EdgePixelCount = 0;
5556 
5557  if( IsModel )
5558  {
5559  allocFilter( fs.Flt, CDSPFIREQ :: calcFilterLength( FltLen,
5560  fs.FltLatency ), true );
5561 
5562  return;
5563  }
5564 
5565  const int BinCount = 200;
5566  CBuffer< double > Bins( BinCount );
5567  int Thresh = (int) round( BinCount / bw * 1.75 );
5568 
5569  if( Thresh > BinCount )
5570  {
5571  Thresh = BinCount;
5572  }
5573 
5574  int j;
5575 
5576  for( j = 0; j < Thresh; j++ )
5577  {
5578  Bins[ j ] = 1.0;
5579  }
5580 
5581  for( j = Thresh; j < BinCount; j++ )
5582  {
5583  Bins[ j ] = 256.0;
5584  }
5585 
5586  CDSPFIREQ EQ;
5587  EQ.init( bw * 2.0, FltLen, BinCount, 0.0, bw, false, 1.7 );
5588 
5589  fs.FltLatency = EQ.getFilterLatency();
5590 
5591  CBuffer< double > Filter( EQ.getFilterLength() );
5592  EQ.buildFilter( Bins, &Filter[ 0 ]);
5593  normalizeFIRFilter( &Filter[ 0 ], Filter.getCapacity(), 1.0 );
5594 
5595  allocFilter( fs.Flt, Filter.getCapacity() );
5596  copyArray( &Filter[ 0 ], &fs.Flt[ 0 ], Filter.getCapacity() );
5597 
5598 /* for( j = 0; j < BinCount; j++ )
5599  {
5600  const double th = AVIR_PI * j / ( BinCount - 1 );
5601  double re;
5602  double im;
5603 
5604  calcFIRFilterResponse( &fs.Flt[ 0 ], fs.Flt.getCapacity(),
5605  th, re, im );
5606 
5607  printf( "%f\n", sqrt( re * re + im * im ));
5608  }
5609 
5610  printf( "***\n" );*/
5611  }
5612 
5629  void buildFilterSteps( CFilterSteps& Steps, CImageResizerVars& Vars,
5630  CDSPFracFilterBankLin< fptype >& FltBank, const double DCGain,
5631  const int ModeFlags, const bool IsModel ) const
5632  {
5633  Steps.clear();
5634 
5635  const bool DoFltAndIntCombo = (( ModeFlags & 1 ) != 0 ); // Do filter
5636  // and interpolator combining.
5637  const bool ForceHiOrderInt = (( ModeFlags & 2 ) != 0 ); // Force use
5638  // of a higher-order interpolation.
5639  const bool UseHalfband = (( ModeFlags & 4 ) != 0 ); // Use half-band
5640  // filter.
5641 
5642  const double bw = 1.0 / Vars.k; // Resulting bandwidth.
5643  const int UpsampleFactor = ( (int) floor( Vars.k ) < 2 ? 2 : 1 );
5644  double IntCutoffMult; // Interpolation filter cutoff multiplier.
5645  CFilterStep* ReuseStep; // If not NULL, resizing step should use
5646  // this step object instead of creating a new one.
5647  CFilterStep* ExtFltStep; // Use FltOrig of this step as the external
5648  // filter to applied to the interpolator.
5649  bool IsPreCorrection; // "True" if the correction filter is applied
5650  // first.
5651  double FltCutoff; // Cutoff frequency of the first filtering step.
5652  double corrbw;
5653 
5654  if( Vars.k <= 1.0 )
5655  {
5656  IsPreCorrection = true;
5657  FltCutoff = 1.0;
5658  corrbw = 1.0;
5659  Steps.add();
5660  }
5661  else
5662  {
5663  IsPreCorrection = false;
5664  FltCutoff = bw;
5665  corrbw = bw;
5666  }
5667 
5668  // Add 1 upsampling or several downsampling filters.
5669 
5670  if( UpsampleFactor > 1 )
5671  {
5672  CFilterStep& fs = Steps.add();
5673  assignFilterParams( fs, true, UpsampleFactor, FltCutoff, DCGain,
5674  DoFltAndIntCombo, IsModel );
5675 
5676  IntCutoffMult = FltCutoff * 2.0 / UpsampleFactor;
5677  ReuseStep = NULL;
5678  ExtFltStep = ( DoFltAndIntCombo ? &fs : NULL );
5679  }
5680  else
5681  {
5682  int DownsampleFactor;
5683 
5684  while( true )
5685  {
5686  DownsampleFactor = (int) floor( 0.5 / FltCutoff );
5687  bool DoHBFltAdd = ( UseHalfband && DownsampleFactor > 1 );
5688 
5689  if( DoHBFltAdd )
5690  {
5691  assignFilterParams( Steps.add(), false, DownsampleFactor,
5692  0.0, 1.0, false, IsModel );
5693 
5694  FltCutoff *= DownsampleFactor;
5695  }
5696  else
5697  {
5698  if( DownsampleFactor < 1 )
5699  {
5700  DownsampleFactor = 1;
5701  }
5702 
5703  break;
5704  }
5705  }
5706 
5707  CFilterStep& fs = Steps.add();
5708  assignFilterParams( fs, false, DownsampleFactor, FltCutoff,
5709  DCGain, DoFltAndIntCombo, IsModel );
5710 
5711  IntCutoffMult = FltCutoff / 0.5;
5712 
5713  if( DoFltAndIntCombo )
5714  {
5715  ReuseStep = &fs;
5716  ExtFltStep = &fs;
5717  }
5718  else
5719  {
5720  IntCutoffMult *= DownsampleFactor;
5721  ReuseStep = NULL;
5722  ExtFltStep = NULL;
5723  }
5724  }
5725 
5726  // Insert resizing and correction steps.
5727 
5728  CFilterStep& fs = ( ReuseStep == NULL ? Steps.add() : *ReuseStep );
5729 
5730  Vars.ResizeStep = Steps.getItemCount() - 1;
5731  fs.IsUpsample = false;
5732  fs.ResampleFactor = 0;
5733  fs.DCGain = ( ExtFltStep == NULL ? 1.0 : ExtFltStep -> DCGain );
5734 
5735  initFilterBank( FltBank, IntCutoffMult, ForceHiOrderInt,
5736  ( ExtFltStep == NULL ? fs.FltOrig : ExtFltStep -> FltOrig ));
5737 
5738  if( FltBank == FixedFilterBank )
5739  {
5740  fs.FltBank = (CDSPFracFilterBankLin< fptype >*) &FixedFilterBank;
5741  }
5742  else
5743  {
5744  fs.FltBank = &FltBank;
5745  }
5746 
5747  addCorrectionFilter( Steps, corrbw, IsPreCorrection, IsModel );
5748 
5749  //addSharpenTest( Steps, bw, IsModel );
5750  }
5751 
5762  static void extendUpsample( CFilterStep& fs, CFilterStep& NextStep )
5763  {
5764  fs.InPrefix = ( NextStep.InPrefix + fs.ResampleFactor - 1 ) /
5765  fs.ResampleFactor;
5766 
5767  fs.OutPrefix += fs.InPrefix * fs.ResampleFactor;
5768  NextStep.InPrefix = 0;
5769 
5770  fs.InSuffix = ( NextStep.InSuffix + fs.ResampleFactor - 1 ) /
5771  fs.ResampleFactor;
5772 
5773  fs.OutSuffix += fs.InSuffix * fs.ResampleFactor;
5774  NextStep.InSuffix = 0;
5775  }
5776 
5791  static void fillRPosBuf( CFilterStep& fs, const CImageResizerVars& Vars )
5792  {
5793  const int PrevLen = fs.RPosBuf -> getCapacity();
5794 
5795  if( fs.OutLen > PrevLen )
5796  {
5797  fs.RPosBuf -> increaseCapacity( fs.OutLen );
5798  }
5799 
5800  typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ PrevLen ];
5801  const int FracCount = fs.FltBank -> getFracCount();
5802  const double o = Vars.o;
5803  const double k = Vars.k;
5804  int i;
5805 
5806  for( i = PrevLen; i < fs.OutLen; i++ )
5807  {
5808  const double SrcPos = o + k * i;
5809  const int SrcPosInt = (int) floor( SrcPos );
5810  const double x = ( SrcPos - SrcPosInt ) * FracCount;
5811  const int fti = (int) x;
5812  rpos -> x = (typename fpclass :: fptypeatom) ( x - fti );
5813  rpos -> fti = fti;
5814  rpos -> SrcPosInt = SrcPosInt;
5815  rpos++;
5816  }
5817  }
5818 
5835  static void updateFilterStepBuffers( CFilterSteps& Steps,
5836  CImageResizerVars& Vars,
5837  typename CFilterStep :: CRPosBufArray& RPosBufArray, int SrcLen,
5838  const int NewLen )
5839  {
5840  int upstep = -1;
5841  int InBuf = 0;
5842  int i;
5843 
5844  for( i = 0; i < Steps.getItemCount(); i++ )
5845  {
5846  CFilterStep& fs = Steps[ i ];
5847 
5848  fs.Vars = &Vars;
5849  fs.InLen = SrcLen;
5850  fs.InBuf = InBuf;
5851  fs.OutBuf = ( InBuf + 1 ) & 1;
5852 
5853  if( fs.IsUpsample )
5854  {
5855  upstep = i;
5856  Vars.k *= fs.ResampleFactor;
5857  Vars.o *= fs.ResampleFactor;
5858  fs.InPrefix = 0;
5859  fs.InSuffix = 0;
5860  fs.OutLen = fs.InLen * fs.ResampleFactor;
5861  fs.OutPrefix = fs.FltLatency;
5862  fs.OutSuffix = fs.Flt.getCapacity() - fs.FltLatency -
5863  fs.ResampleFactor;
5864 
5865  int l0 = fs.OutPrefix + fs.OutLen + fs.OutSuffix;
5866  int l = fs.InLen * fs.ResampleFactor +
5867  fs.SuffixDC.getCapacity();
5868 
5869  if( l > l0 )
5870  {
5871  fs.OutSuffix += l - l0;
5872  }
5873 
5874  l0 = fs.OutLen + fs.OutSuffix;
5875 
5876  if( fs.PrefixDC.getCapacity() > l0 )
5877  {
5878  fs.OutSuffix += fs.PrefixDC.getCapacity() - l0;
5879  }
5880  }
5881  else
5882  if( fs.ResampleFactor == 0 )
5883  {
5884  const int FilterLenD2 = fs.FltBank -> getFilterLen() / 2;
5885  const int FilterLenD21 = FilterLenD2 - 1;
5886 
5887  const int ResizeLPix = (int) floor( Vars.o ) - FilterLenD21;
5888  fs.InPrefix = ( ResizeLPix < 0 ? -ResizeLPix : 0 );
5889  const int ResizeRPix = (int) floor( Vars.o +
5890  ( NewLen - 1 ) * Vars.k ) + FilterLenD2 + 1;
5891 
5892  fs.InSuffix = ( ResizeRPix > fs.InLen ?
5893  ResizeRPix - fs.InLen : 0 );
5894 
5895  fs.OutLen = NewLen;
5896  fs.RPosBuf = &RPosBufArray.getRPosBuf( Vars.k, Vars.o,
5897  fs.FltBank -> getFracCount() );
5898 
5899  fillRPosBuf( fs, Vars );
5900  }
5901  else
5902  {
5903  Vars.k /= fs.ResampleFactor;
5904  Vars.o /= fs.ResampleFactor;
5905  Vars.o += fs.EdgePixelCount;
5906 
5907  fs.InPrefix = fs.FltLatency;
5908  fs.InSuffix = fs.Flt.getCapacity() - fs.FltLatency - 1;
5909 
5910  // Additionally extend OutLen to produce more precise edge
5911  // pixels.
5912 
5913  fs.OutLen = ( fs.InLen + fs.ResampleFactor - 1 ) /
5914  fs.ResampleFactor + fs.EdgePixelCount;
5915 
5916  fs.InSuffix += ( fs.OutLen - 1 ) * fs.ResampleFactor + 1 -
5917  fs.InLen;
5918 
5919  fs.InPrefix += fs.EdgePixelCount * fs.ResampleFactor;
5920  fs.OutLen += fs.EdgePixelCount;
5921  }
5922 
5923  InBuf = fs.OutBuf;
5924  SrcLen = fs.OutLen;
5925  }
5926 
5927  Steps[ Steps.getItemCount() - 1 ].OutBuf = 2;
5928  Vars.IsResize2 = false;
5929 
5930  if( upstep != -1 )
5931  {
5932  extendUpsample( Steps[ upstep ], Steps[ upstep + 1 ]);
5933 
5934  if( Steps[ upstep ].ResampleFactor == 2 &&
5935  Vars.ResizeStep == upstep + 1 &&
5936  fpclass :: packmode == 0 &&
5937  Steps[ upstep ].FltOrig.getCapacity() > 0 )
5938  {
5939  // Interpolation with preceding 2x filterless upsample,
5940  // interleaved resizing only.
5941 
5942  Vars.IsResize2 = true;
5943  }
5944  }
5945  }
5946 
5963  static void updateBufLenAndRPosPtrs( CFilterSteps& Steps,
5964  CImageResizerVars& Vars, const int ResElIncr )
5965  {
5966  int MaxPrefix[ 2 ] = { 0, 0 };
5967  int MaxLen[ 2 ] = { 0, 0 };
5968  int i;
5969 
5970  for( i = 0; i < Steps.getItemCount(); i++ )
5971  {
5972  CFilterStep& fs = Steps[ i ];
5973  const int ib = fs.InBuf;
5974 
5975  if( fs.InPrefix > MaxPrefix[ ib ])
5976  {
5977  MaxPrefix[ ib ] = fs.InPrefix;
5978  }
5979 
5980  int l = fs.InLen + fs.InSuffix;
5981 
5982  if( l > MaxLen[ ib ])
5983  {
5984  MaxLen[ ib ] = l;
5985  }
5986 
5987  fs.InElIncr = fs.InPrefix + l;
5988 
5989  if( fs.OutBuf == 2 )
5990  {
5991  break;
5992  }
5993 
5994  const int ob = fs.OutBuf;
5995 
5996  if( fs.IsUpsample )
5997  {
5998  if( fs.OutPrefix > MaxPrefix[ ob ])
5999  {
6000  MaxPrefix[ ob ] = fs.OutPrefix;
6001  }
6002 
6003  l = fs.OutLen + fs.OutSuffix;
6004 
6005  if( l > MaxLen[ ob ])
6006  {
6007  MaxLen[ ob ] = l;
6008  }
6009  }
6010  else
6011  {
6012  if( fs.OutLen > MaxLen[ ob ])
6013  {
6014  MaxLen[ ob ] = fs.OutLen;
6015  }
6016  }
6017  }
6018 
6019  // Update OutElIncr values of all steps.
6020 
6021  for( i = 0; i < Steps.getItemCount(); i++ )
6022  {
6023  CFilterStep& fs = Steps[ i ];
6024 
6025  if( fs.OutBuf == 2 )
6026  {
6027  fs.OutElIncr = ResElIncr;
6028  break;
6029  }
6030 
6031  CFilterStep& fs2 = Steps[ i + 1 ];
6032 
6033  if( fs.IsUpsample )
6034  {
6035  fs.OutElIncr = fs.OutPrefix + fs.OutLen + fs.OutSuffix;
6036 
6037  if( fs.OutElIncr > fs2.InElIncr )
6038  {
6039  fs2.InElIncr = fs.OutElIncr;
6040  }
6041  else
6042  {
6043  fs.OutElIncr = fs2.InElIncr;
6044  }
6045  }
6046  else
6047  {
6048  fs.OutElIncr = fs2.InElIncr;
6049  }
6050  }
6051 
6052  // Update temporary buffer's length.
6053 
6054  for( i = 0; i < 2; i++ )
6055  {
6056  Vars.BufLen[ i ] = MaxPrefix[ i ] + MaxLen[ i ];
6057  Vars.BufOffs[ i ] = MaxPrefix[ i ];
6058 
6059  if( Vars.packmode == 0 )
6060  {
6061  Vars.BufOffs[ i ] *= Vars.ElCount;
6062  }
6063 
6064  Vars.BufLen[ i ] *= Vars.ElCount;
6065  }
6066 
6067  // Update RPosBuf pointers and SrcOffs.
6068 
6069  CFilterStep& fs = Steps[ Vars.ResizeStep ];
6070  typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ 0 ];
6071  const int em = ( fpclass :: packmode == 0 ? Vars.ElCount : 1 );
6072  const int fl = fs.FltBank -> getFilterLen();
6073  const int FilterLenD21 = fl / 2 - 1;
6074 
6075  if( Vars.IsResize2 )
6076  {
6077  for( i = 0; i < fs.OutLen; i++ )
6078  {
6079  const int p = rpos -> SrcPosInt - FilterLenD21;
6080  const int fo = p & 1;
6081  rpos -> SrcOffs = ( p + fo ) * em;
6082  rpos -> ftp = fs.FltBank -> getFilter( rpos -> fti ) + fo;
6083  rpos -> fl = fl - fo;
6084  rpos++;
6085  }
6086  }
6087  else
6088  {
6089  for( i = 0; i < fs.OutLen; i++ )
6090  {
6091  rpos -> SrcOffs = ( rpos -> SrcPosInt - FilterLenD21 ) * em;
6092  rpos -> ftp = fs.FltBank -> getFilter( rpos -> fti );
6093  rpos++;
6094  }
6095  }
6096  }
6097 
6106  void modifyCorrFilterDCGain( CFilterSteps& Steps, const double m ) const
6107  {
6108  CBuffer< fptype >* Flt;
6109  const int z = Steps.getItemCount() - 1;
6110 
6111  if( !Steps[ z ].IsUpsample && Steps[ z ].ResampleFactor == 1 )
6112  {
6113  Flt = &Steps[ z ].Flt;
6114  }
6115  else
6116  {
6117  Flt = &Steps[ 0 ].Flt;
6118  }
6119 
6120  int i;
6121 
6122  for( i = 0; i < Flt -> getCapacity(); i++ )
6123  {
6124  (*Flt)[ i ] = (fptype) ( (double) (*Flt)[ i ] * m );
6125  }
6126  }
6127 
6136  static void fillUsedFracMap( const CFilterStep& fs,
6137  CBuffer< uint8_t >& UsedFracMap )
6138  {
6139  const int FracCount = fs.FltBank -> getFracCount();
6140  UsedFracMap.increaseCapacity( FracCount, false );
6141  memset( &UsedFracMap[ 0 ], 0, FracCount * sizeof( UsedFracMap[ 0 ]));
6142 
6143  typename CFilterStep :: CResizePos* rpos = &(*fs.RPosBuf)[ 0 ];
6144  int i;
6145 
6146  for( i = 0; i < fs.OutLen; i++ )
6147  {
6148  UsedFracMap[ rpos -> fti ] |= 1;
6149  rpos++;
6150  }
6151  }
6152 
6173  static int calcComplexity( const CFilterSteps& Steps,
6174  const CImageResizerVars& Vars, const CBuffer< uint8_t >& UsedFracMap,
6175  const int ScanlineCount )
6176  {
6177  int fcnum; // Filter complexity multiplier numerator.
6178  int fcdenom; // Filter complexity multiplier denominator.
6179 
6180  if( Vars.packmode != 0 )
6181  {
6182  fcnum = 1;
6183  fcdenom = 1;
6184  }
6185  else
6186  {
6187  // In interleaved processing mode, filters require 1 less
6188  // multiplication per 2 multiply-add instructions.
6189 
6190  fcnum = 3;
6191  fcdenom = 4;
6192  }
6193 
6194  int s = 0; // Complexity per one scanline.
6195  int s2 = 0; // Complexity per all scanlines.
6196  int i;
6197 
6198  for( i = 0; i < Steps.getItemCount(); i++ )
6199  {
6200  const CFilterStep& fs = Steps[ i ];
6201 
6202  s2 += 65 * fs.Flt.getCapacity(); // Filter creation complexity.
6203 
6204  if( fs.IsUpsample )
6205  {
6206  if( fs.FltOrig.getCapacity() > 0 )
6207  {
6208  continue;
6209  }
6210 
6211  s += ( fs.Flt.getCapacity() *
6212  ( fs.InPrefix + fs.InLen + fs.InSuffix ) +
6213  fs.SuffixDC.getCapacity() + fs.PrefixDC.getCapacity() ) *
6214  Vars.ElCount;
6215  }
6216  else
6217  if( fs.ResampleFactor == 0 )
6218  {
6219  s += fs.FltBank -> getFilterLen() *
6220  ( fs.FltBank -> getOrder() + Vars.ElCount ) * fs.OutLen;
6221 
6222  if( i == Vars.ResizeStep && Vars.IsResize2 )
6223  {
6224  s >>= 1;
6225  }
6226 
6227  s2 += fs.FltBank -> calcInitComplexity( UsedFracMap );
6228  }
6229  else
6230  {
6231  s += fs.Flt.getCapacity() * Vars.ElCount * fs.OutLen *
6232  fcnum / fcdenom;
6233  }
6234  }
6235 
6236  return( s + s2 / ScanlineCount );
6237  }
6238 
6251  template< class Tin, class Tout >
6252  class CThreadData : public CImageResizerThreadPool :: CWorkload
6253  {
6254  public:
6255  virtual void process()
6256  {
6257  processScanlineQueue();
6258  }
6259 
6264  enum EScanlineOperation
6265  {
6266  sopResizeH,
6267  sopResizeV,
6269  sopDitherAndUnpackH,
6271  sopUnpackH
6273  };
6275 
6286  void init( const int aThreadIndex, const int aThreadCount,
6287  const CFilterSteps& aSteps, const CImageResizerVars& aVars )
6288  {
6289  ThreadIndex = aThreadIndex;
6290  ThreadCount = aThreadCount;
6291  Steps = &aSteps;
6292  Vars = &aVars;
6293  }
6294 
6309  void initScanlineQueue( const EScanlineOperation aOp,
6310  const int TotalLines, const int aSrcLen, const int aSrcIncr = 0,
6311  const int aResIncr = 0 )
6312  {
6313  const int l = Vars -> BufLen[ 0 ] + Vars -> BufLen[ 1 ];
6314 
6315  if( Bufs.getCapacity() < l )
6316  {
6317  Bufs.alloc( l, fpclass :: fpalign );
6318  }
6319 
6320  BufPtrs[ 0 ] = Bufs + Vars -> BufOffs[ 0 ];
6321  BufPtrs[ 1 ] = Bufs + Vars -> BufLen[ 0 ] + Vars -> BufOffs[ 1 ];
6322 
6323  int j;
6324  int ml = 0;
6325 
6326  for( j = 0; j < Steps -> getItemCount(); j++ )
6327  {
6328  const CFilterStep& fs = (*Steps)[ j ];
6329 
6330  if( fs.ResampleFactor == 0 &&
6331  ml < fs.FltBank -> getFilterLen() )
6332  {
6333  ml = fs.FltBank -> getFilterLen();
6334  }
6335  }
6336 
6337  TmpFltBuf.alloc( ml, fpclass :: fpalign );
6338  ScanlineOp = aOp;
6339  SrcLen = aSrcLen;
6340  SrcIncr = aSrcIncr;
6341  ResIncr = aResIncr;
6342  QueueLen = 0;
6343  Queue.increaseCapacity(( TotalLines + ThreadCount - 1 ) /
6344  ThreadCount, false );
6345  }
6346 
6357  void addScanlineToQueue( void* const SrcBuf, void* const ResBuf )
6358  {
6359  Queue[ QueueLen ].SrcBuf = SrcBuf;
6360  Queue[ QueueLen ].ResBuf = ResBuf;
6361  QueueLen++;
6362  }
6363 
6368  void processScanlineQueue()
6369  {
6370  int i;
6371 
6372  switch( ScanlineOp )
6373  {
6374  case sopResizeH:
6375  {
6376  for( i = 0; i < QueueLen; i++ )
6377  {
6378  resizeScanlineH( (Tin*) Queue[ i ].SrcBuf,
6379  (fptype*) Queue[ i ].ResBuf );
6380  }
6381 
6382  break;
6383  }
6384 
6385  case sopResizeV:
6386  {
6387  for( i = 0; i < QueueLen; i++ )
6388  {
6389  resizeScanlineV( (fptype*) Queue[ i ].SrcBuf,
6390  (fptype*) Queue[ i ].ResBuf );
6391  }
6392 
6393  break;
6394  }
6395 
6396  case sopDitherAndUnpackH:
6397  {
6398  if( Vars -> UseSRGBGamma )
6399  {
6400  for( i = 0; i < QueueLen; i++ )
6401  {
6402  CFilterStep :: applySRGBGamma(
6403  (fptype*) Queue[ i ].SrcBuf, SrcLen, *Vars );
6404 
6405  Ditherer.dither( (fptype*) Queue[ i ].SrcBuf );
6406 
6407  CFilterStep :: unpackScanline(
6408  (fptype*) Queue[ i ].SrcBuf,
6409  (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
6410  }
6411  }
6412  else
6413  {
6414  for( i = 0; i < QueueLen; i++ )
6415  {
6416  Ditherer.dither( (fptype*) Queue[ i ].SrcBuf );
6417 
6418  CFilterStep :: unpackScanline(
6419  (fptype*) Queue[ i ].SrcBuf,
6420  (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
6421  }
6422  }
6423 
6424  break;
6425  }
6426 
6427  case sopUnpackH:
6428  {
6429  if( Vars -> UseSRGBGamma )
6430  {
6431  for( i = 0; i < QueueLen; i++ )
6432  {
6433  CFilterStep :: applySRGBGamma(
6434  (fptype*) Queue[ i ].SrcBuf, SrcLen, *Vars );
6435 
6436  CFilterStep :: unpackScanline(
6437  (fptype*) Queue[ i ].SrcBuf,
6438  (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
6439  }
6440  }
6441  else
6442  {
6443  for( i = 0; i < QueueLen; i++ )
6444  {
6445  CFilterStep :: unpackScanline(
6446  (fptype*) Queue[ i ].SrcBuf,
6447  (Tout*) Queue[ i ].ResBuf, SrcLen, *Vars );
6448  }
6449  }
6450 
6451  break;
6452  }
6453  }
6454  }
6455 
6461  CDitherer& getDitherer()
6462  {
6463  return( Ditherer );
6464  }
6465 
6466  private:
6467  int ThreadIndex;
6468  int ThreadCount;
6470  const CFilterSteps* Steps;
6472  const CImageResizerVars* Vars;
6474  CBuffer< fptype > Bufs;
6476  fptype* BufPtrs[ 3 ];
6478  CBuffer< fptype > TmpFltBuf;
6481  EScanlineOperation ScanlineOp;
6484  int SrcLen;
6487  int SrcIncr;
6489  int ResIncr;
6491  CDitherer Ditherer;
6494 
6502  struct CQueueItem
6503  {
6504  void* SrcBuf;
6505  void* ResBuf;
6508  };
6511 
6512  CBuffer< CQueueItem > Queue;
6513  int QueueLen;
6515 
6525  void resizeScanlineH( const Tin* const SrcBuf, fptype* const ResBuf )
6526  {
6527  const CFilterStep& fs0 = (*Steps)[ 0 ];
6528 
6529  fs0.packScanline( SrcBuf, BufPtrs[ 0 ], SrcLen );
6530  BufPtrs[ 2 ] = ResBuf;
6531 
6532  fptype ElBiases[ 4 ];
6533  fs0.calcScanlineBias( BufPtrs[ 0 ], SrcLen, ElBiases );
6534  fs0.unbiasScanline( BufPtrs[ 0 ], SrcLen, ElBiases );
6535 
6536  int j;
6537 
6538  for( j = 0; j < Steps -> getItemCount(); j++ )
6539  {
6540  const CFilterStep& fs = (*Steps)[ j ];
6541  fs.prepareInBuf( BufPtrs[ fs.InBuf ]);
6542  const int DstIncr =
6543  ( Vars -> packmode == 0 ? Vars -> ElCount : 1 );
6544 
6545  if( fs.ResampleFactor != 0 )
6546  {
6547  if( fs.IsUpsample )
6548  {
6549  fs.doUpsample( BufPtrs[ fs.InBuf ],
6550  BufPtrs[ fs.OutBuf ]);
6551  }
6552  else
6553  {
6554  fs.doFilter( BufPtrs[ fs.InBuf ],
6555  BufPtrs[ fs.OutBuf ], DstIncr );
6556  }
6557  }
6558  else
6559  {
6560  if( Vars -> IsResize2 )
6561  {
6562  fs.doResize2( BufPtrs[ fs.InBuf ],
6563  BufPtrs[ fs.OutBuf ], DstIncr, ElBiases,
6564  TmpFltBuf );
6565  }
6566  else
6567  {
6568  fs.doResize( BufPtrs[ fs.InBuf ],
6569  BufPtrs[ fs.OutBuf ], DstIncr, ElBiases,
6570  TmpFltBuf );
6571  }
6572  }
6573  }
6574  }
6575 
6584  void resizeScanlineV( const fptype* const SrcBuf,
6585  fptype* const ResBuf )
6586  {
6587  const CFilterStep& fs0 = (*Steps)[ 0 ];
6588 
6589  fs0.convertVtoH( SrcBuf, BufPtrs[ 0 ], SrcLen, SrcIncr );
6590  BufPtrs[ 2 ] = ResBuf;
6591 
6592  fptype ElBiases[ 4 ];
6593  fs0.calcScanlineBias( BufPtrs[ 0 ], SrcLen, ElBiases );
6594  fs0.unbiasScanline( BufPtrs[ 0 ], SrcLen, ElBiases );
6595 
6596  int j;
6597 
6598  for( j = 0; j < Steps -> getItemCount(); j++ )
6599  {
6600  const CFilterStep& fs = (*Steps)[ j ];
6601  fs.prepareInBuf( BufPtrs[ fs.InBuf ]);
6602  const int DstIncr = ( fs.OutBuf == 2 ? ResIncr :
6603  ( Vars -> packmode == 0 ? Vars -> ElCount : 1 ));
6604 
6605  if( fs.ResampleFactor != 0 )
6606  {
6607  if( fs.IsUpsample )
6608  {
6609  fs.doUpsample( BufPtrs[ fs.InBuf ],
6610  BufPtrs[ fs.OutBuf ]);
6611  }
6612  else
6613  {
6614  fs.doFilter( BufPtrs[ fs.InBuf ],
6615  BufPtrs[ fs.OutBuf ], DstIncr );
6616  }
6617  }
6618  else
6619  {
6620  if( Vars -> IsResize2 )
6621  {
6622  fs.doResize2( BufPtrs[ fs.InBuf ],
6623  BufPtrs[ fs.OutBuf ], DstIncr, ElBiases,
6624  TmpFltBuf );
6625  }
6626  else
6627  {
6628  fs.doResize( BufPtrs[ fs.InBuf ],
6629  BufPtrs[ fs.OutBuf ], DstIncr, ElBiases,
6630  TmpFltBuf );
6631  }
6632  }
6633  }
6634  }
6635  };
6636 };
6637 
6638 #undef AVIR_PI
6639 #undef AVIR_PId2
6640 #undef AVIR_NOCTOR
6641 
6642 } // namespace avir
6643 
6644 #endif // AVIR_CIMAGERESIZER_INCLUDED
#define AVIR_PI
Definition: avir.h:75
int ElCountIO
Definition: avir.h:2354
double CorrFltAlpha
Definition: avir.h:2126
double ox
Definition: avir.h:2396
Thread pool for multi-threaded image resizing operation.
Definition: avir.h:1991
void packScanline(const Tin *ip, fptype *const op0, const int l0) const
Definition: avir.h:2681
fptypeatom x
Definition: avir.h:2547
double OutGammaMult
Definition: avir.h:2392
void prepareInBuf(fptype *Src) const
Definition: avir.h:3228
double LPFltBaseLen
Definition: avir.h:2154
void updateCapacity(const capint ReqCapacity)
Definition: avir.h:631
double k
Definition: avir.h:2565
Resizing position structure.
Definition: avir.h:2539
double Alpha
Definition: avir.h:1489
static const int fpalign
Definition: avir.h:4580
void init(const int aLen, const CImageResizerVars &aVars, const double aTrMul, const double aPkOut)
Definition: avir.h:4463
void init(const double SampleRate, const double aFilterLength, const int aBandCount, const double MinFreq, const double MaxFreq, const bool IsLogBands, const double WFAlpha)
Definition: avir.h:1008
int getItemCount() const
Definition: avir.h:849
int fl2
Definition: avir.h:1371
static int calcFilterLength(const double aFilterLength, int &Latency)
Definition: avir.h:1186
bool IsResize2
Definition: avir.h:2387
CBuffer(const capint aCapacity, const int aAlignment=0)
Definition: avir.h:481
static void applySRGBGamma(fptype *p, int l, const CImageResizerVars &Vars0)
Definition: avir.h:2853
int getOrder() const
Definition: avir.h:1678
Resizing algorithm parameters structure.
Definition: avir.h:2124
int BuildMode
Definition: avir.h:2408
void createAllFilters()
Definition: avir.h:1732
Sinc function-based fractional delay filter bank.
Definition: avir.h:1532
CBuffer< fptype > Flt
Definition: avir.h:2460
double k
Definition: avir.h:2378
void doResize2(const fptype *SrcLine, fptype *DstLine, const int DstLineIncr, const fptype *const ElBiases, fptype *const ) const
Definition: avir.h:4117
void free()
Definition: avir.h:532
Resizing positions buffer array class.
Definition: avir.h:2582
Memory buffer class for element array storage, with capacity tracking.
Definition: avir.h:462
int fpalign
Definition: avir.h:2360
adith CDitherer
Definition: avir.h:4597
static const int elalign
Definition: avir.h:4586
void increaseCapacity(const capint NewCapacity, const bool DoDataCopy=true)
Definition: avir.h:574
void doFilter(const fptype *const Src, fptype *Dst, const int DstIncr) const
Definition: avir.h:3748
CImageResizer(const int aResBitDepth=8, const int aSrcBitDepth=0, const CImageResizerParams &aParams=CImageResizerParamsDef())
Definition: avir.h:4637
CBuffer< fptype > ResScanlineDith0
Definition: avir.h:4531
int FracCount
Definition: avir.h:2569
int getFilterLength() const
Definition: avir.h:1094
int EdgePixelCount
Definition: avir.h:2520
bool IsUpsample
Definition: avir.h:2453
int RndSeed
Definition: avir.h:2412
Image resizing variables class.
Definition: avir.h:2349
int packmode
Definition: avir.h:2370
double DCGain
Definition: avir.h:2469
CDSPWindowGenPeakedCosine(const double aAlpha, const double aLen2)
Definition: avir.h:933
Set of resizing algorithm parameters for ultra low-aliasing resizing (13.68/1.79/1.000(521792.07)/0.000026).
Definition: avir.h:2324
int FilterLen
Definition: avir.h:1374
bool operator==(const CDSPFracFilterBankLin &s) const
Definition: avir.h:1585
double Len2
Definition: avir.h:1484
int ResizeStep
Definition: avir.h:2384
double o
Definition: avir.h:2567
double DCGain
Definition: avir.h:1491
int OutSuffix
Definition: avir.h:2504
double generate()
Definition: avir.h:946
static const int fppack
Definition: avir.h:4577
CSineGen(const double si, const double ph)
Definition: avir.h:880
CDSPPeakedCosineLPF(const double aLen2, const double aFreq2, const double aAlpha)
Definition: avir.h:1386
int getFilterLen() const
Definition: avir.h:1660
Set of resizing algorithm parameters for lower-ringing performance (9.21/1.91/1.040(391960.71)/0.000023).
Definition: avir.h:2274
int InElIncr
Definition: avir.h:2492
int FltLatency
Definition: avir.h:2472
void init(const int ReqFracCount, const int ReqOrder, const double BaseLen, const double Cutoff, const double aWFAlpha, const CFltBuffer &aExtFilter, const int aAlignment=0, const int FltLenAlign=1)
Definition: avir.h:1613
Buffer class for parametrized low-pass filter.
Definition: avir.h:1481
double HBFltAlpha
Definition: avir.h:2177
void calcScanlineBias(const fptype *p, const int SrcLen, fptype *const ElBiases) const
Definition: avir.h:3063
const CImageResizerVars * Vars
Definition: avir.h:2474
static const int EdgePixelCountDef
Definition: avir.h:2527
static bool isRecursive()
Definition: avir.h:4384
virtual int getSuggestedWorkloadCount() const
Definition: avir.h:2031
void truncateCapacity(const capint NewCapacity)
Definition: avir.h:612
double oy
Definition: avir.h:2399
Image resizer's default dithering class.
Definition: avir.h:4356
void copyInitParams(const CDSPFracFilterBankLin &s)
Definition: avir.h:1551
virtual void waitAllWorkloadsToFinish()
Definition: avir.h:2074
bool UseSRGBGamma
Definition: avir.h:2406
int SrcOffs
Definition: avir.h:2549
virtual void addWorkload(CWorkload *const Workload)
Definition: avir.h:2054
int InBuf
Definition: avir.h:2478
#define AVIR_PId2
Definition: avir.h:82
Interleaved filtering steps implementation class.
Definition: avir.h:2647
virtual void startAllWorkloads()
Definition: avir.h:2066
CBuffer< fptype > PrefixDC
Definition: avir.h:2512
double LPFltCutoffMult
Definition: avir.h:2159
Image resizer's filtering step class.
Definition: avir.h:2448
int calcInitComplexity(const CBuffer< uint8_t > &FracUseMap) const
Definition: avir.h:1755
T & add()
Definition: avir.h:789
void clear()
Definition: avir.h:836
Resizing positions buffer class.
Definition: avir.h:2562
static void unpackScanline(const fptype *ip, Tout *op, int l, const CImageResizerVars &Vars0)
Definition: avir.h:2993
int getFilterLatency() const
Definition: avir.h:1103
Set of resizing algorithm parameters for low-aliasing resizing (11.59/1.84/1.015(73054.59)/0.000159).
Definition: avir.h:2299
void forceCapacity(const capint NewCapacity)
Definition: avir.h:560
int ResampleFactor
Definition: avir.h:2457
Thread pool's workload object class.
Definition: avir.h:2009
const fptype * getFilter(const int i)
Definition: avir.h:1694
Peaked Cosine window function generator class.
Definition: avir.h:922
CRPosBuf & getRPosBuf(const double k, const double o, const int FracCount)
Definition: avir.h:2599
double o
Definition: avir.h:2381
int ElCount
Definition: avir.h:2352
int OutPrefix
Definition: avir.h:2500
virtual void removeAllWorkloads()
Definition: avir.h:2084
double HBFltLen
Definition: avir.h:2182
Set of resizing algorithm parameters for ultra-low-ringing performance (7.50/2.01/1.083(11568559.86)/0.000001).
Definition: avir.h:2226
CImageResizerFilterStepINL< fptype, fptypeatom > CFilterStep
Definition: avir.h:4594
void convertVtoH(const fptype *ip, fptype *op, const int SrcLen, const int SrcIncr) const
Definition: avir.h:2925
void init(const int aLen, const CImageResizerVars &aVars, const double aTrMul, const double aPkOut)
Definition: avir.h:4369
CDSPFracFilterBankLin< fptype > * FltBank
Definition: avir.h:2626
double IntFltCutoff
Definition: avir.h:2139
Floating-point processing definition and abstraction class.
Definition: avir.h:4570
int OutBuf
Definition: avir.h:2498
void doUpsample(const fptype *const Src, fptype *const Dst) const
Definition: avir.h:3250
void resizeImage(const Tin *const SrcBuf, const int SrcWidth, const int SrcHeight, int SrcScanlineSize, Tout *const NewBuf, const int NewWidth, const int NewHeight, const int ElCountIO, const double k, CImageResizerVars *const aVars=NULL) const
Definition: avir.h:4688
double LPFltAlpha
Definition: avir.h:2150
int InSuffix
Definition: avir.h:2486
double IntFltAlpha
Definition: avir.h:2135
int LenE
Definition: avir.h:4428
int Len
Definition: avir.h:4424
int InPrefix
Definition: avir.h:2480
afptype fptype
Definition: avir.h:4573
Image resizer's error-diffusion dithering class, interleaved mode.
Definition: avir.h:4449
const CImageResizerVars * Vars
Definition: avir.h:4426
double CorrFltLen
Definition: avir.h:2130
int OutLen
Definition: avir.h:2496
double Freq
Definition: avir.h:1487
void buildFilter(const double *const BandGains, double *const Filter)
Definition: avir.h:1117
double generate()
Definition: avir.h:891
Array of structured objects.
Definition: avir.h:732
void generateLPF(T *op, const double DCGain)
Definition: avir.h:1407
Sine signal generator class.
Definition: avir.h:869
void unbiasScanline(fptype *p, int l, const fptype *const ElBiases) const
Definition: avir.h:3153
int getFracCount() const
Definition: avir.h:1669
void setItemCount(const int NewCount)
Definition: avir.h:810
double TrMul0
Definition: avir.h:4430
int OutElIncr
Definition: avir.h:2508
void alloc(const capint aCapacity, const int aAlignment=0)
Definition: avir.h:522
CRPosBuf * RPosBuf
Definition: avir.h:2623
int InLen
Definition: avir.h:2476
CBuffer< fptype > SuffixDC
Definition: avir.h:2516
The default set of resizing algorithm parameters (10.06/1.88/1.029(256064.90)/0.000039).
Definition: avir.h:2201
const fptype * ftp
Definition: avir.h:2545
Low-pass filter windowed by Peaked Cosine window function.
Definition: avir.h:1368
double InGammaMult
Definition: avir.h:2389
int elalign
Definition: avir.h:2365
bool operator==(const CFltBuffer &b2) const
Definition: avir.h:1508
double HBFltCutoff
Definition: avir.h:2179
CImageResizerThreadPool * ThreadPool
Definition: avir.h:2402
FIR filter-based equalizer generator.
Definition: avir.h:989
CFltBuffer FltOrig
Definition: avir.h:2462
void doResize(const fptype *SrcLine, fptype *DstLine, const int DstLineIncr, const fptype *const ElBiases, fptype *const ) const
Definition: avir.h:3885
capint getCapacity() const
Definition: avir.h:545
afptypeatom fptypeatom
Definition: avir.h:4575
int fppack
Definition: avir.h:2357
double IntFltLen
Definition: avir.h:2142
fptype * ResScanlineDith
Definition: avir.h:4533
Set of resizing algorithm parameters for low-ringing performance (7.91/1.96/1.065(1980857.66)/0.000004).
Definition: avir.h:2250
int SrcPosInt
Definition: avir.h:2541
void dither(fptype *const ResScanline) const
Definition: avir.h:4395
static const int packmode
Definition: avir.h:4591
double PkOut0
Definition: avir.h:4432
Image resizer class.
Definition: avir.h:4617