AVIR
High-quality pro image resizing library
 
lancir.h
1//$ nobt
2//$ nocpp
3
48
49#ifndef AVIR_CLANCIR_INCLUDED
50#define AVIR_CLANCIR_INCLUDED
51
52#if __cplusplus >= 201103L
53
54 #include <cstddef>
55 #include <cstdint>
56 #include <cstring>
57 #include <cmath>
58
59#else // __cplusplus >= 201103L
60
61 #include <stddef.h>
62 #include <stdint.h>
63 #include <string.h>
64 #include <math.h>
65
66#endif // __cplusplus >= 201103L
67
73
74#if defined( __AVX__ )
75
76 #include <immintrin.h>
77
78 #define LANCIR_AVX
79 #define LANCIR_SSE2 // Some functions use SSE2; AVX has a higher priority.
80 #define LANCIR_ALIGN 32
81
82#elif defined( __aarch64__ ) || defined( __arm64__ ) || \
83 defined( _M_ARM64 ) || defined( _M_ARM64EC )
84
85 #if defined( _MSC_VER )
86 #include <arm64_neon.h>
87
88 #if _MSC_VER < 1925
89 #define LANCIR_ARM32 // Do not use some newer NEON intrinsics.
90 #endif // _MSC_VER < 1925
91 #else // defined( _MSC_VER )
92 #include <arm_neon.h>
93 #endif // defined( _MSC_VER )
94
95 #define LANCIR_NEON
96 #define LANCIR_ALIGN 16
97
98#elif defined( __ARM_NEON ) || defined( __ARM_NEON__ ) || defined( _M_ARM )
99
100 #include <arm_neon.h>
101
102 #define LANCIR_ARM32
103 #define LANCIR_NEON
104 #define LANCIR_ALIGN 16
105
106#elif defined( __SSE2__ ) || defined( _M_AMD64 ) || \
107 ( defined( _M_IX86_FP ) && _M_IX86_FP == 2 )
108
109 #if defined( _MSC_VER )
110 #include <intrin.h>
111 #else // defined( _MSC_VER )
112 #include <emmintrin.h>
113 #endif // defined( _MSC_VER )
114
115 #define LANCIR_SSE2
116 #define LANCIR_ALIGN 16
117
118#else // SSE2
119
120 #define LANCIR_ALIGN 4
121
122#endif // SSE2
123
124namespace avir {
125
126#if __cplusplus >= 201103L
127
128 using std :: memset;
129 using std :: memcpy;
130 using std :: fabs;
131 using std :: floor;
132 using std :: ceil;
133 using std :: sin;
134 using std :: cos;
135 using std :: intptr_t;
136 using std :: uintptr_t;
137
138#endif // __cplusplus >= 201103L
139
147
148class CLancIRParams
149{
150public:
151 int SrcSSize;
154 int NewSSize;
157 double kx;
163 double ky;
164 double ox;
166 double oy;
168 double la;
170
182
183 CLancIRParams( const int aSrcSSize = 0, const int aNewSSize = 0,
184 const double akx = 0.0, const double aky = 0.0,
185 const double aox = 0.0, const double aoy = 0.0 )
186 : SrcSSize( aSrcSSize )
187 , NewSSize( aNewSSize )
188 , kx( akx )
189 , ky( aky )
190 , ox( aox )
191 , oy( aoy )
192 , la( 3.0 )
193 {
194 }
195};
196
214
215class CLancIR
216{
217private:
218 CLancIR( const CLancIR& )
219 {
220 // Unsupported.
221 }
222
223 CLancIR& operator = ( const CLancIR& )
224 {
225 // Unsupported.
226 return( *this );
227 }
228
229public:
230 CLancIR()
231 : FltBuf0( NULL )
232 , FltBuf0Len( 0 )
233 , spv0( NULL )
234 , spv0len( 0 )
235 , spv( NULL )
236 {
237 }
238
239 ~CLancIR()
240 {
241 delete[] FltBuf0;
242 delete[] spv0;
243 }
244
270
271 template< typename Tin, typename Tout >
272 int resizeImage( const Tin* const SrcBuf, const int SrcWidth,
273 const int SrcHeight, Tout* const NewBuf, const int NewWidth,
274 const int NewHeight, const int ElCount,
275 const CLancIRParams* const aParams = NULL )
276 {
277 if(( SrcWidth < 0 ) | ( SrcHeight < 0 ) | ( NewWidth <= 0 ) |
278 ( NewHeight <= 0 ) | ( SrcBuf == NULL ) | ( NewBuf == NULL ) |
279 ( (const void*) SrcBuf == (const void*) NewBuf ))
280 {
281 return( 0 );
282 }
283
284 static const CLancIRParams DefParams;
285 const CLancIRParams& Params = ( aParams != NULL ?
286 *aParams : DefParams );
287
288 if( Params.la < 2.0 )
289 {
290 return( 0 );
291 }
292
293 const int OutSLen = NewWidth * ElCount;
294 const size_t NewScanlineSize = ( Params.NewSSize < 1 ?
295 OutSLen : Params.NewSSize );
296
297 if(( SrcWidth == 0 ) | ( SrcHeight == 0 ))
298 {
299 Tout* op = NewBuf;
300 int i;
301
302 for( i = 0; i < NewHeight; i++ )
303 {
304 memset( op, 0, OutSLen * sizeof( Tout ));
305 op += NewScanlineSize;
306 }
307
308 return( NewHeight );
309 }
310
311 const size_t SrcScanlineSize = ( Params.SrcSSize < 1 ?
312 SrcWidth * ElCount : Params.SrcSSize );
313
314 double ox = Params.ox;
315 double oy = Params.oy;
316 double kx;
317 double ky;
318
319 if( Params.kx >= 0.0 )
320 {
321 kx = ( Params.kx == 0.0 ?
322 (double) SrcWidth / NewWidth : Params.kx );
323
324 ox += ( kx - 1.0 ) * 0.5;
325 }
326 else
327 {
328 kx = -Params.kx;
329 }
330
331 if( Params.ky >= 0.0 )
332 {
333 ky = ( Params.ky == 0.0 ?
334 (double) SrcHeight / NewHeight : Params.ky );
335
336 oy += ( ky - 1.0 ) * 0.5;
337 }
338 else
339 {
340 ky = -Params.ky;
341 }
342
343 if( rfv.update( Params.la, ky, ElCount ))
344 {
345 rsv.reset();
346 rsh.reset();
347 }
348
349 CResizeFilters* rfh; // Pointer to resizing filters for horizontal
350 // resizing; may point to `rfv` if the same resizing step is in use.
351
352 if( kx == ky )
353 {
354 rfh = &rfv;
355 }
356 else
357 {
358 rfh = &rfh0;
359
360 if( rfh0.update( Params.la, kx, ElCount ))
361 {
362 rsh.reset();
363 }
364 }
365
366 rsv.update( SrcHeight, NewHeight, oy, rfv, spv );
367 rsh.update( SrcWidth, NewWidth, ox, *rfh );
368
369 // Calculate vertical progressive resizing's batch size. Progressive
370 // batching is used to try to keep addressing within the cache
371 // capacity. This technique definitely works well for single-threaded
372 // resizing on most CPUs, but may not provide an additional benefit
373 // for multi-threaded resizing, or in system-wide high-load
374 // situations.
375
376 const size_t FltWidthE = ( rsh.padl + SrcWidth + rsh.padr ) * ElCount;
377 const double CacheSize = 5500000.0; // Tuned for various CPUs.
378 const double OpSize = (double) SrcScanlineSize * SrcHeight *
379 sizeof( Tin ) + (double) FltWidthE * NewHeight * sizeof( float );
380
381 int BatchSize = (int) ( NewHeight * CacheSize / ( OpSize + 1.0 ));
382
383 if( BatchSize < 8 )
384 {
385 BatchSize = 8;
386 }
387
388 if( BatchSize > NewHeight )
389 {
390 BatchSize = NewHeight;
391 }
392
393 // Allocate/resize intermediate buffers.
394
395 const int svs = ( rsv.padl + SrcHeight + rsv.padr ) * ElCount;
396 float* const pspv0 = spv0;
397 reallocBuf( spv0, spv, spv0len, ( svs > OutSLen ? svs : OutSLen ));
398 reallocBuf( FltBuf0, FltBuf, FltBuf0Len, FltWidthE * BatchSize );
399
400 if( spv0 != pspv0 )
401 {
402 rsv.updateSPO( rfv, spv );
403 }
404
405 // Prepare output-related constants.
406
407 static const bool IsInFloat = ( (Tin) 0.25f != 0 );
408 static const bool IsOutFloat = ( (Tout) 0.25f != 0 );
409 static const bool IsUnityMul = ( IsInFloat && IsOutFloat ) ||
410 ( IsInFloat == IsOutFloat && sizeof( Tin ) == sizeof( Tout ));
411
412 const int Clamp = ( sizeof( Tout ) == 1 ? 255 : 65535 );
413 const float OutMul = ( IsOutFloat ? 1.0f : (float) Clamp ) /
414 ( IsInFloat ? 1 : ( sizeof( Tin ) == 1 ? 255 : 65535 ));
415
416 // Perform batched resizing.
417
418 const CResizePos* rpv = rsv.pos;
419 Tout* opn = NewBuf;
420 int bl = NewHeight;
421
422 while( bl > 0 )
423 {
424 const int bc = ( bl > BatchSize ? BatchSize : bl );
425
426 int kl = rfv.KernelLen;
427 const Tin* ip = SrcBuf;
428 float* op = FltBuf + rsh.padl * ElCount;
429
430 const int so = (int) rpv[ 0 ].so;
431 float* const sp = spv + so * ElCount;
432
433 int cc = (int) rpv[ bc - 1 ].so - so + kl; // Pixel copy count.
434 int rl = 0; // Leftmost pixel's replication count.
435 int rr = 0; // Rightmost pixel's replication count.
436
437 const int socc = so + cc;
438 const int spe = rsv.padl + SrcHeight;
439
440 // Calculate scanline copying and padding parameters, depending on
441 // the batch's size and its vertical offset.
442
443 if( so < rsv.padl )
444 {
445 if( socc <= rsv.padl )
446 {
447 rl = cc;
448 cc = 0;
449 }
450 else
451 {
452 if( socc > spe )
453 {
454 rr = socc - spe;
455 cc -= rr;
456 }
457
458 rl = rsv.padl - so;
459 cc -= rl;
460 }
461 }
462 else
463 {
464 if( so >= spe )
465 {
466 rr = cc;
467 cc = 0;
468 ip += SrcHeight * SrcScanlineSize;
469 }
470 else
471 {
472 if( socc > spe )
473 {
474 rr = socc - spe;
475 cc -= rr;
476 }
477
478 ip += ( so - rsv.padl ) * SrcScanlineSize;
479 }
480 }
481
482 // Batched vertical resizing.
483
484 int i;
485
486 if( ElCount == 1 )
487 {
488 for( i = 0; i < SrcWidth; i++ )
489 {
490 copyScanline1v( ip, SrcScanlineSize, sp, cc, rl, rr );
491 resize1< false >( NULL, op, FltWidthE, rpv, kl, bc );
492 ip += 1;
493 op += 1;
494 }
495 }
496 else
497 if( ElCount == 2 )
498 {
499 for( i = 0; i < SrcWidth; i++ )
500 {
501 copyScanline2v( ip, SrcScanlineSize, sp, cc, rl, rr );
502 resize2< false >( NULL, op, FltWidthE, rpv, kl, bc );
503 ip += 2;
504 op += 2;
505 }
506 }
507 else
508 if( ElCount == 3 )
509 {
510 for( i = 0; i < SrcWidth; i++ )
511 {
512 copyScanline3v( ip, SrcScanlineSize, sp, cc, rl, rr );
513 resize3< false >( NULL, op, FltWidthE, rpv, kl, bc );
514 ip += 3;
515 op += 3;
516 }
517 }
518 else // ElCount == 4
519 {
520 for( i = 0; i < SrcWidth; i++ )
521 {
522 copyScanline4v( ip, SrcScanlineSize, sp, cc, rl, rr );
523 resize4< false >( NULL, op, FltWidthE, rpv, kl, bc );
524 ip += 4;
525 op += 4;
526 }
527 }
528
529 // Perform horizontal resizing batch, and produce final output.
530
531 float* ipf = FltBuf;
532 kl = rfh -> KernelLen;
533
534 if( ElCount == 1 )
535 {
536 for( i = 0; i < bc; i++ )
537 {
538 padScanline1h( ipf, rsh, SrcWidth );
539 resize1< true >( ipf, spv, 1, rsh.pos, kl, NewWidth );
540 outputScanline< IsOutFloat, IsUnityMul >( spv, opn,
541 OutSLen, Clamp, OutMul );
542
543 ipf += FltWidthE;
544 opn += NewScanlineSize;
545 }
546 }
547 else
548 if( ElCount == 2 )
549 {
550 for( i = 0; i < bc; i++ )
551 {
552 padScanline2h( ipf, rsh, SrcWidth );
553 resize2< true >( ipf, spv, 2, rsh.pos, kl, NewWidth );
554 outputScanline< IsOutFloat, IsUnityMul >( spv, opn,
555 OutSLen, Clamp, OutMul );
556
557 ipf += FltWidthE;
558 opn += NewScanlineSize;
559 }
560 }
561 else
562 if( ElCount == 3 )
563 {
564 for( i = 0; i < bc; i++ )
565 {
566 padScanline3h( ipf, rsh, SrcWidth );
567 resize3< true >( ipf, spv, 3, rsh.pos, kl, NewWidth );
568 outputScanline< IsOutFloat, IsUnityMul >( spv, opn,
569 OutSLen, Clamp, OutMul );
570
571 ipf += FltWidthE;
572 opn += NewScanlineSize;
573 }
574 }
575 else // ElCount == 4
576 {
577 for( i = 0; i < bc; i++ )
578 {
579 padScanline4h( ipf, rsh, SrcWidth );
580 resize4< true >( ipf, spv, 4, rsh.pos, kl, NewWidth );
581 outputScanline< IsOutFloat, IsUnityMul >( spv, opn,
582 OutSLen, Clamp, OutMul );
583
584 ipf += FltWidthE;
585 opn += NewScanlineSize;
586 }
587 }
588
589 rpv += bc;
590 bl -= bc;
591 }
592
593 return( NewHeight );
594 }
595
624
625 template< typename Tin, typename Tout >
626 int resizeImage( const Tin* const SrcBuf, const int SrcWidth,
627 const int SrcHeight, const int SrcSSize, Tout* const NewBuf,
628 const int NewWidth, const int NewHeight, const int NewSSize,
629 const int ElCount, const double kx0 = 0.0, const double ky0 = 0.0,
630 double ox = 0.0, double oy = 0.0 )
631 {
632 const CLancIRParams Params( SrcSSize, NewSSize, kx0, ky0, ox, oy );
633
634 return( resizeImage( SrcBuf, SrcWidth, SrcHeight, NewBuf, NewWidth,
635 NewHeight, ElCount, &Params ));
636 }
637
638protected:
639 float* FltBuf0;
640 size_t FltBuf0Len;
641 float* FltBuf;
642 float* spv0;
644 int spv0len;
645 float* spv;
646
662
663 template< typename Tb, typename Tl >
664 static void reallocBuf( Tb*& buf0, Tb*& buf, Tl& len, Tl newlen )
665 {
666 newlen += LANCIR_ALIGN;
667
668 if( newlen > len )
669 {
670 if( buf0 != NULL )
671 {
672 delete[] buf0;
673 buf0 = NULL;
674 len = 0;
675 }
676
677 buf0 = new Tb[ newlen ];
678 len = newlen;
679 buf = (Tb*) (( (uintptr_t) buf0 + LANCIR_ALIGN - 1 ) &
680 ~(uintptr_t) ( LANCIR_ALIGN - 1 ));
681 }
682 }
683
697
698 template< typename Tb, typename Tl >
699 static void reallocBuf( Tb*& buf, Tl& len, const Tl newlen )
700 {
701 if( newlen > len )
702 {
703 if( buf != NULL )
704 {
705 delete[] buf;
706 buf = NULL;
707 len = 0;
708 }
709
710 buf = new Tb[ newlen ];
711 len = newlen;
712 }
713 }
714
715 class CResizeScanline;
716
720
721 class CResizeFilters
722 {
723 friend class CResizeScanline;
724
725 public:
726 int KernelLen;
729
730 CResizeFilters()
731 : Filters( NULL )
732 , FiltersLen( 0 )
733 , la( 0.0 )
734 {
735 memset( Bufs0, 0, sizeof( Bufs0 ));
736 memset( Bufs0Len, 0, sizeof( Bufs0Len ));
737 }
738
739 ~CResizeFilters()
740 {
741 int i;
742
743 for( i = 0; i < BufCount; i++ )
744 {
745 delete[] Bufs0[ i ];
746 }
747
748 delete[] Filters;
749 }
750
762
763 bool update( const double la0, const double k0, const int ElCount0 )
764 {
765 if( la0 == la && k0 == k && ElCount0 == ElCount )
766 {
767 return( false );
768 }
769
770 const double NormFreq = ( k0 <= 1.0 ? 1.0 : 1.0 / k0 );
771 Freq = 3.1415926535897932 * NormFreq;
772 FreqA = Freq / la0;
773
774 Len2 = la0 / NormFreq;
775 fl2 = (int) ceil( Len2 );
776 KernelLen = fl2 + fl2;
777
778 #if LANCIR_ALIGN > 4
779
780 ElRepl = ElCount0;
781 KernelLenA = KernelLen * ElCount0;
782
783 const int elalign =
784 (int) ( LANCIR_ALIGN / sizeof( float )) - 1;
785
786 KernelLenA = ( KernelLenA + elalign ) & ~elalign;
787
788 #else // LANCIR_ALIGN > 4
789
790 ElRepl = 1;
791 KernelLenA = KernelLen;
792
793 #endif // LANCIR_ALIGN > 4
794
795 FracCount = 1000; // Enough for Lanczos implicit 8-bit precision.
796
797 la = 0.0;
798 reallocBuf( Filters, FiltersLen, FracCount + 1 );
799
800 memset( Filters, 0, FiltersLen * sizeof( Filters[ 0 ]));
801
802 setBuf( 0 );
803
804 la = la0;
805 k = k0;
806 ElCount = ElCount0;
807
808 return( true );
809 }
810
820
821 const float* getFilter( const double x )
822 {
823 const int Frac = (int) ( x * FracCount + 0.5 );
824 float* flt = Filters[ Frac ];
825
826 if( flt != NULL )
827 {
828 return( flt );
829 }
830
831 flt = Bufs[ CurBuf ] + CurBufFill * KernelLenA;
832 Filters[ Frac ] = flt;
833 CurBufFill++;
834
835 if( CurBufFill == BufLen )
836 {
837 setBuf( CurBuf + 1 );
838 }
839
840 makeFilterNorm( flt, 1.0 - (double) Frac / FracCount );
841
842 if( ElRepl > 1 )
843 {
844 replicateFilter( flt, KernelLen, ElRepl );
845 }
846
847 return( flt );
848 }
849
850 protected:
851 double Freq;
852 double FreqA;
853 double Len2;
854 int fl2;
855 int FracCount;
857 int KernelLenA;
859 int ElRepl;
860 static const int BufCount = 4;
862 static const int BufLen = 256;
865 float* Bufs0[ BufCount ];
867 int Bufs0Len[ BufCount ];
869 float* Bufs[ BufCount ];
870 int CurBuf;
871 int CurBufFill;
873 float** Filters;
876 int FiltersLen;
877 double la;
878 double k;
879 int ElCount;
880
889
890 void setBuf( const int bi )
891 {
892 reallocBuf( Bufs0[ bi ], Bufs[ bi ], Bufs0Len[ bi ],
893 BufLen * KernelLenA );
894
895 CurBuf = bi;
896 CurBufFill = 0;
897 }
898
906
907 class CSineGen
908 {
909 public:
918
919 CSineGen( const double si, const double ph )
920 : svalue1( sin( ph ))
921 , svalue2( sin( ph - si ))
922 , sincr( 2.0 * cos( si ))
923 {
924 }
925
930
931 double generate()
932 {
933 const double res = svalue1;
934
935 svalue1 = sincr * res - svalue2;
936 svalue2 = res;
937
938 return( res );
939 }
940
941 private:
942 double svalue1;
943 double svalue2;
944 double sincr;
945 };
946
957
958 void makeFilterNorm( float* op, const double FracDelay ) const
959 {
960 CSineGen f( Freq, Freq * ( FracDelay - fl2 ));
961 CSineGen fw( FreqA, FreqA * ( FracDelay - fl2 ));
962
963 float* op0 = op;
964 double s = 0.0;
965 double ut;
966
967 int t = -fl2;
968
969 if( t + FracDelay < -Len2 )
970 {
971 f.generate();
972 fw.generate();
973 *op = (float) 0;
974 op++;
975 t++;
976 }
977
978 int IsZeroX = ( fabs( FracDelay - 1.0 ) < 2.3e-13 );
979 int mt = 0 - IsZeroX;
980 IsZeroX |= ( fabs( FracDelay ) < 2.3e-13 );
981
982 while( t < mt )
983 {
984 ut = t + FracDelay;
985 *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
986 s += *op;
987 op++;
988 t++;
989 }
990
991 if( IsZeroX ) // t+FracDelay==0
992 {
993 *op = (float) ( Freq * FreqA );
994 s += *op;
995 f.generate();
996 fw.generate();
997 }
998 else
999 {
1000 ut = FracDelay; // t==0
1001 *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
1002 s += *op;
1003 }
1004
1005 mt = fl2 - 2;
1006
1007 while( t < mt )
1008 {
1009 op++;
1010 t++;
1011 ut = t + FracDelay;
1012 *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
1013 s += *op;
1014 }
1015
1016 op++;
1017 ut = t + 1 + FracDelay;
1018
1019 if( ut > Len2 )
1020 {
1021 *op = (float) 0;
1022 }
1023 else
1024 {
1025 *op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
1026 s += *op;
1027 }
1028
1029 s = 1.0 / s;
1030 t = (int) ( op - op0 + 1 );
1031
1032 while( t != 0 )
1033 {
1034 *op0 = (float) ( *op0 * s );
1035 op0++;
1036 t--;
1037 }
1038 }
1039
1052
1053 static void replicateFilter( float* const p, const int kl,
1054 const int erp )
1055 {
1056 const float* ip = p + kl - 1;
1057 float* op = p + ( kl - 1 ) * erp;
1058 int c = kl;
1059
1060 if( erp == 2 )
1061 {
1062 while( c != 0 )
1063 {
1064 const float v = *ip;
1065 op[ 0 ] = v;
1066 op[ 1 ] = v;
1067 ip--;
1068 op -= 2;
1069 c--;
1070 }
1071 }
1072 else
1073 if( erp == 3 )
1074 {
1075 while( c != 0 )
1076 {
1077 const float v = *ip;
1078 op[ 0 ] = v;
1079 op[ 1 ] = v;
1080 op[ 2 ] = v;
1081 ip--;
1082 op -= 3;
1083 c--;
1084 }
1085 }
1086 else // erp == 4
1087 {
1088 while( c != 0 )
1089 {
1090 const float v = *ip;
1091 op[ 0 ] = v;
1092 op[ 1 ] = v;
1093 op[ 2 ] = v;
1094 op[ 3 ] = v;
1095 ip--;
1096 op -= 4;
1097 c--;
1098 }
1099 }
1100 }
1101 };
1102
1107
1108 struct CResizePos
1109 {
1110 const float* flt;
1111 intptr_t spo;
1113 intptr_t so;
1114 };
1115
1123
1124 class CResizeScanline
1125 {
1126 public:
1127 int padl;
1128 int padr;
1129 CResizePos* pos;
1131
1132 CResizeScanline()
1133 : pos( NULL )
1134 , poslen( 0 )
1135 , SrcLen( 0 )
1136 {
1137 }
1138
1139 ~CResizeScanline()
1140 {
1141 delete[] pos;
1142 }
1143
1151
1152 void reset()
1153 {
1154 SrcLen = 0;
1155 }
1156
1171
1172 void update( const int SrcLen0, const int DstLen0, const double o0,
1173 CResizeFilters& rf, float* const sp = NULL )
1174 {
1175 if( SrcLen0 == SrcLen && DstLen0 == DstLen && o0 == o )
1176 {
1177 return;
1178 }
1179
1180 const int fl2m1 = rf.fl2 - 1;
1181 padl = fl2m1 - (int) floor( o0 );
1182
1183 if( padl < 0 )
1184 {
1185 padl = 0;
1186 }
1187
1188 // Make sure `padr` and `pos` are in sync: calculate ending `pos`
1189 // offset in advance.
1190
1191 const double k = rf.k;
1192
1193 const int DstLen_m1 = DstLen0 - 1;
1194 const double oe = o0 + k * DstLen_m1;
1195 const int ie = (int) floor( oe );
1196
1197 padr = ie + rf.fl2 + 1 - SrcLen0;
1198
1199 if( padr < 0 )
1200 {
1201 padr = 0;
1202 }
1203
1204 SrcLen = 0;
1205 reallocBuf( pos, poslen, DstLen0 );
1206
1207 const intptr_t ElCountF = rf.ElCount * sizeof( float );
1208 const int so = padl - fl2m1;
1209 CResizePos* rp = pos;
1210 intptr_t rpso;
1211 int i;
1212
1213 for( i = 0; i < DstLen_m1; i++ )
1214 {
1215 const double ox = o0 + k * i;
1216 const int ix = (int) floor( ox );
1217
1218 rp -> flt = rf.getFilter( ox - ix );
1219 rpso = so + ix;
1220 rp -> spo = (intptr_t) sp + rpso * ElCountF;
1221 rp -> so = rpso;
1222 rp++;
1223 }
1224
1225 rp -> flt = rf.getFilter( oe - ie );
1226 rpso = so + ie;
1227 rp -> spo = (intptr_t) sp + rpso * ElCountF;
1228 rp -> so = rpso;
1229
1230 SrcLen = SrcLen0;
1231 DstLen = DstLen0;
1232 o = o0;
1233 }
1234
1245
1246 void updateSPO( CResizeFilters& rf, float* const sp )
1247 {
1248 const intptr_t ElCountF = rf.ElCount * sizeof( float );
1249 CResizePos* const rp = pos;
1250 int i;
1251
1252 for( i = 0; i < DstLen; i++ )
1253 {
1254 rp[ i ].spo = (intptr_t) sp + rp[ i ].so * ElCountF;
1255 }
1256 }
1257
1258 protected:
1259 int poslen;
1260 int SrcLen;
1261 int DstLen;
1262 double o;
1263 };
1264
1265 CResizeFilters rfv;
1266 CResizeFilters rfh0;
1268 CResizeScanline rsv;
1269 CResizeScanline rsh;
1270
1287
1288 template< typename T >
1289 static void copyScanline1v( const T* ip, const size_t ipinc, float* op,
1290 int cc, int repl, int repr )
1291 {
1292 float v0;
1293
1294 if( repl > 0 )
1295 {
1296 v0 = (float) ip[ 0 ];
1297
1298 do
1299 {
1300 op[ 0 ] = v0;
1301 op += 1;
1302
1303 } while( --repl != 0 );
1304 }
1305
1306 while( cc != 0 )
1307 {
1308 op[ 0 ] = (float) ip[ 0 ];
1309 ip += ipinc;
1310 op += 1;
1311 cc--;
1312 }
1313
1314 if( repr > 0 )
1315 {
1316 const T* const ipe = ip - ipinc;
1317 v0 = (float) ipe[ 0 ];
1318
1319 do
1320 {
1321 op[ 0 ] = v0;
1322 op += 1;
1323
1324 } while( --repr != 0 );
1325 }
1326 }
1327
1328 template< typename T >
1329 static void copyScanline2v( const T* ip, const size_t ipinc, float* op,
1330 int cc, int repl, int repr )
1331 {
1332 float v0, v1;
1333
1334 if( repl > 0 )
1335 {
1336 v0 = (float) ip[ 0 ];
1337 v1 = (float) ip[ 1 ];
1338
1339 do
1340 {
1341 op[ 0 ] = v0;
1342 op[ 1 ] = v1;
1343 op += 2;
1344
1345 } while( --repl != 0 );
1346 }
1347
1348 while( cc != 0 )
1349 {
1350 op[ 0 ] = (float) ip[ 0 ];
1351 op[ 1 ] = (float) ip[ 1 ];
1352 ip += ipinc;
1353 op += 2;
1354 cc--;
1355 }
1356
1357 if( repr > 0 )
1358 {
1359 const T* const ipe = ip - ipinc;
1360 v0 = (float) ipe[ 0 ];
1361 v1 = (float) ipe[ 1 ];
1362
1363 do
1364 {
1365 op[ 0 ] = v0;
1366 op[ 1 ] = v1;
1367 op += 2;
1368
1369 } while( --repr != 0 );
1370 }
1371 }
1372
1373 template< typename T >
1374 static void copyScanline3v( const T* ip, const size_t ipinc, float* op,
1375 int cc, int repl, int repr )
1376 {
1377 float v0, v1, v2;
1378
1379 if( repl > 0 )
1380 {
1381 v0 = (float) ip[ 0 ];
1382 v1 = (float) ip[ 1 ];
1383 v2 = (float) ip[ 2 ];
1384
1385 do
1386 {
1387 op[ 0 ] = v0;
1388 op[ 1 ] = v1;
1389 op[ 2 ] = v2;
1390 op += 3;
1391
1392 } while( --repl != 0 );
1393 }
1394
1395 while( cc != 0 )
1396 {
1397 op[ 0 ] = (float) ip[ 0 ];
1398 op[ 1 ] = (float) ip[ 1 ];
1399 op[ 2 ] = (float) ip[ 2 ];
1400 ip += ipinc;
1401 op += 3;
1402 cc--;
1403 }
1404
1405 if( repr > 0 )
1406 {
1407 const T* const ipe = ip - ipinc;
1408 v0 = (float) ipe[ 0 ];
1409 v1 = (float) ipe[ 1 ];
1410 v2 = (float) ipe[ 2 ];
1411
1412 do
1413 {
1414 op[ 0 ] = v0;
1415 op[ 1 ] = v1;
1416 op[ 2 ] = v2;
1417 op += 3;
1418
1419 } while( --repr != 0 );
1420 }
1421 }
1422
1423 template< typename T >
1424 static void copyScanline4v( const T* ip, const size_t ipinc, float* op,
1425 int cc, int repl, int repr )
1426 {
1427 float v0, v1, v2, v3;
1428
1429 if( repl > 0 )
1430 {
1431 v0 = (float) ip[ 0 ];
1432 v1 = (float) ip[ 1 ];
1433 v2 = (float) ip[ 2 ];
1434 v3 = (float) ip[ 3 ];
1435
1436 do
1437 {
1438 op[ 0 ] = v0;
1439 op[ 1 ] = v1;
1440 op[ 2 ] = v2;
1441 op[ 3 ] = v3;
1442 op += 4;
1443
1444 } while( --repl != 0 );
1445 }
1446
1447 while( cc != 0 )
1448 {
1449 op[ 0 ] = (float) ip[ 0 ];
1450 op[ 1 ] = (float) ip[ 1 ];
1451 op[ 2 ] = (float) ip[ 2 ];
1452 op[ 3 ] = (float) ip[ 3 ];
1453 ip += ipinc;
1454 op += 4;
1455 cc--;
1456 }
1457
1458 if( repr > 0 )
1459 {
1460 const T* const ipe = ip - ipinc;
1461 v0 = (float) ipe[ 0 ];
1462 v1 = (float) ipe[ 1 ];
1463 v2 = (float) ipe[ 2 ];
1464 v3 = (float) ipe[ 3 ];
1465
1466 do
1467 {
1468 op[ 0 ] = v0;
1469 op[ 1 ] = v1;
1470 op[ 2 ] = v2;
1471 op[ 3 ] = v3;
1472 op += 4;
1473
1474 } while( --repr != 0 );
1475 }
1476 }
1477
1479
1492
1493 static void padScanline1h( float* op, CResizeScanline& rs, const int l )
1494 {
1495 const float* ip = op + rs.padl;
1496
1497 float v0 = ip[ 0 ];
1498 int i;
1499
1500 for( i = 0; i < rs.padl; i++ )
1501 {
1502 op[ i ] = v0;
1503 }
1504
1505 ip += l;
1506 op += rs.padl + l;
1507
1508 v0 = ip[ -1 ];
1509
1510 for( i = 0; i < rs.padr; i++ )
1511 {
1512 op[ i ] = v0;
1513 }
1514 }
1515
1516 static void padScanline2h( float* op, CResizeScanline& rs, const int l )
1517 {
1518 const float* ip = op + rs.padl * 2;
1519
1520 float v0 = ip[ 0 ];
1521 float v1 = ip[ 1 ];
1522 int i;
1523
1524 for( i = 0; i < rs.padl; i++ )
1525 {
1526 op[ 0 ] = v0;
1527 op[ 1 ] = v1;
1528 op += 2;
1529 }
1530
1531 const int lc = l * 2;
1532 ip += lc;
1533 op += lc;
1534
1535 v0 = ip[ -2 ];
1536 v1 = ip[ -1 ];
1537
1538 for( i = 0; i < rs.padr; i++ )
1539 {
1540 op[ 0 ] = v0;
1541 op[ 1 ] = v1;
1542 op += 2;
1543 }
1544 }
1545
1546 static void padScanline3h( float* op, CResizeScanline& rs, const int l )
1547 {
1548 const float* ip = op + rs.padl * 3;
1549
1550 float v0 = ip[ 0 ];
1551 float v1 = ip[ 1 ];
1552 float v2 = ip[ 2 ];
1553 int i;
1554
1555 for( i = 0; i < rs.padl; i++ )
1556 {
1557 op[ 0 ] = v0;
1558 op[ 1 ] = v1;
1559 op[ 2 ] = v2;
1560 op += 3;
1561 }
1562
1563 const int lc = l * 3;
1564 ip += lc;
1565 op += lc;
1566
1567 v0 = ip[ -3 ];
1568 v1 = ip[ -2 ];
1569 v2 = ip[ -1 ];
1570
1571 for( i = 0; i < rs.padr; i++ )
1572 {
1573 op[ 0 ] = v0;
1574 op[ 1 ] = v1;
1575 op[ 2 ] = v2;
1576 op += 3;
1577 }
1578 }
1579
1580 static void padScanline4h( float* op, CResizeScanline& rs, const int l )
1581 {
1582 const float* ip = op + rs.padl * 4;
1583
1584 float v0 = ip[ 0 ];
1585 float v1 = ip[ 1 ];
1586 float v2 = ip[ 2 ];
1587 float v3 = ip[ 3 ];
1588 int i;
1589
1590 for( i = 0; i < rs.padl; i++ )
1591 {
1592 op[ 0 ] = v0;
1593 op[ 1 ] = v1;
1594 op[ 2 ] = v2;
1595 op[ 3 ] = v3;
1596 op += 4;
1597 }
1598
1599 const int lc = l * 4;
1600 ip += lc;
1601 op += lc;
1602
1603 v0 = ip[ -4 ];
1604 v1 = ip[ -3 ];
1605 v2 = ip[ -2 ];
1606 v3 = ip[ -1 ];
1607
1608 for( i = 0; i < rs.padr; i++ )
1609 {
1610 op[ 0 ] = v0;
1611 op[ 1 ] = v1;
1612 op[ 2 ] = v2;
1613 op[ 3 ] = v3;
1614 op += 4;
1615 }
1616 }
1617
1619
1627
1628 static inline int roundclamp( const float v, const int Clamp )
1629 {
1630 if( v < 0.5f )
1631 {
1632 return( 0 );
1633 }
1634
1635 const int vr = (int) ( v + 0.5f );
1636
1637 return( vr > Clamp ? Clamp : vr );
1638 }
1639
1659
1660 template< bool IsOutFloat, bool IsUnityMul, typename T >
1661 static void outputScanline( const float* ip, T* op, int l,
1662 const int Clamp, const float OutMul )
1663 {
1664 if( IsOutFloat )
1665 {
1666 if( IsUnityMul )
1667 {
1668 if( sizeof( op[ 0 ]) == sizeof( ip[ 0 ]))
1669 {
1670 memcpy( op, ip, l * sizeof( op[ 0 ]));
1671 }
1672 else
1673 {
1674 int l4 = l >> 2;
1675 l &= 3;
1676
1677 while( l4 != 0 )
1678 {
1679 op[ 0 ] = (T) ip[ 0 ];
1680 op[ 1 ] = (T) ip[ 1 ];
1681 op[ 2 ] = (T) ip[ 2 ];
1682 op[ 3 ] = (T) ip[ 3 ];
1683 ip += 4;
1684 op += 4;
1685 l4--;
1686 }
1687
1688 while( l != 0 )
1689 {
1690 *op = (T) *ip;
1691 ip++;
1692 op++;
1693 l--;
1694 }
1695 }
1696 }
1697 else
1698 {
1699 int l4 = l >> 2;
1700 l &= 3;
1701 bool DoScalar = true;
1702
1703 if( sizeof( op[ 0 ]) == sizeof( ip[ 0 ]))
1704 {
1705 #if defined( LANCIR_SSE2 )
1706
1707 DoScalar = false;
1708 const __m128 om = _mm_set1_ps( OutMul );
1709
1710 while( l4 != 0 )
1711 {
1712 _mm_storeu_ps( (float*) op,
1713 _mm_mul_ps( _mm_load_ps( ip ), om ));
1714
1715 ip += 4;
1716 op += 4;
1717 l4--;
1718 }
1719
1720 #elif defined( LANCIR_NEON )
1721
1722 DoScalar = false;
1723 const float32x4_t om = vdupq_n_f32( OutMul );
1724
1725 while( l4 != 0 )
1726 {
1727 vst1q_f32( (float*) op,
1728 vmulq_f32( vld1q_f32( ip ), om ));
1729
1730 ip += 4;
1731 op += 4;
1732 l4--;
1733 }
1734
1735 #endif // defined( LANCIR_NEON )
1736 }
1737
1738 if( DoScalar )
1739 {
1740 while( l4 != 0 )
1741 {
1742 op[ 0 ] = (T) ( ip[ 0 ] * OutMul );
1743 op[ 1 ] = (T) ( ip[ 1 ] * OutMul );
1744 op[ 2 ] = (T) ( ip[ 2 ] * OutMul );
1745 op[ 3 ] = (T) ( ip[ 3 ] * OutMul );
1746 ip += 4;
1747 op += 4;
1748 l4--;
1749 }
1750 }
1751
1752 while( l != 0 )
1753 {
1754 *op = (T) ( *ip * OutMul );
1755 ip++;
1756 op++;
1757 l--;
1758 }
1759 }
1760 }
1761 else
1762 {
1763 int l4 = l >> 2;
1764 l &= 3;
1765
1766 #if defined( LANCIR_SSE2 )
1767
1768 const __m128 minv = _mm_setzero_ps();
1769 const __m128 maxv = _mm_set1_ps( (float) Clamp );
1770 const __m128 om = _mm_set1_ps( OutMul );
1771
1772 unsigned int prevrm = _MM_GET_ROUNDING_MODE();
1773 _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );
1774
1775 if( sizeof( op[ 0 ]) == 4 )
1776 {
1777 while( l4 != 0 )
1778 {
1779 const __m128 v = _mm_load_ps( ip );
1780 const __m128 cv = _mm_max_ps( _mm_min_ps(
1781 ( IsUnityMul ? v : _mm_mul_ps( v, om )),
1782 maxv ), minv );
1783
1784 _mm_storeu_si128( (__m128i*) op, _mm_cvtps_epi32( cv ));
1785
1786 ip += 4;
1787 op += 4;
1788 l4--;
1789 }
1790 }
1791 else
1792 if( sizeof( op[ 0 ]) == 2 )
1793 {
1794 while( l4 != 0 )
1795 {
1796 const __m128 v = _mm_load_ps( ip );
1797 const __m128 cv = _mm_max_ps( _mm_min_ps(
1798 ( IsUnityMul ? v : _mm_mul_ps( v, om )),
1799 maxv ), minv );
1800
1801 const __m128i v32 = _mm_cvtps_epi32( cv );
1802 const __m128i v16s = _mm_shufflehi_epi16(
1803 _mm_shufflelo_epi16( v32, 0 | 2 << 2 ), 0 | 2 << 2 );
1804
1805 const __m128i v16 = _mm_shuffle_epi32( v16s, 0 | 2 << 2 );
1806
1807 __m128i tmp;
1808 _mm_store_si128( &tmp, v16 );
1809 memcpy( op, &tmp, 8 );
1810
1811 ip += 4;
1812 op += 4;
1813 l4--;
1814 }
1815 }
1816 else
1817 {
1818 while( l4 != 0 )
1819 {
1820 const __m128 v = _mm_load_ps( ip );
1821 const __m128 cv = _mm_max_ps( _mm_min_ps(
1822 ( IsUnityMul ? v : _mm_mul_ps( v, om )),
1823 maxv ), minv );
1824
1825 const __m128i v32 = _mm_cvtps_epi32( cv );
1826 const __m128i v16s = _mm_shufflehi_epi16(
1827 _mm_shufflelo_epi16( v32, 0 | 2 << 2 ), 0 | 2 << 2 );
1828
1829 const __m128i v16 = _mm_shuffle_epi32( v16s, 0 | 2 << 2 );
1830 const __m128i v8 = _mm_packus_epi16( v16, v16 );
1831
1832 *(int*) op = _mm_cvtsi128_si32( v8 );
1833
1834 ip += 4;
1835 op += 4;
1836 l4--;
1837 }
1838 }
1839
1840 _MM_SET_ROUNDING_MODE( prevrm );
1841
1842 #elif defined( LANCIR_NEON )
1843
1844 const float32x4_t minv = vdupq_n_f32( 0.0f );
1845 const float32x4_t maxv = vdupq_n_f32( (float) Clamp );
1846 const float32x4_t om = vdupq_n_f32( OutMul );
1847 const float32x4_t v05 = vdupq_n_f32( 0.5f );
1848
1849 if( sizeof( op[ 0 ]) == 4 )
1850 {
1851 while( l4 != 0 )
1852 {
1853 const float32x4_t v = vld1q_f32( ip );
1854 const float32x4_t cv = vmaxq_f32( vminq_f32(
1855 ( IsUnityMul ? v : vmulq_f32( v, om )),
1856 maxv ), minv );
1857
1858 vst1q_u32( (unsigned int*) op, vcvtq_u32_f32( vaddq_f32(
1859 cv, v05 )));
1860
1861 ip += 4;
1862 op += 4;
1863 l4--;
1864 }
1865 }
1866 else
1867 if( sizeof( op[ 0 ]) == 2 )
1868 {
1869 while( l4 != 0 )
1870 {
1871 const float32x4_t v = vld1q_f32( ip );
1872 const float32x4_t cv = vmaxq_f32( vminq_f32(
1873 ( IsUnityMul ? v : vmulq_f32( v, om )),
1874 maxv ), minv );
1875
1876 const uint32x4_t v32 = vcvtq_u32_f32(
1877 vaddq_f32( cv, v05 ));
1878
1879 const uint16x4_t v16 = vmovn_u32( v32 );
1880
1881 vst1_u16( (unsigned short*) op, v16 );
1882
1883 ip += 4;
1884 op += 4;
1885 l4--;
1886 }
1887 }
1888 else
1889 {
1890 while( l4 != 0 )
1891 {
1892 const float32x4_t v = vld1q_f32( ip );
1893 const float32x4_t cv = vmaxq_f32( vminq_f32(
1894 ( IsUnityMul ? v : vmulq_f32( v, om )),
1895 maxv ), minv );
1896
1897 const uint32x4_t v32 = vcvtq_u32_f32(
1898 vaddq_f32( cv, v05 ));
1899
1900 const uint16x4_t v16 = vmovn_u32( v32 );
1901 const uint8x8_t v8 = vmovn_u16( vcombine_u16( v16, v16 ));
1902
1903 *(unsigned int*) op = vget_lane_u32( (uint32x2_t) v8, 0 );
1904
1905 ip += 4;
1906 op += 4;
1907 l4--;
1908 }
1909 }
1910
1911 #else // defined( LANCIR_NEON )
1912
1913 if( IsUnityMul )
1914 {
1915 while( l4 != 0 )
1916 {
1917 op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp );
1918 op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp );
1919 op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp );
1920 op[ 3 ] = (T) roundclamp( ip[ 3 ], Clamp );
1921 ip += 4;
1922 op += 4;
1923 l4--;
1924 }
1925 }
1926 else
1927 {
1928 while( l4 != 0 )
1929 {
1930 op[ 0 ] = (T) roundclamp( ip[ 0 ] * OutMul, Clamp );
1931 op[ 1 ] = (T) roundclamp( ip[ 1 ] * OutMul, Clamp );
1932 op[ 2 ] = (T) roundclamp( ip[ 2 ] * OutMul, Clamp );
1933 op[ 3 ] = (T) roundclamp( ip[ 3 ] * OutMul, Clamp );
1934 ip += 4;
1935 op += 4;
1936 l4--;
1937 }
1938 }
1939
1940 #endif // defined( LANCIR_NEON )
1941
1942 if( IsUnityMul )
1943 {
1944 while( l != 0 )
1945 {
1946 *op = (T) roundclamp( *ip, Clamp );
1947 ip++;
1948 op++;
1949 l--;
1950 }
1951 }
1952 else
1953 {
1954 while( l != 0 )
1955 {
1956 *op = (T) roundclamp( *ip * OutMul, Clamp );
1957 ip++;
1958 op++;
1959 l--;
1960 }
1961 }
1962 }
1963 }
1964
1969
1970 #define LANCIR_LF_PRE \
1971 const CResizePos* const rpe = rp + DstLen; \
1972 while( rp != rpe ) \
1973 { \
1974 const float* flt = rp -> flt; \
1975 const float* ip; \
1976 if( UseSP ) \
1977 { \
1978 ip = (const float*) ( (intptr_t) sp + rp -> spo ); \
1979 } \
1980 else \
1981 { \
1982 ip = (const float*) rp -> spo; \
1983 }
1984
1989
1990 #define LANCIR_LF_POST \
1991 op += opinc; \
1992 rp++; \
1993 }
1994
2008
2009 template< bool UseSP >
2010 static void resize1( const float* const sp, float* op, const size_t opinc,
2011 const CResizePos* rp, const int kl, const int DstLen )
2012 {
2013 const int ci = kl >> 2;
2014
2015 if(( kl & 3 ) == 0 )
2016 {
2017 LANCIR_LF_PRE
2018
2019 int c = ci;
2020
2021 #if defined( LANCIR_SSE2 )
2022
2023 __m128 sum = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip ));
2024
2025 while( --c != 0 )
2026 {
2027 flt += 4;
2028 ip += 4;
2029 sum = _mm_add_ps( sum, _mm_mul_ps( _mm_load_ps( flt ),
2030 _mm_loadu_ps( ip )));
2031 }
2032
2033 sum = _mm_add_ps( sum, _mm_movehl_ps( sum, sum ));
2034
2035 _mm_store_ss( op, _mm_add_ss( sum,
2036 _mm_shuffle_ps( sum, sum, 1 )));
2037
2038 #elif defined( LANCIR_NEON )
2039
2040 float32x4_t sum = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip ));
2041
2042 while( --c != 0 )
2043 {
2044 flt += 4;
2045 ip += 4;
2046 sum = vmlaq_f32( sum, vld1q_f32( flt ), vld1q_f32( ip ));
2047 }
2048
2049 #if defined( LANCIR_ARM32 )
2050 const float32x2_t sum2 = vadd_f32( vget_high_f32( sum ),
2051 vget_low_f32( sum ));
2052
2053 op[ 0 ] = vget_lane_f32( sum2, 0 ) + vget_lane_f32( sum2, 1 );
2054 #else // defined( LANCIR_ARM32 )
2055 op[ 0 ] = vaddvq_f32( sum );
2056 #endif // defined( LANCIR_ARM32 )
2057
2058 #else // defined( LANCIR_NEON )
2059
2060 float sum0 = flt[ 0 ] * ip[ 0 ];
2061 float sum1 = flt[ 1 ] * ip[ 1 ];
2062 float sum2 = flt[ 2 ] * ip[ 2 ];
2063 float sum3 = flt[ 3 ] * ip[ 3 ];
2064
2065 while( --c != 0 )
2066 {
2067 flt += 4;
2068 ip += 4;
2069 sum0 += flt[ 0 ] * ip[ 0 ];
2070 sum1 += flt[ 1 ] * ip[ 1 ];
2071 sum2 += flt[ 2 ] * ip[ 2 ];
2072 sum3 += flt[ 3 ] * ip[ 3 ];
2073 }
2074
2075 op[ 0 ] = ( sum0 + sum1 ) + ( sum2 + sum3 );
2076
2077 #endif // defined( LANCIR_NEON )
2078
2079 LANCIR_LF_POST
2080 }
2081 else
2082 {
2083 LANCIR_LF_PRE
2084
2085 int c = ci;
2086
2087 #if defined( LANCIR_SSE2 )
2088
2089 __m128 sum = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip ));
2090
2091 while( --c != 0 )
2092 {
2093 flt += 4;
2094 ip += 4;
2095 sum = _mm_add_ps( sum, _mm_mul_ps( _mm_load_ps( flt ),
2096 _mm_loadu_ps( ip )));
2097 }
2098
2099 sum = _mm_add_ps( sum, _mm_movehl_ps( sum, sum ));
2100
2101 const __m128 sum2 = _mm_mul_ps( _mm_loadu_ps( flt + 2 ),
2102 _mm_loadu_ps( ip + 2 ));
2103
2104 sum = _mm_add_ps( sum, _mm_movehl_ps( sum2, sum2 ));
2105
2106 _mm_store_ss( op, _mm_add_ss( sum,
2107 _mm_shuffle_ps( sum, sum, 1 )));
2108
2109 #elif defined( LANCIR_NEON )
2110
2111 float32x4_t sum = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip ));
2112
2113 while( --c != 0 )
2114 {
2115 flt += 4;
2116 ip += 4;
2117 sum = vmlaq_f32( sum, vld1q_f32( flt ), vld1q_f32( ip ));
2118 }
2119
2120 float32x2_t sum2 = vadd_f32( vget_high_f32( sum ),
2121 vget_low_f32( sum ));
2122
2123 sum2 = vmla_f32( sum2, vld1_f32( flt + 4 ), vld1_f32( ip + 4 ));
2124
2125 #if defined( LANCIR_ARM32 )
2126 op[ 0 ] = vget_lane_f32( sum2, 0 ) + vget_lane_f32( sum2, 1 );
2127 #else // defined( LANCIR_ARM32 )
2128 op[ 0 ] = vaddv_f32( sum2 );
2129 #endif // defined( LANCIR_ARM32 )
2130
2131 #else // defined( LANCIR_NEON )
2132
2133 float sum0 = flt[ 0 ] * ip[ 0 ];
2134 float sum1 = flt[ 1 ] * ip[ 1 ];
2135 float sum2 = flt[ 2 ] * ip[ 2 ];
2136 float sum3 = flt[ 3 ] * ip[ 3 ];
2137
2138 while( --c != 0 )
2139 {
2140 flt += 4;
2141 ip += 4;
2142 sum0 += flt[ 0 ] * ip[ 0 ];
2143 sum1 += flt[ 1 ] * ip[ 1 ];
2144 sum2 += flt[ 2 ] * ip[ 2 ];
2145 sum3 += flt[ 3 ] * ip[ 3 ];
2146 }
2147
2148 op[ 0 ] = ( sum0 + sum1 ) + ( sum2 + sum3 ) +
2149 flt[ 4 ] * ip[ 4 ] + flt[ 5 ] * ip[ 5 ];
2150
2151 #endif // defined( LANCIR_NEON )
2152
2153 LANCIR_LF_POST
2154 }
2155 }
2156
2157 template< bool UseSP >
2158 static void resize2( const float* const sp, float* op, const size_t opinc,
2159 const CResizePos* rp, const int kl, const int DstLen )
2160 {
2161 #if LANCIR_ALIGN > 4
2162 const int ci = kl >> 2;
2163 const int cir = kl & 3;
2164 #else // LANCIR_ALIGN > 4
2165 const int ci = kl >> 1;
2166 #endif // LANCIR_ALIGN > 4
2167
2168 LANCIR_LF_PRE
2169
2170 int c = ci;
2171
2172 #if defined( LANCIR_AVX )
2173
2174 __m256 sum = _mm256_mul_ps( _mm256_load_ps( flt ),
2175 _mm256_loadu_ps( ip ));
2176
2177 while( --c != 0 )
2178 {
2179 flt += 8;
2180 ip += 8;
2181 sum = _mm256_add_ps( sum, _mm256_mul_ps( _mm256_load_ps( flt ),
2182 _mm256_loadu_ps( ip )));
2183 }
2184
2185 __m128 res = _mm_add_ps( _mm256_extractf128_ps( sum, 0 ),
2186 _mm256_extractf128_ps( sum, 1 ));
2187
2188 if( cir == 2 )
2189 {
2190 res = _mm_add_ps( res, _mm_mul_ps( _mm_load_ps( flt + 8 ),
2191 _mm_loadu_ps( ip + 8 )));
2192 }
2193
2194 res = _mm_add_ps( res, _mm_movehl_ps( res, res ));
2195
2196 _mm_store_ss( op, res );
2197 _mm_store_ss( op + 1, _mm_shuffle_ps( res, res, 1 ));
2198
2199 #elif defined( LANCIR_SSE2 )
2200
2201 __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip ));
2202 __m128 sumB = _mm_mul_ps( _mm_load_ps( flt + 4 ),
2203 _mm_loadu_ps( ip + 4 ));
2204
2205 while( --c != 0 )
2206 {
2207 flt += 8;
2208 ip += 8;
2209 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ),
2210 _mm_loadu_ps( ip )));
2211
2212 sumB = _mm_add_ps( sumB, _mm_mul_ps( _mm_load_ps( flt + 4 ),
2213 _mm_loadu_ps( ip + 4 )));
2214 }
2215
2216 sumA = _mm_add_ps( sumA, sumB );
2217
2218 if( cir == 2 )
2219 {
2220 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt + 8 ),
2221 _mm_loadu_ps( ip + 8 )));
2222 }
2223
2224 sumA = _mm_add_ps( sumA, _mm_movehl_ps( sumA, sumA ));
2225
2226 _mm_store_ss( op, sumA );
2227 _mm_store_ss( op + 1, _mm_shuffle_ps( sumA, sumA, 1 ));
2228
2229 #elif defined( LANCIR_NEON )
2230
2231 float32x4_t sumA = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip ));
2232 float32x4_t sumB = vmulq_f32( vld1q_f32( flt + 4 ),
2233 vld1q_f32( ip + 4 ));
2234
2235 while( --c != 0 )
2236 {
2237 flt += 8;
2238 ip += 8;
2239 sumA = vmlaq_f32( sumA, vld1q_f32( flt ), vld1q_f32( ip ));
2240 sumB = vmlaq_f32( sumB, vld1q_f32( flt + 4 ),
2241 vld1q_f32( ip + 4 ));
2242 }
2243
2244 sumA = vaddq_f32( sumA, sumB );
2245
2246 if( cir == 2 )
2247 {
2248 sumA = vmlaq_f32( sumA, vld1q_f32( flt + 8 ),
2249 vld1q_f32( ip + 8 ));
2250 }
2251
2252 vst1_f32( op, vadd_f32( vget_high_f32( sumA ), vget_low_f32( sumA )));
2253
2254 #else // defined( LANCIR_NEON )
2255
2256 const float xx = flt[ 0 ];
2257 const float xx2 = flt[ 1 ];
2258 float sum0 = xx * ip[ 0 ];
2259 float sum1 = xx * ip[ 1 ];
2260 float sum2 = xx2 * ip[ 2 ];
2261 float sum3 = xx2 * ip[ 3 ];
2262
2263 while( --c != 0 )
2264 {
2265 flt += 2;
2266 ip += 4;
2267 const float xx = flt[ 0 ];
2268 const float xx2 = flt[ 1 ];
2269 sum0 += xx * ip[ 0 ];
2270 sum1 += xx * ip[ 1 ];
2271 sum2 += xx2 * ip[ 2 ];
2272 sum3 += xx2 * ip[ 3 ];
2273 }
2274
2275 op[ 0 ] = sum0 + sum2;
2276 op[ 1 ] = sum1 + sum3;
2277
2278 #endif // defined( LANCIR_NEON )
2279
2280 LANCIR_LF_POST
2281 }
2282
2283 template< bool UseSP >
2284 static void resize3( const float* const sp, float* op, const size_t opinc,
2285 const CResizePos* rp, const int kl, const int DstLen )
2286 {
2287 #if LANCIR_ALIGN > 4
2288
2289 const int ci = kl >> 2;
2290 const int cir = kl & 3;
2291
2292 LANCIR_LF_PRE
2293
2294 float res[ 12 ];
2295 int c = ci;
2296
2297 #if defined( LANCIR_AVX )
2298
2299 __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip ));
2300 __m256 sumB = _mm256_mul_ps( _mm256_loadu_ps( flt + 4 ),
2301 _mm256_loadu_ps( ip + 4 ));
2302
2303 while( --c != 0 )
2304 {
2305 flt += 12;
2306 ip += 12;
2307 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ),
2308 _mm_loadu_ps( ip )));
2309
2310 sumB = _mm256_add_ps( sumB, _mm256_mul_ps(
2311 _mm256_loadu_ps( flt + 4 ), _mm256_loadu_ps( ip + 4 )));
2312 }
2313
2314 if( cir == 2 )
2315 {
2316 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt + 12 ),
2317 _mm_loadu_ps( ip + 12 )));
2318 }
2319
2320 _mm_storeu_ps( res, sumA );
2321
2322 float o0 = res[ 0 ] + res[ 3 ];
2323 float o1 = res[ 1 ];
2324 float o2 = res[ 2 ];
2325
2326 _mm256_storeu_ps( res + 4, sumB );
2327
2328 o1 += res[ 4 ];
2329 o2 += res[ 5 ];
2330
2331 #elif defined( LANCIR_SSE2 )
2332
2333 __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip ));
2334 __m128 sumB = _mm_mul_ps( _mm_load_ps( flt + 4 ),
2335 _mm_loadu_ps( ip + 4 ));
2336
2337 __m128 sumC = _mm_mul_ps( _mm_load_ps( flt + 8 ),
2338 _mm_loadu_ps( ip + 8 ));
2339
2340 while( --c != 0 )
2341 {
2342 flt += 12;
2343 ip += 12;
2344 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ),
2345 _mm_loadu_ps( ip )));
2346
2347 sumB = _mm_add_ps( sumB, _mm_mul_ps( _mm_load_ps( flt + 4 ),
2348 _mm_loadu_ps( ip + 4 )));
2349
2350 sumC = _mm_add_ps( sumC, _mm_mul_ps( _mm_load_ps( flt + 8 ),
2351 _mm_loadu_ps( ip + 8 )));
2352 }
2353
2354 if( cir == 2 )
2355 {
2356 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt + 12 ),
2357 _mm_loadu_ps( ip + 12 )));
2358 }
2359
2360 _mm_storeu_ps( res, sumA );
2361 _mm_storeu_ps( res + 4, sumB );
2362
2363 float o0 = res[ 0 ] + res[ 3 ];
2364 float o1 = res[ 1 ] + res[ 4 ];
2365 float o2 = res[ 2 ] + res[ 5 ];
2366
2367 _mm_storeu_ps( res + 8, sumC );
2368
2369 #elif defined( LANCIR_NEON )
2370
2371 float32x4_t sumA = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip ));
2372 float32x4_t sumB = vmulq_f32( vld1q_f32( flt + 4 ),
2373 vld1q_f32( ip + 4 ));
2374
2375 float32x4_t sumC = vmulq_f32( vld1q_f32( flt + 8 ),
2376 vld1q_f32( ip + 8 ));
2377
2378 while( --c != 0 )
2379 {
2380 flt += 12;
2381 ip += 12;
2382 sumA = vmlaq_f32( sumA, vld1q_f32( flt ), vld1q_f32( ip ));
2383 sumB = vmlaq_f32( sumB, vld1q_f32( flt + 4 ),
2384 vld1q_f32( ip + 4 ));
2385
2386 sumC = vmlaq_f32( sumC, vld1q_f32( flt + 8 ),
2387 vld1q_f32( ip + 8 ));
2388 }
2389
2390 if( cir == 2 )
2391 {
2392 sumA = vmlaq_f32( sumA, vld1q_f32( flt + 12 ),
2393 vld1q_f32( ip + 12 ));
2394 }
2395
2396 vst1q_f32( res, sumA );
2397 vst1q_f32( res + 4, sumB );
2398
2399 float o0 = res[ 0 ] + res[ 3 ];
2400 float o1 = res[ 1 ] + res[ 4 ];
2401 float o2 = res[ 2 ] + res[ 5 ];
2402
2403 vst1q_f32( res + 8, sumC );
2404
2405 #endif // defined( LANCIR_NEON )
2406
2407 o0 += res[ 6 ] + res[ 9 ];
2408 o1 += res[ 7 ] + res[ 10 ];
2409 o2 += res[ 8 ] + res[ 11 ];
2410
2411 if( cir == 2 )
2412 {
2413 o1 += flt[ 16 ] * ip[ 16 ];
2414 o2 += flt[ 17 ] * ip[ 17 ];
2415 }
2416
2417 op[ 0 ] = o0;
2418 op[ 1 ] = o1;
2419 op[ 2 ] = o2;
2420
2421 #else // LANCIR_ALIGN > 4
2422
2423 const int ci = kl >> 1;
2424
2425 LANCIR_LF_PRE
2426
2427 int c = ci;
2428
2429 const float xx = flt[ 0 ];
2430 float sum0 = xx * ip[ 0 ];
2431 float sum1 = xx * ip[ 1 ];
2432 float sum2 = xx * ip[ 2 ];
2433 const float xx2 = flt[ 1 ];
2434 float sum3 = xx2 * ip[ 3 ];
2435 float sum4 = xx2 * ip[ 4 ];
2436 float sum5 = xx2 * ip[ 5 ];
2437
2438 while( --c != 0 )
2439 {
2440 flt += 2;
2441 ip += 6;
2442 const float xx = flt[ 0 ];
2443 sum0 += xx * ip[ 0 ];
2444 sum1 += xx * ip[ 1 ];
2445 sum2 += xx * ip[ 2 ];
2446 const float xx2 = flt[ 1 ];
2447 sum3 += xx2 * ip[ 3 ];
2448 sum4 += xx2 * ip[ 4 ];
2449 sum5 += xx2 * ip[ 5 ];
2450 }
2451
2452 op[ 0 ] = sum0 + sum3;
2453 op[ 1 ] = sum1 + sum4;
2454 op[ 2 ] = sum2 + sum5;
2455
2456 #endif // LANCIR_ALIGN > 4
2457
2458 LANCIR_LF_POST
2459 }
2460
2461 template< bool UseSP >
2462 static void resize4( const float* const sp, float* op, const size_t opinc,
2463 const CResizePos* rp, const int kl, const int DstLen )
2464 {
2465 #if LANCIR_ALIGN > 4
2466 const int ci = kl >> 1;
2467 #else // LANCIR_ALIGN > 4
2468 const int ci = kl;
2469 #endif // LANCIR_ALIGN > 4
2470
2471 LANCIR_LF_PRE
2472
2473 int c = ci;
2474
2475 #if defined( LANCIR_AVX )
2476
2477 __m256 sum = _mm256_mul_ps( _mm256_load_ps( flt ),
2478 _mm256_loadu_ps( ip ));
2479
2480 while( --c != 0 )
2481 {
2482 flt += 8;
2483 ip += 8;
2484 sum = _mm256_add_ps( sum, _mm256_mul_ps( _mm256_load_ps( flt ),
2485 _mm256_loadu_ps( ip )));
2486 }
2487
2488 _mm_store_ps( op, _mm_add_ps( _mm256_extractf128_ps( sum, 0 ),
2489 _mm256_extractf128_ps( sum, 1 )));
2490
2491 #elif defined( LANCIR_SSE2 )
2492
2493 __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_load_ps( ip ));
2494 __m128 sumB = _mm_mul_ps( _mm_load_ps( flt + 4 ),
2495 _mm_load_ps( ip + 4 ));
2496
2497 while( --c != 0 )
2498 {
2499 flt += 8;
2500 ip += 8;
2501 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ),
2502 _mm_load_ps( ip )));
2503
2504 sumB = _mm_add_ps( sumB, _mm_mul_ps( _mm_load_ps( flt + 4 ),
2505 _mm_load_ps( ip + 4 )));
2506 }
2507
2508 _mm_store_ps( op, _mm_add_ps( sumA, sumB ));
2509
2510 #elif defined( LANCIR_NEON )
2511
2512 float32x4_t sumA = vmulq_f32( vld1q_f32( flt ), vld1q_f32( ip ));
2513 float32x4_t sumB = vmulq_f32( vld1q_f32( flt + 4 ),
2514 vld1q_f32( ip + 4 ));
2515
2516 while( --c != 0 )
2517 {
2518 flt += 8;
2519 ip += 8;
2520 sumA = vmlaq_f32( sumA, vld1q_f32( flt ), vld1q_f32( ip ));
2521 sumB = vmlaq_f32( sumB, vld1q_f32( flt + 4 ),
2522 vld1q_f32( ip + 4 ));
2523 }
2524
2525 vst1q_f32( op, vaddq_f32( sumA, sumB ));
2526
2527 #else // defined( LANCIR_NEON )
2528
2529 const float xx = flt[ 0 ];
2530 float sum0 = xx * ip[ 0 ];
2531 float sum1 = xx * ip[ 1 ];
2532 float sum2 = xx * ip[ 2 ];
2533 float sum3 = xx * ip[ 3 ];
2534
2535 while( --c != 0 )
2536 {
2537 flt++;
2538 ip += 4;
2539 const float xx = flt[ 0 ];
2540 sum0 += xx * ip[ 0 ];
2541 sum1 += xx * ip[ 1 ];
2542 sum2 += xx * ip[ 2 ];
2543 sum3 += xx * ip[ 3 ];
2544 }
2545
2546 op[ 0 ] = sum0;
2547 op[ 1 ] = sum1;
2548 op[ 2 ] = sum2;
2549 op[ 3 ] = sum3;
2550
2551 #endif // defined( LANCIR_NEON )
2552
2553 LANCIR_LF_POST
2554 }
2555
2557
2558 #undef LANCIR_LF_PRE
2559 #undef LANCIR_LF_POST
2560};
2561
2562} // namespace avir
2563
2564#endif // AVIR_CLANCIR_INCLUDED
#define LANCIR_ALIGN
Address alignment (granularity) used by resizing functions, in bytes.
Definition lancir.h:120
#define LANCIR_LF_POST
Scanline resize function epilogue.
Definition lancir.h:1990
#define LANCIR_LF_PRE
Scanline resize function prologue.
Definition lancir.h:1970
LANCIR resizing parameters class.
Definition lancir.h:149
CLancIRParams(const int aSrcSSize=0, const int aNewSSize=0, const double akx=0.0, const double aky=0.0, const double aox=0.0, const double aoy=0.0)
Default constructor, with optional arguments that correspond to class variables.
Definition lancir.h:183
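A minimal usage sketch (not part of the header; the values are illustrative): the parameters object can be default-constructed and individual fields adjusted before passing its address to resizeImage().
    avir::CLancIRParams Params;
    Params.la = 3.0;   // Lanczos "a" parameter; values below 2.0 make resizeImage() return 0.
    Params.ox = 0.25;  // Optional sub-pixel offsets within the source image.
    Params.oy = -0.25;
    // SrcSSize and NewSSize left at 0: scanline sizes default to width * ElCount.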
double oy
Start Y pixel offset within the source image, can be negative. A positive offset moves the image to t...
Definition lancir.h:166
double ky
Resizing step - vertical. Same as kx.
Definition lancir.h:163
double la
Lanczos window function's a parameter, greater or equal to 2.0.
Definition lancir.h:168
int NewSSize
Physical size of the destination scanline, in elements (not bytes). If this value is below 1,...
Definition lancir.h:154
double ox
Start X pixel offset within the source image, can be negative. A positive offset moves the image to t...
Definition lancir.h:164
int SrcSSize
Physical size of the source scanline, in elements (not bytes). If this value is below 1,...
Definition lancir.h:151
double kx
Resizing step - horizontal (one output pixel corresponds to k input pixels). A downsizing factor if g...
Definition lancir.h:157
static void copyScanline1v(const T *ip, const size_t ipinc, float *op, int cc, int repl, int repr)
Scanline copying function, for vertical resizing.
Definition lancir.h:1289
float * FltBuf
Address-aligned FltBuf0.
Definition lancir.h:641
int resizeImage(const Tin *const SrcBuf, const int SrcWidth, const int SrcHeight, const int SrcSSize, Tout *const NewBuf, const int NewWidth, const int NewHeight, const int NewSSize, const int ElCount, const double kx0=0.0, const double ky0=0.0, double ox=0.0, double oy=0.0)
Legacy image resizing function.
Definition lancir.h:626
CResizeScanline rsv
Vertical resize scanline.
Definition lancir.h:1268
CResizeFilters rfh0
Resizing filters for horizontal resizing (may not be in use).
Definition lancir.h:1266
float * spv0
Scanline buffer for vertical resizing, also used at the output stage.
Definition lancir.h:642
CResizeFilters rfv
Resizing filters for vertical resizing.
Definition lancir.h:1265
static void padScanline1h(float *op, CResizeScanline &rs, const int l)
Scanline padding function, for horizontal resizing.
Definition lancir.h:1493
float * FltBuf0
Intermediate resizing buffer.
Definition lancir.h:639
size_t FltBuf0Len
Length of FltBuf0.
Definition lancir.h:640
int spv0len
Length of spv0.
Definition lancir.h:644
static void reallocBuf(Tb *&buf, Tl &len, const Tl newlen)
Typed buffer reallocation function.
Definition lancir.h:699
CResizeScanline rsh
Horizontal resize scanline.
Definition lancir.h:1269
static void outputScanline(const float *ip, T *op, int l, const int Clamp, const float OutMul)
Scanline output function.
Definition lancir.h:1661
static void resize1(const float *const sp, float *op, const size_t opinc, const CResizePos *rp, const int kl, const int DstLen)
Function performs scanline resizing. Variants for 1-4-channel images.
Definition lancir.h:2010
float * spv
Address-aligned spv0.
Definition lancir.h:645
static int roundclamp(const float v, const int Clamp)
Function rounds a value and applies clamping.
Definition lancir.h:1628
static void reallocBuf(Tb *&buf0, Tb *&buf, Tl &len, Tl newlen)
Typed buffer reallocation function, with address alignment.
Definition lancir.h:664
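The address alignment applied by this overload is the usual round-up-to-a-power-of-two idiom. A standalone sketch of the same arithmetic, with a hypothetical alignPtr helper that is not part of the header:
    #include <cstdint>

    // Rounds a pointer up to the next `align` boundary; `align` must be a power
    // of two (LANCIR_ALIGN is 4, 16, or 32 depending on the SIMD path).
    static float* alignPtr( float* const p0, const uintptr_t align )
    {
        return( (float*) (( (uintptr_t) p0 + align - 1 ) & ~( align - 1 )));
    }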
int resizeImage(const Tin *const SrcBuf, const int SrcWidth, const int SrcHeight, Tout *const NewBuf, const int NewWidth, const int NewHeight, const int ElCount, const CLancIRParams *const aParams=NULL)
Function resizes an image.
Definition lancir.h:272
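A hedged usage sketch of this function, assuming an 8-bit RGB image held in std::vector buffers (all names and dimensions below are illustrative, not part of the library):
    #include "lancir.h"
    #include <cstdint>
    #include <vector>

    void resizeExample()
    {
        std::vector< std::uint8_t > Src( 640 * 480 * 3 ); // Source RGB8 pixels.
        std::vector< std::uint8_t > Dst( 320 * 240 * 3 ); // Destination RGB8 pixels.

        avir::CLancIR Resizer;
        const int res = Resizer.resizeImage( Src.data(), 640, 480,
            Dst.data(), 320, 240, 3 ); // ElCount = 3 for RGB; default parameters.

        // `res` equals NewHeight (240) on success, or 0 on invalid arguments.
        (void) res;
    }
Passing a CLancIRParams pointer as the last argument enables non-default scanline sizes, offsets, and the `la` value.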
Class for fractional delay filter bank storage and calculation.
Definition lancir.h:722
float * Bufs[BufCount]
Address-aligned Bufs0.
Definition lancir.h:869
int CurBufFill
The number of fractional positions filled in the current filter buffer.
Definition lancir.h:871
const float * getFilter(const double x)
Filter acquisition function.
Definition lancir.h:821
double Len2
Half resampling filter's length, unrounded.
Definition lancir.h:853
double k
Current k.
Definition lancir.h:878
int CurBuf
Filter buffer currently being filled.
Definition lancir.h:870
int ElCount
Current ElCount.
Definition lancir.h:879
static const int BufLen
The number of fractional filters a single buffer (filter batch) may contain. Both the BufLen and BufC...
Definition lancir.h:862
int ElRepl
The number of repetitions of each filter tap.
Definition lancir.h:859
bool update(const double la0, const double k0, const int ElCount0)
Function updates the filter bank.
Definition lancir.h:763
static const int BufCount
The maximal number of buffers (filter batches) that can be in use.
Definition lancir.h:860
void makeFilterNorm(float *op, const double FracDelay) const
Filter calculation function.
Definition lancir.h:958
void setBuf(const int bi)
Current buffer (filter batch) repositioning function.
Definition lancir.h:890
int KernelLenA
SIMD-aligned and replicated filter kernel's length.
Definition lancir.h:857
static void replicateFilter(float *const p, const int kl, const int erp)
Filter tap replication function, for SIMD operations.
Definition lancir.h:1053
double la
Current la.
Definition lancir.h:877
float ** Filters
Fractional delay filters for all positions. A particular pointer equals NULL if a filter for such pos...
Definition lancir.h:873
double FreqA
Circular frequency of the window function.
Definition lancir.h:852
int fl2
Half resampling filter's length, integer.
Definition lancir.h:854
float * Bufs0[BufCount]
Buffers that hold all filters, original.
Definition lancir.h:865
int FiltersLen
Allocated length of Filters, in elements.
Definition lancir.h:876
double Freq
Circular frequency of the filter.
Definition lancir.h:851
int FracCount
The number of fractional positions for which filters can be created.
Definition lancir.h:855
int KernelLen
Resampling filter kernel's length, taps. Available after the update() function call....
Definition lancir.h:726
int Bufs0Len[BufCount]
Allocated lengths in Bufs0, in float elements.
Definition lancir.h:867
Sine-wave signal generator class.
Definition lancir.h:908
CSineGen(const double si, const double ph)
Constructor initializes this sine-wave signal generator.
Definition lancir.h:919
double generate()
Generate the next sample.
Definition lancir.h:931
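The two-sample state implements the standard recurrence sin( ph + ( n + 1 ) * si ) = 2 * cos( si ) * sin( ph + n * si ) - sin( ph + ( n - 1 ) * si ). The nested helper class is internal to the library, so this standalone sketch re-implements the same recurrence and compares it against sin():
    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double si = 0.1, ph = 0.3; // Arbitrary frequency step and phase.
        double s1 = std::sin( ph );      // sin( ph )
        double s2 = std::sin( ph - si ); // sin( ph - si )
        const double incr = 2.0 * std::cos( si );

        for( int n = 0; n < 8; n++ )
        {
            const double res = s1;       // Should equal sin( ph + n * si ).
            s1 = incr * res - s2;
            s2 = res;
            std::printf( "%d %.12f %.12f\n", n, res, std::sin( ph + n * si ));
        }

        return 0;
    }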
Structure defines source scanline positions and filters for each destination pixel.
Definition lancir.h:1109
intptr_t so
Offset within the source scanline, in pixels.
Definition lancir.h:1113
intptr_t spo
Source scanline's pixel offset, in bytes, or a direct pointer to scanline buffer.
Definition lancir.h:1111
const float * flt
Fractional delay filter.
Definition lancir.h:1110
Scanline resizing positions class.
Definition lancir.h:1125
int SrcLen
Current SrcLen.
Definition lancir.h:1260
int poslen
Allocated pos buffer's length.
Definition lancir.h:1259
int padl
Left-padding (in pixels) required for source scanline.
Definition lancir.h:1127
CResizePos * pos
Source scanline positions (offsets) and filters for each destination pixel position.
Definition lancir.h:1129
void update(const int SrcLen0, const int DstLen0, const double o0, CResizeFilters &rf, float *const sp=NULL)
Scanline positions update function.
Definition lancir.h:1172
void reset()
Object's reset function.
Definition lancir.h:1152
int padr
Right-padding (in pixels) required for source scanline.
Definition lancir.h:1128
double o
Current o.
Definition lancir.h:1262
void updateSPO(CResizeFilters &rf, float *const sp)
Scanline pixel offsets update function.
Definition lancir.h:1246
int DstLen
Current DstLen.
Definition lancir.h:1261