AVIR
High-quality pro image resizing library
 
Loading...
Searching...
No Matches
lancir.h
Go to the documentation of this file.
1
45
46#ifndef AVIR_CLANCIR_INCLUDED
47#define AVIR_CLANCIR_INCLUDED
48
49#include <cstring>
50#include <cmath>
51
52#if __cplusplus >= 201103L
53
54 #include <cstdint>
55
56#else // __cplusplus >= 201103L
57
58 #include <stdint.h>
59
60#endif // __cplusplus >= 201103L
61
67
73
74#if defined( __AVX__ )
75
76 #include <immintrin.h>
77
78 #define LANCIR_AVX
79 #define LANCIR_SSE2 // Some functions use SSE2; AVX has a higher priority.
80 #define LANCIR_ALIGN 32
81
82#elif defined( __aarch64__ ) || defined( __arm64__ ) || \
83 defined( _M_ARM64 ) || defined( _M_ARM64EC )
84
85 #if defined( _MSC_VER )
86 #include <arm64_neon.h>
87
88 #if _MSC_VER < 1925
89 #define LANCIR_ARM32 // Do not use some newer NEON intrinsics.
90 #endif // _MSC_VER < 1925
91 #else // defined( _MSC_VER )
92 #include <arm_neon.h>
93 #endif // defined( _MSC_VER )
94
95 #define LANCIR_NEON
96 #define LANCIR_ALIGN 16
97
98#elif defined( __ARM_NEON ) || defined( __ARM_NEON__ ) || defined( _M_ARM )
99
100 #include <arm_neon.h>
101
102 #define LANCIR_ARM32
103 #define LANCIR_NEON
104 #define LANCIR_ALIGN 16
105
106#elif defined( __SSE2__ ) || defined( _M_AMD64 ) || \
107 ( defined( _M_IX86_FP ) && _M_IX86_FP == 2 )
108
109 #if defined( _MSC_VER )
110 #include <intrin.h>
111 #else // defined( _MSC_VER )
112 #include <emmintrin.h>
113 #endif // defined( _MSC_VER )
114
115 #define LANCIR_SSE2
116 #define LANCIR_ALIGN 16
117
118#elif defined( __wasm_simd128__ )
119
120 #include <wasm_simd128.h>
121
122 #define LANCIR_WASM
123 #define LANCIR_ALIGN 16
124
125#else // WASM
126
127 #define LANCIR_ALIGN 4
128
129#endif // WASM
130
131#if defined( LANCIR_SSE2 )
132
133 #define lancvec_t __m128
134 #define lancvec_const_splat( v ) _mm_set1_ps( v )
135 #define lancvec_load( m ) _mm_load_ps( m )
136 #define lancvec_loadu( m ) _mm_loadu_ps( m )
137 #define lancvec_store( m, v ) _mm_store_ps( m, v )
138 #define lancvec_storeu( m, v ) _mm_storeu_ps( m, v )
139 #define lancvec_add( v1, v2 ) _mm_add_ps( v1, v2 )
140 #define lancvec_mul( v1, v2 ) _mm_mul_ps( v1, v2 )
141 #define lancvec_min( v1, v2 ) _mm_min_ps( v1, v2 )
142 #define lancvec_max( v1, v2 ) _mm_max_ps( v1, v2 )
143 #define lancvec_madd( va, v1, v2 ) _mm_add_ps( va, _mm_mul_ps( v1, v2 ))
144 #define lancvec_addhl( vl, vh ) _mm_add_ps( vl, _mm_movehl_ps( vh, vh ))
145 #define lancvec_store32_addhl( m, v ) \
146 _mm_store_ss( m, _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 )))
147
148 #define lancvec_store64_addhl( m, v ) \
149 _mm_storel_pi( (__m64*) ( m ), lancvec_addhl( v, v ))
150
151#elif defined( LANCIR_NEON )
152
153 #define lancvec_t float32x4_t
154 #define lancvec_const_splat( v ) vdupq_n_f32( v )
155 #define lancvec_load( m ) vld1q_f32( m )
156 #define lancvec_store( m, v ) vst1q_f32( m, v )
157 #define lancvec_add( v1, v2 ) vaddq_f32( v1, v2 )
158 #define lancvec_mul( v1, v2 ) vmulq_f32( v1, v2 )
159 #define lancvec_min( v1, v2 ) vminq_f32( v1, v2 )
160 #define lancvec_max( v1, v2 ) vmaxq_f32( v1, v2 )
161 #define lancvec_madd( va, v1, v2 ) vmlaq_f32( va, v1, v2 )
162
163 #if defined( LANCIR_ARM32 )
164 #define lancvec_store32_hadd( m, v ) { \
165 const float32x2_t v2 = vadd_f32( vget_high_f32( v ), \
166 vget_low_f32( v )); \
167 *( m ) = vget_lane_f32( v2, 0 ) + \
168 vget_lane_f32( v2, 1 ); } (void) 0
169 #else // defined( LANCIR_ARM32 )
170 #define lancvec_store32_hadd( m, v ) *( m ) = vaddvq_f32( v )
171 #endif // defined( LANCIR_ARM32 )
172
173 #define lancvec_store64_addhl( m, v ) \
174 vst1_f32( m, vadd_f32( vget_high_f32( v ), vget_low_f32( v )));
175
176#elif defined( LANCIR_WASM )
177
178 #define lancvec_t v128_t
179 #define lancvec_const_splat( v ) wasm_f32x4_const_splat( v )
180 #define lancvec_load32_splat( m ) wasm_v128_load32_splat( m )
181 #define lancvec_load( m ) wasm_v128_load( m )
182 #define lancvec_store( m, v ) wasm_v128_store( m, v )
183 #define lancvec_add( v1, v2 ) wasm_f32x4_add( v1, v2 )
184 #define lancvec_mul( v1, v2 ) wasm_f32x4_mul( v1, v2 )
185 #define lancvec_min( v1, v2 ) wasm_f32x4_min( v1, v2 )
186 #define lancvec_max( v1, v2 ) wasm_f32x4_max( v1, v2 )
187 #define lancvec_madd( va, v1, v2 ) wasm_f32x4_add( va, \
188 wasm_f32x4_mul( v1, v2 ))
189
190 #define lancvec_addhl( vl, vh ) wasm_f32x4_add( vl, \
191 wasm_i32x4_shuffle( vh, vh, 6, 7, 2, 3 ))
192
193 #define lancvec_store32_addhl( m, v ) \
194 *( m ) = ( wasm_f32x4_extract_lane( v, 0 ) + \
195 wasm_f32x4_extract_lane( v, 1 ))
196
197 #define lancvec_store64_addhl( m, v ) \
198 wasm_v128_store64_lane( m, lancvec_addhl( v, v ), 0 )
199
200#endif // defined( LANCIR_WASM )
201
202#if LANCIR_ALIGN > 4
203
204 #if !defined( lancvec_load32_splat )
205 #define lancvec_load32_splat( m ) lancvec_const_splat( *( m ))
206 #endif // !defined( lancvec_load32_splat )
207
208 #if !defined( lancvec_loadu )
209 #define lancvec_loadu( m ) lancvec_load( m )
210 #endif // !defined( lancvec_loadu )
211
212 #if !defined( lancvec_storeu )
213 #define lancvec_storeu( m, v ) lancvec_store( m, v )
214 #endif // !defined( lancvec_storeu )
215
216 #if !defined( lancvec_store32_hadd )
217 #define lancvec_store32_hadd( m, v ) { \
218 const lancvec_t v2 = lancvec_addhl( v, v ); \
219 lancvec_store32_addhl( m, v2 ); } (void) 0
220 #endif // !defined( lancvec_store32_hadd )
221
222#endif // LANCIR_ALIGN > 4
223
224namespace avir {
225
226using std :: memcpy;
227using std :: memset;
228using std :: fabs;
229using std :: floor;
230using std :: ceil;
231using std :: sin;
232using std :: cos;
233using std :: size_t;
234
235#if __cplusplus >= 201103L
236
237 using std :: intptr_t;
238 using std :: uintptr_t;
239
240#else // __cplusplus >= 201103L
241
242 // Workaround for pre-C++11 compilers. `nullptr` is a keyword, and not a
243 // macro, but check if such workaround is already in place.
244
245 #if !defined( nullptr )
246 #define nullptr NULL
247 #define LANCIR_NULLPTR
248 #endif // !defined( nullptr )
249
250#endif // __cplusplus >= 201103L
251
259
261{
262public:
269 double kx;
275 double ky;
276 double ox;
278 double oy;
280 double la;
282
294
	// Constructor: assigns default or user-specified resizing parameters.
	//
	// aSrcSSize: Source scanline size, in elements; values below 1 select
	//   automatic size calculation (SrcWidth * ElCount).
	// aNewSSize: Destination scanline size, in elements; values below 1
	//   select automatic size calculation (NewWidth * ElCount).
	// akx, aky: Horizontal and vertical resizing steps; 0.0 selects an
	//   automatic step derived from the source/destination dimensions.
	// aox, aoy: Horizontal and vertical source start offsets, in pixels.
	//
	// Note: `la` (the Lanczos "a" support parameter) is fixed to 3.0 here;
	// it can be adjusted on the object after construction.

	CLancIRParams( const int aSrcSSize = 0, const int aNewSSize = 0,
		const double akx = 0.0, const double aky = 0.0,
		const double aox = 0.0, const double aoy = 0.0 )
		: SrcSSize( aSrcSSize )
		, NewSSize( aNewSSize )
		, kx( akx )
		, ky( aky )
		, ox( aox )
		, oy( aoy )
		, la( 3.0 )
	{
	}
307};
308
326
327class CLancIR
328{
329private:
	// Private copy constructor: copying is intentionally prohibited
	// (pre-C++11 equivalent of `= delete`), since the object owns raw
	// heap buffers that must not be shared between instances.
	CLancIR( const CLancIR& )
	{
		// Unsupported.
	}
334
	// Private copy-assignment operator: assignment is intentionally
	// prohibited (pre-C++11 equivalent of `= delete`); the stub body only
	// exists to satisfy pre-C++11 compilers.
	CLancIR& operator = ( const CLancIR& )
	{
		// Unsupported.
		return( *this );
	}
340
341public:
342 CLancIR()
343 : FltBuf0( nullptr )
344 , FltBuf0Len( 0 )
345 , spv0( nullptr )
346 , spv0len( 0 )
347 , spv( nullptr )
348 {
349 }
350
	// Destructor. Releases the owned allocation bases only. `FltBuf` and
	// `spv` are aligned aliases into `FltBuf0` and `spv0` (see the aligned
	// reallocBuf()) and must not be deleted themselves.
	~CLancIR()
	{
		delete[] FltBuf0;
		delete[] spv0;
	}
356
385
386 template< typename Tin, typename Tout >
387 int resizeImage( const Tin* const SrcBuf, const int SrcWidth,
388 const int SrcHeight, Tout* const NewBuf, const int NewWidth,
389 const int NewHeight, const int ElCount,
390 const CLancIRParams* const aParams = nullptr )
391 {
392 if(( SrcWidth < 0 ) | ( SrcHeight < 0 ) |
393 ( NewWidth <= 0 ) | ( NewHeight <= 0 ) |
394 ( SrcBuf == nullptr ) | ( NewBuf == nullptr ) |
395 ( (const void*) SrcBuf == (const void*) NewBuf ))
396 {
397 return( 0 );
398 }
399
400 static const CLancIRParams DefParams;
401 const CLancIRParams& Params = ( aParams != nullptr ?
402 *aParams : DefParams );
403
404 if( Params.la < 2.0 )
405 {
406 return( 0 );
407 }
408
409 const int OutSLen = NewWidth * ElCount;
410 const size_t NewScanlineSize = (size_t) ( Params.NewSSize < 1 ?
411 OutSLen : Params.NewSSize );
412
413 if(( SrcWidth == 0 ) | ( SrcHeight == 0 ))
414 {
415 Tout* op = NewBuf;
416 int i;
417
418 for( i = 0; i < NewHeight; i++ )
419 {
420 memset( op, 0, (size_t) OutSLen * sizeof( Tout ));
421 op += NewScanlineSize;
422 }
423
424 return( NewHeight );
425 }
426
427 const size_t SrcScanlineSize = (size_t) ( Params.SrcSSize < 1 ?
428 SrcWidth * ElCount : Params.SrcSSize );
429
430 double ox = Params.ox;
431 double oy = Params.oy;
432 double kx;
433 double ky;
434
435 if( Params.kx >= 0.0 )
436 {
437 kx = ( Params.kx == 0.0 ?
438 (double) SrcWidth / NewWidth : Params.kx );
439
440 ox += ( kx - 1.0 ) * 0.5;
441 }
442 else
443 {
444 kx = -Params.kx;
445 }
446
447 if( Params.ky >= 0.0 )
448 {
449 ky = ( Params.ky == 0.0 ?
450 (double) SrcHeight / NewHeight : Params.ky );
451
452 oy += ( ky - 1.0 ) * 0.5;
453 }
454 else
455 {
456 ky = -Params.ky;
457 }
458
459 if( rfv.update( Params.la, ky, ElCount ))
460 {
461 rsv.reset();
462 rsh.reset();
463 }
464
465 CResizeFilters* rfh; // Pointer to resizing filters for horizontal
466 // resizing, may equal to `rfv` if the same stepping is in use.
467
468 if( kx == ky )
469 {
470 rfh = &rfv;
471 }
472 else
473 {
474 rfh = &rfh0;
475
476 if( rfh0.update( Params.la, kx, ElCount ))
477 {
478 rsh.reset();
479 }
480 }
481
482 rsv.update( SrcHeight, NewHeight, oy, rfv, spv );
483 rsh.update( SrcWidth, NewWidth, ox, *rfh );
484
485 // Calculate vertical progressive resizing's batch size. Progressive
486 // batching is used to try to keep addressing within the cache
487 // capacity. This technique definitely works well for single-threaded
488 // resizing on most CPUs, but may not provide an additional benefit
489 // for multi-threaded resizing, or in a system-wide high-load
490 // situations.
491
492 const size_t FltWidthE = (size_t) (( rsh.padl + SrcWidth +
493 rsh.padr ) * ElCount );
494
495 const double CacheSize = 5500000.0; // Tuned for various CPUs.
496 const double OpSize = (double) SrcScanlineSize * SrcHeight *
497 sizeof( Tin ) + (double) FltWidthE * NewHeight * sizeof( float );
498
499 int BatchSize = (int) ( NewHeight * CacheSize / ( OpSize + 1.0 ));
500
501 if( BatchSize < 8 )
502 {
503 BatchSize = 8;
504 }
505
506 if( BatchSize > NewHeight )
507 {
508 BatchSize = NewHeight;
509 }
510
511 // Allocate/resize intermediate buffers.
512
513 const int svs = ( rsv.padl + SrcHeight + rsv.padr ) * ElCount;
514 float* const pspv0 = spv0;
515 reallocBuf( spv0, spv, spv0len, ( svs > OutSLen ? svs : OutSLen ));
517 FltWidthE * (size_t) BatchSize );
518
519 if( spv0 != pspv0 )
520 {
521 rsv.updateSPO( rfv, spv );
522 }
523
524 // Prepare output-related constants.
525
526 static const bool IsInFloat = ( (Tin) 0.25f != 0 );
527 static const bool IsOutFloat = ( (Tout) 0.25f != 0 );
528 static const bool IsUnityMul = ( IsInFloat && IsOutFloat ) ||
529 ( IsInFloat == IsOutFloat && sizeof( Tin ) == sizeof( Tout ));
530
531 const float Clamp = ( sizeof( Tout ) == 1 ? 255.0f : 65535.0f );
532 const float OutMul = ( IsOutFloat ? 1.0f : Clamp ) /
533 ( IsInFloat ? 1.0f : ( sizeof( Tin ) == 1 ? 255.0f : 65535.0f ));
534
535 // Perform batched resizing.
536
537 const CResizePos* rpv = rsv.pos;
538 Tout* opn = NewBuf;
539 int bl = NewHeight;
540
541 while( bl > 0 )
542 {
543 const int bc = ( bl > BatchSize ? BatchSize : bl );
544
545 int kl = rfv.KernelLen;
546 const Tin* ip = SrcBuf;
547 float* op = FltBuf + rsh.padl * ElCount;
548
549 const int so = (int) rpv[ 0 ].so;
550 float* const sp = spv + so * ElCount;
551
552 int cc = (int) rpv[ bc - 1 ].so - so + kl; // Pixel copy count.
553 int rl = 0; // Leftmost pixel's replication count.
554 int rr = 0; // Rightmost pixel's replication count.
555
556 const int socc = so + cc;
557 const int spe = rsv.padl + SrcHeight;
558
559 // Calculate scanline copying and padding parameters, depending on
560 // the batch's size and its vertical offset.
561
562 if( so < rsv.padl )
563 {
564 if( socc <= rsv.padl )
565 {
566 rl = cc;
567 cc = 0;
568 }
569 else
570 {
571 if( socc > spe )
572 {
573 rr = socc - spe;
574 cc -= rr;
575 }
576
577 rl = rsv.padl - so;
578 cc -= rl;
579 }
580 }
581 else
582 {
583 if( so >= spe )
584 {
585 rr = cc;
586 cc = 0;
587 ip += (size_t) SrcHeight * SrcScanlineSize;
588 }
589 else
590 {
591 if( socc > spe )
592 {
593 rr = socc - spe;
594 cc -= rr;
595 }
596
597 ip += (size_t) ( so - rsv.padl ) * SrcScanlineSize;
598 }
599 }
600
601 // Batched vertical resizing.
602
603 int i;
604
605 if( ElCount == 1 )
606 {
607 for( i = 0; i < SrcWidth; i++ )
608 {
609 copyScanline1v( ip, SrcScanlineSize, sp, cc, rl, rr );
610 resize1< false >( nullptr, op, FltWidthE, rpv, kl, bc );
611 ip += 1;
612 op += 1;
613 }
614 }
615 else
616 if( ElCount == 2 )
617 {
618 for( i = 0; i < SrcWidth; i++ )
619 {
620 copyScanline2v( ip, SrcScanlineSize, sp, cc, rl, rr );
621 resize2< false >( nullptr, op, FltWidthE, rpv, kl, bc );
622 ip += 2;
623 op += 2;
624 }
625 }
626 else
627 if( ElCount == 3 )
628 {
629 for( i = 0; i < SrcWidth; i++ )
630 {
631 copyScanline3v( ip, SrcScanlineSize, sp, cc, rl, rr );
632 resize3< false >( nullptr, op, FltWidthE, rpv, kl, bc );
633 ip += 3;
634 op += 3;
635 }
636 }
637 else // ElCount == 4
638 {
639 for( i = 0; i < SrcWidth; i++ )
640 {
641 copyScanline4v( ip, SrcScanlineSize, sp, cc, rl, rr );
642 resize4< false >( nullptr, op, FltWidthE, rpv, kl, bc );
643 ip += 4;
644 op += 4;
645 }
646 }
647
648 // Perform horizontal resizing batch, and produce final output.
649
650 float* ipf = FltBuf;
651 kl = rfh -> KernelLen;
652
653 if( ElCount == 1 )
654 {
655 for( i = 0; i < bc; i++ )
656 {
657 padScanline1h( ipf, rsh, SrcWidth );
658 resize1< true >( ipf, spv, 1, rsh.pos, kl, NewWidth );
660 OutSLen, Clamp, OutMul );
661
662 ipf += FltWidthE;
663 opn += NewScanlineSize;
664 }
665 }
666 else
667 if( ElCount == 2 )
668 {
669 for( i = 0; i < bc; i++ )
670 {
671 padScanline2h( ipf, rsh, SrcWidth );
672 resize2< true >( ipf, spv, 2, rsh.pos, kl, NewWidth );
674 OutSLen, Clamp, OutMul );
675
676 ipf += FltWidthE;
677 opn += NewScanlineSize;
678 }
679 }
680 else
681 if( ElCount == 3 )
682 {
683 for( i = 0; i < bc; i++ )
684 {
685 padScanline3h( ipf, rsh, SrcWidth );
686 resize3< true >( ipf, spv, 3, rsh.pos, kl, NewWidth );
688 OutSLen, Clamp, OutMul );
689
690 ipf += FltWidthE;
691 opn += NewScanlineSize;
692 }
693 }
694 else // ElCount == 4
695 {
696 for( i = 0; i < bc; i++ )
697 {
698 padScanline4h( ipf, rsh, SrcWidth );
699 resize4< true >( ipf, spv, 4, rsh.pos, kl, NewWidth );
701 OutSLen, Clamp, OutMul );
702
703 ipf += FltWidthE;
704 opn += NewScanlineSize;
705 }
706 }
707
708 rpv += bc;
709 bl -= bc;
710 }
711
712 return( NewHeight );
713 }
714
743
744 template< typename Tin, typename Tout >
745 int resizeImage( const Tin* const SrcBuf, const int SrcWidth,
746 const int SrcHeight, const int SrcSSize, Tout* const NewBuf,
747 const int NewWidth, const int NewHeight, const int NewSSize,
748 const int ElCount, const double kx0 = 0.0, const double ky0 = 0.0,
749 double ox = 0.0, double oy = 0.0 )
750 {
751 const CLancIRParams Params( SrcSSize, NewSSize, kx0, ky0, ox, oy );
752
753 return( resizeImage( SrcBuf, SrcWidth, SrcHeight, NewBuf, NewWidth,
754 NewHeight, ElCount, &Params ));
755 }
756
757protected:
758 float* FltBuf0;
759 size_t FltBuf0Len;
760 float* FltBuf;
761 float* spv0;
764 float* spv;
765
781
782 template< typename Tb, typename Tl >
783 static void reallocBuf( Tb*& buf0, Tb*& buf, Tl& len, Tl newlen )
784 {
785 newlen += LANCIR_ALIGN;
786
787 if( newlen > len )
788 {
789 if( buf0 != nullptr )
790 {
791 delete[] buf0;
792 buf0 = nullptr;
793 len = 0;
794 }
795
796 buf0 = new Tb[ newlen ];
797 len = newlen;
798 buf = (Tb*) (( (uintptr_t) buf0 + LANCIR_ALIGN - 1 ) &
799 ~(uintptr_t) ( LANCIR_ALIGN - 1 ));
800 }
801 }
802
816
817 template< typename Tb, typename Tl >
818 static void reallocBuf( Tb*& buf, Tl& len, const Tl newlen )
819 {
820 if( newlen > len )
821 {
822 if( buf != nullptr )
823 {
824 delete[] buf;
825 buf = nullptr;
826 len = 0;
827 }
828
829 buf = new Tb[ newlen ];
830 len = newlen;
831 }
832 }
833
834 class CResizeScanline;
835
839
840 class CResizeFilters
841 {
842 friend class CResizeScanline;
843
844 public:
848
		// Constructor: marks all filter storage as empty. `la == 0.0`
		// serves as a "not yet configured" sentinel: callers require
		// `la >= 2.0`, so the first update() call can never match the
		// cached state and always rebuilds the filter bank.
		CResizeFilters()
			: Filters( nullptr )
			, FiltersLen( 0 )
			, la( 0.0 )
		{
			memset( Bufs0, 0, sizeof( Bufs0 ));
			memset( Bufs0Len, 0, sizeof( Bufs0Len ));
		}
857
859 {
860 int i;
861
862 for( i = 0; i < BufCount; i++ )
863 {
864 delete[] Bufs0[ i ];
865 }
866
867 delete[] Filters;
868 }
869
881
882 bool update( const double la0, const double k0, const int ElCount0 )
883 {
884 if( la0 == la && k0 == k && ElCount0 == ElCount )
885 {
886 return( false );
887 }
888
889 const double NormFreq = ( k0 <= 1.0 ? 1.0 : 1.0 / k0 );
890 Freq = 3.1415926535897932 * NormFreq;
891 FreqA = Freq / la0;
892
893 Len2 = la0 / NormFreq;
894 fl2 = (int) ceil( Len2 );
895 KernelLen = fl2 + fl2;
896
897 #if LANCIR_ALIGN > 4
898
899 ElRepl = ElCount0;
901
902 const int elalign =
903 (int) ( LANCIR_ALIGN / sizeof( float )) - 1;
904
905 KernelLenA = ( KernelLenA + elalign ) & ~elalign;
906
907 #else // LANCIR_ALIGN > 4
908
909 ElRepl = 1;
911
912 #endif // LANCIR_ALIGN > 4
913
914 FracCount = 1000; // Enough for Lanczos implicit 8-bit precision.
915
916 la = 0.0;
918
919 memset( Filters, 0, (size_t) FiltersLen * sizeof( Filters[ 0 ]));
920
921 setBuf( 0 );
922
923 la = la0;
924 k = k0;
925 ElCount = ElCount0;
926
927 return( true );
928 }
929
939
940 const float* getFilter( const double x )
941 {
942 const int Frac = (int) ( x * FracCount + 0.5 );
943 float* flt = Filters[ Frac ];
944
945 if( flt != nullptr )
946 {
947 return( flt );
948 }
949
950 flt = Bufs[ CurBuf ] + CurBufFill * KernelLenA;
951 Filters[ Frac ] = flt;
952 CurBufFill++;
953
954 if( CurBufFill == BufLen )
955 {
956 setBuf( CurBuf + 1 );
957 }
958
959 makeFilterNorm( flt, 1.0 - (double) Frac / FracCount );
960
961 if( ElRepl > 1 )
962 {
964 }
965
966 return( flt );
967 }
968
969 protected:
970 double Freq;
971 double FreqA;
972 double Len2;
973 int fl2;
978 int ElRepl;
979 static const int BufCount = 4;
981 static const int BufLen = 256;
984 float* Bufs0[ BufCount ];
988 float* Bufs[ BufCount ];
989 int CurBuf;
992 float** Filters;
996 double la;
997 double k;
999
1008
		// Activates filter-storage buffer `bi`, allocating it on first use
		// (room for BufLen filters of KernelLenA floats each, aligned via
		// the 4-argument reallocBuf()), and resets the fill counter.
		void setBuf( const int bi )
		{
			reallocBuf( Bufs0[ bi ], Bufs[ bi ], Bufs0Len[ bi ],
				BufLen * KernelLenA );

			CurBuf = bi;
			CurBufFill = 0;
		}
1017
1025
1027 {
1028 public:
1037
			// Initializes the recurrence so that successive generate()
			// calls produce sin( ph ), sin( ph + si ), sin( ph + 2*si ),
			// ... via the identity sin(x+si) = 2*cos(si)*sin(x) - sin(x-si).
			//
			// si: Phase increment per generated sample, in radians.
			// ph: Starting phase, in radians.
			CSineGen( const double si, const double ph )
				: svalue1( sin( ph ))
				, svalue2( sin( ph - si ))
				, sincr( 2.0 * cos( si ))
			{
			}
1044
1048
			// Returns the current sine value and advances the recurrence
			// by one step. Statement order matters here: `svalue2` must
			// receive the pre-update `svalue1` value.
			double generate()
			{
				const double res = svalue1;

				svalue1 = sincr * res - svalue2;
				svalue2 = res;

				return( res );
			}
1058
1059 private:
1060 double svalue1;
1061 double svalue2;
1062 double sincr;
1063 };
1064
1075
		// Builds one windowed-sinc (Lanczos) filter at fractional delay
		// `FracDelay` into `op`, then normalizes the taps so they sum to
		// 1.0 (unity DC gain). Two recurrence oscillators are used: `f`
		// for the sinc numerator and `fw` for the Lanczos window.
		// NOTE(review): assumes `op` has room for KernelLen taps — confirm
		// at call sites.
		void makeFilterNorm( float* op, const double FracDelay ) const
		{
			CSineGen f( Freq, Freq * ( FracDelay - fl2 ));
			CSineGen fw( FreqA, FreqA * ( FracDelay - fl2 ));

			float* op0 = op;
			double s = 0.0; // Running sum of taps, for normalization.
			double ut;

			int t = -fl2;

			// Leading tap may fall outside the window's support: emit a
			// zero tap, but still advance both oscillators to keep phase.

			if( t + FracDelay < -Len2 )
			{
				f.generate();
				fw.generate();
				*op = 0;
				op++;
				t++;
			}

			// Detect FracDelay values of ~0 or ~1 where `t + FracDelay`
			// crosses zero exactly; the sinc*window limit there equals
			// Freq * FreqA, avoiding a 0/0 division.

			int IsZeroX = ( fabs( FracDelay - 1.0 ) < 2.3e-13 );
			int mt = 0 - IsZeroX;
			IsZeroX |= ( fabs( FracDelay ) < 2.3e-13 );

			while( t < mt )
			{
				ut = t + FracDelay;
				*op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
				s += *op;
				op++;
				t++;
			}

			if( IsZeroX ) // t+FracDelay==0
			{
				*op = (float) ( Freq * FreqA );
				s += *op;
				f.generate();
				fw.generate();
			}
			else
			{
				ut = FracDelay; // t==0
				*op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
				s += *op;
			}

			mt = fl2 - 2;

			while( t < mt )
			{
				op++;
				t++;
				ut = t + FracDelay;
				*op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
				s += *op;
			}

			// Trailing tap: zeroed if it lies beyond the window's support.

			op++;
			ut = t + 1 + FracDelay;

			if( ut > Len2 )
			{
				*op = 0;
			}
			else
			{
				*op = (float) ( f.generate() * fw.generate() / ( ut * ut ));
				s += *op;
			}

			// Normalize all produced taps to a unity sum.

			s = 1.0 / s;
			t = (int) ( op - op0 + 1 );

			while( t != 0 )
			{
				*op0 = (float) ( *op0 * s );
				op0++;
				t--;
			}
		}
1157
1170
1171 static void replicateFilter( float* const p, const int kl,
1172 const int erp )
1173 {
1174 const float* ip = p + kl - 1;
1175 float* op = p + ( kl - 1 ) * erp;
1176 int c = kl;
1177
1178 if( erp == 2 )
1179 {
1180 while( c != 0 )
1181 {
1182 const float v = *ip;
1183 op[ 0 ] = v;
1184 op[ 1 ] = v;
1185 ip--;
1186 op -= 2;
1187 c--;
1188 }
1189 }
1190 else
1191 if( erp == 3 )
1192 {
1193 while( c != 0 )
1194 {
1195 const float v = *ip;
1196 op[ 0 ] = v;
1197 op[ 1 ] = v;
1198 op[ 2 ] = v;
1199 ip--;
1200 op -= 3;
1201 c--;
1202 }
1203 }
1204 else // erp == 4
1205 {
1206 while( c != 0 )
1207 {
1208 const float v = *ip;
1209 op[ 0 ] = v;
1210 op[ 1 ] = v;
1211 op[ 2 ] = v;
1212 op[ 3 ] = v;
1213 ip--;
1214 op -= 4;
1215 c--;
1216 }
1217 }
1218 }
1219 };
1220
1225
1227 {
1228 const float* flt;
1229 intptr_t spo;
1231 intptr_t so;
1232 };
1233
1241
1242 class CResizeScanline
1243 {
1244 public:
1245 int padl;
1246 int padr;
1249
		// Constructor: starts with no position table. `SrcLen == 0` marks
		// the object as requiring a full recalculation on the next
		// update() call (see also reset()).
		CResizeScanline()
			: pos( nullptr )
			, poslen( 0 )
			, SrcLen( 0 )
		{
		}
1256
1258 {
1259 delete[] pos;
1260 }
1261
1269
		// Invalidates the cached state so the next update() call fully
		// recalculates the position table (used when the associated
		// filter bank changes).
		void reset()
		{
			SrcLen = 0;
		}
1274
1289
		// Recalculates the per-output-pixel resizing position table, and
		// the left/right padding sizes, unless source length, destination
		// length, and offset are all unchanged since the previous call.
		// Filter-bank changes are handled externally via reset().
		//
		// SrcLen0: Source scanline length, in pixels.
		// DstLen0: Destination scanline length, in pixels.
		// o0: Initial source offset, in pixels.
		// rf: Filter bank supplying per-fraction filter taps.
		// sp: Optional scanline base pointer used to precompute the
		//   absolute `spo` addresses (may be updated later via updateSPO()).
		void update( const int SrcLen0, const int DstLen0, const double o0,
			CResizeFilters& rf, float* const sp = nullptr )
		{
			if( SrcLen0 == SrcLen && DstLen0 == DstLen && o0 == o )
			{
				// Parameters unchanged - the existing table is valid.
				return;
			}

			// Left padding: number of replicated pixels needed before the
			// source data so the first filter application stays in range.

			const int fl2m1 = rf.fl2 - 1;
			padl = fl2m1 - (int) floor( o0 );

			if( padl < 0 )
			{
				padl = 0;
			}

			// Make sure `padr` and `pos` are in sync: calculate ending `pos`
			// offset in advance.

			const double k = rf.k;

			const int DstLen_m1 = DstLen0 - 1;
			const double oe = o0 + k * DstLen_m1;
			const int ie = (int) floor( oe );

			padr = ie + rf.fl2 + 1 - SrcLen0;

			if( padr < 0 )
			{
				padr = 0;
			}

			// Invalidate while rebuilding, so state stays consistent if
			// an allocation below throws.

			SrcLen = 0;
			reallocBuf( pos, poslen, DstLen0 );

			const intptr_t ElCountF = rf.ElCount * (intptr_t) sizeof( float );
			const int so = padl - fl2m1;
			CResizePos* rp = pos;
			intptr_t rpso;
			int i;

			for( i = 0; i < DstLen_m1; i++ )
			{
				const double ox = o0 + k * i;
				const int ix = (int) floor( ox );

				// Fractional part selects the filter; integer part selects
				// the source offset within the padded scanline.

				rp -> flt = rf.getFilter( ox - ix );
				rpso = so + ix;
				rp -> spo = (intptr_t) sp + rpso * ElCountF;
				rp -> so = rpso;
				rp++;
			}

			// Last position uses the pre-calculated `oe`/`ie`, to match
			// the `padr` computed above.

			rp -> flt = rf.getFilter( oe - ie );
			rpso = so + ie;
			rp -> spo = (intptr_t) sp + rpso * ElCountF;
			rp -> so = rpso;

			SrcLen = SrcLen0;
			DstLen = DstLen0;
			o = o0;
		}
1352
1363
1364 void updateSPO( CResizeFilters& rf, float* const sp )
1365 {
1366 const intptr_t ElCountF = rf.ElCount * (intptr_t) sizeof( float );
1367 CResizePos* const rp = pos;
1368 int i;
1369
1370 for( i = 0; i < DstLen; i++ )
1371 {
1372 rp[ i ].spo = (intptr_t) sp + rp[ i ].so * ElCountF;
1373 }
1374 }
1375
1376 protected:
1380 double o;
1381 };
1382
1388
1405
1406 template< typename T >
1407 static void copyScanline1v( const T* ip, const size_t ipinc, float* op,
1408 int cc, int repl, int repr )
1409 {
1410 float v0;
1411
1412 if( repl > 0 )
1413 {
1414 v0 = (float) ip[ 0 ];
1415
1416 do
1417 {
1418 op[ 0 ] = v0;
1419 op += 1;
1420
1421 } while( --repl != 0 );
1422 }
1423
1424 while( cc != 0 )
1425 {
1426 op[ 0 ] = (float) ip[ 0 ];
1427 ip += ipinc;
1428 op += 1;
1429 cc--;
1430 }
1431
1432 if( repr > 0 )
1433 {
1434 const T* const ipe = ip - ipinc;
1435 v0 = (float) ipe[ 0 ];
1436
1437 do
1438 {
1439 op[ 0 ] = v0;
1440 op += 1;
1441
1442 } while( --repr != 0 );
1443 }
1444 }
1445
1446 template< typename T >
1447 static void copyScanline2v( const T* ip, const size_t ipinc, float* op,
1448 int cc, int repl, int repr )
1449 {
1450 float v0, v1;
1451
1452 if( repl > 0 )
1453 {
1454 v0 = (float) ip[ 0 ];
1455 v1 = (float) ip[ 1 ];
1456
1457 do
1458 {
1459 op[ 0 ] = v0;
1460 op[ 1 ] = v1;
1461 op += 2;
1462
1463 } while( --repl != 0 );
1464 }
1465
1466 while( cc != 0 )
1467 {
1468 op[ 0 ] = (float) ip[ 0 ];
1469 op[ 1 ] = (float) ip[ 1 ];
1470 ip += ipinc;
1471 op += 2;
1472 cc--;
1473 }
1474
1475 if( repr > 0 )
1476 {
1477 const T* const ipe = ip - ipinc;
1478 v0 = (float) ipe[ 0 ];
1479 v1 = (float) ipe[ 1 ];
1480
1481 do
1482 {
1483 op[ 0 ] = v0;
1484 op[ 1 ] = v1;
1485 op += 2;
1486
1487 } while( --repr != 0 );
1488 }
1489 }
1490
1491 template< typename T >
1492 static void copyScanline3v( const T* ip, const size_t ipinc, float* op,
1493 int cc, int repl, int repr )
1494 {
1495 float v0, v1, v2;
1496
1497 if( repl > 0 )
1498 {
1499 v0 = (float) ip[ 0 ];
1500 v1 = (float) ip[ 1 ];
1501 v2 = (float) ip[ 2 ];
1502
1503 do
1504 {
1505 op[ 0 ] = v0;
1506 op[ 1 ] = v1;
1507 op[ 2 ] = v2;
1508 op += 3;
1509
1510 } while( --repl != 0 );
1511 }
1512
1513 while( cc != 0 )
1514 {
1515 op[ 0 ] = (float) ip[ 0 ];
1516 op[ 1 ] = (float) ip[ 1 ];
1517 op[ 2 ] = (float) ip[ 2 ];
1518 ip += ipinc;
1519 op += 3;
1520 cc--;
1521 }
1522
1523 if( repr > 0 )
1524 {
1525 const T* const ipe = ip - ipinc;
1526 v0 = (float) ipe[ 0 ];
1527 v1 = (float) ipe[ 1 ];
1528 v2 = (float) ipe[ 2 ];
1529
1530 do
1531 {
1532 op[ 0 ] = v0;
1533 op[ 1 ] = v1;
1534 op[ 2 ] = v2;
1535 op += 3;
1536
1537 } while( --repr != 0 );
1538 }
1539 }
1540
1541 template< typename T >
1542 static void copyScanline4v( const T* ip, const size_t ipinc, float* op,
1543 int cc, int repl, int repr )
1544 {
1545 float v0, v1, v2, v3;
1546
1547 if( repl > 0 )
1548 {
1549 v0 = (float) ip[ 0 ];
1550 v1 = (float) ip[ 1 ];
1551 v2 = (float) ip[ 2 ];
1552 v3 = (float) ip[ 3 ];
1553
1554 do
1555 {
1556 op[ 0 ] = v0;
1557 op[ 1 ] = v1;
1558 op[ 2 ] = v2;
1559 op[ 3 ] = v3;
1560 op += 4;
1561
1562 } while( --repl != 0 );
1563 }
1564
1565 while( cc != 0 )
1566 {
1567 op[ 0 ] = (float) ip[ 0 ];
1568 op[ 1 ] = (float) ip[ 1 ];
1569 op[ 2 ] = (float) ip[ 2 ];
1570 op[ 3 ] = (float) ip[ 3 ];
1571 ip += ipinc;
1572 op += 4;
1573 cc--;
1574 }
1575
1576 if( repr > 0 )
1577 {
1578 const T* const ipe = ip - ipinc;
1579 v0 = (float) ipe[ 0 ];
1580 v1 = (float) ipe[ 1 ];
1581 v2 = (float) ipe[ 2 ];
1582 v3 = (float) ipe[ 3 ];
1583
1584 do
1585 {
1586 op[ 0 ] = v0;
1587 op[ 1 ] = v1;
1588 op[ 2 ] = v2;
1589 op[ 3 ] = v3;
1590 op += 4;
1591
1592 } while( --repr != 0 );
1593 }
1594 }
1595
1597
1610
1611 static void padScanline1h( float* op, CResizeScanline& rs, const int l )
1612 {
1613 const float* ip = op + rs.padl;
1614
1615 float v0 = ip[ 0 ];
1616 int i;
1617
1618 for( i = 0; i < rs.padl; i++ )
1619 {
1620 op[ i ] = v0;
1621 }
1622
1623 ip += l;
1624 op += rs.padl + l;
1625
1626 v0 = ip[ -1 ];
1627
1628 for( i = 0; i < rs.padr; i++ )
1629 {
1630 op[ i ] = v0;
1631 }
1632 }
1633
1634 static void padScanline2h( float* op, CResizeScanline& rs, const int l )
1635 {
1636 const float* ip = op + rs.padl * 2;
1637
1638 float v0 = ip[ 0 ];
1639 float v1 = ip[ 1 ];
1640 int i;
1641
1642 for( i = 0; i < rs.padl; i++ )
1643 {
1644 op[ 0 ] = v0;
1645 op[ 1 ] = v1;
1646 op += 2;
1647 }
1648
1649 const int lc = l * 2;
1650 ip += lc;
1651 op += lc;
1652
1653 v0 = ip[ -2 ];
1654 v1 = ip[ -1 ];
1655
1656 for( i = 0; i < rs.padr; i++ )
1657 {
1658 op[ 0 ] = v0;
1659 op[ 1 ] = v1;
1660 op += 2;
1661 }
1662 }
1663
1664 static void padScanline3h( float* op, CResizeScanline& rs, const int l )
1665 {
1666 const float* ip = op + rs.padl * 3;
1667
1668 float v0 = ip[ 0 ];
1669 float v1 = ip[ 1 ];
1670 float v2 = ip[ 2 ];
1671 int i;
1672
1673 for( i = 0; i < rs.padl; i++ )
1674 {
1675 op[ 0 ] = v0;
1676 op[ 1 ] = v1;
1677 op[ 2 ] = v2;
1678 op += 3;
1679 }
1680
1681 const int lc = l * 3;
1682 ip += lc;
1683 op += lc;
1684
1685 v0 = ip[ -3 ];
1686 v1 = ip[ -2 ];
1687 v2 = ip[ -1 ];
1688
1689 for( i = 0; i < rs.padr; i++ )
1690 {
1691 op[ 0 ] = v0;
1692 op[ 1 ] = v1;
1693 op[ 2 ] = v2;
1694 op += 3;
1695 }
1696 }
1697
1698 static void padScanline4h( float* op, CResizeScanline& rs, const int l )
1699 {
1700 const float* ip = op + rs.padl * 4;
1701
1702 float v0 = ip[ 0 ];
1703 float v1 = ip[ 1 ];
1704 float v2 = ip[ 2 ];
1705 float v3 = ip[ 3 ];
1706 int i;
1707
1708 for( i = 0; i < rs.padl; i++ )
1709 {
1710 op[ 0 ] = v0;
1711 op[ 1 ] = v1;
1712 op[ 2 ] = v2;
1713 op[ 3 ] = v3;
1714 op += 4;
1715 }
1716
1717 const int lc = l * 4;
1718 ip += lc;
1719 op += lc;
1720
1721 v0 = ip[ -4 ];
1722 v1 = ip[ -3 ];
1723 v2 = ip[ -2 ];
1724 v3 = ip[ -1 ];
1725
1726 for( i = 0; i < rs.padr; i++ )
1727 {
1728 op[ 0 ] = v0;
1729 op[ 1 ] = v1;
1730 op[ 2 ] = v2;
1731 op[ 3 ] = v3;
1732 op += 4;
1733 }
1734 }
1735
1737
1745
1746 static inline int roundclamp( const float v, const float Clamp )
1747 {
1748 return( (int) (( v > Clamp ? Clamp : ( v < 0.0f ? 0.0f : v )) +
1749 0.5f ));
1750 }
1751
1771
1772 template< bool IsOutFloat, bool IsUnityMul, typename T >
1773 static void outputScanline( const float* ip, T* op, int l,
1774 const float Clamp, const float OutMul )
1775 {
1776 if( IsOutFloat )
1777 {
1778 if( IsUnityMul )
1779 {
1780 if( sizeof( op[ 0 ]) == sizeof( ip[ 0 ]))
1781 {
1782 memcpy( op, ip, (size_t) l * sizeof( op[ 0 ]));
1783 }
1784 else
1785 {
1786 int l4 = l >> 2;
1787 l &= 3;
1788
1789 while( l4 != 0 )
1790 {
1791 op[ 0 ] = (T) ip[ 0 ];
1792 op[ 1 ] = (T) ip[ 1 ];
1793 op[ 2 ] = (T) ip[ 2 ];
1794 op[ 3 ] = (T) ip[ 3 ];
1795 ip += 4;
1796 op += 4;
1797 l4--;
1798 }
1799
1800 while( l != 0 )
1801 {
1802 *op = (T) *ip;
1803 ip++;
1804 op++;
1805 l--;
1806 }
1807 }
1808 }
1809 else
1810 {
1811 int l4 = l >> 2;
1812 l &= 3;
1813 bool DoScalar = true;
1814
1815 if( sizeof( op[ 0 ]) == sizeof( ip[ 0 ]))
1816 {
1817 #if LANCIR_ALIGN > 4
1818
1819 DoScalar = false;
1820 const lancvec_t om = lancvec_load32_splat( &OutMul );
1821
1822 while( l4 != 0 )
1823 {
1824 lancvec_storeu( (float*) op,
1825 lancvec_mul( lancvec_load( ip ), om ));
1826
1827 ip += 4;
1828 op += 4;
1829 l4--;
1830 }
1831
1832 #endif // LANCIR_ALIGN > 4
1833 }
1834
1835 if( DoScalar )
1836 {
1837 while( l4 != 0 )
1838 {
1839 op[ 0 ] = (T) ( ip[ 0 ] * OutMul );
1840 op[ 1 ] = (T) ( ip[ 1 ] * OutMul );
1841 op[ 2 ] = (T) ( ip[ 2 ] * OutMul );
1842 op[ 3 ] = (T) ( ip[ 3 ] * OutMul );
1843 ip += 4;
1844 op += 4;
1845 l4--;
1846 }
1847 }
1848
1849 while( l != 0 )
1850 {
1851 *op = (T) ( *ip * OutMul );
1852 ip++;
1853 op++;
1854 l--;
1855 }
1856 }
1857 }
1858 else
1859 {
1860 int l4 = l >> 2;
1861 l &= 3;
1862
1863 #if LANCIR_ALIGN > 4
1864
1865 const lancvec_t minv = lancvec_const_splat( 0.0f );
1866 const lancvec_t maxv = lancvec_load32_splat( &Clamp );
1867 const lancvec_t om = lancvec_load32_splat( &OutMul );
1868
1869 #if defined( LANCIR_SSE2 )
1870 unsigned int prevrm = _MM_GET_ROUNDING_MODE();
1871 _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );
1872 #else // defined( LANCIR_SSE2 )
1873 const lancvec_t v05 = lancvec_const_splat( 0.5f );
1874 #endif // defined( LANCIR_SSE2 )
1875
1876 if( sizeof( op[ 0 ]) == 4 )
1877 {
1878 while( l4 != 0 )
1879 {
1880 const lancvec_t v = lancvec_load( ip );
1881 const lancvec_t cv = lancvec_max( lancvec_min(
1882 ( IsUnityMul ? v : lancvec_mul( v, om )),
1883 maxv ), minv );
1884
1885 #if defined( LANCIR_SSE2 )
1886
1887 _mm_storeu_si128( (__m128i*) op, _mm_cvtps_epi32( cv ));
1888
1889 #elif defined( LANCIR_NEON )
1890
1891 vst1q_u32( (unsigned int*) op, vcvtq_u32_f32( vaddq_f32(
1892 cv, v05 )));
1893
1894 #elif defined( LANCIR_WASM )
1895
1896 wasm_v128_store( op, wasm_i32x4_trunc_sat_f32x4(
1897 wasm_f32x4_add( cv, v05 )));
1898
1899 #endif // defined( LANCIR_WASM )
1900
1901 ip += 4;
1902 op += 4;
1903 l4--;
1904 }
1905 }
1906 else
1907 if( sizeof( op[ 0 ]) == 2 )
1908 {
1909 while( l4 != 0 )
1910 {
1911 const lancvec_t v = lancvec_load( ip );
1912 const lancvec_t cv = lancvec_max( lancvec_min(
1913 ( IsUnityMul ? v : lancvec_mul( v, om )),
1914 maxv ), minv );
1915
1916 #if defined( LANCIR_SSE2 )
1917
1918 const __m128i v32 = _mm_cvtps_epi32( cv );
1919 const __m128i v16s = _mm_shufflehi_epi16(
1920 _mm_shufflelo_epi16( v32, 0 | 2 << 2 ), 0 | 2 << 2 );
1921
1922 const __m128i v16 = _mm_shuffle_epi32( v16s, 0 | 2 << 2 );
1923
1924 __m128i tmp;
1925 _mm_store_si128( &tmp, v16 );
1926 memcpy( op, &tmp, 8 );
1927
1928 #elif defined( LANCIR_NEON )
1929
1930 const uint32x4_t v32 = vcvtq_u32_f32(
1931 vaddq_f32( cv, v05 ));
1932
1933 const uint16x4_t v16 = vmovn_u32( v32 );
1934
1935 vst1_u16( (unsigned short*) op, v16 );
1936
1937 #elif defined( LANCIR_WASM )
1938
1939 const v128_t v32 = wasm_i32x4_trunc_sat_f32x4(
1940 wasm_f32x4_add( cv, v05 ));
1941
1942 wasm_v128_store64_lane( op,
1943 wasm_u16x8_narrow_i32x4( v32, v32 ), 0 );
1944
1945 #endif // defined( LANCIR_WASM )
1946
1947 ip += 4;
1948 op += 4;
1949 l4--;
1950 }
1951 }
1952 else
1953 {
1954 while( l4 != 0 )
1955 {
1956 const lancvec_t v = lancvec_load( ip );
1957 const lancvec_t cv = lancvec_max( lancvec_min(
1958 ( IsUnityMul ? v : lancvec_mul( v, om )),
1959 maxv ), minv );
1960
1961 #if defined( LANCIR_SSE2 )
1962
1963 const __m128i v32 = _mm_cvtps_epi32( cv );
1964 const __m128i v16s = _mm_shufflehi_epi16(
1965 _mm_shufflelo_epi16( v32, 0 | 2 << 2 ), 0 | 2 << 2 );
1966
1967 const __m128i v16 = _mm_shuffle_epi32( v16s, 0 | 2 << 2 );
1968 const __m128i v8 = _mm_packus_epi16( v16, v16 );
1969
1970 *(int*) op = _mm_cvtsi128_si32( v8 );
1971
1972 #elif defined( LANCIR_NEON )
1973
1974 const uint32x4_t v32 = vcvtq_u32_f32(
1975 vaddq_f32( cv, v05 ));
1976
1977 const uint16x4_t v16 = vmovn_u32( v32 );
1978 const uint8x8_t v8 = vmovn_u16( vcombine_u16( v16, v16 ));
1979
1980 *(unsigned int*) op = vget_lane_u32( (uint32x2_t) v8, 0 );
1981
1982 #elif defined( LANCIR_WASM )
1983
1984 const v128_t v32 = wasm_i32x4_trunc_sat_f32x4(
1985 wasm_f32x4_add( cv, v05 ));
1986
1987 const v128_t v16 = wasm_u16x8_narrow_i32x4( v32, v32 );
1988
1989 wasm_v128_store32_lane( op,
1990 wasm_u8x16_narrow_i16x8( v16, v16 ), 0 );
1991
1992 #endif // defined( LANCIR_WASM )
1993
1994 ip += 4;
1995 op += 4;
1996 l4--;
1997 }
1998 }
1999
2000 #if defined( LANCIR_SSE2 )
2001 _MM_SET_ROUNDING_MODE( prevrm );
2002 #endif // defined( LANCIR_SSE2 )
2003
2004 #else // LANCIR_ALIGN > 4
2005
2006 if( IsUnityMul )
2007 {
2008 while( l4 != 0 )
2009 {
2010 op[ 0 ] = (T) roundclamp( ip[ 0 ], Clamp );
2011 op[ 1 ] = (T) roundclamp( ip[ 1 ], Clamp );
2012 op[ 2 ] = (T) roundclamp( ip[ 2 ], Clamp );
2013 op[ 3 ] = (T) roundclamp( ip[ 3 ], Clamp );
2014 ip += 4;
2015 op += 4;
2016 l4--;
2017 }
2018 }
2019 else
2020 {
2021 while( l4 != 0 )
2022 {
2023 op[ 0 ] = (T) roundclamp( ip[ 0 ] * OutMul, Clamp );
2024 op[ 1 ] = (T) roundclamp( ip[ 1 ] * OutMul, Clamp );
2025 op[ 2 ] = (T) roundclamp( ip[ 2 ] * OutMul, Clamp );
2026 op[ 3 ] = (T) roundclamp( ip[ 3 ] * OutMul, Clamp );
2027 ip += 4;
2028 op += 4;
2029 l4--;
2030 }
2031 }
2032
2033 #endif // LANCIR_ALIGN > 4
2034
2035 if( IsUnityMul )
2036 {
2037 while( l != 0 )
2038 {
2039 *op = (T) roundclamp( *ip, Clamp );
2040 ip++;
2041 op++;
2042 l--;
2043 }
2044 }
2045 else
2046 {
2047 while( l != 0 )
2048 {
2049 *op = (T) roundclamp( *ip * OutMul, Clamp );
2050 ip++;
2051 op++;
2052 l--;
2053 }
2054 }
2055 }
2056 }
2057
2062
// LANCIR_LF_PRE: scanline resize function prologue, shared by the
// resize1..resize4 functions. It opens a loop over the DstLen destination
// pixels and, for each one, derives the filter pointer (flt) and the source
// sample pointer (ip) from the current CResizePos entry. When the UseSP
// template flag is true, rp->spo is interpreted as a byte offset into the
// scanline buffer sp; otherwise rp->spo already holds a direct pointer to
// the samples (see CResizePos::spo). The loop is closed by LANCIR_LF_POST.
2063 #define LANCIR_LF_PRE \
2064 const CResizePos* const rpe = rp + DstLen; \
2065 while( rp != rpe ) \
2066 { \
2067 const float* flt = rp -> flt; \
2068 const float* ip; \
2069 if( UseSP ) \
2070 { \
2071 ip = (const float*) ( (intptr_t) sp + rp -> spo ); \
2072 } \
2073 else \
2074 { \
2075 ip = (const float*) rp -> spo; \
2076 }
2077
2082
// LANCIR_LF_POST: scanline resize function epilogue, the counterpart of
// LANCIR_LF_PRE. Advances the output pointer by opinc (elements per
// destination pixel), steps to the next CResizePos entry, and closes the
// per-destination-pixel loop opened by LANCIR_LF_PRE.
2083 #define LANCIR_LF_POST \
2084 op += opinc; \
2085 rp++; \
2086 }
2087
2101
// resize1: scanline resizing for 1-channel images. For each destination
// pixel it computes the dot product of the kl-tap filter kernel (flt,
// aligned loads) with the source samples (ip, unaligned loads), writing a
// single float and advancing op by opinc.
// @param sp Source scanline base; used only when UseSP is true, in which
// case rp->spo is a byte offset into it (otherwise spo is a pointer).
// @param op Output buffer pointer.
// @param opinc Output pointer increment per destination pixel.
// @param rp Source positions and filters, one entry per destination pixel.
// @param kl Filter kernel length, in taps.
// @param DstLen Number of destination pixels.
// NOTE(review): this rendering omits the LANCIR_LF_PRE / LANCIR_LF_POST
// invocation lines (original source lines 2110 and 2150) that declare
// flt/ip and open/close the per-pixel loop — confirm against the original
// header.
2102 template< bool UseSP >
2103 static void resize1( const float* const sp, float* op, const size_t opinc,
2104 const CResizePos* rp, const int kl, const int DstLen )
2105 {
// Number of whole 4-tap groups in the kernel.
2106 const int ci = kl >> 2;
2107
// Fast path: kernel length is an exact multiple of 4 taps.
2108 if(( kl & 3 ) == 0 )
2109 {
2111
2112 int c = ci;
2113
2114 #if LANCIR_ALIGN > 4
2115
// SIMD path: accumulate 4 products per iteration, then store the
// horizontal sum of the 4 lanes as the single output value.
2116 lancvec_t sum = lancvec_mul(
2117 lancvec_load( flt ), lancvec_loadu( ip ));
2118
2119 while( --c != 0 )
2120 {
2121 flt += 4;
2122 ip += 4;
2123 sum = lancvec_madd( sum, lancvec_load( flt ),
2124 lancvec_loadu( ip ));
2125 }
2126
2127 lancvec_store32_hadd( op, sum );
2128
2129 #else // LANCIR_ALIGN > 4
2130
// Scalar path: four independent accumulators to expose instruction-level
// parallelism, combined pairwise at the end.
2131 float sum0 = flt[ 0 ] * ip[ 0 ];
2132 float sum1 = flt[ 1 ] * ip[ 1 ];
2133 float sum2 = flt[ 2 ] * ip[ 2 ];
2134 float sum3 = flt[ 3 ] * ip[ 3 ];
2135
2136 while( --c != 0 )
2137 {
2138 flt += 4;
2139 ip += 4;
2140 sum0 += flt[ 0 ] * ip[ 0 ];
2141 sum1 += flt[ 1 ] * ip[ 1 ];
2142 sum2 += flt[ 2 ] * ip[ 2 ];
2143 sum3 += flt[ 3 ] * ip[ 3 ];
2144 }
2145
2146 op[ 0 ] = ( sum0 + sum1 ) + ( sum2 + sum3 );
2147
2148 #endif // LANCIR_ALIGN > 4
2149
2151 }
// Kernel length is not a multiple of 4 (4*ci+2 taps here, given the
// trailing flt[4]/flt[5] handling below): process ci 4-tap groups, then
// fold in the 2 remaining taps.
2152 else
2153 {
2155
2156 int c = ci;
2157
2158 #if LANCIR_ALIGN > 4
2159
2160 lancvec_t sum = lancvec_mul( lancvec_load( flt ),
2161 lancvec_loadu( ip ));
2162
2163 while( --c != 0 )
2164 {
2165 flt += 4;
2166 ip += 4;
2167 sum = lancvec_madd( sum, lancvec_load( flt ),
2168 lancvec_loadu( ip ));
2169 }
2170
2171 #if defined( LANCIR_NEON )
2172
// NEON: fold the 4-lane sum to 2 lanes, multiply-accumulate the 2
// trailing taps, then reduce the pair to the final scalar.
2173 float32x2_t sum2 = vadd_f32( vget_high_f32( sum ),
2174 vget_low_f32( sum ));
2175
2176 sum2 = vmla_f32( sum2, vld1_f32( flt + 4 ),
2177 vld1_f32( ip + 4 ));
2178
2179 #if defined( LANCIR_ARM32 )
// vaddv_f32 is unavailable on older toolchains (see LANCIR_ARM32 above).
2180 op[ 0 ] = vget_lane_f32( sum2, 0 ) +
2181 vget_lane_f32( sum2, 1 );
2182 #else // defined( LANCIR_ARM32 )
2183 op[ 0 ] = vaddv_f32( sum2 );
2184 #endif // defined( LANCIR_ARM32 )
2185
2186 #else // defined( LANCIR_NEON )
2187
// SSE2/WASM: an overlapping unaligned load at offset 2 brings in the 2
// trailing taps; successive high/low adds reduce everything to lane 0.
2188 const lancvec_t sum2 = lancvec_mul( lancvec_loadu( flt + 2 ),
2189 lancvec_loadu( ip + 2 ));
2190
2191 sum = lancvec_addhl( sum, sum );
2192 sum = lancvec_addhl( sum, sum2 );
2193
2194 lancvec_store32_addhl( op, sum );
2195
2196 #endif // defined( LANCIR_NEON )
2197
2198 #else // LANCIR_ALIGN > 4
2199
2200 float sum0 = flt[ 0 ] * ip[ 0 ];
2201 float sum1 = flt[ 1 ] * ip[ 1 ];
2202 float sum2 = flt[ 2 ] * ip[ 2 ];
2203 float sum3 = flt[ 3 ] * ip[ 3 ];
2204
2205 while( --c != 0 )
2206 {
2207 flt += 4;
2208 ip += 4;
2209 sum0 += flt[ 0 ] * ip[ 0 ];
2210 sum1 += flt[ 1 ] * ip[ 1 ];
2211 sum2 += flt[ 2 ] * ip[ 2 ];
2212 sum3 += flt[ 3 ] * ip[ 3 ];
2213 }
2214
// The 2 trailing taps are added directly in the scalar reduction.
2215 op[ 0 ] = ( sum0 + sum1 ) + ( sum2 + sum3 ) +
2216 flt[ 4 ] * ip[ 4 ] + flt[ 5 ] * ip[ 5 ];
2217
2218 #endif // LANCIR_ALIGN > 4
2219
2221 }
2222 }
2223
// resize2: scanline resizing for 2-channel (interleaved) images. Each
// filter tap is applied to a pixel pair, producing two output floats per
// destination pixel. On SIMD targets the kernel taps are replicated so
// that 4 (or 8 for AVX) consecutive floats of flt cover 2 (or 4) pixels.
// Parameters are the same as resize1.
// NOTE(review): this rendering omits the LANCIR_LF_PRE / LANCIR_LF_POST
// invocation lines (original source lines 2235 and 2319) that declare
// flt/ip and open/close the per-pixel loop — confirm against the original
// header.
2224 template< bool UseSP >
2225 static void resize2( const float* const sp, float* op, const size_t opinc,
2226 const CResizePos* rp, const int kl, const int DstLen )
2227 {
2228 #if LANCIR_ALIGN > 4
// SIMD: iterate over 4-tap groups; cir flags a 2-tap remainder.
2229 const int ci = kl >> 2;
2230 const int cir = kl & 3;
2231 #else // LANCIR_ALIGN > 4
// Scalar: iterate over 2-tap groups (2 pixels = 4 floats per step).
2232 const int ci = kl >> 1;
2233 #endif // LANCIR_ALIGN > 4
2234
2236
2237 int c = ci;
2238
2239 #if defined( LANCIR_AVX )
2240
// AVX: 8 floats (4 taps x 2 channels) per iteration.
2241 __m256 sum = _mm256_mul_ps( _mm256_load_ps( flt ),
2242 _mm256_loadu_ps( ip ));
2243
2244 while( --c != 0 )
2245 {
2246 flt += 8;
2247 ip += 8;
2248 sum = _mm256_add_ps( sum, _mm256_mul_ps( _mm256_load_ps( flt ),
2249 _mm256_loadu_ps( ip )));
2250 }
2251
// Fold the two 128-bit halves together.
2252 __m128 res = _mm_add_ps( _mm256_extractf128_ps( sum, 0 ),
2253 _mm256_extractf128_ps( sum, 1 ));
2254
// Fold in the 2-tap kernel remainder, if present.
2255 if( cir == 2 )
2256 {
2257 res = _mm_add_ps( res, _mm_mul_ps( _mm_load_ps( flt + 8 ),
2258 _mm_loadu_ps( ip + 8 )));
2259 }
2260
// Add high pair to low pair and store the 2 resulting channel values.
2261 _mm_storel_pi( (__m64*) op,
2262 _mm_add_ps( res, _mm_movehl_ps( res, res )));
2263
2264 #elif LANCIR_ALIGN > 4
2265
// 128-bit SIMD: two accumulators cover 8 floats (4 taps) per iteration.
2266 lancvec_t sumA = lancvec_mul(
2267 lancvec_load( flt ), lancvec_loadu( ip ));
2268
2269 lancvec_t sumB = lancvec_mul(
2270 lancvec_load( flt + 4 ), lancvec_loadu( ip + 4 ));
2271
2272 while( --c != 0 )
2273 {
2274 flt += 8;
2275 ip += 8;
2276 sumA = lancvec_madd( sumA, lancvec_load( flt ),
2277 lancvec_loadu( ip ));
2278
2279 sumB = lancvec_madd( sumB, lancvec_load( flt + 4 ),
2280 lancvec_loadu( ip + 4 ));
2281 }
2282
2283 sumA = lancvec_add( sumA, sumB );
2284
// Fold in the 2-tap kernel remainder, if present.
2285 if( cir == 2 )
2286 {
2287 sumA = lancvec_madd( sumA, lancvec_load( flt + 8 ),
2288 lancvec_loadu( ip + 8 ));
2289 }
2290
// Store the high-half + low-half sum: 2 output channel values.
2291 lancvec_store64_addhl( op, sumA );
2292
2293 #else // LANCIR_ALIGN > 4
2294
// Scalar: two taps per iteration, separate accumulators per channel.
2295 const float xx = flt[ 0 ];
2296 const float xx2 = flt[ 1 ];
2297 float sum0 = xx * ip[ 0 ];
2298 float sum1 = xx * ip[ 1 ];
2299 float sum2 = xx2 * ip[ 2 ];
2300 float sum3 = xx2 * ip[ 3 ];
2301
2302 while( --c != 0 )
2303 {
2304 flt += 2;
2305 ip += 4;
2306 const float xx = flt[ 0 ];
2307 const float xx2 = flt[ 1 ];
2308 sum0 += xx * ip[ 0 ];
2309 sum1 += xx * ip[ 1 ];
2310 sum2 += xx2 * ip[ 2 ];
2311 sum3 += xx2 * ip[ 3 ];
2312 }
2313
2314 op[ 0 ] = sum0 + sum2;
2315 op[ 1 ] = sum1 + sum3;
2316
2317 #endif // LANCIR_ALIGN > 4
2318
2320 }
2321
// resize3: scanline resizing for 3-channel (interleaved) images. Each
// filter tap is applied to an RGB-style triple; on SIMD targets 12 floats
// (4 taps x 3 channels) are processed per loop iteration and the lane sums
// are untangled in the res[] scratch array. Parameters are the same as
// resize1.
// NOTE(review): this rendering omits the LANCIR_LF_PRE / LANCIR_LF_POST
// invocation lines (original source lines 2331/2430 and 2463) that declare
// flt/ip and open/close the per-pixel loop — confirm against the original
// header.
2322 template< bool UseSP >
2323 static void resize3( const float* const sp, float* op, const size_t opinc,
2324 const CResizePos* rp, const int kl, const int DstLen )
2325 {
2326 #if LANCIR_ALIGN > 4
2327
// ci counts 4-tap groups; cir flags a 2-tap remainder.
2328 const int ci = kl >> 2;
2329 const int cir = kl & 3;
2330
2332
// Scratch area for the three vector accumulators (3 x 4 floats).
2333 float res[ 12 ];
2334 int c = ci;
2335
2336 #if defined( LANCIR_AVX )
2337
// AVX: a 128-bit and a 256-bit accumulator together cover 12 floats.
2338 __m128 sumA = _mm_mul_ps( _mm_load_ps( flt ), _mm_loadu_ps( ip ));
2339 __m256 sumB = _mm256_mul_ps( _mm256_loadu_ps( flt + 4 ),
2340 _mm256_loadu_ps( ip + 4 ));
2341
2342 while( --c != 0 )
2343 {
2344 flt += 12;
2345 ip += 12;
2346 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt ),
2347 _mm_loadu_ps( ip )));
2348
2349 sumB = _mm256_add_ps( sumB, _mm256_mul_ps(
2350 _mm256_loadu_ps( flt + 4 ), _mm256_loadu_ps( ip + 4 )));
2351 }
2352
// Fold in the first 4 floats of the 2-tap remainder, if present.
2353 if( cir == 2 )
2354 {
2355 sumA = _mm_add_ps( sumA, _mm_mul_ps( _mm_load_ps( flt + 12 ),
2356 _mm_loadu_ps( ip + 12 )));
2357 }
2358
2359 _mm_storeu_ps( res, sumA );
2360
// Lanes interleave channel sums with a period of 3: start combining.
2360 // (see the res[6..11] combination below for the remaining lanes)
2361 float o0 = res[ 0 ] + res[ 3 ];
2362 float o1 = res[ 1 ];
2363 float o2 = res[ 2 ];
2364
2365 _mm256_storeu_ps( res + 4, sumB );
2366
2367 o1 += res[ 4 ];
2368 o2 += res[ 5 ];
2369
2370 #else // defined( LANCIR_AVX )
2371
// 128-bit SIMD: three accumulators cover the 12 floats per iteration.
2372 lancvec_t sumA = lancvec_mul( lancvec_load( flt ),
2373 lancvec_loadu( ip ));
2374
2375 lancvec_t sumB = lancvec_mul( lancvec_load( flt + 4 ),
2376 lancvec_loadu( ip + 4 ));
2377
2378 lancvec_t sumC = lancvec_mul( lancvec_load( flt + 8 ),
2379 lancvec_loadu( ip + 8 ));
2380
2381 while( --c != 0 )
2382 {
2383 flt += 12;
2384 ip += 12;
2385 sumA = lancvec_madd( sumA, lancvec_load( flt ),
2386 lancvec_loadu( ip ));
2387
2388 sumB = lancvec_madd( sumB, lancvec_load( flt + 4 ),
2389 lancvec_loadu( ip + 4 ));
2390
2391 sumC = lancvec_madd( sumC, lancvec_load( flt + 8 ),
2392 lancvec_loadu( ip + 8 ));
2393 }
2394
// Fold in the first 4 floats of the 2-tap remainder, if present.
2395 if( cir == 2 )
2396 {
2397 sumA = lancvec_madd( sumA, lancvec_load( flt + 12 ),
2398 lancvec_loadu( ip + 12 ));
2399 }
2400
2401 lancvec_storeu( res, sumA );
2402 lancvec_storeu( res + 4, sumB );
2403
// Lanes interleave channel sums with a period of 3: combine them.
2404 float o0 = res[ 0 ] + res[ 3 ];
2405 float o1 = res[ 1 ] + res[ 4 ];
2406 float o2 = res[ 2 ] + res[ 5 ];
2407
2408 lancvec_storeu( res + 8, sumC );
2409
2410 #endif // defined( LANCIR_AVX )
2411
2412 o0 += res[ 6 ] + res[ 9 ];
2413 o1 += res[ 7 ] + res[ 10 ];
2414 o2 += res[ 8 ] + res[ 11 ];
2415
// The last 2 floats of the remainder did not fit the vector loads above.
2416 if( cir == 2 )
2417 {
2418 o1 += flt[ 16 ] * ip[ 16 ];
2419 o2 += flt[ 17 ] * ip[ 17 ];
2420 }
2421
2422 op[ 0 ] = o0;
2423 op[ 1 ] = o1;
2424 op[ 2 ] = o2;
2425
2426 #else // LANCIR_ALIGN > 4
2427
// Scalar: two taps per iteration, six channel accumulators.
2428 const int ci = kl >> 1;
2429
2431
2432 int c = ci;
2433
2434 const float xx = flt[ 0 ];
2435 float sum0 = xx * ip[ 0 ];
2436 float sum1 = xx * ip[ 1 ];
2437 float sum2 = xx * ip[ 2 ];
2438 const float xx2 = flt[ 1 ];
2439 float sum3 = xx2 * ip[ 3 ];
2440 float sum4 = xx2 * ip[ 4 ];
2441 float sum5 = xx2 * ip[ 5 ];
2442
2443 while( --c != 0 )
2444 {
2445 flt += 2;
2446 ip += 6;
2447 const float xx = flt[ 0 ];
2448 sum0 += xx * ip[ 0 ];
2449 sum1 += xx * ip[ 1 ];
2450 sum2 += xx * ip[ 2 ];
2451 const float xx2 = flt[ 1 ];
2452 sum3 += xx2 * ip[ 3 ];
2453 sum4 += xx2 * ip[ 4 ];
2454 sum5 += xx2 * ip[ 5 ];
2455 }
2456
2457 op[ 0 ] = sum0 + sum3;
2458 op[ 1 ] = sum1 + sum4;
2459 op[ 2 ] = sum2 + sum5;
2460
2461 #endif // LANCIR_ALIGN > 4
2462
2464 }
2465
// resize4: scanline resizing for 4-channel (interleaved) images. One
// filter tap spans exactly one 4-float vector, so both flt and ip are
// vector-aligned here (note the aligned lancvec_load on ip, unlike
// resize1..resize3). Parameters are the same as resize1.
// NOTE(review): this rendering omits the LANCIR_LF_PRE / LANCIR_LF_POST
// invocation lines (original source lines 2476 and 2543) that declare
// flt/ip and open/close the per-pixel loop — confirm against the original
// header.
2466 template< bool UseSP >
2467 static void resize4( const float* const sp, float* op, const size_t opinc,
2468 const CResizePos* rp, const int kl, const int DstLen )
2469 {
2470 #if LANCIR_ALIGN > 4
// SIMD: two taps (8 floats) per iteration.
2471 const int ci = kl >> 1;
2472 #else // LANCIR_ALIGN > 4
// Scalar: one tap (4 floats) per iteration.
2473 const int ci = kl;
2474 #endif // LANCIR_ALIGN > 4
2475
2477
2478 int c = ci;
2479
2480 #if defined( LANCIR_AVX )
2481
// AVX: a single 256-bit accumulator covers both taps per iteration.
2482 __m256 sum = _mm256_mul_ps( _mm256_load_ps( flt ),
2483 _mm256_loadu_ps( ip ));
2484
2485 while( --c != 0 )
2486 {
2487 flt += 8;
2488 ip += 8;
2489 sum = _mm256_add_ps( sum, _mm256_mul_ps( _mm256_load_ps( flt ),
2490 _mm256_loadu_ps( ip )));
2491 }
2492
// The two 128-bit halves are per-tap sums; their sum is the 4-channel
// output pixel.
2493 _mm_store_ps( op, _mm_add_ps( _mm256_extractf128_ps( sum, 0 ),
2494 _mm256_extractf128_ps( sum, 1 )));
2495
2496 #elif LANCIR_ALIGN > 4
2497
// 128-bit SIMD: two accumulators, one per tap in the pair.
2498 lancvec_t sumA = lancvec_mul( lancvec_load( flt ),
2499 lancvec_load( ip ));
2500
2501 lancvec_t sumB = lancvec_mul( lancvec_load( flt + 4 ),
2502 lancvec_load( ip + 4 ));
2503
2504 while( --c != 0 )
2505 {
2506 flt += 8;
2507 ip += 8;
2508 sumA = lancvec_madd( sumA, lancvec_load( flt ),
2509 lancvec_load( ip ));
2510
2511 sumB = lancvec_madd( sumB, lancvec_load( flt + 4 ),
2512 lancvec_load( ip + 4 ));
2513 }
2514
2515 lancvec_store( op, lancvec_add( sumA, sumB ));
2516
2517 #else // LANCIR_ALIGN > 4
2518
// Scalar: one tap weights all 4 channels of a pixel per iteration.
2519 const float xx = flt[ 0 ];
2520 float sum0 = xx * ip[ 0 ];
2521 float sum1 = xx * ip[ 1 ];
2522 float sum2 = xx * ip[ 2 ];
2523 float sum3 = xx * ip[ 3 ];
2524
2525 while( --c != 0 )
2526 {
2527 flt++;
2528 ip += 4;
2529 const float xx = flt[ 0 ];
2530 sum0 += xx * ip[ 0 ];
2531 sum1 += xx * ip[ 1 ];
2532 sum2 += xx * ip[ 2 ];
2533 sum3 += xx * ip[ 3 ];
2534 }
2535
2536 op[ 0 ] = sum0;
2537 op[ 1 ] = sum1;
2538 op[ 2 ] = sum2;
2539 op[ 3 ] = sum3;
2540
2541 #endif // LANCIR_ALIGN > 4
2542
2544 }
2545
2547
2548 #undef LANCIR_LF_PRE
2549 #undef LANCIR_LF_POST
2550};
2551
2552#undef lancvec_t
2553#undef lancvec_const_splat
2554#undef lancvec_load32_splat
2555#undef lancvec_load
2556#undef lancvec_loadu
2557#undef lancvec_store
2558#undef lancvec_storeu
2559#undef lancvec_add
2560#undef lancvec_mul
2561#undef lancvec_min
2562#undef lancvec_max
2563#undef lancvec_madd
2564#undef lancvec_addhl
2565#undef lancvec_store32_addhl
2566#undef lancvec_store32_hadd
2567#undef lancvec_store64_addhl
2568
2569#if defined( LANCIR_NULLPTR )
2570 #undef nullptr
2571 #undef LANCIR_NULLPTR
2572#endif // defined( LANCIR_NULLPTR )
2573
2574} // namespace avir
2575
2576#endif // AVIR_CLANCIR_INCLUDED
#define LANCIR_ALIGN
Address alignment (granularity) used by resizing functions, in bytes.
Definition lancir.h:127
#define LANCIR_LF_POST
Scanline resize function epilogue.
Definition lancir.h:2083
#define LANCIR_LF_PRE
Scanline resize function prologue.
Definition lancir.h:2063
LANCIR resizing parameters class.
Definition lancir.h:261
CLancIRParams(const int aSrcSSize=0, const int aNewSSize=0, const double akx=0.0, const double aky=0.0, const double aox=0.0, const double aoy=0.0)
Default constructor, with optional arguments that correspond to class variables.
Definition lancir.h:295
double oy
Start Y pixel offset within the source image, can be negative. A positive offset moves the image to t...
Definition lancir.h:278
double ky
Resizing step - vertical. Same as kx.
Definition lancir.h:275
double la
Lanczos window function's a parameter, greater than or equal to 2.0.
Definition lancir.h:280
int NewSSize
Physical size of the destination scanline, in elements (not bytes). If this value is below 1,...
Definition lancir.h:266
double ox
Start X pixel offset within the source image, can be negative. A positive offset moves the image to t...
Definition lancir.h:276
int SrcSSize
Physical size of the source scanline, in elements (not bytes). If this value is below 1,...
Definition lancir.h:263
double kx
Resizing step - horizontal (one output pixel corresponds to k input pixels). A downsizing factor if g...
Definition lancir.h:269
static void copyScanline1v(const T *ip, const size_t ipinc, float *op, int cc, int repl, int repr)
Scanline copying function, for vertical resizing.
Definition lancir.h:1407
float * FltBuf
Address-aligned FltBuf0.
Definition lancir.h:760
int resizeImage(const Tin *const SrcBuf, const int SrcWidth, const int SrcHeight, const int SrcSSize, Tout *const NewBuf, const int NewWidth, const int NewHeight, const int NewSSize, const int ElCount, const double kx0=0.0, const double ky0=0.0, double ox=0.0, double oy=0.0)
Legacy image resizing function.
Definition lancir.h:745
static void resize3(const float *const sp, float *op, const size_t opinc, const CResizePos *rp, const int kl, const int DstLen)
Function performs scanline resizing. Variants for 1-4-channel images.
Definition lancir.h:2323
static void resize2(const float *const sp, float *op, const size_t opinc, const CResizePos *rp, const int kl, const int DstLen)
Function performs scanline resizing. Variants for 1-4-channel images.
Definition lancir.h:2225
CResizeScanline rsv
Vertical resize scanline.
Definition lancir.h:1386
static void padScanline2h(float *op, CResizeScanline &rs, const int l)
Scanline padding function, for horizontal resizing.
Definition lancir.h:1634
static void resize4(const float *const sp, float *op, const size_t opinc, const CResizePos *rp, const int kl, const int DstLen)
Function performs scanline resizing. Variants for 1-4-channel images.
Definition lancir.h:2467
static int roundclamp(const float v, const float Clamp)
Rounds a value, and applies clamping.
Definition lancir.h:1746
CResizeFilters rfh0
Resizing filters for horizontal resizing (may not be in use).
Definition lancir.h:1384
float * spv0
Scanline buffer for vertical resizing, also used at the output stage.
Definition lancir.h:761
static void outputScanline(const float *ip, T *op, int l, const float Clamp, const float OutMul)
Scanline output function.
Definition lancir.h:1773
CResizeFilters rfv
Resizing filters for vertical resizing.
Definition lancir.h:1383
static void copyScanline4v(const T *ip, const size_t ipinc, float *op, int cc, int repl, int repr)
Scanline copying function, for vertical resizing.
Definition lancir.h:1542
static void padScanline1h(float *op, CResizeScanline &rs, const int l)
Scanline padding function, for horizontal resizing.
Definition lancir.h:1611
static void padScanline4h(float *op, CResizeScanline &rs, const int l)
Scanline padding function, for horizontal resizing.
Definition lancir.h:1698
float * FltBuf0
Intermediate resizing buffer.
Definition lancir.h:758
static void copyScanline2v(const T *ip, const size_t ipinc, float *op, int cc, int repl, int repr)
Scanline copying function, for vertical resizing.
Definition lancir.h:1447
int resizeImage(const Tin *const SrcBuf, const int SrcWidth, const int SrcHeight, Tout *const NewBuf, const int NewWidth, const int NewHeight, const int ElCount, const CLancIRParams *const aParams=nullptr)
Function resizes an image.
Definition lancir.h:387
size_t FltBuf0Len
Length of FltBuf0.
Definition lancir.h:759
int spv0len
Length of spv0.
Definition lancir.h:763
static void reallocBuf(Tb *&buf, Tl &len, const Tl newlen)
Typed buffer reallocation function.
Definition lancir.h:818
static void copyScanline3v(const T *ip, const size_t ipinc, float *op, int cc, int repl, int repr)
Scanline copying function, for vertical resizing.
Definition lancir.h:1492
CResizeScanline rsh
Horizontal resize scanline.
Definition lancir.h:1387
static void resize1(const float *const sp, float *op, const size_t opinc, const CResizePos *rp, const int kl, const int DstLen)
Function performs scanline resizing. Variants for 1-4-channel images.
Definition lancir.h:2103
static void padScanline3h(float *op, CResizeScanline &rs, const int l)
Scanline padding function, for horizontal resizing.
Definition lancir.h:1664
float * spv
Address-aligned spv0.
Definition lancir.h:764
static void reallocBuf(Tb *&buf0, Tb *&buf, Tl &len, Tl newlen)
Typed buffer reallocation function, with address alignment.
Definition lancir.h:783
Class for fractional delay filter bank storage and calculation.
Definition lancir.h:841
float * Bufs[BufCount]
Address-aligned Bufs0.
Definition lancir.h:988
int CurBufFill
The number of fractional positions filled in the current filter buffer.
Definition lancir.h:990
const float * getFilter(const double x)
Filter acquisition function.
Definition lancir.h:940
double Len2
Half resampling filter's length, unrounded.
Definition lancir.h:972
double k
Current k.
Definition lancir.h:997
int CurBuf
Filter buffer currently being filled.
Definition lancir.h:989
int ElCount
Current ElCount.
Definition lancir.h:998
static const int BufLen
The number of fractional filters a single buffer (filter batch) may contain. Both the BufLen and BufC...
Definition lancir.h:981
int ElRepl
The number of repetitions of each filter tap.
Definition lancir.h:978
bool update(const double la0, const double k0, const int ElCount0)
Function updates the filter bank.
Definition lancir.h:882
static const int BufCount
The maximal number of buffers (filter batches) that can be in use.
Definition lancir.h:979
void makeFilterNorm(float *op, const double FracDelay) const
Filter calculation function.
Definition lancir.h:1076
void setBuf(const int bi)
Current buffer (filter batch) repositioning function.
Definition lancir.h:1009
int KernelLenA
SIMD-aligned and replicated filter kernel's length.
Definition lancir.h:976
static void replicateFilter(float *const p, const int kl, const int erp)
Filter tap replication function, for SIMD operations.
Definition lancir.h:1171
double la
Current la.
Definition lancir.h:996
float ** Filters
Fractional delay filters for all positions. A particular pointer equals nullptr, if a filter for such...
Definition lancir.h:992
double FreqA
Circular frequency of the window function.
Definition lancir.h:971
int fl2
Half resampling filter's length, integer.
Definition lancir.h:973
float * Bufs0[BufCount]
Buffers that hold all filters, original.
Definition lancir.h:984
int FiltersLen
Allocated length of Filters, in elements.
Definition lancir.h:995
double Freq
Circular frequency of the filter.
Definition lancir.h:970
int FracCount
The number of fractional positions for which filters can be created.
Definition lancir.h:974
int KernelLen
Resampling filter kernel's length, taps. Available after the update() function call....
Definition lancir.h:845
int Bufs0Len[BufCount]
Allocated lengths in Bufs0, in float elements.
Definition lancir.h:986
Sine-wave signal generator class.
Definition lancir.h:1027
CSineGen(const double si, const double ph)
Constructor initializes this sine-wave signal generator.
Definition lancir.h:1038
double generate()
Generates the next sine-wave sample, without biasing.
Definition lancir.h:1049
Structure defines source scanline positions and filters for each destination pixel.
Definition lancir.h:1227
intptr_t so
Offset within the source scanline, in pixels.
Definition lancir.h:1231
intptr_t spo
Source scanline's pixel offset, in bytes, or a direct pointer to scanline buffer.
Definition lancir.h:1229
const float * flt
Fractional delay filter.
Definition lancir.h:1228
Scanline resizing positions class.
Definition lancir.h:1243
int SrcLen
Current SrcLen.
Definition lancir.h:1378
int poslen
Allocated pos buffer's length.
Definition lancir.h:1377
void update(const int SrcLen0, const int DstLen0, const double o0, CResizeFilters &rf, float *const sp=nullptr)
Scanline positions update function.
Definition lancir.h:1290
int padl
Left-padding (in pixels) required for source scanline.
Definition lancir.h:1245
CResizePos * pos
Source scanline positions (offsets) and filters for each destination pixel position.
Definition lancir.h:1247
void reset()
Object's reset function.
Definition lancir.h:1270
int padr
Right-padding (in pixels) required for source scanline.
Definition lancir.h:1246
double o
Current o.
Definition lancir.h:1380
void updateSPO(CResizeFilters &rf, float *const sp)
Scanline pixel offsets update function.
Definition lancir.h:1364
int DstLen
Current DstLen.
Definition lancir.h:1379