Skip to content

Commit 9a19e57

Browse files
committed
improve comments
1 parent 4565ac2 commit 9a19e57

File tree

4 files changed

+28
-24
lines changed

4 files changed

+28
-24
lines changed

src/main/java/de/tilman_neumann/jml/factor/siqs/sieve/Sieve03h.java

+7-6
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
487487
}
488488

489489
// Pass 1: Test solution arrays.
490-
// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
491-
// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
490+
// The performance bottleneck here is the modulus computation.
491+
// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
492+
// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
493+
// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
494+
// We can use the long-variant here because x*m will never overflow positive long values.
495+
// For some reason I do not understand yet, it is faster to divide Q by p in pass 2 only, not here.
492496
int pass2Count = 0;
493497
int[] pArray = solutionArrays.pArray;
494498
int[] primes = solutionArrays.primes;
@@ -497,14 +501,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
497501
int[] x1Array = solutionArrays.x1Array, x2Array = solutionArrays.x2Array;
498502

499503
final int xAbs = x<0 ? -x : x;
500-
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 was already tested
504+
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 has already been tested
501505
int p = pArray[pIndex];
502506
int xModP;
503507
if (xAbs<p) {
504508
xModP = x<0 ? x+p : x;
505509
} else {
506-
// Compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
507-
// We can use the long-variant here because x*m will never overflow positive long values.
508510
final long m = pinvArrayL[pIndex];
509511
final long q = ( ( ((long)x) * m) >>> 32); // first argument long optimizes register usage
510512
xModP = (int) ( ((long)x) - q * p);
@@ -525,7 +527,6 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
525527
pass2Exponents[pass2Count] = exponents[pIndex];
526528
pass2LogPArray[pass2Count] = smallPrimesLogPArray[pIndex];
527529
pass2Powers[pass2Count++] = p;
528-
// for some reasons I do not understand it is faster to divide Q by p in pass 2 only, not here
529530
}
530531
}
531532

src/main/java/de/tilman_neumann/jml/factor/siqs/sieve/Sieve03hU.java

+7-6
Original file line numberDiff line numberDiff line change
@@ -598,8 +598,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
598598
}
599599

600600
// Pass 1: Test solution arrays.
601-
// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
602-
// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
601+
// The performance bottleneck here is the modulus computation.
602+
// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
603+
// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
604+
// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
605+
// We can use the long-variant here because x*m will never overflow positive long values.
606+
// For some reason I do not understand yet, it is faster to divide Q by p in pass 2 only, not here.
603607
int pass2Count = 0;
604608
int[] pArray = solutionArrays.pArray;
605609
int[] primes = solutionArrays.primes;
@@ -608,14 +612,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
608612
int[] x1Array = solutionArrays.x1Array, x2Array = solutionArrays.x2Array;
609613

610614
final int xAbs = x<0 ? -x : x;
611-
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 was already tested
615+
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 has already been tested
612616
int p = pArray[pIndex];
613617
int xModP;
614618
if (xAbs<p) {
615619
xModP = x<0 ? x+p : x;
616620
} else {
617-
// Compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
618-
// We can use the long-variant here because x*m will never overflow positive long values.
619621
final long m = pinvArrayL[pIndex];
620622
final long q = ( ( ((long)x) * m) >>> 32); // first argument long optimizes register usage
621623
xModP = (int) ( ((long)x) - q * p);
@@ -636,7 +638,6 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
636638
pass2Exponents[pass2Count] = exponents[pIndex];
637639
pass2LogPArray[pass2Count] = smallPrimesLogPArray[pIndex];
638640
pass2Powers[pass2Count++] = p;
639-
// for some reasons I do not understand it is faster to divide Q by p in pass 2 only, not here
640641
}
641642
}
642643

src/main/java/de/tilman_neumann/jml/factor/siqs/sieve/SingleBlockSieve.java

+7-6
Original file line numberDiff line numberDiff line change
@@ -488,8 +488,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
488488
}
489489

490490
// Pass 1: Test solution arrays.
491-
// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
492-
// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
491+
// The performance bottleneck here is the modulus computation.
492+
// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
493+
// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
494+
// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
495+
// We can use the long-variant here because x*m will never overflow positive long values.
496+
// For some reason I do not understand yet, it is faster to divide Q by p in pass 2 only, not here.
493497
int pass2Count = 0;
494498
int[] pArray = solutionArrays.pArray;
495499
int[] primes = solutionArrays.primes;
@@ -498,14 +502,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
498502
int[] x1Array = solutionArrays.x1Array, x2Array = solutionArrays.x2Array;
499503

500504
final int xAbs = x<0 ? -x : x;
501-
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 was already tested
505+
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 has already been tested
502506
int p = pArray[pIndex];
503507
int xModP;
504508
if (xAbs<p) {
505509
xModP = x<0 ? x+p : x;
506510
} else {
507-
// Compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
508-
// We can use the long-variant here because x*m will never overflow positive long values.
509511
final long m = pinvArrayL[pIndex];
510512
final long q = ( ( ((long)x) * m) >>> 32); // first argument long optimizes register usage
511513
xModP = (int) ( ((long)x) - q * p);
@@ -526,7 +528,6 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
526528
pass2Exponents[pass2Count] = exponents[pIndex];
527529
pass2LogPArray[pass2Count] = smallPrimesLogPArray[pIndex];
528530
pass2Powers[pass2Count++] = p;
529-
// for some reasons I do not understand it is faster to divide Q by p in pass 2 only, not here
530531
}
531532
}
532533

src/main/java/de/tilman_neumann/jml/factor/siqs/sieve/SingleBlockSieveU.java

+7-6
Original file line numberDiff line numberDiff line change
@@ -598,8 +598,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
598598
}
599599

600600
// Pass 1: Test solution arrays.
601-
// IMPORTANT: Java gives x % p = x for |x| < p, and we have many p bigger than any sieve array entry.
602-
// IMPORTANT: Not computing the modulus in these cases improves performance by almost factor 2!
601+
// The performance bottleneck here is the modulus computation.
602+
// The current approach is already quite fast for large N, because then we have pMax > 3*sieveArraySize,
603+
// which means that for ~75% of x-values we can completely omit the mod-computation or replace it by a simple addition.
604+
// For (big |x|, small p) we compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
605+
// We can use the long-variant here because x*m will never overflow positive long values.
606+
// For some reason I do not understand yet, it is faster to divide Q by p in pass 2 only, not here.
603607
int pass2Count = 0;
604608
int[] pArray = solutionArrays.pArray;
605609
int[] primes = solutionArrays.primes;
@@ -608,14 +612,12 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
608612
int[] x1Array = solutionArrays.x1Array, x2Array = solutionArrays.x2Array;
609613

610614
final int xAbs = x<0 ? -x : x;
611-
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 was already tested
615+
for (int pIndex = pMinIndex-1; pIndex > 0; pIndex--) { // p[0]=2 has already been tested
612616
int p = pArray[pIndex];
613617
int xModP;
614618
if (xAbs<p) {
615619
xModP = x<0 ? x+p : x;
616620
} else {
617-
// Compute x%p using long-valued Barrett reduction, see https://en.wikipedia.org/wiki/Barrett_reduction.
618-
// We can use the long-variant here because x*m will never overflow positive long values.
619621
final long m = pinvArrayL[pIndex];
620622
final long q = ( ( ((long)x) * m) >>> 32); // first argument long optimizes register usage
621623
xModP = (int) ( ((long)x) - q * p);
@@ -636,7 +638,6 @@ private SmoothCandidate tdivUnsievedPrimeBaseElements(BigInteger A, BigInteger Q
636638
pass2Exponents[pass2Count] = exponents[pIndex];
637639
pass2LogPArray[pass2Count] = smallPrimesLogPArray[pIndex];
638640
pass2Powers[pass2Count++] = p;
639-
// for some reasons I do not understand it is faster to divide Q by p in pass 2 only, not here
640641
}
641642
}
642643

0 commit comments

Comments
 (0)