aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Smith <js5@sanger.ac.uk>2021-10-07 11:29:42 +0100
committerJames Smith <js5@sanger.ac.uk>2021-10-07 11:29:42 +0100
commitddd032709998a36ca06d4a82b3e4e8ced43a5485 (patch)
tree4472e0b37f5bbbd35fa584c868832ca8acc4e294
parent12438a4961abbf613dff8d2fe15552ee1e54e632 (diff)
downloadperlweeklychallenge-club-ddd032709998a36ca06d4a82b3e4e8ced43a5485.tar.gz
perlweeklychallenge-club-ddd032709998a36ca06d4a82b3e4e8ced43a5485.tar.bz2
perlweeklychallenge-club-ddd032709998a36ca06d4a82b3e4e8ced43a5485.zip
Update README.md
-rw-r--r--challenge-133/james-smith/README.md118
1 files changed, 115 insertions, 3 deletions
diff --git a/challenge-133/james-smith/README.md b/challenge-133/james-smith/README.md
index e467044ffc..c50473185e 100644
--- a/challenge-133/james-smith/README.md
+++ b/challenge-133/james-smith/README.md
@@ -58,18 +58,130 @@ And the support functions:
We cache the digit sum for each number in `%ds`;
* `prime_factors` - for a prime returns nothing, for a composite returns the factors,
We keep a list of primes `@primes`, and also the prime factors for each composite `%comp`.
-
+ **Note:** We don't need to continue with primes bigger than `sqrt($N)` so short cut the loop.
+
```perl
sub sum { my $t = 0; $t+=$_ foreach @_; $t; }
+
sub sum_digits { return $ds{$_[0]} ||= sum split //, $_[0]; }
sub prime_factors {
my $N = shift;
- ( $N % $_) or ( return @{ $comp{$N} = [ $_, @{$comp{ $N / $_ }||[$N/$_]}] } ) foreach @primes;
+ ( $N % $_) ? ( ( $N < $_ * $_ ) && last )
+ : ( return $sum_pf[$N] = $sum_pf[$N/$_] + $sum_pf[$_] ) foreach @primes;
+ $sum_pf[$N] = cl_sum_digits $N;
push @primes, $N;
- return;
+ return 0;
}
+```
+
+## Performance
+
+A slight modification to the code ( to compute Smith Numbers < N ) computes the 29,928 Smith numbers less than 1 million in about 3.3 seconds, and for 10,000,000 about 45 seconds. For large N the script falls over with out-of-memory errors.
+
+## Re-write in C
+
+The algorithm does lend itself to rewriting in a lower level language - it doesn't use any nice features of Perl except for the "auto-sizing" of arrays - no hashes, complex IO etc.
+
+So here is my first C programme for probably 30 years!
+
+### Timings for C vs perl
+
+| Max value | Count | Time C | Time perl |
+| ------------: | ---------: | ------: | --------: |
+| 100 | 6 | 0.00s | 0.01s |
+| 1,000 | 49 | 0.00s | 0.01s |
+| 10,000 | 376 | 0.00s | 0.03s |
+| 100,000 | 3,924 | 0.01s | 0.26s |
+| 1,000,000 | 29,928 | 0.07s | 3.25s |
+| 10,000,000 | 278,411 | 1.18s | 45.38s |
+| 100,000,000 | 2,632,758 | 23.65s | *fail* |
+| 1,000,000,000 | 25,154,060 | 514.17s | *fail* |
+
+This is the limit for the algorithm (would need 20+ Gbytes for the 10 billion case)
+
+### The C code
+```c
+#include <stdio.h>
+// Compute all Smith Numbers below MAX_N
+// Use guess that PSIZE should approximately 1.1 * PFSIZE / ln(PFSIZE)
+
+// 10^6
+ #define MAX_N 1000000
+ #define PSIZE 42000 // Have to guess this!
+// 10^7
+// #define MAX_N 10000000
+// #define PSIZE 400000 // Have to guess this!
+// 10^8
+// #define MAX_N 100000000
+// #define PSIZE 3200000 // Have to guess this!
+// 10^9
+// #define MAX_N 1000000000
+// #define PSIZE 28000000 // Have to guess this!
+
+#define PFSIZE (MAX_N/2)
+
+// Set up arrays
+int sum_pf[ PFSIZE+1 ], primes[ PSIZE ], prime_index = 0;
+
+// Compute sum of digits - unlike Perl we can't use split
+// so we use repeated modulus/divide by 10..
+
+int sum_digits(int n) {
+ int total = 0;
+ do { total += n%10; } while( n /= 10 );
+ return total;
+}
+
+// Get the sum of prime factors -
+// as we build this in order we only need to find a
+// factorisation then we just add together the
+// digit sum of the two factors (Here for speed we
+// know one will be prime.
+// We go through all primes we have until prime^2
+// is greater than the number itself.
+//
+// To make the last bit easier IF we have a prime
+// we return 0 as not composite...
+//
+// Note to save memory we only store the sum if
+// n < MAX_N/2 as we won't need it again (can't
+// be a factor of a larger number less than MAX_N
+
+int sum_prime_factors( int n ) {
+ int p;
+ for(int i=0; i< prime_index; i++ ) {
+ p = primes[i];
+ if( !(n % p) ) {
+ if( n <= PFSIZE ) {
+ return sum_pf[n] = sum_pf[n/p] + sum_pf[p];
+ } else {
+ return sum_pf[ n/p ] + sum_pf[ p ];
+ }
+ }
+ if( n < p*p ) { break; }
+ }
+ if( n <= PFSIZE ) {
+ sum_pf[ n ] = sum_digits(n);
+ primes[ prime_index++ ] = n;
+ }
+ return 0;
+}
+
+// Main is simple just loop and search, printing out
+// Smith numbers
+int main() {
+ int count = 0, n = 1;
+ while( n++ <= MAX_N ) {
+ if( sum_digits(n) == sum_prime_factors(n) ) {
+ printf( "%11d\n", n );
+ }
+ }
+}
```
+
+**Notes:** In this version we use an optimisation - we know that we will never need to use factorisations or primes greater than `MAX_N/2` so we don't store/cache these.
+