aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordrbaggy <js5@sanger.ac.uk>2021-07-26 20:51:11 +0100
committerdrbaggy <js5@sanger.ac.uk>2021-07-26 20:51:11 +0100
commitdccca56579cf1d83a4c6fbaa26f83766afaa27a7 (patch)
treea4ee018195ec0ddc8c58382062c55f9a265d25e4
parent4ed0ffead5c0399e307c196604d1f30fba5fdd86 (diff)
parent3069523c72138f77a3b1bd7886ed7be15010f821 (diff)
downloadperlweeklychallenge-club-dccca56579cf1d83a4c6fbaa26f83766afaa27a7.tar.gz
perlweeklychallenge-club-dccca56579cf1d83a4c6fbaa26f83766afaa27a7.tar.bz2
perlweeklychallenge-club-dccca56579cf1d83a4c6fbaa26f83766afaa27a7.zip
Merge branch 'master' of github.com:drbaggy/perlweeklychallenge-club
-rw-r--r--challenge-123/james-smith/README.md365
1 files changed, 83 insertions, 282 deletions
diff --git a/challenge-123/james-smith/README.md b/challenge-123/james-smith/README.md
index 7518cedda1..f3a8e508e2 100644
--- a/challenge-123/james-smith/README.md
+++ b/challenge-123/james-smith/README.md
@@ -1,4 +1,4 @@
-# Perl Weekly Challenge #122
+# Perl Weekly Challenge #123
You can find more information about this weeks, and previous weeks challenges at:
@@ -12,313 +12,114 @@ You can find the solutions here on github at:
https://github.com/drbaggy/perlweeklychallenge-club/tree/master/challenge-122/james-smith/perl
-# Task 1 - Average of Stream
+# Task 1 - Ugly Numbers
-***You are given a stream of numbers, `@N`. Write a script to print the average of the stream at every point.***
+***You are given an integer `$n >= 1`. Write a script to find the $nth element of Ugly Numbers.***
-## The solution
-
-Firstly - we create a "stream object" - we use a single function
-`stream()` for this which is a get/setter - call with a sequence of data
-and this pushes the values onto the stream. Call it without and it
-returns the first value of the stream OR dies.
+**Defn:** Ugly numbers are those number whose prime factors are 2, 3 or 5. For example, the first 10 Ugly Numbers are 1, 2, 3, 4, 5, 6, 8, 9, 10, 12.
-`stream_average` just pulls the next value from the stream (or dies)
-and computes the average -- updates total(`$t`) and count(`$n`) -- and
-returns the `$t`/`$n`
-
-```perl
-stream( map {$_*10} 1..50 ); ## Push values into stream...
-
-eval {say stream_average();} until $@;
-
-sub stream {
- state(@stream);
- @_ ? (push @stream,@_)
- : @stream ? shift @stream
- : die;
-}
-
-sub stream_average {
- state($n,$t);
- return ($t+=stream) / ++$n;
- }
-
-```
-
-# Task 2 - Basketball Points
+## The solution
-***You are given a score `$S`. You can win basketball points e.g. 1 point, 2 points and 3 points. Write a script to find out the different ways you can score `$S`..***
+There are two ways of working out the *nth* ugly number - we either have to search all numbers starting at 1 counting ugly numbers -OR- do something more "intellegent".
-## Solution.
+The former works well for small `n`, but doesn't scale well as the ugly numbers become more sparse.
-To get the combinations for a give number - we can shoot a 1, 2 or 3 point shot and then reconsider the combinations for the remaining point.
+### Method
-This leads us to a simple recursive solution.
+Any Ugly number is either a multiple of 2, 3 or 5 of another Ugly number. So to find the next ugly number we multiple all ugly numbers by 2, 3 or 5 and find the lowest value greater than the last ugly seen.
```perl
-sub pts {
- return $cache{$_[0]} ||= $_[0] < 1 ? [] : [
- map( {'1'.$_} @{pts( $_[0] - 1 )} ),
- map( {'2'.$_} @{pts( $_[0] - 2 )} ),
- map( {'3'.$_} @{pts( $_[0] - 3 )} )
- ];
+sub nth_ugly {
+ my $n = shift;
+ my @uglies = (1);
+ while(1) {
+ return $uglies[$n-1] if $n <= @uglies;
+ ## Get the next ugly....
+ my $next = 0;
+ foreach my $l (5,3,2) {
+ foreach(@uglies) {
+ next if $_ * $l <= $uglies[-1];
+ $next = $_*$l if !$next || $next > $_*$l;
+ last;
+ }
+ }
+ push @uglies, $next;
+ }
}
-```
-
-**Note** To reduce complexity we pre-populate the cache for the first three cases:
- 1. "1"
- 2. "11" & "2"
- 3. "111", "12", "21" and "3"
+```
-## Solution 2 - streaming
+We cache the values internally in the function - in the `state` variable `@uglies`
-A caching/collecting solution always fails as the size of the data increases beyond the size of the memory of the machine. So we need to investigate
-a streaming solution.
+### Optimization
-The first parameter is again the total we have to match. We keep track of the scores we have already seen and pass this as the 2nd parameter.
-If we overshoot our total then the first parameter is less than 0 and we generate nothing or equal to run and display the sequence of points.
+We can speed this up by short-cutting the inner loop - by remembering which was the first ugly failed the check `$_ * $l <= $uglies[-1]`.
```perl
-sub pts_streaming {
- return if $_[0]<0;
- return say $_[1] unless $_[0];
- pts_streaming( $_[0]-$_, $_[1].$_ ) foreach 1..3;
+sub nth_ugly_opt {
+ my $n = shift;
+ my @uglies = (1);
+ my @starts = (0,0,0,0,0,0);
+ while(1) {
+ return $uglies[$n-1] if $n <= @uglies;
+ ## Get the next ugly....
+ my $next = 0;
+ foreach my $l (5,3,2) {
+ foreach ( $starts[$l] .. $#uglies ) {
+ next if $uglies[$_] * $l <= $uglies[-1];
+ $starts[$l]=$_;
+ $next = $uglies[$_]*$l if !$next || $next > $uglies[$_]*$l;
+ last;
+ }
+ }
+ push @uglies, $next;
+ }
}
```
-There are a number of additional calls needed `$_[0] < 1`, we can optimize these out by using the pre-poulated part of the cache above.
-
-```perl
-sub pts_streaming_opt {
- return say "$_[1]1" if $_[0] == 1;
- return say "$_[1]11\n$_[1]2" if $_[0] == 2;
- return say "$_[1]111\n$_[1]12\n$_[1]21\n$_[1]3" if $_[0] == 3;
- pts_streaming_opt( $_[0]-$_, $_[1].$_ ) foreach 1..3;
-}
-```
+The extra overhead causes the optimized method to be slower than the simple method for low values of `$n` - but as `$n` goes up it saves many loop evaluations. Flip happens when `$n` is around `18` or `19`. By the time we get to `$n` as 1000 the optimized version is around 50 times faster than the naive one. For `$n = 10_0000` one of the largest values for which the ugly is less than 2^64 (and so all calculations are integer based) in around 600 times faster.
-If the score left is less than 4 we use the same pre-cache we had before to populate the values. This improves performance - for example for the
-30 point solution we reduce the function calls from 192 million to 31 million and time taken from 3 minutes 20 seconds to just 55 seconds with this
-simple tweak. This corresponds to an approximate reduction of 6.2 in function calls and about 3.5 reduction in time.
+Below shows these comparisons - against a simple scan algorithm (with caching of uglies). We can see that for values up to around 20-30 the scan method (because of it's simplicity is fastest) and the uglies are relatively dense. But soon the advantage of our message is seen - by the time `$n` reaches 500 (Ugly is 937,500) to optimized
+method is around 2000x faster than the scan method.
-## Comparing solutions
+| n | Ugly(n) | Scan | Simple | Optimized | Opt v simple | Opt v scan | Simple v scan |
+| -----: | ----------------------: | ----------: | ----------: | ----------: | -----------: | ------------: | ------------: |
+| 1 | 1 | 1,693,446/s | 3,131,931/s | 1,922,126/s | -39% | 14% | 14% |
+| 2 | 2 | 844,419/s | 1,019,073/s | 537,358/s | -47% | -36% | 21% |
+| 5 | 5 | 294,998/s | 287,307/s | 167,641/s | -42% | -43% | -3% |
+| 10 | 12 | 121,349/s | 109,978/s | 78,936/s | -28% | -35% | -11% |
+| 20 | 36 | 40,792/s | 35,257/s | 38,068/s | 5% | -7% | -11% |
+| 50 | 243 | 6,098/s | 6,656/s | 15,056/s | 126% | 148% | 10% |
+| 100 | 1,536 | 941/s | 1,653/s | 7,423/s | 349% | 688% | 76% |
+| 200 | 16,200 | 83.5/s | 395/s | 3,700/s | 843% | 4,333% | 374% |
+| 500 | 937,500 | 0.8/s | 58.8/s | 1,479/s | 2,417% | 185,492% | 7,273% |
+| 1,000 | 51,200,000 | | 13.8/s | 687/s | 4,866% | - | - |
+| 2,000 | 8,062,156,800 | | 3.35/s | 352/s | 10,402% | - | - |
+| 5,000 | 50,837,316,566,580 | | 0.49/s | 138/s | 28,036% | - | - |
+| 10,000 | 288,325,195,312,500,000 | | 0.11/s | 67.6/s | 57,754% | - | - |
-By comparing timings on the 2G test machine we note that up to a score of 26 the caching solution is efficient [basically everything in physical memory] but
-after this streaming solution is the only real option. The difference in time up to this point though is not that great. After we get past 27 the caching algorithm no longer executes - but as you can see the streaming solution continues going - just consuming more and more time (increases by a factor of 21 for every 5 extra points). The estimated size of the output file for `n=40` is around 600Gbytes, for `n=50` it would generate a 300 Terrabyte file with 10 trillion combinations in around 6 days!
+# Task 2 - Square Points
-| n | ways | calls cache | memory cache | time cache | calls stream | memory stream | time stream |
-| --: | --: | --: | --: | --: | --: | --: | --: |
-| 5 | 13 | 7 | 9,208K | 0.011 | 7 | 9,196K | 0.011 |
-| 10 | 274 | 22 | 9,204K | 0.010 | 157 | 9,096K | 0.011 |
-| 15 | 5,768 | 37 | 10,488K | 0.015 | 3,313 | 9,096K | 0.016 |
-| 20 | 121,415 | 52 | 38,916K | 0.117 | 69,748 | 9,100K | 0.134 |
-| 25 | 2,555,755 | 67 | 631M | 2.371 | 1,468,189 | 9,096K | 2.732 |
-| 26 | 4,700,770 | 70 | 1,156M | 4.620 | 2,700,421 | 9,100K | 4.930 |
-| 27 | 8,646,064 | 73 | 2,125M | 23.423 | 4,966,849 | 9,200K | 8.958 |
-| 28 | 15,902,591 | - | - | - | 9,135,460 | 9,096K | 16.606 |
-| 29 | 29,459,425 | - | - | - | 16,802,731 | 9,204K | 31.175 |
-| 30 | 53,798,080 | - | - | - | 30,905,041 | 9,200K | 61.527 |
-| 35 | 1,132,436,852 | - | - | - | 650,543,809 | 9,200K | 0:20:04 |
-| 40 | 23,837,527,729 | - | - | - | 13,693,793,230 | 9,196K | 6:56:20 |
+***You are given coordinates of four points i.e. (x1, y1), (x2, y2), (x3, y3) and (x4, y4). Write a script to find out if the given four points form a square.***
-## Number of solutions...
+## Solution.
-As we have the relationship that if `T(n)` is the number of score combinations for a total of `n` we have:
+First we need to think how we define a square - it has 4 sides of equal length and sides at right angles. If we want to define it terms of distances between points we have 4 pairs of points that are the same distance apart and two pairs of points which are at `sqrt(2)` times this distance.
-```
- T(n) = T(n-1) + T(n-2) + T(n-3)
-```
+We therefore measure the squares of the distances between the points, and collect them together. If the list of distances is 2, and the ratio of the squares of the distance is 2 then we have a square.
-We see that the sequence of numbers is the *Tribonacci* sequence. Listed
-below to 186 - the highest score in NBA history about ten-quindecillion (10^49) ways to get to that score. For that the output file would be 10^51 bytes (names stop at yottabyte ~ 10^24) in size and take around the 35 decillion years...!
+ * The `while/foreach` loops calculate the square of the distances between points, and stores these in the hash `%dist` where the distance is the key.
+ * We flip the hash so that the keys become values and values become keys. This allows us to check to see if we have one length 4 times and one length 2 times, for belt and braces we check the ratio of the length of the diagonal vs the length of the edges of the sides to see that it is 2.
-```
-1 1
-2 2
-3 4
-4 7
-5 13
-6 24
-7 44
-8 81
-9 149
-10 274
-11 504
-12 927
-13 1,705
-14 3,136
-15 5,768
-16 10,609
-17 19,513
-18 35,890
-19 66,012
-20 121,415
-21 223,317
-22 410,744
-23 755,476
-24 1,389,537
-25 2,555,757
-26 4,700,770
-27 8,646,064
-28 15,902,591
-29 29,249,425
-30 53,798,080
-31 98,950,096
-32 181,997,601
-33 334,745,777
-34 615,693,474
-35 1,132,436,852
-36 2,082,876,103
-37 3,831,006,429
-38 7,046,319,384
-39 12,960,201,916
-40 23,837,527,729
-41 43,844,049,029
-42 80,641,778,674
-43 148,323,355,432
-44 272,809,183,135
-45 501,774,317,241
-46 922,906,855,808
-47 1,697,490,356,184
-48 3,122,171,529,233
-49 5,742,568,741,225
-50 10,562,230,626,642
-51 19,426,970,897,100
-52 35,731,770,264,967
-53 65,720,971,788,709
-54 120,879,712,950,776
-55 222,332,455,004,452
-56 408,933,139,743,937
-57 752,145,307,699,165
-58 1,383,410,902,447,554
-59 2,544,489,349,890,656
-60 4,680,045,560,037,375
-61 8,607,945,812,375,585
-62 15,832,480,722,303,616
-63 29,120,472,094,716,576
-64 53,560,898,629,395,777
-65 98,513,851,446,415,969
-66 181,195,222,170,528,322
-67 333,269,972,246,340,068
-68 612,979,045,863,284,359
-69 1,127,444,240,280,152,749
-70 2,073,693,258,389,777,176
-71 3,814,116,544,533,214,284
-72 7,015,254,043,203,144,209
-73 12,903,063,846,126,135,669
-74 23,732,434,433,862,494,162
-75 43,650,752,323,191,774,040
-76 80,286,250,603,180,403,871
-77 147,669,437,360,234,672,073
-78 271,606,440,286,606,849,984
-79 499,562,128,250,021,925,928
-80 918,838,005,896,863,447,985
-81 1,690,006,574,433,492,223,897
-82 3,108,406,708,580,377,597,810
-83 5,717,251,288,910,733,269,692
-84 10,515,664,571,924,603,091,399
-85 19,341,322,569,415,713,958,901
-86 35,574,238,430,251,050,319,992
-87 65,431,225,571,591,367,370,292
-88 120,346,786,571,258,131,649,185
-89 221,352,250,573,100,549,339,469
-90 407,130,262,715,950,048,358,946
-91 748,829,299,860,308,729,347,600
-92 1,377,311,813,149,359,327,046,015
-93 2,533,271,375,725,618,104,752,561
-94 4,659,412,488,735,286,161,146,176
-95 8,569,995,677,610,263,592,944,752
-96 15,762,679,542,071,167,858,843,489
-97 28,992,087,708,416,717,612,934,417
-98 53,324,762,928,098,149,064,722,658
-99 98,079,530,178,586,034,536,500,564
-100 180,396,380,815,100,901,214,157,639
-101 331,800,673,921,785,084,815,380,861
-102 610,276,584,915,472,020,566,039,064
-103 1,122,473,639,652,358,006,595,577,564
-104 2,064,550,898,489,615,111,976,997,489
-105 3,797,301,123,057,445,139,138,614,117
-106 6,984,325,661,199,418,257,711,189,170
-107 12,846,177,682,746,478,508,826,800,776
-108 23,627,804,467,003,341,905,676,604,063
-109 43,458,307,810,949,238,672,214,594,009
-110 79,932,289,960,699,059,086,717,998,848
-111 147,018,402,238,651,639,664,609,196,920
-112 270,409,000,010,299,937,423,541,789,777
-113 497,359,692,209,650,636,174,868,985,545
-114 914,787,094,458,602,213,263,019,972,242
-115 1,682,555,786,678,552,786,861,430,747,564
-116 3,094,702,573,346,805,636,299,319,705,351
-117 5,692,045,454,483,960,636,423,770,425,157
-118 10,469,303,814,509,319,059,584,520,878,072
-119 19,256,051,842,340,085,332,307,611,008,580
-120 35,417,401,111,333,365,028,315,902,311,809
-121 65,142,756,768,182,769,420,208,034,198,461
-122 119,816,209,721,856,219,780,831,547,518,850
-123 220,376,367,601,372,354,229,355,484,029,120
-124 405,335,334,091,411,343,430,395,065,746,431
-125 745,527,911,414,639,917,440,582,097,294,401
-126 1,371,239,613,107,423,615,100,332,647,069,952
-127 2,522,102,858,613,474,875,971,309,810,110,784
-128 4,638,870,383,135,538,408,512,224,554,475,137
-129 8,532,212,854,856,436,899,583,867,011,655,873
-130 15,693,186,096,605,450,184,067,401,376,241,794
-131 28,864,269,334,597,425,492,163,492,942,372,804
-132 53,089,668,286,059,312,575,814,761,330,270,471
-133 97,647,123,717,262,188,252,045,655,648,885,069
-134 179,601,061,337,918,926,320,023,909,921,528,344
-135 330,337,853,341,240,427,147,884,326,900,683,884
-136 607,586,038,396,421,541,719,953,892,471,097,297
-137 1,117,524,953,075,580,895,187,862,129,293,309,525
-138 2,055,448,844,813,242,864,055,700,348,665,090,706
-139 3,780,559,836,285,245,300,963,516,370,429,497,528
-140 6,953,533,634,174,069,060,207,078,848,387,897,759
-141 12,789,542,315,272,557,225,226,295,567,482,485,993
-142 23,523,635,785,731,871,586,396,890,786,299,881,280
-143 43,266,711,735,178,497,871,830,265,202,170,265,032
-144 79,579,889,836,182,926,683,453,451,555,952,632,305
-145 146,370,237,357,093,296,141,680,607,544,422,778,617
-146 269,216,838,928,454,720,696,964,324,302,545,675,954
-147 495,166,966,121,730,943,522,098,383,402,921,086,876
-148 910,754,042,407,278,960,360,743,315,249,889,541,447
-149 1,675,137,847,457,464,624,579,806,022,955,356,304,277
-150 3,081,058,855,986,474,528,462,647,721,608,166,932,600
-151 5,666,950,745,851,218,113,403,197,059,813,412,778,324
-152 10,423,147,449,295,157,266,445,650,804,376,936,015,201
-153 19,171,157,051,132,849,908,311,495,585,798,515,726,125
-154 35,261,255,246,279,225,288,160,343,449,988,864,519,650
-155 64,855,559,746,707,232,462,917,489,840,164,316,260,976
-156 119,287,972,044,119,307,659,389,328,875,951,696,506,751
-157 219,404,787,037,105,765,410,467,162,166,104,877,287,377
-158 403,548,318,827,932,305,532,773,980,882,220,890,055,104
-159 742,241,077,909,157,378,602,630,471,924,277,463,849,232
-160 1,365,194,183,774,195,449,545,871,614,972,603,231,191,713
-161 2,510,983,580,511,285,133,681,276,067,779,101,585,096,049
-162 4,618,418,842,194,637,961,829,778,154,675,982,280,136,994
-163 8,494,596,606,480,118,545,056,925,837,427,687,096,424,756
-164 15,623,999,029,186,041,640,567,980,059,882,770,961,657,799
-165 28,737,014,477,860,798,147,454,684,051,986,440,338,219,549
-166 52,855,610,113,526,958,333,079,589,949,296,898,396,302,104
-167 97,216,623,620,573,798,121,102,254,061,166,109,696,179,452
-168 178,809,248,211,961,554,601,636,528,062,449,448,430,701,105
-169 328,881,481,946,062,311,055,818,372,072,912,456,523,182,661
-170 604,907,353,778,597,663,778,557,154,196,528,014,650,063,218
-171 1,112,598,083,936,621,529,436,012,054,331,889,919,603,946,984
-172 2,046,386,919,661,281,504,270,387,580,601,330,390,777,192,863
-173 3,763,892,357,376,500,697,484,956,789,129,748,325,031,203,065
-174 6,922,877,360,974,403,731,191,356,424,062,968,635,412,342,912
-175 12,733,156,638,012,185,932,946,700,793,794,047,351,220,738,840
-176 23,419,926,356,363,090,361,623,014,006,986,764,311,664,284,817
-177 43,075,960,355,349,680,025,761,071,224,843,780,298,297,366,569
-178 79,229,043,349,724,956,320,330,786,025,624,591,961,182,390,226
-179 145,724,930,061,437,726,707,714,871,257,455,136,571,144,041,612
-180 268,029,933,766,512,363,053,806,728,507,923,508,830,623,798,407
-181 492,983,907,177,675,046,081,852,385,791,003,237,362,950,230,245
-182 906,738,771,005,625,135,843,373,985,556,381,882,764,718,070,264
-183 1,667,752,611,949,812,544,979,033,099,855,308,628,958,292,098,916
-184 3,067,475,290,133,112,726,904,259,471,202,693,749,085,960,399,425
-185 5,641,966,673,088,550,407,726,666,556,614,384,260,808,970,568,605
-186 10,377,194,575,171,475,679,609,959,127,672,386,638,853,223,066,946
+```perl
+sub is_square {
+ my @pts = @_;
+ my %dist;
+ while(@pts>1) {
+ my $p = shift @pts;
+ $dist{ ($p->[0]-$_->[0]) ** 2 + ($p->[1]-$_->[1]) ** 2 }++ foreach @pts;
+ }
+ my %flip = reverse %dist;
+ return exists $flip{2} && exists $flip{4} && $flip{2} == 2*$flip{4} || 0;
+}
```