- Added solutions by Colin Crain.

author: Mohammad S Anwar <mohammad.anwar@yahoo.com> 2022-07-10 18:49:27 +0100
committer: Mohammad S Anwar <mohammad.anwar@yahoo.com> 2022-07-10 18:49:27 +0100
commit: 3ab8077572bab376dce07950aeb7bcb0331528ee (patch)
tree: bfe26c8ff9f2d93c8caf68b7fabb506335ebd7ed /challenge-172
parent: 54432b8360c5c63a4634657360b2d35f98931c0c (diff)
download: perlweeklychallenge-club-3ab8077572bab376dce07950aeb7bcb0331528ee.tar.gz
perlweeklychallenge-club-3ab8077572bab376dce07950aeb7bcb0331528ee.tar.bz2
perlweeklychallenge-club-3ab8077572bab376dce07950aeb7bcb0331528ee.zip
3 files changed, 314 insertions, 0 deletions
diff --git a/challenge-172/colin-crain/blog.txt b/challenge-172/colin-crain/blog.txt
new file mode 100644
index 0000000000..cea4301c54
--- /dev/null
+++ b/challenge-172/colin-crain/blog.txt
@@ -0,0 +1 @@
+https://colincrain.com/2022/07/10/five-finger-stat-punch
diff --git a/challenge-172/colin-crain/perl/ch-1.pl b/challenge-172/colin-crain/perl/ch-1.pl
new file mode 100755
index 0000000000..7013c0b1a7
--- /dev/null
+++ b/challenge-172/colin-crain/perl/ch-1.pl
@@ -0,0 +1,89 @@
+#!/Users/colincrain/perl5/perlbrew/perls/perl-5.32.0/bin/perl
+#
+#       partings-in-the-prime-of-life.pl
+#
+#       Prime Partition
+#         Submitted by: Mohammad S Anwar
+#         You are given two positive integers, $m and $n.
+# 
+#         Write a script to find out the Prime Partition of the given number.
+#         No duplicates allowed.
+# 
+#         For example,
+# 
+#             Input: $m = 18, $n = 2
+#             Output: 5, 13 or 7, 11
+#     
+#             Input: $m = 19, $n = 3
+#             Output: 3, 5, 11
+
+#         method:
+# 
+#         A decidedly non trivial problem for a first task this week!
+# 
+#         Prime partitioning is a decidely complex math problem, and it
+#         seems the only easily-managed, albeit computationallyexpensive
+#         solution is to investigate all patterns of or the list of primes
+#         smaller than the target number to see whether they can be summed
+#         to resolve correctly.
+# 
+#         As the resulting lists will be of largely-unknownn and of variable
+#         lengths, a recursive solution is warranted. A prime is
+#         added to a partial list, a partial sum is accumulated, and then
+#         the partial pattern and sum are handed to the routine again, this
+#         time searching for a new target properly reduced by the prime
+#         that was added.
+# 
+
+#
+#       © 2022 colin crain
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+
+
+
+use warnings;
+use strict;
+use utf8;
+use feature ":5.26";
+use feature qw(signatures);
+no warnings 'experimental::signatures';
+
+use List::Util qw( sum0 );
+
+my  $target = shift @ARGV // 19;
+
+our @parts;
+my  @prime_list = sieve( $target );
+
+pparts( $target, 0, \@prime_list, [] );
+
+say "$_->@*" for @parts;
+
+
+## subs
+
+sub pparts( $target, $sum, $primes, $try ) {
+## recursive partions using only prime values from given list
+## alters global @parts array to record results
+    push @parts, $try and return if $sum == $target;
+    return                       if $primes->@* == 0; 
+    
+    for my $p ( $primes->@* ) {
+        my $new_sum     = $sum + $p;
+        my @new_primes  = grep { 
+                              $p < $_ && $new_sum + $_ <= $target 
+                          } $primes->@*;
+        pparts( $target, $new_sum, \@new_primes, [$try->@*, $p]);
+    }
+}
+
+sub sieve ($limit) {
+##  sieve of Eratosthenes
+    my @s = (1) x $limit;
+    @s[0,1] = (0,0);    
+    for my $f (2..sqrt($limit)) {    
+        $s[$f * $_] = 0 for $f..$limit/$f;
+    }
+    return grep { $s[$_] } (0..$limit);
+}
+
diff --git a/challenge-172/colin-crain/perl/ch-2.pl b/challenge-172/colin-crain/perl/ch-2.pl
new file mode 100755
index 0000000000..e6db3669a8
--- /dev/null
+++ b/challenge-172/colin-crain/perl/ch-2.pl
@@ -0,0 +1,224 @@
+#!/Users/colincrain/perl5/perlbrew/perls/perl-5.32.0/bin/perl
+#
+#       five-finger-stat-punch.pl
+#
+#       Five-number Summary
+#         Submitted by: Mohammad S Anwar
+#         You are given an array of integers.
+# 
+#         Write a script to compute the five-number summary of the given
+#         set of integers.
+# 
+#         You can find the definition and example in the 
+#         [wikipedia page](https://en.wikipedia.org/wiki/Five-number_summary).
+# 
+#       BACKGROUND
+# 
+#         In statistics, mathematical analysis is performed on what are
+#         large datasets. In this analysis, individual data points are
+#         deemphasized, and we look beyond the individual values in an
+#         effort to infer general statements about the collection as  a
+#         whole. The data itself is usually empirical in origin, gathered
+#         from real-world phenomena, and as such are always subject to some
+#         degree of collection or recording error. Much of statistical
+#         analysis addresses the precision within the dataset, assigning
+#         numerical weights to the confidence in the values, which can be
+#         mathematically extrapolated as a caveat onto any conclusions
+#         made. The analysis, after all, can only be as good as the data it
+#         comes from.
+# 
+#         Given some collection of data, when seeking a normal,
+#         representative, middle value, it might not be immediately obvious
+#         that this idea can be approached in more than one way. There are
+#         in fact quite a few. We could for instance find an arithmetic
+#         average, a mean value, summing and dividing, but that is only one
+#         of several mean values available to us.  We explored this idea in
+#         PWC157, calculating the Pythagorean Means. Ultimately the
+#         derivation of the data itself determines the best fit for the
+#         job.
+# 
+#         But even the average, an average, isn't necessarily meaningfully
+#         representative. Another thing we could look at is what value
+#         occurs most often. To take an extreme case, if the data isn't
+#         numerically related at all this can be a better way to summarize,
+#         obviously. If one number keeps showing up there is presumably
+#         some reason to be inferred.
+# 
+#         One somewhat radical approach to select a representative sample
+#         is to order the data and take the literal middle value.
+#         Arithmetically this doesn't make much sense, but statistically it
+#         does, as one unexpected benefit of this method is its remarkable
+#         resiliency against spurious, outlier data points.
+# 
+#         Given a collection of values of human heights, one might expect a
+#         range between extremes,  with babies at maybe 30cm and large
+#         adults at somewhat over 200cm. Averaging all of these might tell
+#         you something, but I'm not sure what. Depending on how many
+#         babies are involved the number might be around say, 150cm?  Just
+#         guessing here. However a single data entry error for the
+#         kilometers traveled on business for a year: 5000km, would not
+#         only make that average useless but also poison any further use of
+#         the value downstream. Using a median, though, the bad outlier
+#         data would only move the central element slightly, with about
+#         equal probability up or down. Our monsterous error has been
+#         transformed into a slight loss of precision.
+# 
+#         The quartitles are an extension of this data-levelling idea into
+#         two more subdivisions on either side of the median, to slot in
+#         and combine with the median to provide points at 25%, 50%, and
+#         75%. Exactly how these points are arrived at is not universally
+#         decided.
+# 
+#         The problem arrives when we wish to  take the median of one-half
+#         the data. If the whole set has a discrete median, do we include
+#         that when calculating one-half the data? Including it will make
+#         each computed half slightly larger than a real half, and not
+#         including it will make the cut smaller. There are more exact
+#         methods to accommodate more even division but we have to stop and
+#         consider the imprecision of the median itself before we make
+#         things more unnecessarily more complicated. Uncertainty carries
+#         over through the calculations, and the median is not the mean.
+# 
+#         It should be added that should we require a rigorous mathematical
+#         certainty there are well-defined approaches to looking at the way
+#         data is distributed within a range, such as the standard
+#         deviation. So we have that if we want it. On the other hand if
+#         we're looking for the middle, a representative low number, a
+#         representative high number, and a set of outer boundaries that
+#         all the data lies inside, then the 5-number summary has proven to
+#         be a pretty practical start at getting the feel of a dataset �
+#         arithmetically imperfect as it may be.
+# 
+#       METHOD
+# 
+#         Depending on whether the number of elements under consideration
+#         is even or odd, to find the median we either select the center
+#         element from the list or average the two elements either side of
+#         the center. In this simplest case of an arithmetic mean we add
+#         the two values and divide by 2.
+# 
+#         Then, depending on whether the left and right � equal � sides
+#         around the median have an even or odd count we need to again
+#         select either a discrete center element or average the two
+#         surrounding.
+# 
+#         Because again there oddly is no rigorous consistency in defining
+#         the calculation of the 1st and 3rd quartiles, and these differing
+#         approaches result in differing outcomes, we will need to pick
+#         one. We will use the common method of halving the array if the
+#         median point lies between two values, and if the median is a
+#         discreet element not including it in the half-array length.
+# 
+#         We are left with four cases to evaluate, for each of the four
+#         combinations of even and odd numbers of data points in both the
+#         full dataset and our chosen determined half-size.
+# 
+#         I still find it exceedingly odd that the method for determining
+#         the 25% and 75% medians, if you will, are not well-defined.
+#         However in the statistical context of the summary, a closer look
+#         suggests an essentially parallel ambiguity in what we want from
+#         our middle value. So in a weird way the ambiguity is consistent.
+# 
+#         After all, the statistics are based on the median, not the mean.
+
+#       � 2022 colin crain
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+
+
+
+use warnings;
+use strict;
+use utf8;
+use feature ":5.26";
+use feature qw(signatures);
+no warnings 'experimental::signatures';
+
+
+my @arr = @ARGV;
+
+## default data array
+@arr == 0 and @arr = (1,2,3,4,5,6,7,8,9);
+
+
+## main
+@arr = sort { $a <=> $b } @arr;
+say " Data Values: [ @arr ]\n";
+my @five = ( $arr[0], quartiles(\@arr), $arr[-1] );
+my @names = ( "Minimum", "1st Quartile", "Median", "3rd Quartile", "Maximum" );
+say sprintf "%12s: %-s", $names[$_], $five[$_] for (0..4);
+
+
+## subs
+sub quartiles ( $arr ) {
+    my $count = scalar $arr->@*;
+    my $half  = int($count/2);          ## note integer truncation
+    
+    my ($q1, $q2, $q3);
+    if ( $count & 1 ) {                     ## odd num elements
+        $q2 = $arr->[$half];
+        if ( $half & 1 ) {                          ## odd halfway index
+            $q1 = $arr->[$half/2];
+            $q3 = $arr->[$half + $half/2 + 1];
+        }
+        else {                                      ## odd halfway index
+            $q1 = avg($arr->[$half/2-1], $arr->[$half/2]);
+            $q3 = avg($arr->[$half+$half/2], $arr->[$half+$half/2+1]);
+       }
+    }
+    else {                                  ## even num elements
+        $q2 = avg($arr->[$half-1], $arr->[$half]);
+        if ( $half & 1 ) {                          ## odd halfway index
+            $q1 = $arr->[$half/2];
+            $q3 = $arr->[$half + $half/2];
+        }
+        else {                                      ## even halfway index 
+            $q1 = avg($arr->[$half/2-1], $arr->[$half/2]);
+            $q3 = avg($arr->[$half+$half/2-1], $arr->[$half+$half/2]);
+        }
+    }
+    
+    return ( $q1, $q2, $q3 );
+}
+
+sub avg( $n1, $n2 ) {
+    return ($n1 + $n2)/2;
+}
+
+=cut
+
+the four cases, broken down: 
+
+(array length, computed half-array length)
+
+ODD,EVEN
+[0, 1, 2, 3,    4,    5, 6, 7, 8]           
+count = 9
+half  = 4
+q2 = half
+q1 = (1+2)/2 = avg(half/2-1, half/2)
+q3 = (6+7)/2 = avg(half/2+half, half/2+half+1)
+
+EVEN,EVEN
+[0, 1, 2, 3,    4, 5, 6, 7]                 
+count = 8
+half  = 4
+q2 = avg(half-1,half)
+q1=(1+2)/2 = avg(half/2-1, half/2)
+q3=(5+6)/2 = avg(half/2+half-1, half/2+half)
+ 
+ODD,ODD
+[0, 1, 2,    3,    4, 5, 6]                 
+count = 7
+half  = 3
+q2 = 3 = half
+q1 = 1 = half/2
+q3 = 5 = half + half/2 + 1
+
+EVEN,ODD
+[0, 1, 2,     3, 4, 5]                      
+count = 6
+half  = 3
+q2 = avg(half-1,half) 
+q1 = 1 = half/2
+q3 = 4 = half + half/2
+
author	Mohammad S Anwar <mohammad.anwar@yahoo.com>	2022-07-10 18:49:27 +0100
committer	Mohammad S Anwar <mohammad.anwar@yahoo.com>	2022-07-10 18:49:27 +0100
commit	3ab8077572bab376dce07950aeb7bcb0331528ee (patch)
tree	bfe26c8ff9f2d93c8caf68b7fabb506335ebd7ed /challenge-172
parent	54432b8360c5c63a4634657360b2d35f98931c0c (diff)
download	perlweeklychallenge-club-3ab8077572bab376dce07950aeb7bcb0331528ee.tar.gz perlweeklychallenge-club-3ab8077572bab376dce07950aeb7bcb0331528ee.tar.bz2 perlweeklychallenge-club-3ab8077572bab376dce07950aeb7bcb0331528ee.zip