Describe the algorithm and optimizations.

author: Abigail <abigail@abigail.be> 2020-10-25 22:20:14 +0100
committer: Abigail <abigail@abigail.be> 2020-10-25 22:20:14 +0100
commit: 80c29fadbdf077676de4b4989cd66ae20263c2db (patch)
tree: 9d675cf18b9d7de2b9d828d94d5d6bc10afbc0f5 /challenge-083
parent: 67ca64c525c64f0cf4a7f0771bab30a8b2e8ab31 (diff)
download: perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.tar.gz
perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.tar.bz2
perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.zip
1 files changed, 121 insertions, 42 deletions
diff --git a/challenge-083/abigail/perl/ch-2.pl b/challenge-083/abigail/perl/ch-2.pl
index e580d5ac2d..8561ed162d 100644
--- a/challenge-083/abigail/perl/ch-2.pl
+++ b/challenge-083/abigail/perl/ch-2.pl
@@ -23,35 +23,112 @@ use experimental 'lexical_subs';
 #
 
 #
-# This looks like an NP-complete program. 2-partition, where we're
-# asked whether we can partition a set of integers into two sets with
-# equal sums is NP-complete. That means, if you flip the signs of one
-# of the sets, and add all numbers, the result is 0, which is minimal.
+# This is very similar to an NP-hard problem: partitioning a set into
+# two, so that the difference of the sums is minimal is NP-hard. And
+# this is equal to finding sign flips so the resulting sum is as close
+# to zero as possible.
 # 
 
 #
-# We solve this by using a stack of states. Each state means we're at
-# some point processing @$set. We're keeping a running sum, by either
-# adding or substracting the current number. We also keep a best score
-# so far (best sum: smallest non-negative sum; best flips: least amount
-# of flips to reach best sum). Each time we have processed the entire
-# set, we see whether we have a better score. If we haven't reached
-# the end of the set, we push two new states on the stack, one where
-# we add to the running sum, and one where we subtract from the running
-# sum. In the latter case, we increment the number of flips.
+# Some terminology:
+#   - score:  Sum of a set of numbers, with zero or more signs flipped.
+#   - set:    The given array.
+#   - N:      The number of elements in set.
 #
-# To optimize, if we can early determine we can no longer reach a valid
-# sum (that is, all future choices in this branch lead to negative sums),
-# or if we cannot improve the current best score (that is, all future
-# choices in this branch lead to a sum which is worse than the best sum)
-# we return early, and don't push new states.
+
+#
+# Consider the search space of the possible solutions, that is, all
+# the possible scores we can achieve by flipping zero or more signs
+# of the set.
+#
+# We can visualize this as a complete binary tree of depth N + 1.
+# Each node on depth i <= N corresponds to the i-th number in the set.
+# The subtree on the left corresponds to adding the i-th number to 
+# the score, the subtree on the right corresponds to subtracting the
+# i-th number from the score.
+# In each node we will have two numbers: the score (so far) we would get
+# if we'd follow the path from the root to this node, and the number of
+# signs we flipped along that path (that is, the number of times we
+# decended into a right subtree.
+# Leaf nodes correspond to having calculated a score for the entire set,
+# the one with the lowest, non-negative, score wins, with ties broken
+# by the least amount of flips.
+#
+# We can implement a standard stack based algorithm to do an in-order
+# traversal of the tree, calculating (but not storing) the numbers in
+# each node along the way. We visit each leaf node that way, and it is
+# then a simple matter of finding the best score/least number of flips.
+#
+# But given an example set
+#
+#   S = [20, 20, 20, 19, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+#
+# this requires visiting 8,388,607 nodes.
+#
+# We can do better.
+#
+# Imagine we're traversing the tree, and we're now at some node at depth j.
+# Let the score at this node be P, and the number of flips be F. We can
+# then, without further traversing the tree, quickly calculate the maximum
+# and minimum score we can reach from this subtree. Let Q be the sum of the
+# numbers in the set with index >= j. Then the maximum score we can reach
+# is P + Q, and the minimum P - Q. If P + Q is negative, there is no need
+# to traverse the subtree under the current node, as all posible scores will
+# be negative. Similar, if P - Q exceeds the current best score (or if
+# P - Q equals the best score, but F + N - j + 1 exceeds to best number of
+# flips) nothing in the subtree under the current tree can improve the best
+# score found so far.
+#
+# In all of these cases, we can stop descending the tree, and backtrack.
+#
+# In order to quickly find sums of subtrees, we preprocess the set, and
+# create an array partial_sums. $partial_sums [$j] contains the sum of
+# the numbers @set [$j .. $#set].
+#
+# Applying this to the example set S requires visiting only 1265 nodes.
+# Quite an improvement!
+#
+# But we still can do better.
+#
+# It may take a while before we have found a decent "best score". If we
+# can start with a good estimate of the winning numbers, we can back out
+# of the search more often. So, we first apply a greedy algorithm: we
+# sort the set (largest to smallest), and iterate over the set, keeping
+# running score. If we can subtract the current number from the running
+# score, without the running score going negative, we subtract. Else, we
+# add. And, of course, we tally the amount of times we subtracted.
+#
+# Applying this to S means we only visit 141 nodes.
+#
+# But there is more.
+#
+# Imagine the set contains a number multiple times. For instance, our
+# example set S contains the number 20 three times. There are 8 different
+# ways to flip its signs:
+#
+#     20   20   20
+#     20   20  -20
+#     20  -20   20
+#     20  -20  -20
+#    -20   20   20
+#    -20   20  -20
+#    -20  -20   20
+#    -20  -20  -20
+#
+# But there are only four different sums: 60, 20, -20, -60. All that
+# matters is of how many numbers the sign has been flipped.
+# If we're keeping track of the last number we flipped on the current
+# path (that is, the last time we decended into a right subtree),
+# and don't go left if the number associated with the current node
+# equals the number last flipped, we only follow unique path.
+# The set of sequences above reduces to
+#
+#     20   20   20
+#     20   20  -20
+#     20  -20  -20
+#    -20  -20  -20
 #
-# As a final optimization, we also put the last number whose sign was
-# flipped into the state. We use this to prevent pushing cases on
-# the stack where the we add the current number, and the current number
-# is equal to the last number whose sign was flipped. This makes that
-# we process runs of equal number far more efficiently, reducing the
-# amount of generated cases from exponential to linear.
+# Now, with the example set S, we're down to visiting only 52 nodes.
 #
 
 
@@ -62,17 +139,17 @@ my $set = [<> =~ /[0-9]+/g];
    $set = [sort {$b <=> $a} @$set];
 
 #
-# Initialize the best sums and best flips, by using a greedy algorithm.
+# Initialize the best score and best flips, by using a greedy algorithm.
 #
-my $best_sum   = 0;
+my $best_score = 0;
 my $best_flips = 0;
 
 foreach my $number (@$set) {
-    if ($best_sum -  $number < 0) {
-        $best_sum += $number;
+    if ($best_score -  $number < 0) {
+        $best_score += $number;
     }
     else {
-        $best_sum -= $number;
+        $best_score -= $number;
         $best_flips ++;
     }
 }
@@ -91,31 +168,32 @@ for (my $i = @$set; $i --;) {
 }
 
 #
-# @todo will contain 4-tuples [$index, $sum, $flips, $last_flipped],
+# @todo will contain 4-tuples [$index, $score, $flips, $last_flipped],
 # each encoding a state.
 #
 #    - $index:        The current index
-#    - $sum:          Sum of the numbers @$set [0 .. $index - 1], with
+#    - $score:        Sum of the numbers @$set [0 .. $index - 1], with
 #                     zero of signs flipped.
-#    - $flips:        The number of signs which have been flipped to reach $sum.
+#    - $flips:        The number of signs which have been flipped to
+#                     reach $score.
 #    - $last_flipped: The last number whose sign we have flipped.
 #
 my @todo = [0, 0, 0, 0];
 while (@todo) {
-    my ($index, $sum, $flips, $last_flipped) = @{pop @todo};
+    my ($index, $score, $flips, $last_flipped) = @{pop @todo};
 
     #
-    # We can't reach a positive sum, so no need to continue this branch.
+    # We can't reach a positive score, so no need to continue this branch.
     #
-    if ($sum + $$partial_sums [$index] < 0) {
+    if ($score + $$partial_sums [$index] < 0) {
         next;
     }
 
     #
     # If we can't improve on the current best score, no need to continue.
     #
-    if ($sum - $$partial_sums [$index] >  $best_sum ||
-        $sum - $$partial_sums [$index] == $best_sum &&
+    if ($score - $$partial_sums [$index] >  $best_score ||
+        $score - $$partial_sums [$index] == $best_score &&
               $flips + @$set - $index  >= $best_flips) {
         next;
     }
@@ -124,12 +202,13 @@ while (@todo) {
         #
         # We have exhausted the set. Do we have a better result?
         #
-        if ($sum >= 0 &&
-               ($sum < $best_sum || $sum == $best_sum && $flips < $best_flips)) {
+        if ($score >= 0 &&
+               ($score < $best_score ||
+                $score == $best_score && $flips < $best_flips)) {
             #
             # If so, update the score.
             #
-            ($best_sum, $best_flips) = ($sum, $flips);
+            ($best_score, $best_flips) = ($score, $flips);
         }
         next;
     }
@@ -138,14 +217,14 @@ while (@todo) {
     # Push the case where we are subtracting on the stack.
     #
     my $number = $$set [$index];
-    push @todo => [$index + 1, $sum - $number, $flips + 1, $number];
+    push @todo => [$index + 1, $score - $number, $flips + 1, $number];
 
     #
     # Push the case where we are adding on the stack, but
     # not if the current number equals the last number whose
     # sign was flipped.
     #
-    push @todo => [$index + 1, $sum + $number, $flips,     $last_flipped]
+    push @todo => [$index + 1, $score + $number, $flips,     $last_flipped]
           unless $last_flipped == $number;
 }
author	Abigail <abigail@abigail.be>	2020-10-25 22:20:14 +0100
committer	Abigail <abigail@abigail.be>	2020-10-25 22:20:14 +0100
commit	80c29fadbdf077676de4b4989cd66ae20263c2db (patch)
tree	9d675cf18b9d7de2b9d828d94d5d6bc10afbc0f5 /challenge-083
parent	67ca64c525c64f0cf4a7f0771bab30a8b2e8ab31 (diff)
download	perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.tar.gz perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.tar.bz2 perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.zip