aboutsummaryrefslogtreecommitdiff
path: root/challenge-083
diff options
context:
space:
mode:
authorAbigail <abigail@abigail.be>2020-10-25 22:20:14 +0100
committerAbigail <abigail@abigail.be>2020-10-25 22:20:14 +0100
commit80c29fadbdf077676de4b4989cd66ae20263c2db (patch)
tree9d675cf18b9d7de2b9d828d94d5d6bc10afbc0f5 /challenge-083
parent67ca64c525c64f0cf4a7f0771bab30a8b2e8ab31 (diff)
downloadperlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.tar.gz
perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.tar.bz2
perlweeklychallenge-club-80c29fadbdf077676de4b4989cd66ae20263c2db.zip
Describe the algorithm and optimizations.
Diffstat (limited to 'challenge-083')
-rw-r--r--challenge-083/abigail/perl/ch-2.pl163
1 files changed, 121 insertions, 42 deletions
diff --git a/challenge-083/abigail/perl/ch-2.pl b/challenge-083/abigail/perl/ch-2.pl
index e580d5ac2d..8561ed162d 100644
--- a/challenge-083/abigail/perl/ch-2.pl
+++ b/challenge-083/abigail/perl/ch-2.pl
@@ -23,35 +23,112 @@ use experimental 'lexical_subs';
#
#
-# This looks like an NP-complete program. 2-partition, where we're
-# asked whether we can partition a set of integers into two sets with
-# equal sums is NP-complete. That means, if you flip the signs of one
-# of the sets, and add all numbers, the result is 0, which is minimal.
+# This is very similar to an NP-hard problem: partitioning a set into
+# two, so that the difference of the sums is minimal is NP-hard. And
+# this is equal to finding sign flips so the resulting sum is as close
+# to zero as possible.
#
#
-# We solve this by using a stack of states. Each state means we're at
-# some point processing @$set. We're keeping a running sum, by either
-# adding or substracting the current number. We also keep a best score
-# so far (best sum: smallest non-negative sum; best flips: least amount
-# of flips to reach best sum). Each time we have processed the entire
-# set, we see whether we have a better score. If we haven't reached
-# the end of the set, we push two new states on the stack, one where
-# we add to the running sum, and one where we subtract from the running
-# sum. In the latter case, we increment the number of flips.
+# Some terminology:
+# - score: Sum of a set of numbers, with zero or more signs flipped.
+# - set: The given array.
+# - N: The number of elements in set.
#
-# To optimize, if we can early determine we can no longer reach a valid
-# sum (that is, all future choices in this branch lead to negative sums),
-# or if we cannot improve the current best score (that is, all future
-# choices in this branch lead to a sum which is worse than the best sum)
-# we return early, and don't push new states.
+
+#
+# Consider the search space of the possible solutions, that is, all
+# the possible scores we can achieve by flipping zero or more signs
+# of the set.
+#
+# We can visualize this as a complete binary tree of depth N + 1.
+# Each node on depth i <= N corresponds to the i-th number in the set.
+# The subtree on the left corresponds to adding the i-th number to
+# the score, the subtree on the right corresponds to subtracting the
+# i-th number from the score.
+# In each node we will have two numbers: the score (so far) we would get
+# if we'd follow the path from the root to this node, and the number of
+# signs we flipped along that path (that is, the number of times we
+# decended into a right subtree.
+# Leaf nodes correspond to having calculated a score for the entire set,
+# the one with the lowest, non-negative, score wins, with ties broken
+# by the least amount of flips.
+#
+# We can implement a standard stack based algorithm to do an in-order
+# traversal of the tree, calculating (but not storing) the numbers in
+# each node along the way. We visit each leaf node that way, and it is
+# then a simple matter of finding the best score/least number of flips.
+#
+# But given an example set
+#
+# S = [20, 20, 20, 19, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+#
+# this requires visiting 8,388,607 nodes.
+#
+# We can do better.
+#
+# Imagine we're traversing the tree, and we're now at some node at depth j.
+# Let the score at this node be P, and the number of flips be F. We can
+# then, without further traversing the tree, quickly calculate the maximum
+# and minimum score we can reach from this subtree. Let Q be the sum of the
+# numbers in the set with index >= j. Then the maximum score we can reach
+# is P + Q, and the minimum P - Q. If P + Q is negative, there is no need
+# to traverse the subtree under the current node, as all posible scores will
+# be negative. Similar, if P - Q exceeds the current best score (or if
+# P - Q equals the best score, but F + N - j + 1 exceeds to best number of
+# flips) nothing in the subtree under the current tree can improve the best
+# score found so far.
+#
+# In all of these cases, we can stop descending the tree, and backtrack.
+#
+# In order to quickly find sums of subtrees, we preprocess the set, and
+# create an array partial_sums. $partial_sums [$j] contains the sum of
+# the numbers @set [$j .. $#set].
+#
+# Applying this to the example set S requires visiting only 1265 nodes.
+# Quite an improvement!
+#
+# But we still can do better.
+#
+# It may take a while before we have found a decent "best score". If we
+# can start with a good estimate of the winning numbers, we can back out
+# of the search more often. So, we first apply a greedy algorithm: we
+# sort the set (largest to smallest), and iterate over the set, keeping
+# running score. If we can subtract the current number from the running
+# score, without the running score going negative, we subtract. Else, we
+# add. And, of course, we tally the amount of times we subtracted.
+#
+# Applying this to S means we only visit 141 nodes.
+#
+# But there is more.
+#
+# Imagine the set contains a number multiple times. For instance, our
+# example set S contains the number 20 three times. There are 8 different
+# ways to flip its signs:
+#
+# 20 20 20
+# 20 20 -20
+# 20 -20 20
+# 20 -20 -20
+# -20 20 20
+# -20 20 -20
+# -20 -20 20
+# -20 -20 -20
+#
+# But there are only four different sums: 60, 20, -20, -60. All that
+# matters is of how many numbers the sign has been flipped.
+# If we're keeping track of the last number we flipped on the current
+# path (that is, the last time we decended into a right subtree),
+# and don't go left if the number associated with the current node
+# equals the number last flipped, we only follow unique path.
+# The set of sequences above reduces to
+#
+# 20 20 20
+# 20 20 -20
+# 20 -20 -20
+# -20 -20 -20
#
-# As a final optimization, we also put the last number whose sign was
-# flipped into the state. We use this to prevent pushing cases on
-# the stack where the we add the current number, and the current number
-# is equal to the last number whose sign was flipped. This makes that
-# we process runs of equal number far more efficiently, reducing the
-# amount of generated cases from exponential to linear.
+# Now, with the example set S, we're down to visiting only 52 nodes.
#
@@ -62,17 +139,17 @@ my $set = [<> =~ /[0-9]+/g];
$set = [sort {$b <=> $a} @$set];
#
-# Initialize the best sums and best flips, by using a greedy algorithm.
+# Initialize the best score and best flips, by using a greedy algorithm.
#
-my $best_sum = 0;
+my $best_score = 0;
my $best_flips = 0;
foreach my $number (@$set) {
- if ($best_sum - $number < 0) {
- $best_sum += $number;
+ if ($best_score - $number < 0) {
+ $best_score += $number;
}
else {
- $best_sum -= $number;
+ $best_score -= $number;
$best_flips ++;
}
}
@@ -91,31 +168,32 @@ for (my $i = @$set; $i --;) {
}
#
-# @todo will contain 4-tuples [$index, $sum, $flips, $last_flipped],
+# @todo will contain 4-tuples [$index, $score, $flips, $last_flipped],
# each encoding a state.
#
# - $index: The current index
-# - $sum: Sum of the numbers @$set [0 .. $index - 1], with
+# - $score: Sum of the numbers @$set [0 .. $index - 1], with
# zero of signs flipped.
-# - $flips: The number of signs which have been flipped to reach $sum.
+# - $flips: The number of signs which have been flipped to
+# reach $score.
# - $last_flipped: The last number whose sign we have flipped.
#
my @todo = [0, 0, 0, 0];
while (@todo) {
- my ($index, $sum, $flips, $last_flipped) = @{pop @todo};
+ my ($index, $score, $flips, $last_flipped) = @{pop @todo};
#
- # We can't reach a positive sum, so no need to continue this branch.
+ # We can't reach a positive score, so no need to continue this branch.
#
- if ($sum + $$partial_sums [$index] < 0) {
+ if ($score + $$partial_sums [$index] < 0) {
next;
}
#
# If we can't improve on the current best score, no need to continue.
#
- if ($sum - $$partial_sums [$index] > $best_sum ||
- $sum - $$partial_sums [$index] == $best_sum &&
+ if ($score - $$partial_sums [$index] > $best_score ||
+ $score - $$partial_sums [$index] == $best_score &&
$flips + @$set - $index >= $best_flips) {
next;
}
@@ -124,12 +202,13 @@ while (@todo) {
#
# We have exhausted the set. Do we have a better result?
#
- if ($sum >= 0 &&
- ($sum < $best_sum || $sum == $best_sum && $flips < $best_flips)) {
+ if ($score >= 0 &&
+ ($score < $best_score ||
+ $score == $best_score && $flips < $best_flips)) {
#
# If so, update the score.
#
- ($best_sum, $best_flips) = ($sum, $flips);
+ ($best_score, $best_flips) = ($score, $flips);
}
next;
}
@@ -138,14 +217,14 @@ while (@todo) {
# Push the case where we are subtracting on the stack.
#
my $number = $$set [$index];
- push @todo => [$index + 1, $sum - $number, $flips + 1, $number];
+ push @todo => [$index + 1, $score - $number, $flips + 1, $number];
#
# Push the case where we are adding on the stack, but
# not if the current number equals the last number whose
# sign was flipped.
#
- push @todo => [$index + 1, $sum + $number, $flips, $last_flipped]
+ push @todo => [$index + 1, $score + $number, $flips, $last_flipped]
unless $last_flipped == $number;
}