From d8de821d746fef18220667a46c3818427b6ae675 Mon Sep 17 00:00:00 2001
From: James Smith <baggy@baggy.me.uk>
Date: Mon, 30 Aug 2021 09:56:03 +0100
Subject: Update README.md

---
 challenge-128/james-smith/README.md | 155 ++++++++++++++++++++++++++----------
 1 file changed, 114 insertions(+), 41 deletions(-)

diff --git a/challenge-128/james-smith/README.md b/challenge-128/james-smith/README.md
index 10adb6a346..66d70d3fa2 100644
--- a/challenge-128/james-smith/README.md
+++ b/challenge-128/james-smith/README.md
@@ -1,4 +1,4 @@
-# Perl Weekly Challenge #127
+# Perl Weekly Challenge #128
 
 You can find more information about this weeks, and previous weeks challenges at:
 
@@ -10,74 +10,147 @@ submit solutions in whichever language you feel comfortable with.
 
 You can find the solutions here on github at:
 
-https://github.com/drbaggy/perlweeklychallenge-club/tree/master/challenge-127/james-smith/perl
+https://github.com/drbaggy/perlweeklychallenge-club/tree/master/challenge-128/james-smith/perl
 
-# Task 1 -  Disjoint Sets
+# Task 1 -  Maximum Sub-Matrix
 
-***You are given two sets with unique integers. Write a script to figure out if they are disjoint.***
+***You are given m x n binary matrix having 0 or 1 in each cell. Write a script to find out maximum sub-matrix having only 0***
+
+## Ambiguity
+
+There may be multiple solutions (e.g. in Example 1 depending on how you write the algorithm) so rather than returning the matrix - my tests will be on the area of the matrix
 
 ## The solution
 
-This is a rewording of simple perl, which is to see if there any elements of set 1 in set 2. To avoid nested loops, we use the efficiency of perl's hash key search to search for members. So for set 1 we create a hash who's keys are the members of set 1. We then search through set 2 to see which members are keys in this hash (and so in both sets). If there are any we return 0 o/w return 1.
+Initialy this looks like an `O(n^4)` problem - you would need to scan the area to the right and below `O(n^2)` for a given cell `O(n^2)`. But with a bit of preprocessing - we can remove at least one of these loops - so the challenge becomes `O(n^3)`.
+
+### Preprocessing the matrix.
+
+To remove the inner loop - we can pre-compute this - the number of 0s in a continuouse line starting at the point and going right. For the first matrix we get
+
+```
+   [ 1 0 0 0 1 0 ]      [ 0 3 2 1 0 1 ]
+   [ 1 1 0 0 0 1 ]   -> [ 0 0 3 2 1 0 ]
+   [ 1 0 0 0 0 0 ]      [ 0 5 4 3 2 1 ]
+```
+
+The code that does that is this....
 
 ```perl
-sub disjoint_sets {
-  my %m = map { $_=>1 } @{$_[0]};
-  return grep( { $m{$_} } @{$_[1]}) ? 0 : 1;
-}
+  ## Last column 1s become 0s, 0s become 1s
+  my @runs = map { [1 - $_->[-1]] } @rows;
+
+  ## Remaining columns we are working backwards along the rows
+  ## Column is 0 if the matrix contains a 1 - o/w it is 1 more
+  ## than the cell to the right (which is the first cell in the row)
+  ## we use unshift to extend each row left...
+  foreach my $i (reverse 0..$w-1) {
+    unshift @{$runs[$_]}, $rows[$_][$i] ? 0 : $runs[$_][0]+1 foreach 0..$h;
+  }
 ```
 
-# Task 2 - Conflict Intervals
+We then have the `O(n^3)` to find the maximum area.
 
-***You are given a list of intervals. Write a script to find out if the current interval conflicts with any of the previous intervals.***
+For each cell we work out the maximum area of any rectangle.
 
-## Background
+For the first row it is just `$run[$y][$x]`. For subsequent rows it is the minimum of all `$run[$y][$x]` we have seen times height
 
-This is going back to my day job again - but I thought I would add a bit of background this time. I'm not a geneticist, but a web developer working at an institution that uses DNA sequencing to use genomics to investigate genetics. My first project was to develop/lead the developers for a genome browser. Often we had to display information about genomic features and whether or not they overlapped a particular region (to know whether to display them or not) or to bump them for display (to make sure features didn't merge/overlap)...
+```perl
+      my $max_w = 1e9;
+      foreach my $j ( $y .. $h ) {
+        last unless $runs[$j][$x]; ## We have a 1 in the rectangle quit
+        $max_w    = $runs[$j][$x]                  if $runs[$j][$x] < $max_w;
+        my $area  = $max_w * ( $j - $y + 1 );
+        $max_area = [ $area, $max_w, $j - $y + 1 ] if $area > $max_area->[0];
+      }
+```
 
-## Solution
+The variable `$max_area` contains three values `$max_area`, `$max_w`, `$max_h` - the latter two if you wish to draw the empty matrix.....
 
+Put it all together we have...
+```perl
+sub find_empty {
+  my @runs      = map { [ 1 - $_->[-1] ] } my @rows = @{$_[0]};
+  my ( $h, $w ) = ( @rows - 1, @{$rows[0]} - 1 );
 
-There are six arrangements more any two regions..... See picture below...
+  foreach my $i ( reverse 0 .. $w - 1 ) {
+    unshift @{$runs[$_]}, $rows[$_][$i] ? 0 : $runs[$_][0] + 1 foreach 0 .. $h;
+  }
 
+  my $max_area = [ 0, 0 , 0 ];
+  foreach my $x ( 0 .. $w ) {
+    foreach my $y ( 0 .. $h ) {
+      next unless $runs[$y][$x];
+      my $max_w = 1e9;
+      foreach my $j ( $y .. $h ) {
+        last unless $runs[$j][$x];
+        $max_w    = $runs[$j][$x]                  if $runs[$j][$x] < $max_w;
+        my $area  = $max_w * ( $j - $y + 1 );
+        $max_area = [ $area, $max_w, $j - $y + 1 ] if $area > $max_area->[0];
+      }
+    }
+  }
+
+  return $max_area;
+}
 ```
-         [============]
-[------]    [++++++]    [------]
-     [++++++++++++++++++++]
-     [++++++]      [++++++]
-```
 
-Over the 6 we have two of regions which are disjoint from the top region: Where `region_2_start > region_1_end` or `region_1_start > region_2_end`...
 
-Our loop finally becomes....
+# Task 2 - Minimum Platforms
+
+***You are given two arrays of arrival and departure times of trains at a railway station. Write a script to find out the minimum number of platforms needed so that no train needs to wait.***
+
+## Background
+
+As mentioned this is effectively my day job again. To display information about genomic features and whether or not they overlapped a particular region (to know whether to display them or not) or to bump them for display (to make sure features didn't merge/overlap) which is exactly this problem. This one is in someways easier as we have the trains already sorted into date order. If we didn't sorting them would make life a lot easier! - not so easy on a genome browser where there may be 10s of thousands of features in a region.
+
+This is actually more my brother's line of work - he works for what was BR computing - and one of his jobs is just this. For a while BR had 66 minutes in an hour to allow them to get trains in and out of one of the large busy stations.
+
+## Solution
+
+As we are assuming that starts are in time order we don't have to do a 2-sided test for overlaps.
+
+So foreach train we loop through all platforms - seeing if there is a platform with a slot in it (i.e. the last train has already left the platform). If there isn't we make a new platform and add the train to it, and repeat. All we do is store the last departure time for each platform, and because 24-hr date strings alphabetic/time order are the same - we only need to use the string comparison operator (in this case `gt`) to compare times.
+
+Here we use a little used concept in perl the label "`OUTER`" - this allows our inner loop to break out of it's own `foreach` loop and also jump to the next interation of it's parent loop.
 
 ```perl
-sub conflict_intervals {
-  my @in = @{ $_[0] };
-  my @conf;
-  while( my $int = pop @in ) {
-    foreach(@in) {
-      next unless $int->[1] < $_->[0] || $int->[0] < $_->[1];
-      unshift @conf, $int;
-      last;
+sub bump_platform {
+  my @arr = @{shift @_};
+  my @dep = @{shift @_};
+  my @plat = ();
+  OUTER: foreach my $st (@arr) {
+    foreach(0..$#plat) {
+      next unless $st gt $plat[$_];
+      $plat[$_] = shift @dep;
+      next OUTER;
     }
+    push @plat, shift @dep;
   }
-  return \@conf;
+  return scalar @plat;
 }
-```
 
-**Notes:**
- # Note we work from the end backwards - this is easier as we just pop the last interval off the loop each time and compare it with previous ones.
- # We then use unshift to add it to the start of what we return.
+```
 
-We can compact this slightly using the `&&` trick in the foor loop to do away with the `next`/`last` combination:
+**Notes:
 
+We can also keep information about which trains are on by re-writing what is stored in `@plat` rather than storing the last departure time - we can store the arrival/departure time of trains on each platform...
+ 
 ```perl
-sub conflict_intervals_compact {
-  my(@i,@o) = @{$_[0]};
-  while(my $r = pop @i) {
-    ($r->[1]<$_->[0] || $r->[0]<$_->[1]) && (unshift @o, $r) && last foreach @i;
+sub bump_platform_keep_trains {
+  my @arr = @{shift @_};
+  my @dep = @{shift @_};
+  my $t = 0;
+  my @plat; # = ( [ [shift @{$arr}, shift @{$dep}, my $t=1] ] );
+  OUTER: foreach my $st (@arr) {
+    foreach(@plat) {
+      next unless $st gt $_->[-1][1];
+      push @{$_}, [ $st, (shift @dep), ++$t ];
+      next OUTER;
+    }
+    push @plat, [ [ $st, (shift @dep), ++$t ] ];
   }
-  return \@o;
+  say '  ', join '  ', map { "Train $_->[2]: $_->[0]-$_->[1]" } @{$_} foreach @plat;
+  return scalar @plat;
 }
 ```
-- 
cgit 


From 400a63ba2c5d511d7870fc45978ea44a2623c740 Mon Sep 17 00:00:00 2001
From: James Smith <baggy@baggy.me.uk>
Date: Mon, 30 Aug 2021 09:56:24 +0100
Subject: Create blog.txt

---
 challenge-128/james-smith/blog.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 challenge-128/james-smith/blog.txt

diff --git a/challenge-128/james-smith/blog.txt b/challenge-128/james-smith/blog.txt
new file mode 100644
index 0000000000..b84ed0fc12
--- /dev/null
+++ b/challenge-128/james-smith/blog.txt
@@ -0,0 +1 @@
+https://github.com/drbaggy/perlweeklychallenge-club/tree/master/challenge-128/james-smith
-- 
cgit