aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJörg Sommrey <28217714+jo-37@users.noreply.github.com>2020-12-23 19:35:03 +0100
committerJörg Sommrey <28217714+jo-37@users.noreply.github.com>2020-12-23 19:35:03 +0100
commitd9647dbc59d9546451f8759e6c448c5011096cb0 (patch)
tree684448a2cf19d3cf25cea8e7d219818db9a5e1ec
parent8f20af5028b170fe9ce26433f61cfe1bf10bdd42 (diff)
parent51beb00928ae3a8cb0c1fe8660ce2b80e9467a1a (diff)
downloadperlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.gz
perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.bz2
perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.zip
Solutions to challenge 092
-rwxr-xr-xchallenge-092/jo-37/perl/ch-1.pl79
-rwxr-xr-xchallenge-092/jo-37/perl/ch-2.pl73
2 files changed, 152 insertions, 0 deletions
diff --git a/challenge-092/jo-37/perl/ch-1.pl b/challenge-092/jo-37/perl/ch-1.pl
new file mode 100755
index 0000000000..594a55d31b
--- /dev/null
+++ b/challenge-092/jo-37/perl/ch-1.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/perl
+
+use v5.16;
+use utf8;
+use charnames qw(:full :short);
+use Unicode::Normalize;
+use Digest;
+use Test2::V0;
+
+#use constant DIGEST => 'SHA-384';
+use constant DIGEST => 'SHA-1';
+
+# The task appears to be very easy, especially because the specification
+# comes along with a detailed description of an optimal implementation.
+# This invites to go the extra mile.
+#
+# - A unique "isomorphic canonicalization" is created. By taking a
+# message digest of this canonicalization, isomorphic strings can be
+# identified by identical digests.
+#
+# - Extended grapheme clusters (see
+# https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
+# are used to identify single "characters". By normalization (using
+# canonical decomposition) different representations of the same
+# symbol are transformed into identical grapheme clusters. See the
+# last test case for an example.
+sub uni_iso_digest {
+ # Normalize input (Normalization Form D - canonical decomposition)
+ local $_ = NFD shift;
+
+ my $digest = Digest->new(DIGEST);
+ my $n = 0;
+ my %chars;
+ my @canon;
+
+ # Assign an ascending number to every new character appearing in the
+ # string and append the current number's bits to the message.
+ # Use \X to grab for a grapheme cluster.
+ while (s/(\X)//) {
+ $chars{$1} = $n++ unless exists $chars{$1};
+ $digest->add_bits(pack('L', $chars{$1}), 32);
+ push @canon, $chars{$1};
+ }
+
+ # Return the canonicalization and the message digest in list
+ # context. Returns solely the digest in scalar context.
+ (\@canon, $digest->hexdigest);
+}
+
+is uni_iso_digest('abc'), uni_iso_digest('xyz'), 'Example 1';
+is uni_iso_digest('abb'), uni_iso_digest('xyy'), 'Example 2';
+isnt uni_iso_digest('sum'), uni_iso_digest('add'), 'Example 3';
+
+is uni_iso_digest('αββγ'), uni_iso_digest(9001), 'multi-byte vs. numeric';
+
+# Nine code points form four separate characters with two
+# different characters having two differing representations.
+my $str =
+ "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}" .
+
+ "\N{LATIN CAPITAL LETTER A}" .
+ "\N{COMBINING DIAERESIS}" .
+
+ "\N{LATIN CAPITAL LETTER A}" .
+ "\N{COMBINING DIAERESIS}" .
+ "\N{COMBINING LONG STROKE OVERLAY}" .
+
+ "\N{LATIN CAPITAL LETTER A}" .
+ "\N{COMBINING LONG STROKE OVERLAY}" .
+ "\N{COMBINING DIAERESIS}";
+
+is uni_iso_digest($str), uni_iso_digest('xxyy'),
+ 'Take normalized grapheme clusters as the individual characters';
+
+# Retrieve the canonicalization only:
+my ($canon) = uni_iso_digest($str);
+say "canonical: (@$canon)";
+
+done_testing;
diff --git a/challenge-092/jo-37/perl/ch-2.pl b/challenge-092/jo-37/perl/ch-2.pl
new file mode 100755
index 0000000000..b695b153bf
--- /dev/null
+++ b/challenge-092/jo-37/perl/ch-2.pl
@@ -0,0 +1,73 @@
+#!/usr/bin/perl
+
+use v5.16;
+use Test2::V0;
+use experimental 'postderef';
+use List::MoreUtils 'minmax';
+
+# Intervals are represented here by refs to two-element arrays
+# containing the lower and upper border. At least one border (the same
+# side for all) has to be part of the interval.
+
+# Single insertion step: return the interval(s) to be passed to the
+# result and the new insertion (if any) when comparing the insertion
+# with a single part of the given interval list.
+sub pass_merge {
+ my ($base, $insert) = @_;
+
+ # No insertion: just pass the base.
+ return [$base] unless $insert;
+
+ # Base below insertion: pass the base and keep the insertion.
+ return ([$base], $insert) if $base->[1] < $insert->[0];
+
+ # Insertion below base: pass insertion and base, clear insertion.
+ return [$insert, $base] if $insert->[1] < $base->[0];
+
+ # Overlapping intervals: pass nothing and use merged intervals as
+ # new insertion.
+ ([], [minmax $base->@*, $insert->@*]);
+}
+
+# Insert the new interval (1st arg) into the list of given intervals by
+# sliding the insertion over the list of intervals and performing the
+# operations as provided by pass_merge.
+sub insert {
+ my $insert = shift;
+
+ ((map {
+ (my $pass, $insert) = pass_merge $_, $insert;
+ $pass->@*;
+ } @_),
+ # Append the insertion if it still exists.
+ ($insert) x !!$insert);
+}
+
+is [insert [2, 6], [1, 4], [8, 10]],
+ [[1, 6], [8, 10]], 'Example 1';
+
+is [insert [5, 8], [1, 2], [3, 7], [8, 10]],
+ [[1, 2], [3, 10]], 'Example 2';
+
+is [insert [10, 11], [1, 5], [7, 9]],
+ [[1, 5], [7, 9], [10, 11]], 'Example 3';
+
+is [insert [1, 2], [3, 4], [5, 6], [7, 8]],
+ [[1, 2], [3, 4], [5, 6], [7, 8]], 'Prepend new interval';
+
+is [insert [9, 10], [3, 4], [5, 6], [7, 8]],
+ [[3, 4], [5, 6], [7, 8], [9, 10]], 'Append new interval';
+
+is [insert [1, 8], [2, 3], [4, 5], [6, 7]],
+ [[1, 8]], 'Eat up everything';
+
+is [insert [3, 5], [1, 2], [4, 6], [7, 8], [9, 10]],
+ [[1, 2], [3, 6], [7, 8], [9, 10]], 'Left merge';
+
+is [insert [4, 6], [1, 2], [3, 5], [7, 8], [9, 10]],
+ [[1, 2], [3, 6], [7, 8], [9, 10]], 'Right merge';
+
+is [insert [5, 8], [1, 2], [4, 6], [7, 9], [10, 11], [12, 13]],
+ [[1, 2], [4, 9], [10, 11], [12, 13]], 'Join intervals';
+
+done_testing;