Solutions to challenge 092

author: Jörg Sommrey <28217714+jo-37@users.noreply.github.com> 2020-12-23 19:35:03 +0100
committer: Jörg Sommrey <28217714+jo-37@users.noreply.github.com> 2020-12-23 19:35:03 +0100
commit: d9647dbc59d9546451f8759e6c448c5011096cb0 (patch)
tree: 684448a2cf19d3cf25cea8e7d219818db9a5e1ec
parent: 8f20af5028b170fe9ce26433f61cfe1bf10bdd42 (diff)
parent: 51beb00928ae3a8cb0c1fe8660ce2b80e9467a1a (diff)
download: perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.gz
perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.bz2
perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.zip
2 files changed, 152 insertions, 0 deletions
diff --git a/challenge-092/jo-37/perl/ch-1.pl b/challenge-092/jo-37/perl/ch-1.pl
new file mode 100755
index 0000000000..594a55d31b
--- /dev/null
+++ b/challenge-092/jo-37/perl/ch-1.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/perl
+
+use v5.16;
+use utf8;
+use charnames qw(:full :short);
+use Unicode::Normalize;
+use Digest;
+use Test2::V0;
+
+#use constant DIGEST => 'SHA-384';
+use constant DIGEST => 'SHA-1';
+
+# The task appears to be very easy, especially because the specification
+# comes along with a detailed description of an optimal implementation.
+# This invites to go the extra mile.
+# 
+# - A unique "isomorphic canonicalization" is created.  By taking a
+#   message digest of this canonicalization, isomorphic strings can be
+#   identified by identical digests.
+#
+# - Extended grapheme clusters (see
+#   https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
+#   are used to identify single "characters".  By normalization (using
+#   canonical decomposition) different representations of the same
+#   symbol are transformed into identical grapheme clusters.  See the
+#   last test case for an example.
+sub uni_iso_digest {
+	# Normalize input (Normalization Form D - canonical decomposition)
+	local $_ = NFD shift;
+
+	my $digest = Digest->new(DIGEST);
+	my $n = 0;
+	my %chars;
+	my @canon;
+
+	# Assign an ascending number to every new character appearing in the
+	# string and append the current number's bits to the message.
+	# Use \X to grab for a grapheme cluster.
+	while (s/(\X)//) {
+		$chars{$1} = $n++ unless exists $chars{$1};
+		$digest->add_bits(pack('L', $chars{$1}), 32);
+		push @canon, $chars{$1};
+	}
+
+	# Return the canonicalization and the message digest in list
+	# context.  Returns solely the digest in scalar context.
+	(\@canon, $digest->hexdigest);
+}
+
+is uni_iso_digest('abc'), uni_iso_digest('xyz'), 'Example 1';
+is uni_iso_digest('abb'), uni_iso_digest('xyy'), 'Example 2';
+isnt uni_iso_digest('sum'), uni_iso_digest('add'), 'Example 3';
+
+is uni_iso_digest('αββγ'), uni_iso_digest(9001), 'multi-byte vs. numeric';
+
+# Nine code points form four separate characters with two
+# different characters having two differing representations.
+my $str = 
+	"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}" .
+
+	"\N{LATIN CAPITAL LETTER A}" .
+	"\N{COMBINING DIAERESIS}" . 
+
+	"\N{LATIN CAPITAL LETTER A}" .
+	"\N{COMBINING DIAERESIS}" . 
+	"\N{COMBINING LONG STROKE OVERLAY}" .
+
+	"\N{LATIN CAPITAL LETTER A}" .
+	"\N{COMBINING LONG STROKE OVERLAY}" .
+	"\N{COMBINING DIAERESIS}";
+
+is uni_iso_digest($str), uni_iso_digest('xxyy'),
+	'Take normalized grapheme clusters as the individual characters';
+
+# Retrieve the canonicalization only:
+my ($canon) = uni_iso_digest($str);
+say "canonical: (@$canon)";
+
+done_testing;
diff --git a/challenge-092/jo-37/perl/ch-2.pl b/challenge-092/jo-37/perl/ch-2.pl
new file mode 100755
index 0000000000..b695b153bf
--- /dev/null
+++ b/challenge-092/jo-37/perl/ch-2.pl
@@ -0,0 +1,73 @@
+#!/usr/bin/perl
+
+use v5.16;
+use Test2::V0;
+use experimental 'postderef';
+use List::MoreUtils 'minmax';
+
+# Intervals are represented here by refs to two-element arrays
+# containing the lower and upper border.  At least one border (the same
+# side for all) has to be part of the interval.
+
+# Single insertion step: return the interval(s) to be passed to the
+# result and the new insertion (if any) when comparing the insertion
+# with a single part of the given interval list.
+sub pass_merge {
+	my ($base, $insert) = @_;
+
+	# No insertion: just pass the base.
+	return [$base] unless $insert;
+
+	# Base below insertion: pass the base and keep the insertion.
+	return ([$base], $insert) if $base->[1] < $insert->[0];
+
+	# Insertion below base: pass insertion and base, clear insertion.
+	return [$insert, $base] if $insert->[1] < $base->[0];
+
+	# Overlapping intervals: pass nothing and use merged intervals as
+	# new insertion.
+	([], [minmax $base->@*, $insert->@*]);
+}
+
+# Insert the new interval (1st arg) into the list of given intervals by
+# sliding the insertion over the list of intervals and performing the
+# operations as provided by pass_merge.
+sub insert {
+	my $insert = shift;
+
+	((map {
+			(my $pass, $insert) = pass_merge $_, $insert;
+			$pass->@*;
+			} @_),
+		# Append the insertion if it still exists.
+		($insert) x !!$insert);
+}
+
+is [insert [2, 6], [1, 4], [8, 10]],
+	[[1, 6], [8, 10]], 'Example 1';
+
+is [insert [5, 8], [1, 2], [3, 7], [8, 10]],
+	[[1, 2], [3, 10]], 'Example 2';
+
+is [insert [10, 11], [1, 5], [7, 9]],
+	[[1, 5], [7, 9], [10, 11]], 'Example 3';
+
+is [insert [1, 2], [3, 4], [5, 6], [7, 8]],
+	[[1, 2], [3, 4], [5, 6], [7, 8]], 'Prepend new interval';
+
+is [insert [9, 10], [3, 4], [5, 6], [7, 8]],
+	[[3, 4], [5, 6], [7, 8], [9, 10]], 'Append new interval';
+
+is [insert [1, 8], [2, 3], [4, 5], [6, 7]],
+	[[1, 8]], 'Eat up everything';
+
+is [insert [3, 5], [1, 2], [4, 6], [7, 8], [9, 10]],
+	[[1, 2], [3, 6], [7, 8], [9, 10]], 'Left merge';
+
+is [insert [4, 6], [1, 2], [3, 5], [7, 8], [9, 10]],
+	[[1, 2], [3, 6], [7, 8], [9, 10]], 'Right merge';
+
+is [insert [5, 8], [1, 2], [4, 6], [7, 9], [10, 11], [12, 13]],
+	[[1, 2], [4, 9], [10, 11], [12, 13]], 'Join intervals'; 
+
+done_testing;
author	Jörg Sommrey <28217714+jo-37@users.noreply.github.com>	2020-12-23 19:35:03 +0100
committer	Jörg Sommrey <28217714+jo-37@users.noreply.github.com>	2020-12-23 19:35:03 +0100
commit	d9647dbc59d9546451f8759e6c448c5011096cb0 (patch)
tree	684448a2cf19d3cf25cea8e7d219818db9a5e1ec
parent	8f20af5028b170fe9ce26433f61cfe1bf10bdd42 (diff)
parent	51beb00928ae3a8cb0c1fe8660ce2b80e9467a1a (diff)
download	perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.gz perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.tar.bz2 perlweeklychallenge-club-d9647dbc59d9546451f8759e6c448c5011096cb0.zip