Perl solution for week 83/part 1.

author: Abigail <abigail@abigail.be> 2020-10-19 17:05:12 +0200
committer: Abigail <abigail@abigail.be> 2020-10-19 17:05:12 +0200
commit: 938de747ecef666e21bc4ec491d48a6cdf22d569 (patch)
tree: ee5c30285799e62f7b071d7506e2403f4bcc68ca
parent: c4f90a4fc0786f428e5aa1d6dab206cd5e9e01cc (diff)
download: perlweeklychallenge-club-938de747ecef666e21bc4ec491d48a6cdf22d569.tar.gz
perlweeklychallenge-club-938de747ecef666e21bc4ec491d48a6cdf22d569.tar.bz2
perlweeklychallenge-club-938de747ecef666e21bc4ec491d48a6cdf22d569.zip
13 files changed, 104 insertions, 0 deletions
diff --git a/challenge-083/abigail/input-1-1 b/challenge-083/abigail/input-1-1
new file mode 100644
index 0000000000..5529e0dd42
--- /dev/null
+++ b/challenge-083/abigail/input-1-1
@@ -0,0 +1 @@
+The Weekly Challenge
diff --git a/challenge-083/abigail/input-1-2 b/challenge-083/abigail/input-1-2
new file mode 100644
index 0000000000..c37cae9adc
--- /dev/null
+++ b/challenge-083/abigail/input-1-2
@@ -0,0 +1 @@
+The purpose of our lives is to be happy
diff --git a/challenge-083/abigail/input-1-3 b/challenge-083/abigail/input-1-3
new file mode 100644
index 0000000000..e03ee30b37
--- /dev/null
+++ b/challenge-083/abigail/input-1-3
@@ -0,0 +1 @@
+'s Ochtends lopen door Amsterdam-Zuid.
diff --git a/challenge-083/abigail/input-1-4 b/challenge-083/abigail/input-1-4
new file mode 100644
index 0000000000..8291d0473a
--- /dev/null
+++ b/challenge-083/abigail/input-1-4
@@ -0,0 +1 @@
+Markmið lífs okkar er að vera hamingjusöm
diff --git a/challenge-083/abigail/input-1-5 b/challenge-083/abigail/input-1-5
new file mode 100644
index 0000000000..134b092055
--- /dev/null
+++ b/challenge-083/abigail/input-1-5
@@ -0,0 +1 @@
+Fŏ͢o̐ᷜ bar b⃝a⃝z⃝
diff --git a/challenge-083/abigail/input-1-6 b/challenge-083/abigail/input-1-6
new file mode 100644
index 0000000000..43e997122f
--- /dev/null
+++ b/challenge-083/abigail/input-1-6
@@ -0,0 +1 @@
+Ο σκοπός της ζωής μας είναι να είμαστε ευτυχισμένοι
+\ No newline at end of file
diff --git a/challenge-083/abigail/output-1-1.exp b/challenge-083/abigail/output-1-1.exp
new file mode 100644
index 0000000000..23f20f1f2c
--- /dev/null
+++ b/challenge-083/abigail/output-1-1.exp
@@ -0,0 +1,2 @@
+# First example
+6
diff --git a/challenge-083/abigail/output-1-2.exp b/challenge-083/abigail/output-1-2.exp
new file mode 100644
index 0000000000..fd159104fd
--- /dev/null
+++ b/challenge-083/abigail/output-1-2.exp
@@ -0,0 +1,2 @@
+# Second example
+23
diff --git a/challenge-083/abigail/output-1-3.exp b/challenge-083/abigail/output-1-3.exp
new file mode 100644
index 0000000000..d31a46a8d4
--- /dev/null
+++ b/challenge-083/abigail/output-1-3.exp
@@ -0,0 +1,2 @@
+# Using ' and -, and trailing punctuation.
+18
diff --git a/challenge-083/abigail/output-1-4.exp b/challenge-083/abigail/output-1-4.exp
new file mode 100644
index 0000000000..902ff5684f
--- /dev/null
+++ b/challenge-083/abigail/output-1-4.exp
@@ -0,0 +1,2 @@
+# Icelandic, characters outside of ASCII
+17
diff --git a/challenge-083/abigail/output-1-5.exp b/challenge-083/abigail/output-1-5.exp
new file mode 100644
index 0000000000..b4478f3ca3
--- /dev/null
+++ b/challenge-083/abigail/output-1-5.exp
@@ -0,0 +1,2 @@
+# Using combining characters
+3
diff --git a/challenge-083/abigail/output-1-6.exp b/challenge-083/abigail/output-1-6.exp
new file mode 100644
index 0000000000..da3bab9392
--- /dev/null
+++ b/challenge-083/abigail/output-1-6.exp
@@ -0,0 +1,2 @@
+# Letters, but all outside ASCII and Latin-1
+30
diff --git a/challenge-083/abigail/perl/ch-1.pl b/challenge-083/abigail/perl/ch-1.pl
new file mode 100644
index 0000000000..3aae166d12
--- /dev/null
+++ b/challenge-083/abigail/perl/ch-1.pl
@@ -0,0 +1,86 @@
+#!/opt/perl/bin/perl
+
+use 5.032;
+
+use strict;
+use warnings;
+no  warnings 'syntax';
+
+use experimental 'signatures';
+use experimental 'lexical_subs';
+
+#
+# You are given a string $S with 3 or more words.
+#
+# Write a script to find the length of the string except the first
+# and last words ignoring whitespace.
+#
+
+
+#
+# It's not a given the input is in ASCII, so, we're assuming UTF8.
+# We need to tell Perl we're expecting input in UTF8.
+#
+binmode STDIN, ":encoding(UTF-8)" or die "binmode: $!";  # NOTE(review): layer is set on STDIN only; the loop reads via <>, so files named in @ARGV would not be decoded -- confirm input is always piped
+ 
+
+#
+# So, what is a word? \w+ sounds like a good idea, but that doesn't
+# capture words like "O'Reilly", hyphenated words, or words consisting
+# of letters with combining characters. It also matches things like ___
+# or 123, which perhaps should not be considered words.
+#
+# A letter followed by zero or more combining characters is
+# matched by \X, but \X also matches non-word characters. So, for a
+# letter with combining characters, we can match it with:
+#
+#      (?:(?=\pL)\X)
+#
+# Now, words can start or end with a ', or contain ' or - internally.
+# And while we will allow '- and -' internally, we don't allow double
+# '' or double --, nor any string of more than two of them. 
+#
+# We also require the substrings consisting of letters (with their
+# combining characters) to be bounded by grapheme cluster boundaries.
+#
+# This results in the following pattern for a word:
+#
+#
+
+my $word =      # compiled pattern matching one word; interpolated into the substitutions below
+     qr [(?(DEFINE)
+            (?<LETTERS>   \b{gcb} (?:(?=\pL)\X)+ \b{gcb})
+            (?<SEPARATOR> ['-] | '- | -')
+            (?<START>     '?)
+            (?<END>       '?)
+         )
+          (?&START)
+          (?&LETTERS) (?: (?&SEPARATOR) (?&LETTERS) ) *
+          (?&END)]x;    # the (?(DEFINE)...) group only declares the named sub-patterns; /x lets the pattern be laid out with whitespace
+
+
+# 
+# Now that we have a pattern for a word, we can remove the first
+# and last words. Removing the first match is easy, as Perl will, by
+# default, pick the leftmost possible match.
+#
+# The last word is slightly more tricky. It's important to realize
+# that, by our definition, any word contains at least one letter, and
+# any letter is part of a word. So, if we match a word, followed by
+# a possibly empty string of non-letters, followed by the end of
+# the string, we have the last word.
+#
+# After removing the first and last word, all we're left with is
+# removing whitespace, and getting the length of what is left over.
+#
+
+while (<>) {                    # process the input one line at a time, each line in $_
+    chomp;                      # drop the trailing newline so it is not counted
+    s/$word//;                  # remove the first word (leftmost match)
+    s/$word(?=\P{l}*$)//;       # remove the last word: a word followed only by non-letters up to the end
+    s/\s+//g;                   # delete all remaining whitespace
+    say length;                 # print the length of what is left of $_
+}
+  
+
+__END__
author	Abigail <abigail@abigail.be>	2020-10-19 17:05:12 +0200
committer	Abigail <abigail@abigail.be>	2020-10-19 17:05:12 +0200
commit	938de747ecef666e21bc4ec491d48a6cdf22d569 (patch)
tree	ee5c30285799e62f7b071d7506e2403f4bcc68ca
parent	c4f90a4fc0786f428e5aa1d6dab206cd5e9e01cc (diff)
download	perlweeklychallenge-club-938de747ecef666e21bc4ec491d48a6cdf22d569.tar.gz perlweeklychallenge-club-938de747ecef666e21bc4ec491d48a6cdf22d569.tar.bz2 perlweeklychallenge-club-938de747ecef666e21bc4ec491d48a6cdf22d569.zip