Perl & Raku solutions to Tasks 1 & 2 for Week 255

author: PerlMonk-Athanasius <PerlMonk.Athanasius@gmail.com> 2024-02-11 21:50:41 +1000
committer: PerlMonk-Athanasius <PerlMonk.Athanasius@gmail.com> 2024-02-11 21:50:41 +1000
commit: 2c9ae4500938f7a075becd7cf38cb8703d473667 (patch)
tree: fc9c0546813fa55e1ecc1f02dc5ed5862f671d89
parent: 34fa48cf4b02c81fd6ed544498bde068aca981a2 (diff)
download: perlweeklychallenge-club-2c9ae4500938f7a075becd7cf38cb8703d473667.tar.gz
perlweeklychallenge-club-2c9ae4500938f7a075becd7cf38cb8703d473667.tar.bz2
perlweeklychallenge-club-2c9ae4500938f7a075becd7cf38cb8703d473667.zip
4 files changed, 1062 insertions, 0 deletions
diff --git a/challenge-255/athanasius/perl/ch-1.pl b/challenge-255/athanasius/perl/ch-1.pl
new file mode 100644
index 0000000000..3f0032db87
--- /dev/null
+++ b/challenge-255/athanasius/perl/ch-1.pl
@@ -0,0 +1,206 @@
+#!perl
+
+################################################################################
+=comment
+
+Perl Weekly Challenge 255
+=========================
+
+TASK #1
+-------
+*Odd Character*
+
+Submitted by: Mohammad Sajid Anwar
+
+You are given two strings, $s and $t. The string $t is generated using the
+shuffled characters of the string $s with an additional character.
+
+Write a script to find the additional character in the string $t.
+
+Example 1
+
+  Input: $s = "Perl" $t = "Preel"
+  Output: "e"
+
+Example 2
+
+  Input: $s = "Weekly" $t = "Weeakly"
+  Output: "a"
+
+Example 3
+
+  Input: $s = "Box" $t = "Boxy"
+  Output: "y"
+
+=cut
+################################################################################
+
+#--------------------------------------#
+# Copyright © 2024 PerlMonk Athanasius #
+#--------------------------------------#
+
+#===============================================================================
+=comment
+
+Interface
+---------
+1. If no command-line arguments are given, the test suite is run. Otherwise:
+2. If $CASE_SENSITIVE is set to a true value, "P" and "p" (for example) are
+   treated as different characters; otherwise, they are treated as the same
+   character.
+
+=cut
+#===============================================================================
+
+use v5.32.1;             # Enables strictures
+use warnings;
+use Const::Fast;
+use Set::Bag;
+use Test::More;
+
+const my $ASSERT         => 1;                  # Perform optional sanity checks
+const my $CASE_SENSITIVE => 1;
+const my $USAGE          => <<END;
+Usage:
+  perl $0 <s> <t>
+  perl $0
+
+    <s>    A string
+    <t>    A string containing the chars of s with 1 additional char
+END
+
+#-------------------------------------------------------------------------------
+BEGIN
+#-------------------------------------------------------------------------------
+{
+    $| = 1;
+    print "\nChallenge 255, Task #1: Odd Character (Perl)\n\n";
+}
+
+#===============================================================================
+MAIN:
+#===============================================================================
+{
+    my $argc = scalar @ARGV;
+
+    if    ($argc == 0)
+    {
+        run_tests();
+    }
+    elsif ($argc == 2)
+    {
+        my ($s, $t) = @ARGV;
+
+        print qq[Input:  \$s = "$s" \$t = "$t"\n];
+
+        my $odd_char = find_odd_character( $s, $t );
+
+        print qq[Output: "$odd_char"\n];
+    }
+    else
+    {
+        error( "Expected 0 or 2 arguments, found $argc" );
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub find_odd_character
+#-------------------------------------------------------------------------------
+{
+    my ($s, $t) = @_;
+
+    length $t == 1 + length $s
+        or error( '$t must be exactly 1 character longer than $s' );
+
+    my %s_chars;
+     ++$s_chars{ $_ } for map { $CASE_SENSITIVE ? $_ : lc } split //, $s;
+
+    my %t_chars;
+     ++$t_chars{ $_ } for map { $CASE_SENSITIVE ? $_ : lc } split //, $t;
+
+    is_subset( \%s_chars, \%t_chars )
+        or error( 'The chars in $s are not a subset of the chars in $t' );
+
+    my $s_bag = Set::Bag->new( %s_chars );
+    my $t_bag = Set::Bag->new( %t_chars );
+    my $diff  = $t_bag - $s_bag;                                # Set difference
+
+    if ($ASSERT)                                                # Sanity checks
+    {
+        my @odd_chars = $diff->elements;
+
+        scalar @odd_chars >  0 or error( 'No odd character found' );
+        scalar @odd_chars == 1 or error( 'More than 1 odd character found' );
+
+        my $odd_char = $odd_chars[ 0 ];
+
+        $diff->grab( $odd_char ) == 1
+            or error( qq[More than 1 odd character "$odd_char" found] );
+    }
+
+    return ($diff->elements)[ 0 ];
+}
+
+#-------------------------------------------------------------------------------
+sub is_subset                                  # Not necessarily a proper subset
+#-------------------------------------------------------------------------------
+{
+    my ($x, $y) = @_;                                            # Hash pointers
+
+    for my $char (keys %$x)
+    {
+        if (exists $y->{ $char })
+        {
+            return 0 if $x->{ $char } > $y->{ $char };
+        }
+        else
+        {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+#-------------------------------------------------------------------------------
+sub run_tests
+#-------------------------------------------------------------------------------
+{
+    print "Running the test suite\n";
+
+    while (my $line = <DATA>)
+    {
+        chomp $line;
+
+        my  ($test_name, $s, $t, $expected) = split / \| /x, $line;
+
+        for ($test_name, $s, $t, $expected)
+        {
+            s/ ^ \s+   //x;
+            s/   \s+ $ //x;
+        }
+
+        my $odd_char = find_odd_character( $s, $t );
+
+        is $odd_char, $expected, $test_name;
+    }
+
+    done_testing;
+}
+
+#-------------------------------------------------------------------------------
+sub error
+#-------------------------------------------------------------------------------
+{
+    my ($message) = @_;
+
+    die "ERROR: $message\n$USAGE";
+}
+
+################################################################################
+
+__DATA__
+Example 1|Perl  |Preel  |e
+Example 2|Weekly|Weeakly|a
+Example 3|Box   |Boxy   |y
+Empty    |      |q      |q
diff --git a/challenge-255/athanasius/perl/ch-2.pl b/challenge-255/athanasius/perl/ch-2.pl
new file mode 100644
index 0000000000..e521ef407b
--- /dev/null
+++ b/challenge-255/athanasius/perl/ch-2.pl
@@ -0,0 +1,356 @@
+#!perl
+
+################################################################################
+=comment
+
+Perl Weekly Challenge 255
+=========================
+
+TASK #2
+-------
+*Most Frequent Word*
+
+Submitted by: Mohammad Sajid Anwar
+
+You are given a paragraph $p and a banned word $w.
+
+Write a script to return the most frequent word that is not banned.
+
+Example 1
+
+  Input: $p = "Joe hit a ball, the hit ball flew far after it was hit."
+         $w = "hit"
+  Output: "ball"
+
+  The banned word "hit" occurs 3 times.
+  The other word "ball" occurs 2 times.
+
+Example 2
+
+  Input: $p = "Perl and Raku belong to the same family. Perl is the most popular
+               language in the weekly challenge."
+         $w = "the"
+  Output: "Perl"
+
+  The banned word "the" occurs 3 times.
+  The other word "Perl" occurs 2 times.
+
+=cut
+################################################################################
+
+#--------------------------------------#
+# Copyright © 2024 PerlMonk Athanasius #
+#--------------------------------------#
+
+#===============================================================================
+=comment
+
+Interface
+---------
+1. If no command-line arguments are given, the test suite is run. Otherwise:
+2. If $VERBOSE is set to a true value, the output is followed by a short explan-
+   ation like those in the Examples.
+
+Assumptions
+-----------
+1. "Words" are composed of the letters A-Z, a-z, and ' (the apostrophe) ONLY.
+2. Different forms of the same root word are treated as different words:
+   "belong", "belongs", and "belonged" are 3 distinct words; "Perl" and "Perl's"
+   are 2 distinct words.
+3. Hyphenated words are not recognised as such: e.g., "cul-de-sac" is treated
+   as 3 separate words.
+4. Capitalisation.
+   If a word appears with its initial letter both capitalised and uncapitalised,
+   AND each occurrence of the capitalised version appears at the beginning of a
+   sentence, then (and only then) the different forms are considered to be a
+   single word, and that word's uncapitalised form is treated as canonical.
+5. If two or more words share the same maximum frequency of occurrence, the
+   alphabetically-first word is given as the output.
+
+=cut
+#===============================================================================
+
+use v5.32.1;     # Enables strictures
+use warnings;
+use Const::Fast;
+use Test::More;
+
+const my $SCREEN_WIDTH => 80;
+const my $TAB          => 14;
+const my $VERBOSE      => 1;
+const my $USAGE        => <<END;
+Usage:
+  perl $0 <p> <w>
+  perl $0
+
+    <p>    A paragraph of text
+    <w>    A banned word
+END
+
+#-------------------------------------------------------------------------------
+BEGIN
+#-------------------------------------------------------------------------------
+{
+    $| = 1;
+    print "\nChallenge 255, Task #2: Most Frequent Word (Perl)\n\n";
+}
+
+#===============================================================================
+MAIN:
+#===============================================================================
+{
+    my $argc = scalar @ARGV;
+
+    if    ($argc == 0)
+    {
+        run_tests();
+    }
+    elsif ($argc == 2)
+    {
+        my ($p, $w) = @ARGV;
+
+        $p =~ /   [a-z]    /ix or error( 'The paragraph contains no words' );
+        $w =~ / ^ [a-z]+ $ /ix or error( 'The banned word is not valid' );
+
+        printf qq[Input:  \$p = "%s"\n], split_paragraph( $p );
+        print  qq[        \$w = "$w"\n];
+
+        my $result = find_most_freq_word( $p, $w );
+
+        if (scalar @{ $result->{words} } == 0)
+        {
+            print "Output: <none>\n";
+        }
+        else
+        {
+            printf qq[Output: "%s"\n], $result->{words}[ 0 ];
+        }
+
+        explain_output( $w, $result ) if $VERBOSE;
+    }
+    else
+    {
+        error( "Expected 0 or 2 arguments, found $argc" );
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub find_most_freq_word
+#-------------------------------------------------------------------------------
+{
+    my ($p, $w) = @_;
+
+    # Remove (by replacing with spaces) all non-word characters, but leave 
+    # apostrophes and sentence-ending punctuation characters in place
+
+    $p =~ s/ [^A-Za-z'.?!\s] / /gx;
+
+    my $first_words = find_first_words( $p );      # Words beginning sentences
+
+    $p =~ s/ [.?!] / /gx;                          # Remove sentence terminators
+
+    # Record all words, together with their frequencies of occurrence
+
+    my %dict;
+     ++$dict{ $_ } for split / \s+ /x, $p;
+
+    merge_words( $first_words, \%dict ); # Merge capitalised/uncapitalised pairs
+
+    my %result;
+
+    remove_banned_word( $w, \%dict, \%result );
+
+    my $max_freq = 0;
+
+    $_ > $max_freq and $max_freq = $_ for values %dict;
+
+    $result{ 'max-freq' } = $max_freq;
+
+    my @words        = grep { $dict{ $_ } == $max_freq } keys %dict;
+    $result{ words } = [ sort @words ];
+
+    return \%result;
+}
+
+#-------------------------------------------------------------------------------
+sub find_first_words
+#-------------------------------------------------------------------------------
+{
+    my ($p) = @_;
+
+    my @matches = $p =~ / (?= ^ \s* || [.?!] \s+) ([A-Z] \S*) /gx;
+
+    my %first_words;
+     ++$first_words{ $_ } for @matches;
+
+    return \%first_words;
+}
+
+#-------------------------------------------------------------------------------
+sub merge_words
+#-------------------------------------------------------------------------------
+{
+    my ($first_words, $dict) = @_;
+
+    for my $word (keys %$first_words)
+    {
+        my $lc_word = $word =~ s/ ^ (.) / lc( $1 ) /erx;
+
+        if (exists $dict->{ $lc_word } &&
+            $first_words->{ $word } == $dict->{ $word })
+        {
+            $dict->{ $lc_word } += $dict->{ $word };
+
+            delete $dict->{ $word };
+        }
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub remove_banned_word
+#-------------------------------------------------------------------------------
+{
+    my ($w, $dict, $result) = @_;
+
+    $result->{ 'banned-word' } = 0;
+
+    if (exists $dict->{ $w })                           # Remove the banned word
+    {
+        $result->{ 'banned-word' } = $dict->{ $w };
+
+        delete $dict->{ $w };
+    }
+    else
+    {
+        my $lc_bw = $w =~ s/ ^ (.) / lc( $1 ) /erx;
+
+        if (exists $dict->{ $lc_bw })      # Remove the banned word's lc version
+        {
+            $result->{ 'banned-word' } = $dict->{ $lc_bw };
+
+            delete $dict->{ $lc_bw };
+        }
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub explain_output
+#-------------------------------------------------------------------------------
+{
+    my ($w, $result) = @_;
+    my  $w_freq      = $result->{ 'banned-word' };
+
+    printf qq[\nThe banned word "%s" occurs %d time%s\n],
+        $w, $w_freq, $w_freq == 1 ? '' : 's';
+
+    my $freq  = $result->{ 'max-freq' };
+    my $words = $result->{  words  };
+    my $count = scalar @$words;
+
+    if ($count == 0)
+    {
+        print "There are no other words\n";
+    }
+    else
+    {
+        printf qq[The other  word%s %s occur%s %d time%s%s\n],
+            ($count == 1 ? ''  : 's'),
+             join( ', ', map { qq["$_"] } @$words ),
+            ($count == 1 ? 's' : ''),
+             $freq,
+            ($freq  == 1 ? ''  : 's'),
+            ($count == 1 ? ''  : ' each');
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub split_paragraph
+#-------------------------------------------------------------------------------
+{
+    my ($p) = @_;
+
+    # Insert "\n<tab>" at every $SCREEN_WIDTH position in $p
+
+    my $insert = "\n" . ' ' x $TAB;
+
+    for (my $i = $SCREEN_WIDTH - $TAB; $i < length $p; $i += $SCREEN_WIDTH)
+    {
+        --$i while substr( $p, $i, 1 ) =~ / \S /x;
+        
+        substr $p, $i, 1, $insert;
+    }
+
+    return $p;
+}
+
+#-------------------------------------------------------------------------------
+sub run_tests
+#-------------------------------------------------------------------------------
+{
+    print "Running the test suite\n";
+
+    while (my $line = <DATA>)
+    {
+        while ($line =~ / \+ $ /x)
+        {
+            $line .= <DATA>;
+            $line =~ s/ \+ \n \s* //x;
+        }
+
+        chomp $line;
+
+        my  ($test_name, $p, $w, $expected) = split / \| /x, $line;
+
+        for ($test_name, $p, $w, $expected)
+        {
+            s/ ^ \s+   //x;
+            s/   \s+ $ //x;
+        }
+
+        my $result = find_most_freq_word( $p, $w );
+
+        is $result->{words}[ 0 ], $expected, $test_name;
+    }
+
+    done_testing;
+}
+
+#-------------------------------------------------------------------------------
+sub error
+#-------------------------------------------------------------------------------
+{
+    my ($message) = @_;
+
+    die "ERROR: $message\n$USAGE";
+}
+
+################################################################################
+
+# For ease of copy/paste onto the command line:
+=text
+But, in a larger sense, we can not dedicate-we can not consecrate-we can not hallow-this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us-that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion-that we here highly resolve that these dead shall not have died in vain-that this nation, under God, shall have a new birth of freedom-and that government of the people, by the people, for the people, shall not perish from the earth.
+=cut
+
+################################################################################
+
+__DATA__
+Example 1 |Joe hit a ball, the hit ball flew far after it was hit.|hit|ball
+Example 2 |Perl and Raku belong to the same family. Perl is the most popular   +
+           language in the weekly challenge.|the|Perl
+Gettysburg|But, in a larger sense, we can not dedicate-we can not consecrate-  +
+           we can not hallow-this ground. The brave men, living and dead, who  +
+           struggled here, have consecrated it, far above our poor power to    +
+           add or detract. The world will little note, nor long remember what  +
+           we say here, but it can never forget what they did here. It is for  +
+           us the living, rather, to be dedicated here to the unfinished work  +
+           which they who fought here have thus far so nobly advanced. It is   +
+           rather for us to be here dedicated to the great task remaining      +
+           before us-that from these honored dead we take increased devotion   +
+           to that cause for which they gave the last full measure of devotion-+
+           that we here highly resolve that these dead shall not have died in  +
+           vain-that this nation, under God, shall have a new birth of freedom-+
+           and that government of the people, by the people, for the people,   +
+           shall not perish from the earth.|the|here
+Case 1    |Perl and Raku belong to the same family. Perl is the most popular   +
+           language in the weekly challenge.|The|Perl
+Case 2    |x y a? X w a u! X ab a c and x|unknown|x
diff --git a/challenge-255/athanasius/raku/ch-1.raku b/challenge-255/athanasius/raku/ch-1.raku
new file mode 100644
index 0000000000..d869c3efa2
--- /dev/null
+++ b/challenge-255/athanasius/raku/ch-1.raku
@@ -0,0 +1,177 @@
+use v6d;
+
+################################################################################
+=begin comment
+
+Perl Weekly Challenge 255
+=========================
+
+TASK #1
+-------
+*Odd Character*
+
+Submitted by: Mohammad Sajid Anwar
+
+You are given two strings, $s and $t. The string $t is generated using the
+shuffled characters of the string $s with an additional character.
+
+Write a script to find the additional character in the string $t.
+
+Example 1
+
+  Input: $s = "Perl" $t = "Preel"
+  Output: "e"
+
+Example 2
+
+  Input: $s = "Weekly" $t = "Weeakly"
+  Output: "a"
+
+Example 3
+
+  Input: $s = "Box" $t = "Boxy"
+  Output: "y"
+
+=end comment
+################################################################################
+
+#--------------------------------------#
+# Copyright © 2024 PerlMonk Athanasius #
+#--------------------------------------#
+
+#===============================================================================
+=begin comment
+
+Interface
+---------
+1. If no command-line arguments are given, the test suite is run. Otherwise:
+2. If CASE-SENSITIVE is set to True, "P" and "p" (for example) are treated as
+   different characters; otherwise, they are treated as the same character.
+
+=end comment
+#===============================================================================
+
+use Test;
+
+subset Char of Str where { .chars == 1 };
+
+my Bool constant ASSERT         = True;         # Perform optional sanity checks
+my Bool constant CASE-SENSITIVE = True;
+
+#-------------------------------------------------------------------------------
+BEGIN
+#-------------------------------------------------------------------------------
+{
+    "\nChallenge 255, Task #1: Odd Character (Raku)\n".put;
+}
+
+#===============================================================================
+multi sub MAIN
+(
+    Str:D $s,       #= A string
+    Str:D $t        #= A string containing the chars of s with 1 additional char
+)
+#===============================================================================
+{
+    qq[Input:  \$s = "$s" \$t = "$t"].put;
+
+    my Char $odd-char = find-odd-character( $s, $t );
+
+    qq[Output: "$odd-char"].put;
+}
+
+#===============================================================================
+multi sub MAIN()                                  # No input: run the test suite
+#===============================================================================
+{
+    run-tests();
+}
+
+#-------------------------------------------------------------------------------
+sub find-odd-character( Str:D $s, Str:D $t --> Char:D )
+#-------------------------------------------------------------------------------
+{
+    $t.chars == $s.chars + 1
+        or error( '$t must be exactly 1 character longer than $s' );
+
+    my BagHash[Char] $s-chars = BagHash[Char].new:
+        $s.split( '', :skip-empty ).map: { CASE-SENSITIVE ?? $_ !! .lc };
+
+    my BagHash[Char] $t-chars = BagHash[Char].new:
+        $t.split( '', :skip-empty ).map: { CASE-SENSITIVE ?? $_ !! .lc };
+
+    $s-chars ⊂ $t-chars                                         # Strict subset
+        or error( 'The chars in $s are not a subset of the chars in $t' );
+
+    my Pair @odd-chars = $t-chars (-) $s-chars;                 # Set difference
+
+    if ASSERT                                                   # Sanity checks
+    {
+        @odd-chars.elems      >  0 or error( 'No odd character found' );
+        @odd-chars.elems      == 1 or error( 'More than 1 odd char found' );
+        @odd-chars[ 0 ].value == 1 or error( 'More than 1 odd character "' ~
+                                              @odd-chars[ 0 ].key ~ '" found' );
+    }
+
+    return @odd-chars[ 0 ].key;
+}
+
+#-------------------------------------------------------------------------------
+sub run-tests()
+#-------------------------------------------------------------------------------
+{
+    'Running the test suite'.put;
+
+    for test-data.lines -> Str $line
+    {
+        my Str ($test-name, $s, $t, $expected) = $line.split: / \| /;
+
+        for     $test-name, $s, $t, $expected
+        {
+            s/ ^ \s+   //;
+            s/   \s+ $ //;
+        }
+
+        my Char $odd-char = find-odd-character( $s, $t );
+
+        is $odd-char, $expected, $test-name;
+    }
+
+    done-testing;
+}
+
+#-------------------------------------------------------------------------------
+sub error( Str:D $message )
+#-------------------------------------------------------------------------------
+{
+    "ERROR: $message".put;
+
+    USAGE();
+
+    exit 0;
+}
+
+#-------------------------------------------------------------------------------
+sub USAGE()
+#-------------------------------------------------------------------------------
+{
+    my Str $usage = $*USAGE;
+
+    $usage ~~ s:g/ ($*PROGRAM-NAME) /raku $0/;
+
+    $usage.put;
+}
+
+#-------------------------------------------------------------------------------
+sub test-data( --> Str:D )
+#-------------------------------------------------------------------------------
+{
+    return q:to/END/;
+        Example 1|Perl  |Preel  |e
+        Example 2|Weekly|Weeakly|a
+        Example 3|Box   |Boxy   |y
+        Empty    |      |q      |q
+        END
+}
+
+################################################################################
diff --git a/challenge-255/athanasius/raku/ch-2.raku b/challenge-255/athanasius/raku/ch-2.raku
new file mode 100644
index 0000000000..b23aec3f52
--- /dev/null
+++ b/challenge-255/athanasius/raku/ch-2.raku
@@ -0,0 +1,323 @@
+use v6d;
+
+################################################################################
+=begin comment
+
+Perl Weekly Challenge 255
+=========================
+
+TASK #2
+-------
+*Most Frequent Word*
+
+Submitted by: Mohammad Sajid Anwar
+
+You are given a paragraph $p and a banned word $w.
+
+Write a script to return the most frequent word that is not banned.
+
+Example 1
+
+  Input: $p = "Joe hit a ball, the hit ball flew far after it was hit."
+         $w = "hit"
+  Output: "ball"
+
+  The banned word "hit" occurs 3 times.
+  The other word "ball" occurs 2 times.
+
+Example 2
+
+  Input: $p = "Perl and Raku belong to the same family. Perl is the most popular
+               language in the weekly challenge."
+         $w = "the"
+  Output: "Perl"
+
+  The banned word "the" occurs 3 times.
+  The other word "Perl" occurs 2 times.
+
+=end comment
+################################################################################
+
+#--------------------------------------#
+# Copyright © 2024 PerlMonk Athanasius #
+#--------------------------------------#
+
+#===============================================================================
+=begin comment
+
+Interface
+---------
+1. If no command-line arguments are given, the test suite is run. Otherwise:
+2. If VERBOSE is set to True, the output is followed by a short explanation like
+   those in the Examples.
+
+Assumptions
+-----------
+1. "Words" are composed of the letters A-Z, a-z, and ' (the apostrophe) ONLY.
+2. Different forms of the same root word are treated as different words:
+   "belong", "belongs", and "belonged" are 3 distinct words; "Perl" and "Perl's"
+   are 2 distinct words.
+3. Hyphenated words are not recognised as such: e.g., "cul-de-sac" is treated
+   as 3 separate words.
+4. Capitalisation.
+   If a word appears with its initial letter both capitalised and uncapitalised,
+   AND each occurrence of the capitalised version appears at the beginning of a
+   sentence, then (and only then) the different forms are considered to be a
+   single word, and that word's uncapitalised form is treated as canonical.
+5. If two or more words share the same maximum frequency of occurrence, the
+   alphabetically-first word is given as the output.
+
+=end comment
+#===============================================================================
+
+use Test;
+
+my UInt constant SCREEN-WIDTH = 80;
+my UInt constant TAB          = 14;
+my Bool constant VERBOSE      = True;
+
+#-------------------------------------------------------------------------------
+BEGIN
+#-------------------------------------------------------------------------------
+{
+    "\nChallenge 255, Task #2: Most Frequent Word (Raku)\n".put;
+}
+
+#===============================================================================
+multi sub MAIN
+(
+    Str:D $p where { m:i/   <[ a .. z ]>    / },          #= A paragraph of text
+    Str:D $w where { m:i/ ^ <[ a .. z ]>+ $ / }           #= A banned word
+)
+#===============================================================================
+{
+    qq[Input:  \$p = "%s"\n].printf: split-paragraph( $p );
+    qq[        \$w = "$w"].put;
+
+    my %result = find-most-freq-word( $p, $w );
+
+    if %result< words >.elems == 0
+    {
+        'Output: <none>'.put;
+    }
+    else
+    {
+        qq[Output: "%s"\n].printf: %result< words >[ 0 ];
+    }
+
+    explain( $w, %result ) if VERBOSE;
+}
+
+#===============================================================================
+multi sub MAIN()                                  # No input: run the test suite
+#===============================================================================
+{
+    run-tests();
+}
+
+#-------------------------------------------------------------------------------
+sub find-most-freq-word( Str:D $p, Str:D $w --> Hash:D )
+#-------------------------------------------------------------------------------
+{
+    # Remove (by replacing with spaces) all non-word characters, but leave 
+    # apostrophes and sentence-ending punctuation characters in place
+
+    my Str $p1 = S:g/ <-[ A .. Z a .. z ' . ? ! \s ]> / / given $p;
+
+    my UInt %first-words = find-first-words( $p1 );  # Words beginning sentences
+
+    $p1 ~~ s:g/ <[ . ? ! ]> / /;                   # Remove sentence terminators
+
+    # Record all words, together with their frequencies of occurrence
+
+    my UInt %dict;
+          ++%dict{ $_ } for $p1.split: / \s+ /, :skip-empty;
+
+    merge-words( %first-words, %dict );  # Merge capitalised/uncapitalised pairs
+
+    my %result;
+
+    remove-banned-word( $w, %dict, %result );
+
+    my UInt $max-freq   = %dict.elems ?? (%dict<>:v.max) !! 0;
+    %result< max-freq > = $max-freq;
+    my Str  @words      = grep { %dict{ $_ } == $max-freq }, %dict<>:k;
+    %result< words >    = Array[Str].new: @words.sort;
+
+    return %result;
+}
+
+#-------------------------------------------------------------------------------
+sub find-first-words( Str:D $p --> Hash:D[UInt:D] )
+#-------------------------------------------------------------------------------
+{
+    my @matches = $p ~~ m:g/ [ ^ \s* || <[ . ? ! ]> \s+ ] (<[ A .. Z ]> \S*) /;
+
+    my UInt %first-words;
+          ++%first-words{ $_ } for @matches.map: { $_[ 0 ] };
+
+    return %first-words;
+}
+
+#-------------------------------------------------------------------------------
+sub merge-words( Hash:D[UInt:D] $first-words, Hash:D[UInt:D] $dict )
+#-------------------------------------------------------------------------------
+{
+    for $first-words<>:k -> Str $word
+    {
+        my Str $lc-word = $word.subst( / ^ (.) /, { $0.lc } );
+
+        if $dict{ $lc-word }:exists && $first-words{ $word } == $dict{ $word }
+        {
+            $dict{ $lc-word } += $dict{ $word };
+            $dict{ $word }:delete;
+        }
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub remove-banned-word( Str:D $w, Hash:D[UInt:D] $dict, %result )
+#-------------------------------------------------------------------------------
+{
+    %result< banned-word > = 0;
+
+    if $dict{ $w }:exists                  # Remove the banned word
+    {
+        %result< banned-word > = $dict{ $w };
+        $dict{ $w }:delete;
+    }
+    else
+    {
+        my Str $lc-bw = $w.subst( / ^ (.) /, { $0.lc } );
+
+        if $dict{ $lc-bw }:exists
+        {
+            %result< banned-word > = $dict{ $lc-bw };
+            $dict{ $lc-bw }:delete;        # Remove the banned word's lc version
+        }
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub explain( Str:D $w, %result )
+#-------------------------------------------------------------------------------
+{
+    my UInt $w-freq = %result< banned-word >;
+
+    qq[\nThe banned word "%s" occurs %d time%s\n].printf:
+        $w, $w-freq, $w-freq == 1 ?? '' !! 's';
+
+    my UInt       $freq  = %result< max-freq >;
+    my Array[Str] $words = %result< words >;
+    my UInt       $count = $words.elems;
+
+    if $count == 0
+    {
+        'There are no other words'.put;
+    }
+    else
+    {
+        qq[The other  word%s %s occur%s %d time%s%s\n].printf:
+            ($count == 1 ?? ''  !! 's'),
+             $words.map( { qq["$_"] } ).join( ', ' ),
+            ($count == 1 ?? 's' !! ''),
+             $freq,
+            ($freq  == 1 ?? ''  !! 's'),
+            ($count == 1 ?? ''  !! ' each');
+    }
+}
+
+#-------------------------------------------------------------------------------
+sub split-paragraph( Str:D $p is copy --> Str:D )
+#-------------------------------------------------------------------------------
+{
+    # Insert "\n<tab>" at every SCREEN-WIDTH position in $p
+
+    my Str $insert = "\n" ~ ' ' x TAB;
+
+    loop (my UInt $i = SCREEN-WIDTH - TAB; $i < $p.chars; $i += SCREEN-WIDTH)
+    {
+        --$i while $p.substr( $i, 1 ) ~~ / \S /;
+        
+        $p.substr-rw( $i, 1 ) = $insert;
+    }
+
+    return $p;
+}
+
+#-------------------------------------------------------------------------------
+sub run-tests()
+#-------------------------------------------------------------------------------
+{
+    'Running the test suite'.put;
+
+    my Str $test-data = S:g/ \+ \n \s* // with test-data();
+
+    for $test-data.lines -> Str $line
+    {
+        my Str ($test-name, $p, $w, $expected) = $line.split: / \| /;
+
+        for     $test-name, $p, $w, $expected
+        {
+            s/ ^ \s+   //;
+            s/   \s+ $ //;
+        }
+
+        my %result = find-most-freq-word( $p, $w );
+
+        is %result< words >[ 0 ], $expected, $test-name;
+    }
+
+    done-testing;
+}
+
+#-------------------------------------------------------------------------------
+sub USAGE()
+#-------------------------------------------------------------------------------
+{
+    my Str $usage = $*USAGE;
+
+    $usage ~~ s:g/ ($*PROGRAM-NAME) /raku $0/;
+
+    $usage.put;
+}
+
+#-------------------------------------------------------------------------------
+sub test-data( --> Str:D )
+#-------------------------------------------------------------------------------
+{
+    return q:to/END/;
+        Example 1 |Joe hit a ball, the hit ball flew far after it was hit.|hit|+
+                   ball
+        Example 2 |Perl and Raku belong to the same family. Perl is the most   +
+                   popular language in the weekly challenge.|the|Perl
+        Gettysburg|But, in a larger sense, we can not dedicate—we can not      +
+                   consecrate—we can not hallow-this ground. The brave men,    +
+                   living and dead, who struggled here, have consecrated it,   +
+                   far above our poor power to add or detract. The w
author	PerlMonk-Athanasius <PerlMonk.Athanasius@gmail.com>	2024-02-11 21:50:41 +1000
committer	PerlMonk-Athanasius <PerlMonk.Athanasius@gmail.com>	2024-02-11 21:50:41 +1000
commit	2c9ae4500938f7a075becd7cf38cb8703d473667 (patch)
tree	fc9c0546813fa55e1ecc1f02dc5ed5862f671d89
parent	34fa48cf4b02c81fd6ed544498bde068aca981a2 (diff)
download	perlweeklychallenge-club-2c9ae4500938f7a075becd7cf38cb8703d473667.tar.gz perlweeklychallenge-club-2c9ae4500938f7a075becd7cf38cb8703d473667.tar.bz2 perlweeklychallenge-club-2c9ae4500938f7a075becd7cf38cb8703d473667.zip