From 2a012dfc625cb64143284c474d5ebf39ddfa3abd Mon Sep 17 00:00:00 2001 From: Matthias Muth Date: Mon, 5 Aug 2024 00:01:41 +0200 Subject: Challenge 280 Task 1 and 2 solutions in Perl by Matthias Muth --- challenge-280/matthias-muth/README.md | 225 +++++++++++++++++-------------- challenge-280/matthias-muth/blog.txt | 1 + challenge-280/matthias-muth/perl/ch-1.pl | 38 ++++++ challenge-280/matthias-muth/perl/ch-2.pl | 69 ++++++++++ 4 files changed, 233 insertions(+), 100 deletions(-) create mode 100644 challenge-280/matthias-muth/blog.txt create mode 100755 challenge-280/matthias-muth/perl/ch-1.pl create mode 100755 challenge-280/matthias-muth/perl/ch-2.pl diff --git a/challenge-280/matthias-muth/README.md b/challenge-280/matthias-muth/README.md index 5df0c1440c..89dfb95dcb 100644 --- a/challenge-280/matthias-muth/README.md +++ b/challenge-280/matthias-muth/README.md @@ -1,148 +1,173 @@ -# More Weight for Examples! +# There Is More Than One Way To Regex -**Challenge 279 solutions in Perl by Matthias Muth** +**Challenge 280 solutions in Perl by Matthias Muth** -## Task 1: Sort Letters +## Task 1: Twice Appearance -> You are given two arrays, @letters and @weights.
-> Write a script to sort the given array @letters based on the @weights.
+> You are given a string, \$str, containing lowercase English letters only.
+> Write a script to print the first letter that appears twice.
>
> Example 1
-> Input: @letters = ('R', 'E', 'P', 'L')
-> @weights = (3, 2, 1, 4)
-> Output: PERL
+> Input: \$str = "acbddbca"
+> Output: "d"
>
> Example 2
-> Input: @letters = ('A', 'U', 'R', 'K')
-> @weights = (2, 4, 1, 3)
-> Output: RAKU
+> Input: \$str = "abccd"
+> Output: "c"
>
> Example 3
-> Input: @letters = ('O', 'H', 'Y', 'N', 'P', 'T')
-> @weights = (5, 4, 2, 6, 1, 3)
-> Output: PYTHON
+> Input: \$str = "abcdabbb"
+> Output: "a"
-Probably the most efficient way to solve this task is *not* to use sort, -but to directly assign letters to their positions in the result string. -This works for all the examples, -because in these examples, the `@weights` are very regular: +This is my no-frills-easy-reading solution: -- they can be mapped one-by-one to result positions, -- no weight appears twice, -- the weights cover all positions completely. +```perl +sub twice_appearance( $str ) { + my %seen; + for ( split "", $str ) { + return $_ + if $seen{$_}; + $seen{$_} = 1; + } + return (); +} +``` -Actually, at least for the examples, `@weights` should be called `@positions` -(minding that they are one-based, not zero-based). -So here's the simple version, to cover the examples.
-We need to initialize the result string to have the correct length, -because we will assign letters to positions in random order. -```perl -use v5.36; +I tried to develop a regex-based solution, but I failed!
+I started with this: -sub sort_letters_1( $letters, $weights ) { - my $result = " " x $letters->@*; - substr $result, $weights->[$_] - 1, 1, $letters->[$_] - for 0..$letters->$#*; - return $result; +```perl +sub twice_appearance_WRONG( $str ) { + return $str =~ /(.).*?\g1/ ? $1 : (); } ``` +But this doesn't work, because it finds 'the first letter that is repeated later on', not 'the first letter that is a duplicate of a letter that occurred before'. In Example 1 ("acbddbca") it finds 'a', because it tries 'a' first, but it should find 'd', because that is the first 'duplicating' letter (the first 'second letter', if you will). + +Then I tried a solution that captures any 'second' letter, and then checks with a lookbehind that that letter appears before: +```perlin the string +sub twice_appearance_LOOK_BEHIND_NO_GO( $str ) { + return $str =~ /(.)(? +But anyway, it aborts with an error +'Lookbehind longer than 255 not implemented ...'. -We should consider cases where weights do *not* map one-by-one to positions.
-Let's put some 'heavier weights' in, and consider this additional example: +I gave up.
+If anyone has a regex-based solution for this challenge task, +please post it in +[The Weekly Challenge - Perl & Raku group on Facebook](https://www.facebook.com/groups/theweeklychallengegroup/) or send me an [email](mailto:matthias.muth@gmx.de)! -> Example 4
-> Input: @letters = ('R', 'E', 'P', 'L')
-> @weights = (3333, 2222, 1111, 3333)
-> Output: PERL
- Here: -* the lowest weight is not 1, -* weights are not consecutive (they contain gaps), -* same weight values are used more than once for different letters
(behavior is not defined in this case, but we should do something useful), +## Task 2: Count Asterisks -* the weight values are not necessarily small integers
(which can cause memory problems when we incautiously map weight values to string positions or array indexes). +> You are given a string, \$str, where every two consecutive vertical bars are grouped into a pair.
+> Write a script to return the number of asterisks, \*, excluding any between each pair of vertical bars.
+>
+> Example 1
+> Input: \$str = "p|\*e\*rl|w\*\*e|\*ekly|"
+> Ouput: 2
+> The characters we are looking here are "p" and "w\*\*e".
+>
+> Example 2
+> Input: \$str = "perl"
+> Ouput: 0
+>
+> Example 3
+> Input: \$str = "th|ewe|e\*\*|k|l\*\*\*ych|alleng|e"
+> Ouput: 5
+> The characters we are looking here are "th", "e\*\*", "l\*\*\*ych" and "e".
-So what do we do? +##### Single regex version -My ideas are these: +I started with a single regex solution, which is, sorry for that, not very easy-to-read: -* Use the weight values as hash keys instead of string positions or array indexes to store where any letter is going to be put.
- This addresses both the 'not consecutive' and the 'no small integers' issues. - We can have a weight of 6548632 without running out of bounds as we would with a string or an array. -* Store a *list* of letters with each 'weight' hash key.
This deals with the case of multiple letters having the same weight value. - Every hash entry will contain an array-ref to a list of all letters having that weight. +```perl +sub count_asterisks_single_regex( $str ) { + return scalar( () = $str =~ /\G(?:\|[^|]*\||[^*])*+\*/g ); +} +``` -I'm also happy to highlight the `for_list` Perl feature -that was added in Perl 5.36 for iterating over multiple values at a time. -Especially its use together with the `mesh` function from `List::Util` -makes some things simple and nice.
-Where normally I would have to iterate over `0..$#array`, -I can avoid this here, as well as the use of `$_` in the loop.
-Certainly less 'perlish', but easy for the eyes! +What??? -To get the result string from the hash, we sort the hash keys (numerically!) -and concatenate all letters from their entries in order.
-Like this: +Ok, here is what it does, and what it uses.
Let's first add the `x` modifier to better see the pieces: ```perl -use v5.36; -no warnings 'experimental::for_list'; -use List::Util qw( mesh ); - -sub sort_letters( $letters, $weights ) { - my %buckets; - for my ( $letter, $weight ) ( mesh $letters, $weights ) { - push $buckets{$weight}->@*, $letter; - } - return join "", map $buckets{$_}->@*, sort { $a <=> $b } keys %buckets; -} + return scalar( () = $str =~ / \G (?: \| [^|]* \| | [^*] )*+ \* /xg ); ``` +Aha. So we loop over the string with the `g` modifier to find all occurrences of `\*` (at the end of the regex). And we use `\G` to always continue where we left off. +We skip over everything that we don't want: -## Task 2: Split String +- pairs of vertical bars and anything that is not a vertical bar in between:
`\| [^|]* \|` -> You are given a string, \$str.
-> Write a script to split the given string into two containing exactly same number of vowels and return true if you can otherwise false.
->
-> Example 1
-> Input: \$str = "perl"
-> Ouput: false
->
-> Example 2
-> Input: \$str = "book"
-> Ouput: true
-> Two possible strings "bo" and "ok" containing exactly one vowel each.
->
-> Example 3
-> Input: \$str = "good morning"
-> Ouput: true
-> Two possible strings "good " and "morning" containing two vowels each or "good m" and "orning" containing two vowels each.
+- anything that is not an asterisk:
`[^*]` + +We want to skip as many of both of these as we can, +so we group them together as alternatives, and add a `*` quantifier. + +Actually we use a `*+` ('possessive') quantifier +that keeps the regex engine from backtracking +once it finds a pair of vertical bars. +This inhibits retrying a vertical bar using the `[^*]` part +to find a `*` earlier (which then would also match *within* vertical bar pairs). + +What else? -Actually the task assignment 'split the given string into two containing exactly same number of vowels' can be completely ignored. The result is never used. It's like a piece of 'dead code' that never gets executed and that can be removed. +The regex delivers all matches, but we only want a count of the matches.
We get the count using a not so well-known property of the list assignment operator: It returns the number of elements of the *right hand side* of the assignment in scalar context. And it does so no matter what the left hand side is. So this: -The only information we need to return is whether *we can* split the string into two such pieces! +```perl +scalar( () = ( ) ) +``` + +has become a programming idiom in Perl to return the number of elements in a list *without assigning the list to an array variable first*.
Good for a one-liner!
+(See also [this useful stackoverflow article](https://stackoverflow.com/questions/2225460/how-do-i-find-the-number-of-values-in-a-perl-list).) -We can do so if and only if we can split up the string's *vowels* into two equal pieces.
-Which means we need an even number of vowels. +##### Two regex version: more easy-to-read -Ok, then let's count the vowels! -And the rest is easy... +My second solution uses two regexes: + +- one to remove all vertical bar pairs, +- and another one to find all asterisks. + +I guess it's much easier to read, especially with some parentheses added to help with understanding the operator grouping: ```perl -use v5.36; +sub count_asterisks_two_regexes( $str ) { + return scalar( () = ( $str =~ s/ \| [^|]* \| //xgr ) =~ / \* /xg ); +} +``` + +##### One regex and `tr`: my favorite (and shortest!) solution + +What I described so far helped me to arrive at my favorite solution.
+It is actually the shortest one, and I think it's the most readable. + +It uses + +- one regex to remove vertical bar pairs (as above), +- the `tr` operator to count the asterisks, by replacing them by - wait a minute - *asterisks*. + +The `tr` operator returns the number of characters that it replaced, so what more could we want? -sub split_string( $str ) { - my @vowels = $str =~ /[aeiou]/ig; - return scalar @vowels % 2 == 0; +Here we go: + +```perl +sub count_asterisks( $str ) { + return ( $str =~ s/ \| [^|]* \| //xgr ) =~ tr/*/*/; } ``` +This was an exercise in evolutionary programming... :-) + + + #### **Thank you for the challenge!** + diff --git a/challenge-280/matthias-muth/blog.txt b/challenge-280/matthias-muth/blog.txt new file mode 100644 index 0000000000..60ba8c61d5 --- /dev/null +++ b/challenge-280/matthias-muth/blog.txt @@ -0,0 +1 @@ +https://github.com/MatthiasMuth/perlweeklychallenge-club/tree/muthm-280/challenge-280/matthias-muth#readme diff --git a/challenge-280/matthias-muth/perl/ch-1.pl b/challenge-280/matthias-muth/perl/ch-1.pl new file mode 100755 index 0000000000..e19c337656 --- /dev/null +++ b/challenge-280/matthias-muth/perl/ch-1.pl @@ -0,0 +1,38 @@ +#!/usr/bin/env perl +# +# The Weekly Challenge - Perl & Raku +# (https://theweeklychallenge.org) +# +# Challenge 280 Task 1: Twice Appearance +# +# Perl solution by Matthias Muth. +# + +use v5.36; + +sub twice_appearance_WRONG( $str ) { + return $str =~ /(.).*?\g1/ ? $1 : ""; +} + +sub twice_appearance( $str ) { + my %seen; + for ( split "", $str ) { + return $_ + if $seen{$_}; + $seen{$_} = 1; + } + return (); +} + +sub twice_appearance_WRONG( $str ) { + return $str =~ /(.)(?( "p|*e*rl|w**e|*ekly|" ), 2, + 'Example 1: count_asterisks( "p|*e*rl|w**e|*ekly|" ) == 2'; + is $sub->( "perl" ), 0, + 'Example 2: count_asterisks( "perl" ) == 0'; + is $sub->( "th|ewe|e**|k|l***ych|alleng|e" ), 5, + 'Example 3: count_asterisks( "th|ewe|e**|k|l***ych|alleng|e" ) == 5'; +} +done_testing; -- cgit