aboutsummaryrefslogtreecommitdiff
path: root/challenge-024
diff options
context:
space:
mode:
author: Randy Lauen <randy.lauen@gmail.com> 2019-09-03 19:57:49 -0500
committer: Randy Lauen <randy.lauen@gmail.com> 2019-09-03 19:57:49 -0500
commit: 9673cb4f1df96f0fbf5787a6f5f74d3178e21a9b (patch)
tree: 4f7908561dd3fd04156df0bbb609ccc99b9f731c /challenge-024
parent: 480648e55ce8812e067bf67ad0b1e2da6bac5115 (diff)
download: perlweeklychallenge-club-9673cb4f1df96f0fbf5787a6f5f74d3178e21a9b.tar.gz
perlweeklychallenge-club-9673cb4f1df96f0fbf5787a6f5f74d3178e21a9b.tar.bz2
perlweeklychallenge-club-9673cb4f1df96f0fbf5787a6f5f74d3178e21a9b.zip
perl5 solution for task 2
Diffstat (limited to 'challenge-024')
-rw-r--r-- challenge-024/randy-lauen/perl5/ch-2.pl | 108
1 files changed, 108 insertions, 0 deletions
diff --git a/challenge-024/randy-lauen/perl5/ch-2.pl b/challenge-024/randy-lauen/perl5/ch-2.pl
new file mode 100644
index 0000000000..8f65832fd5
--- /dev/null
+++ b/challenge-024/randy-lauen/perl5/ch-2.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/env perl
+
+=head2 SYNOPSIS
+
+Task:
+ Create a script to implement full text search functionality using Inverted Index.
+
+Notes:
+ This script has a hardcoded list of documents. Run the script and pass a word as
+ the only argument to see which documents contain that word.
+
+Example Usage:
+ $ perl ch-2.pl minds
+ Found 2 document(s) for 'minds'
+ * "Pride and Prejudice": 1 occurence(s)
+ * "War of the Worlds": 1 occurence(s)
+
+ $ perl ch-2.pl universe
+ Found 0 document(s) for 'universe'
+
+=cut
+
+use v5.26;
+use strict;
+use warnings;
+
+use List::MoreUtils qw( frequency );
+
+# Index the built-in corpus, then look up the keyword given on the command line.
+my %index = build_inverse_index( get_documents() );
+my $keyword = lc( $ARGV[0] // '' );    # parens required: lc binds tighter than //
+die "Must provide a keyword as an argument\n" unless length($keyword);
+
+# Most frequent first, ties broken by title; a word absent from the index yields no matches.
+my @matches = sort { $b->{freq} <=> $a->{freq} || $a->{doc} cmp $b->{doc} } @{ $index{ $keyword } // [] };
+say "Found " . scalar(@matches) . " document(s) for '$keyword'";
+say qq[* "$_->{doc}": $_->{freq} occurence(s)] for @matches;
+
+exit 0;
+
+
+# Build an inverted index from a flat list of (document name => text) pairs.
+# Returns a hash mapping each lower-cased \w+ word to an array ref of postings,
+# one { doc => NAME, freq => COUNT } hash per document containing that word.
+sub build_inverse_index {
+    my %text_of = @_;
+    my %postings_for;
+
+    for my $title ( keys %text_of ) {
+        my %count_of = frequency( map { lc($_) } $text_of{$title} =~ /\w+/g );
+        while ( my ($term, $count) = each %count_of ) {
+            push @{ $postings_for{$term} }, { doc => $title, freq => $count };
+        }
+    }
+    return %postings_for;
+}
+
+# Return the hardcoded corpus as a flat list of (document title => text) pairs.
+sub get_documents {
+ return (
+ 'Pride and Prejudice' => <<~'TXT',
+ It is a truth universally acknowledged, that
+ a single man in possession of a good fortune
+ must be in want of a wife. However little
+ known the feelings or views of such a man may
+ be on his first entering a neighbourhood,
+ this truth is so well fixed in the minds of
+ the surrounding families, that he is
+ considered the rightful property of some one
+ or other of their daughters.
+ TXT
+ 'War of the Worlds' => <<~'TXT',
+ No one would have believed, in the last years
+ of the nineteenth century, that human affairs
+ were being watched from the timeless worlds
+ of space. No one could have dreamed that we
+ were being scrutinised as someone with a
+ microscope studies creatures that swarm and
+ multiply in a drop of water. And yet, across
+ the gulf of space, minds immeasurably
+ superior to ours regarded this Earth with
+ envious eyes, and slowly, and surely, they
+ drew their plans against us...
+ TXT
+ 'Richard III' => <<~'TXT',
+ Now is the winter of our discontent made
+ glorious summer by this sun of York; and
+ all the clouds that lour'd upon our
+ house in the deep bosom of the ocean
+ buried. Now are our brows bound with
+ victorious wreaths; our bruised arms
+ hung up for monuments; our stern
+ alarums changed to merry meetings, our
+ dreadful marches to delightful
+ measures.
+ TXT
+#'(Unconfuse VIM syntax highlighting)
+ "Hitchhiker's Guide to the Galaxy" => <<~'TXT',
+ Far back in the mists of ancient
+ time, in the great and glorious days
+ of the former Galactic Empire, life
+ was wild, rich and largely tax free.
+ TXT
+ );
+}
+
+
+