diff options
| author | Doomtrain14 <yet.ebreo@gmail.com> | 2019-09-04 21:32:23 +0800 |
|---|---|---|
| committer | Doomtrain14 <yet.ebreo@gmail.com> | 2019-09-04 21:32:23 +0800 |
| commit | 60e3bdb31d53953b284fc9a7f9f288e75ce9dd39 (patch) | |
| tree | f6c852d5cb30a07eea342f7833032b6258f1b9b4 /challenge-024 | |
| parent | ae15c22e798605975af7feb763703d2d77849022 (diff) | |
| parent | 480648e55ce8812e067bf67ad0b1e2da6bac5115 (diff) | |
| download | perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.tar.gz perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.tar.bz2 perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.zip | |
Merge branch 'master' of https://github.com/manwar/perlweeklychallenge-club
Diffstat (limited to 'challenge-024')
| -rwxr-xr-x | challenge-024/duane-powell/perl5/ch-1.sh | 4 | ||||
| -rwxr-xr-x | challenge-024/duane-powell/perl5/ch-2.pl | 213 | ||||
| -rwxr-xr-x | challenge-024/joelle-maslak/perl5/ch-2.pl | 52 | ||||
| -rwxr-xr-x | challenge-024/joelle-maslak/perl6/ch-2.p6 | 36 | ||||
| -rw-r--r-- | challenge-024/laurent-rosenfeld/blog.txt | 1 | ||||
| -rw-r--r-- | challenge-024/laurent-rosenfeld/perl5/ch-1.sh | 1 | ||||
| -rw-r--r-- | challenge-024/laurent-rosenfeld/perl5/ch-2.pl | 16 | ||||
| -rw-r--r-- | challenge-024/laurent-rosenfeld/perl6/ch-1.sh | 1 | ||||
| -rw-r--r-- | challenge-024/laurent-rosenfeld/perl6/ch-2.p6 | 10 | ||||
| -rw-r--r-- | challenge-024/mike-accardo/README | 1 | ||||
| -rw-r--r-- | challenge-024/roger-bell-west/blog.txt | 1 |
11 files changed, 336 insertions, 0 deletions
diff --git a/challenge-024/duane-powell/perl5/ch-1.sh b/challenge-024/duane-powell/perl5/ch-1.sh new file mode 100755 index 0000000000..cd8ad66f4a --- /dev/null +++ b/challenge-024/duane-powell/perl5/ch-1.sh @@ -0,0 +1,4 @@ +#!/bin/bash +#generate 10 char random passwords +perl -le 'print map { (0..9,A..Z,a..z)[rand 62] } 1..10' + diff --git a/challenge-024/duane-powell/perl5/ch-2.pl b/challenge-024/duane-powell/perl5/ch-2.pl new file mode 100755 index 0000000000..4d304e566d --- /dev/null +++ b/challenge-024/duane-powell/perl5/ch-2.pl @@ -0,0 +1,213 @@ +#!/usr/bin/perl +use strict; + +# Create a script to implement full text search functionality using Inverted Index. +# https://en.wikipedia.org/wiki/Search_engine_indexing#Inverted_indices + +usage() unless (@ARGV); +my ($word_or_ext,@dir) = @ARGV; +my $word = $word_or_ext if (@ARGV == 1) ? $word_or_ext : ""; +my $ext = $word_or_ext if (@ARGV > 1) ? $word_or_ext : ""; + +my $i = InvertedIndex->new(); +if ($ext) { + $i->build($ext,@dir); + $i->freeze(); +} elsif ($word) { + $i->search($word); +} else { + usage(); +} +exit; + +sub usage { + my $usage = <<USAGE; +usage: +$0 word (return list of files containing this word) +$0 extension list-of-dirs (find new files ending in extension and index their words) +USAGE + print $usage; + exit; +} + +package InvertedIndex; +use Storable; +use File::Find::Rule; + +sub new { + my $class = shift; + my $self = { + # use Storable to remember past indexing. + storable_index => './.inverted_index.storable', + + # unique file id counter + file_id => 0, + + # hash ref of files + files => { + # hash ref of file info + # $file => { # full path and file name + # id => unique id number, + # mtime => file's mtime, + # } + }, + + # hash ref of words + words=> { + # hash ref of file_ids with this word + # $word => { # lower case of actual word + # file_id => count, + # } + }, + + # hash ref to map file_id back to file name + ids => { + # $id => $file + }, + }; + bless $self, $class; + + # Reload previous self from disk + return $self->thaw(); +} + +sub thaw { + my $self = shift; + # Retrieve the Storable object from disk and return it. + if (-e $self->{storable_index}) { + my $i = retrieve($self->{storable_index}); + print "No files have been indexed yet.\n" unless ($i->{file_id}); + return $i; + } else { + # First call ever, so create the Storable. + print "No files have been indexed yet.\n"; + store $self, $self->{storable_index}; + exit; + } +} + +sub freeze { + my $self = shift; + store $self, $self->{storable_index}; +} + +sub build { + my ($self,$ext,@dir) = @_; + + # Get list of files to index + my @f = File::Find::Rule->file() + ->name( "*$ext" ) + ->in(@dir); + + my ($file_count,$word_count) = (0,0); + foreach my $f (@f) { + if (open(my $FH, '<', $f)) { + # Have we indexed this file before? + if (exists $self->{files}{$f}) { + # Yes but has the file been modified? + if ($self->{files}{$f}{mtime} >= (stat $f)[9]) { + close($FH); + next; + } + # The file has been modified, delete its previous word counts + foreach my $w (keys %{$self->{words}}) { + delete $self->{words}{$w}{ $self->{files}{$f}{id} }; + } + } else { + # No, it's a new file, generate a unique id + $self->{files}{$f}{id} = $self->{file_id}++; + $self->{ids}{ $self->{files}{$f}{id} } = $f; + } + + # Note the file's mtime + $self->{files}{$f}{mtime} = (stat $f)[9]; + $file_count++; + + # Parse and count words in $f + while (<$FH>) { + foreach (split(/\s+/,$_)) { + $word_count++; + my $w = lc($_); + $self->{words}{$w}{ $self->{files}{$f}{id} }++; + } + } + close($FH); + } else { + warn "Can not open $f for indexing\n"; + } + } + if ($file_count) { + print "Indexed $word_count words in $file_count new files.\n"; + } else { + print "No new files found.\n"; + } +} + +sub search { + my $self = shift; + my $word = lc(shift); + + # Build output hash and sort by highest matching word count + if (exists $self->{words}{$word}) { + my %out; + foreach my $file_id (keys %{ $self->{words}{$word} }) { + $out{$self->{ids}{$file_id}} = $self->{words}{$word}{$file_id}; + } + foreach (sort { $out{$b} <=> $out{$a} } keys %out) { + print "$out{$_} $_\n"; + } + } else { + print "$word not found in any of the $self->{file_id} indexed files.\n"; + } +} + +1; + +__END__ + +rm -f .inverted_index.storable <=============================================== delete Storable + +./ch-2.pl <==================================================================== show usage. +usage: +./ch-2.pl word (return list of files containing this word) +./ch-2.pl extension list-of-dirs (find new files ending in extension and index their words) + +./ch-2.pl my <================================================================= search for 'my', oops we need to index before searching. +No files have been indexed yet. + +./ch-2.pl pl /home/duane/Documents/dev/perl/weekly_challenge <================= index .pl files in weekly_challenge +Indexed 6176 words in 21 new files. + +./ch-2.pl my <=================================================+=============== return the count and file list with word 'my' +29 /home/duane/Documents/dev/perl/weekly_challenge/024/ch-2.pl +19 /home/duane/Documents/dev/perl/weekly_challenge/022/ch-2.pl +16 /home/duane/Documents/dev/perl/weekly_challenge/018/ch-2.pl +13 /home/duane/Documents/dev/perl/weekly_challenge/018/ch-1.pl +11 /home/duane/Documents/dev/perl/weekly_challenge/015/ch-2.pl +9 /home/duane/Documents/dev/perl/weekly_challenge/021/ch-1.pl +7 /home/duane/Documents/dev/perl/weekly_challenge/023/ch-2.pl +7 /home/duane/Documents/dev/perl/weekly_challenge/016/ch-1.pl +7 /home/duane/Documents/dev/perl/weekly_challenge/015/ch-1.pl +7 /home/duane/Documents/dev/perl/weekly_challenge/019/ch-1.pl +6 /home/duane/Documents/dev/perl/weekly_challenge/019/ch-2.pl +6 /home/duane/Documents/dev/perl/weekly_challenge/020/ch-2.pl +6 /home/duane/Documents/dev/perl/weekly_challenge/017/ch-2.pl +6 /home/duane/Documents/dev/perl/weekly_challenge/014/ch-2.pl +5 /home/duane/Documents/dev/perl/weekly_challenge/014/ch-1.pl +4 /home/duane/Documents/dev/perl/weekly_challenge/017/ch-1.pl +4 /home/duane/Documents/dev/perl/weekly_challenge/020/ch-1.pl +4 /home/duane/Documents/dev/perl/weekly_challenge/021/ch-2.pl +4 /home/duane/Documents/dev/perl/weekly_challenge/023/ch-1.pl +4 /home/duane/Documents/dev/perl/weekly_challenge/022/ch-1.pl + +./ch-2.pl pl /home/duane/Documents/dev/perl/weekly_challenge_fork/ /root/bin <= index files in multiple dirs. +Indexed 391811 words in 938 new files. + +./ch-2.pl pl /root/bin <====================================================== touch file to test reindexing. +No new files found. +touch /root/bin/pdu_control.pl +./ch-2.pl pl /root/bin +Indexed 524 words in 1 new files. + +./ch-2.pl this-word-does-not-exist +this-word-does-not-exist not found in any of the 959 indexed files <=========== reports total files searched on no match. diff --git a/challenge-024/joelle-maslak/perl5/ch-2.pl b/challenge-024/joelle-maslak/perl5/ch-2.pl new file mode 100755 index 0000000000..93f8a076d6 --- /dev/null +++ b/challenge-024/joelle-maslak/perl5/ch-2.pl @@ -0,0 +1,52 @@ +#!/usr/bin/env perl + +use v5.16; # for fc +use strict; +use warnings; + +use File::ByLine; +use List::Util qw(uniq); + +# Using file by line means that a very long single file might be able to +# be processed quicker (assuming IO can keep up). + +MAIN: { + my %docs; + + my $fbl = File::ByLine->new(); + $fbl->processes(10); + + for my $fn (uniq @ARGV) { + # Read the file, seperate out the words (use File::ByLine to + # allow 10 threads to simultaniously read the file) + # + # F::BL->map applies the map to each line. + my %found; + $docs{$fn} = [ + uniq $fbl->map( + sub { + grep { $_ ne '' and exists $found{$_} ? undef : ($found{$_} = 1) } + map { fc } # Case insensitive words + split /\W+/, $_ + }, + $fn, + ) + ]; + } + + # Build the index + my %index; + for my $fn (sort keys %docs) { + for my $word ($docs{$fn}->@*) { + $index{$word} = [] unless exists $index{$word}; + push $index{$word}->@*, $fn; + } + } + + # Output the index + for my $word (sort keys %index) { + say "$word: " . join(" ", $index{$word}->@*); + } +} + + diff --git a/challenge-024/joelle-maslak/perl6/ch-2.p6 b/challenge-024/joelle-maslak/perl6/ch-2.p6 new file mode 100755 index 0000000000..b8c687ee3a --- /dev/null +++ b/challenge-024/joelle-maslak/perl6/ch-2.p6 @@ -0,0 +1,36 @@ +#!/usr/bin/env perl6 +use v6; + +# This is the first one where I wrote a P5 solution before the P6 solution. +# +# It's also the only time the P6 output differs from the P5 output, +# although both meet the requirement of the challenge. +# +# Difference: Perl 6 IO.words splits differently than Perl 5 split /\W+/ + +sub MAIN(+@files) { + my %docs; + + # Read the files, seperating out the words. Sadly no parallelism + # here, I don't have a Perl6 module for doing simultanious file + # reads of large files. :( + for @files -> $fn { + %docs{$fn} = $fn.IO.words.unique; + } + + # Build the index + my %index; + for %docs.keys.sort -> $fn { + for @(%docs{$fn}) -> $word { + %index{$word} = [] unless %index{$word}:exists; + %index{$word}.push: $fn; + } + } + + # Output the index + for %index.keys.sort -> $word { + say "$word: { %index{$word}.join(" ") }"; + } +} + + diff --git a/challenge-024/laurent-rosenfeld/blog.txt b/challenge-024/laurent-rosenfeld/blog.txt new file mode 100644 index 0000000000..d48306ce08 --- /dev/null +++ b/challenge-024/laurent-rosenfeld/blog.txt @@ -0,0 +1 @@ +http://blogs.perl.org/users/laurent_r/2019/09/perl-weekly-challenge-24-smallest-script-and-inverted-index.html diff --git a/challenge-024/laurent-rosenfeld/perl5/ch-1.sh b/challenge-024/laurent-rosenfeld/perl5/ch-1.sh new file mode 100644 index 0000000000..69bac43ae2 --- /dev/null +++ b/challenge-024/laurent-rosenfeld/perl5/ch-1.sh @@ -0,0 +1 @@ +perl -e '' diff --git a/challenge-024/laurent-rosenfeld/perl5/ch-2.pl b/challenge-024/laurent-rosenfeld/perl5/ch-2.pl new file mode 100644 index 0000000000..8d20b84ba5 --- /dev/null +++ b/challenge-024/laurent-rosenfeld/perl5/ch-2.pl @@ -0,0 +1,16 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Data::Dumper; + +my @files = glob "./*.pl"; +my %dict; +for my $file (@files) { + open my $IN, "<", $file or die "Cannot open $file $!"; + while (my $line = <$IN>) { + my @words = grep { /^\w{3,}$/ } split /\s+/, $line;; + $dict{$_}{$file} = 1 for @words; + } + close $IN; +} +print Dumper \%dict; diff --git a/challenge-024/laurent-rosenfeld/perl6/ch-1.sh b/challenge-024/laurent-rosenfeld/perl6/ch-1.sh new file mode 100644 index 0000000000..a41057bbec --- /dev/null +++ b/challenge-024/laurent-rosenfeld/perl6/ch-1.sh @@ -0,0 +1 @@ +perl6 -e '' diff --git a/challenge-024/laurent-rosenfeld/perl6/ch-2.p6 b/challenge-024/laurent-rosenfeld/perl6/ch-2.p6 new file mode 100644 index 0000000000..1293ba899d --- /dev/null +++ b/challenge-024/laurent-rosenfeld/perl6/ch-2.p6 @@ -0,0 +1,10 @@ +use v6; + +my @files = grep { /\.p6$/ or /\.pl6$/ }, dir('.'); +my %dict; +for @files -> $file { + for $file.IO.lines.words.grep({/^ \w ** 3..* $/}) -> $word { + %dict{$word}{$file} = True; + } +} +.say for %dict{'given'}.keys; diff --git a/challenge-024/mike-accardo/README b/challenge-024/mike-accardo/README new file mode 100644 index 0000000000..33606072ab --- /dev/null +++ b/challenge-024/mike-accardo/README @@ -0,0 +1 @@ +Solutions by Mike Accardo. diff --git a/challenge-024/roger-bell-west/blog.txt b/challenge-024/roger-bell-west/blog.txt new file mode 100644 index 0000000000..5185026238 --- /dev/null +++ b/challenge-024/roger-bell-west/blog.txt @@ -0,0 +1 @@ +https://blog.firedrake.org/archive/2019/09/Perl_Weekly_Challenge_24.html |
