Merge branch 'master' of https://github.com/manwar/perlweeklychallenge-club

author: Doomtrain14 <yet.ebreo@gmail.com> 2019-09-04 21:32:23 +0800
committer: Doomtrain14 <yet.ebreo@gmail.com> 2019-09-04 21:32:23 +0800
commit: 60e3bdb31d53953b284fc9a7f9f288e75ce9dd39 (patch)
tree: f6c852d5cb30a07eea342f7833032b6258f1b9b4 /challenge-024
parent: ae15c22e798605975af7feb763703d2d77849022 (diff)
parent: 480648e55ce8812e067bf67ad0b1e2da6bac5115 (diff)
download: perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.tar.gz
perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.tar.bz2
perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.zip
11 files changed, 336 insertions, 0 deletions
diff --git a/challenge-024/duane-powell/perl5/ch-1.sh b/challenge-024/duane-powell/perl5/ch-1.sh
new file mode 100755
index 0000000000..cd8ad66f4a
--- /dev/null
+++ b/challenge-024/duane-powell/perl5/ch-1.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Generate one random 10-character alphanumeric password
+perl -le 'print map { (0..9,A..Z,a..z)[rand 62] } 1..10'
+
diff --git a/challenge-024/duane-powell/perl5/ch-2.pl b/challenge-024/duane-powell/perl5/ch-2.pl
new file mode 100755
index 0000000000..4d304e566d
--- /dev/null
+++ b/challenge-024/duane-powell/perl5/ch-2.pl
@@ -0,0 +1,213 @@
+#!/usr/bin/perl
+use strict;
+
+# Create a script to implement full text search functionality using Inverted Index.
+# https://en.wikipedia.org/wiki/Search_engine_indexing#Inverted_indices
+
+# Command-line dispatch: a single argument is a word to search for; two or
+# more arguments are an extension followed by the directories to index.
+usage() unless (@ARGV);
+my ($word_or_ext,@dir) = @ARGV;
+
+# Assign with a plain ternary. A "my" declaration combined with a statement
+# modifier ("my $x = ... if COND") has undefined behaviour according to perlsyn.
+my $word = (@ARGV == 1) ? $word_or_ext : "";
+my $ext  = (@ARGV  > 1) ? $word_or_ext : "";
+
+my $i = InvertedIndex->new();
+# Test length rather than truthiness so that "0" is accepted as a word or
+# extension; an empty string still falls through to usage().
+if (length $ext) {
+	$i->build($ext,@dir);
+	$i->freeze();
+} elsif (length $word) {
+	$i->search($word);
+} else {
+	usage();
+}
+exit;
+
+# Print the command-line synopsis to STDOUT and terminate the program.
+# $0 is interpolated into the heredoc, so the text shows the script name as invoked.
+sub usage {
+	my $usage = <<USAGE;
+usage:
+$0 word                   (return list of files containing this word)
+$0 extension list-of-dirs (find new files ending in extension and index their words)
+USAGE
+	print $usage;
+	exit;
+}
+
+package InvertedIndex;
+use Storable;
+use File::Find::Rule;
+
+# Constructor. Builds an empty index skeleton, blesses it into $class, and
+# then hands off to thaw(); the value returned to the caller is whatever
+# thaw() returns. thaw() may end the program instead of returning.
+sub new {
+        my $class = shift;
+        my $self = {
+		# use Storable to remember past indexing.
+		storable_index => './.inverted_index.storable',
+
+		# unique file id counter
+		file_id => 0,
+
+		# hash ref of files
+		files => {
+			# hash ref of file info
+			# $file => { # full path and file name
+			#	id => unique id number,
+			#	mtime => file's mtime,
+			# }
+		},
+
+		# hash ref of words
+		words=> {
+			# hash ref of file_ids with this word
+			# $word => { # lower case of actual word
+			# 	file_id => count,
+			# }
+		},
+
+		# hash ref to map file_id back to file name
+		ids => {
+			# $id => $file
+		},
+	};
+        bless $self, $class;
+
+	# Reload previous self from disk
+	return $self->thaw();
+}
+
+# Load the persisted index from disk.
+# When the file named by $self->{storable_index} exists, the object retrieved
+# from it is returned and the freshly built $self is discarded. Otherwise the
+# empty $self is written to that path as a starting point and the program exits.
+# NOTE(review): new() runs before the build/search dispatch, so the very first
+# invocation exits here even when indexing was requested -- confirm intended.
+sub thaw {
+        my $self = shift;
+	# Retrieve the Storable object from disk and return it.
+	if (-e $self->{storable_index}) {
+		my $i = retrieve($self->{storable_index});
+		# file_id is still 0 when nothing has been indexed so far
+		print "No files have been indexed yet.\n" unless ($i->{file_id});
+		return $i; 
+	} else {
+		# First call ever, so create the Storable.
+		print "No files have been indexed yet.\n";
+		store $self, $self->{storable_index};
+		exit;
+	}
+}
+
+# Persist the whole index object to the path held in its storable_index slot.
+# Returns whatever Storable's store() returns.
+sub freeze {
+	my ($index) = @_;
+	my $path = $index->{storable_index};
+	return store($index, $path);
+}
+
+# Index (or re-index) every file under @dir whose name ends in $ext.
+#   $ext - file name suffix; matched with the glob "*$ext"
+#   @dir - directories handed to File::Find::Rule
+# A file that is already indexed is skipped unless its mtime is newer than the
+# one recorded. Prints a one-line summary. Changes live only in memory until
+# freeze() is called.
+sub build {
+        my ($self,$ext,@dir)  = @_;
+
+	# Get list of files to index
+	my @f = File::Find::Rule->file()
+			   ->name( "*$ext" )
+			   ->in(@dir);
+
+	my ($file_count,$word_count) = (0,0);
+	foreach my $f (@f) {
+		if (open(my $FH, '<', $f)) {
+			# stat the open handle once, so the value compared and the
+			# value stored are guaranteed to be the same
+			my $mtime = (stat $FH)[9];
+
+			# Have we indexed this file before?
+			if (exists $self->{files}{$f}) {
+				# Yes but has the file been modified?
+				if ($self->{files}{$f}{mtime} >= $mtime) {
+					close($FH);
+					next;
+				}
+				# The file has been modified, delete its previous word counts
+				my $old_id = $self->{files}{$f}{id};
+				foreach my $w (keys %{$self->{words}}) {
+					delete $self->{words}{$w}{$old_id};
+					# Drop words that no file contains any more, so a
+					# later lookup does not find an empty entry.
+					delete $self->{words}{$w} unless %{ $self->{words}{$w} };
+				}
+			} else {
+				# No, it's a new file, generate a unique id
+				$self->{files}{$f}{id} = $self->{file_id}++;
+				$self->{ids}{ $self->{files}{$f}{id} } = $f;
+			}
+
+			# Note the file's mtime
+			$self->{files}{$f}{mtime} = $mtime;
+			$file_count++;
+
+			# Parse and count words in $f
+			my $id = $self->{files}{$f}{id};
+			while (my $line = <$FH>) {
+				# split ' ' skips leading whitespace; split /\s+/ would
+				# return an empty first field for every indented line
+				foreach my $token (split ' ', $line) {
+					$word_count++;
+					$self->{words}{ lc $token }{$id}++;
+				}
+			}
+			close($FH);
+		} else {
+			 warn "Can not open $f for indexing\n";
+		}
+	}
+	if ($file_count) {
+		print "Indexed $word_count words in $file_count new files.\n";
+	} else {
+		print "No new files found.\n";
+	}
+}
+
+# Print every indexed file containing the given word (compared in lower case),
+# one "count path" line per file, highest count first and ties ordered by path.
+# If no file contains the word, print a "not found" message that reports
+# $self->{file_id} as the number of indexed files.
+sub search {
+        my $self = shift;
+	my $word = lc(shift);
+
+	# An entry can exist yet be empty when a re-index removed the last file
+	# containing the word; treat that exactly like a missing entry.
+	my $hits = $self->{words}{$word};
+	if ($hits && %$hits) {
+		# Build output hash and sort by highest matching word count
+		my %out;
+		foreach my $file_id (keys %$hits) {
+			$out{$self->{ids}{$file_id}} = $hits->{$file_id};
+		}
+		foreach (sort { $out{$b} <=> $out{$a} || $a cmp $b } keys %out) {
+			print "$out{$_} $_\n";
+		}
+	} else {
+		print "$word not found in any of the $self->{file_id} indexed files.\n";
+	}
+}
+
+1;
+
+__END__
+
+rm -f .inverted_index.storable <=============================================== delete Storable
+
+./ch-2.pl <==================================================================== show usage.
+usage:
+./ch-2.pl word                   (return list of files containing this word)
+./ch-2.pl extension list-of-dirs (find new files ending in extension and index their words)
+
+./ch-2.pl my <================================================================= search for 'my', oops we need to index before searching.
+No files have been indexed yet.
+
+./ch-2.pl pl /home/duane/Documents/dev/perl/weekly_challenge <================= index .pl files in weekly_challenge
+Indexed 6176 words in 21 new files.
+
+./ch-2.pl my <================================================================= return the count and file list with word 'my'
+29 /home/duane/Documents/dev/perl/weekly_challenge/024/ch-2.pl
+19 /home/duane/Documents/dev/perl/weekly_challenge/022/ch-2.pl
+16 /home/duane/Documents/dev/perl/weekly_challenge/018/ch-2.pl
+13 /home/duane/Documents/dev/perl/weekly_challenge/018/ch-1.pl
+11 /home/duane/Documents/dev/perl/weekly_challenge/015/ch-2.pl
+9 /home/duane/Documents/dev/perl/weekly_challenge/021/ch-1.pl
+7 /home/duane/Documents/dev/perl/weekly_challenge/023/ch-2.pl
+7 /home/duane/Documents/dev/perl/weekly_challenge/016/ch-1.pl
+7 /home/duane/Documents/dev/perl/weekly_challenge/015/ch-1.pl
+7 /home/duane/Documents/dev/perl/weekly_challenge/019/ch-1.pl
+6 /home/duane/Documents/dev/perl/weekly_challenge/019/ch-2.pl
+6 /home/duane/Documents/dev/perl/weekly_challenge/020/ch-2.pl
+6 /home/duane/Documents/dev/perl/weekly_challenge/017/ch-2.pl
+6 /home/duane/Documents/dev/perl/weekly_challenge/014/ch-2.pl
+5 /home/duane/Documents/dev/perl/weekly_challenge/014/ch-1.pl
+4 /home/duane/Documents/dev/perl/weekly_challenge/017/ch-1.pl
+4 /home/duane/Documents/dev/perl/weekly_challenge/020/ch-1.pl
+4 /home/duane/Documents/dev/perl/weekly_challenge/021/ch-2.pl
+4 /home/duane/Documents/dev/perl/weekly_challenge/023/ch-1.pl
+4 /home/duane/Documents/dev/perl/weekly_challenge/022/ch-1.pl
+
+./ch-2.pl pl /home/duane/Documents/dev/perl/weekly_challenge_fork/ /root/bin <= index files in multiple dirs.
+Indexed 391811 words in 938 new files.
+
+./ch-2.pl pl /root/bin  <====================================================== touch file to test reindexing. 
+No new files found.
+touch /root/bin/pdu_control.pl
+./ch-2.pl pl /root/bin
+Indexed 524 words in 1 new files.
+
+./ch-2.pl this-word-does-not-exist
+this-word-does-not-exist not found in any of the 959 indexed files <=========== reports total files searched on no match.
diff --git a/challenge-024/joelle-maslak/perl5/ch-2.pl b/challenge-024/joelle-maslak/perl5/ch-2.pl
new file mode 100755
index 0000000000..93f8a076d6
--- /dev/null
+++ b/challenge-024/joelle-maslak/perl5/ch-2.pl
@@ -0,0 +1,52 @@
+#!/usr/bin/env perl
+
+use v5.16;  # for fc
+use strict;
+use warnings;
+
+use File::ByLine;
+use List::Util qw(uniq);
+
+# Using file by line means that a very long single file might be able to
+# be processed quicker (assuming IO can keep up).
+
+# Build an inverted index (word -> files containing it) for the files named
+# in @ARGV and print it, one word per line, words and file names sorted.
+MAIN: {
+    my %docs;
+
+    my $fbl = File::ByLine->new();
+    $fbl->processes(10);
+
+    for my $fn (uniq @ARGV) {
+        # Read the file, separate out the words (use File::ByLine,
+        # configured above with processes(10), to read the file in parallel)
+        #
+        # F::BL->map applies the map to each line.
+        my %found;
+        $docs{$fn} = [
+            uniq $fbl->map(
+                sub {
+                    # Drop empty strings and keep a word only the first time
+                    # %found sees it ("and" binds more loosely than ?:).
+                    grep { $_ ne '' and exists $found{$_} ? undef : ($found{$_} = 1) }
+                        map   { fc }    # Case insensitive words
+                        split /\W+/, $_
+                },
+                $fn,
+            )
+        ];
+    }
+
+    # Build the index
+    my %index;
+    for my $fn (sort keys %docs) {
+        for my $word ($docs{$fn}->@*) {
+            $index{$word} = [] unless exists $index{$word};
+            push $index{$word}->@*, $fn;
+        }
+    }
+
+    # Output the index
+    for my $word (sort keys %index) {
+        say "$word: " . join(" ", $index{$word}->@*);
+    }
+}
+
+
diff --git a/challenge-024/joelle-maslak/perl6/ch-2.p6 b/challenge-024/joelle-maslak/perl6/ch-2.p6
new file mode 100755
index 0000000000..b8c687ee3a
--- /dev/null
+++ b/challenge-024/joelle-maslak/perl6/ch-2.p6
@@ -0,0 +1,36 @@
+#!/usr/bin/env perl6
+use v6;
+
+# This is the first one where I wrote a P5 solution before the P6 solution.
+#
+# It's also the only time the P6 output differs from the P5 output,
+# although both meet the requirement of the challenge.
+#
+# Difference: Perl 6 IO.words splits differently than Perl 5 split /\W+/
+
+# Build an inverted index (word -> files containing it) for the given files
+# and print it, one word per line, with words and file names sorted.
+sub MAIN(+@files) {
+    my %docs;
+
+    # Read the files, separating out the words.  Sadly no parallelism
+    # here, I don't have a Perl6 module for doing simultaneous file
+    # reads of large files.  :(
+    for @files -> $fn {
+        %docs{$fn} = $fn.IO.words.unique;
+    }
+
+    # Build the index
+    my %index;
+    for %docs.keys.sort -> $fn {
+        for @(%docs{$fn}) -> $word {
+            %index{$word} = [] unless %index{$word}:exists;
+            %index{$word}.push: $fn;
+        }
+    }
+
+    # Output the index
+    for %index.keys.sort -> $word {
+        say "$word: { %index{$word}.join(" ") }";
+    }
+}
+
+
diff --git a/challenge-024/laurent-rosenfeld/blog.txt b/challenge-024/laurent-rosenfeld/blog.txt
new file mode 100644
index 0000000000..d48306ce08
--- /dev/null
+++ b/challenge-024/laurent-rosenfeld/blog.txt
@@ -0,0 +1 @@
+http://blogs.perl.org/users/laurent_r/2019/09/perl-weekly-challenge-24-smallest-script-and-inverted-index.html
diff --git a/challenge-024/laurent-rosenfeld/perl5/ch-1.sh b/challenge-024/laurent-rosenfeld/perl5/ch-1.sh
new file mode 100644
index 0000000000..69bac43ae2
--- /dev/null
+++ b/challenge-024/laurent-rosenfeld/perl5/ch-1.sh
@@ -0,0 +1 @@
+perl -e ''
diff --git a/challenge-024/laurent-rosenfeld/perl5/ch-2.pl b/challenge-024/laurent-rosenfeld/perl5/ch-2.pl
new file mode 100644
index 0000000000..8d20b84ba5
--- /dev/null
+++ b/challenge-024/laurent-rosenfeld/perl5/ch-2.pl
@@ -0,0 +1,16 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Data::Dumper;
+
+# Build an inverted index of the words found in the *.pl files of the
+# current directory, stored as $dict{word}{file} = 1.
+my @files = glob "./*.pl";
+my %dict;
+for my $file (@files) {
+    open my $IN, "<", $file or die "Cannot open $file $!";
+    while (my $line = <$IN>) {
+        # Keep only whitespace-separated tokens made entirely of three or
+        # more word characters.
+        my @words = grep { /^\w{3,}$/ } split /\s+/, $line;;
+        $dict{$_}{$file} = 1 for @words;
+    }
+    close $IN;
+}
+# Dump the whole index structure
+print Dumper \%dict;
diff --git a/challenge-024/laurent-rosenfeld/perl6/ch-1.sh b/challenge-024/laurent-rosenfeld/perl6/ch-1.sh
new file mode 100644
index 0000000000..a41057bbec
--- /dev/null
+++ b/challenge-024/laurent-rosenfeld/perl6/ch-1.sh
@@ -0,0 +1 @@
+perl6 -e ''
diff --git a/challenge-024/laurent-rosenfeld/perl6/ch-2.p6 b/challenge-024/laurent-rosenfeld/perl6/ch-2.p6
new file mode 100644
index 0000000000..1293ba899d
--- /dev/null
+++ b/challenge-024/laurent-rosenfeld/perl6/ch-2.p6
@@ -0,0 +1,10 @@
+use v6;
+
+# Collect the entries of the current directory whose names end in .p6 or .pl6
+my @files = grep { /\.p6$/ or /\.pl6$/ }, dir('.');
+# Inverted index: %dict{word}{file} = True
+my %dict;
+for @files -> $file {
+    # Index only words consisting of three or more word characters
+    for $file.IO.lines.words.grep({/^ \w ** 3..* $/}) -> $word {
+        %dict{$word}{$file} = True;
+    }
+}
+# Print the files recorded for the word 'given'
+.say for %dict{'given'}.keys;
diff --git a/challenge-024/mike-accardo/README b/challenge-024/mike-accardo/README
new file mode 100644
index 0000000000..33606072ab
--- /dev/null
+++ b/challenge-024/mike-accardo/README
@@ -0,0 +1 @@
+Solutions by Mike Accardo.
diff --git a/challenge-024/roger-bell-west/blog.txt b/challenge-024/roger-bell-west/blog.txt
new file mode 100644
index 0000000000..5185026238
--- /dev/null
+++ b/challenge-024/roger-bell-west/blog.txt
@@ -0,0 +1 @@
+https://blog.firedrake.org/archive/2019/09/Perl_Weekly_Challenge_24.html
author	Doomtrain14 <yet.ebreo@gmail.com>	2019-09-04 21:32:23 +0800
committer	Doomtrain14 <yet.ebreo@gmail.com>	2019-09-04 21:32:23 +0800
commit	60e3bdb31d53953b284fc9a7f9f288e75ce9dd39 (patch)
tree	f6c852d5cb30a07eea342f7833032b6258f1b9b4 /challenge-024
parent	ae15c22e798605975af7feb763703d2d77849022 (diff)
parent	480648e55ce8812e067bf67ad0b1e2da6bac5115 (diff)
download	perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.tar.gz perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.tar.bz2 perlweeklychallenge-club-60e3bdb31d53953b284fc9a7f9f288e75ce9dd39.zip