diff options
| author | Paulo Custodio <pauloscustodio@gmail.com> | 2021-01-25 20:07:40 +0000 |
|---|---|---|
| committer | Paulo Custodio <pauloscustodio@gmail.com> | 2021-01-26 00:06:54 +0000 |
| commit | 3fa58628535d4041c7cc648c005080ca88f18c18 (patch) | |
| tree | 336fe3cc14f518f05e871ab974cc86a09a2fd8f6 /challenge-024 | |
| parent | 3d3900a2f0f69c54a34683e4e1b5da007b4af9d9 (diff) | |
| download | perlweeklychallenge-club-3fa58628535d4041c7cc648c005080ca88f18c18.tar.gz perlweeklychallenge-club-3fa58628535d4041c7cc648c005080ca88f18c18.tar.bz2 perlweeklychallenge-club-3fa58628535d4041c7cc648c005080ca88f18c18.zip | |
Replace tabs by spaces so that indentation looks correct
Diffstat (limited to 'challenge-024')
| -rw-r--r-- | challenge-024/paulo-custodio/perl/ch-2.pl | 198 | ||||
| -rw-r--r-- | challenge-024/paulo-custodio/test.pl | 70 |
2 files changed, 134 insertions, 134 deletions
diff --git a/challenge-024/paulo-custodio/perl/ch-2.pl b/challenge-024/paulo-custodio/perl/ch-2.pl index 31d780cfa9..5bd3c85fe5 100644 --- a/challenge-024/paulo-custodio/perl/ch-2.pl +++ b/challenge-024/paulo-custodio/perl/ch-2.pl @@ -3,14 +3,14 @@ # Challenge 019 # # Task #2 -# Create a script to implement full text search functionality using Inverted +# Create a script to implement full text search functionality using Inverted # Index. According to wikipedia: -# -# In computer science, an inverted index (also referred to as a postings file +# +# In computer science, an inverted index (also referred to as a postings file # or inverted file) is a database index storing a mapping from content, such as # words or numbers, to its locations in a table, or in a document or a set of # documents (named in contrast to a forward index, which maps from documents to -# content). The purpose of an inverted index is to allow fast full-text +# content). The purpose of an inverted index is to allow fast full-text # searches, at a cost of increased processing when a document is added to the # database. @@ -50,130 +50,130 @@ END } }; -# main +# main my($op, @args) = @ARGV; if (@ARGV>=2 && $op =~ /^add/i) { - add_doc($_) for @args; + add_doc($_) for @args; } elsif (@ARGV>=2 && $op =~ /^sea/) { - search($_) for @args; + search($_) for @args; } else { - die "Usage: add documents | search words\n"; + die "Usage: add documents | search words\n"; } # add words from document to index sub add_doc { - my($doc) = @_; - - # get title - my $title = path($doc)->basename; - - # connect to index database - my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","", - { RaiseError => 1, AutoCommit => 1 }); - - # get document id - my $document_id = get_document_id($dbh, $title); - - # read document - my $word_count = 0; - my $text = path($doc)->slurp; - while ($text =~ /(\w+)/g) { - my $word = lc($1); - my $word_id = get_word_id($dbh, $word); - add_found($dbh, $document_id, $word_id); - $word_count++; - } - - say "Indexed $title: found $word_count words"; - - # disconnect from database - $dbh->disconnect(); + my($doc) = @_; + + # get title + my $title = path($doc)->basename; + + # connect to index database + my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","", + { RaiseError => 1, AutoCommit => 1 }); + + # get document id + my $document_id = get_document_id($dbh, $title); + + # read document + my $word_count = 0; + my $text = path($doc)->slurp; + while ($text =~ /(\w+)/g) { + my $word = lc($1); + my $word_id = get_word_id($dbh, $word); + add_found($dbh, $document_id, $word_id); + $word_count++; + } + + say "Indexed $title: found $word_count words"; + + # disconnect from database + $dbh->disconnect(); } # get or add value to table sub get_or_add_id { - my($dbh, $table, $column, $value) = @_; - - for (1..2) { - # search document in db - my $sth = $dbh->prepare("SELECT id FROM $table WHERE $column = ?"); - $sth->execute($value); - my($id) = $sth->fetchrow(); - $sth->finish(); - - return $id if defined $id; - - # add value if not found - $sth = $dbh->prepare("INSERT INTO $table($column) VALUES(?)"); - $sth->execute($value); - $sth->finish(); - } - - die "failed to insert $column to $table value '$value'"; + my($dbh, $table, $column, $value) = @_; + + for (1..2) { + # search document in db + my $sth = $dbh->prepare("SELECT id FROM $table WHERE $column = ?"); + $sth->execute($value); + my($id) = $sth->fetchrow(); + $sth->finish(); + + return $id if defined $id; + + # add value if not found + $sth = $dbh->prepare("INSERT INTO $table($column) VALUES(?)"); + $sth->execute($value); + $sth->finish(); + } + + die "failed to insert $column to $table value '$value'"; } # get or add document id sub get_document_id { - my($dbh, $title) = @_; - return get_or_add_id($dbh, "documents", "title", $title); + my($dbh, $title) = @_; + return get_or_add_id($dbh, "documents", "title", $title); } # get or add word id sub get_word_id { - my($dbh, $word) = @_; - return get_or_add_id($dbh, "words", "word", $word); + my($dbh, $word) = @_; + return get_or_add_id($dbh, "words", "word", $word); } # add a found location if not already found sub add_found { - my($dbh, $document_id, $word_id) = @_; - - # search location in db - my $sth = $dbh->prepare(" - SELECT id FROM found - WHERE document_id = ? - AND word_id = ?"); - $sth->execute($document_id, $word_id); - my($id) = $sth->fetchrow(); - $sth->finish(); - - return if defined($id); - - # location not found, insert in db - $sth = $dbh->prepare("INSERT INTO found (document_id, word_id) - VALUES(?,?)"); - $sth->execute($document_id, $word_id); - $sth->finish(); + my($dbh, $document_id, $word_id) = @_; + + # search location in db + my $sth = $dbh->prepare(" + SELECT id FROM found + WHERE document_id = ? + AND word_id = ?"); + $sth->execute($document_id, $word_id); + my($id) = $sth->fetchrow(); + $sth->finish(); + + return if defined($id); + + # location not found, insert in db + $sth = $dbh->prepare("INSERT INTO found (document_id, word_id) + VALUES(?,?)"); + $sth->execute($document_id, $word_id); + $sth->finish(); } # search word sub search { - my($word) = @_; - - # connect to index database - my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","", - { RaiseError => 1, AutoCommit => 1 }); - - # search locations of each word - my $sth = $dbh->prepare(" - SELECT word, title - FROM documents, words, found - WHERE word = ? - AND found.document_id = documents.id - AND found.word_id = words.id - ORDER BY title"); - - $sth->execute($word); - - while (my($word, $title) = $sth->fetchrow()) { - say "$word\t$title"; - } - - $sth->finish(); - - # disconnect from database - $dbh->disconnect(); + my($word) = @_; + + # connect to index database + my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","", + { RaiseError => 1, AutoCommit => 1 }); + + # search locations of each word + my $sth = $dbh->prepare(" + SELECT word, title + FROM documents, words, found + WHERE word = ? + AND found.document_id = documents.id + AND found.word_id = words.id + ORDER BY title"); + + $sth->execute($word); + + while (my($word, $title) = $sth->fetchrow()) { + say "$word $title"; + } + + $sth->finish(); + + # disconnect from database + $dbh->disconnect(); } diff --git a/challenge-024/paulo-custodio/test.pl b/challenge-024/paulo-custodio/test.pl index a5930b3da4..48d6cebbee 100644 --- a/challenge-024/paulo-custodio/test.pl +++ b/challenge-024/paulo-custodio/test.pl @@ -6,72 +6,72 @@ use 5.030; use Path::Tiny; use Test::More; use WWW::Mechanize; -use utf8::all; # books are in UTF-8 +use utf8::all; # books are in UTF-8 # Challenge 024 # Task #1 # Create a smallest script in terms of size that on execution doesn't throw any -# error. The script doesn't have to do anything special. You could even come up +# error. The script doesn't have to do anything special. You could even come up # with smallest one-liner. -# +# # My solution: an empty file! (0 bytes) - Perl executes it and does not throw an error is capture("perl perl/ch-1.pl"), ""; # Task #2 -# Create a script to implement full text search functionality using Inverted +# Create a script to implement full text search functionality using Inverted # Index. According to wikipedia: # # Download a couple of books from https://www.gutenberg.org/ebooks/ -get_book('The Masque of the Red Death.txt', - 'https://www.gutenberg.org/files/1064/1064-0.txt'); -get_book('The Fall of the House of Usher.txt', - 'https://www.gutenberg.org/cache/epub/932/pg932.txt'); -get_book('The Cask of Amontillado.txt', - 'https://www.gutenberg.org/cache/epub/1063/pg1063.txt'); -get_book('The Raven.txt', - 'https://www.gutenberg.org/cache/epub/17192/pg17192.txt'); +get_book('The Masque of the Red Death.txt', + 'https://www.gutenberg.org/files/1064/1064-0.txt'); +get_book('The Fall of the House of Usher.txt', + 'https://www.gutenberg.org/cache/epub/932/pg932.txt'); +get_book('The Cask of Amontillado.txt', + 'https://www.gutenberg.org/cache/epub/1063/pg1063.txt'); +get_book('The Raven.txt', + 'https://www.gutenberg.org/cache/epub/17192/pg17192.txt'); # show index is capture("perl perl/ch-2.pl search death"), <<END; -death The Fall of the House of Usher.txt -death The Masque of the Red Death.txt -death The Raven.txt +death The Fall of the House of Usher.txt +death The Masque of the Red Death.txt +death The Raven.txt END is capture("perl perl/ch-2.pl search mystery"), <<END; -mystery The Fall of the House of Usher.txt -mystery The Raven.txt +mystery The Fall of the House of Usher.txt +mystery The Raven.txt END is capture("perl perl/ch-2.pl search imagination"), <<END; -imagination The Fall of the House of Usher.txt -imagination The Raven.txt +imagination The Fall of the House of Usher.txt +imagination The Raven.txt END done_testing; sub capture { - my($cmd) = @_; - my $out = `$cmd`; - $out =~ s/[ \t\v\f\r]*\n/\n/g; - return $out; + my($cmd) = @_; + my $out = `$cmd`; + $out =~ s/[ \t\v\f\r]*\n/\n/g; + return $out; } sub run { - my($cmd) = @_; - ok 0==system($cmd), $cmd; + my($cmd) = @_; + ok 0==system($cmd), $cmd; } sub get_book { - my($file, $url) = @_; - if (!-f $file) { - say "Getting $url --> $file"; - my $mech = WWW::Mechanize->new(); - $mech->get($url); - path($file)->spew($mech->content); - - # build index - run("perl perl/ch-2.pl add '$file'"); - } + my($file, $url) = @_; + if (!-f $file) { + say "Getting $url --> $file"; + my $mech = WWW::Mechanize->new(); + $mech->get($url); + path($file)->spew($mech->content); + + # build index + run("perl perl/ch-2.pl add '$file'"); + } } |
