Add C solution

author: Paulo Custodio <pauloscustodio@gmail.com> 2023-05-03 12:09:29 +0100
committer: Paulo Custodio <pauloscustodio@gmail.com> 2023-05-03 12:10:57 +0100
commit: 02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6 (patch)
tree: 345b87193914524c9c3b36d50fcacf92b2f904e5
parent: 6c6480baed8abb849ed2d2ffa29f5e8710b64268 (diff)
download: perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.tar.gz
perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.tar.bz2
perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.zip
5 files changed, 316 insertions, 18 deletions
diff --git a/challenge-001/paulo-custodio/test.pl b/challenge-001/paulo-custodio/test.pl
index e05049e9fd..132d8f52be 100644
--- a/challenge-001/paulo-custodio/test.pl
+++ b/challenge-001/paulo-custodio/test.pl
@@ -148,11 +148,11 @@ sub build {
             return "python3 ../../challenge-001/paulo-custodio/brainfuck.py $prog_wo_ext.bf";
         }
         if (/^c$/) {
-            run("gcc $prog -o $prog_wo_ext -lm -lmpfr -lgmp") if (!-f $exe || -M $exe > -M $prog);
+            run("gcc $prog -o $prog_wo_ext -lm -lmpfr -lgmp -lsqlite3") if (!-f $exe || -M $exe > -M $prog);
             return $exe;
         }
         if (/^cpp$/) {
-            run("g++ $prog -o $prog_wo_ext -lmpfr -lgmpxx -lgmp") if (!-f $exe || -M $exe > -M $prog);
+            run("g++ $prog -o $prog_wo_ext -lmpfr -lgmpxx -lgmp -lsqlite3") if (!-f $exe || -M $exe > -M $prog);
             return $exe;
         }
         if (/^d$/) {
diff --git a/challenge-024/paulo-custodio/c/ch-2.c b/challenge-024/paulo-custodio/c/ch-2.c
new file mode 100644
index 0000000000..e3424eec6b
--- /dev/null
+++ b/challenge-024/paulo-custodio/c/ch-2.c
@@ -0,0 +1,294 @@
+/*
+Challenge 024
+
+Task #2
+Create a script to implement full text search functionality using Inverted
+Index. According to wikipedia:
+
+In computer science, an inverted index (also referred to as a postings file
+or inverted file) is a database index storing a mapping from content, such as
+words or numbers, to its locations in a table, or in a document or a set of
+documents (named in contrast to a forward index, which maps from documents to
+content). The purpose of an inverted index is to allow fast full-text
+searches, at a cost of increased processing when a document is added to the
+database.
+*/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sqlite3.h>
+
+#define DATABASE    "index.db3"
+#define SEPARATORS  " \t\r\n\v\f!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~"
+
+void* check_mem(void* p) {
+    if (!p) {
+        fputs("Out of memory\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+    return p;
+}
+
+bool file_exists(const char *filename) {
+    struct stat buffer;
+    return (stat(filename, &buffer) == 0);
+}
+
+void basename(char* filename) {
+    char* p = filename + strlen(filename);
+    while (p > filename && p[-1] != '/' && p[-1] != '\\' && p[-1] != '.')
+        p--;
+    if (p > filename && p[-1] == '.')
+        p[-1] = '\0';
+}
+
+char* strtolower(char* str) {
+    for (char* p = str; *p; p++)
+        *p = tolower(*p);
+    return str;
+}
+
+void check_error(sqlite3* db, int rc) {
+    if (rc != SQLITE_OK) {
+        fprintf(stderr, "Database error: %s\n", sqlite3_errmsg(db));
+        sqlite3_close(db);
+        exit(EXIT_FAILURE);
+    }
+}
+
+void check_error_step(sqlite3* db, int step) {
+    if (step != SQLITE_DONE) {
+        fprintf(stderr, "Database error: %s\n", sqlite3_errmsg(db));
+        sqlite3_close(db);
+        exit(EXIT_FAILURE);
+    }
+}
+
+int callback(void *NotUsed, int argc, char **argv, char **azColName) {
+    for(int i = 0; i < argc; i++) {
+        printf("%s = %s\n", azColName[i], argv[i] ? argv[i] : "NULL");
+    }
+    printf("\n");
+    return 0;
+}
+
+sqlite3* open_database() {
+    sqlite3 *db;
+    int rc = sqlite3_open(DATABASE, &db);
+    check_error(db, rc);
+    return db;
+}
+
+void close_database(sqlite3* db) {
+    sqlite3_close(db);
+}
+
+void create_database() {
+    if (file_exists(DATABASE))
+        return;
+
+    sqlite3 *db = open_database();
+    char *zErrMsg = NULL;
+
+    const char* sql =
+        "CREATE TABLE words ("
+        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
+        "word TEXT UNIQUE);"
+        "CREATE TABLE documents ("
+        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
+        "title TEXT UNIQUE); "
+        "CREATE TABLE found ("
+        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
+        "document_id INTEGER, "
+        "word_id INTEGER); "
+        "CREATE UNIQUE INDEX found_index ON found(document_id, word_id); ";
+
+    int rc = sqlite3_exec(db, sql, callback, 0, &zErrMsg);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr, "SQL error: %s\n", zErrMsg);
+        sqlite3_free(zErrMsg);
+        sqlite3_close(db);
+        return;
+    }
+
+    close_database(db);
+}
+
+int get_or_add_document_id(sqlite3* db, const char* title) {
+    bool found = false;
+    int id = -1;
+
+    const char* sql = "SELECT id FROM documents WHERE title = ?;";
+    sqlite3_stmt* stmt;
+    int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+    check_error(db, rc);
+    sqlite3_bind_text(stmt, 1, title, strlen(title), NULL);
+    int step = sqlite3_step(stmt);
+    if (step == SQLITE_ROW) {
+        id = sqlite3_column_int(stmt, 0);
+        found = true;
+    }
+    sqlite3_finalize(stmt);
+
+    if (found)
+        return id;
+
+    sql = "INSERT INTO documents(title) VALUES(?);";
+    rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+    check_error(db, rc);
+    sqlite3_bind_text(stmt, 1, title, strlen(title), NULL);
+    step = sqlite3_step(stmt);
+    check_error_step(db, step);
+    sqlite3_finalize(stmt);
+
+    return sqlite3_last_insert_rowid(db);
+}
+
+int get_or_add_word_id(sqlite3* db, const char* word) {
+    bool found = false;
+    int id = -1;
+
+    const char* sql = "SELECT id FROM words WHERE word = ?;";
+    sqlite3_stmt* stmt;
+    int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+    check_error(db, rc);
+    sqlite3_bind_text(stmt, 1, word, strlen(word), NULL);
+    int step = sqlite3_step(stmt);
+    if (step == SQLITE_ROW) {
+        id = sqlite3_column_int(stmt, 0);
+        found = true;
+    }
+    sqlite3_finalize(stmt);
+
+    if (found)
+        return id;
+
+    sql = "INSERT INTO words(word) VALUES(?);";
+    rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+    check_error(db, rc);
+    sqlite3_bind_text(stmt, 1, word, strlen(word), NULL);
+    step = sqlite3_step(stmt);
+    check_error_step(db, step);
+    sqlite3_finalize(stmt);
+
+    return sqlite3_last_insert_rowid(db);
+}
+
+int add_found(sqlite3* db, int document_id, int word_id) {
+    bool found = false;
+    int id = -1;
+
+    const char* sql = "SELECT id FROM found "
+                      "WHERE document_id = ? AND word_id = ?;";
+    sqlite3_stmt* stmt;
+    int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+    check_error(db, rc);
+    sqlite3_bind_int(stmt, 1, document_id);
+    sqlite3_bind_int(stmt, 2, word_id);
+    int step = sqlite3_step(stmt);
+    if (step == SQLITE_ROW) {
+        id = sqlite3_column_int(stmt, 0);
+        found = true;
+    }
+    sqlite3_finalize(stmt);
+
+    if (found)
+        return id;
+
+    sql = "INSERT INTO found(document_id, word_id) VALUES(?, ?);";
+    rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+    check_error(db, rc);
+    sqlite3_bind_int(stmt, 1, document_id);
+    sqlite3_bind_int(stmt, 2, word_id);
+    step = sqlite3_step(stmt);
+    check_error_step(db, step);
+    sqlite3_finalize(stmt);
+
+    return sqlite3_last_insert_rowid(db);
+}
+
+void add_document(const char* filename) {
+    sqlite3* db = open_database();
+
+    // title
+    char* title = check_mem(strdup(filename));
+    basename(title);
+
+    // document id
+    int document_id = get_or_add_document_id(db, title);
+
+    // read document
+    FILE* fp = fopen(filename, "r");
+    if (fp == NULL) {
+        perror(filename);
+        close_database(db);
+        free(title);
+        exit(EXIT_FAILURE);
+    }
+
+    char line[BUFSIZ];
+    while (fgets(line, sizeof(line), fp) != NULL) {
+        char* word = strtok(line, SEPARATORS);
+        while (word != NULL) {
+            strtolower(word);
+            int word_id = get_or_add_word_id(db, word);
+            add_found(db, document_id, word_id);
+            word = strtok(NULL, SEPARATORS);
+        }
+    }
+
+    printf("Indexed %s\n", title);
+
+    fclose(fp);
+    close_database(db);
+    free(title);
+}
+
+void search_documents(const char* word) {
+    sqlite3* db = open_database();
+
+    char* lcword = strtolower(check_mem(strdup(word)));
+
+    const char* sql = "SELECT word, title "
+                      "FROM documents, words, found "
+                      "WHERE word = ? "
+                      "AND found.document_id = documents.id "
+                      "AND found.word_id     = words.id "
+                      "ORDER BY title;";
+    sqlite3_stmt* stmt;
+    int rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, NULL);
+    check_error(db, rc);
+    sqlite3_bind_text(stmt, 1, lcword, strlen(lcword), NULL);
+    int step;
+    while ((step  = sqlite3_step(stmt)) == SQLITE_ROW) {
+        printf("%s  %s\n",
+               sqlite3_column_text(stmt, 0),
+               sqlite3_column_text(stmt, 1));
+    }
+    sqlite3_finalize(stmt);
+
+    close_database(db);
+    free(lcword);
+}
+
+int usage(void) {
+    fputs("usage: ch-2 add|search file|word\n", stderr);
+    return EXIT_FAILURE;
+}
+
+int main(int argc, char* argv[]) {
+    if (argc != 3)
+        return usage();
+
+    create_database();
+    if (strcmp(argv[1], "add") == 0)
+        add_document(argv[2]);
+    else if (strcmp(argv[1], "search") == 0)
+        search_documents(argv[2]);
+    else
+        return usage();
+}
diff --git a/challenge-024/paulo-custodio/perl/ch-2.pl b/challenge-024/paulo-custodio/perl/ch-2.pl
index 2d74437c97..bd938b5a74 100644
--- a/challenge-024/paulo-custodio/perl/ch-2.pl
+++ b/challenge-024/paulo-custodio/perl/ch-2.pl
@@ -41,6 +41,7 @@ CREATE TABLE found (
     document_id INTEGER,
     word_id     INTEGER
 );
+CREATE UNIQUE INDEX found_index ON found(document_id, word_id);
 END
         close($p) or die "sqlite3 failed";
     }
@@ -64,7 +65,7 @@ sub add_doc {
     my($doc) = @_;
 
     # get title
-    my $title = path($doc)->basename;
+    (my $title = path($doc)->basename) =~ s/\.\w+$//;
 
     # connect to index database
     my $dbh = DBI->connect("dbi:SQLite:dbname=".DBFILE,"","",
diff --git a/challenge-024/paulo-custodio/python/ch-2.py b/challenge-024/paulo-custodio/python/ch-2.py
index 7b3aff407a..6f4ab47bb9 100644
--- a/challenge-024/paulo-custodio/python/ch-2.py
+++ b/challenge-024/paulo-custodio/python/ch-2.py
@@ -47,6 +47,9 @@ def create_database():
                 word_id     INTEGER
             );
             ''')
+        cur.execute('''
+            CREATE UNIQUE INDEX found_index ON found(document_id, word_id);
+            ''')
 
         con.commit()
         con.close()
@@ -94,7 +97,7 @@ def add_found(con, document_id, word_id):
 def add_doc(file):
     con = sqlite3.connect(DBFILE)
 
-    title = os.path.basename(file)
+    title = re.sub(r"\.\w+$", "", os.path.basename(file))
     document_id = get_document_id(con, title)
 
     # read document
diff --git a/challenge-024/paulo-custodio/t/test-2.yaml b/challenge-024/paulo-custodio/t/test-2.yaml
index 1f15630cb9..7df1a09659 100644
--- a/challenge-024/paulo-custodio/t/test-2.yaml
+++ b/challenge-024/paulo-custodio/t/test-2.yaml
@@ -1,52 +1,52 @@
-- setup:
+- setup:    unlink('index.db3');1
   cleanup:
   args:     add 'The Cask of Amontillado.txt'
   input:
   output:   |
-            Indexed The Cask of Amontillado.txt
+            Indexed The Cask of Amontillado
 - setup:
   cleanup:
   args:     add 'The Fall of the House of Usher.txt'
   input:
   output:   |
-            Indexed The Fall of the House of Usher.txt
+            Indexed The Fall of the House of Usher
 - setup:
   cleanup:
   args:     add 'The Masque of the Red Death.txt'
   input:
   output:   |
-            Indexed The Masque of the Red Death.txt
+            Indexed The Masque of the Red Death
 - setup:
   cleanup:
   args:     add 'The Raven.txt'
   input:
   output:   |
-            Indexed The Raven.txt
+            Indexed The Raven
 - setup:
   cleanup:
   args:     search death
   input:
   output:   |
-            death  The Fall of the House of Usher.txt
-            death  The Masque of the Red Death.txt
-            death  The Raven.txt
+            death  The Fall of the House of Usher
+            death  The Masque of the Red Death
+            death  The Raven
 - setup:
   cleanup:
   args:     search mystery
   input:
   output:   |
-            mystery  The Fall of the House of Usher.txt
-            mystery  The Raven.txt
+            mystery  The Fall of the House of Usher
+            mystery  The Raven
 - setup:
   cleanup:
   args:     search revenge
   input:
   output:   |
-            revenge  The Cask of Amontillado.txt
+            revenge  The Cask of Amontillado
 - setup:
-  cleanup:  unlink('index.db3')
+  cleanup:  unlink('index.db3');1
   args:     search imagination
   input:
   output:   |
-            imagination  The Fall of the House of Usher.txt
-            imagination  The Raven.txt
+            imagination  The Fall of the House of Usher
+            imagination  The Raven
author	Paulo Custodio <pauloscustodio@gmail.com>	2023-05-03 12:09:29 +0100
committer	Paulo Custodio <pauloscustodio@gmail.com>	2023-05-03 12:10:57 +0100
commit	02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6 (patch)
tree	345b87193914524c9c3b36d50fcacf92b2f904e5
parent	6c6480baed8abb849ed2d2ffa29f5e8710b64268 (diff)
download	perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.tar.gz perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.tar.bz2 perlweeklychallenge-club-02e546117f67c8b8fabd0ecba2ade1c5b4ff56f6.zip