diff options
| -rw-r--r-- | challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj | 60 | ||||
| -rw-r--r-- | challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj | 12 |
2 files changed, 64 insertions, 8 deletions
diff --git a/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj b/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj index 8d237772b6..0f0930b838 100644 --- a/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj +++ b/challenge-132/tyler-wardhaugh/clojure/src/tw/weekly/c132/t2.clj @@ -1,14 +1,62 @@ (ns tw.weekly.c132.t2 - (:require [clojure.edn :as :edn)) + (:require [clojure.edn :as edn] + [clojure.pprint :refer [cl-format]])) ;;; ; Task description for TASK #2, Hash Join ;;; -(def DEFAULT-INPUT []) +(def MAXSIZE "The maximum number of build relations to process at one time." 3) +(def DEFAULT-INPUT + [[[20, "Alex" ] + [28, "Joe" ] + [38, "Mike" ] + [18, "Alex" ] + [25, "David" ] + [18, "Simon" ]] + [["Alex", "Stewart"] + ["Joe", "Root" ] + ["Mike", "Gatting"] + ["Joe", "Blog" ] + ["Alex", "Jones" ] + ["Simon","Duane" ]] + 1 + 0]) + +(defn butnth + "Returns all values except the one at index." + [coll index] + (keep-indexed (fn [i v] (when (not= i index) v)) coll)) + +;;; Classic Hash Join +;; Algorithm description: +; https://en.wikipedia.org/wiki/Hash_join#Classic_hash_join +;; Notes: +; - We proactively batch the smaller relation (build) into MAXSIZE chunks to +; ensure it can fit into memory. +; - Order of output relation is not guaranteed. +;;; +(defn hash-join + [a b a-index b-index] + (let [[build build-index probe probe-index] + (if (<= (count a) (count b)) + [a a-index b b-index] + [b b-index a a-index])] + (-> (comp + (partition-all MAXSIZE) + (map (fn [batch] (group-by #(nth % build-index) batch))) + (mapcat + (fn [build-map] + (keep (fn [row] + (when-let [ks (build-map (nth row probe-index))] + (map #(concat % (butnth row probe-index)) ks))) + probe))) + cat) + (sequence build)))) (defn -main - "Run Task 1 with a given input D and S, defaulting to the first example from - the task description." + "Run Task 1 with a given input H1, H2, I1, and I2, defaulting to the first + example from the task description." [& args] - (let [[H1 H2] (or (some->> args (map edn/read-string)) DEFAULT-INPUT)] - )) + (let [[H1 H2 I1 I2] (or (some->> args (map edn/read-string)) DEFAULT-INPUT)] + (->> (hash-join H1 H2 I1 I2) + (cl-format true "~:{~a, ~s, ~s~%~}")))) diff --git a/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj b/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj index 6d2c5f2894..00edee2c66 100644 --- a/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj +++ b/challenge-132/tyler-wardhaugh/clojure/test/tw/weekly/c132_test.clj @@ -1,7 +1,7 @@ (ns tw.weekly.c132-test (:require [clojure.test :refer [deftest is testing]] [tw.weekly.c132.t1 :as t1] - #_[tw.weekly.c132.t2 :refer []])) + [tw.weekly.c132.t2 :as t2])) (def today (t1/parse-date t1/DEFAULT-TODAY)) @@ -17,4 +17,12 @@ (deftest task-2 (testing "Task 2, Hash Join" - )) + (is (= (set '((20 "Alex" "Stewart") + (18 "Alex" "Stewart") + (28 "Joe" "Root") + (38 "Mike" "Gatting") + (28 "Joe" "Blog") + (20 "Alex" "Jones") + (18 "Alex" "Jones") + (18 "Simon" "Duane"))) + (set (apply t2/hash-join t2/DEFAULT-INPUT)))))) |
