diff options
Diffstat (limited to 'challenge-132/paulo-custodio/python/ch-2.py')
| -rw-r--r-- | challenge-132/paulo-custodio/python/ch-2.py | 100 |
1 files changed, 100 insertions, 0 deletions
diff --git a/challenge-132/paulo-custodio/python/ch-2.py b/challenge-132/paulo-custodio/python/ch-2.py new file mode 100644 index 0000000000..6d48ba2c8c --- /dev/null +++ b/challenge-132/paulo-custodio/python/ch-2.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +# TASK #2 > Hash Join +# Submitted by: Mohammad S Anwar +# Write a script to implement Hash Join algorithm as suggested +# by wikipedia. +# +# 1. For each tuple r in the build input R +# 1.1 Add r to the in-memory hash table +# 1.2 If the size of the hash table equals the maximum +# in-memory size: +# 1.2.1 Scan the probe input S, and add matching join +# tuples to the output relation +# 1.2.2 Reset the hash table, and continue scanning the +# build input R +# 2. Do a final scan of the probe input S and add the resulting +# join tuples to the output relation +# Example +# Input: +# +# @player_ages = ( +# [20, "Alex" ], +# [28, "Joe" ], +# [38, "Mike" ], +# [18, "Alex" ], +# [25, "David" ], +# [18, "Simon" ], +# ); +# +# @player_names = ( +# ["Alex", "Stewart"], +# ["Joe", "Root" ], +# ["Mike", "Gatting"], +# ["Joe", "Blog" ], +# ["Alex", "Jones" ], +# ["Simon","Duane" ], +# ); +# +# Output: +# +# Based on index = 1 of @players_age and +# index = 0 of @players_name. +# +# 20, "Alex", "Stewart" +# 20, "Alex", "Jones" +# 18, "Alex", "Stewart" +# 18, "Alex", "Jones" +# 28, "Joe", "Root" +# 28, "Joe", "Blog" +# 38, "Mike", "Gatting" +# 18, "Simon", "Duane" + +player_ages = [ + [20, "Alex" ], + [28, "Joe" ], + [38, "Mike" ], + [18, "Alex" ], + [25, "David" ], + [18, "Simon" ], +] +player_ages_key = 1 + +player_names = [ + ["Alex", "Stewart"], + ["Joe", "Root" ], + ["Mike", "Gatting"], + ["Joe", "Blog" ], + ["Alex", "Jones" ], + ["Simon","Duane" ], +] +player_names_key = 0 + +def format_col(s): + if type(s) == int: + return str(s) + else: + return '"'+s+'"' + +def hash_join(table1, key1, table2, key2): + build = {} + for row in table1: + key = row[key1] + if key not in build: + build[key] = [] + build[key].append(row) + result = [] + for row_probe in table2: + key = row_probe[key2] + for row_build in build[key]: + row = row_build + \ + row_probe[:key2] + \ + row_probe[key2+1:] + row = [format_col(x) for x in row] + result.append(row) + return result + +result = hash_join(player_ages, player_ages_key, + player_names, player_names_key) +for row in result: + print(', '.join(row)) |
