#! /opt/local/bin/perl
#
#       wassup.pl
#
#         PWC 57 TASK #2
#         Shortest Unique Prefix
#             Write a script to find the shortest unique prefix for each each
#             word in the given list. The prefixes will not necessarily be of
#             the same length.
#
#             Sample Input
#                 [ "alphabet", "book", "carpet", "cadmium", "cadeau", "alpine" ]
#             Expected Output
#                 [ "alph", "b", "car", "cadm", "cade", "alpi" ]
#
#         method: This is another tree based solution, this time constructing a
#             linked hash of hashes. The structure has a root node, a hash with
#             keys defined by the first letters of the words in the word list.
#             Each of these keys in turn points to another hash, that hash with
#             keys of the second letter set of those following the first; this
#             pattern continues until we have mapped every letter of every word.
#             The value of each node hash is thus held in the key to the link to
#             it rather than within itself. This structure is known as a trie.
#
#             To build the trie we need to first reduce every word to an array
#             of characters, then follow that sequence from node to node. If we
#             arrive at a node without a required character key, we create a new
#             key for that character, with its associated hash. The magic to our
#             solution happens when we keep track of every traversal we make
#             during construction, keeping a count of every time we visit a
#             given key in a {count} key alongside the {link} to the next hash.
#
#             Once we have built the trie, we can revisit it with each word in
#             our list and solve the challenge. We break down the word into an
#             array of letters exactly as we did before; only this time we know
#             that a pathway between letters to construct the word already
#             exists within the trie. Instead we create a shortest unique prefix
#             string (sup) for that word; as we iterate through the letters, at
#             each node we visit we add the letter to the string and check the
#             count. Once that count reaches 1, it indicates the word we are
#             searching through was the only word to make that particular link,
#             and we have found the shortest prefix for it.
#
#             But wait. There's still a problem. What if one word is completely
#             contained within another? If we have the two words "court" and
#             "courtship", the prefix "court" cannot distinguish between the
#             two.
#
#             To resolve this requires some metaknowledge about the data set,
#             and the choices in implimentation a trade-off.
#
#             This problem is in a way similar to that of encoding a string in
#             memory as an array of chars. How do we know we are at the end? We
#             need knowledge beyond just the characters that make up the string:
#             a string terminator, either implicit or explicit.
#
#             Explicitly, we could add some character not present in any word as
#             a terminator. For a list of English words this could be the number
#             0, for example. But this would need to be determined by the data
#             set for final use, which is not defined here. If the "words" given
#             were URLs, for instance, or codes, it would be altogether possible
#             that a given string could contain the 0 digit.
#
#             We could add something unique to our trie, for example the array
#             element '00', which cannot ever result from slicing on nullspace.
#             This is concrete evidence of string termination, but then we are
#             given the choice to either keep it in the prefix for output nor
#             not: if we chose to, we sully the data, and add ambiguity as to
#             whether the character as printed was originally present, if we
#             delete it, we have the same original ambiguity we started with.
#             Each decision for this fix creates ambiguity again, so that solves
#             nothing. The idea that coming to the end of the word array before
#             triggering the last escape clause would set an external "terminator
#             found" flag does hold some merit and might be useful in some
#             situations, however.
#
#             Another approach is to make the terminator implicit: the fact that
#             a string has termininated is evidence itself of a teminator. If we
#             hold external knowledge of the length of a string, we know when we
#             have arrived at the end. To acknowledge this need to add a rule
#             that given ambiguity between "court" associating to either "court"
#             or "courtship", the exact match is always the one. I find this
#             solution more elegant, but it requires the added step in lookup to
#             determine whether such ambiguity exists.
#
#             One approach requires metaknowledge of the data set on creating of
#             the trie, to determine a non-colliding terminator to be added, the
#             other metaknowledge on retrieval (see the "trie" in there?) to
#             determine whether there is any ambiguity to be resolved. I'm going
#             to go here with the latter, with adding the knowledge of the
#             algoritm's limitations to the the broader implimentation and call
#             this challenge complete.
#
#
#       2020 colin crain
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##


use warnings;
use strict;
use feature ":5.26";

## ## ## ## ## MAIN:

my @input = ("alphabet", "book", "carpet", "cadmium", "cadeau", "alpine", "court", "courtship") ;

my $trie = {};

## build the trie structure
for my $word ( @input ) {
    my $node = $trie;                   ## reset to root

    for my $letter ( split //, $word ) {
        ## if a node for the char exists, up the count, if not create it with count 1
        if (exists $node->{$letter} ) {
            $node->{$letter}->{count}++;
        }
        else {
            $node->{$letter} = {  link  => {},
                                  count => 1   } ;
        }
        ## reassign the base node to a reference to the new letter node and repeat
        $node = $node->{$letter}->{link};
    }
}

my %output;

## walk down the trie until count == 1 -- this is the shortest unique prefix
for my $word ( @input ) {
    my $node = $trie;                   ## reset to root
    my $sup;

    for my $letter ( split //, $word ) {
        $sup .= $letter;
        ## if the count drops to 1 this word is the only one to have walked this path,
        last if $node->{$letter}->{count} == 1;
        $node = $node->{$letter}->{link};
    }
    $output{$word} = $sup;
}

## output
printf "%-10s => %-10s \n", $_, $output{$_} for (sort keys %output) ;