diff options
| author | E. Choroba <choroba@matfyz.cz> | 2020-06-07 12:02:08 +0200 |
|---|---|---|
| committer | E. Choroba <choroba@matfyz.cz> | 2020-06-07 12:02:08 +0200 |
| commit | 45a2c661c0185227e7a19fdd5760ff81fd033cea (patch) | |
| tree | 42c23f375979cb9b0e4153d4319bb4bf6169b77c | |
| parent | 59ccb8903bf40d5f6d50663ef03d9dee5aac9789 (diff) | |
| download | perlweeklychallenge-club-45a2c661c0185227e7a19fdd5760ff81fd033cea.tar.gz perlweeklychallenge-club-45a2c661c0185227e7a19fdd5760ff81fd033cea.tar.bz2 perlweeklychallenge-club-45a2c661c0185227e7a19fdd5760ff81fd033cea.zip | |
Optimize LCS via Suffix Tree
Also, make it general by not using high unicode characters as
separators, but rather using arrays instead of strings to store the
characters, and making the markers a special array elements.
Moreover, optimize the coverage computing by iterating over the
numbers rather than the covered characters.
| -rwxr-xr-x | challenge-018/e-choroba/perl5/ch-1a.pl | 58 |
1 files changed, 33 insertions, 25 deletions
diff --git a/challenge-018/e-choroba/perl5/ch-1a.pl b/challenge-018/e-choroba/perl5/ch-1a.pl index 0f6ac12c35..01270a0ea2 100755 --- a/challenge-018/e-choroba/perl5/ch-1a.pl +++ b/challenge-018/e-choroba/perl5/ch-1a.pl @@ -15,11 +15,12 @@ use feature qw{ say }; } { package My::Suffix::Tree; + use List::Util qw{ first }; sub new { my ($class) = @_; bless my $self = {position => -1, - text => "", + text => [], active_edge => 0, active_length => 0, current_node => -1, @@ -41,11 +42,10 @@ use feature qw{ say }; return keys %{ $node->{numbers} } } - my $SEP = chr 2 ** 20; - sub fresh_separator { - my ($self) = @_; - $self->{separator} //= 2 ** 20; - return chr ++$self->{separator} + sub add_mark { + my ($self, $index) = @_; + $self->add_char("<$index"); + push @{ $self->{numbers} }, $self->{position}; } sub add_words { @@ -54,20 +54,16 @@ use feature qw{ say }; for my $word_index (0 .. $#words) { $self->add_char($_) for split //, $words[$word_index]; - $self->add_char($self->fresh_separator); - $self->add_char($_) for split //, "$word_index$SEP"; + $self->add_mark($word_index); } - my $text_length = length $self->{text}; for my $node (@{ $self->{nodes} }) { next if $node->{start} < 0; - my $text = $node->{end} > $text_length - ? substr $self->{text}, $node->{start} - : substr $self->{text}, $node->{start}, - $node->{end} - $node->{start}; - $node->{text} = $text; - if (my ($number) = $text =~ /([0-9]+)$SEP/) { - $node->{number} = $number; - } + + my $number = first { + $node->{start} <= $self->{numbers}[$_] + && $self->{numbers}[$_] <= $node->{end} + } 0 .. $#{ $self->{numbers} }; + $node->{number} = $number if defined $number; } $self->add_numbers(0); } @@ -79,7 +75,7 @@ use feature qw{ say }; $self->{need_suffix_link} = $node; } - sub active_edge { substr $_[0]{text}, $_[0]{active_edge}, 1 } + sub active_edge { $_[0]{text}[ $_[0]{active_edge} ] } sub walk_down { my ($self, $next) = @_; @@ -107,7 +103,7 @@ use feature qw{ say }; sub add_char { my ($self, $char) = @_; - substr $self->{text}, ++$self->{position}, 1, $char; + splice @{ $self->{text} }, ++$self->{position}, 1, $char; $self->{need_suffix_link} = -1; ++$self->{remainder}; while ($self->{remainder} > 0) { @@ -127,9 +123,9 @@ use feature qw{ say }; next if $self->walk_down($next); # Observation 2. # Observation 1. - if ($char eq substr $self->{text}, - $self->{nodes}[$next]{start} + $self->{active_length}, 1 - ) { + if ($char eq $self->{text}[ + $self->{nodes}[$next]{start} + $self->{active_length} + ]) { ++$self->{active_length}; # Observation 3. $self->_add_suffix_link($self->{active_node}); @@ -142,8 +138,10 @@ use feature qw{ say }; my $leaf = $self->new_node($self->{position}, 'INF'); $self->{nodes}[$split]{next}{$char} = $leaf; $self->{nodes}[$next]{start} += $self->{active_length}; - $self->{nodes}[$split]{next}{ substr $self->{text}, - $self->{nodes}[$next]{start}, 1 } = $next; + + $self->{nodes}[$split]{next}{ + $self->{text}[ $self->{nodes}[$next]{start} ] + } = $next; $self->_add_suffix_link($split); # Rule 2. } -- $self->{remainder}; @@ -164,6 +162,15 @@ use feature qw{ say }; } } + my @text; + sub text { + my ($self, $node) = @_; + @text = @{ $self->{text} } unless @text; + my $to = $node->{end} - 1; + $to = $#text if $node->{end} > $#text; + return join "", @text[ $node->{start} .. $to ] + } + my @lcs; sub longest_common_substring { my ($self, $node_index, $string) = @_; @@ -179,9 +186,10 @@ use feature qw{ say }; for my $next_char (keys %{ $node->{next} }) { my $next_index = $node->{next}{$next_char}; my $next = $self->{nodes}[$next_index]; + my $text = $self->text($next); $self->longest_common_substring( $next_index, - "$string$next->{text}"); + "$string$text"); } return @lcs } |
