aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.vscode/settings.json5
-rw-r--r--challenge-110/james-smith/README.md43
-rw-r--r--challenge-110/james-smith/perl/ch-1.pl15
-rw-r--r--challenge-110/james-smith/perl/ch-2.pl61
4 files changed, 76 insertions, 48 deletions
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000000..8bab9cab1f
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+ "editor.rulers": [
+ 72,80,120,132
+ ]
+} \ No newline at end of file
diff --git a/challenge-110/james-smith/README.md b/challenge-110/james-smith/README.md
index 433fbf84f7..9759983712 100644
--- a/challenge-110/james-smith/README.md
+++ b/challenge-110/james-smith/README.md
@@ -172,8 +172,8 @@ sub transpose_seek {
## Loop through the file and get the start/end position of each line,
## and the first $BYTES characters of each line...
- push ( @pos, [$prev+$BYTES,tell $fh,substr $_,0,$BYTES]) &&
- ( $prev=tell $fh ) while <$fh>;
+ ( push @pos, [ $prev+$BYTES, tell $fh, substr $_, 0, $BYTES] ) &&
+ ( $prev = tell $fh ) while <$fh>;
## While we still have "columns" loop through each row and grab the first
## entry and output results.
@@ -181,16 +181,17 @@ sub transpose_seek {
while( $pos[0][0] < $pos[0][1] || length $pos[0][2] ) {
my @line;
foreach(@pos) {
+ ## Grab extra data for the row if we have got an incomplete
+ ## field {missing a "," and still data to read}
while( $_->[2] !~ m{,} && $_->[0] < $_->[1] ) {
seek $fh, $_->[0], 0;
- read $fh,
- $_->[2], ## "Buffer"
+ read $fh, $_->[2], ## "Buffer"
$_->[1]-$_->[0] > $BYTES ? $BYTES : $_->[1]-$_->[0],
- length $_->[2]; ## Length of "Buffer" so text gets added to end
+ length $_->[2]; ## Length of "Buffer" so text gets
+ ## added to end
$_->[0]+=$BYTES;
}
- $_->[2] =~ s{^([^,\r\n]+)[,\r\n]*}{};
- push @line, $1;
+ push @line, @[$_->[2] =~ s{^([^,\r\n]+)[,\r\n]*}{}];
}
say {$ofh} join q(,), @line;
}
@@ -285,22 +286,24 @@ We list these in order of "memory consumption"...
| Method/size | Time (s) | Kbytes | resident | shared |
| ----------- | --------: | --------: | --------: | -----: |
-| Seek small | 0.000 | 16,016 | 7,836 | 5,228 |
-| Regex small | 0.000 | 16,016 | 7,836 | 5,228 |
-| Split small | 0.000 | 16,016 | 7,836 | 5,228 |
-| Seek 1000 | 1.346 | 17,388 | 9,320 | 5,228 |
-| Seek 2000 | 5.841 | 18,848 | 10,636 | 5,228 |
-| Seek 5000 | 54.208 | 23,044 | 14,972 | 5,228 |
-| Regex 1000 | 1.293 | 25,492 | 17,288 | 5,228 |
-| Seek 30000 | 3,003.220 | 57,312 | 43,948 | 2,720 |
-| Regex 2000 | 9.040 | 63,896 | 51,376 | 3,140 |
-| Split 1000 | 0.934 | 105,784 | 93,100 | 3,204 |
-| Regex 5000 | 130.411 | 260,432 | 248,016 | 3,204 |
-| Split 2000 | 6.780 | 362,028 | 349,388 | 3,204 |
-| Split 5000 | 527.614 | 2,153,576 | 1,423,468 | 2,764 |
+| Seek small | 0.000 | 16,024 | 7,836 | 5,228 |
+| Regex small | 0.000 | 16,024 | 7,836 | 5,228 |
+| Split small | 0.000 | 16,024 | 7,836 | 5,228 |
+| Seek 1000 | 1.277 | 17,196 | 9,320 | 5,228 |
+| Seek 2000 | 5.132 | 18,612 | 10,636 | 5,228 |
+| Seek 5000 | 39.498 | 22,308 | 14,208 | 5,228 |
+| Regex 1000 | 1.181 | 24,868 | 17,288 | 5,228 |
+| Seek 30000 | 2,537.705 | 53,364 | 43,948 | 2,720 |
+| Regex 2000 | 10.596 | 58,160 | 51,376 | 3,140 |
+| Split 1000 | 1.054 | 103,620 | 93,100 | 3,204 |
+| Regex 5000 | 128.589 | 258,056 | 248,016 | 3,204 |
+| Split 2000 | 4.490 | 360,036 | 349,388 | 3,204 |
+| Split 5000 | 598.668 | 2,151,664 | 1,423,468 | 2,764 |
The size is the number of rows/columns - so the "1000" file has 1000 rows and 1000 columns (+row/column labels).
+As a "guesstimate" for the 30,000 x 30,000 case, for which the seek solution uses roughly 50MB, the regex solution would use 7GB of memory and the split method would use about 75GB of memory... Both of these are more memory+swap than the machine that I'm using has!
+
**File sizes:**
| name | rows | columns | size | row size |
diff --git a/challenge-110/james-smith/perl/ch-1.pl b/challenge-110/james-smith/perl/ch-1.pl
index f47ec05169..5435cde1b1 100644
--- a/challenge-110/james-smith/perl/ch-1.pl
+++ b/challenge-110/james-smith/perl/ch-1.pl
@@ -9,6 +9,21 @@ use Test::More;
print grep { is_valid_phone_number($_) } <>;
sub is_valid_phone_number {
+ return m{\A # Start of line
+ \s* # Some white-space ?
+ (?: # Prefix - one of:
+ [+]\d+ | # +{digits}
+ 00\d+ | # 00{digits}
+ [(]\d+[)] # ({digits})
+ )
+ \s+ # Some white-space
+ \d+ # String of numbers
+ \s* # Some white-space ?
+ \Z # End of line
+ }x;
+}
+
+sub is_valid_phone_number_compact {
return m{\A\s*(?:[+]\d+|00\d+|[(]\d+[)])\s+\d+\s*\Z};
}
diff --git a/challenge-110/james-smith/perl/ch-2.pl b/challenge-110/james-smith/perl/ch-2.pl
index 4afa11e49c..8ee3bb6690 100644
--- a/challenge-110/james-smith/perl/ch-2.pl
+++ b/challenge-110/james-smith/perl/ch-2.pl
@@ -37,19 +37,19 @@ select(STDOUT); $| = 1;
my $t0;
- $t0 = time; transpose_seek( $FN_TINY, 'seek-small' ); say 'Seek small - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_regex( $FN_TINY, 'regex-small' ); say 'Regex small - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_split( $FN_TINY, 'split-small' ); say 'Split small - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_seek( $FN_SMALL, 'seek-1000' ); say 'Seek 1000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_seek( $FN_MEDIUM, 'seek-2000' ); say 'Seek 2000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_seek( $FN_LARGE, 'seek-5000' ); say 'Seek 5000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_regex( $FN_SMALL, 'regex-1000' ); say 'Regex 1000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_seek( $FN_MASSIVE, 'seek-30000' ); say 'Seek 30000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_regex( $FN_MEDIUM, 'regex-2000' ); say 'Regex 2000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_split( $FN_SMALL, 'split-1000' ); say 'Split 1000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_regex( $FN_LARGE, 'regex-5000' ); say 'Regex 5000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_split( $FN_MEDIUM, 'split-2000' ); say 'Split 2000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
- $t0 = time; transpose_split( $FN_LARGE, 'split-5000' ); say 'Split 5000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info();
+if($ARGV[0] == 1) { $t0 = time; transpose_seek( $FN_TINY, 'seek-small' ); say 'Seek small - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 2) { $t0 = time; transpose_regex( $FN_TINY, 'regex-small' ); say 'Regex small - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 3) { $t0 = time; transpose_split( $FN_TINY, 'split-small' ); say 'Split small - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 4) { $t0 = time; transpose_seek( $FN_SMALL, 'seek-1000' ); say 'Seek 1000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 5) { $t0 = time; transpose_seek( $FN_MEDIUM, 'seek-2000' ); say 'Seek 2000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 6) { $t0 = time; transpose_seek( $FN_LARGE, 'seek-5000' ); say 'Seek 5000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 7) { $t0 = time; transpose_regex( $FN_SMALL, 'regex-1000' ); say 'Regex 1000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 8) { $t0 = time; transpose_seek( $FN_MASSIVE, 'seek-30000' ); say 'Seek 30000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 9) { $t0 = time; transpose_regex( $FN_MEDIUM, 'regex-2000' ); say 'Regex 2000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 10) { $t0 = time; transpose_split( $FN_SMALL, 'split-1000' ); say 'Split 1000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 11) { $t0 = time; transpose_regex( $FN_LARGE, 'regex-5000' ); say 'Regex 5000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 12) { $t0 = time; transpose_split( $FN_MEDIUM, 'split-2000' ); say 'Split 2000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
+if($ARGV[0] == 13) { $t0 = time; transpose_split( $FN_LARGE, 'split-5000' ); say 'Split 5000 - Time: ',sprintf('%13.6f',time-$t0),' ',get_statm_info(); }
sub transpose_split {
## Slurp into array
@@ -58,7 +58,7 @@ sub transpose_split {
close $fh;
## Generate transpose
open $fh, '>', $_[1];
- say {$fh} join ',', map {shift @{$_} } @in while @{$in[0]};
+ say {$fh} join ',', map {shift @{$_} } @in while @{$in[0]};
close $fh;
}
@@ -74,35 +74,40 @@ sub transpose_regex {
}
sub transpose_seek {
- my($prev,@pos) = (0);
+ my ( $prev, @pos ) = ( 0 );
- open my $fh, '<', $_[0];
- open my $ofh, '>', $_[1];
-
## Loop through the file and get the start/end position of each line,
## and the first $BYTES characters of each line...
- push ( @pos, [$prev+$BYTES,tell $fh,substr $_,0,$BYTES]) &&
- ( $prev=tell $fh ) while <$fh>;
+ open my $fh, '<', $_[0];
+
+ ( push @pos, [ $prev+$BYTES, tell $fh, substr $_, 0, $BYTES ] ) &&
+ ( $prev = tell $fh ) while <$fh>;
## While we still have "columns" loop through each row and grab the first
## entry and output results.
+ open my $ofh, '>', $_[1]; ## Need 2 file handles open at once for this.
+
while( $pos[0][0] < $pos[0][1] || length $pos[0][2] ) {
- my @line;
- foreach(@pos) {
+ my $j = '';
+ foreach( @pos ) {
+ ## Grab extra data for the row if we have got an incomplete
+ ## field {missing a "," and still data to read}
while( $_->[2] !~ m{,} && $_->[0] < $_->[1] ) {
- seek $fh, $_->[0], 0;
+ seek $fh, $_->[0], 0; ## 0 = from start of file!
read $fh,
- $_->[2], ## "Buffer"
+ $_->[2], ## "Buffer"
$_->[1]-$_->[0] > $BYTES ? $BYTES : $_->[1]-$_->[0],
- length $_->[2]; ## Length of "Buffer" so text gets added to end
- $_->[0]+=$BYTES;
+ length $_->[2]; ## Length of "Buffer" so text gets
+ ## added to end
+ $_->[0] += $BYTES;
}
$_->[2] =~ s{^([^,\r\n]+)[,\r\n]*}{};
- push @line, $1;
+ print {$ofh} $j, $1;
+ $j ||= ',';
}
- say {$ofh} join q(,), @line;
+ say {$ofh} '';
}
}