From 67729b2d0fe4d89c2af4a571eb65580972fd8f03 Mon Sep 17 00:00:00 2001 From: dcw Date: Tue, 28 Mar 2023 15:24:13 +0100 Subject: belatedly added my C implementation of task 2, really quite tricky - had to build a simple set module (slist.[ch]) --- challenge-209/duncan-c-white/C/.cbuild | 1 - challenge-209/duncan-c-white/C/Makefile | 11 +- challenge-209/duncan-c-white/C/README | 18 +- challenge-209/duncan-c-white/C/ch-2.c | 273 ++++++++++++++++++++++++++++ challenge-209/duncan-c-white/C/csvsplit.c | 46 +++-- challenge-209/duncan-c-white/C/csvsplit.h | 13 +- challenge-209/duncan-c-white/C/parseints.c | 114 ------------ challenge-209/duncan-c-white/C/parseints.h | 1 - challenge-209/duncan-c-white/C/printarray.c | 39 ---- challenge-209/duncan-c-white/C/printarray.h | 1 - challenge-209/duncan-c-white/C/slist.c | 148 +++++++++++++++ challenge-209/duncan-c-white/C/slist.h | 25 +++ challenge-209/duncan-c-white/README | 4 +- challenge-209/duncan-c-white/perl/ch-2.pl | 7 +- 14 files changed, 507 insertions(+), 194 deletions(-) create mode 100644 challenge-209/duncan-c-white/C/ch-2.c delete mode 100644 challenge-209/duncan-c-white/C/parseints.c delete mode 100644 challenge-209/duncan-c-white/C/parseints.h delete mode 100644 challenge-209/duncan-c-white/C/printarray.c delete mode 100644 challenge-209/duncan-c-white/C/printarray.h create mode 100644 challenge-209/duncan-c-white/C/slist.c create mode 100644 challenge-209/duncan-c-white/C/slist.h diff --git a/challenge-209/duncan-c-white/C/.cbuild b/challenge-209/duncan-c-white/C/.cbuild index 835981f6f1..a14ec76520 100644 --- a/challenge-209/duncan-c-white/C/.cbuild +++ b/challenge-209/duncan-c-white/C/.cbuild @@ -1,5 +1,4 @@ BUILD = ch-1 ch-2 -BUILD = ch-1 CFLAGS = -Wall -g #LDFLAGS = -lm #CFLAGS = -g diff --git a/challenge-209/duncan-c-white/C/Makefile b/challenge-209/duncan-c-white/C/Makefile index 513f8703b6..49d634398c 100644 --- a/challenge-209/duncan-c-white/C/Makefile +++ b/challenge-209/duncan-c-white/C/Makefile @@ -9,11 +9,10 @@ clean: /bin/rm -f $(BUILD) *.o core a.out args.o: args.c -ch-1: ch-1.o args.o csvsplit.o -ch-1.o: ch-1.c args.h csvsplit.h -ch-2: ch-2.o args.o parseints.o printarray.o -ch-2.o: ch-2.c args.h parseints.h printarray.h +ch-1: ch-1.o args.o +ch-1.o: ch-1.c args.h +ch-2: ch-2.o args.o csvsplit.o slist.o +ch-2.o: ch-2.c args.h csvsplit.h slist.h csvsplit.o: csvsplit.c csvsplit.h -parseints.o: parseints.c args.h parseints.h printarray.h -printarray.o: printarray.c +slist.o: slist.c csvsplit.h slist.h diff --git a/challenge-209/duncan-c-white/C/README b/challenge-209/duncan-c-white/C/README index dd5f3346f7..ed9e8bb1fc 100644 --- a/challenge-209/duncan-c-white/C/README +++ b/challenge-209/duncan-c-white/C/README @@ -3,9 +3,17 @@ Thought I'd also have a go at translating ch-1.pl and ch-2.pl into C.. Both C versions produce identical (non-debugging and debugging) output to the Perl originals. -These C versions use most of my regular support modules: +ch-2.c is considerably more complex than ch-2.pl, ch-2.pl built ideal data +structures in a typical Perlish fashion and used them idiomatically to focus +on the problem. Instead, ch-2.c only builds one main data structure - a sorted +list of strings (i.e. a set) - and then ch-2.c declares slists and arrays of +slists. As ever, it's the memory management in C that's the killer. However, +ch-2.c works correctly (as far as I can tell) and doesn't leak any memory (after +quite a lot of work to make that happen:-)). + +These C versions use some of my regular support modules: - a command-line argument processing module args.[ch], -- a csvlist-of-int parsing module parseints.[ch], and -- an int-array printing module printarray.[ch]. -- plus a (new for PWC) csv splitting module csvsplit.[ch] to split - a single argument into a wordlist +- plus a CSV splitting module csvsplit.[ch] to split + a single argument up +- plus (new for challenge 209) the module slist.[ch] that stores the sorted + list of strings that can be used like a simple set diff --git a/challenge-209/duncan-c-white/C/ch-2.c b/challenge-209/duncan-c-white/C/ch-2.c new file mode 100644 index 0000000000..89850b0d53 --- /dev/null +++ b/challenge-209/duncan-c-white/C/ch-2.c @@ -0,0 +1,273 @@ +// +// Task 2: Merge Account +// +// C version. +// + +#include +#include +#include +#include +#include +#include + +#include "args.h" +#include "csvsplit.h" +#include "slist.h" + + +// int nch = countch( target, str ); +// Count and return the number of occurrences of the target character in str. +// +int countch( char target, char *str ) +{ + int n=0; + for( ; *str; str++ ) + { + if( *str == target ) n++; + } + return n; +} + + +// extra is really an slist +static void addemail( char *value, void *s ) +{ + append_slist( (slist)s, value ); +} + + +// slist emails = splitcommas( emailstr ); +// form a sorted list of emails from emailstr, +// duplicating the emailstr buffer inside the slist +// emails will need freeing later +// +slist splitcommas( char *emailstr ) +{ + int n = 1 + countch( ',', emailstr ); + slist s = make_slist( n, 100 ); + + if( debug ) { printf( "emailstr %s, nemails = %d\n", emailstr, n ); } + csvForeach( emailstr, &addemail, s ); + if( s->n != n ) { printf( "warning: sn=%d, n=%d\n", s->n, n ); } + assert( s->n == n ); + + if( debug ) + { + printf( "debug: slist(emailstr %s) is ", emailstr ); + print_slist( s, stdout ); putchar( '\n' ); + } + + return s; +} + + +// int ne = 0; +// slist *emails = an slist[argc] array +// char *name = matchfirstname( &argc, argv, &ne, emails ); +// Extract the name from arg[0], and then find all matching +// arg[i0 ) + { + slist s1 = emails[0]; + + // remove emails[0] + for( int i=1; in; i++ ) + { + printf( ", " ); + printf( "\"%s\"", s1->data[i] ); + } + printf( "], " ); + free_slist( s1 ); + + if( debug ) + { + printf( "debug: remaining emails[] are: " ); + for( int j=0; j0 ) putchar('-'); + print_slist( emails[j], stdout ); + } + putchar( '\n' ); + } + } +} + + +int main( int argc, char **argv ) +{ + int argno = process_flag_n_m_args( "merge-accounts", argc, argv, + 1, 1000, "list(name,emails)" ); + + argv += argno; // reset base, so we now have argv[argc].. + argc -= argno; + + for( int i=0; i 0 ) + { + // find a set of email slists with matching names, + // using the first argument to pick the name + int ne = 0; + slist *emails = malloc( argc * sizeof(slist) ); + assert( emails != NULL ); + char *name = matchfirstname( &argc, argv, &ne, emails ); + if( debug ) + { + printf( "debug: ne=%d, ", ne ); + for( int i=0; i #include @@ -13,19 +14,15 @@ #include "csvsplit.h" -/* - * csvForeach( csvstring, &foreach_callback, (void *)extravalue ); - * Split csvstring into each comma-separated field, calling the - * foreach calback for each comma-separated field, passing the - * value and extravalue as parameters to it. It doesn't deal - * with commas in quoted strings, however. - */ +// csvForeach( csvstring, &foreach_callback, (void *)extravalue ); +// Split csvstring (modifying it in place) into each comma-separated field, +// calling the foreach calback for each comma-separated field, passing the +// value and extravalue as parameters to it. It doesn't deal +// with commas in quoted strings, however. +// void csvForeach( char *csvstring, csvforeachcb cb, void *extra ) { - // we need to modify the string, so make a mutable copy.. - char *copy = strdup( csvstring ); - - char *start = copy; + char *start = csvstring; for(;;) { char *comma=strchr( start, ',' ); @@ -42,6 +39,23 @@ void csvForeach( char *csvstring, csvforeachcb cb, void *extra ) // move start to one beyond where comma was.. start = comma+1; } +} + + +// csvForeachstrdup( csvstring, &foreach_callback, (void *)extravalue ); +// Split csvstring into each comma-separated field, duplicating it +// in order to modify the copy, calling the foreach calback for each +// comma-separated field it finds, passing the value and extravalue as +// parameters to it. It doesn't deal with commas in quoted strings, however. +// +void csvForeachstrdup( char *csvstring, csvforeachcb cb, void *extra ) +{ + // make a modifiable copy.. + char *copy = strdup( csvstring ); + assert( copy != NULL ); + + csvForeach( copy, cb, extra ); + // don't forget to.. free( copy ); } diff --git a/challenge-209/duncan-c-white/C/csvsplit.h b/challenge-209/duncan-c-white/C/csvsplit.h index c7f84857c1..9645549379 100644 --- a/challenge-209/duncan-c-white/C/csvsplit.h +++ b/challenge-209/duncan-c-white/C/csvsplit.h @@ -1,9 +1,9 @@ -/* - * csvsplit.h: simple CSV splitting (csvForeach) - doesn't handle - * quoted fields with nested commas. - * - * (C) Duncan C. White, May 2017 - */ +// +// csvsplit.h: simple CSV splitting (csvForeach), useful utility functions +// Now one function that strdup()s it's strings, another that +// modifies them in place. +// +// (C) Duncan C. White, May 2017-March 2023 // a csv foreach callback function takes // a char * (the csv value split out) @@ -12,3 +12,4 @@ typedef void (*csvforeachcb)( char *, void * ); extern void csvForeach( char * csvstring, csvforeachcb cb, void * extra ); +extern void csvForeachstrdup( char * csvstring, csvforeachcb cb, void * extra ); diff --git a/challenge-209/duncan-c-white/C/parseints.c b/challenge-209/duncan-c-white/C/parseints.c deleted file mode 100644 index 80408d3382..0000000000 --- a/challenge-209/duncan-c-white/C/parseints.c +++ /dev/null @@ -1,114 +0,0 @@ -// Simple routine to parse one or more arguments, -// looking for individual +ints or comma-separated -// lists of +ints. -// - -#include -#include -#include -#include -#include -#include - -#include "args.h" -#include "printarray.h" -#include "parseints.h" - -typedef struct -{ - int nel; // current number of elements - int maxel; // maximum number of elements allocated - int *list; // malloc()d list of integers -} intlist; - - -// -// intlist il.. then initialize il.. then: -// add_one( element, &il ); -// -static void add_one( int x, intlist *p ) -{ - if( p->nel > p->maxel ) - { - p->maxel += 128; - p->list = realloc( p->list, p->maxel ); - assert( p->list != NULL ); - } - #if 0 - if( debug ) - { - printf( "PIA: appending %d to result at " - "pos %d\n", x, p->nel ); - } - #endif - p->list[p->nel++] = x; -} - - -// -// intlist il.. then initialize il.. then: -// add_one_arg( argstr, &il ); -// -static void add_one_arg( char *argstr, intlist *p ) -{ - int x; - if( !check_unsigned_int(argstr,&x) ) - { - fprintf( stderr, "PIA: arg %s must be +int\n", argstr ); - exit(1); - } - add_one( x, p ); -} - - -// -// int nel; -// int *ilist = parse_int_args( argc, argv, argno, &nel ); -// process all arguments argv[argno..argc-1], extracting either -// single ints or comma-separated lists of ints from those arguments, -// accumulate all integers in a dynarray list, storing the total number -// of elements in nel. This list must be freed by the caller. -// Note that the list of elements used to be terminated by a -1 value, -// but I've commented this out from now on. -// -int *parse_int_args( int argc, char **argv, int argno, int *nel ) -{ - int *result = malloc( 128 * sizeof(int) ); - assert( result != NULL ); - intlist il = { 0, 128, result }; - - #if 0 - if( debug ) - { - printf( "PIA: parsing ints from args %d..%d\n", argno, argc-1 ); - } - #endif - for( int i=argno; i -#include - - -// print_int_array( maxw, nelements, results[], sep, outfile ); -// format results[0..nelements-1] as a separated -// list onto outfile with lines <= maxw chars long. -// produces a whole number of lines of output - without the trailing '\n' -void print_int_array( int maxw, int nel, int *results, char sep, FILE *out ) -{ - int linelen = 0; - for( int i=0; i maxw ) - { - fputc( '\n', out ); - linelen = 0; - } else if( i>0 ) - { - fputc( ' ', out ); - linelen++; - } - - linelen += len; - fprintf( out, "%s", buf ); - if( i0 ) - //{ - // fputc( '\n', out ); - //} -} diff --git a/challenge-209/duncan-c-white/C/printarray.h b/challenge-209/duncan-c-white/C/printarray.h deleted file mode 100644 index 40efb83277..0000000000 --- a/challenge-209/duncan-c-white/C/printarray.h +++ /dev/null @@ -1 +0,0 @@ -extern void print_int_array( int maxw, int nel, int * results, char sep, FILE * out ); diff --git a/challenge-209/duncan-c-white/C/slist.c b/challenge-209/duncan-c-white/C/slist.c new file mode 100644 index 0000000000..90d782d081 --- /dev/null +++ b/challenge-209/duncan-c-white/C/slist.c @@ -0,0 +1,148 @@ +// slist.c: a sorted list of strings, to be used like a set to which items +// are ever added + +#include +#include +#include +#include +#include +#include + +#include "csvsplit.h" +#include "slist.h" + + +// A sorted list is a growable dynarray of string pointers into a common +// string buffer (storage for which is also maintained inside the slist). + + +// slist s = make_slist( cap, bufcap ); +// Create an empty sorted list with capacity >= cap +// and the initial buffer capacity bufcap +// +slist make_slist( int cap, int bufcap ) +{ + slist s = malloc( sizeof(struct slist) ); + assert( s != NULL ); + s->cap = cap + 10; + s->n = 0; + s->data = malloc( s->cap * sizeof(char *) ); + assert( s->data != NULL ); + s->bufcap = bufcap + 100; + s->buflen = 0; + s->buf = malloc( s->bufcap * sizeof(char) ); + assert( s->buf != NULL ); + return s; +} + + +static int cmpstringp( const void *p1, const void *p2 ) +{ + // The actual arguments to this function are "pointers to + // pointers to char", but strcmp(3) arguments are "pointers + // to char", hence the following cast plus dereference + return strcmp( *(const char **) p1, *(const char **) p2 ); +} + + +// void append_slist( s, item ); +// Append a given item to sorted list s, growing it if necessary +// +void append_slist( slist s, char *item ) +{ + int len = strlen(item)+1; + if( s->buflen + len > s->bufcap ) + { + s->bufcap += 100 + len; + s->buf = realloc( s->buf, s->bufcap * sizeof(char) ); + assert( s->buf != NULL ); + } + char *sp = s->buf + s->buflen; + strcpy( sp, item ); + s->buflen += len; + + if( s->n == s->cap ) + { + s->cap += 100; + s->data = realloc( s->data, s->cap * sizeof(char *) ); + assert( s->data != NULL ); + } + s->data[s->n++] = sp; + + // re-sort data + qsort( s->data, s->n, sizeof(char *), &cmpstringp ); + + #if 0 + printf( "debug: after appending %s, slist is ", item ); + print_slist( s, stdout ); putchar( '\n' ); + #endif +} + + +// bool isin = isin_slist( value, s ); +// Given a sorted list s return true iff value is "in" s. +// Could use bsearch() if we cared enough.. +// +bool isin_slist( char *value, slist s ) +{ + for( int i=0; in; i++ ) + { + if( strcmp( value, s->data[i] ) == 0 ) return true; + } + return false; +} + + +// bool any = overlap_slists( s1, s2 ); +// Given two sorted lists (s1 and s2) return true +// iff the two lists contain any common members, even one. +// +bool overlap_slists( slist s1, slist s2 ) +{ + for( int i=0; in; i++ ) + { + if( isin_slist( s1->data[i], s2 ) ) return true; + } + return false; +} + + +// union_slists( s1, s2 ); +// Given two sorted lists (s1 and s2) do a set union operation +// altering s1. +// +void union_slists( slist s1, slist s2 ) +{ + for( int i=0; in; i++ ) + { + if( ! isin_slist( s2->data[i], s1 ) ) + { + append_slist( s1, s2->data[i] ); + } + } +} + + +// print_slist( s, out ); +// Print sorted list s to open writable file out +// as a CSV list. +// +void print_slist( slist s, FILE *out ) +{ + for( int i=0; in; i++ ) + { + if( i>0 ) putc( ',', out ); + printf( "%s", s->data[i] ); + } +} + + +// free_slist( s ); +// Free sorted list s +// +void free_slist( slist s ) +{ + free( s->data ); + free( s->buf ); + free( s ); +} diff --git a/challenge-209/duncan-c-white/C/slist.h b/challenge-209/duncan-c-white/C/slist.h new file mode 100644 index 0000000000..6d45ff3244 --- /dev/null +++ b/challenge-209/duncan-c-white/C/slist.h @@ -0,0 +1,25 @@ +// slist.h: a sorted list of strings - a growable dynarray of string +// pointers into a common string buffer (storage for which is +// also maintained inside the slist), which can be used as a simple set + +struct slist +{ + int cap; // current capacity, i.e. allocated size of data + int n; // current number of entries in data (n<=cap) + char **data; // actual data, growable. pointers into buf + int bufcap; // the allocated size of buf + int buflen; // the current amount of data stored in buf + char *buf; // the buffer of underlying data +}; + +typedef struct slist *slist; + +// prototypes + +extern slist make_slist( int cap, int bufcap ); +extern void append_slist( slist s, char * item ); +extern bool isin_slist( char * value, slist s ); +extern bool overlap_slists( slist s1, slist s2 ); +extern void union_slists( slist s1, slist s2 ); +extern void print_slist( slist s, FILE * out ); +extern void free_slist( slist s ); diff --git a/challenge-209/duncan-c-white/README b/challenge-209/duncan-c-white/README index a7d82c8b31..14df3eeaa9 100644 --- a/challenge-209/duncan-c-white/README +++ b/challenge-209/duncan-c-white/README @@ -67,5 +67,5 @@ merge two entries if the intersection of the email lists is non empty. Will also need to choose an input format, how about a list of words of the form A:a1@a.com,a2@a.com, B:b1@b.com, A:a3@a.com and B:b2@b.com,b1@b.com -(TODO) GUEST LANGUAGE: As a bonus, I also had a go at translating ch-2.pl into C -(TODO) (look in the C directory for that) +GUEST LANGUAGE: As a bonus, I also had a go at translating ch-2.pl into C +(look in the C directory for that) diff --git a/challenge-209/duncan-c-white/perl/ch-2.pl b/challenge-209/duncan-c-white/perl/ch-2.pl index e3cc27d87f..bfa468e2fc 100755 --- a/challenge-209/duncan-c-white/perl/ch-2.pl +++ b/challenge-209/duncan-c-white/perl/ch-2.pl @@ -33,8 +33,8 @@ # Will also need to choose an input format, how about a list of words of the # form A,a1@a.com,a2@a.com, B,b1@b.com, A,a3@a.com and B,b2@b.com,b1@b.com # -# (TODO) GUEST LANGUAGE: As a bonus, I also had a go at translating ch-2.pl into C -# (TODO) (look in the C directory for that) +# GUEST LANGUAGE: As a bonus, I also had a go at translating ch-2.pl into C +# (look in the C directory for that) # use strict; @@ -44,10 +44,11 @@ use Getopt::Long; use Data::Dumper; my $debug=0; -die "Usage: merge-accounts [--debug] name,emails+\n" +die "Usage: merge-accounts [--debug] list(name,emails)\n" unless GetOptions( "debug"=>\$debug ) && @ARGV > 0; my @input = map { + die "Bad arg $_, no comma found\n" unless /,/; my @x = split( /,/ ); \@x; } @ARGV; -- cgit