Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 5.17 on page 121

Add a field-handling capability, so sorting may be done on fields within lines, each field sorted according to an independent set of options. (The index for this book was sorted with -df for the index category and -n for the page numbers.)



Solution by Robert Taylor

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define AUTHOR "Robert Taylor"
#define CREATION_DATE "May, 2014"
#define LAST_UPDATE __DATE__ /* last date that binary was compiled */
static char *program_name;
/* My solution to Exercise 5-17 
 * of the C Programming Language (second edition)
 * by Brian W. Kernighan 
 * and Denis M. Ritchie
 *
 * To compile:
 * gcc -Os -Wall -s -o sort sort.c
 *
 * For help:
 * ./sort --h
 */

#define MAXLEN 1000 /* max length of any input line */
#define MAXLINES 500000 /* max # of lines to be sorted */
#define ALLOCSIZE 15000000 /* size of space to store lines */
#define NUMDIGITS 5 /* 4 digits plus the '\0' terminator
		     for input of numeric option values */
static char *lineptr[MAXLINES]; /* pointers to text lines */

static int getLine(char *s, int lim);
static int readlines(char *lineptr[], int nlines);
static void writelines(char *lineptr[], int nlines);
static void my_qsort(void *lineptr[], int left, int right, int (*comp)(const void*, const void*));
static int numcmp(const char *s1, const char *s2);
static int str_cmp(const char *s1, const char *s2);
static int compare(const char *s1, const char *s2);
static void swap(void *v[], int i, int j);
static int parse_args(int argc, char *argv[]);
static void recordinit(void); /* initialize each new field record */
static void dump_field_records(void); /* dump field records for analysis */
static void print_help(void); /* provide help on program usage */
static void substr(char *s1, const char *s2, int p1, int p2);
static void use_record(const char *s1, const char *s2, int n);
static void substr_delim(char *s1, const char *s2, int n);
static void dump_parsed_fields(char *lineptr[], int nlines);
static void dump_parsed_fields_xml(char *lineptr[], int nlines);

static int reverse = 0; /* 1 if sort in reverse order */
static int fold = 0; /* if fold = 1 it means case insensitive sort */
static int directory = 0; /* if directory = 1 ignore invalid characters */
static int numeric = 0; /* 1 if numeric sort */
static int num_lines_to_ignore = 0; /* ignore sorting the first n lines */
static char line1[MAXLEN];/* two lines for comparison, possibly substrings */
static char line2[MAXLEN];/* of the lines read */
static int sample_field_parse = 0; /* if 1, output the data from the fields
				      defined, one field per line, no sorting
				      is performed in this case*/
static int dump_as_xml = 0; /* when dumping parsed fields, format as xml data */
static int include_line = 0; /* when dumping fields, output source line 
				as well. */

/* logic to handle field records */
#define RECORDSIZE	9 /* number of values in the record */
#define RECORDTYPE	0 /* offset within a record to the record type */
#define FIELDSTART	1 /* start of field as offset into the line */
#define FIELDEND	2 /* end of field as offset into the line */
#define FIELDDELIM	1 /* also could store delim char here */
#define FIELDQUOTE	2 /* also could store quote char here */
#define FIELDNUMERIC	3 /* numeric sort for this field? */
#define FIELDREV	4 /* sort this field in reverse order? */
#define FIELDFOLD	5 /* fold (case insensitive) sort for this field? */
#define FIELDDIR	6 /* directory sort for this field? */
#define FIELDOFFSET	7 /* offset of field (counting from left to right) */
#define FIELDESC	8 /* escape character typically \ */

#define RECORDTYPEOFFSET 0 /* 0 if we are considering offsets into the line */
#define RECORDTYPEDELIM 1 /* 1 if we are considering parsing delimited type data */

#define RECORDNUM 100 /* maximum number of field records */

#define MAXFIELDS RECORDNUM * RECORDSIZE /* room to hold RECORDNUM field records */
static int fieldinfo[MAXFIELDS]; /* array to hold field record data */
static int numfields = 0; /* number of fields for which options were read */
static int curfield = 0; /* field we are currently considering */

/* variables used to carry over default/last-set values between one field
 * record and the next */
static int recordtype = RECORDTYPEOFFSET; /* offsets within lines */
static int field_delim = ','; /* delimiter between fields */
static int field_quote = '"'; /* quote character */
static int field_esc = '\\'; /* for escaping field_delim, field_quote
			        field_esc etc.*/

/* Sort input lines... sorts lines of input data (STDIN) and sends sorted
 * lines to output (STDOUT). Run with --h to see help.
 */

int main(int argc, char *argv[])
{
	int nlines; /* number of input lines read */
	recordinit();/* initialize the first field record */
	if (parse_args(argc, argv)) /* parse command line arguments */
		return 1;
	if ((nlines = readlines(lineptr, MAXLINES)) >= 0){
		if (sample_field_parse){
			if (dump_as_xml){
				dump_parsed_fields_xml(lineptr, nlines);		
			} else {
				dump_parsed_fields(lineptr, nlines);
			}
		} else {
			my_qsort((void **) lineptr, 0, nlines -1, 
					(int (*)(const void*,const void*))compare);
			writelines(lineptr, nlines);
		}
		return 0;
	} else {
		printf("input too big to sort\n");
		return 1;
	}
}
/* parse_args: create field records from command line arguments */
static int parse_args(int argc, char *argv[])
{
	int i = 0; /* index into each argument string */
	int j = 0; /* index into numstring */
	int no_room = 0; /* if 1 stop collecting field records */
	int temp = 0; /* temporary storage of numbers */
	char numstring[NUMDIGITS];/* space to store passed number as a string */
	program_name = &(*argv)[0];
	while (--argc > 0){
		if (no_room)
			break; /* stop collecting field records */
                ++argv; /* look at the next command line argument */
		i = 0;
		j = 0;
		if (((*argv)[i] == ':') || ((*argv)[i] == ',')){
			recordinit();
			if (numfields >= RECORDNUM)
				break; /* no room to store more field records */
			continue;
		}
		if ((*argv)[i] == '-'){
			while ((*argv)[i] != '\0'){ /* allow for mashed together options */
				if (no_room)
					break; /* stop collecting field records */
				i++;
				switch((*argv)[i]){
				case 'n':
					numeric = 1;
					fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDNUMERIC] = numeric;	
					break;
				case 'r':
					reverse = 1;
					fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDREV] = reverse;
					break;
				case 'f':
					fold = 1;
					fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDFOLD] = fold;
					break;
				case 'd':
					directory = 1;
					fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDIR] = directory;
					break;
				case 's':
					i++;
					j = 0;
					while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
						numstring[j++] = (*argv)[i++];
						if (j >= NUMDIGITS) /* do not read too many digits */
							break;
					}
					i--;
					numstring[j] = '\0';
					temp = atoi(numstring); /* offset starts at 0 for first char */
					if (temp >= MAXLEN){
						printf("ERROR: The value provided for the s option"
								" is too large\n");
						return 1;
					} else {
						fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDSTART] = temp;
					}
					break;
				case 'e':
					i++;
					j = 0;
					/* slight alteration to support passing a negative number */
					while ((*argv)[i] != '\0' && (isdigit((*argv)[i]) || (*argv)[i] == '-')){
						numstring[j++] = (*argv)[i++];
						if (j >= NUMDIGITS) /* do not read too many digits */
							break;
					}
					i--;
					numstring[j] = '\0';
					temp = atoi(numstring);
					if (temp >= MAXLEN){
						printf("ERROR: The value provided for the e option"
								" is too large\n");
						return 1;
					} else {
						fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDEND] = temp;
					}
					break;
				case 'o':
					i++;
					j = 0;
					while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
						numstring[j++] = (*argv)[i++];
						if (j >= NUMDIGITS) /* do not read too many digits */
							break;
					}
					i--;
					numstring[j] = '\0';
					temp = atoi(numstring);
					if (temp >= MAXLEN){
						printf("ERROR: The value provided for the o option"
								" is too large\n");
						return 1;
					} else {
						fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDOFFSET] = temp;
					}
					break;
				case 't':
					i++;
					j = 0;
					while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
						numstring[j++] = (*argv)[i++];
						if (j >= NUMDIGITS) /* do not read too many digits */
							break;
					}
					i--;
					numstring[j] = '\0';
					temp = atoi(numstring);
					if (temp != RECORDTYPEOFFSET && temp != RECORDTYPEDELIM){
						printf("ERROR: This is an ivalid value for the"
								" t option\n");
						return 1;
					} else {
						recordtype = temp;
						fieldinfo[((numfields - 1) * RECORDSIZE) + RECORDTYPE] = recordtype;
						if (recordtype == RECORDTYPEDELIM){
							if (fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDELIM] == 0){
								fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDELIM] = field_delim;
							}
							if (fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDQUOTE] <= 0){
								fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDQUOTE] = field_quote;
							}
							if (fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDESC] == 0){
								fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDESC] =	field_esc;
							}
						}
					}
					break;
				case 'm':
					i++;
					if ((*argv)[i] == '1'){
						i++;
						if ((*argv)[i] != '\0'){
							if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'P'){
								field_delim = ' ';
								i++;
							} else if ((*argv)[i] == 'T' && (*argv)[i + 1] == 'A'){
								field_delim = '\t';
								i++;
							} else if ((*argv)[i] == 'V' && (*argv)[i + 1] == 'E'){
								field_delim = '|';
								i++;
							} else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'E'){
								field_delim = ';';
								i++;
							} else if ((*argv)[i] == 'B' && (*argv)[i + 1] == 'A'){
								field_delim = '\\';
								i++;
							} else if ((*argv)[i] == 'P' && (*argv)[i + 1] == 'E'){
								field_delim = '%';
								i++;
							} else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'O'){
								field_delim = '$';
								i++;
							} else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'Q'){
								field_delim = '\'';
								i++;
							} else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'Q'){
								field_delim = '"';
								i++;
							} else {
								field_delim = (*argv)[i];
							}
							fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDDELIM] = field_delim;
						} else {
							printf("ERROR: Please provide the field delimiter character"
									" immediately following the -m1 option\n");
							return 1;
						}
					} else if ((*argv)[i] == '2'){
						i++;
						if ((*argv)[i] != '\0'){
							if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'P'){
								field_quote = ' ';
								i++;
							} else if ((*argv)[i] == 'T' && (*argv)[i + 1] == 'A'){
								field_quote = '\t';
								i++;
							} else if ((*argv)[i] == 'V' && (*argv)[i + 1] == 'E'){
								field_quote = '|';
								i++;
							} else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'E'){
								field_quote = ';';
								i++;
							} else if ((*argv)[i] == 'B' && (*argv)[i + 1] == 'A'){
								field_quote = '\\';
								i++;
							} else if ((*argv)[i] == 'P' && (*argv)[i + 1] == 'E'){
								field_quote = '%';
								i++;
							} else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'O'){
								field_quote = '$';
								i++;
							} else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'Q'){
								field_quote = '\'';
								i++;
							} else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'Q'){
								field_quote = '"';
								i++;
							} else if ((*argv)[i] == 'N' && (*argv)[i + 1] == 'U'){
								field_quote = '\0';
								i++;
							} else {
								field_quote = (*argv)[i];
							}
							fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDQUOTE] = field_quote;
						} else {
							printf("ERROR: Please provide the field quote character"
									" immediately following the -m2 option\n");
							return 1;
						}
					} else if ((*argv)[i] == '3'){
						i++;
						if ((*argv)[i] != '\0'){
							if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'P'){
								field_esc = ' ';
								i++;
							} else if ((*argv)[i] == 'T' && (*argv)[i + 1] == 'A'){
								field_esc = '\t';
								i++;
							} else if ((*argv)[i] == 'V' && (*argv)[i + 1] == 'E'){
								field_esc = '|';
								i++;
							} else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'E'){
								field_esc = ';';
								i++;
							} else if ((*argv)[i] == 'B' && (*argv)[i + 1] == 'A'){
								field_esc = '\\';
								i++;
							} else if ((*argv)[i] == 'P' && (*argv)[i + 1] == 'E'){
								field_esc = '%';
								i++;
							} else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'O'){
								field_esc = '$';
								i++;
							} else if ((*argv)[i] == 'S' && (*argv)[i + 1] == 'Q'){
								field_esc = '\'';
								i++;
							} else if ((*argv)[i] == 'D' && (*argv)[i + 1] == 'Q'){
								field_esc = '"';
								i++;
							} else if ((*argv)[i] == 'N' && (*argv)[i + 1] == 'U'){
								field_esc = '\0';
								i++;
							} else {
								field_esc = (*argv)[i];
							}
							fieldinfo[((numfields - 1) * RECORDSIZE) + FIELDESC] = field_esc;
						} else {
							printf("ERROR: Please provide the desired escape character"
									" immediately following the -m3 option\n");
							return 1;
						}
					}
					break;
				case 'i':
					i++;
					j = 0;
					while ((*argv)[i] != '\0' && isdigit((*argv)[i])){
						numstring[j++] = (*argv)[i++];
						if (j >= NUMDIGITS) /* do not read too many digits */
							break;
					}
					i--;
					numstring[j] = '\0';
					num_lines_to_ignore = atoi(numstring);
					break;
				case '-':
					i++;
					/* --help or --HELP or --h or --H */
					if ((*argv)[i] == 'h' || (*argv)[i] == 'H'){
						print_help();
						return 1; /* exit */
					}
					/* --dump_records or --DUMP_RECORDS or --d or --D etc. */
					if ((*argv)[i] == 'd' || (*argv)[i] == 'D'){
						dump_field_records();
						return 1; /* exit */
					}
					/* --sample_field_parse or --SAMPLE_FIELD_PARSE or --s or --S */
					if ((*argv)[i] == 's' || (*argv)[i] == 'S'){
						sample_field_parse = 1;
						break; /* in case we add more ifs below */
					}
					/* --xml or --XML or --x or --X */
					if ((*argv)[i] == 'x' || (*argv)[i] == 'X'){
						dump_as_xml = 1;
						break; /* in case we add more ifs below */
					}
					/* --include_line or --INCLUDE_LINE or --i or --I */
					if ((*argv)[i] == 'i' || (*argv)[i] == 'I'){
						include_line = 1;
						break; /* in case we add more ifs below */
					}
					break;
				case ':':
					recordinit();
					if (numfields >= RECORDNUM){
						no_room = 1; /* no room to store more field records */
					}
					break;
				case ',':
					recordinit();
					if (numfields >= RECORDNUM){
						no_room = 1; /* no room to store more field records */
					}
					break;
				default:
					break;
				}
			}
		}
	}
	return 0;
}

/* Initialize some values for each new field record added */
static void recordinit(void)
{
	fieldinfo[(numfields * RECORDSIZE) + RECORDTYPE] = recordtype;
	if (recordtype){
		fieldinfo[(numfields * RECORDSIZE) + FIELDDELIM] = field_delim;
		fieldinfo[(numfields * RECORDSIZE) + FIELDQUOTE] = field_quote;
	} else {
		fieldinfo[(numfields * RECORDSIZE) + FIELDSTART] = 0;
		fieldinfo[(numfields * RECORDSIZE) + FIELDEND] = -1;
	}
	fieldinfo[(numfields * RECORDSIZE) + FIELDNUMERIC] = 0;
	fieldinfo[(numfields * RECORDSIZE) + FIELDREV] = 0;
	fieldinfo[(numfields * RECORDSIZE) + FIELDFOLD] = 0;
	fieldinfo[(numfields * RECORDSIZE) + FIELDDIR] = 0;
	fieldinfo[(numfields * RECORDSIZE) + FIELDOFFSET] = 0;
	fieldinfo[(numfields * RECORDSIZE) + FIELDESC] = field_esc;
	numfields++;
	return;
}

/* use_record: Setup to use the values from the specified 
 * field record.
 */
static void use_record(const char *s1, const char *s2, int n)
{
	int fieldnum, p1, p2;
	/* set some sourcefile scoped global variables */
	recordtype = fieldinfo[(n * RECORDSIZE) + RECORDTYPE];
	numeric = fieldinfo[(n * RECORDSIZE) + FIELDNUMERIC];
	reverse = fieldinfo[(n * RECORDSIZE) + FIELDREV];
	fold = fieldinfo[(n * RECORDSIZE) + FIELDFOLD];
	directory = fieldinfo[(n * RECORDSIZE) + FIELDDIR];
	/* set type specific variables and initialize line1 and line2 */
	if (recordtype){
		field_delim = fieldinfo[(n * RECORDSIZE) + FIELDDELIM];
		field_quote = fieldinfo[(n * RECORDSIZE) + FIELDQUOTE];
		fieldnum = fieldinfo[(n * RECORDSIZE) + FIELDOFFSET];
		field_esc = fieldinfo[(n * RECORDSIZE) + FIELDESC];
		substr_delim(line1, s1, fieldnum);
		substr_delim(line2, s2, fieldnum);
	} else {
		p1 = fieldinfo[(n * RECORDSIZE) + FIELDSTART];
		p2 = fieldinfo[(n * RECORDSIZE) + FIELDEND];
		substr(line1, s1, p1, p2);
		substr(line2, s2, p1, p2);
	}
	//printf("line1 is %s line2 is %s\n", line1, line2);
	return;
}

/* my_qsort: sort v[left] ... v[right] into 
 * increasing or decreasing order depending on
 * the value of reverse.*/
static void my_qsort(void *v[], int left, int right,
		int (*comp)(const void*, const void*))
{
	int i, last;
	if (left >= right) /* do nothing if array contains */
		return; /* fewer than two elements */
	swap(v, left, (left + right)/2);
	last = left;
	for (i = left + 1; i <= right; i++)
		if ((*comp)(v[i], v[left]) < 0)
			swap(v, ++last, i);
	swap(v, left, last);
	my_qsort(v, left, last - 1, comp);
	my_qsort(v, last + 1, right, comp);
}
/* compare: Parent process for comparisons. I am able
 * to dynamically make decisions concerning whether numcmp 
 * or str_cmp should be called in here.
 * Whether you code the reverse logic here or in qsort, it
 * needs to support changing dynamically based on what field
 * record is currently loaded.
 */
static int compare(const char *s1, const char *s2)
{
	int retval;
	curfield = 0;
	do{
		use_record(s1, s2, curfield);
		if (numeric)
			retval = numcmp(line1, line2);
		else
			retval = str_cmp(line1, line2);
		if (retval == 0){
			curfield++;
		}
	} while (retval == 0 && curfield < numfields);
	if (reverse){
		if (retval > 0){
			retval = -1;
		} else if (retval < 0){
			retval = 1;
		}
	}
	return retval;
}

/* numcmp: compare s1 and s2 numerically */
static int numcmp(const char *s1, const char *s2)
{
	double v1, v2;
	v1 = atof(s1);
	v2 = atof(s2);
	if (v1 < v2)
		return -1;
	else if (v1 > v2)
		return 1;
	else
		return 0;
}
/* str_cmp: replaces standard library strcmp to add
 * more features for types of comparison. Supports
 * case insensitive comparison for example. Supports
 * ignoring invalid characters. Borrowed version of this
 * function by Barrett Drawdy as it was cleaner than 
 * mine.
 */
static int str_cmp(const char *s1, const char *s2)
{
	if (directory){
		while (!isdigit(*s1) && !isalpha(*s1) && !isspace(*s1) && *s1)
			++s1; /* ignore bad characters */
		while (!isdigit(*s2) && !isalpha(*s2) && !isspace(*s2) && *s2)
			++s2; /* ignore bad characters */
	}
	while (fold ? (tolower(*s1) == tolower(*s2)) : (*s1 == *s2)){
		if (*s1 == '\0')
			return 0;
		++s1;
		++s2;
		if (directory){
			while (!isdigit(*s1) && !isalpha(*s1) && !isspace(*s1) && *s1)
				++s1; /* ignore bad characters */
			while (!isdigit(*s2) && !isalpha(*s2) && !isspace(*s2) && *s2)
				++s2; /* ignore bad characters */
		}
	}
	return fold ? (tolower(*s1) - tolower(*s2)) : (*s1 - *s2);
}
/* copy characters from offset p1 to p2 (inclusive) of s2 to s1 */
static void substr(char *s1, const char *s2, int p1, int p2)
{
	int i, j;
	int length = strlen(s2);
	if (p1 + 1 >= length){ /* desired field is missing from line */
		s1[0] = '\0';
		return;
	}
	if (p2 < p1)
		p2 = length;
	/* if p2 is too big we will simply set s1 to whatever is left */
	for (i = p1, j = 0; i <= p2 && s2[i] != '\0'; i++, j++)
		s1[j] = s2[i];
	s1[j] = '\0';
	return;
}

/* substr_delim: copy characters from the delimited field number n of
 * s2 to s1.
 * field_delim is counted if it is outside of a pair of field_quote 
 * (a quoted field), and not escaped with a field_esc. Otherwise
 * it is just copied as part of the data.
 *
 * If field_esc is followed by field_esc, field_delim, or field_quote
 * it is skipped and the following character is copied as part of
 * the data. Otherwise field_esc is just copied as part of the data.
 *
 * field_quote only has special meaning if it is at the very beginning
 * of a field, or if it is at the very end of a field and follows a 
 * field_quote that was at the very beginning of a field. If field_quote
 * is seen in the middle of the field it is just considered part of the
 * data.
 *
 * This logic should allow substr_delim to work with data that has been
 * prepared using strict quoting and escaping rules, while allowing
 * the most flexibility for handling data that was not strictly quoted
 * and escaped.
 *
 */
static void substr_delim(char *s1, const char *s2, int n)
{
	int delim_count = 0; /* the first field is number 0 */
	int field_quote_on = 0; /* are we inside of quotes? */
	/* i, j,  indexes into s2 and s1 */
	int i = 0;
	int j = 0;
	while (s2[i] != '\0'){
		if (i == 0 && s2[i] == field_quote){
			field_quote_on = 1;
			++i;
			continue;
		}
		if (s2[i] == field_esc){
			if (s2[i + 1] == field_quote ||
					s2[i + 1] == field_delim ||
					s2[i + 1] == field_esc){
				++i; /* skip this field_esc and copy the next char */
	                        s1[j++] = s2[i++];
	                        continue;
			}
		}
		if (field_quote_on == 1 && 
				s2[i] == field_quote &&
				(s2[i + 1] == field_delim || s2[i + 1] == '\0')){
			field_quote_on = 0;
			++i;
			continue;
		}
		if (field_quote_on == 0 && s2[i] == field_delim){
			++delim_count;
			++i;
			if (s2[i] == field_quote){
				field_quote_on = 1;
				++i;
			}
			continue;
		}
		if (delim_count == n){
			s1[j++] = s2[i];
		} else if (delim_count > n){
			break;
		}
		++i;
	}
	s1[j] = '\0';
	return;
}

/* swap pointers: void * is used so that swap can work on any pointer type.
 * Any pointer can be cast to void * and back again without loss of 
 * information
 */
static void swap(void *v[], int i, int j)
{
	void *temp;
	temp = v[i];
	v[i] = v[j];
	v[j] = temp;
}

static char *alloc(int);
/* readlines: read input lines */
static int readlines(char *lineptr[], int maxlines)
{
	int len, nlines;
	char *p, line[MAXLEN];
	nlines = 0;
	while ((len = getLine(line, MAXLEN)) > 0)
		if (num_lines_to_ignore){
			if (!sample_field_parse) /* if we are sampling fields we may not
						    want the ignored lines output */
				printf("%s", line);
			--num_lines_to_ignore;
		} else {
			if(nlines >= maxlines || (p = alloc(len)) == NULL)
				return -1;
			else {
				line[len - 1] = '\0'; /* delete newline */
				strcpy(p, line);
				lineptr[nlines++] = p;
			}
		}
	return nlines;
}

/* writelines: write output lines */
static void writelines(char *lineptr[], int nlines)
{
	while (nlines-- > 0)
		printf("%s\n", *lineptr++);
}

/* getLine: read a line into s, return length */
static int getLine(char *s, int lim)
{
	int c, i;
	for (i = 0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; ++i)
		s[i] = c;
	if (c == '\n'){
		s[i] = c;
		++i;
	}
	s[i] = '\0';
	return i;
}

static char allocbuf[ALLOCSIZE]; /* storage for alloc */
static char *allocp = allocbuf; /* next free position */
static char *alloc(int n) /* return pointer to n characters */
{
	if (allocbuf + ALLOCSIZE - allocp >= n){ /* it fits */
		allocp += n;
		return allocp - n; /* old p */
	} else {
		return NULL;
	}
}


/* dump_parsed_fields: Apply the field position/offset/delimiters
 * defined and dump the resulting field contents, one field per
 * line, repeat for all input lines. Useful for debugging how
 * the field position and contents are identified for the sort
 */
static void dump_parsed_fields(char *lineptr[], int nlines)
{
	int n = 0;
	int i = 1;
	int fieldnum, p1, p2;
	while (i <= nlines){
		if (include_line)
			printf("%s\n", *lineptr);
		n = 0;
		do{
			recordtype = fieldinfo[(n * RECORDSIZE) + RECORDTYPE];
			if (recordtype){
				field_delim = fieldinfo[(n * RECORDSIZE) + FIELDDELIM];
				field_quote = fieldinfo[(n * RECORDSIZE) + FIELDQUOTE];
				fieldnum = fieldinfo[(n * RECORDSIZE) + FIELDOFFSET];
				field_esc = fieldinfo[(n * RECORDSIZE) + FIELDESC];
				substr_delim(line1, *lineptr, fieldnum);
			} else {
				p1 = fieldinfo[(n * RECORDSIZE) + FIELDSTART];
				p2 = fieldinfo[(n * RECORDSIZE) + FIELDEND];
				substr(line1, *lineptr, p1, p2);
			}
			printf("%s\n", line1);/* output the parsed field */
			n++;
		} while (n < numfields);
		lineptr++;
		i++;
	}
	return;
}
/* dump_parsed_fields_xml: Apply the field position/offset/delimiters
 * defined and dump the resulting field contents, in an xml format.
 */
static void dump_parsed_fields_xml(char *lineptr[], int nlines)
{
	int n = 0;
	int i = 1;
	int fieldnum, p1, p2;
	printf("<root>\n");
	while (i <= nlines){
		printf("\t<line%d>", i);
		n = 0;
		do{
			recordtype = fieldinfo[(n * RECORDSIZE) + RECORDTYPE];
			if (recordtype){
				field_delim = fieldinfo[(n * RECORDSIZE) + FIELDDELIM];
				field_quote = fieldinfo[(n * RECORDSIZE) + FIELDQUOTE];
				fieldnum = fieldinfo[(n * RECORDSIZE) + FIELDOFFSET];
				field_esc = fieldinfo[(n * RECORDSIZE) + FIELDESC];
				substr_delim(line1, *lineptr, fieldnum);
			} else {
				p1 = fieldinfo[(n * RECORDSIZE) + FIELDSTART];
				p2 = fieldinfo[(n * RECORDSIZE) + FIELDEND];
				substr(line1, *lineptr, p1, p2);
			}
			printf("<f%d>%s</f%d>", n, line1, n);/* output the parsed field */
			n++;
		} while (n < numfields);
		printf("</line%d>\n", i);
		lineptr++;
		i++;
	}
	printf("</root>\n");
	return;
}

/* dump_field_records: for debugging purposes you may wish to
 * dump all the field records that have been stored
 */
static void dump_field_records(void)
{
	int i;
	int c;
	printf("\n---------------------------| Field Record Dump |-----------------------------\n");
	printf("Record	| COL0	| COL1	| COL2	| COL3	| COL4	| COL5	| COL6	| COL7	| COL8\n");
	for (i = 0; i < numfields; i++){
		printf(" %d\t", i);
		printf("| %d\t", fieldinfo[(i * RECORDSIZE) + RECORDTYPE]);
		if (fieldinfo[(i * RECORDSIZE) + RECORDTYPE] == RECORDTYPEOFFSET){
			printf("| %d\t", fieldinfo[(i * RECORDSIZE) + FIELDSTART]);
			printf("| %d\t", fieldinfo[(i * RECORDSIZE) + FIELDEND]);
		} else {
			c = fieldinfo[(i * RECORDSIZE) + FIELDDELIM];
			if (c == '\t'){
				printf("| TAB\t");
			} else if (c == ' '){
				printf("| SPACE\t");
			} else {
				printf("| %c\t", c);
			}
			c = fieldinfo[(i * RECORDSIZE) + FIELDQUOTE];
			if (c == '\t'){
				printf("| TAB\t");
			} else if (c == ' '){
				printf("| SPACE\t");
			} else if (c == '\0'){
				printf("| NULL\t");
			} else {
				printf("| %c\t", c);
			}
		}
		printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDNUMERIC]);
		printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDREV]);
		printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDFOLD]);
		printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDDIR]);
		printf("| %d\t",fieldinfo[(i * RECORDSIZE) + FIELDOFFSET]);
		c = fieldinfo[(i * RECORDSIZE) + FIELDESC];
		if (c == '\t'){
			printf("| TAB\t");
		} else if (c == ' '){
			printf("| SPACE\t");
		} else if (c == '\0'){
			printf("| NULL\t");
		} else {
			printf("| %c\t", c);
		}		
	}
	printf("\nWhere COL0 = RECORDTYPE, COL1 = FIELDSTART or FIELDDELIM\n");
	printf("COL2 = FIELDEND or FIELDQUOTE, COL3 = FIELDNUMERIC, COL4 = FIELDREV\n");
	printf("COL5 = FIELDFOLD, COL6 = FIELDDIR, COL7 = FIELDOFFSET, COL8 = FIELDESC\n\n");	
	printf("RECORDTYPE is set by -t\n");
	printf("FIELDSTART is set by -s\n");
	printf("FIELDDELIM is set by -m1\n");
	printf("FIELDEND is set by -e\n");
	printf("FIELDQUOTE is set by -m2\n");
	printf("FIELDNUMERIC is set by -n\n");
	printf("FIELDREV is set by -r\n");
	printf("FIELDFOLD is set by -f\n");
	printf("FIELDDIR is set by -d\n");
	printf("FIELDOFFSET is set by -o\n");
	printf("FIELDESC is set by -m3\n");
	printf("For more info use the --h (help) option.\n\n");
	return;
}

static void print_help(void)
{
	printf("Program: %s\n", program_name);
	printf("Author: %s\n", AUTHOR);
	printf("Creation Date: %s\n", CREATION_DATE);
	printf("Last Update: %s\n", LAST_UPDATE);
	printf("usage: cat sourcefile | ./sort -options [> outputfile]\n\n");
	printf("This sort program expects input from STDIN (output from cat for\n");
	printf("example) and sends output to STDOUT (the screen for example).\n\n");
	printf("If you fail to provide input from cat or similar you will be in\n");
	printf("an interactive input mode. This means you can enter lines using\n");
	printf("the keyboard and press CTRL-D on an empty line to process them.\n\n");
	printf("Pressing CTRL-C will abort the program.\n\n");
	printf("%s is quite sophisticated, permitting you to break a line\n", program_name);
	printf("into fields that can have separate sorting options applied to\n");
	printf("them.\n\n");
	printf("It is always lines that are sorted, not the fields within the line.\n");
	printf("However defining fields and specifying sort options for them permits\n");
	printf("sophisticated sorting behavior for the lines.\n\n");
	printf("You do not need to provide sorting options for every field on a line.\n");
	printf("Only specify those fields that have the data that you wish to use\n");
	printf("to sort the lines.\n\n");
	printf("The order that you specify the fields on the command line is the order\n");
	printf("of precedence for sorting.\n\n");
	printf("For example...\n");
	printf("if one field has a username and another has a date, you can sort\n");
	printf("by increasing (ascending) username and then by decreasing (descending)\n");
	printf("date simply by specifying those 2 fields and their sort options on the\n");
	printf("command line. Specify the username field first and the date field\n");
	printf("next. The way the logic works is only if there are equal values\n");
	printf("found in the first field, is the second field examined, and only if\n");
	printf("there are equal values found in the second field is the third field\n");
	printf("examined, and so on until either a difference is found between the\n");
	printf("two fields in question, or we run out of fields that we have defined\n");
	printf("for sorting. If we run out of fields that we have defined for sorting\n");
	printf("and no difference has been found, the lines are considered equal.\n\n");
	printf("This sort program supports the use of up to %d fields with unique\n", RECORDNUM);
	printf("sort options permitted for each field. The data can have any number of\n");
	printf("fields, but you can only specify sort options for %d fields.\n", RECORDNUM);
	printf("Actually the data is limited in fields per line by the setting\n");
	printf("for the maximum line length of %d characters and the size of the fields.\n\n",MAXLEN);
	printf("The field definitions (size/location) along with the sort options\n");
	printf("are saved internally in records. There is a handy option that\n");
	printf("permits dumping these internal records so that you can evaluate\n");
	printf("how the program has interpreted the command line options that you\n");
	printf("have provided. Place this option --d (--dump_records) after all\n");
	printf("the options that you desire to audit have been specified on the command\n");
	printf("line.\n\n");
	printf("--d	dump field records, place after other options on the command line.\n");
	printf("	If you use this option the sort is not performed, this option is used\n");
	printf("	strictly for debugging your sort options that you have defined\n\n");
	printf("%s allows you to specify fields using character offsets, from the\n", program_name);
	printf("start of the line at position 0, or using delimiters and quote characters.\n");
	printf("Each field definition can optionally use either method.\n");
	printf("-t#	type of field definition, where # is %d for the character offset method\n", RECORDTYPEOFFSET);
	printf("	and %d for the delimited field method. This option should be listed\n", RECORDTYPEDELIM);
	printf("	first in the field definition, but inherits from left to right so\n");
	printf("	if all field definitions are of the same type it only needs to be\n");
	printf("	specified for the first field for example. The default is the\n");
	printf("	character offset method\n");
	printf("-s#	starting character position, where # is a number that is less than\n");
	printf("	the maximum line length of %d\n", MAXLEN);
	printf("-e#	ending character position, where # is a number that is less than\n");
	printf("	the maximum line length of %d\n", MAXLEN);
	printf("If you choose to use the character offset method of defining a field you must\n");
	printf("set -s and -e for each field to the correct offsets, sort will not check to\n");
	printf("see that you did.\n\n");
	printf("If you set -e to less than -s, it means that you want from -s to the end of\n");
	printf("the line. The default setting for sort is to use the character offset method\n");
	printf("and -s is set to 0 (the beginning) and -e is set to -1 (the rest of the line).\n");
	printf("Because of the special meaning of -e set to be less than -s, -e supports\n");
	printf("passing a negative number -e-1 for -1.\n\n");
	printf("The default delimiters for delimited data is to use a comma ',' to separate\n");
	printf("fields and to use double quotes '\"', to quote fields. The quotes surround the\n");
	printf("fields to indicate that any commas that are found within the fields can be\n");
	printf("ignored.\n\n");
	printf("In order to support the possibility of a double quote found within the data\n");
	printf("an escape character can be used and the default escape character is a\n");
	printf("backslash '\\'.\n\n");
	printf("In actuality, this program is coded to be more flexible than that. An escape\n");
	printf("character 'can' be used to escape the delimiter, the quote character or an\n");
	printf("escape character, either inside or outside of a quoted field. If the escape\n");
	printf("character is found in front of anything else it is considered part of the data.\n\n");
	printf("If the quote character is found anywhere besides the start or end of a field it\n");
	printf("is considered part of the data and as such, technically, does not need to be\n");
	printf("escaped. So this program should be able to handle data formatted according to a\n");
	printf("variety of specifications.\n\n");
	printf("If this is not enough, the quote and/or the escape character can also be\n");
	printf("disabled in cases where they are not required and yet may be found in the data\n");
	printf("(see the NU code below).\n\n");
	printf("You can define what characters to use to separate fields, to quote fields,\n");
	printf("and to escape quote characters.\n\n");
	printf("-m1n	field separator, where n is the desired character\n");
	printf("-m2n	quote character, where n is the desired character\n");
	printf("-m3n	escape character, where n is the desired escape character\n\n");
	printf("In the above 3 options instead of specifying the literal character desired\n");
	printf("as n you can use the following 2 letter (uppercase) codes:\n\n");
	printf("SP	to mean a SPACE\n");
	printf("TA	to mean a TAB\n");
	printf("VE	to mean a VERTICAL BAR '|'\n");
	printf("SE	to mean a SEMICOLON ';'\n");
	printf("BA	to mean a BACKSLASH '\\'\n");
	printf("PE	to mean a PERCENT SIGN '%%'\n");
	printf("DO	to mean a DOLLAR SIGN '$'\n");
	printf("SQ	to mean a SINGLE QUOTE '\n");
	printf("DQ	to mean a DOUBLE QUOTE \"\n\n");
	printf("NU	to mean NULL '\\0', is supported for the quote or escape character.\n");
	printf("	Since such a character will not be seen in the data (it is the string\n");
	printf("	terminator) it is used to disable the operation of the quote or escape\n");
	printf("	character if that is ever desired.\n\n");
	printf("When using the delimited field method to specify fields it is important to\n");
	printf("indicate which field in the data we are referring to. Counting from 0 for the\n");
	printf("leftmost field on a line you can indicate field numbers using the -o option.\n");
	printf("-o#	indicate which delimited field, where # is a number from 0 to however\n");
	printf("	many fields exist in the data\n\n");
	printf("Every new field record initializes -o to be 0, referring to the first field, if\n");
	printf("this is not the field you want you must set the -o option to the correct field\n");
	printf("number.\n\n");
	printf("Using either method of specifying a field, the character offset method or the\n");
	printf("delimited method, if a specified field does not exist in the line in question\n");
	printf("it is treated as an empty field. If a large number of lines are missing this\n");
	printf("field and it is the only sort field that you indicated the sort will be slow.\n");
	printf("Qsort does not like it when too many lines evaluate to be equal.\n\n");
	printf("Between field definitions, to indicate the start of a new field, you can use\n");
	printf("either a colon ':' or a comma ','. Be careful not to place a field separator at\n");
	printf("the beginning as by default it will indicate to use the entire line with the\n");
	printf("default sorting options as the first field for sorting.\n\n");
	printf("The sorting options are:\n\n");
	printf("-n	numeric sort, puts numbers in order of value\n");
	printf("-r	reverse the sort order, instead of increasing order it would be\n");
	printf("	decreasing order\n");
	printf("-f	fold upper and lower case together, or in other words do a case\n");
	printf("	insensitive sort\n");
	printf("-d	directory sort, this ignores any character that is not a letter,\n");
	printf("	number or space\n\n");
	printf("The default sort options are set to have all of these options off, which\n");
	printf("means punctuation characters, or other special characters have a sorting\n");
	printf("value, the sort is in increasing order, upper and lower case letters\n");
	printf("have different sorting values, 17 would be considered lower than 2\n");
	printf("(numeric value is not considered).\n\n");
	printf("Options can be specified individually on the command-line separated by\n");
	printf("spaces...\n");
	printf("cat sourcefile | ./sort -t1 -o1 -n , -o0 -f -d\n\n");
	printf("or they can be mashed together...\n");
	printf("cat sourcefile | ./sort -t1o1n,o0fd\n\n");
	printf("However do not put spaces between an option and its value. -s 99 is not\n");
	printf("an accepted parameter, the option should be indicated as -s99. Also the\n");
	printf("use of an equals sign '=' is not supported between an option and its\n");
	printf("value.\n\n");
	printf("Do not confuse the use of the -s (start) and -e (end) options together\n");
	printf("with the delimited field option. It will replace whatever is being used\n");
	printf("for the delimiter and quote character with whatever character happens to\n");
	printf("equal the numeric value you provide. If you really know what you are\n");
	printf("doing it can be useful, otherwise avoid it.\n\n");
	printf("There is an option to ignore the first x number of lines. You might use\n");
	printf("this option if your data includes a header line with column titles for\n");
	printf("example and you do not want this line sorted in with the data.\n");
	printf("-i#	ignore the first # of lines, where # is a number between 0 and 9999.\n");
	printf("	0 in this case has no logical meaning since we start counting lines\n");
	printf("	with 1 as the first line.\n\n");
	printf("A Sorting Example:\n");
	printf("cat sourcefile | ./sort -t1o1n,o0fd\n\n");
	printf("Means use the delimited field method (-t1), the primary sort field is the\n");
	printf("second field (o1), do a numerical sort on that field (n), if two identical\n");
	printf("numbers are found in the second field, look at the first field (o0), doing a\n");
	printf("case insensitive directory sort of the first field, decide the order for\n");
	printf("the two lines. The default delimiter, quote character, and escape character\n");
	printf("is used.\n\n");
	printf("If the sort appears to be excessively slow, it could be because the fields and\n");
	printf("options that you have selected result in too many lines that would have an\n");
	printf("equal sort order. I think this is a limitation of the qsort logic since\n");
	printf("specifying more fields and more options, so that the lines which evaluate\n");
	printf("to be equal are reduced, significantly speeds up the sort.\n\n");
	printf("A few additional options have been added to aid in debugging the\n");
	printf("specification of fields. You may find other uses for them as well. The option\n\n");
	printf("--s	for sample field parsing\n\n");
	printf("found anywhere in the options will signify that instead of sorting the input\n");
	printf("you want to output the data from the fields that you have indicated. This\n");
	printf("will assist you in debugging delimiters and offset etc. when specifying\n");
	printf("fields for delimited data, and identifying errors in character offset\n");
	printf("specification when using the character offset method of identifying fields.\n");
	printf("It can be most useful when analyzing one field specifier at a time as the\n");
	printf("fields will be dumped one field per line so as not to introduce new field\n");
	printf("separators to confuse the issue when identifying what data was exactly\n");
	printf("pulled.\n\n");
	printf("There is a further specifier that can be added to also dump the source line\n");
	printf("that the fields were pulled from\n\n");
	printf("--i	for include line with the field parse dump\n\n");
	printf("For assisting with debugging what is parsed when multiple fields are specified\n");
	printf("there is an option to format the fields into an xml document. No effort is made\n");
	printf("to escape invalid characters found in the data, the data is left as is.\n\n");
	printf("--x	format parsed fields into an xml document format\n\n");
	printf("If you specify to use the xml format, the --i option is ignored, there is no\n");
	printf("option to include the original line in the xml format. The field numbering\n");
	printf("used in the xml output is not the number of the field with respect to the\n");
	printf("data but the number of the field record specified on the command line, staring\n");
	printf("with 0 as the leftmost field record specified\n\n");
	printf("Unless --s is specified --x and --i will be ignored.\n\n");
	return;
}


Solution by codybartfast

Category 0

A basic solution that uses the orignal comparison functions (and original qsort). Case conversion for -f, and directory sort conversion for -d, are only done once per line. The comparison function for each sort field is put in an array of function pointers which allows a simple and fast comparison function:

int compare(char **sortdata1, char **sortdata2)
{
	int i, rslt = 0;

	for (i = 0; i < nsortflds && rslt == 0; i++)
		rslt = (*compares[i])(sortdata1[i], sortdata2[i]);
	return rslt;
}

Usage:

Fields are tab delimited, one argument is provided for each field to sort by:

sort [-NN*M*]*

Where NN* is the index of field to sort by and M* is any combination of the modifiers n, r, f or d. Piority is given to fields in the order that they are listed. E.g.:

sort -10 -1n -5r

Will first sort by the Tenth field, then by the First field (numeric) and then by the Fifth field (in reverse).

Sample:

Input:

Author	Part    Edition Page	Keyword
------	----	-------	----	-------
Dennis	Part D	second	132	*readlines
Brian	Part A	first	22	[tail]
Brian	Part C	first	101	'Entab
Dennis	Part B	second	42	{fields}
Brian	Part B	First	75	/Tail/
Dennis	Part C	Second	85	Fields
Dennis	Part A	First	9	Readlines
Brian	Part D	Second	92	en-tab


Sort by Keyword (with fold and directory sort) and then by Part:

sort -5fd -2
Author	Part    Edition Page	Keyword
------	----	-------	----	-------
Brian	Part C	first	101	'Entab
Brian	Part D	Second	92	en-tab
Dennis	Part B	second	42	{fields}
Dennis	Part C	Second	85	Fields
Dennis	Part A	First	9	Readlines
Dennis	Part D	second	132	*readlines
Brian	Part A	first	22	[tail]
Brian	Part B	First	75	/Tail/


Sort by Edition (with fold), Author (in reverse) and then Page (numeric):

sort -3f -1r -4n
Author	Part    Edition Page	Keyword
------	----	-------	----	-------
Dennis	Part A	First	9	Readlines
Brian	Part A	first	22	[tail]
Brian	Part B	First	75	/Tail/
Brian	Part C	first	101	'Entab
Dennis	Part B	second	42	{fields}
Dennis	Part C	Second	85	Fields
Dennis	Part D	second	132	*readlines
Brian	Part D	Second	92	en-tab
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lines.h"
#include "quicksort.h"

#define CHARCOMP int (*)(char *, char *)
#define VOIDCOMP int (*)(void *, void *)

void parseargs(int argc, char *argv[]);
char **getsortdata(char *line);
int compare(char **s1, char **s2);

char *line(char *sortdata[]);
char *fieldspace(char *sortdata[]);
void fold(char *value);
void dirsort(char *value);
int strcmpr(char *, char *);
int numcmp(char *, char *);
int numcmpr(char *, char *);
void freestuff(char ***kvms, int nlines);

int *sortidxs; /* indexes of fields to sort by, ordered by priority */
int nsortflds; /* number of fields we're sorting by */
int maxsortidx; /* highest index of the fields we're sorting by */
int (**compares)(char *, char *); /* comparison function for each sort field */
int *folds; /* whether to fold each sort field */
int *dirsorts; /* whether to use dir sort for each sort field */

int main(int argc, char *argv[])
{
	int i, nlines;
	char *buff, **lines, ***linedata;

	parseargs(argc, argv);

	if ((nlines = readlines(&buff, &lines)) == LNS_ERROR) {
		printf("input too big to sort\n");
		return 0;
	}

	linedata = malloc(nlines * sizeof(char **));
	for (i = 0; i < nlines; i++)
		linedata[i] = getsortdata(lines[i]);

	quicksort((void **)linedata, 0, nlines - 1, (VOIDCOMP)compare);

	for (i = 0; i < nlines; i++)
		lines[i] = line(linedata[i]);

	writelines(lines, nlines);

	freestuff(linedata, nlines);
	freelines(buff, lines);
	return 0;
}

int compare(char **sortdata1, char **sortdata2)
{
	int i, rslt = 0;

	for (i = 0; i < nsortflds && rslt == 0; i++)
		rslt = (*compares[i])(sortdata1[i], sortdata2[i]);
	return rslt;
}

void parseargs(int argc, char *argv[])
{
	int i;
	int maxsortcount = argc - 1;

	nsortflds = 0;
	sortidxs = malloc(maxsortcount * sizeof(int));
	compares = malloc(maxsortcount * sizeof(CHARCOMP));
	folds = malloc(maxsortcount * sizeof(int));
	dirsorts = malloc(maxsortcount * sizeof(int));

	argv++;
	for (i = 0; i < argc - 1; i++) {
		char *arg = argv[i];

		int fieldidx;
		int numeric = 0;
		int reverse = 0;
		int (*compare)(char *, char *);
		int fold = 0;
		int dirsort = 0;

		if (*arg == '-' && isdigit(*++arg)) {
			fieldidx = atoi(arg) - 1;
			while (*++arg) {
				if (*arg == 'n')
					numeric = 1;
				else if (*arg == 'r')
					reverse = 1;
				else if (*arg == 'f')
					fold = 1;
				else if (*arg == 'd')
					dirsort = 1;
			}
			if (numeric && reverse)
				compare = numcmpr;
			else if (numeric)
				compare = numcmp;
			else if (reverse)
				compare = strcmpr;
			else
				compare = (CHARCOMP)strcmp;
			maxsortidx =
				fieldidx > maxsortidx ? fieldidx : maxsortidx;
			sortidxs[nsortflds] = fieldidx;
			compares[nsortflds] = compare;
			folds[nsortflds] = fold;
			dirsorts[nsortflds] = dirsort;
			nsortflds++;
		}
	}
}

char **getsortdata(char *line)
{
	char *fieldspace = malloc((strlen(line) + 1) * sizeof(char));
	char *chr, *field;
	int fieldidx, i;

	char **tmpfields = malloc((maxsortidx + 2) * sizeof(char *));
	char **sortdata = malloc((nsortflds + 2) * sizeof(char *));

	strcpy(fieldspace, line);

	/* split line into fields and create temporary pointer for each field */
	field = fieldspace;
	fieldidx = 0;
	for (chr = fieldspace; *chr; chr++) {
		if (*chr == '\t' || *chr == '\n') {
			*chr = '\0';
			/*maxsortidx + 1 so last sort field is terminated */
			if (fieldidx <= maxsortidx + 1)
				tmpfields[fieldidx] = field;
			field = chr + 1;
			fieldidx++;
		}
	}

	/* ptr to sort by fields, in priority order, apply -f and-d if needed */
	for (i = 0; i < nsortflds; i++) {
		field = tmpfields[sortidxs[i]];
		if (folds[i])
			fold(field);
		if (dirsorts[i])
			dirsort(field);
		sortdata[i] = field;
	}
	sortdata[nsortflds] = line;
	sortdata[nsortflds + 1] = fieldspace;

	free(tmpfields);
	return sortdata;
}

char *line(char *sortdata[])
{
	return sortdata[nsortflds];
}

char *fieldspace(char *sortdata[])
{
	return sortdata[nsortflds + 1];
}

void fold(char *value)
{
	while ((*value = toupper(*value)))
		value++;
}

void dirsort(char *value)
{
	char c, *write;
	write = value;
	while ((c = *value++))
		if (isalnum(c) || c == ' ' || c == '\n')
			*write++ = c;
	*write = '\0';
}

int strcmpr(char *s1, char *s2)
{
	return strcmp(s2, s1);
}

int numcmp(char *s1, char *s2)
{
	double v1, v2;

	v1 = atof(s1);
	v2 = atof(s2);
	if (v1 < v2)
		return -1;
	else if (v1 > v2)
		return 1;
	else
		return 0;
}

int numcmpr(char *s1, char *s2)
{
	return numcmp(s2, s1);
}

void freestuff(char ***linedata, int nlines)
{
	int i;

	for (i = 0; i < nlines; i++) {
		free(fieldspace(linedata[i]));
		free(linedata[i]);
	}
	free(linedata);

	free(sortidxs);
	free(compares);
	free(folds);
	free(dirsorts);
}

Full code on github

Solution by anonymous

Once again, there is another exercise in the book that isn't clear enough in the instructions. I realized after I finished my program that there is another interpretation for this exercise. I understood it to mean that given a set of fields, the user can sort the line based on specific ones. For example, if the second field was sorted, the contents of the line remain intact, but the sorting decision is made solely on the second column. Now I realize that it could have also meant to be able to sort each field without sorting the whole line. This would have meant breaking up the line into fields, sorting the fields by themselves, and concatenating the fields back when printing. It appears that all three of us (Robert Taylor, codybartfast, and I) choose the former. It is hard to tell for Robert's though. The help command prints a book and the examples are rather complex. I just wanted to sort the second column numerically, but I gave up after 15 minutes of reading the help menu and trying weird stuff like -s2 -m2TA -e3.

I took my solution to exercise 5.16, added a getField function that carves out the specified tab delimited field, and updated myqsort to use that. By default, the field is 0 which returns the entire string when the function is called. I then had to completely re-write the way I handled arguments because of the way augments are passed to the program. I also wrote it to allow multiple sorts to be processed before printing by incorporating the sorting into the reading of the arguments loop. Every time a valid argument is completely read, it will process the sort based on the flags. I also had to add -l for logographical sorting so you can specify field(s) to use the default sorting method on.

The usage is helpful, but some quick examples will make more sense than anything else:

# sort 1st field by dirOrder, then sort 2nd field numerically
./sort -d1 -n2
# sort 3rd field logographically, then sort 1st field in case-insensitive in descending order
./sort -l3 -rf1
# same as above (order within each argument group doesn't matter)
./sort -l3 -fr1

Here is my program

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/*
    Exercise 5-17. Add a field-handling capability, so sorting may be done on fields within lines, each field sorted according to an independent set of options. (The index
    for this book was sorted with -df for the index category and -n for the page numbers.)
*/

#define MAXLINES 5000 // max #lines to be sorted
#define MAXLEN 1000 // max length of any input line
#define ALLOCSIZE 10000 // size of available space

char *lineptr[MAXLINES]; // pointers to text lines
static char allocbuf[ALLOCSIZE]; // storage for alloc
static char *allocp = allocbuf; // next free position

int readlines(char *lineptr[], int nlines);
void writelines(char *lineptr[], int nlines);
void swap(void *v[], int, int);
int readlines(char *lineptr[], int maxlines);
void writelines(char *lineptr[], int nlines);
int getline(char *s, int lim);
char *alloc(int n);
void myqsort(void *lineptr[], int left, int right, int (*comp)(void *, void *)); // qsort had to be renamed to myqsort since stdlib.h is included.
int reverseCompare(void *, void *); // used to call baseCompare with the arguments in reverse order
int numcmp(const char *, const char *);
int dircmp(const char *, const char *);
char *getField(char *, char *, int);


enum booelan { FALSE, TRUE }; // FALSE == 0, TRUE == 1, this creates booleans in a language that that doesn't natively support them unless you import stdbool.h in c99 or newer

int (*baseCompare)(void *, void *); // function pointer called baseCompare which points to either numcmp or cmpstr depending on the sorting type
int (*compare)(void *, void *); // function pointer called compare that will either point to the regular compare function baseCompare, or a new function called reverseCompare
int caseInsensitive = FALSE, field = 0;
char a[MAXLEN], b[MAXLEN]; // used for getField

// sort input lines
int main(int argc, char *argv[])
{   // nlines = number of input lines read, numeric, reverse, and dirOrder are flags for features turned on in the arguments
    int nlines, numeric, reverse, dirOrder, lexicographic;

    if ((nlines = readlines(lineptr, MAXLINES)) < 0)
    {
        printf("input too big to sort\n");
        return 1;
    }

    while (--argc > 0 && **++argv == '-') // checks the first char in each argument starting at argv[1]
    {
        field = 0;
        numeric = reverse = caseInsensitive = dirOrder = lexicographic = FALSE;
        while (*++*argv)
            if (field != 0)
                break;
            else
                switch (**argv)
                {
                case 'n':
                    numeric = TRUE;
                    break;
                case 'r':
                    reverse = TRUE;
                    break;
                case 'f':
                    caseInsensitive = TRUE;
                    break;
                case 'd':
                    dirOrder = TRUE;
                    break;
                case 'l':
                    lexicographic = TRUE;
                    break;
                default:
                    if (isdigit(**argv) && !(numeric || reverse || caseInsensitive || dirOrder || lexicographic)) // found -#
                    {
                        argc = -1; // triggers usage message
                        break;
                    }
                    else if (isdigit(**argv))
                        field = atoi(*argv);
                    else
                        argc = -1; // triggers usage message
                    break;
                }

        if (field < 0)
        {
            printf("Error: field choices must be positive ints\n");
            return 1;
        }

        baseCompare = (int (*)(void *, void *))(lexicographic ? strcmp : numeric ? numcmp : dirOrder ? dircmp : caseInsensitive ? strcmpi : strcmp); // chooses comparison function based on flags
        myqsort((void **) lineptr, 0, nlines - 1, reverse ? reverseCompare : baseCompare);
    }

    if (argc != 0)
    {
        printf("Usage: sort [-n[#]] [-r[#]] [-f[#]] [-d[#]] [-l[#]]\n");
        return 1;
    }

    writelines(lineptr, nlines);
    return 0;
}

// sort v[left]...v[right] into increasing order
void myqsort(void *v[], int left, int right, int (*comp)(void *, void *))
{
    int i, last;
    if (left >= right) // do nothing if array contains fewer than two elements
        return;
    swap(v, left, (left + right) / 2);
    last = left;
    for (i = left + 1; i <= right; i++)
        if ((*comp)(getField(v[i], a, field), getField(v[left], b, field)) < 0)
            swap(v, ++last, i);
        
    swap(v, left, last);
    myqsort(v, left, last - 1, comp);
    myqsort(v, last + 1, right, comp);
}

// compare s1 and s2 numerically
int numcmp(const char *s1, const char *s2)
{
    double v1 = atof(s1), v2 = atof(s2);
    if (v1 < v2)
        return -1;
    else if (v1 > v2)
        return 1;
    return 0;
}

// swaps int i and j
void swap(void *v[], int i, int j)
{
    void *temp = v[i];
    v[i] = v[j];
    v[j] = temp;
}

// read input lines
int readlines(char *lineptr[], int maxlines)
{
    int len, nlines;
    char *p, line[MAXLEN];

    nlines = 0;
    while ((len = getline(line, MAXLEN)) > 0)
        if (nlines >= maxlines || (p = alloc(len)) == NULL)
            return -1;
        else
        {
            line[len - 1] = '\0'; // delete newline
            strcpy(p, line);
            lineptr[nlines++] = p;
        }
    return nlines;
}

// write output lines
void writelines(char *lineptr[], int nlines)
{
    while (nlines-- > 0)
        printf("%s\n", *lineptr++);
}

// get line into s, return length
int getline(char *s, int lim)
{
    int c;
    char *original = s;
    while (--lim > 0 && (c = getchar()) != EOF && c != '\n')
        *s++ = c;
    if (c == '\n')
        *s++ = c;
    *s = '\0';
    return s - original; // returns length
}

// return pointer to n characters
char *alloc(int n)
{
    if (allocbuf + ALLOCSIZE - allocp >= n) // it fits
    {
        allocp += n; // update marker to note where the next unallocated memory starts
        return allocp - n; // old pointer address
    }
    else
        return 0; // not enough room
}

int reverseCompare(void *left, void *right)
{
    return (*baseCompare)(right, left); // reverses the order that items are compared and which causes it to be sorted in reverse (descending) order
}

// compare s1 and s2 via directory order (compares only on letters, numbers, and blanks)
int dircmp(const char *s1, const char *s2)
{
    int i = 0, j = 0;
    while (s1[i] || s2[i])
        if (isalnum(s1[i]) || isspace(s1[i]))
            if (isalnum(s2[j]) || isspace(s2[j]))
                if (caseInsensitive ? tolower(s1[i]) < tolower(s2[j]) : s1[i] < s2[j])
                    return -1;
                else if (caseInsensitive ? tolower(s1[i]) > tolower(s2[j]) : s1[i] > s2[j])
                    return 1;
                else
                    i++, j++;
            else
                j++;
        else
            i++;

    return 0;
}

// assumes that fields are tab delimited. Searches s for the nth field and returns it in t. If invalid n or field doesn't exist, returns s
char *getField(char *s, char *t, int n)
{
    if (n <= 0) // invalid n
        return s;
    int i = 0, j = 0;
    while (TRUE)
        if (s[i] == '\t' || s[i] == '\n' || s[i] == '\0') // last field ends with a '\n' or '\0'
            if (--n == 0) // if n == 0, field found
            {
                t[j] = '\0'; // field found! terminate string and return it
                return t;
            }
            else if (s[i] == '\0') // reach end of string
                break;
            else
            {
                j = 0; // reset j so t starts new string for new field
                s++; // move to next char for s
            }
        else
            t[j++] = s[i++]; // copy chars until tab is found
    
    return s; // if field doesn't exist, return s
}
Personal tools