Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 5.07 on page 110

Rewrite readlines to store lines in an array supplied by main , rather than calling alloc to maintain storage. How much faster is the program?



Solution by Steven Huang

/* K&R Exercise 5-7 */
/* Steven Huang */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

#define TRUE     1
#define FALSE    0

#define MAXLINES 5000       /* maximum number of lines */
#define MAXLEN   1000       /* maximum length of a line */
char *lineptr[MAXLINES];
char lines[MAXLINES][MAXLEN];

/* K&R2 p29 */
int getline(char s[], int lim)
{
  int c, i;

  for (i = 0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; i++)
    s[i] = c;                                                         
  if (c == '\n') {
    s[i++] = c;   
  }
  s[i] = '\0';
  return i;
}

/* K&R2 p109 */
int readlines(char *lineptr[], int maxlines)
{
  int len, nlines;
  char *p, line[MAXLEN];

  nlines = 0;
  while ((len = getline(line, MAXLEN)) > 0)
    if (nlines >= maxlines || (p = malloc(len)) == NULL)
      return -1;
    else {
      line[len - 1] = '\0';  /* delete the newline */
      strcpy(p, line);
      lineptr[nlines++] = p;
    }                       
  return nlines;  
}

int readlines2(char lines[][MAXLEN], int maxlines)
{
  int len, nlines;

  nlines = 0;
  while ((len = getline(lines[nlines], MAXLEN)) > 0)
    if (nlines >= maxlines)                         
      return -1;           
    else
      lines[nlines++][len - 1] = '\0';  /* delete the newline */
  return nlines;
}
 
int main(int argc, char *argv[])
{
  /* read things into cache, to be fair. */
  readlines2(lines, MAXLINES);

  if (argc > 1 && *argv[1] == '2') {
    puts("readlines2()");           
    readlines2(lines, MAXLINES);
  } else {
    puts("readlines()");
    readlines(lineptr, MAXLINES);
  }
   
  return 0;
}



Steven writes: "Unfortunately, the follow-up question here on which version is faster is difficult to determine on my machine, because the difference is very small. I can call malloc() one million times in under a second - this suggests that the conventional wisdom that malloc() is slow and should be avoided may need some more adjustment."

[Editor's note: That's probably because malloc is actually taking memory requests to the system as infrequently as possible, so that most of the calls invoke little more than pointer arithmetic. This suggests that the conventional wisdom may be based on real world programs, rather than artificial "how many mallocs per second can I do" benchmarks. :-) ]


[This space reserved for Steven's right of reply!]

Solution by Alex Hoang (Category 0)

/* The solution for readline presented by Steven Huang above has a major flaw: it does not
 * keep track of pointers so there is no way for the sorting routine to sort it.  The author
 * very likely wanted a solution that can be handled by the sorting routine presented in the book.

 * My cat 0 solution utilizes an array called linestore - passed from the main routine to the readline
 * function - that will be the allocation buffer for the line inputs.  Space allocation will be 
 * handled by the function myreadline, rather than by alloc as in the book.  This solution also 
 * addresses the pointers issue in Steven Huang's solution.  Since my solution is cat 0, I did not 
 * use a multidimensional array which has not been covered up to this point in the book. 
 
 * The complete program is below.  Only the main and myreadline functions differ from the book */

#include <stdio.h>
#include <string.h>
#define MAXLINES 5000
#define MAXLEN 1000
#define MAXSTORE 10000 /* max space allocated for all lines.  Same as ALLOCSIZE on p.91. */


char *lineptr[MAXLINES];

int readlines(char *lineptr[], int nlines);
void writelines(char *lineptr[], int nlines);

void qsort(char *lineptr[], int left, int right);

main()
{
    int nlines;
    char linestore[MAXSTORE]; /* array for storing all lines */
    /* myreadlines will pass an extra parameter linestore for storing all the input lines */
    if ((nlines = myreadlines(lineptr, MAXLINES, linestore)) >= 0)
    {
        qsort(lineptr, 0, nlines-1);
        writelines(lineptr, nlines);
        return 0;
    }
    else
    {
        printf("error: input too big to sort\n");
        return 1;
    }
}

int getline(char *, int);

/* My solution to the problem, without using alloc */

int myreadlines(char *lineptr[], int maxlines, char *ls)
{
    int len, nlines;
    char *p, line[MAXLEN];
    nlines = 0;
    p = ls + strlen(ls); /* initiate to first empty position */
    while ((len = getline(line, MAXLEN)) > 0)
        /* The line below will check to make sure adding the nextline will not exceed MAXSTORE */
        if (nlines >= maxlines || (strlen(ls) + len) > MAXSTORE)
            return -1;
        else
        {   line[len-1] = '\0';
            strcpy(p, line);
            lineptr[nlines++] = p;
            p += len; /* point p to next empty position */
        }
    return nlines;
}

/* K&R2 p98 */

void writelines(char *lineptr[], int nlines)
{
    while (nlines-- > 0)
        printf("%s\n", *lineptr++);
}

/* K&R2 p97 */

void qsort(char *v[], int left, int right)
{
    int i, last;
    void swap(char *v[], int i, int j);
    if (left >= right)
        return;
    swap(v, left, (left + right)/2);
    last = left;
    for (i = left+1; i <= right; i++)
        if (strcmp(v[i], v[left]) < 0)
            swap(v, ++last, i);
    swap(v, left, last);
    qsort(v, left, last-1);
    qsort(v, last+1, right);
}

/* K&R2 p99 */

void swap(char *v[], int i, int j)
{
    char *temp;
    temp = v[i];
    v[i] = v[j];
    v[j] = temp;
}

/* K&R2 p29 */
int getline(char s[], int lim)
{
  int c, i;

  for (i = 0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; i++)
    s[i] = c;
  if (c == '\n') {
    s[i++] = c;
  }
  s[i] = '\0';
  return i;
}

Solution by Xggggg (Category 0)

/*Exercise 5-7. Rewrite readlines to store lines in an array supplied by main, rather than
calling alloc to maintain storage. How much faster is the program?*/

#include <stdio.h>
#include <string.h>

#define MAXLINES 5000
#define MAXLENS 100000

char *lineptr[MAXLINES];

int readlines(char *lineptr[], int maxlines, char lines[], int maxlen);
void writelines(char *lineptr[], int nlines);
void quicksort(char *lineptr[], int left, int right);
int getline(char *s, int i);

int main(void)
{
	int nlines;
	char lines[MAXLENS];
	if ((nlines = readlines(lineptr, MAXLINES, lines, MAXLENS)) >= 0)
	{
		quicksort(lineptr, 0, nlines - 1);
		writelines(lineptr, nlines);
		return 0;
	}
	else
	{
		printf("error: input too big to sort\n");
		return 1;
	}
}

int getline(char s[], int lim)
{
	int c, i = 0;
	while (i < lim - 1 && (c = getchar()) != EOF && c != '\n')
		s[i++] = c;
	if (c == '\n')
		s[i++] = c;
	s[i] = '\0';
	return i;
}

int readlines(char *lineptr[], int maxlines, char lines[], int maxlens)
{
	int len, nlines = 0;
	char *p = lines;
	while ((len = getline(p, maxlens + lines - p)) > 0)
		if (nlines < maxlines)
		{
			p[len - 1] = '\0';
			lineptr[nlines++] = p;
			p += len;
		}
		else
			return -1;
	return nlines;
}

void writelines(char *lineptr[], int nlines)
{
	while (nlines-- > 0)
		printf("%s\n", *lineptr++);
}

void quicksort(char *v[], int left, int right)
{
	int i, last;
	void swap(char *v[], int i, int j);
	if (left >= right)
		return;
	swap(v, left, (left + right) / 2);
	last = left;
	for (i = left + 1; i <= right; i++)
		if (strcmp(v[i], v[left]) < 0)
			swap(v, ++last, i);
	swap(v, left, last);
	quicksort(v, left, last - 1);
	quicksort(v, last + 1, right);
}

void swap(char *v[], int i, int j)
{
	char *temp;
	temp = v[i];
	v[i] = v[j];
	v[j] = temp;
}

Solution by Evan W

Due to Alex Hoang's discovery, it is hard to sort using Steven Huang solution. THanks to ALex, I changed Steven Code a little to sort lines in a consistent way.

PS. I change getline to my_getline in literal, because there is a lib function named getline in lib when compiling in gcc.


/*   "The solution for readline presented by Steven Huang above has a major flaw: it does not
 *  * keep track of pointers so there is no way for the sorting routine to sort it.  The author
 *   * very likely wanted a solution that can be handled by the sorting routine presented in the book." --addressed by [mailto:alexwhoang@gmail.com Alex Hoang]
 *
 *    * So I keep track of pointers using lineptr, and I change getline to my_getline in literal for lib compatible.
 *     * when read a line, my code past the pointer tracker, and store lines in a global 2-D array like Steven, 
 *     * Maybe it is a little impossible-to-understand(a word from K&R chapter 5 p93) considering the code pass value related to 2-D array address. Maybe not.
 *     
 *    
 *         
 *          * The complete program is below. */

#include <stdio.h>
#include <string.h>
#define MAXLINES 5000
#define MAXLEN 1000
#define MAXSTORE 10000 /* max space allocated for all lines.  Same as ALLOCSIZE on p.91. */


char *lineptr[MAXLINES];
char lines[MAXLINES][MAXLEN];

int readlines(char *lineptr[], int nlines);
void writelines(char *lineptr[], int nlines);
int my_getline(char *, int);

void qsort(char *lineptr[], int left, int right);

int readlines2(char *lineptr[], int maxlines)
{
	int len, nlines;


	nlines = 0;
	while ((len = my_getline(lines[nlines], MAXLEN)) > 0)/*lines[nlines] is the address of the n-th lines.*/
		if (nlines >= maxlines)                         
			return -1;           
		else {
			lines[nlines][len - 1] = '\0';  /* delete the newline */
			lineptr[nlines] = lines[nlines]; /* track of the pointer to n-th lines.*/
			nlines++; /* increment n*/
		}
	return nlines;
}

main()
{
	int nlines;
	char linestore[MAXSTORE]; /* array for storing all lines */
	/* myreadlines will pass an extra parameter linestore for storing all the input lines */
	if ((nlines = readlines2(lineptr, MAXLINES)) >= 0)
	{
		qsort(lineptr, 0, nlines-1);
		writelines(lineptr, nlines);
		return 0;
	}
	else
	{
		printf("error: input too big to sort\n");
		return 1;
	}
}



void writelines(char *lineptr[], int nlines)
{
	while (nlines-- > 0)
		printf("%s\n", *lineptr++);
}

/* K&R2 p97 */

void qsort(char *v[], int left, int right)
{
	int i, last;
	void swap(char *v[], int i, int j);
	if (left >= right)
		return;
	swap(v, left, (left + right)/2);
	last = left;
	for (i = left+1; i <= right; i++)
		if (strcmp(v[i], v[left]) < 0)
			swap(v, ++last, i);
	swap(v, left, last);
	qsort(v, left, last-1);
	qsort(v, last+1, right);
}

/* K&R2 p99 */

void swap(char *v[], int i, int j)
{
	char *temp;
	temp = v[i];
	v[i] = v[j];
	v[j] = temp;
}

/* K&R2 p29 */
int my_getline(char s[], int lim)
{
	int c, i;

	for (i = 0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; i++)
		s[i] = c;
	if (c == '\n') {
		s[i++] = c;
	}
	s[i] = '\0';
	return i;
}




Solution by menonsahab

#include <stdio.h>
#include <string.h>

#define MAXLINES 10000
char *lineptr[MAXLINES];
int readlines(char *lineptr[], int nlines, char *storStr, int *nextFreeLoc);
void writelines(char *lineptr[], int nlines);

#define MAXLEN 100000
int xgetline(char *, int);
char *alloc(int);


void xqsort(char *lineptr[], int left, int right);
void swap(char *v[], int i, int j);

#define MAXSTRLEN 1000000

int main()
{
	int nlines;
	char storStr[MAXSTRLEN];
	int nextFreeLoc= 0;
	if( (nlines = readlines(lineptr, MAXLINES, storStr, &nextFreeLoc)) >= 0 )
	{
		xqsort(lineptr, 0, nlines-1);
		writelines(lineptr, nlines);
		return 0;
	}
	else
	{
		printf("error: input too big to sort\n");
		return 1;
	}
}

int xgetline(char *s, int lim)
{
	int c, i;
	for(i = 0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; ++i)
		s[i] = c;
	if(c == '\n')
	{
		s[i] = c;
		++i;
	}
	s[i] = '\0';
	return i;
}


int readlines(char *lineptr[], int maxlines, char *storStr, int *nextFreeLoc)
{
	int len, nlines;
	char line[MAXLEN];
	nlines = 0;
	while((len = xgetline(line, MAXLEN)) > 0)
	{
		if(nlines >= maxlines || /*(p = alloc(len)) == NULL*/ (MAXSTRLEN - *nextFreeLoc) < len )
			return -1;
		else
		{
			line[len-1] = '\0';
			strcpy(&storStr[*nextFreeLoc], line);
			*nextFreeLoc += len;
			lineptr[nlines++] = storStr + *nextFreeLoc;
		}
	}
	return nlines;
}

void writelines(char *lineptr[], int nlines)
{
	while(nlines--)
	{
		printf("%s\n", *lineptr++);
	}
}

void xqsort(char *v[], int left, int right)
{
	int i, last;
	if(left >= right)
		return;
	swap(v, left, (left + right)/2);
	last = left;
	for(i = left + 1; i <= right; i++)
		if(strcmp(v[i], v[left]) < 0)
			swap(v, ++last, i);
	swap(v, left, last);
	xqsort(v, left, last-1);
	xqsort(v, last+1, right);
}

void swap(char *v[], int i, int j)
{
	char *temp;
	temp = v[i];
	v[i] = v[j];
	v[j] = temp;
}

/* 
It is my understanding that any significant difference in execution time will only be
observed if the input is large enough. So I've increased the size of all strings, in both the original program and
my version of it and used a large text file (Dan Brown's novel: Origin) with 5832 lines and the results are as follows:

For the program in K&R that uses the alloc() function:
real    0m0.099s
user    0m0.016s
sys	0m0.012s

For the program where we supply a storage string from main to the function readlines:
real    0m0.099s
user    0m0.016s
sys	0m0.016s

I'm not particularly sure why one is slower than the other. I'm guessing it is because of too many arguments being passed on back and forth.
*/


Solution by codybartfast (Category 0)

Like others I initially saw no measurable difference in performance. So I used a file containing 3.25M lines, each containing one letter from War and Peace. (This was to emphasis line operations over the per character operations of readline.) I also removed the sorting and writing. Then the updated readlines was faster. Running the program 25 times with each version the original averaged 68ms and the updated version 38ms, an improvement of about 43%.

Milliseconds  Orignal     Updated
              =======     =======
                47          47
                47          31
                78          31
                78          31
                63          47
                63          63
                63          47
                63          47
                78          47
                63          16
                63          47
                63          47
                63          47
                47          31
                63          31
                78          63
                78          31
                78          47
                47          31
                63          31
                78          47
                78          16
                63          31
                94          31
                63          16
              =======     =======
        Mean    66          38
char *lineptr[MAXLINES];
char buffer[BUFSIZE];

int main(void)
{
	int nlines;

	if ((nlines = readlines(lineptr, buffer)) >= 0) {
		qsort(lineptr, 0, nlines - 1); /* These lines removed for */
		writelines(lineptr, nlines);   /*   performance testing.  */
		return 0;
	} else {
		printf("error: input too big to sort\n");
		return 1;
	}
}

int readlines(char *lineptr[], char *buffer)
{
	int len, nlines;
	char *bufmax = buffer + BUFSIZE - MAXLEN;

	nlines = 0;
	while ((len = getline(buffer, MAXLEN)) > 0) {
		lineptr[nlines++] = buffer;
		buffer += len;
		*(buffer - 1) = '\0'; /* delete newline */
		if (nlines >= MAXLINES || buffer > bufmax)
			return -1;
	}
	return nlines;
}

Solution by anonymous

I tried to do as little changes as possible. I removed the allocbuf, allocp, alloc function and afree function. I added in linedata in main, passed it to readlines, and updated the logic in readlines to use the char array instead of the original allocbuf. I left everything else alone. For my speed measurement of the changes, I noticed a small difference. My test programs did not call qsort or writelines so they only called readlines. The version with alloc took an average of 18.192 seconds to read 119,837,505 lines of various English words while the version without alloc took an average of 17.467 seconds. That is about a 4% savings in time overall. This is the command I used to get an average of 10 runs:

for i in {1..10}; do time cat english_words | ./a.out; done 2>&1 | grep ^real | sed s/,/./ | sed -e s/.*m// | awk '{sum += $1} END {print sum / NR}'

This is my revised program:

#include <stdio.h>
#include <string.h>

/*
    Exercise 5-7. Rewrite readlines to store lines in an array supplied by main, rather than calling alloc to maintain storage. How much faster is the program?
*/

#define MAXLINES 5000 // max number of lines to be sorted
#define MAXLEN 1000 // max length of any input line
#define ALLOCSIZE 5000000 // size of available space

char *lineptr[MAXLINES]; // pointers to text lines

int readlines(char *lineptr[], char linedata[], int maxlines);
void writelines(char *lineptr[], int nlines);
int getline(char *s, int lim);
void qsort(char *v[], int left, int right);
void swap(char *v[], int i, int j);

int main()
{
    int nlines; // number of input lines read
    static char linedata[ALLOCSIZE]; // this needed to be static to run

    if ((nlines = readlines(lineptr, linedata, MAXLINES)) >= 0)
    {
        qsort(lineptr, 0, nlines - 1);
        writelines(lineptr, nlines);
        return 0;
    }
    else
    {
        printf("error: input too big to sort\n");
        return 1;
    }
}

// read input lines
int readlines(char *lineptr[], char linedata[], int maxlines)
{
    int len, nlines;
    char *p = linedata, line[MAXLEN];
    nlines = 0;
    while ((len = getline(line, MAXLEN)) > 0)
        if (nlines >= maxlines || linedata + ALLOCSIZE - p < len) // linedata (starting pos ptr) + ALLOCSIZE (total size int) - p (current pos ptr) < len (size of line int). If <, line won't fit
            return -1;
        else
        {
            line[len - 1] = '\0'; // delete newline since writelines adds it back
            strcpy(p, line);
            lineptr[nlines++] = p;
            p += len; // next free position (first spot after '\0')
        }
    return nlines;
}

// write output lines
void writelines(char *lineptr[], int nlines)
{
    while (nlines-- > 0)
        printf("%s\n", *lineptr++);
}

// get line into s, return length
int getline(char *s, int lim)
{
    int c;
    char *original = s;
    while (--lim > 0 && (c = getchar()) != EOF && c != '\n')
        *s++ = c;
    if (c == '\n')
        *s++ = c;
    *s = '\0';
    return s - original; // returns length
}

// sort v[left]...v[right] into increasing order
void qsort(char *v[], int left, int right)
{
    int i, last;
    if (left >= right) // do nothing if array contains fewer than two elements
        return;
    swap(v, left, (left + right) / 2); // move partition element to v[0]
    last = left;
    for (i = left + 1; i <= right; i++) // partition
        if (strcmp(v[i], v[left]) < 0) // < 0 means v[i] is less than v[left], so swap them to put them in order
            swap(v, ++last, i);
    swap(v, left, last); // restore partition element
    qsort(v, left, last - 1);
    qsort(v, last + 1, right);
}

// interchange v[i] and v[j]
void swap(char *v[], int i, int j)
{
    char *temp = v[i];
    v[i] = v[j];
    v[j] = temp;
}
Personal tools