Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 8.02 on page 178

Rewrite fopen and _fillbuf with fields instead of explicit bit operations. Compare code size and execution speed.



Solution by Akil Adeshwar

/*
   Rewrite fopen and _fillbuf with fields instead of explicit bit operations.
   To avoid confusion, my implementation of standard definitions have a suffix "x" 


   Field based approach is time consuming as we would have to check if each field 
   is set or not while finding a free slot. Using Bit manipulation, we just have to 
   compare the particular flag bit to zero.
   
   Bit fields can be used in struct _flags structure to reduce the total size of a File type.
 */

#include<stdio.h>
#include<fcntl.h>
#include<stdlib.h>

#define EOF (-1)
#define OPEN_MAX 20 // Maximum files that can be open
#define PERMS 0666



// Field based approach for flags
typedef struct _flags{
	int _READ;
	int _WRITE;
	int _UNBUF;
	int _EOF;
	int _ERR;
}flags;

// File Data structre.

typedef struct _iobuf {
	int cnt;
	char *ptr;
	char *base;
	struct _flags flags;
	int fd;
}FILEx;

FILEx _iob[OPEN_MAX];


int _fillbuffx(FILEx *f);


#define getcx(p) (--(p)->cnt >= 0? (unsigned char) *(p)->ptr++:_fillbufx(p))


//Check if Slot is empty by checking if all fields are
//empty in the flags structure


int is_empty(struct _flags flags){
	if(!flags._READ && !flags._WRITE && !flags._UNBUF &&
			!flags._EOF && !flags._ERR)
		return 1;
	return 0;
}



FILEx *fopenx(char *name,char *mode){
	
	int fd;
	FILEx *fp;
	// Invalid Input
	if( *mode != 'r' && *mode != 'w' && *mode != 'a' )
		return NULL;

	// Check for free slot
	for( fp= _iob; fp< _iob + OPEN_MAX ; fp++)
		if(is_empty(fp->flags))
			break;
	// If FULL return NULL
	if( fp>= _iob+OPEN_MAX )
		return NULL;
	// Create file on Write Mode
	if( *mode == 'w')
		fd = creat(name,PERMS);
	// Append Mode - If file not present create one
	else if( *mode == 'a') {
		if((fd = open(name,O_WRONLY,0))==-1)
			fd = creat(name,PERMS);
		lseek(fd,0L,2); // Go to end, in case of append
	}
	else
		fd = open(name,O_RDONLY,0);
	if(fd == -1)
		return NULL;
	fp->fd = fd;
	fp->cnt=0;
	if(*mode == 'r')
		fp->flags._READ = 1;
	else
		fp->flags._WRITE = 1;
	return fp;
}



int _fillbufx(FILEx *fp){

	int bufsize;

	if(fp->flags._READ == 0)
		return EOF;
	bufsize = (fp->flags._UNBUF != 0)? 1:BUFSIZ;
	if(fp->base == NULL)
		if((fp->base = (char *) malloc (bufsize))==NULL)
			return EOF;
	fp->ptr = fp->base;
	fp->cnt = read(fp->fd,fp->ptr,bufsize);
	if(--fp->cnt< 0){

		if(fp->cnt == -1)
			fp->flags._EOF=1;
		else
			fp->flags._ERR=1;
		fp->cnt = 0;
		return EOF;
	}
	return (unsigned char) *fp->ptr++;
}



int main(void){
	//Use your own file.
	FILEx *fp = fopenx("test.c","r");
	if(fp!=NULL){	
		char c;
		// getcx is a macro defined above
		while((c=getcx(fp))!=EOF)
			putchar(c);
	}
	else
		puts("Error");
	return 0;
}


Solution by codybartfast

The field version was 8.7% slower than the bit version:

TIME    | cat times (s/2MB)     | Avrg  | %
--------|-----------------------|-------|-------
bits:   |21.773, 22.342, 22.393 |22.169 |
fields: |23.242, 23.218, 25.834 |16.655 | +8.7%

Both of these versions seem to be about 100 times (10,000%) slower than the cats from the previous question.
So it's possible the impact would be relatively larger in a more efficient implementation.

The absolute increase in the size of the binary code is just 1.8%. However, if we subtract the size of a
minimal program then the binary code is 26.5% larger for the field version.

SIZE    | Abs   | %     | Extra | %
--------|-------|-------|-------|-------
minimal:|16,472 |       |       |
bits:   |17,648 |       |+1,176 |
fields: |17,960 | +1.8% |+1,488 |+26.5%

Minimal program:

int main(void)
{
	return 0;
}

Bit version:

/**********
 *  FILE  *
 **********/

#define NULL 0
#define EOF (-1)
#define OPEN_MAX 20

typedef struct _iobuf {
	int cnt;
	char *ptr;
	char *base;
	int flag;
	int fd;
} FILE;
extern FILE _iob[OPEN_MAX];

#define stdin (&_iob[0])
#define stdout (&_iob[1])
#define stderr (&_iob[2])

enum flags { _READ = 01, _WRITE = 02, _UNBUF = 04, _EOF = 010, _ERR = 020 };

FILE *fopen(char *name, char *mode);
int _fillbuf(FILE *);
int _flushbuf(int, FILE *);

#define feof(p) (((p)->flag & _EOF) != 0)
#define ferror(p) (((p)->flag & _ERR) != 0)
#define fileno(p) ((p)->fd)

#define getc(p) (--(p)->cnt >= 0 ? (unsigned char)*(p)->ptr++ : _fillbuf(p))
#define putc(x, p) (--(p)->cnt >= 0 ? *(p)->ptr++ = (x) : _flushbuf((x), p))

#define getchar() getc(stdin)
#define putchar(x) putc(x, stdout)

/***********
 *  fopen  *
 ***********/

#include <fcntl.h>
#include <unistd.h>
#define PERMS 0666

FILE *fopen(char *name, char *mode)
{
	int fd;
	FILE *fp;

	if (*mode != 'r' && *mode != 'w' && *mode != 'a')
		return NULL;
	for (fp = _iob; fp < _iob + OPEN_MAX; fp++)
		if ((fp->flag & (_READ | _WRITE)) == 0)
			break;
	if (fp >= _iob + OPEN_MAX)
		return NULL;

	if (*mode == 'w')
		fd = creat(name, PERMS);
	else if (*mode == 'a') {
		if ((fd = open(name, O_WRONLY, 0)) == -1)
			fd = creat(name, PERMS);
		lseek(fd, 0L, 2);
	} else
		fd = open(name, O_RDONLY, 0);
	if (fd == -1)
		return NULL;
	fp->fd = fd;
	fp->cnt = 0;
	fp->base = NULL;
	fp->flag = (*mode == 'r') ? _READ : _WRITE;
	return fp;
}

/**************
 *  _fillbuf  *
 **************/

#include <stdlib.h>
#define BUFSIZE 1024

int _fillbuf(FILE *fp)
{
	int bufsize;

	if ((fp->flag & (_READ | _EOF | _ERR)) != _READ)
		return EOF;
	bufsize = (fp->flag & _UNBUF) ? 1 : BUFSIZE;
	if (fp->base == NULL)
		if ((fp->base = (char *)malloc(bufsize)) == NULL)
			return EOF;
	fp->ptr = fp->base;
	fp->cnt = read(fp->fd, fp->ptr, bufsize);
	if (--fp->cnt < 0) {
		if (fp->cnt == -1)
			fp->flag |= _EOF;
		if (fp->cnt == -2)
			fp->flag |= _ERR;
		fp->cnt = 0;
		return EOF;
	}
	return (unsigned char)*fp->ptr++;
}

/**************************
 *  stdin, stdou, stderr  *
 **************************/

FILE _iob[OPEN_MAX] = { { 0, (char *)0, (char *)0, _READ, 0 },
			{ 0, (char *)0, (char *)0, _WRITE, 1 },
			{ 0, (char *)0, (char *)0, _WRITE | _UNBUF, 2 } };

/****************
 *  main (cat)  *
 ****************/

int main(int argc, char *argv[])
{
	FILE *fp;
	char buf[1];

	while (--argc > 0)
		if ((fp = fopen(*++argv, "r")) == NULL)
			return 1;
		else
			while ((buf[0] = getc(fp)) != EOF)
				write(1, buf, 1);
	return 0;
}

Field version:

/**********
 *  FILE  *
 **********/

#define NULL 0
#define EOF (-1)
#define OPEN_MAX 20

typedef struct _iobuf {
	int cnt;
	char *ptr;
	char *base;
	int read;
	int write;
	int unbuf;
	int eof;
	int err;
	int fd;
} FILE;
extern FILE _iob[OPEN_MAX];

#define stdin (&_iob[0])
#define stdout (&_iob[1])
#define stderr (&_iob[2])

FILE *fopen(char *name, char *mode);
int _fillbuf(FILE *);
int _flushbuf(int, FILE *);

#define feof(p) (((p)->flag & _EOF) != 0)
#define ferror(p) (((p)->flag & _ERR) != 0)
#define fileno(p) ((p)->fd)

#define getc(p) (--(p)->cnt >= 0 ? (unsigned char)*(p)->ptr++ : _fillbuf(p))
#define putc(x, p) write(p->fd, &x, 1);

#define getchar() getc(stdin)
#define putchar(x) putc(x, stdout)

/***********
 *  fopen  *
 ***********/

#include <fcntl.h>
#include <unistd.h>
#define PERMS 0666

FILE *fopen(char *name, char *mode)
{
	int fd;
	FILE *fp;

	if (*mode != 'r' && *mode != 'w' && *mode != 'a')
		return NULL;
	for (fp = _iob; fp < _iob + OPEN_MAX; fp++)
		if (!fp->read && !fp->write)
			break;
	if (fp >= _iob + OPEN_MAX)
		return NULL;
	if (*mode == 'w')
		fd = creat(name, PERMS);
	else if (*mode == 'a') {
		if ((fd = open(name, O_WRONLY, 0)) == -1)
			fd = creat(name, PERMS);
		lseek(fd, 0L, 2);
	} else
		fd = open(name, O_RDONLY, 0);
	if (fd == -1)
		return NULL;
	fp->fd = fd;
	fp->cnt = 0;
	fp->base = NULL;
	fp->read = (*mode == 'r');
	fp->write = !fp->read;
	fp->unbuf = fp->eof = fp->err = 0;
	return fp;
}

/**************
 *  _fillbuf  *
 **************/

#include <stdlib.h>
#define BUFSIZE 1024

int _fillbuf(FILE *fp)
{
	int bufsize;

	if (!fp->read || fp->eof || fp->err)
		return EOF;
	bufsize = fp->unbuf ? 1 : BUFSIZE;
	if (fp->base == NULL)
		if ((fp->base = (char *)malloc(bufsize)) == NULL)
			return EOF;
	fp->ptr = fp->base;
	fp->cnt = read(fp->fd, fp->ptr, bufsize);
	if (--fp->cnt < 0) {
		if (fp->cnt == -1)
			fp->eof = 1;
		if (fp->cnt == -2)
			fp->err = 1;
		fp->cnt = 0;
		return EOF;
	}
	return (unsigned char)*fp->ptr++;
}

/**************************
 *  stdin, stdou, stderr  *
 **************************/

FILE _iob[OPEN_MAX] = { { 0, (char *)0, (char *)0, 1, 0, 0, 0, 0, 0 },
			{ 0, (char *)0, (char *)0, 0, 1, 0, 0, 0, 1 },
			{ 0, (char *)0, (char *)0, 0, 1, 1, 0, 0, 2 } };

/****************
 *  main (cat)  *
 ****************/

int main(int argc, char *argv[])
{
	FILE *fp;
	char c;

	while (--argc > 0)
		if ((fp = fopen(*++argv, "r")) == NULL)
			return 1;
		else
			while ((c = getc(fp)) != EOF)
				putchar(c);
	return 0;
}

Latest code on github

Solution by anonymous

Changing the code from bit operations to fields wasn't straightforward since some of the syntax hasn't been introduced. However, with some research I figured it out. Here are the changes from what was in the book to what I provided in my solution:

I changed _iobuf's flag from an int to a field struct and incorporated the _flags enum into it

// BEFORE
typedef struct _iobuf
{
    int cnt;    // characters left
    char *ptr;  // next character position
    char *base; // location of buffer
    int flag;   // mode of file access
    int fd;     // file descriptor
} FILE;

enum _flags {
    _READ = 01,  // file open for reading
    _WRITE = 02, // file open for writing
    _UNBUF = 04, // file is unbuffered
    _EOF = 010,  // EOF has occurred on this file
    _ERR = 020   // error occurred on this file
};
// AFTER
typedef struct _iobuf
{
    int cnt;     // characters left
    char *ptr;   // next character position
    char *base;  // location of buffer
    struct flags // mode of file access field struct
    {
        unsigned int _READ : 1;  // file open for reading
        unsigned int _WRITE : 1; // file open for writing
        unsigned int _UNBUF : 1; // file is unbuffered
        unsigned int _EOF : 1;   // EOF has occurred on this file
        unsigned int _ERR : 1;   // error occurred on this file
    } _flags;
    int fd;     // file descriptor
} FILE;

I then converted all of the bitwise operations to code that assigned and compared the field values.

// BEFORE
#define feof(p) (((p)->flag & _EOF) != 0) // 1
#define ferror(p) (((p)->flag & _ERR) != 0) // 2
if ((fp->flag & (_READ | _WRITE)) == 0) // 3
fp->flag = (*mode == 'r') ? _READ : _WRITE; // 4
if ((fp->flag & (_READ | _EOF | _ERR)) != _READ) // 5
bufsize = (fp->flag & _UNBUF) ? 1 : BUFSIZ; // 6
fp->flag |= _EOF; // 7
fp->flag |= _ERR; // 8
// AFTER
#define feof(p) ((p)->_flags._EOF != 0) // 1
#define ferror(p) ((p)->_flags._ERR != 0) // 2
if (fp->_flags._READ == 0 && fp->_flags._WRITE == 0) // 3
fp->_flags._READ = fp->_flags._WRITE = fp->_flags._UNBUF = fp->_flags._EOF = fp->_flags._ERR = 0; // 4
if (*mode == 'r') // 4
    fp->_flags._READ = 1; // 4
else // 4
    fp->_flags._WRITE = 1; // 4
if (fp->_flags._READ == 0 || fp->_flags._EOF == 1 || fp->_flags._ERR == 1) // 5
bufsize = (fp->_flags._UNBUF == 1) ? 1 : BUFSIZ; // 6
fp->_flags._EOF = 1; // 7
fp->_flags._ERR = 1; // 8

Finally, I had to update the _iob array's initialization since the flags were changed from an int to a struct.

// BEFORE
FILE _iob[OPEN_MAX] = // stdin, stdout, stderr
{
    { 0, (char *) 0, (char *) 0, _READ, 0 },
    { 0, (char *) 0, (char *) 0, _WRITE, 1 },
    { 0, (char *) 0, (char *) 0, _WRITE | _UNBUF, 2 }
};
// AFTER
FILE _iob[OPEN_MAX] = // stdin, stdout, stderr
{
    { 0, (char *) 0, (char *) 0, { ._READ = 1 }, 0 },
    { 0, (char *) 0, (char *) 0, { ._WRITE = 1 }, 1 },
    { 0, (char *) 0, (char *) 0, { ._WRITE = 1, ._UNBUF = 1 }, 2 }
};

I tested the code by creating a 10 GB random file with

head -c 10G < /dev/urandom > bigfile

and used the time command to measure how long it took for each version to read the entire file in. Both programs had the same code in main() where they simply read in each character until it reached the end. The bitwise version took 42.515 seconds on average while the fields version took 42.646 seconds on average. The file sizes were the same on my machine for both programs, but when I used the size command I was able to see the difference in text size:

size field bitwise
   text    data     bss     dec     hex filename
   2600    1256       8    3864     f18 field 
   2408    1256       8    3672     e58 bitwise

I am not sure if the field version was supposed to be faster with the tradeoff of a larger binary or not since the execution speed was about the same.

Also, it is worth pointing out that neither of the other two solutions here used actual fields. The syntax for a field is described on page 150 which specifies that the code for them is unsigned int <name> : # inside of a struct. Both Akil Adeshwar and codybartfast simply added a struct of ints instead of fields.

Here is my entire program

#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>

/*
    Exercise 8-2. Rewrite fopen and _fillbuf with fields instead of explicit bit operations. Compare code size and execution speed.
*/

#ifdef NULL
    #undef NULL
#endif

typedef struct _iobuf
{
    int cnt;     // characters left
    char *ptr;   // next character position
    char *base;  // location of buffer
    struct flags // mode of file access field struct
    {
        unsigned int _READ : 1;  // file open for reading
        unsigned int _WRITE : 1; // file open for writing
        unsigned int _UNBUF : 1; // file is unbuffered
        unsigned int _EOF : 1;   // EOF has occurred on this file
        unsigned int _ERR : 1;   // error occurred on this file
    } _flags;
    int fd;     // file descriptor
} FILE;

#define NULL 0
#define EOF (-1)
#define BUFSIZ 1024
#define OPEN_MAX 20 // max #files open at once
#define stdin (&_iob[0])
#define stdout (&_iob[1])
#define stderr (&_iob[2])

extern FILE _iob[OPEN_MAX];
int _fillbuf(FILE *fp);
int _flushbuf(int, FILE *);

#define feof(p) ((p)->_flags._EOF != 0)
#define ferror(p) ((p)->_flags._ERR != 0)
#define fileno(p) ((p)->fd)
#define getc(p) (--(p)->cnt >= 0 ? (unsigned char) *(p)->ptr++ : _fillbuf(p))
#define putc(x, p) (--(p)->cnt >= 0 ? *(p)->ptr++ = (x) : _flushbuf((x), p))
#define getchar() getc(stdin)
#define putchar(x) putc((x), stdout)
#define PERMS 0666 // RW for owner, group, others

FILE _iob[OPEN_MAX] = // stdin, stdout, stderr
{
    { 0, (char *) 0, (char *) 0, { ._READ = 1 }, 0 },
    { 0, (char *) 0, (char *) 0, { ._WRITE = 1 }, 1 },
    { 0, (char *) 0, (char *) 0, { ._WRITE = 1, ._UNBUF = 1 }, 2 }
};
FILE *fopen(char *name, char *mode);

int main()
{
    int c;
    while ((c = getchar()) != EOF) // read until end and then exit for testing purposes
        ;
    exit(0);
}

// open file, return file ptr
FILE *fopen(char *name, char *mode)
{
    int fd;
    FILE *fp;

    if (*mode != 'r' && *mode != 'w' && *mode != 'a')
        return NULL;
    for (fp = _iob; fp < _iob + OPEN_MAX; fp++)
        if (fp->_flags._READ == 0 && fp->_flags._WRITE == 0) // if both flags are zero
            break;
    if (fp >= _iob + OPEN_MAX) // no free slots
        return NULL;
    
    if (*mode == 'w')
        fd = creat(name, PERMS);
    else if (*mode == 'a')
    {
        if ((fd = open(name, O_WRONLY, 0)) == -1)
            fd = creat(name, PERMS);
        lseek(fd, 0L, 2);
    }
    else
        fd = open(name, O_RDONLY, 0);
    if (fd == -1) // couldn't access name
        return NULL;
    fp->fd = fd;
    fp->cnt = 0;
    fp->base = NULL;
    fp->_flags._READ = fp->_flags._WRITE = fp->_flags._UNBUF = fp->_flags._EOF = fp->_flags._ERR = 0;
    if (*mode == 'r')
        fp->_flags._READ = 1;
    else
        fp->_flags._WRITE = 1;
    return fp;
}

// allocate and fill input buffer
int _fillbuf(FILE *fp)
{
    int bufsize;
    if (fp->_flags._READ == 0 || fp->_flags._EOF == 1 || fp->_flags._ERR == 1)
        return EOF;
    bufsize = (fp->_flags._UNBUF == 1) ? 1 : BUFSIZ;
    if (fp->base == NULL) // no buffer yet
        if ((fp->base = (char *) malloc(bufsize)) == NULL)
            return EOF; // can't get buffer
    fp->ptr = fp->base;
    fp->cnt = read(fp->fd, fp->ptr, bufsize);
    if (--fp->cnt < 0)
    {
        if (fp->cnt == -1)
            fp->_flags._EOF = 1;
        else
            fp->_flags._ERR = 1;
        fp->cnt = 0;
        return EOF;
    }
    return (unsigned char) *fp->ptr++;
}
Personal tools