The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 8.04 on page 179
The standard library function
int
fseek(FILE
*fp,
long
offset,
int
origin)
is identical to lseek
except that fp
is a file pointer instead of a file descriptor and the return value is an int
status, not a position. Write fseek
. Make sure that your fseek
coordinates properly with the buffering done for the other functions of the library.
Solutions by Gregory Pietsch
First solution:
/* Gregory Pietsch -- My category 0 solution to 8-4 */

/*
 * fseek: reposition the stream f by calling lseek on its descriptor.
 *
 * f      - stream to reposition
 * offset - byte offset, interpreted relative to whence
 * whence - origin selector, passed straight through to lseek
 *
 * Returns 0 on success and a nonzero value on failure.
 *
 * NOTE(review): for a stream open for reading, an offset relative to the
 * current position is applied to the descriptor's position, which is ahead
 * of what the caller has consumed by f->cnt buffered characters.  This
 * minimal version does not correct for that.
 */
int fseek(FILE *f, long offset, int whence)
{
    /* Was "base != NULL": base is a member of *f, not a free variable. */
    if ((f->flag & _UNBUF) == 0 && f->base != NULL) {
        /* deal with buffering */
        if (f->flag & _WRITE) {
            /* writing, so flush buffer; a failed flush must not be
             * followed by a seek that would silently drop the data */
            if (fflush(f))  /* from 8-3 */
                return EOF;
        } else if (f->flag & _READ) {
            /* reading, so trash buffer */
            f->cnt = 0;
            f->ptr = f->base;
        }
    }
    return (lseek(f->fd, offset, whence) < 0);
}
Second solution:
...which is considerably more comprehensive:
/* [The following solution is in the zip file as krx80401.c - RJH (ed.) ] EXERCISE 8-4 I thought I'd improve 8-4 too. I'm trying my best to get this as close to ISO C as possible given the restrictions that I'm under. (A real implementation would have fsetpos() borrow some of the same code.) */ /* Gregory Pietsch -- My category 0 solution to 8-4 */ #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 int fseek(FILE *f, long offset, int whence) { int result; if ((f->flag & _UNBUF) == 0 && base != NULL) { /* deal with buffering */ if (f->flag & _WRITE) { /* writing, so flush buffer */ if (fflush(f)) return EOF; /* from 8-3 */ } else if (f->flag & _READ) { /* reading, so trash buffer -- * but I have to do some housekeeping first */ if (whence == SEEK_CUR) { /* fix offset so that it's from the last * character the user read (not the last * character that was actually read) */ if (offset >= 0 && offset <= f->cnt) { /* easy shortcut */ f->cnt -= offset; f->ptr += offset; f->flags &= ~_EOF; /* see below */ return 0; } else offset -= f->cnt; } f->cnt = 0; f->ptr = f->base; } } result = (lseek(f->fd, offset, whence) < 0); if (result == 0) f->flags &= ~_EOF; /* if successful, clear EOF flag */ return result; }
Solution by codybartfast (cat 0)
Thanks to Gregory for his second solution, I wouldn't have realised there were (at least)
two bugs in my original code without it.
#define isopen(p) ((p)->flag & (_READ | _WRITE)) int fseek(FILE *fp, long offset, int origin) { if (fp == NULL || !isopen(fp)) return EOF; if (fp->flag & _WRITE) { if (fflush(fp)) return EOF; } else { offset -= (origin == SEEK_CUR) ? fp->cnt : 0L; fp->cnt = 0; } if (lseek(fp->fd, offset, origin) == -1) { fp->flag |= _ERR; return EOF; } else { fp->flag &= ~_EOF; return 0; } }
The following program uses fseek to edit text in a file. Then the last part creates a
shorter sentence using fseek
with SEEK_CUR
while reading. This last part wouldn't
have worked with my original code because, as Gregory's second version implies:
- when the origin is
SEEK_CUR
the current position should be interpreted as the
position after the last character provided to the user, not the current position of
the underlying file descriptor, - the
EOF
flag should be cleared if we're no longer at the end of the file after seeking.
I think that to properly handle EOF
, the FILE
would need to contain the current
absolute position and the length of the file (so we know whether our current position
is the end of the file). If we did maintain that information then there are other
optimizations (other than the one in Gregory's second version) because we could know
whether locations relative to the start or end were already in our buffer.
int main(void) { FILE *temp; int i; char c, *s, tpath[] = "temp-8-4.txt"; char draft[] = "My Ham is Green and I like to Eat it.\n"; write(1, "Started with: ", 15); write(1, draft, 38); /* Write draft text to temp file */ if ((temp = fopen(tpath, "w")) == NULL) { write(2, "error: failed to open temp to write!\n", 37); return 1; } for (s = draft; *s; s++) putc(*s, temp); /* edit the temp file */ fseek(temp, 3L, SEEK_SET); for (s = "Dog"; *s; s++) putc(*s, temp); fseek(temp, 4L, SEEK_CUR); for (s = "Beige"; *s; s++) putc(*s, temp); fseek(temp, -8L, SEEK_END); for (s = "Pat"; *s; s++) putc(*s, temp); fclose(temp); write(1, "Ended with: ", 15); /* read temp file and write to stdout */ if ((temp = fopen(tpath, "r")) == NULL) { write(2, "error: failed to open temp to read!\n", 36); return 1; } while ((c = getc(temp)) != EOF) putchar(c); fflush(stdout); /* fseek with read and SEEK_CUR */ fseek(temp, 0, SEEK_SET); write(1, "Read SEEK_CUR: ", 15); for (i = 0; i < 22; i++) putchar(getc(temp)); fseek(temp, 8L, SEEK_CUR); while ((c = getc(temp)) != EOF) putchar(c); fclose(stdout); fclose(temp); return 0; }
Output:
Started with: My Ham is Green and I like to Eat it. Ended with: My Dog is Beige and I like to Pat it. Read SEEK_CUR: My Dog is Beige and I Pat it.
This output is the same as when using stdio.h
.
Full code on github
Solution by anonymous
My code is basically the same as the other solutions provided here, except that I have more error handling. If the parameters given pass all of the tests, it checks to see if it is a FILE open for writing. If so, it flushes the buffer before seeking. If it is a FILE open for reading and the position is based on current location within the file, a few things must happen.
First, I checked to see if you could just move within the buffer. If so, this saves the cost of seeking and reading. If the offset goes outside the bounds of the buffer, I had to account for the fact that the file's current position is at the end of the last read char in the buffer. Plus, I had to take into consideration that the program thinks the file is at position fp->ptr in the file. Since fp->cnt is the number of unread buffered input chars, I just used that to correct the offset. Finally, whenever a read FILE gets lseeked, the buffer needs to be reset. An easy way to accomplish this is to set the fp->cnt to zero. This causes getc to overwrite the existing buffer with chars at the new position the next time it is called.
The last thing to do was remove the EOF flag in the file if it was set. This is because if lseek did not return an error, it is legal to read/write at any position seeked to. This is true even if the position is past the end of the file. So just in case a program reads to the end of the file and then fseeks within that file, this flag needs to be turned off.
Here is my code
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

/*
    Exercise 8-4. The standard library function
        int fseek(FILE *fp, long offset, int origin)
    is identical to lseek except that fp is a file pointer instead of a file
    descriptor and the return value is an int status, not a position. Write
    fseek. Make sure that your fseek coordinates properly with the buffering
    done for the other functions of the library.
*/

#ifdef NULL
#undef NULL
#endif

typedef struct _iobuf
{
    int cnt;    // characters left
    char *ptr;  // next character position
    char *base; // location of buffer
    int flag;   // mode of file access
    int fd;     // file descriptor
} FILE;

enum _flags
{
    _READ  = 01,  // file open for reading
    _WRITE = 02,  // file open for writing
    _UNBUF = 04,  // file is unbuffered
    _EOF   = 010, // EOF has occurred on this file
    _ERR   = 020  // error occurred on this file
};

#define NULL 0
#define EOF (-1)
#define BUFSIZ 1024
#define OPEN_MAX 20 // max #files open at once

#define stdin (&_iob[0])
#define stdout (&_iob[1])
#define stderr (&_iob[2])

extern FILE _iob[OPEN_MAX];

int _fillbuf(FILE *fp);
int _flushbuf(int c, FILE *fp);

#define feof(p) (((p)->flag & _EOF) == _EOF)
#define ferror(p) (((p)->flag & _ERR) == _ERR)
#define fileno(p) ((p)->fd)

#define getc(p) (--(p)->cnt >= 0 ? (unsigned char) *(p)->ptr++ : _fillbuf(p))
#define putc(x, p) (--(p)->cnt >= 0 ? *(p)->ptr++ = (x) : _flushbuf((x), p))

#define getchar() getc(stdin)
#define putchar(x) putc((x), stdout)

#define PERMS 0666 // RW for owner, group, others
#define MAXERRORMSG 1500

FILE *fopen(char *name, char *mode);
int fflush(FILE *fp);
int fclose(FILE *fp);
void error(char *msg);
int fseek(FILE *fp, long offset, int origin);

FILE _iob[OPEN_MAX] = // stdin, stdout, stderr
{
    { 0, (char *) 0, (char *) 0, _READ, 0 },
    { 0, (char *) 0, (char *) 0, _WRITE, 1 },
    { 0, (char *) 0, (char *) 0, _WRITE | _UNBUF, 2 }
};

// reports "<prefix><name>" through error(), truncating name if the message would not fit. Does not return
static void fail_with_name(const char *prefix, const char *name)
{
    char msg[MAXERRORMSG];

    // the buffer used to be passed to strcat uninitialised and with no bound on the appended name
    strcpy(msg, prefix);
    strncat(msg, name, sizeof msg - strlen(msg) - 1);
    error(msg);
}

// copies the last char of the input file to offset 15 of the output file, then the char at input offset 1 to output offset 0
int main(int argc, char *argv[])
{
    FILE *fpIn, *fpOut;
    int c;

    if (argc != 3)
        error("usage: ./myfseek input_file output_file");
    if ((fpIn = fopen(argv[1], "r")) == NULL) // input file
        fail_with_name("error: couldn't open file ", argv[1]);
    if ((fpOut = fopen(argv[2], "w")) == NULL) // output file
        fail_with_name("error: couldn't write to file ", argv[2]);

    if (fseek(fpIn, -1, SEEK_END) == EOF)
        error("failed to seek to last char of input file");
    if (fseek(fpOut, 15, SEEK_CUR) == EOF)
        error("failed to seek 15 bytes past the start/end of the output file");
    if ((c = getc(fpIn)) == EOF) // never write the EOF marker itself as a data byte
        error("failed to read last char of input file");
    putc(c, fpOut);

    if (fseek(fpIn, 1, SEEK_SET) == EOF)
        error("failed to seek to first char of input file");
    if (fseek(fpOut, -16, SEEK_END) == EOF) // if it was -17, lseek would trigger an error since it can't seek to before the file (-1)
        error("failed to seek 16 bytes back from the end of the output file (which is the start of the file)");
    if ((c = getc(fpIn)) == EOF)
        error("failed to read char at offset 1 of input file");
    putc(c, fpOut);

    fclose(fpIn);              // close the input file
    if (fclose(fpOut) == EOF)  // close the output file; this is where buffered output is actually written
        error("failed to flush and close the output file");
    exit(0);
}

// opens file. Returns NULL if could not open file/bad mode provided, otherwise returns file ptr
FILE *fopen(char *name, char *mode)
{
    int fd;
    FILE *fp;

    if (*mode != 'r' && *mode != 'w' && *mode != 'a')
        return NULL;
    for (fp = _iob; fp < _iob + OPEN_MAX; fp++)
        if ((fp->flag & (_READ | _WRITE)) == 0) // if both _READ and _WRITE bits not set
            break;
    if (fp >= _iob + OPEN_MAX) // no free slots
        return NULL;

    if (*mode == 'w')
        fd = creat(name, PERMS);
    else if (*mode == 'a')
        // one call that creates the file if needed and always appends. Previously a successful append open
        // fell through to the read-only open below (leaking the first descriptor), and the append open
        // itself lacked O_WRONLY so it could not be written to
        fd = open(name, O_WRONLY | O_APPEND | O_CREAT, PERMS);
    else
        fd = open(name, O_RDONLY, 0);
    if (fd == -1) // couldn't access name
        return NULL;

    fp->fd = fd;
    fp->cnt = 0;
    fp->ptr = fp->base = NULL; // no buffer yet; keep ptr well-defined too
    fp->flag = (*mode == 'r') ? _READ : _WRITE; // remove all flags and set only _READ or _WRITE
    return fp;
}

// allocate and fill input buffer. If error or EOF, return EOF, otherwise return next char in buffer
int _fillbuf(FILE *fp)
{
    if ((fp->flag & (_READ | _EOF | _ERR)) != _READ) // only _READ should be set out of those three
        return EOF;

    int bufsize = (fp->flag & _UNBUF) ? 1 : BUFSIZ; // get buffer size
    if (fp->base == NULL) // no buffer yet
        if ((fp->base = malloc(bufsize)) == NULL)
            return EOF; // failed to create buffer

    fp->ptr = fp->base; // all chars in buffer have already been read, so start over at base
    fp->cnt = read(fp->fd, fp->ptr, bufsize); // number of chars actually read (0 at end of file, -1 on error)
    if (--fp->cnt < 0)
    {
        if (fp->cnt == -1) // read returned 0: reached EOF
            fp->flag |= _EOF;
        else // read returned -1: error occurred
            fp->flag |= _ERR;
        fp->cnt = 0; // indicate no chars left without leaving a negative count
        return EOF;
    }
    return (unsigned char) *fp->ptr++; // return char read from input
}

// calls fflush to write the full buffer to output, then stores c in the emptied buffer.
// Returns EOF if error, otherwise the char stored (as unsigned char), matching what putc yields on its buffered path
int _flushbuf(int c, FILE *fp)
{
    if (fp == NULL)
        return EOF; // invalid pointer provided
    if (fflush(fp) == EOF)
        return EOF; // an error occurred in fflush

    *fp->ptr++ = (char) c; // buffer holds chars, so narrow the int
    fp->cnt--;             // one slot of the fresh buffer is now used
    return (unsigned char) c;
}

// if fp is write FILE, writes unwritten buffer to output. if fp == NULL, flushes all write FILES.
// Returns EOF if error or read FILE provided, otherwise 0.
int fflush(FILE *fp)
{
    if (fp == NULL) // flush all buffers
    {
        int result = 0;
        // can't break from loop early since an older FILE can be closed before a newer one
        for (int i = 0; i < OPEN_MAX; i++)
            if ((_iob[i].flag & _WRITE) == _WRITE && fflush(&_iob[i]) == EOF)
                result = EOF;
        return result; // EOF if any flush failed, otherwise 0
    }
    if (fp < _iob || fp >= _iob + OPEN_MAX)
        return EOF; // invalid pointer provided
    if ((fp->flag & (_WRITE | _ERR | _READ)) != _WRITE) // only _WRITE should be set out of those three
        return EOF;

    int bufsize = (fp->flag & _UNBUF) ? 1 : BUFSIZ; // get buffer size
    if (fp->base == NULL) // no buffer yet so nothing to flush. Create buffer to store future chars
    {
        if ((fp->base = malloc(bufsize)) == NULL)
        {
            fp->flag |= _ERR;
            return EOF; // malloc failed to allocate a buffer
        }
    }
    else // buffer exists, so write contents to file
    {
        int n = fp->ptr - fp->base; // number of characters in buffer
        if (write(fp->fd, fp->base, n) != n)
        {
            fp->flag |= _ERR;
            return EOF; // error writing buffer
        }
    }
    fp->ptr = fp->base; // buffer is empty again (or was just created)
    fp->cnt = bufsize;  // room left in the buffer, so putc will work correctly
    return 0;
}

// flushes any unwritten data for fp, discards any unread buffered input, frees any automatically allocated buffer,
// then closes the file. Returns EOF if error, otherwise 0
int fclose(FILE *fp)
{
    int result = 0;

    if (fp == NULL || fp < _iob || fp >= _iob + OPEN_MAX)
        return EOF; // invalid pointer provided
    if ((fp->flag & _WRITE) == _WRITE)
        result = fflush(fp); // write out anything still buffered
    free(fp->base);          // free(NULL) is a no-op, so no guard is needed
    if (close(fp->fd) != 0)  // note: it's legal to close stdin, stdout, and stderr
        result = EOF;

    // reset everything so the FILE object is available for future fopen calls
    fp->cnt = fp->flag = 0;
    fp->fd = -1; // invalid descriptor, to prevent improper usage of this FILE's fd
    fp->ptr = fp->base = NULL;
    return result;
}

// print an error message and exit. Since normal printf messages aren't coded, use putc for stderr
void error(char *msg)
{
    fflush(NULL); // flush all buffers before program exits and error is printed
    while (*msg != '\0')
        putc(*msg++, stderr);
    putc('\n', stderr);
    fflush(stderr); // make sure error message is printed
    exit(1);
}

// sets file position specified by offset and origin; subsequent read or write calls will access data at the new
// position. Returns EOF if error, otherwise 0. On success the _EOF flag is cleared; if lseek fails, _ERR is set
// and any buffered input is left untouched so the stream position does not silently move
int fseek(FILE *fp, long offset, int origin)
{
    if (fp == NULL || fp < _iob || fp >= _iob + OPEN_MAX)
        return EOF; // invalid pointer provided
    if (origin != SEEK_SET && origin != SEEK_CUR && origin != SEEK_END)
        return EOF; // invalid origin flag
    if ((fp->flag & (_READ | _WRITE)) == 0 || (fp->flag & (_READ | _WRITE)) == (_READ | _WRITE))
        return EOF; // neither _READ nor _WRITE set, or both are set

    if ((fp->flag & _WRITE) == _WRITE)
    {
        if (fflush(fp) == EOF) // flush write buffer before seeking
            return EOF;
    }
    else if (origin == SEEK_CUR)
    {
        // read() already advanced the descriptor past chars the program has not consumed yet;
        // fp->cnt is how many of those are still waiting in the buffer
        if (fp->base != NULL && offset >= 0 && offset <= fp->cnt)
        {
            // target is still buffered: move within the buffer and skip the seek and re-read
            fp->ptr += offset;
            fp->cnt -= (int) offset;
            fp->flag &= ~_EOF; // a successful seek always clears end-of-file, including a zero-length one
            return 0;
        }
        offset -= fp->cnt; // make the offset relative to the descriptor's position (negative results are okay)
    }

    if (lseek(fp->fd, offset, origin) == -1)
    {
        fp->flag |= _ERR;
        return EOF;
    }
    if ((fp->flag & _READ) == _READ)
        fp->cnt = 0; // discard the stale buffer only now that the seek succeeded; the next getc calls _fillbuf
    fp->flag &= ~_EOF; // it's legal to seek past the end of a file, so clear EOF whenever lseek is error free
    return 0;
}