The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 5.13 on page 118
Write the program tail,
which prints the last n
lines of its input. By default, n
is 10, say, but it can be changed by an optional argument, so that
tail -n
prints the last n
lines. The program should behave rationally no matter how unreasonable the input or the value of n.
Write the program so it makes the best use of available storage; lines should be stored as in the sorting program of Section 5.6, not in a two-dimensional array of fixed size.
Solution by Gregory Pietsch
/**************************************************************************** tail.c - Source code for the tail command AUTHOR: Gregory Pietsch <gkp1@flash.net> DESCRIPTION: tail prints the last part of each file on the command line (10 lines by default); it reads from standard input if no files are given or when a filename of `-' is encountered. If more than one file is given, it prints a header consisting of the file's name enclosed in `==>' and `<==' before the output for each file. There are two option formats for tail: the new one, in which numbers are arguments to the option letters; and the old one, in which the number precedes any option letters. In this version, the old format is barely supported. Supporting it fully is left as an exercise to the reader ;-). GNU's -f (or --follow) option is not supported. With that option, the program loops forever on the assumption that the file being tailed is growing. I couldn't figure out how to determine if the program is reading from a pipe in ANSI C; this option is ignored if reading from a pipe. ****************************************************************************/ /* include files */ #include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> /* macros */ #define NO_ARG 0 #define REQUIRED_ARG 1 #define OPTIONAL_ARG 2 /* how many characters will fill one's tail (literally) */ #define TAIL_BUFFER_SIZE 16384 /* how much for a string buffer */ #define TAIL_STRING_BUFFER_SIZE 256 /* need MIN */ #ifndef MIN #define MIN(x,y) ((x)<(y)?(x):(y)) #endif /* types */ typedef enum VERBOSITY_T { NEVER, SOMETIMES, ALWAYS } VERBOSITY_T; typedef struct LINE_QUEUE_EL_T { char *s; struct LINE_QUEUE_EL_T *next; } LINE_QUEUE_EL_T; typedef struct LINE_QUEUE_T { struct LINE_QUEUE_EL_T *first; struct LINE_QUEUE_EL_T *last; unsigned long num_elements; } LINE_QUEUE_T; /* GETOPT_LONG_OPTION_T: The type of long option */ typedef struct GETOPT_LONG_OPTION_T { char *name; /* the name of the long option */ int has_arg; /* one of the above macros */ int *flag; /* determines if getopt_long() returns a * value for a long option; if it is * non-NULL, 0 is returned as a function * value and the value of val is stored in * the area pointed to by flag. Otherwise, * val is returned. */ int val; /* determines the value to return if flag is * NULL. */ } GETOPT_LONG_OPTION_T; typedef enum GETOPT_ORDERING_T { PERMUTE, RETURN_IN_ORDER, REQUIRE_ORDER } GETOPT_ORDERING_T; /* globally-defined variables */ char *optarg = NULL; int optind = 0; int opterr = 1; int optopt = '?'; /* statically-defined variables */ static int show_help = 0; static int show_version = 0; static char *shortopts = "c:l:n:qv"; static GETOPT_LONG_OPTION_T longopts[] = { {"bytes", REQUIRED_ARG, NULL, 'c'}, {"lines", REQUIRED_ARG, NULL, 'n'}, {"quiet", NO_ARG, NULL, 'q'}, {"silent", NO_ARG, NULL, 'q'}, {"verbose", NO_ARG, NULL, 'v'}, {"help", NO_ARG, &show_help, 1}, {"version", NO_ARG, &show_version, 1}, {NULL, 0, 0, 0} }; static char *program_name; static int flag_bytes = 0; static VERBOSITY_T flag_verbosity = SOMETIMES; static unsigned long number = 0; static int flag_skip = 0; /* functions */ /* reverse_argv_elements: reverses num elements starting at argv */ static void reverse_argv_elements(char **argv, int num) { int i; char *tmp; for (i = 0; i < (num >> 1); i++) { tmp = argv[i]; argv[i] = argv[num - i - 1]; argv[num - i - 1] = tmp; } } /* permute: swap two blocks of argv-elements given their lengths */ static void permute(char **argv, int len1, int len2) { reverse_argv_elements(argv, len1); reverse_argv_elements(argv, len1 + len2); reverse_argv_elements(argv, len2); } /* is_option: is this argv-element an option or the end of the option list? */ static int is_option(char *argv_element, int only) { return ((argv_element == NULL) || (argv_element[0] == '-') || (only && argv_element[0] == '+')); } /* getopt_internal: the function that does all the dirty work */ static int getopt_internal(int argc, char **argv, char *shortopts, GETOPT_LONG_OPTION_T * longopts, int *longind, int only) { GETOPT_ORDERING_T ordering = PERMUTE; static size_t optwhere = 0; size_t permute_from = 0; int num_nonopts = 0; int optindex = 0; size_t match_chars = 0; char *possible_arg = NULL; int longopt_match = -1; int has_arg = -1; char *cp; int arg_next = 0; /* first, deal with silly parameters and easy stuff */ if (argc == 0 || argv == NULL || (shortopts == NULL && longopts == NULL)) return (optopt = '?'); if (optind >= argc || argv[optind] == NULL) return EOF; if (strcmp(argv[optind], "--") == 0) { optind++; return EOF; } /* if this is our first time through */ if (optind == 0) optind = optwhere = 1; /* define ordering */ if (shortopts != NULL && (*shortopts == '-' || *shortopts == '+')) { ordering = (*shortopts == '-') ? RETURN_IN_ORDER : REQUIRE_ORDER; shortopts++; } else ordering = (getenv("POSIXLY_CORRECT") != NULL) ? REQUIRE_ORDER : PERMUTE; /* * based on ordering, find our next option, if we're at the beginning of * one */ if (optwhere == 1) { switch (ordering) { case PERMUTE: permute_from = optind; num_nonopts = 0; while (!is_option(argv[optind], only)) { optind++; num_nonopts++; } if (argv[optind] == NULL) { /* no more options */ optind = permute_from; return EOF; } else if (strcmp(argv[optind], "--") == 0) { /* no more options, but have to get `--' out of the way */ permute(argv + permute_from, num_nonopts, 1); optind = permute_from + 1; return EOF; } break; case RETURN_IN_ORDER: if (!is_option(argv[optind], only)) { optarg = argv[optind++]; return (optopt = 1); } break; case REQUIRE_ORDER: if (!is_option(argv[optind], only)) return EOF; break; } } /* we've got an option, so parse it */ /* first, is it a long option? */ if (longopts != NULL && (memcmp(argv[optind], "--", 2) == 0 || (only && argv[optind][0] == '+')) && optwhere == 1) { /* handle long options */ if (memcmp(argv[optind], "--", 2) == 0) optwhere = 2; longopt_match = -1; possible_arg = strchr(argv[optind] + optwhere, '='); if (possible_arg == NULL) { /* no =, so next argv might be arg */ match_chars = strlen(argv[optind]); possible_arg = argv[optind] + match_chars; match_chars = match_chars - optwhere; } else match_chars = (possible_arg - argv[optind]) - optwhere; for (optindex = 0; longopts[optindex].name != NULL; optindex++) { if (memcmp(argv[optind] + optwhere, longopts[optindex].name, match_chars) == 0) { /* do we have an exact match? */ if (match_chars == (int)(strlen(longopts[optindex].name))) { longopt_match = optindex; break; } /* do any characters match? */ else { if (longopt_match < 0) longopt_match = optindex; else { /* we have ambiguous options */ if (opterr) fprintf(stderr, "%s: option `%s' is ambiguous " "(could be `--%s' or `--%s')\n", argv[0], argv[optind], longopts[longopt_match].name, longopts[optindex].name); return (optopt = '?'); } } } } if (longopt_match >= 0) has_arg = longopts[longopt_match].has_arg; } /* if we didn't find a long option, is it a short option? */ if (longopt_match < 0 && shortopts != NULL) { cp = strchr(shortopts, argv[optind][optwhere]); if (cp == NULL) { /* couldn't find option in shortopts */ if (opterr) fprintf(stderr, "%s: invalid option -- `-%c'\n", argv[0], argv[optind][optwhere]); optwhere++; if (argv[optind][optwhere] == '\0') { optind++; optwhere = 1; } return (optopt = '?'); } has_arg = ((cp[1] == ':') ? ((cp[2] == ':') ? OPTIONAL_ARG : REQUIRED_ARG) : NO_ARG); possible_arg = argv[optind] + optwhere + 1; optopt = *cp; } /* get argument and reset optwhere */ arg_next = 0; switch (has_arg) { case OPTIONAL_ARG: if (*possible_arg == '=') possible_arg++; if (*possible_arg != '\0') { optarg = possible_arg; optwhere = 1; } else optarg = NULL; break; case REQUIRED_ARG: if (*possible_arg == '=') possible_arg++; if (*possible_arg != '\0') { optarg = possible_arg; optwhere = 1; } else if (optind + 1 >= argc) { if (opterr) { fprintf(stderr, "%s: argument required for option `", argv[0]); if (longopt_match >= 0) fprintf(stderr, "--%s'\n", longopts[longopt_match].name); else fprintf(stderr, "-%c'\n", *cp); } optind++; return (optopt = ':'); } else { optarg = argv[optind + 1]; arg_next = 1; optwhere = 1; } break; case NO_ARG: if (longopt_match < 0) { optwhere++; if (argv[optind][optwhere] == '\0') optwhere = 1; } else optwhere = 1; optarg = NULL; break; } /* do we have to permute or otherwise modify optind? */ if (ordering == PERMUTE && optwhere == 1 && num_nonopts != 0) { permute(argv + permute_from, num_nonopts, 1 + arg_next); optind = permute_from + 1 + arg_next; } else if (optwhere == 1) optind = optind + 1 + arg_next; /* finally return */ if (longopt_match >= 0) { if (longind != NULL) *longind = longopt_match; if (longopts[longopt_match].flag != NULL) { *(longopts[longopt_match].flag) = longopts[longopt_match].val; return 0; } else return longopts[longopt_match].val; } else return optopt; } int getopt_long(int argc, char **argv, char *shortopts, GETOPT_LONG_OPTION_T * longopts, int *longind) { return getopt_internal(argc, argv, shortopts, longopts, longind, 0); } void help(void) { puts( "OPTIONS" ); puts( "" ); puts( "-c N, --bytes N Print last N bytes. N is a nonzero integer," ); puts( " optionally followed by one of the following" ); puts( " characters:" ); puts( "" ); puts( " b 512-byte blocks." ); puts( " k 1-kilobyte blocks." ); puts( " m 1-megabyte blocks." ); puts( "" ); puts( "-N, -l N, -n N, Print last N lines." ); puts( "--lines N" ); puts( "" ); puts( "-q, --quiet, Never print filename headers. Normally, filename" ); puts( "--silent headers are printed if and only if more than one file" ); puts( " is given on the command line." ); puts( "" ); puts( "-v, --verbose Always print filename headers." ); puts( "" ); puts( "--help Print usage message and exit successfully."); puts( "" ); puts( "--version Print version information and exit successfully." ); } void version(void) { puts( "tail - output the last part of files" ); puts( "Version 1.0" ); puts( "Written by Gregory Pietsch" ); } /* allocate memory, die on error */ void *xmalloc(size_t n) { void *p = malloc(n); if (p == NULL) { fprintf(stderr, "%s: out of memory\n", program_name); exit(EXIT_FAILURE); } return p; } /* reallocate memory, die on error */ void *xrealloc(void *p, size_t n) { void *s; if (n == 0) { if (p != NULL) free(p); return NULL; } if (p == NULL) return xmalloc(n); s = realloc(p, n); if (s == NULL) { fprintf(stderr, "%s: out of memory\n", program_name); exit(EXIT_FAILURE); } return s; } /* get string duplicate */ char *xstrdup(char *s) { char *p = xmalloc(strlen(s) + 1); strcpy(p, s); return p; } /* queue stuff - get fresh queue */ LINE_QUEUE_T *lq_create(void) { LINE_QUEUE_T *lq = xmalloc(sizeof LINE_QUEUE_T); lq->first = NULL; lq->last = NULL; lq->num_elements = 0; return lq; } /* put an item onto the queue */ void lq_enq(LINE_QUEUE_T * lq, char *s) { LINE_QUEUE_EL_T *lq_el = xmalloc(sizeof LINE_QUEUE_EL_T); lq_el->s = xstrdup(s); lq_el->next = NULL; if (lq->first == NULL && lq->last == NULL) { /* first element */ lq->first = lq->last = lq_el; lq->num_elements = 1; } else { /* tack onto end */ lq->last->next = lq_el; lq->last = lq_el; lq->num_elements++; } } /* take an item off the queue */ char *lq_deq(LINE_QUEUE_T * lq) { char *s; LINE_QUEUE_EL_T *lq_el; if (lq->first == NULL) return NULL; lq_el = lq->first; s = lq_el->s; if (lq->first == lq->last) lq->first = lq->last = NULL; else lq->first = lq->first->next; free(lq_el); lq->num_elements--; return s; } /* output number lines -- this function is tough because I can only * use fseek() to rewind a text stream (See ISO C 7.9.9.2 if you don't * believe Richard Heathfield). */ void tail_lines(FILE * f) { char buffer[TAIL_BUFFER_SIZE]; size_t num_read; int last_is_nl = 0; unsigned long num_skipped = 0; int c; LINE_QUEUE_T *lq = NULL; char *s; size_t s_size = 0; size_t s_allocked = 0; char *p; if (flag_skip) { /* skip a bunch of lines, output everything else */ while ((c = getc(f)) != EOF && num_skipped < number) { if (c == '\n') num_skipped++; } while ((num_read = fread(buffer, 1, TAIL_BUFFER_SIZE, f)) != 0) { fwrite(buffer, 1, num_read, stdout); last_is_nl = (buffer[num_read - 1] == '\n'); } if (!last_is_nl) putchar('\n'); } else { lq = lq_create(); s = xmalloc(TAIL_STRING_BUFFER_SIZE); s_allocked = TAIL_STRING_BUFFER_SIZE; while ((c = getc(f)) != EOF) { /* add to s, if not at eof or end of line */ if (c != '\n') { if (s_size == s_allocked - 1) { s_allocked += TAIL_STRING_BUFFER_SIZE; s = xrealloc(s, s_allocked); } s[s_size++] = c; } else { /* enqueue s, possibly dequeueing if we don't need a line */ s[s_size] = '\0'; lq_enq(lq, s); if (lq->num_elements > number) free(lq_deq(lq)); s_size = 0; } } while (lq->num_elements != 0) { /* print out strings */ p = lq_deq(lq); puts(p); free(p); } free(s); free(lq); } } /* output number characters, or skip over number characters */ void tail_chars(FILE * f) { char buffer[TAIL_BUFFER_SIZE]; size_t num_read; int last_is_nl = 0; long lnum = number; if (flag_skip) fseek(f, lnum, SEEK_SET); else fseek(f, -lnum, SEEK_END); while ((num_read = fread(buffer, 1, TAIL_BUFFER_SIZE, f)) != 0) { fwrite(buffer, 1, num_read, stdout); last_is_nl = (buffer[num_read - 1] == '\n'); } if (!last_is_nl) putchar('\n'); } void parse_args(int argc, char **argv) { int opt; char *p; int flag_found_number = 0; int verbosity_changed = 0; do { switch ((opt = getopt_long(argc, argv, shortopts, longopts, NULL))) { case 'c': /* print bytes */ if (flag_found_number) { fprintf(stderr, "%s: invalid arguments\s", program_name); abort(); } flag_bytes = 1; p = optarg; if (*p == '+') { flag_skip = 1; p++; } for (number = 0; isdigit(*p); number = number * 10 + (*p++ - '0')); switch (*p) { case 'b': /* 512-byte blocks */ number *= 512; break; case 'k': /* kilobyte blocks */ number *= 1024; break; case 'm': /* megabyte blocks */ number *= 1048576; break; default: break; } flag_found_number = 1; break; case 'l': case 'n': /* lines */ if (flag_found_number) { fprintf(stderr, "%s: invalid arguments\s", program_name); abort(); } flag_bytes = 0; p = optarg; if (*p == '+') { flag_skip = 1; p++; } number = strtoul(p, NULL, 10); flag_found_number = 1; break; case 'q': /* quiet */ if (verbosity_changed) { fprintf(stderr, "%s: invalid arguments\s", program_name); abort(); } verbosity_changed = 1; flag_verbosity = NEVER; break; case 'v': /* verbose */ if (verbosity_changed) { fprintf(stderr, "%s: invalid arguments\s", program_name); abort(); } verbosity_changed = 1; flag_verbosity = ALWAYS; break; case '?': /* invalid option */ fprintf(stderr, "For help, type:\n\t%s --help\n", program_name); exit(EXIT_FAILURE); case 1: case 0: if (show_help || show_version) { if (show_help) help(); if (show_version) version(); exit(EXIT_SUCCESS); } break; default: break; } } while (opt != EOF); if (flag_found_number == 0 || number == 0) { /* didn't find anything, so set default */ flag_bytes = 0; number = 10; } } int main(int argc, char **argv) { int i; int j; unsigned long ul; char **new_argv = xmalloc((argc + 1) * (sizeof(char *))); char *allocked_argvs = xmalloc(argc + 1); char *p; char *s; char *t; FILE *f; int flag_plus = 0; memset(allocked_argvs, 0, argc + 1); new_argv[0] = program_name = argv[0]; /* deal with silly old-format arguments */ for (i = 1, j = 1; i < argc; i++) { p = argv[i]; flag_plus = 0; /* handle options first */ if (*p == '-' || *p == '+') { if (isdigit(p[1]) || p[1] == '+' || *p == '+') { /* rearrange p */ s = xmalloc(strlen(p) + 3); t = s; *t++ = '-'; if (*p == '-') p++; ul = 0; if (*p == '+') { flag_plus = 1; p++; } while (isdigit(*p)) { ul = ul * 10 + (*p - '0'); p++; } if (strchr(p, 'q') != NULL) *t++ = 'q'; if (strchr(p, 'v') != NULL) *t++ = 'v'; if (strpbrk(p, "cbkm") != NULL) *t++ = 'c'; if (strchr(p, 'l') != NULL) *t++ = 'l'; if (strchr(p, 'n') != NULL || t[-1] == '-') *t++ = 'n'; if (flag_plus) *t++ = '+'; sprintf(t, "%lu", ul); t += strlen(t); if (strchr(p, 'b') != NULL) *t++ = 'b'; if (strchr(p, 'k') != NULL) *t++ = 'k'; if (strchr(p, 'm') != NULL) *t++ = 'm'; *t = '\0'; new_argv[j] = s; allocked_argvs[j++] = 1; } else new_argv[j++] = argv[i]; } } for (i = 1; i < argc; i++) { /* handle file names */ p = argv[i]; if (*p != '-') new_argv[j++] = p; } new_argv[argc] = NULL; parse_args(argc, new_argv); if (optind == argc || (optind == argc - 1 && strcmp(argv[optind], "-") == 0)) { /* no more argv-elements, tail stdin */ if (flag_verbosity == ALWAYS) puts("==> standard input <=="); flag_bytes ? tail_chars(stdin) : tail_lines(stdin); } else if (optind == argc - 1) { /* one file */ f = fopen(new_argv[optind], flag_bytes ? "rb" : "r"); if (f == NULL) { fprintf(stderr, "%s: Can't open file %s\n", program_name, new_argv[optind]); abort(); } if (flag_verbosity == ALWAYS) printf("==> %s <==\n", new_argv[optind]); flag_bytes ? tail_chars(f) : tail_lines(f); fclose(f); } else { /* multiple files */ for (i = optind; i < argc; i++) { if (strcmp(new_argv[i], "-") == 0) { f = stdin; if (flag_verbosity != NEVER) puts("==> standard input <=="); } else { f = fopen(new_argv[i], flag_bytes ? "rb" : "r"); if (f == NULL) { fprintf(stderr, "%s: can't open %s\n", argv[0], argv[i]); abort(); } if (flag_verbosity != NEVER) printf("==> %s <==\n", new_argv[i]); } flag_bytes ? tail_chars(f) : tail_lines(f); if (f != stdin) fclose(f); } } /* free all we can */ for (i = 1; i <= argc; i++) if (allocked_argvs[i]) free(new_argv[i]); free(allocked_argvs); return EXIT_SUCCESS; } /* END OF FILE tail.c */
Solution by Steven Huang
/* K&R Exercise 5-13 */ /* Steven Huang */ #include <stdio.h> #include <stdlib.h> #include <string.h> #define DEFAULT_NUM_LINES 10 #define MAX_LINE_LEN 1000 /* Points of interest for a novice: 1. atoi() has a normally annoying property of not being able to tell the caller conclusively whether the input was bad ("abc") or it was really zero ("0"), because it returns 0 for both cases. Here, we exploit that property, because we only want to accept options in the form of "-n". 2. Try to understand how this program deals with input that doesn't even have as many lines as the line_ptrs[] array. That is, how does this program degenerate into just displaying everything it read? (Hint: what does it mean when line_ptrs[x] is NULL?) 3. Using modulo arithmetic on an index to a circular array is a common and useful technique. Try to understand the range of values that current_line (and j, later) will take. In particular, why shouldn't we just do this: for (i = 0; i < num_lines; i++) if (line_ptrs[i]) printf("%s", line_ptrs[i]); 4. Why do we still use a "%s" to display what's inside line_ptrs, rather than just: printf(line_ptrs[i]); 5. There is a bug in this program, where you see: numlines = -numlines; When will this break? */ /* K&R2 p29 */ int getline(char s[], int lim) { int c, i; for (i = 0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; i++) s[i] = c; if (c == '\n') s[i++] = c; s[i] = '\0'; return i; } /* duplicates a string */ char *dupstr(const char *s) { char *p = malloc(strlen(s) + 1); if (p) strcpy(p, s); return p; } int main(int argc, char *argv[]) { int num_lines = DEFAULT_NUM_LINES; char **line_ptrs; char buffer[MAX_LINE_LEN]; int i; unsigned j, current_line; if (argc > 1) { /* We use a little trick here. The command line parameter should be in the form of "-n", where n is the number of lines. We don't check for the "-", but just pass it to atoi() anyway, and then check if atoi() returned us a negative number. */ num_lines = atoi(argv[1]); if (num_lines >= 0) { fprintf(stderr, "Expected -n, where n is the number of lines\n"); return EXIT_FAILURE; } /* Now make num_lines the positive number it's supposed to be. */ num_lines = -num_lines; } /* First, let's get enough storage for a list of n pointers... */ line_ptrs = malloc(sizeof *line_ptrs * num_lines); if (!line_ptrs) { fprintf(stderr, "Out of memory. Sorry.\n"); return EXIT_FAILURE; } /* and make them all point to NULL */ for (i = 0; i < num_lines; i++) line_ptrs[i] = NULL; /* Now start reading */ current_line = 0; do { getline(buffer, sizeof buffer); if (!feof(stdin)) { if (line_ptrs[current_line]) { /* there's already something here */ free(line_ptrs[current_line]); } line_ptrs[current_line] = dupstr(buffer); if (!line_ptrs[current_line]) { fprintf(stderr, "Out of memory. Sorry.\n"); return EXIT_FAILURE; } current_line = (current_line + 1) % num_lines; } } while (!feof(stdin)); /* Finished reading the file, so we are ready to print the lines */ for (i = 0; i < num_lines; i++) { j = (current_line + i) % num_lines; if (line_ptrs[j]) { printf("%s", line_ptrs[j]); free(line_ptrs[j]); } } return EXIT_SUCCESS; }
Solution by Jesus Alvarez (Category 0)
#include <stdio.h> #include <stdlib.h> #define MAXLINES 1024 #define MAXINPUT 10240 #define DEF_NUM_LINES 10 int getlines (char *); void parse_args (int, char **); char linestr[MAXINPUT]; char *lineptr[MAXLINES]; int num_of_lines = DEF_NUM_LINES; int main (int argc, char *argv[]) { int lines; int ltp = 0; int i; int c; *lineptr = linestr; /* Parse the argument strings passed to the program */ if (argc > 1) { parse_args(argc, argv); } /* Get the input from the user */ lines = getlines(linestr); if (num_of_lines == 0) { num_of_lines = 10; } ltp = lines < num_of_lines ? lines : num_of_lines; printf ("\n>>> Printing %d line(s):\n", ltp); if (ltp == DEF_NUM_LINES) { printf (">>> The default number of lines to print is "); printf ("%d\n", DEF_NUM_LINES); } printf ("\n"); for (i = lines; i > 0; i--) { while ((c = *lineptr[0]++) != '\n') { if (i <= ltp) { printf ("%c", c); } } if (c == '\n' && i <= ltp) { printf ("\n"); } } return 0; } int getlines (char *buffer) { int i, count = 0; char c; for (i = 0; (c = getchar()) != EOF && i < MAXINPUT; i++) { *buffer++ = c; if (c == '\n') { lineptr[++count] = buffer; } } *buffer++ = '\0'; return count; } void parse_args (int argc, char **argv) { char c; while (--argc > 0 && (*++argv)[0] == '-') { c = *++argv[0]; switch (c) { case 'n': num_of_lines = atoi(*(argv + 1)); break; } } }
Solution by [blob84] (Category 0)
It is possible to use the code at ¶5.6
#include <stdio.h> #include <string.h> #include <stdlib.h> #define MAXLINES 5000 /* max #lines to be sorted */ #define MAXN 100 char *lineptr[MAXLINES]; /* pointers to text lines */ int readlines(char *lineptr[], int nlines); void writelines(char *lineptr[], int nlines); /* sort input lines */ int main(int argc, char **argv) { int c, nlines, iline = 0, n = 0; /* number of input lines read */ char getn[MAXN]; char *s = getn; if (argc == 1) iline = 10; else if (argc == 2 && (*++argv)[0] == '-') { if ((c = *++argv[0]) == 'n') { if (isdigit(*s = *++argv[0])) { while (isdigit(*++s = *++argv[0])) ; if (*argv[0] != '\0') { printf("error 1:\n"); return 1; } *s = '\0'; iline = atoi(getn); } else { printf("error 2:\n"); return 1; } } else { printf("error 3:\n"); return 1; } } else if (argc == 3 && (*++argv)[0] == '-') { if (*++argv[0] == 'n' && isdigit(c = *argv[1])) iline = atoi(argv[1]); else { printf("error 4:\n"); return 1; } } if ((nlines = readlines(lineptr, MAXLINES)) >= 0) { if (nlines-iline > 0) writelines(lineptr+(nlines-iline), iline); else writelines(lineptr, nlines); return 0; } else { printf("error: input too big to sort\n"); return 1; } } #define MAXLEN 1000 /* max length of any input line */ int pgetline(char *, int); char *alloc(int); /* readlines: read input lines */ int readlines(char *lineptr[], int maxlines) { int len, nlines; char *p, line[MAXLEN]; nlines = 0; while ((len = pgetline(line, MAXLEN)) > 0) if (nlines >= maxlines || (p = alloc(len)) == NULL) return -1; else { line[len-1] = '\0'; /* delete newline */ strcpy(p, line); lineptr[nlines++] = p; } return nlines; } /* writelines: write output lines */ void writelines(char *lineptr[], int nlines) { while (nlines-- > 0) printf("%s\n", *lineptr++); } int pgetline(char *s, int lim) { int c; char *ps = s; while (--lim > 0 && (c = getchar()) != EOF && c != '\n') *ps++ = c; if (c == '\n') *ps++ = c; *ps = '\0'; return ps-s; } #define ALLOCSIZE 10000 static char allocbuf[ALLOCSIZE]; static char *allocp = allocbuf; char *alloc(int n) { if (allocbuf + ALLOCSIZE - allocp >= n) { allocp += n; return allocp - n; } else return 0; } void afree(char *p) { if (p >= allocbuf && p < allocbuf + ALLOCSIZE) allocp = p; }
Solution by Sosnovski Eugene (Category 0)
/*******************************************************************************/ /* the first thing that came to mind - take program from Chapter 5.6 , */ /* and return last N rows: */ /* Creating "char allocbuf[]", on each string call "p = alloc (len)", */ /* copy string to p and wrint lineptr[i] = p (in array of pointers). */ /* */ /* But . In exercise said about the "maximum memory savin"! Lack the proposed */ /* method is that "allocbuf" spent irrationally. Suppose it was filling, but */ /* stdio has any strings. Why not "erase" all string except */ /* the last n (interest strings) and continue to work? Simply shift */ /* the last n strings (by character) in the "start allocbuf" (remember the */ /* difference = `diff`), and in array of pointer shift interest pointers */ /* (last n) to the `diff` values. */ /*******************************************************************************/ #include <stdio.h> #include <string.h> #include <stdlib.h> #define DEFAULT_STRING_NUM 10 #define MAXLEN 1000 #define MAXLINES 5000 char *lineptr[MAXLINES]; int getline(char *, int); char *smartalloc(int n, char *begin); int readlines(char *linestack[], int tail_num); void writelines(char *lineptr[], int tail_num, int nlines); void writespecific(char *lineptr[]); int main(int argc, char *argv[]) { int num, nlines; if (argc == 3 && strcmp(*(argv+1),"-n") == 0) { num = atoi(*(argv+2)); if (num < 1) { printf("usage: 'tail -n COUNT_LINES' \n"); return -1; } else if (num > MAXLINES) { printf("n must be in {1..%d}\n",MAXLINES); return -1; } } else if (argc == 1) { num = DEFAULT_STRING_NUM; } else { printf("usage: 'tail -n COUNT_LINES' \n"); return -1; } if ((nlines=readlines(lineptr, num)) >= 0) { num = (num > nlines) ? nlines : num ; /* if in stdio lines less, than user want (num) */ writelines(lineptr, num, nlines); return 0; } else { printf("error: input too big to tail\n"); return -1; } } /* readlines: */ int readlines(char *lineptr[], int tail_num) { int len; /* length string getted from getline */ int nlines = 0; /* total count strings ( <= tail_num )*/ char *p; /* pointer to current free position (returned by smartalloc) */ char *leftpos = NULL; /* pointer to beginning interest block (use in smartalloc) */ char *rightpos = NULL; /* pointer to end interest block (calculate as p+len) */ char line[MAXLEN]; /* current string */ while ((len = getline(line, MAXLEN)) > 0) { if ((p = smartalloc(len+1, leftpos)) == NULL) { /* if buffer overfull */ return -1; } else { /* check is `memory moved`? if next pointer not "next" */ if (rightpos+1 > p) { /* moving interest pointers value */ for (int i = 0; i < tail_num; i++) lineptr[nlines-i-1] -= rightpos - p + 1; /* (rightpos - p + 1) - moving 'diff' (in pointers) */ } /* copying string */ line[len] = '\0'; /* delete \n in line */ strcpy(p, line); lineptr[nlines++] = p; if (nlines <= tail_num) leftpos = lineptr[0]; /* first element */ else leftpos = lineptr[nlines-tail_num]; /* first interest element */ rightpos = p + len; } } return nlines; } void writelines(char *lineptr[], int tail_num, int nlines) { for (int i = 0; i < tail_num; i++) printf("%s\n",lineptr[nlines-tail_num+i]); } #define ALLOCSIZE 100 static char allocbuf[ALLOCSIZE]; static char *allocp = &allocbuf[0]; static void movemem(char *start); /** * n - lenght of asking memory * *begin - pointer to beginning interest blocks */ char *smartalloc(int n, char *begin) { /* if first calling, begin set to buffer-begin */ begin = (begin == 0) ? allocbuf : begin; // begin == 0 eq begin == NULL if (allocbuf + ALLOCSIZE - allocp >= n) { allocp += n; return allocp - n; } else { /* buffer full */ movemem(begin); if (allocbuf + ALLOCSIZE - allocp >= n) { allocp += n; return allocp - n; } else return 0; /* movemem does not solve problem */ } } /** * move important memory blocks (it start from *start) to buffer-begin */ static void movemem(char *begin) { if (begin > allocbuf && begin < allocp) { char *p = &allocbuf[0]; while (begin < allocp) *p++ = *begin++; allocp = p; } } /** * In difference with getline-KnR: * this function cut '\n' */ int getline(char s[], int lim) { int i, c; for (i = 0; i<lim-1 && (c=getchar())!= EOF && c!='\n'; i++) s[i] = c; s[i] = '\0'; return i; }
Solution by codybartfast (Category 1)
I think this solution is distinct from most of the above in that:
- There is no limit* on:
- the length of the lines,
- the number of lines,
- nor the overall size of the tail.
- Reading lines doesn't require copying data between arrays.
It should also behave correctly if the input is not properly terminated with a newline.
It handles a large tail by enlarging the tail buffer if necessary.
(* of course this is not strictly true, but the tail size and the number of lines are bound by ULONG_MAX. If long is 64 bits then this is effectively unlimited)
#include <ctype.h> #include <limits.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #define INITIAL_BUFFSIZE (1 << 10) #define DEFAULT_TAILSIZE 10 enum { OK, ERROR }; /* Single buffer to store characters, loops back to start on reaching limit */ unsigned long buffsize; unsigned long maxbuffsize = ULONG_MAX; char *buff, *cursor, *bufflimit; /* max number of lines to print */ unsigned long tailsize; /* pointers to the start of lines in the buffer, also loops back to start on */ /* reaching limit */ char **lines, **firstln, **lastln, **lineslimit; int readlines(void); void writelines(void); int init_buff(unsigned long); int init_lines(void); int enlargebuff(void); int settailsize(int argc, char **argv); int main(int argc, char **argv) { if (settailsize(argc, argv) != OK) return 0; if (tailsize == 0) return 0; if (init_buff(INITIAL_BUFFSIZE) != OK) return 0; if (init_lines() != OK) return 0; if (readlines() == OK) writelines(); free(lines); free(buff); return 0; } int readlines(void) { char c; int afternl = 0; /* true if previously character read was a newline */ while ((c = getchar()) != EOF) { if (afternl) { if (++lastln == lineslimit) lastln = lines; *lastln = cursor; if (lastln == firstln) if (++firstln == lineslimit) firstln = lines; } *cursor = c; if (++cursor == bufflimit) cursor = buff; if (cursor == *firstln) { if (enlargebuff() != OK) return ERROR; } afternl = (c == '\n'); } return OK; } void writelines(void) { char *bptr; if (*firstln <= cursor) { for (bptr = *firstln; bptr < cursor; bptr++) putchar(*bptr); } else { for (bptr = *firstln; bptr < bufflimit; bptr++) putchar(*bptr); for (bptr = buff; bptr < cursor; bptr++) putchar(*bptr); } } int init_buff(unsigned long size) { buffsize = size; buff = malloc(buffsize * sizeof(char)); if (buff == NULL) { printf("error: Insufficient memory (allocating buff)\n"); return ERROR; } cursor = buff; bufflimit = buff + buffsize; return OK; } int init_lines(void) { lines = malloc(tailsize * sizeof(char *)); if (lines == NULL) { printf("error: Insufficient memory (allocating lines)\n"); return ERROR; } firstln = lastln = lines; *lines = buff; lineslimit = lines + tailsize; return OK; } int enlargebuff(void) { unsigned long used; char *bptr, **lptr; ptrdiff_t offset; /* copy info about current buffer before it's replaced */ char *pbuff = buff, *pcursor = cursor, *pbufflimit = bufflimit; char *pfirst = *firstln; ptrdiff_t pfirst2limit = pbufflimit - pfirst; if (buffsize == maxbuffsize) { printf("error: Cannot increase buffer size beyond %ld\n", buffsize); return ERROR; } /* replace buff */ buffsize = buffsize > maxbuffsize / 4 ? maxbuffsize : buffsize * 4; if (init_buff(buffsize) != OK) { return ERROR; } /* copy data from previous buff to new buff */ if (pfirst < pcursor) { for (bptr = pfirst; bptr < pcursor;) *cursor++ = *bptr++; used = pcursor - pfirst; } else { for (bptr = pfirst; bptr < pbufflimit;) *cursor++ = *bptr++; for (bptr = pbuff; bptr < pcursor;) *cursor++ = *bptr++; used = pfirst2limit + (pcursor - pbuff); } /* update line pointers to point to lines' new positions in new buff */ for (lptr = lines; lptr < lineslimit && *lptr != NULL; lptr++) { offset = *lptr >= pfirst ? *lptr - pfirst : pfirst2limit + (*lptr - pbuff); *lptr = buff + offset; } /* set cursor for new buff */ cursor = buff + used; free(pbuff); return OK; } int settailsize(int argc, char **argv) { char c, *ptr; unsigned long d, n = 0; unsigned long max = ULONG_MAX; if (argc <= 1) { tailsize = DEFAULT_TAILSIZE; } else if (argc == 2 && *argv[1] == '-') { ptr = argv[1] + 1; if (*ptr == '\0') /* match behaviour of linux tail */ n = DEFAULT_TAILSIZE; while ((c = *ptr++) != '\0') { if (!isdigit(c)) { printf("error: Argument not numeric, got: %s\n", argv[1]); return ERROR; } if (n > (max / 10)) { printf("error: Argument too large, max: %lu\n", max); return ERROR; } n *= 10; d = (c - '0'); if (d > max - n) { printf("error: Argument too large, max: %lu\n", max); return ERROR; } n += d; } tailsize = n; } else if (argc == 2) { printf("error: Expected argument to begin with '-'\n"); return ERROR; } else { printf("error: Expected at most one argument, got %d.\n", argc - 1); return ERROR; } return OK; }
Just for the laughs I also wrote a Categor 1 solution using features from later in the book.
Solution by anonymous
This exercise required deep thought when trying to figure out how to implement tail with the provided requirements and not just seek to the end of the file or input like I imagine the real tail program does. I came up with the idea of a circulating storage system that keeps the last n lines of text in it by overwriting the oldest line with the newest line. Once I realized how practical this system was, I used most of the code from section 5.6 as requested and created my tail program.
This program uses two circular queue systems (first in first out) where the oldest data is overwritten whenever there is no more free space. The index used to find the data in the arrays loops around each time it gets to the end and starts back at the beginning. One queue system stores the pointers and the others stores the strings in allocbuf. They are independent of each other so even if they are different sizes or the size of the input lines varies, the program will still function correctly.
The alloc system is rather complex and the reason for this is because of the exercise constraints. I quickly built a program that used the requested code that basically recreated a two-dimensional array using an array of pointers and allocbuf, but that wasn't allowed. I considered writing something from scratch instead of using the ridge code from section 5.6, but that also wasn't allowed. I then thought to myself, well, if the user supplied too much data, I would have just printed a message and quit, but the exercise said it must handle it. On top of that, it wants efficient data storage. As such, I had to design a memory storage/allocation system that isn't simple, which means I am sure there is a bug or two in there somewhere. However, it worked for all of my test cases, so I can only hope that I wrote good code.
Each time memory is requested, the alloc function checks to see if there is enough space to fit it in the buffer without being truncated. If it fits, it then verifies that it will not overwrite any memory used by an existing pointer. If it fails this test, it will move all of the strings over to the right to ensure that no memory is wasted in-between the strings. Finally, if shifting everything to the right freed up enough space, it allocates it and returns the pointer. If not, it returns 0 to cause the program to quit.
I tested all of the other solutions here and this is my feedback for everyone:
- Gregory Pietsch, your program is rather long (799 lines) and sadly it didn't compile for me using gcc.
- Steven Huang, from what I can tell, everything you do is with dynamic memory allocation which doesn't comply with the constrains of the exercise.
- Jesus Alvarez, if I gave your program input longer than 1024 lines and I requested more than 1024 lines for the tail output, the program crashes after some kind of buffer overflow.
- Blob84, your program doesn't allow a user to input more than the maxlines size, even if they wanted to print only the last line (-n 1)
- Sosnovski Eugene, your movemem function inspired me to come up with my solution. Sadly, I discovered that your system can't handle large input with a small tail size once the number of maxlines is about half of the input size. This is true even when I gave alloc a lot of memory to use.
- codybartfast, from what I can tell, everything you do is with dynamic memory allocation which doesn't comply with the constrains of the exercise. Also, since malloc hasn't been introduced yet (first time I found it was on page 143 and the exercise is on page 118), I updated your category from 0 to 1.
Here is my code:
#include <stdio.h> #include <string.h> #include <ctype.h> #include <stdlib.h> /* Exercise 5-13. Write the program tail, which prints the last n lines of its input. By default, n is 10, let us say, but it can be changed by an optional argument, so that tail -n prints the last n lines. The program should behave rationally no matter how unreasonable the input or the value of n. Write the program so it makes the best use of available storage; lines should be stored as in the sorting program of Section 5.6, not in a two-dimensional array of fixed size. */ #define MAXLINES 5000 // max number of lines #define MAXLEN 1000 // max length of any input line. Note: if the line length is greater than this, it will be split into different "lines" in the program. #define ALLOCSIZE MAXLINES * MAXLEN // size of available space char *lineptr[MAXLINES] = {0}; // pointers to text lines. Initialized to null static char allocbuf[ALLOCSIZE]; // storage for alloc static char *allocp = allocbuf; // next free position in allocbuf int lineIndex = 0, tailLen = 0; // used to keep track of what the current line pointer is and the tailLen value int readlines(char *lineptr[]); void printlines(char *lineptr[]); int getline(char *s, int lim); char *alloc(int n); int main(int argc, char *argv[]) { int linesread = 0; tailLen = 10; while (--argc > 0 && **++argv == '-') // checks the first char in each argument starting at argv[1] if (isdigit(*++*argv)) // checks the second char in the argument if it started with a - tailLen = atoi(*argv); // If it is a digit, get the int from it else argc = -1; // triggers usage message if (argc != 0) { printf("Usage: tail [-n]\n"); return 1; } else if (tailLen == 0) // if length of zero requested, just exit return 0; else if (tailLen <= MAXLINES && (linesread = readlines(lineptr)) >= 0) { if (tailLen > linesread) // if the tailLen requested is higher than the number of linesread, update tailLen since printlines uses it to stay in the bounds of lineptr tailLen = linesread; if (lineIndex >= tailLen) // same thing for lineIndex, except set it to 0 since it is a circular queue system lineIndex = 0; printlines(lineptr); return 0; } else if (tailLen > MAXLINES) { printf("error: tail length too big. Max supported tail length is %d\n", MAXLINES); return 1; } else { printf("error: input too big to fit in the buffer\n"); return 1; } } // read input lines and return number of lines read int readlines(char *lineptr[]) { int len, nlines = 0; char *p, line[MAXLEN]; while ((len = getline(line, MAXLEN)) > 0) { if ((p = alloc(len + 1)) == NULL) return -1; // not enough space to copy the line into the buffer strcpy(p, line); // copy line from getline to pointer p which points to the space given by alloc lineptr[lineIndex] = p; // set lineprt to pointer p if (++lineIndex == tailLen) // circle around and start overwriting the oldest pointers and, consequently, the oldest lines lineIndex = 0; if (++nlines > tailLen) // if the number of lines processes so far is greater than tailLen, reset it to tailLen since that is the real amount of lines stored nlines = tailLen; } return nlines; // number of lines read } // write output lines starting at lineIndex and loop around to lineIndex - 1 or to tailLen - 1 in case lineIndex is 0 void printlines(char *lineptr[]) { int nlines = tailLen; while (nlines-- > 0) { printf("%s", lineptr[lineIndex++]); if (lineIndex >= tailLen) // since a circular queue system is used, need to loop around once it reaches the max value (tailLen) lineIndex = 0; } } // get line into s, return length. If line is longer than lim, only up to lim - 1 chars is returned (without \n) and the next call to getline returns the rest. Lim includes \n and \0 chars int getline(char *s, int lim) { int c; char *original = s; while (--lim > 0 && (c = getchar()) != EOF && c != '\n') *s++ = c; if (c == '\n') *s++ = c; *s = '\0'; return s - original; // returns length } // return pointer to n characters. If there is not enough space, try to reclaim unused space and try again. Returns 0 if cannot fit n chars in buffer char *alloc(int n) { if (allocp == allocbuf + ALLOCSIZE) // if the last allocated space went to the end of the buffer, then allocp == the end + 1 (out of bounds of array), so reset it to the beginning allocp = allocbuf; if (allocbuf + ALLOCSIZE - allocp >= n) // it fits! However, more checks need to be done since it might overwrite lines of text still being tracked by pointers { int next = (lineIndex + 1 < tailLen) ? lineIndex + 1 : 0; // set next to the index that is after lineIndex (2nd oldest line) if (lineptr[next] != NULL && allocp < lineptr[next] && allocp + n - lineptr[next] > 0) { // if the above line is true, there is not enough room since the space request will overwrite data in use by another pointer. if (MAXLINES > 1) // If there is more than one line allowed in the buffer, attempt to make more room and try to fit the request one last time { int rightMost = 0, i; for (i = 0; i < tailLen; i++) // this loop searches all of the pointers to find the string with the highest memory address (the right most one in the buffer) if (lineptr[i] != NULL && lineptr[rightMost] < lineptr[i]) rightMost = i; i--; // i equals tailLen in the last step, so decrement it one so it equals the last valid index in lineptr (used below) char temp[MAXLEN]; do { if ((rightMost == i && lineptr[rightMost] + strlen(lineptr[rightMost]) < allocbuf + ALLOCSIZE)) // this runs only on the first iteration { // if true, then the rightMost string can be shifted to the right so the '\0' char is at the last available space in allocbuf strcpy(temp, lineptr[rightMost]); // copies str to temp lineptr[rightMost] = allocbuf + ALLOCSIZE - strlen(lineptr[rightMost]) - 1; // updates the pointer to equal the farthest right position without truncating the string. Note: strlen does not include the '\0', hence the - 1 strcpy(lineptr[rightMost], temp); // copies the str back to allocbuf, but at the new position } else if (lineptr[rightMost] + strlen(lineptr[rightMost]) < lineptr[rightMost + 1]) // this is for the rest of the iterations of the loop { strcpy(temp, lineptr[rightMost]); // copies str to temp lineptr[rightMost] = lineptr[rightMost + 1] - strlen(lineptr[rightMost]) - 1; // updates the pointer to equal the farthest right position without overwriting the string to the right of it strcpy(lineptr[rightMost], temp); // copies the str back to allocbuf, but at the new position } if (--rightMost < 0) // increment rightMost and reset it to the end if it equals zero since the pointers are using a cirular queue system rightMost = tailLen - 1; } while (rightMost != i); // once every string/pointer was shifted to the right as much as possible, it will equal rightMost again so break loop if (lineptr[next] != NULL && allocp < lineptr[next] && allocp + n - lineptr[next] > 0) // same check as above, but now there should be more space return 0; // if still not enough room after shifting everything to the right, return 0 } else return 0; // if there is only 1 line in the buffer and there wasn't space to begin with, return 0 } // if it makes it this far, there is enough space without overwriting any valid string tracked by a pointer. allocp += n; // update marker to note where the next unallocated memory starts return allocp - n; // old pointer address } else if (allocp != allocbuf) // this flag prevents an infinite recursive loop and is useful since I don't want this to run if they are equal { allocp = allocbuf; // reset allocp to the start and see if it will fit return alloc(n); } return 0; // not enough room in the buffer }