The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 5.11 on page 118
Modify the programs entab
and detab
(written as exercises in Chapter 1) to accept a list of tab stops as arguments. Use the default tab settings if there are no arguments.
Solution by Gregory Pietsch
Here's detab...
/****************************************************************************

detab.c - Source code for the detab command

AUTHOR: Gregory Pietsch

DESCRIPTION:

detab - expand tabs into spaces

****************************************************************************/

/* include files */

#include <ctype.h>              /* isdigit */
#include <stdio.h>
#include <stdlib.h>             /* malloc, free, exit, getenv, qsort, strtoul */
#include <string.h>

/* macros */

#define NO_ARG          0
#define REQUIRED_ARG    1
#define OPTIONAL_ARG    2

/* types */

/* GETOPT_LONG_OPTION_T: The type of long option */
typedef struct GETOPT_LONG_OPTION_T {
    char *name;                 /* the name of the long option */
    int has_arg;                /* one of the above macros */
    int *flag;                  /* determines if getopt_long() returns a
                                 * value for a long option; if it is
                                 * non-NULL, 0 is returned as a function
                                 * value and the value of val is stored in
                                 * the area pointed to by flag.  Otherwise,
                                 * val is returned. */
    int val;                    /* determines the value to return if flag is
                                 * NULL. */
} GETOPT_LONG_OPTION_T;

typedef enum GETOPT_ORDERING_T {
    PERMUTE,
    RETURN_IN_ORDER,
    REQUIRE_ORDER
} GETOPT_ORDERING_T;

/* globally-defined variables */

char *optarg = NULL;
int optind = 0;
int opterr = 1;
int optopt = '?';

/* statically-defined variables */

static char *program_name;
/* if nonzero, it means tab every x characters */
static unsigned long tab_every = 8;
/* -i: only handle initial tabs/spaces */
static int flag_initial = 0;
/* expand tabs into spaces */
static int flag_expand = 1;
static unsigned long *tab_stop_list = NULL;
static size_t num_tab_stops = 0;
static size_t num_tab_stops_allocked = 0;
static int show_help = 0;
static int show_version = 0;
static char *shortopts = "it:";
static GETOPT_LONG_OPTION_T longopts[] = {
    {"initial", NO_ARG, NULL, 'i'},
    {"tabs", REQUIRED_ARG, NULL, 't'},
    {"help", NO_ARG, &show_help, 1},
    {"version", NO_ARG, &show_version, 1},
    {NULL, 0, 0, 0}
};

/* functions */

/* reverse_argv_elements: reverses num elements starting at argv */
static void reverse_argv_elements(char **argv, int num)
{
    int i;
    char *tmp;

    for (i = 0; i < (num >> 1); i++) {
        tmp = argv[i];
        argv[i] = argv[num - i - 1];
        argv[num - i - 1] = tmp;
    }
}

/* permute: swap two blocks of argv-elements given their lengths */
static void permute(char **argv, int len1, int len2)
{
    reverse_argv_elements(argv, len1);
    reverse_argv_elements(argv, len1 + len2);
    reverse_argv_elements(argv, len2);
}

/* is_option: is this argv-element an option or the end of the option list?
 * A lone "-" is an operand (main() reads it as standard input), so it is
 * NOT an option; otherwise the short-option lookup would search shortopts
 * for '\0', match its terminator and read past the end of the string. */
static int is_option(char *argv_element, int only)
{
    return ((argv_element == NULL)
            || (argv_element[0] == '-' && argv_element[1] != '\0')
            || (only && argv_element[0] == '+'));
}

/* getopt_internal: the function that does all the dirty work.
 *
 * Parses the next option from argv (which must be NULL-terminated at
 * argv[argc]) and returns:
 *   - the option character for a short option, or val for a long option
 *     whose flag is NULL;
 *   - 0 for a long option whose flag is non-NULL (val is stored in *flag);
 *   - '?' for an unknown or ambiguous option, ':' for a missing required
 *     argument;
 *   - EOF when there are no more options; optind then indexes the first
 *     operand.
 * With PERMUTE ordering argv is reordered so operands end up after options.
 */
static int getopt_internal(int argc, char **argv, char *shortopts,
                           GETOPT_LONG_OPTION_T * longopts, int *longind,
                           int only)
{
    GETOPT_ORDERING_T ordering = PERMUTE;
    static size_t optwhere = 0;
    size_t permute_from = 0;
    int num_nonopts = 0;
    int optindex = 0;
    size_t match_chars = 0;
    char *possible_arg = NULL;
    int longopt_match = -1;
    int has_arg = -1;
    char *cp = NULL;
    int arg_next = 0;

    /* first, deal with silly parameters and easy stuff */
    if (argc == 0 || argv == NULL || (shortopts == NULL && longopts == NULL))
        return (optopt = '?');

    /* if this is our first time through; done before the end-of-list
     * checks so that they look at argv[1] rather than the program name */
    if (optind == 0)
        optind = optwhere = 1;

    if (optind >= argc || argv[optind] == NULL)
        return EOF;
    if (strcmp(argv[optind], "--") == 0) {
        optind++;
        return EOF;
    }

    /* define ordering */
    if (shortopts != NULL && (*shortopts == '-' || *shortopts == '+')) {
        ordering = (*shortopts == '-') ? RETURN_IN_ORDER : REQUIRE_ORDER;
        shortopts++;
    } else
        ordering = (getenv("POSIXLY_CORRECT") != NULL) ? REQUIRE_ORDER : PERMUTE;

    /* based on ordering, find our next option, if we're at the beginning of
     * one */
    if (optwhere == 1) {
        switch (ordering) {
        case PERMUTE:
            permute_from = optind;
            num_nonopts = 0;
            while (!is_option(argv[optind], only)) {
                optind++;
                num_nonopts++;
            }
            if (argv[optind] == NULL) {
                /* no more options */
                optind = permute_from;
                return EOF;
            } else if (strcmp(argv[optind], "--") == 0) {
                /* no more options, but have to get `--' out of the way */
                permute(argv + permute_from, num_nonopts, 1);
                optind = permute_from + 1;
                return EOF;
            }
            break;
        case RETURN_IN_ORDER:
            if (!is_option(argv[optind], only)) {
                optarg = argv[optind++];
                return (optopt = 1);
            }
            break;
        case REQUIRE_ORDER:
            if (!is_option(argv[optind], only))
                return EOF;
            break;
        }
    }

    /* we've got an option, so parse it */

    /* first, is it a long option? */
    if (longopts != NULL
        && (strncmp(argv[optind], "--", 2) == 0
            || (only && argv[optind][0] == '+'))
        && optwhere == 1) {
        /* handle long options */
        if (strncmp(argv[optind], "--", 2) == 0)
            optwhere = 2;
        longopt_match = -1;
        possible_arg = strchr(argv[optind] + optwhere, '=');
        if (possible_arg == NULL) {
            /* no =, so next argv might be arg */
            match_chars = strlen(argv[optind]);
            possible_arg = argv[optind] + match_chars;
            match_chars = match_chars - optwhere;
        } else
            match_chars = (possible_arg - argv[optind]) - optwhere;
        for (optindex = 0; longopts[optindex].name != NULL; optindex++) {
            /* strncmp, not memcmp: the typed text may be longer than the
             * option name, and the comparison must stop at the name's
             * terminator instead of reading beyond it */
            if (strncmp(argv[optind] + optwhere,
                        longopts[optindex].name, match_chars) == 0) {
                /* do we have an exact match? */
                if (match_chars == strlen(longopts[optindex].name)) {
                    longopt_match = optindex;
                    break;
                }
                /* do any characters match? */
                else {
                    if (longopt_match < 0)
                        longopt_match = optindex;
                    else {
                        /* we have ambiguous options */
                        if (opterr)
                            fprintf(stderr, "%s: option `%s' is ambiguous "
                                    "(could be `--%s' or `--%s')\n",
                                    argv[0],
                                    argv[optind],
                                    longopts[longopt_match].name,
                                    longopts[optindex].name);
                        return (optopt = '?');
                    }
                }
            }
        }
        if (longopt_match >= 0)
            has_arg = longopts[longopt_match].has_arg;
    }

    /* if we didn't find a long option, is it a short option? */
    if (longopt_match < 0 && shortopts != NULL) {
        cp = strchr(shortopts, argv[optind][optwhere]);
        if (cp == NULL) {
            /* couldn't find option in shortopts */
            if (opterr)
                fprintf(stderr,
                        "%s: invalid option -- `-%c'\n",
                        argv[0], argv[optind][optwhere]);
            optwhere++;
            if (argv[optind][optwhere] == '\0') {
                optind++;
                optwhere = 1;
            }
            return (optopt = '?');
        }
        has_arg = ((cp[1] == ':')
                   ? ((cp[2] == ':') ? OPTIONAL_ARG : REQUIRED_ARG)
                   : NO_ARG);
        possible_arg = argv[optind] + optwhere + 1;
        optopt = *cp;
    }

    /* get argument and reset optwhere */
    arg_next = 0;
    switch (has_arg) {
    case OPTIONAL_ARG:
        if (*possible_arg == '=')
            possible_arg++;
        if (*possible_arg != '\0') {
            optarg = possible_arg;
            optwhere = 1;
        } else
            optarg = NULL;
        break;
    case REQUIRED_ARG:
        if (*possible_arg == '=')
            possible_arg++;
        if (*possible_arg != '\0') {
            optarg = possible_arg;
            optwhere = 1;
        } else if (optind + 1 >= argc) {
            if (opterr) {
                fprintf(stderr, "%s: argument required for option `", argv[0]);
                if (longopt_match >= 0)
                    fprintf(stderr, "--%s'\n", longopts[longopt_match].name);
                else
                    fprintf(stderr, "-%c'\n", *cp);
            }
            optind++;
            return (optopt = ':');
        } else {
            optarg = argv[optind + 1];
            arg_next = 1;
            optwhere = 1;
        }
        break;
    case NO_ARG:
        if (longopt_match < 0) {
            optwhere++;
            if (argv[optind][optwhere] == '\0')
                optwhere = 1;
        } else
            optwhere = 1;
        optarg = NULL;
        break;
    }

    /* do we have to permute or otherwise modify optind? */
    if (ordering == PERMUTE && optwhere == 1 && num_nonopts != 0) {
        permute(argv + permute_from, num_nonopts, 1 + arg_next);
        optind = permute_from + 1 + arg_next;
    } else if (optwhere == 1)
        optind = optind + 1 + arg_next;

    /* finally return */
    if (longopt_match >= 0) {
        if (longind != NULL)
            *longind = longopt_match;
        if (longopts[longopt_match].flag != NULL) {
            *(longopts[longopt_match].flag) = longopts[longopt_match].val;
            return 0;
        } else
            return longopts[longopt_match].val;
    } else
        return optopt;
}

/* getopt_long: public entry point; long options must start with "--" */
int getopt_long(int argc, char **argv, char *shortopts,
                GETOPT_LONG_OPTION_T * longopts, int *longind)
{
    return getopt_internal(argc, argv, shortopts, longopts, longind, 0);
}

/* help: print the option summary on standard output */
void help(void)
{
    puts( "OPTIONS" );
    puts( "" );
    puts( "-i, --initial When shrinking, make initial spaces/tabs on a line tabs" );
    puts( " and expand every other tab on the line into spaces." );
    puts( "-t=tablist, Specify list of tab stops. Default is every 8 characters." );
    puts( "--tabs=tablist, The parameter tablist is a list of tab stops separated by" );
    puts( "-tablist commas; if no commas are present, the program will put a" );
    puts( " tab stop every x places, with x being the number in the" );
    puts( " parameter." );
    puts( "" );
    puts( "--help Print usage message and exit successfully." );
    puts( "" );
    puts( "--version Print version information and exit successfully." );
}

/* version: print program name, version and author on standard output */
void version(void)
{
    puts( "detab - expand tabs into spaces" );
    puts( "Version 1.0" );
    puts( "Written by Gregory Pietsch" );
}

/* allocate memory, die on error */
void *xmalloc(size_t n)
{
    void *p = malloc(n);

    if (p == NULL) {
        fprintf(stderr, "%s: out of memory\n", program_name);
        exit(EXIT_FAILURE);
    }
    return p;
}

/* reallocate memory, die on error; n == 0 frees p and returns NULL */
void *xrealloc(void *p, size_t n)
{
    void *s;

    if (n == 0) {
        free(p);                /* free(NULL) is a no-op */
        return NULL;
    }
    if (p == NULL)
        return xmalloc(n);
    s = realloc(p, n);
    if (s == NULL) {
        fprintf(stderr, "%s: out of memory\n", program_name);
        exit(EXIT_FAILURE);
    }
    return s;
}

/* Determine the location of the first character in the string s1
 * that is not a character in s2.  The terminating null is not
 * considered part of the string.  Returns NULL when every character
 * of s1 occurs in s2.
 */
char *xstrcpbrk(char *s1, char *s2)
{
    char *sc1;
    char *sc2;

    for (sc1 = s1; *sc1 != '\0'; sc1++)
        for (sc2 = s2;; sc2++)
            if (*sc2 == '\0')
                return sc1;
            else if (*sc1 == *sc2)
                break;
    return NULL;                /* terminating nulls match */
}

/* compare function for qsort(): ascending order of unsigned long */
int ul_cmp(const void *a, const void *b)
{
    const unsigned long *ula = (const unsigned long *) a;
    const unsigned long *ulb = (const unsigned long *) b;

    return (*ula < *ulb) ? -1 : (*ula > *ulb);
}

/* handle a tab stop list -- assumes param isn't NULL.
 * A single number sets tab_every (0 falls back to 8); a comma-separated
 * list is appended to tab_stop_list, which is kept sorted.  Anything other
 * than digits and commas terminates the program.  s is modified (strtok).
 */
void handle_tab_stops(char *s)
{
    char *p;
    unsigned long ul;
    size_t len = strlen(s);

    if (xstrcpbrk(s, "0123456789,") != NULL) {
        /* funny param */
        fprintf(stderr, "%s: invalid parameter\n", program_name);
        exit(EXIT_FAILURE);
    }
    if (strchr(s, ',') == NULL) {
        tab_every = strtoul(s, NULL, 10);
        if (tab_every == 0)
            tab_every = 8;
    } else {
        /* a list of len characters cannot hold more than len numbers */
        tab_stop_list = xrealloc(tab_stop_list,
                                 (num_tab_stops_allocked += len)
                                 * (sizeof(unsigned long)));
        for (p = s; (p = strtok(p, ",")) != NULL; p = NULL) {
            ul = strtoul(p, NULL, 10);
            tab_stop_list[num_tab_stops++] = ul;
        }
        qsort(tab_stop_list, num_tab_stops, sizeof(unsigned long), ul_cmp);
    }
}

/* parse_args: consume all options; on return optind indexes the first
 * operand of the (possibly permuted) argv.  Exits on --help, --version,
 * an invalid option, or an option missing its required argument. */
void parse_args(int argc, char **argv)
{
    int opt;

    do {
        switch ((opt = getopt_long(argc, argv, shortopts, longopts, NULL))) {
        case 'i':               /* initial */
            flag_initial = 1;
            break;
        case 't':               /* tab stops */
            handle_tab_stops(optarg);
            break;
        case ':':               /* required argument missing; getopt has
                                 * already reported it */
        case '?':               /* invalid option */
            fprintf(stderr, "For help, type:\n\t%s --help\n", program_name);
            exit(EXIT_FAILURE);
        case 1:
        case 0:
            if (show_help || show_version) {
                if (show_help)
                    help();
                if (show_version)
                    version();
                exit(EXIT_SUCCESS);
            }
            break;
        default:
            break;
        }
    } while (opt != EOF);
}

/* output exactly n spaces */
void output_spaces(size_t n)
{
    int x = n;                  /* assume n is small */

    printf("%*s", x, "");
}

/* get next highest tab stop; returns 0 when a tab stop list is in use and
 * x is at or beyond its last entry */
unsigned long get_next_tab(unsigned long x)
{
    size_t i;

    if (tab_stop_list == NULL) {
        /* use tab_every */
        x += (tab_every - (x % tab_every));
        return x;
    } else {
        for (i = 0; i < num_tab_stops && tab_stop_list[i] <= x; i++);
        return (i >= num_tab_stops) ? 0 : tab_stop_list[i];
    }
}

/* flush_blanks: write out the *num_spaces columns of pending blank space
 * that start at column *linelength, then advance *linelength and zero
 * *num_spaces.  Spaces are written when expanding, when -i is set and the
 * blanks are not at the start of the line, or when only one column is
 * pending; otherwise as many tabs as fit, followed by spaces. */
static void flush_blanks(unsigned long *linelength, size_t *num_spaces,
                         int in_initials)
{
    unsigned long next_tab;

    if (flag_expand || (flag_initial && !in_initials) || *num_spaces == 1) {
        /* output spaces anyway */
        output_spaces(*num_spaces);
        *linelength += *num_spaces;
        *num_spaces = 0;
        return;
    }
    while (*num_spaces != 0) {
        next_tab = get_next_tab(*linelength);
        if (next_tab > 0 && next_tab <= *linelength + *num_spaces) {
            /* output a tab */
            putchar('\t');
            *num_spaces -= (next_tab - *linelength);
            *linelength = next_tab;
        } else {
            /* output spaces */
            output_spaces(*num_spaces);
            *linelength += *num_spaces;
            *num_spaces = 0;
        }
    }
}

/* the function that does the dirty work: copy f to standard output,
 * rewriting runs of spaces and tabs according to the flags and tab stops */
void tab(FILE * f)
{
    unsigned long linelength = 0;
    int c;
    int in_initials = 1;
    size_t num_spaces = 0;
    unsigned long next_tab;

    while ((c = getc(f)) != EOF) {
        if (c != ' ' && c != '\t' && num_spaces > 0)
            flush_blanks(&linelength, &num_spaces, in_initials);
        switch (c) {
        case ' ':               /* space */
            num_spaces++;
            break;
        case '\b':              /* backspace */
            /* preserve backspaces in output; decrement length for tabbing
             * purposes */
            putchar(c);
            if (linelength > 0)
                linelength--;
            break;
        case '\n':              /* newline */
            putchar(c);
            in_initials = 1;
            linelength = 0;
            break;
        case '\t':              /* tab */
            next_tab = get_next_tab(linelength + num_spaces);
            if (next_tab == 0) {
                /* beyond the last listed stop there is nothing to expand
                 * the tab to: write the pending blanks by the usual rules
                 * (so expand mode never emits tabs for them) and keep the
                 * tab itself, counted as one column */
                flush_blanks(&linelength, &num_spaces, in_initials);
                putchar('\t');
                linelength++;
            } else
                num_spaces = next_tab - linelength;
            break;
        default:
            putchar(c);
            in_initials = 0;
            linelength++;
            break;
        }
    }
    /* blanks at the very end of an unterminated last line */
    if (num_spaces > 0)
        flush_blanks(&linelength, &num_spaces, in_initials);
}

/* main: rewrite "-N..." arguments as "-tN...", parse the options, then
 * filter each named file ("-" is standard input), or standard input when
 * no file is named */
int main(int argc, char **argv)
{
    int i;
    int n_owned = 0;
    FILE *fp;
    char **owned;               /* the strings allocated below */
    char **new_argv;
    char *p;

    program_name = (argc > 0 && argv[0] != NULL) ? argv[0] : "detab";
    owned = xmalloc((argc + 1) * sizeof *owned);
    new_argv = xmalloc((argc + 1) * sizeof *new_argv);
    for (i = 0; i < argc; i++) {
        p = argv[i];
        /* only "-<digit>..." is shorthand for "-t<digit>..."; the program
         * name and operands such as "a1.txt" are left alone */
        if (i > 0 && p[0] == '-' && isdigit((unsigned char) p[1])) {
            new_argv[i] = xmalloc(strlen(p) + 2);
            sprintf(new_argv[i], "-t%s", p + 1);
            owned[n_owned++] = new_argv[i];
        } else
            new_argv[i] = p;
    }
    new_argv[argc] = NULL;
    parse_args(argc, new_argv);
    if (optind >= argc)
        tab(stdin);
    else {
        /* optind indexes new_argv, which getopt may have permuted; argv is
         * still in its original order and must not be used here */
        for (i = optind; i < argc; i++) {
            if (strcmp(new_argv[i], "-") == 0)
                fp = stdin;
            else {
                fp = fopen(new_argv[i], "r");
                if (fp == NULL) {
                    fprintf(stderr, "%s: can't open %s\n",
                            program_name, new_argv[i]);
                    exit(EXIT_FAILURE);
                }
            }
            tab(fp);
            if (fp != stdin)
                fclose(fp);
        }
    }
    /* free everything we can; the allocated strings are tracked in owned
     * because permutation changes their positions in new_argv */
    for (i = 0; i < n_owned; i++)
        free(owned[i]);
    free(owned);
    free(new_argv);
    free(tab_stop_list);
    return EXIT_SUCCESS;
}

/* END OF FILE detab.c */
Here's entab...
/****************************************************************************

entab.c - Source code for the entab command

AUTHOR: Gregory Pietsch

DESCRIPTION:

entab - shrinks spaces into tabs

****************************************************************************/

/* include files */

#include <ctype.h>              /* isdigit */
#include <stdio.h>
#include <stdlib.h>             /* malloc, free, exit, getenv, qsort, strtoul */
#include <string.h>

/* macros */

#define NO_ARG          0
#define REQUIRED_ARG    1
#define OPTIONAL_ARG    2

/* types */

/* GETOPT_LONG_OPTION_T: The type of long option */
typedef struct GETOPT_LONG_OPTION_T {
    char *name;                 /* the name of the long option */
    int has_arg;                /* one of the above macros */
    int *flag;                  /* determines if getopt_long() returns a
                                 * value for a long option; if it is
                                 * non-NULL, 0 is returned as a function
                                 * value and the value of val is stored in
                                 * the area pointed to by flag.  Otherwise,
                                 * val is returned. */
    int val;                    /* determines the value to return if flag is
                                 * NULL. */
} GETOPT_LONG_OPTION_T;

typedef enum GETOPT_ORDERING_T {
    PERMUTE,
    RETURN_IN_ORDER,
    REQUIRE_ORDER
} GETOPT_ORDERING_T;

/* globally-defined variables */

char *optarg = NULL;
int optind = 0;
int opterr = 1;
int optopt = '?';

/* statically-defined variables */

static char *program_name;
/* if nonzero, it means tab every x characters */
static unsigned long tab_every = 8;
/* -i: only handle initial tabs/spaces */
static int flag_initial = 0;
/* don't expand tabs into spaces */
static int flag_expand = 0;
static unsigned long *tab_stop_list = NULL;
static size_t num_tab_stops = 0;
static size_t num_tab_stops_allocked = 0;
static int show_help = 0;
static int show_version = 0;
static char *shortopts = "it:";
static GETOPT_LONG_OPTION_T longopts[] = {
    {"initial", NO_ARG, NULL, 'i'},
    {"tabs", REQUIRED_ARG, NULL, 't'},
    {"help", NO_ARG, &show_help, 1},
    {"version", NO_ARG, &show_version, 1},
    {NULL, 0, 0, 0}
};

/* functions */

/* reverse_argv_elements: reverses num elements starting at argv */
static void reverse_argv_elements(char **argv, int num)
{
    int i;
    char *tmp;

    for (i = 0; i < (num >> 1); i++) {
        tmp = argv[i];
        argv[i] = argv[num - i - 1];
        argv[num - i - 1] = tmp;
    }
}

/* permute: swap two blocks of argv-elements given their lengths */
static void permute(char **argv, int len1, int len2)
{
    reverse_argv_elements(argv, len1);
    reverse_argv_elements(argv, len1 + len2);
    reverse_argv_elements(argv, len2);
}

/* is_option: is this argv-element an option or the end of the option list?
 * A lone "-" is an operand (main() reads it as standard input), so it is
 * NOT an option; otherwise the short-option lookup would search shortopts
 * for '\0', match its terminator and read past the end of the string. */
static int is_option(char *argv_element, int only)
{
    return ((argv_element == NULL)
            || (argv_element[0] == '-' && argv_element[1] != '\0')
            || (only && argv_element[0] == '+'));
}

/* getopt_internal: the function that does all the dirty work.
 *
 * Parses the next option from argv (which must be NULL-terminated at
 * argv[argc]) and returns:
 *   - the option character for a short option, or val for a long option
 *     whose flag is NULL;
 *   - 0 for a long option whose flag is non-NULL (val is stored in *flag);
 *   - '?' for an unknown or ambiguous option, ':' for a missing required
 *     argument;
 *   - EOF when there are no more options; optind then indexes the first
 *     operand.
 * With PERMUTE ordering argv is reordered so operands end up after options.
 */
static int getopt_internal(int argc, char **argv, char *shortopts,
                           GETOPT_LONG_OPTION_T * longopts, int *longind,
                           int only)
{
    GETOPT_ORDERING_T ordering = PERMUTE;
    static size_t optwhere = 0;
    size_t permute_from = 0;
    int num_nonopts = 0;
    int optindex = 0;
    size_t match_chars = 0;
    char *possible_arg = NULL;
    int longopt_match = -1;
    int has_arg = -1;
    char *cp = NULL;
    int arg_next = 0;

    /* first, deal with silly parameters and easy stuff */
    if (argc == 0 || argv == NULL || (shortopts == NULL && longopts == NULL))
        return (optopt = '?');

    /* if this is our first time through; done before the end-of-list
     * checks so that they look at argv[1] rather than the program name */
    if (optind == 0)
        optind = optwhere = 1;

    if (optind >= argc || argv[optind] == NULL)
        return EOF;
    if (strcmp(argv[optind], "--") == 0) {
        optind++;
        return EOF;
    }

    /* define ordering */
    if (shortopts != NULL && (*shortopts == '-' || *shortopts == '+')) {
        ordering = (*shortopts == '-') ? RETURN_IN_ORDER : REQUIRE_ORDER;
        shortopts++;
    } else
        ordering = (getenv("POSIXLY_CORRECT") != NULL) ? REQUIRE_ORDER : PERMUTE;

    /* based on ordering, find our next option, if we're at the beginning of
     * one */
    if (optwhere == 1) {
        switch (ordering) {
        case PERMUTE:
            permute_from = optind;
            num_nonopts = 0;
            while (!is_option(argv[optind], only)) {
                optind++;
                num_nonopts++;
            }
            if (argv[optind] == NULL) {
                /* no more options */
                optind = permute_from;
                return EOF;
            } else if (strcmp(argv[optind], "--") == 0) {
                /* no more options, but have to get `--' out of the way */
                permute(argv + permute_from, num_nonopts, 1);
                optind = permute_from + 1;
                return EOF;
            }
            break;
        case RETURN_IN_ORDER:
            if (!is_option(argv[optind], only)) {
                optarg = argv[optind++];
                return (optopt = 1);
            }
            break;
        case REQUIRE_ORDER:
            if (!is_option(argv[optind], only))
                return EOF;
            break;
        }
    }

    /* we've got an option, so parse it */

    /* first, is it a long option? */
    if (longopts != NULL
        && (strncmp(argv[optind], "--", 2) == 0
            || (only && argv[optind][0] == '+'))
        && optwhere == 1) {
        /* handle long options */
        if (strncmp(argv[optind], "--", 2) == 0)
            optwhere = 2;
        longopt_match = -1;
        possible_arg = strchr(argv[optind] + optwhere, '=');
        if (possible_arg == NULL) {
            /* no =, so next argv might be arg */
            match_chars = strlen(argv[optind]);
            possible_arg = argv[optind] + match_chars;
            match_chars = match_chars - optwhere;
        } else
            match_chars = (possible_arg - argv[optind]) - optwhere;
        for (optindex = 0; longopts[optindex].name != NULL; optindex++) {
            /* strncmp, not memcmp: the typed text may be longer than the
             * option name, and the comparison must stop at the name's
             * terminator instead of reading beyond it */
            if (strncmp(argv[optind] + optwhere,
                        longopts[optindex].name, match_chars) == 0) {
                /* do we have an exact match? */
                if (match_chars == strlen(longopts[optindex].name)) {
                    longopt_match = optindex;
                    break;
                }
                /* do any characters match? */
                else {
                    if (longopt_match < 0)
                        longopt_match = optindex;
                    else {
                        /* we have ambiguous options */
                        if (opterr)
                            fprintf(stderr, "%s: option `%s' is ambiguous "
                                    "(could be `--%s' or `--%s')\n",
                                    argv[0],
                                    argv[optind],
                                    longopts[longopt_match].name,
                                    longopts[optindex].name);
                        return (optopt = '?');
                    }
                }
            }
        }
        if (longopt_match >= 0)
            has_arg = longopts[longopt_match].has_arg;
    }

    /* if we didn't find a long option, is it a short option? */
    if (longopt_match < 0 && shortopts != NULL) {
        cp = strchr(shortopts, argv[optind][optwhere]);
        if (cp == NULL) {
            /* couldn't find option in shortopts */
            if (opterr)
                fprintf(stderr,
                        "%s: invalid option -- `-%c'\n",
                        argv[0], argv[optind][optwhere]);
            optwhere++;
            if (argv[optind][optwhere] == '\0') {
                optind++;
                optwhere = 1;
            }
            return (optopt = '?');
        }
        has_arg = ((cp[1] == ':')
                   ? ((cp[2] == ':') ? OPTIONAL_ARG : REQUIRED_ARG)
                   : NO_ARG);
        possible_arg = argv[optind] + optwhere + 1;
        optopt = *cp;
    }

    /* get argument and reset optwhere */
    arg_next = 0;
    switch (has_arg) {
    case OPTIONAL_ARG:
        if (*possible_arg == '=')
            possible_arg++;
        if (*possible_arg != '\0') {
            optarg = possible_arg;
            optwhere = 1;
        } else
            optarg = NULL;
        break;
    case REQUIRED_ARG:
        if (*possible_arg == '=')
            possible_arg++;
        if (*possible_arg != '\0') {
            optarg = possible_arg;
            optwhere = 1;
        } else if (optind + 1 >= argc) {
            if (opterr) {
                fprintf(stderr, "%s: argument required for option `", argv[0]);
                if (longopt_match >= 0)
                    fprintf(stderr, "--%s'\n", longopts[longopt_match].name);
                else
                    fprintf(stderr, "-%c'\n", *cp);
            }
            optind++;
            return (optopt = ':');
        } else {
            optarg = argv[optind + 1];
            arg_next = 1;
            optwhere = 1;
        }
        break;
    case NO_ARG:
        if (longopt_match < 0) {
            optwhere++;
            if (argv[optind][optwhere] == '\0')
                optwhere = 1;
        } else
            optwhere = 1;
        optarg = NULL;
        break;
    }

    /* do we have to permute or otherwise modify optind? */
    if (ordering == PERMUTE && optwhere == 1 && num_nonopts != 0) {
        permute(argv + permute_from, num_nonopts, 1 + arg_next);
        optind = permute_from + 1 + arg_next;
    } else if (optwhere == 1)
        optind = optind + 1 + arg_next;

    /* finally return */
    if (longopt_match >= 0) {
        if (longind != NULL)
            *longind = longopt_match;
        if (longopts[longopt_match].flag != NULL) {
            *(longopts[longopt_match].flag) = longopts[longopt_match].val;
            return 0;
        } else
            return longopts[longopt_match].val;
    } else
        return optopt;
}

/* getopt_long: public entry point; long options must start with "--" */
int getopt_long(int argc, char **argv, char *shortopts,
                GETOPT_LONG_OPTION_T * longopts, int *longind)
{
    return getopt_internal(argc, argv, shortopts, longopts, longind, 0);
}

/* help: print the option summary on standard output */
void help(void)
{
    puts( "OPTIONS" );
    puts( "" );
    puts( "-i, --initial When shrinking, make initial spaces/tabs on a line tabs" );
    puts( " and expand every other tab on the line into spaces." );
    puts( "-t=tablist, Specify list of tab stops. Default is every 8 characters." );
    puts( "--tabs=tablist, The parameter tablist is a list of tab stops separated by" );
    puts( "-tablist commas; if no commas are present, the program will put a" );
    puts( " tab stop every x places, with x being the number in the" );
    puts( " parameter." );
    puts( "" );
    puts( "--help Print usage message and exit successfully." );
    puts( "" );
    puts( "--version Print version information and exit successfully." );
}

/* version: print program name, version and author on standard output */
void version(void)
{
    puts( "entab - shrink spaces into tabs" );
    puts( "Version 1.0" );
    puts( "Written by Gregory Pietsch" );
}

/* allocate memory, die on error */
void *xmalloc(size_t n)
{
    void *p = malloc(n);

    if (p == NULL) {
        fprintf(stderr, "%s: out of memory\n", program_name);
        exit(EXIT_FAILURE);
    }
    return p;
}

/* reallocate memory, die on error; n == 0 frees p and returns NULL */
void *xrealloc(void *p, size_t n)
{
    void *s;

    if (n == 0) {
        free(p);                /* free(NULL) is a no-op */
        return NULL;
    }
    if (p == NULL)
        return xmalloc(n);
    s = realloc(p, n);
    if (s == NULL) {
        fprintf(stderr, "%s: out of memory\n", program_name);
        exit(EXIT_FAILURE);
    }
    return s;
}

/* Determine the location of the first character in the string s1
 * that is not a character in s2.  The terminating null is not
 * considered part of the string.  Returns NULL when every character
 * of s1 occurs in s2.
 */
char *xstrcpbrk(char *s1, char *s2)
{
    char *sc1;
    char *sc2;

    for (sc1 = s1; *sc1 != '\0'; sc1++)
        for (sc2 = s2;; sc2++)
            if (*sc2 == '\0')
                return sc1;
            else if (*sc1 == *sc2)
                break;
    return NULL;                /* terminating nulls match */
}

/* compare function for qsort(): ascending order of unsigned long */
int ul_cmp(const void *a, const void *b)
{
    const unsigned long *ula = (const unsigned long *) a;
    const unsigned long *ulb = (const unsigned long *) b;

    return (*ula < *ulb) ? -1 : (*ula > *ulb);
}

/* handle a tab stop list -- assumes param isn't NULL.
 * A single number sets tab_every (0 falls back to 8); a comma-separated
 * list is appended to tab_stop_list, which is kept sorted.  Anything other
 * than digits and commas terminates the program.  s is modified (strtok).
 */
void handle_tab_stops(char *s)
{
    char *p;
    unsigned long ul;
    size_t len = strlen(s);

    if (xstrcpbrk(s, "0123456789,") != NULL) {
        /* funny param */
        fprintf(stderr, "%s: invalid parameter\n", program_name);
        exit(EXIT_FAILURE);
    }
    if (strchr(s, ',') == NULL) {
        tab_every = strtoul(s, NULL, 10);
        if (tab_every == 0)
            tab_every = 8;
    } else {
        /* a list of len characters cannot hold more than len numbers */
        tab_stop_list = xrealloc(tab_stop_list,
                                 (num_tab_stops_allocked += len)
                                 * (sizeof(unsigned long)));
        for (p = s; (p = strtok(p, ",")) != NULL; p = NULL) {
            ul = strtoul(p, NULL, 10);
            tab_stop_list[num_tab_stops++] = ul;
        }
        qsort(tab_stop_list, num_tab_stops, sizeof(unsigned long), ul_cmp);
    }
}

/* parse_args: consume all options; on return optind indexes the first
 * operand of the (possibly permuted) argv.  Exits on --help, --version,
 * an invalid option, or an option missing its required argument. */
void parse_args(int argc, char **argv)
{
    int opt;

    do {
        switch ((opt = getopt_long(argc, argv, shortopts, longopts, NULL))) {
        case 'i':               /* initial */
            flag_initial = 1;
            break;
        case 't':               /* tab stops */
            handle_tab_stops(optarg);
            break;
        case ':':               /* required argument missing; getopt has
                                 * already reported it */
        case '?':               /* invalid option */
            fprintf(stderr, "For help, type:\n\t%s --help\n", program_name);
            exit(EXIT_FAILURE);
        case 1:
        case 0:
            if (show_help || show_version) {
                if (show_help)
                    help();
                if (show_version)
                    version();
                exit(EXIT_SUCCESS);
            }
            break;
        default:
            break;
        }
    } while (opt != EOF);
}

/* output exactly n spaces */
void output_spaces(size_t n)
{
    int x = n;                  /* assume n is small */

    printf("%*s", x, "");
}

/* get next highest tab stop; returns 0 when a tab stop list is in use and
 * x is at or beyond its last entry */
unsigned long get_next_tab(unsigned long x)
{
    size_t i;

    if (tab_stop_list == NULL) {
        /* use tab_every */
        x += (tab_every - (x % tab_every));
        return x;
    } else {
        for (i = 0; i < num_tab_stops && tab_stop_list[i] <= x; i++);
        return (i >= num_tab_stops) ? 0 : tab_stop_list[i];
    }
}

/* flush_blanks: write out the *num_spaces columns of pending blank space
 * that start at column *linelength, then advance *linelength and zero
 * *num_spaces.  Spaces are written when expanding, when -i is set and the
 * blanks are not at the start of the line, or when only one column is
 * pending; otherwise as many tabs as fit, followed by spaces. */
static void flush_blanks(unsigned long *linelength, size_t *num_spaces,
                         int in_initials)
{
    unsigned long next_tab;

    if (flag_expand || (flag_initial && !in_initials) || *num_spaces == 1) {
        /* output spaces anyway */
        output_spaces(*num_spaces);
        *linelength += *num_spaces;
        *num_spaces = 0;
        return;
    }
    while (*num_spaces != 0) {
        next_tab = get_next_tab(*linelength);
        if (next_tab > 0 && next_tab <= *linelength + *num_spaces) {
            /* output a tab */
            putchar('\t');
            *num_spaces -= (next_tab - *linelength);
            *linelength = next_tab;
        } else {
            /* output spaces */
            output_spaces(*num_spaces);
            *linelength += *num_spaces;
            *num_spaces = 0;
        }
    }
}

/* the function that does the dirty work: copy f to standard output,
 * rewriting runs of spaces and tabs according to the flags and tab stops */
void tab(FILE * f)
{
    unsigned long linelength = 0;
    int c;
    int in_initials = 1;
    size_t num_spaces = 0;
    unsigned long next_tab;

    while ((c = getc(f)) != EOF) {
        if (c != ' ' && c != '\t' && num_spaces > 0)
            flush_blanks(&linelength, &num_spaces, in_initials);
        switch (c) {
        case ' ':               /* space */
            num_spaces++;
            break;
        case '\b':              /* backspace */
            /* preserve backspaces in output; decrement length for tabbing
             * purposes */
            putchar(c);
            if (linelength > 0)
                linelength--;
            break;
        case '\n':              /* newline */
            putchar(c);
            in_initials = 1;
            linelength = 0;
            break;
        case '\t':              /* tab */
            next_tab = get_next_tab(linelength + num_spaces);
            if (next_tab == 0) {
                /* beyond the last listed stop there is nothing to align
                 * the tab to: write the pending blanks by the usual rules
                 * and keep the tab itself, counted as one column */
                flush_blanks(&linelength, &num_spaces, in_initials);
                putchar('\t');
                linelength++;
            } else
                num_spaces = next_tab - linelength;
            break;
        default:
            putchar(c);
            in_initials = 0;
            linelength++;
            break;
        }
    }
    /* blanks at the very end of an unterminated last line */
    if (num_spaces > 0)
        flush_blanks(&linelength, &num_spaces, in_initials);
}

/* main: rewrite "-N..." arguments as "-tN...", parse the options, then
 * filter each named file ("-" is standard input), or standard input when
 * no file is named */
int main(int argc, char **argv)
{
    int i;
    int n_owned = 0;
    FILE *fp;
    char **owned;               /* the strings allocated below */
    char **new_argv;
    char *p;

    program_name = (argc > 0 && argv[0] != NULL) ? argv[0] : "entab";
    owned = xmalloc((argc + 1) * sizeof *owned);
    new_argv = xmalloc((argc + 1) * sizeof *new_argv);
    for (i = 0; i < argc; i++) {
        p = argv[i];
        /* only "-<digit>..." is shorthand for "-t<digit>..."; the program
         * name and operands such as "a1.txt" are left alone */
        if (i > 0 && p[0] == '-' && isdigit((unsigned char) p[1])) {
            new_argv[i] = xmalloc(strlen(p) + 2);
            sprintf(new_argv[i], "-t%s", p + 1);
            owned[n_owned++] = new_argv[i];
        } else
            new_argv[i] = p;
    }
    new_argv[argc] = NULL;
    parse_args(argc, new_argv);
    if (optind >= argc)
        tab(stdin);
    else {
        /* optind indexes new_argv, which getopt may have permuted; argv is
         * still in its original order and must not be used here */
        for (i = optind; i < argc; i++) {
            if (strcmp(new_argv[i], "-") == 0)
                fp = stdin;
            else {
                fp = fopen(new_argv[i], "r");
                if (fp == NULL) {
                    fprintf(stderr, "%s: can't open %s\n",
                            program_name, new_argv[i]);
                    exit(EXIT_FAILURE);
                }
            }
            tab(fp);
            if (fp != stdin)
                fclose(fp);
        }
    }
    /* free everything we can; the allocated strings are tracked in owned
     * because permutation changes their positions in new_argv */
    for (i = 0; i < n_owned; i++)
        free(owned[i]);
    free(owned);
    free(new_argv);
    free(tab_stop_list);
    return EXIT_SUCCESS;
}

/* END OF FILE entab.c */
Solution by Cromagnon (talk) (cat 0)
This is a category 0 Solution. December 20, 2019.
Unix-like systems have two commands called expand and unexpand. These commands do exactly what detab and entab do, respectively (but with a different syntax for the arguments). So I wrote my solutions to be operationally equivalent to these UNIX commands.
If you run ./detab 3 9 10 20 < file > out1
,
then run expand -t 3,9,10,20 < file > out2
,
and compare the output of both commands with diff out1 out2
;
you will see that both outputs are equal.
Therefore, both my detab and UNIX expand are equivalent.
I used GNU's and openBSD's expand to test it.
My implementation of entab is not fully equivalent to GNU unexpand for certain unusual combinations of spaces between tabs (but I think this is because GNU unexpand uses some "smart guesses" in those situations).
They both have a limit of 100 arguments, and all the arguments must be positive integers listed in ascending order.
They return 2 if they get more than 100 arguments;
return 1 if the arguments are not in the correct format;
and return 0 if they run successfully.
The argument limit can be eliminated by using malloc (turning it into a category 1 solution)
or by defining stop[]
as unsigned char stop[argc+1]
(turning it into a category 2 solution, since C99 allows arrays of variable size).
detab:
#include <limits.h>
#include <stdio.h>

#define DEFSTOP  8
#define MAXSTOPS 100

int getstop(char *s);

/*
 * detab: replace tabs with the proper number of blanks to space to the
 * next tabstop.
 *
 * Arguments are tab stop columns: positive integers in strictly ascending
 * order. With no arguments a stop is placed every DEFSTOP columns; with
 * exactly one argument N a stop is placed every N columns. A tab beyond the
 * last listed stop becomes a single blank.
 *
 * Returns 2 if there are too many arguments, 1 if an argument is not a
 * valid tab stop, 0 otherwise.
 */
int main(int argc, char *argv[])
{
	int c, n;
	int col;		/* column after previous tabstop or beginning of line */
	int stop[MAXSTOPS];	/* distance of each tabstop from the previous one;
				 * int so that distances above 255 are kept intact */

	if (argc-- > MAXSTOPS)
		return 2;	/* too many arguments */
	for (c = n = 0; n < argc; n++) {
		int temp;

		temp = getstop(*++argv);
		if (temp <= c)
			return 1;	/* tabstop is not a positive integer or not in ascending order */
		stop[n] = temp - c;
		c = temp;
	}
	if (n == 0)
		stop[n++] = DEFSTOP;
	stop[n] = 0;		/* a distance of 0 terminates the list */
	col = n = 0;
	while ((c = getchar()) != EOF) {
		switch (c) {
		case '\t':
			do {
				putchar(' ');
			} while (stop[n] > ++col);
			/* stop[1] == 0 means a single, repeating tabstop: stay on it */
			if (stop[n] && stop[1])
				n++;
			col = 0;
			break;
		case '\n':
			putchar(c);
			col = n = 0;
			break;
		default:
			putchar(c);
			if (stop[n] != 0 && col == stop[n] - 1) {
				col = 0;
				if (stop[n] && stop[1])
					n++;
			} else if (stop[n] != 0) {
				++col;
			}
			/* past the last stop the column is no longer needed, so it
			 * is left alone rather than counted without bound */
			break;
		}
	}
	return 0;
}

/* getstop: return tabstop if valid, -1 if invalid (non-digit character or
 * a value too large for an int) */
int getstop(char *cp)
{
	int n;

	for (n = 0; *cp != '\0'; cp++) {
		int digit;

		if (*cp < '0' || *cp > '9')
			return -1;
		digit = *cp - '0';
		if (n > (INT_MAX - digit) / 10)
			return -1;	/* would overflow */
		n = 10 * n + digit;
	}
	return n;
}
entab:
#include <limits.h>
#include <stdio.h>

#define DEFSTOP 8
#define MAXSTOPS 100

int getstop(char *s);

/*
 * entab: replace spaces by the least number of tabs/spaces to get the
 * same spacing.
 *
 * Each argument is a tabstop column; the arguments must be positive
 * integers in strictly ascending order.  With no arguments a tabstop
 * every DEFSTOP columns is used.  A single argument N means "a tabstop
 * every N columns".
 *
 * Exit status: 0 on success, 1 if an argument is not a valid tabstop,
 * 2 if there are too many arguments.
 */
int main(int argc, char *argv[])
{
    int c, n;
    /*
     * unsigned int instead of unsigned char: intervals wider than 255 are
     * no longer truncated, and past the last listed stop col and spc no
     * longer wrap at 256 (which lost blanks or produced a stray tab).
     */
    unsigned col;            /* column after previous tabstop or beginning of line */
    unsigned spc;            /* blanks seen but not yet written */
    unsigned stop[MAXSTOPS]; /* widths between consecutive tabstops, 0-terminated */

    if (argc-- > MAXSTOPS)
        return 2; /* too many arguments */

    /* convert the absolute columns into widths between neighbouring stops */
    for (c = n = 0; n < argc; n++) {
        int temp = getstop(*++argv);

        if (temp <= c)
            return 1; /* not a positive integer, or not in ascending order */
        stop[n] = (unsigned)(temp - c);
        c = temp;
    }
    if (n == 0)
        stop[n++] = DEFSTOP;
    stop[n] = 0; /* n widths are stored in stop[0..n-1]; stop[n] terminates the list */

    spc = col = 0;
    n = 0;
    while ((c = getchar()) != EOF) {
        /* print blanks */
        if (col == 0) {
            /* pending blanks ended exactly on a tabstop */
            if (spc > 1 || (spc == 1 && (c == ' ' || c == '\t')))
                putchar('\t');
            else if (spc == 1)
                putchar(' ');
        } else if (c != '\t' && c != ' ') {
            /* blanks that stopped short of a tabstop stay blanks */
            while (spc > 0) {
                putchar(' ');
                spc--;
            }
        }

        /* print character, if non-space; and count spaces */
        if (c != ' ') {
            putchar(c);
            spc = 0;
        } else if (col == 0) {
            spc = 1;
        } else {
            spc++;
        }

        /* count columns and stops */
        switch (c) {
        case '\t':
            /* stop[1] == 0 means a single width that repeats forever */
            if (stop[n] && stop[1])
                ++n;
            col = 0;
            break;
        case '\n':
            n = 0;
            col = 0;
            break;
        default:
            /* stop[n] == 0: past the last listed stop, nothing left to reach */
            if (stop[n] != 0 && col == stop[n] - 1) {
                col = 0;
                if (stop[1])
                    ++n;
            } else {
                ++col;
            }
            break;
        }
    }

    /* write blanks still pending when the input ends without a newline */
    if (col == 0) {
        if (spc > 1)
            putchar('\t');
        else if (spc == 1)
            putchar(' ');
    } else {
        while (spc > 0) {
            putchar(' ');
            spc--;
        }
    }
    return 0;
}

/*
 * getstop: return the tabstop encoded by the decimal string cp, or -1 if
 * the string contains a non-digit or the value does not fit in an int.
 * An empty string yields 0, which the caller rejects as non-positive.
 */
int getstop(char *cp)
{
    int n;

    for (n = 0; *cp != '\0'; cp++) {
        int digit;

        if (*cp < '0' || *cp > '9')
            return -1;
        digit = *cp - '0';
        if (n > (INT_MAX - digit) / 10)
            return -1; /* would overflow int */
        n = 10 * n + digit;
    }
    return n;
}
Solution by Codybartfast (cat 0)
"Tabstops" provides shared functions for parsing arguments and determining tab locations that are used by both detab and entab:
/* detab.c */ #include <stdio.h> #include "tabstops.h" int main(int argc, char *argv[]) { int col, c, dist; parsestops(argc, argv); col = 0; while ((c = getchar()) != EOF) { if (c == '\t') { for (dist = dist2stop(col); dist > 0; --dist) { putchar(' '); ++col; } } else { putchar(c); if (c == '\n') col = 0; else if (c == '\b' && col > 0) --col; else ++col; } } return 0; }
/* tabstops.h */
#ifndef TABSTOPS_H
#define TABSTOPS_H

/* parsestops: read the tab stops from the program arguments argv[1..argc-1] */
void parsestops(int argc, char *argv[]);

/* dist2stop: number of columns from col to the next tab stop */
int dist2stop(int col);

/* istabstop: non-zero if col is a tab stop, zero otherwise */
int istabstop(int col);

#endif /* TABSTOPS_H */
/* tabstops.c */ #include <stdio.h> #include "tabstops.h" #define MAXSTOPS 1024 int stops[MAXSTOPS], stopcount; int dfltsize = 4; int atoi(char *s); int dist2stop(int col) { int i; for (i = 0; i < stopcount; i++) { if (col < stops[i]) return stops[i] - col; } return (dfltsize - (col % dfltsize)); } int istabstop(int col) { return col > 0 && (dist2stop(col - 1) == 1); } void parsestops(int argc, char *argv[]) { int i; stopcount = argc - 1; if (stopcount > MAXSTOPS) { printf("error: More than %d tap stops!\n", MAXSTOPS); return; } for (i = 1; i < argc; i++) stops[i - 1] = atoi(argv[i]) - 1; } int atoi(char *s) { int i, n = 0; for (i = 0; s[i] >= '0' && s[i] <= '9'; ++i) n = 10 * n + (s[i] - '0'); return n; }
/* entab.c */ #include <stdio.h> #include "tabstops.h" int main(int argc, char *argv[]) { int col, c, nspace; parsestops(argc, argv); col = nspace = 0; while ((c = getchar()) != EOF) { if (c == '\b' && col > 0) { --col; if (nspace > 0) --nspace; else putchar(c); } else if (nspace > 0 && istabstop(col)) { if (nspace == 1) putchar(' '); else putchar('\t'); nspace = 0; } if (c == '\t') { putchar(c); nspace = 0; col = col + dist2stop(col); } else if (c == ' ') { ++col; ++nspace; } else if (c != '\b') { for (; 0 < nspace; --nspace) putchar(' '); putchar(c); if (c == '\n') col = 0; else ++col; } } return 0; }
Solution by anonymous
The instructions for this exercise left some things open to interpretation. For example, Crogmagnon chose to insert tabs at the custom tab stops and let the shell running the program expand the tabs to their default length. Gregory Pietsch also allowed custom tab stops, but the program acted strangely if you wanted to start at a higher tab stop like 15, so it does have some limitations. Codybartfast took the opportunity to create a shared file, as most large programs do, instead of duplicating code and creating two separate monolithic programs. However, his entab program did not stop at the custom tab stops I specified, so I am not sure what he intended the program to do.
That being said, my interpretation of the exercise was to allow the user to create custom tab stops of any length. Whenever a tab is read in, whitespace is added until that custom tab stop is reached. If no custom tab stops are provided, the default-size tab stops are used. If the output goes beyond the last custom tab stop, default tab stops are filled in after the last custom one (the default tab stop can be customized to any desired size). For entab, whitespace is filled in with the maximum number of tabs that do not go past the custom tab stop, and then spaces. For detab, only spaces are used. Note: unlike detab, entab relies on the default tab size being set to the tab stop size of the shell/text reader that will display the output, so that it can place the maximum number of tabs instead of spaces.
I originally found it difficult to tell whether my program was working correctly, so I created a text file to test entab and detab on. This greatly aided in testing my programs as well as the solutions here. The file has a line full of tabs and pipe characters. Below it is a line full of spaces and pipe characters. Depending on the custom tab stop size used for the output, you can see whether the pipes align as specified. This allows one to test entab and detab programs by providing a list of custom tab stops that align with multiples of 8, or multiples of 7, etc., to verify that the program outputs the text correctly. Here is my pipes text file:
/*8: | | | | | | | | | | | | | | | | | | | | | | 7: | | | | | | | | | | | | | | | | | | | | | | 6: | | | | | | | | | | | | | | | | | | | | | | 5: | | | | | | | | | | | | | | | | | | | | | | 4: | | | | | | | | | | | | | | | | | | | | | | 3: | | | | | | | | | | | | | | | | | | | | | | 2: | | | | | | | | | | | | | | | | | | | | | | 1: | | | | | | | | | | | | | | | | | | | | | | */
This is an example test of using the text file with entab to see if a tab width of 3 outputs correctly
cat pipe.txt | ./entab 3 6 9 12 15 18 21 24 27 30 33
Here is my entab program
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Exercise 5-11. Modify the programs entab and detab (written as exercises in Chapter 1)
   to accept a list of tab stops as arguments. Use the default tab settings if there are
   no arguments. */

// Length of the default tab stop. For entab, this needs to be the tab stop size of the
// shell/tool that displays the contents, since tabs are in the output.
#define DEFAULT_TAB_STOP 8
#define MAX_TABS 1000 // max supported custom tabs

void shellsort(int v[], int n);
static int parse_stop(const char *arg, int *out);
static int dist_to_stop(const int tabs[], int ntabs, int col);
static int flush_blanks(const int tabs[], int ntabs, int col, int spaces);

// entab: replace runs of blanks with as many tabs as fit, then spaces.
// Every argument is a custom tab stop column; input tabs advance to the next custom
// stop, and past the last custom stop the default stops (every DEFAULT_TAB_STOP
// columns) apply.
// Returns 0 on success, 1 if more than MAX_TABS stops are given, 2 if an argument does
// not start with a digit or does not fit in an int.
int main(int argc, char *argv[])
{
    int tabs[MAX_TABS]; // custom tab stops, sorted ascending once parsed
    int ntabs = 0;      // number of custom tab stops
    int col = 0;        // current output column
    int spaces = 0;     // blanks read (or owed by a tab) but not yet written
    int c;

    while (--argc > 0) // get all arguments and store them in tabs
    {
        if (ntabs >= MAX_TABS)
            return 1; // too many arguments
        if (!parse_stop(*++argv, &tabs[ntabs]))
            return 2; // argument is not a usable tab stop
        ntabs++;
    }
    shellsort(tabs, ntabs); // puts the stops in numerical order

    while ((c = getchar()) != EOF)
    {
        if (c == ' ') // don't print spaces yet, just count them
        {
            spaces++;
            continue;
        }

        // any other char: the saved blanks must be written before it
        col = flush_blanks(tabs, ntabs, col, spaces);
        spaces = 0;

        switch (c)
        {
        case '\n': // print it and start a new line
            putchar(c);
            col = 0;
            break;
        case '\t': // owe the blanks up to the next stop; they are written with the next flush
            spaces = dist_to_stop(tabs, ntabs, col);
            break;
        default: // all other chars are printed and col is advanced by one
            putchar(c);
            col++;
            break;
        }
    }

    // input ended without a newline: write the blanks that are still owed
    flush_blanks(tabs, ntabs, col, spaces);
    return 0;
}

// parse_stop: store the leading decimal number of arg in *out.
// Returns 1 on success, 0 if arg does not start with a digit or the number does not
// fit in an int. Characters after the leading digits are ignored.
static int parse_stop(const char *arg, int *out)
{
    long value;

    if (!isdigit((unsigned char)*arg)) // the cast keeps negative char values out of isdigit
        return 0;
    errno = 0;
    value = strtol(arg, NULL, 10);
    if (errno == ERANGE || value > INT_MAX)
        return 0;
    *out = (int)value;
    return 1;
}

// dist_to_stop: distance from col to the next tab stop after it.
// Uses the first custom stop greater than col; when there is none, the next default
// stop. The result is always at least 1.
static int dist_to_stop(const int tabs[], int ntabs, int col)
{
    int i;

    for (i = 0; i < ntabs; i++)
        if (tabs[i] > col)
            return tabs[i] - col;
    return DEFAULT_TAB_STOP - (col % DEFAULT_TAB_STOP);
}

// flush_blanks: write `spaces` columns of whitespace starting at column col, using a
// tab wherever one fits, and return the column reached.
static int flush_blanks(const int tabs[], int ntabs, int col, int spaces)
{
    while (spaces > 0)
    {
        int k = DEFAULT_TAB_STOP - (col % DEFAULT_TAB_STOP); // distance to the next natural tab stop
        int j = dist_to_stop(tabs, ntabs, col);              // distance to the next custom/default stop

        if (k <= j && spaces >= k) // a natural tab fits: substitute it for the spaces
        {
            putchar('\t');
            col += k;
            spaces -= k;
        }
        else // otherwise fill with spaces up to the next stop; j >= 1, so this always advances
        {
            while (spaces > 0 && j-- > 0)
            {
                putchar(' ');
                col++;
                spaces--;
            }
        }
    }
    return col;
}

// shellsort: sort v[0]...v[n-1] into increasing order
void shellsort(int v[], int n)
{
    int gap, i, j, temp;

    for (gap = n / 2; gap > 0; gap /= 2)
        for (i = gap; i < n; i++)
            for (j = i - gap; j >= 0 && v[j] > v[j + gap]; j -= gap)
            {
                temp = v[j];
                v[j] = v[j + gap];
                v[j + gap] = temp;
            }
}
Here is my detab program
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Exercise 5-11. Modify the programs entab and detab (written as exercises in Chapter 1)
   to accept a list of tab stops as arguments. Use the default tab settings if there are
   no arguments. */

// Length of the default tab stop. For detab, this can be any custom length since no
// tabs are written to the output.
#define DEFAULT_TAB_STOP 8
#define MAX_TABS 1000 // max supported custom tabs

void shellsort(int v[], int n);
static int parse_stop(const char *arg, int *out);
static int dist_to_stop(const int tabs[], int ntabs, int col);

// detab: replace every tab with spaces up to the next tab stop.
// Every argument is a custom tab stop column; past the last custom stop the default
// stops (every DEFAULT_TAB_STOP columns) apply.
// Returns 0 on success, 1 if more than MAX_TABS stops are given, 2 if an argument does
// not start with a digit or does not fit in an int.
int main(int argc, char *argv[])
{
    int tabs[MAX_TABS]; // custom tab stops, sorted ascending once parsed
    int ntabs = 0;      // number of custom tab stops
    int col = 0;        // current output column
    int pad;
    int c;

    while (--argc > 0) // get all arguments and store them in tabs
    {
        if (ntabs >= MAX_TABS)
            return 1; // too many arguments
        if (!parse_stop(*++argv, &tabs[ntabs]))
            return 2; // argument is not a usable tab stop
        ntabs++;
    }
    shellsort(tabs, ntabs); // puts the stops in numerical order

    // Spaces are written as soon as they are read, so nothing is left pending (and
    // lost) when the input ends without a newline.
    while ((c = getchar()) != EOF)
    {
        switch (c)
        {
        case '\n': // print it and start a new line
            putchar(c);
            col = 0;
            break;
        case '\t': // fill with spaces up to the next custom/default tab stop
            for (pad = dist_to_stop(tabs, ntabs, col); pad > 0; pad--)
            {
                putchar(' ');
                col++;
            }
            break;
        default: // all other chars, spaces included, are printed and advance col by one
            putchar(c);
            col++;
            break;
        }
    }
    return 0;
}

// parse_stop: store the leading decimal number of arg in *out.
// Returns 1 on success, 0 if arg does not start with a digit or the number does not
// fit in an int. Characters after the leading digits are ignored.
static int parse_stop(const char *arg, int *out)
{
    long value;

    if (!isdigit((unsigned char)*arg)) // the cast keeps negative char values out of isdigit
        return 0;
    errno = 0;
    value = strtol(arg, NULL, 10);
    if (errno == ERANGE || value > INT_MAX)
        return 0;
    *out = (int)value;
    return 1;
}

// dist_to_stop: distance from col to the next tab stop after it.
// Uses the first custom stop greater than col; when there is none, the next default
// stop. The result is always at least 1.
static int dist_to_stop(const int tabs[], int ntabs, int col)
{
    int i;

    for (i = 0; i < ntabs; i++)
        if (tabs[i] > col)
            return tabs[i] - col;
    return DEFAULT_TAB_STOP - (col % DEFAULT_TAB_STOP);
}

// shellsort: sort v[0]...v[n-1] into increasing order
void shellsort(int v[], int n)
{
    int gap, i, j, temp;

    for (gap = n / 2; gap > 0; gap /= 2)
        for (i = gap; i < n; i++)
            for (j = i - gap; j >= 0 && v[j] > v[j + gap]; j -= gap)
            {
                temp = v[j];
                v[j] = v[j + gap];
                v[j + gap] = temp;
            }
}