The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 7.02 on page 155
Write a program that will print arbitrary input in a sensible way. As a minimum, it should print non-graphic characters in octal or hexadecimal according to local custom, and break long text lines.
Solution by Richard Heathfield
/* Use -o for octal output, -x for hexadecimal */

#include <ctype.h>
#include <limits.h>
#include <stdio.h>

#define OCTAL 8
#define HEXADECIMAL 16

/*
 * ProcessArgs: scan the command line for output-format switches.
 *
 *   argc, argv  as passed to main
 *   output      set to OCTAL for 'o' or HEXADECIMAL for 'x'; left
 *               untouched when neither switch is present
 *
 * Only arguments beginning with '-' are examined; every letter after the
 * dash is treated as a switch.  Arguments are visited from last to first,
 * so when separate arguments conflict the earliest one on the command
 * line takes effect, while within a single argument the last letter wins.
 */
void ProcessArgs(int argc, char *argv[], int *output)
{
  int i = 0;

  while(argc > 1)
  {
    --argc;
    if(argv[argc][0] == '-')
    {
      i = 1;
      while(argv[argc][i] != '\0')
      {
        if(argv[argc][i] == 'o')
        {
          *output = OCTAL;
        }
        else if(argv[argc][i] == 'x')
        {
          *output = HEXADECIMAL;
        }
        else
        {
          /* Quietly ignore unknown switches, because we don't want to
           * interfere with the program's output. Later on in the
           * chapter, the delights of fprintf(stderr, "yadayadayada\n")
           * are revealed, just too late for this exercise.
           */
        }
        ++i;
      }
    }
  }
}

/*
 * can_print: return non-zero if ch can be copied to the output as text.
 *
 * A character qualifies when it is printable or is white space
 * (space, \t, \n, \v, \f, \r).  The <ctype.h> classifiers are used rather
 * than a hand-written table so that characters such as '$', '@' and '`'
 * are not mistaken for binary data.  Values outside the unsigned char
 * range (including EOF) are rejected before the classifiers are called,
 * because passing them would be undefined behaviour.
 */
int can_print(int ch)
{
  if(ch < 0 || ch > UCHAR_MAX)
  {
    return 0;
  }
  return isprint(ch) || isspace(ch);
}

/*
 * main: copy stdin to stdout, passing text through and dumping every
 * other byte as octal or hexadecimal on separate "Binary stream: " lines.
 * Text lines are broken after `split` characters; binary lines are
 * wrapped before they reach `split` columns.
 */
int main(int argc, char *argv[])
{
  static const char label[] = "Binary stream: ";
  const int split = 80;
  int output = HEXADECIMAL;
  int ch;
  int textrun = 0;    /* characters on the current text line */
  int binaryrun = 0;  /* columns used on the current binary line */
  int width;          /* columns taken by one dumped byte */

  ProcessArgs(argc, argv, &output);

  /* "%02X " prints three columns, "%3o " prints four. */
  width = (output == HEXADECIMAL) ? 3 : 4;

  while((ch = getchar()) != EOF)
  {
    if(can_print(ch))
    {
      if(binaryrun > 0)
      {
        /* Leaving a binary line: text starts on a line of its own. */
        putchar('\n');
        binaryrun = 0;
        textrun = 0;
      }
      putchar(ch);
      ++textrun;
      if(ch == '\n')
      {
        textrun = 0;
      }
      if(textrun == split)
      {
        putchar('\n');
        textrun = 0;
      }
    }
    else
    {
      /* Open a labelled binary line when a binary run begins (even at
       * the very start of a line) or when the current one is full.
       */
      if(binaryrun == 0 || binaryrun + width >= split)
      {
        if(textrun > 0 || binaryrun > 0)
        {
          /* Only break the line if something is already on it. */
          putchar('\n');
        }
        fputs(label, stdout);
        textrun = 0;
        binaryrun = (int)(sizeof label - 1);
      }
      if(output == HEXADECIMAL)
      {
        printf("%02X ", (unsigned int)ch);
      }
      else
      {
        printf("%3o ", (unsigned int)ch);
      }
      binaryrun += width;
    }
  }

  /* Terminate the last line only if it is still open. */
  if(textrun > 0 || binaryrun > 0)
  {
    putchar('\n');
  }

  return 0;
}
Solution by codybartfast (cat 0)
Features: * a buffer to remove text that is followed by backspaces, * replacement of non-graphic characters, * line split including backtracking and hyphen insertion, * left, centre or right aligned or justified text, * expansion of tabs to spaces. Each of these is implemented separately, taking a function like 'getchar' and returning a new function that is also like 'getchar'. This enables the features to be composed in any order (though not necessarily with meaningful results), or used separately. In any case the final function can be used in the same way as getchar. I haven't posted most of the code because it is quite long. But I have intentionally made this paragraph quite long because it was originally a single line - but here it is formatted using the code.
#include <stdio.h> #include "bsbuff.h" #include "nongraphic.h" #include "linesplit.h" #include "align.h" #include "detab.h" int tabsize = 4; int linelen = 70; int maxrollover = 7; int main(void) { int c, (*getch)(void); configure_tabs(tabsize); getch = backspace_buffer(&getchar); getch = replace_nongraphic(getch); getch = split_lines(getch, linelen, maxrollover); getch = align(getch, linelen, maxrollover, ALIGN_JUSTIFIED); getch = detab(getch, linelen); while ((c = (*getch)()) != EOF) putchar(c); return 0; }
/* nongraphic.h */

#ifndef NONGRAPHIC_H
#define NONGRAPHIC_H

/*
 * replace_nongraphic: wrap a getchar-like function so that non-graphic
 * characters are replaced by a textual hexadecimal form.
 *
 *   get_char  function with getchar's signature that supplies the input
 *
 * Returns a function with getchar's signature that yields the filtered
 * stream, or NULL if the filter cannot be set up (see nongraphic.c).
 */
int (*replace_nongraphic(int (*get_char)(void)))(void);

#endif /* NONGRAPHIC_H */
/* nongraphic.c */ #include <limits.h> #include <stdio.h> #include "nongraphic.h" #define FORMAT "<0x%02x>" #define LENGTH 6 static char replacement[LENGTH + 1]; static char *pending, *end; static int (*_get_char)(void); static int replace(void); int (*replace_nongraphic(int (*get_char)(void)))(void) { if (UCHAR_MAX > 0xff) { printf("error: UCHAR_MAX larger than 0Xff: %ud", UCHAR_MAX); return NULL; } end = replacement + LENGTH; pending = end; _get_char = get_char; return &replace; } static int replace(void) { if (pending < end) { return *pending++; } else { int c = (*_get_char)(); if ((0x20 <= c && c <= 0x7e) || (0x8 <= c && c <= 0xC) || c == EOF) return c; sprintf(replacement, FORMAT, c); pending = replacement; return replace(); } }
Full code on github
Solution by anonymous
This handles regular ASCII based input well, but it seems that getchar cannot handle Unicode encoding at all (it replaces the characters with question marks).
Conveniently, there is an isgraph function that checks whether a char is a digit, a punctuation mark, or a letter of either case. It is only missing spaces, tabs, newlines, and so on. So combining isgraph with isspace checks whether a character is printable, and greatly simplifies the check of graphic vs non-graphic characters.
Here is my code
#include <stdio.h>
#include <ctype.h>

#define FORMAT "\\x%02X" // "\\x%02X" for hex, "\\%03o" for octal
#define LINELEN 75

/* Exercise 7-2. Write a program that will print arbitrary input in a
   sensible way. As a minimum, it should print non-graphic characters in
   octal or hexadecimal according to local custom, and break long text
   lines. */

/*
 * main: copy stdin to stdout, writing graphic characters and white space
 * as they are and every other byte as a delimited, fixed-width escape
 * (FORMAT), so an escape can always be told apart from the text around it.
 *
 * Output lines are limited to LINELEN columns.  The column counter
 * advances by the number of characters actually written, so a multi-
 * character escape is never split and never pushes a line past the limit.
 * (A tab is counted as one column.)
 */
int main(void)
{
    int c;
    int col = 0; // columns already used on the current output line

    while ((c = getchar()) != EOF)
    {
        char cell[16] = ""; // text of one escape
        int printable = isgraph(c) || isspace(c);
        int width = 1;

        if (c == '\n') // input line ends: pass it on and reset the counter
        {
            putchar(c);
            col = 0;
            continue;
        }

        if (!printable)
        {
            width = snprintf(cell, sizeof cell, FORMAT, (unsigned int)c);
            if (width < 0)
                width = 0;
            else if (width >= (int)sizeof cell) // truncated by snprintf
                width = (int)sizeof cell - 1;
        }

        if (col + width > LINELEN) // would overflow: break the line first
        {
            putchar('\n');
            col = 0;
        }

        if (printable)
            putchar(c); // graphic characters and spaces
        else
            fputs(cell, stdout); // non-graphic characters

        col += width;
    }
    return 0;
}