The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 6.04 on page 143
Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.
Solution by Bryan Williams
Bryan's solution is, as far as I can tell, Category 1 only because
he uses EXIT_SUCCESS
and EXIT_FAILURE
.
/* Chapter 6. Structures Write a program that prints out the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count. Author: Bryan Williams */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <assert.h> typedef struct WORD { char *Word; size_t Count; struct WORD *Left; struct WORD *Right; } WORD; /* Assumptions: input is on stdin, output to stdout. Plan: read the words into a tree, keeping a count of how many we have, allocate an array big enough to hold Treecount (WORD *)'s walk the tree to populate the array. qsort the array, based on size. printf the array free the array free the tree free tibet (optional) free international shipping! */ #define SUCCESS 0 #define CANNOT_MALLOC_WORDARRAY 1 #define NO_WORDS_ON_INPUT 2 #define NO_MEMORY_FOR_WORDNODE 3 #define NO_MEMORY_FOR_WORD 4 #define NONALPHA "1234567890 \v\f\n\t\r+=-*/\\,.;:'#~?<>|{}[]`!\"�$%^&()" int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input); int AddToTree(WORD **DestTree, size_t *Treecount, char *Word); int WalkTree(WORD **DestArray, WORD *Word); int CompareCounts(const void *vWord1, const void *vWord2); int OutputWords(FILE *Dest, size_t Count, WORD **WordArray); void FreeTree(WORD *W); char *dupstr(char *s); int main(void) { int Status = SUCCESS; WORD *Words = NULL; size_t Treecount = 0; WORD **WordArray = NULL; /* Read the words on stdin into a tree */ if(SUCCESS == Status) { Status = ReadInputToTree(&Words, &Treecount, stdin); } /* Sanity check for no sensible input */ if(SUCCESS == Status) { if(0 == Treecount) { Status = NO_WORDS_ON_INPUT; } } /* allocate a sufficiently large array */ if(SUCCESS == Status) { WordArray = malloc(Treecount * sizeof *WordArray); if(NULL == WordArray) { Status = CANNOT_MALLOC_WORDARRAY; } } /* Walk the tree into the array */ if(SUCCESS == Status) { Status = WalkTree(WordArray, Words); } /* qsort the array */ if(SUCCESS == Status) { qsort(WordArray, Treecount, sizeof *WordArray, CompareCounts); } /* walk down the WordArray outputting the values */ if(SUCCESS == Status) { Status = OutputWords(stdout, Treecount, WordArray); } /* free the word array */ if(NULL != WordArray) { free(WordArray); WordArray = NULL; } /* and free the tree memory */ if(NULL != Words) { FreeTree(Words); Words = NULL; } /* Error report and we are finshed */ if(SUCCESS != Status) { fprintf(stderr, "Program failed with code %d\n", Status); } return (SUCCESS == Status ? EXIT_SUCCESS : EXIT_FAILURE); } void FreeTree(WORD *W) { if(NULL != W) { if(NULL != W->Word) { free(W->Word); W->Word = NULL; } if(NULL != W->Left) { FreeTree(W->Left); W->Left = NULL; } if(NULL != W->Right) { FreeTree(W->Right); W->Right = NULL; } } } int AddToTree(WORD **DestTree, size_t *Treecount, char *Word) { int Status = SUCCESS; int CompResult = 0; /* safety check */ assert(NULL != DestTree); assert(NULL != Treecount); assert(NULL != Word); /* ok, either *DestTree is NULL or it isn't (deep huh?) */ if(NULL == *DestTree) /* this is the place to add it then */ { *DestTree = malloc(sizeof **DestTree); if(NULL == *DestTree) { /* horrible - we're out of memory */ Status = NO_MEMORY_FOR_WORDNODE; } else { (*DestTree)->Left = NULL; (*DestTree)->Right = NULL; (*DestTree)->Count = 1; (*DestTree)->Word = dupstr(Word); if(NULL == (*DestTree)->Word) { /* even more horrible - we've run out of memory in the middle */ Status = NO_MEMORY_FOR_WORD; free(*DestTree); *DestTree = NULL; } else { /* everything was successful, add one to the tree nodes count */ ++*Treecount; } } } else /* we need to make a decision */ { CompResult = strcmp(Word, (*DestTree)->Word); if(0 < CompResult) { Status = AddToTree(&(*DestTree)->Left, Treecount, Word); } else if(0 > CompResult) { Status = AddToTree(&(*DestTree)->Left, Treecount, Word); } else { /* add one to the count - this is the same node */ ++(*DestTree)->Count; } } /* end of else we need to make a decision */ return Status; } int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input) { int Status = SUCCESS; char Buf[8192] = {0}; char *Word = NULL; /* safety check */ assert(NULL != DestTree); assert(NULL != Treecount); assert(NULL != Input); /* for every line */ while(NULL != fgets(Buf, sizeof Buf, Input)) { /* strtok the input to get only alpha character words */ Word = strtok(Buf, NONALPHA); while(SUCCESS == Status && NULL != Word) { /* deal with this word by adding it to the tree */ Status = AddToTree(DestTree, Treecount, Word); /* next word */ if(SUCCESS == Status) { Word = strtok(NULL, NONALPHA); } } } return Status; } int WalkTree(WORD **DestArray, WORD *Word) { int Status = SUCCESS; static WORD **Write = NULL; /* safety check */ assert(NULL != Word); /* store the starting point if this is the first call */ if(NULL != DestArray) { Write = DestArray; } /* Now add this node and it's kids */ if(NULL != Word) { *Write = Word; ++Write; if(NULL != Word->Left) { Status = WalkTree(NULL, Word->Left); } if(NULL != Word->Right) { Status = WalkTree(NULL, Word->Right); } } return Status; } /* CompareCounts is called by qsort. This means that it gets pointers to the data items being compared. In this case the data items are pointers too. */ int CompareCounts(const void *vWord1, const void *vWord2) { int Result = 0; WORD * const *Word1 = vWord1; WORD * const *Word2 = vWord2; assert(NULL != vWord1); assert(NULL != vWord2); /* ensure the result is either 1, 0 or -1 */ if((*Word1)->Count < (*Word2)->Count) { Result = 1; } else if((*Word1)->Count > (*Word2)->Count) { Result = -1; } else { Result = 0; } return Result; } int OutputWords(FILE *Dest, size_t Count, WORD **WordArray) { int Status = SUCCESS; size_t Pos = 0; /* safety check */ assert(NULL != Dest); assert(NULL != WordArray); /* Print a header */ fprintf(Dest, "Total Words : %lu\n", (unsigned long)Count); /* Print the words in descending order */ while(SUCCESS == Status && Pos < Count) { fprintf(Dest, "%10lu %s\n", (unsigned long)WordArray[Pos]->Count, WordArray[Pos]->Word); ++Pos; } return Status; } /* dupstr: duplicate a string */ char *dupstr(char *s) { char *Result = NULL; size_t slen = 0; /* sanity check */ assert(NULL != s); /* get string length */ slen = strlen(s); /* allocate enough storage */ Result = malloc(slen + 1); /* populate string */ if(NULL != Result) { memcpy(Result, s, slen); *(Result + slen) = '\0'; } return Result; }
Solution by codybartfast
Uses quicksort to sort on frequency (as a result words with the same frequency are not in alphabetic order).
#include <ctype.h> #include <stdio.h> #include <string.h> #include "getword.h" #include "quicksort.h" #define MAXWORD 100 struct wnode { char *key; int count; struct wnode *left; struct wnode *right; }; struct wnode *addword(struct wnode *p, char *key, int *wc); int treetoarray(struct wnode **warray, struct wnode *wnode, int i); int compare(struct wnode *a, struct wnode *b); struct wnode *walloc(void); void freewnode(struct wnode *wnode); char *keyfrom(char *key, char *word); int isnoiseword(char *word); char *strdup(const char *s); int main(void) { char word[MAXWORD], key[MAXWORD]; int i, wordcount = 0; struct wnode **warray, *wnode = NULL; while (getword(&streamin, word, MAXWORD) != EOF) if (!isnoiseword(keyfrom(key, word))) if ((wnode = addword(wnode, key, &wordcount)) == NULL) return 1; warray = (struct wnode **)malloc(wordcount * sizeof(struct wnode *)); treetoarray(warray, wnode, 0); quicksort((void **)warray, 0, wordcount - 1, (VOIDCOMP)compare); for (i = 0; i < wordcount; i++) printf("%4d %s\n", warray[i]->count, warray[i]->key); freewnode(wnode); free(warray); return 0; } struct wnode *addword(struct wnode *p, char *key, int *wc) { int cond; if (p == NULL) { if ((p = walloc()) == NULL) return NULL; p->key = (char *)strdup(key); p->left = p->right = NULL; p->count = 1; (*wc)++; } else if ((cond = strcmp(key, p->key)) == 0) { p->count++; } else if (cond < 0) { if ((p->left = addword(p->left, key, wc)) == NULL) return NULL; } else { if ((p->right = addword(p->right, key, wc)) == NULL) return NULL; } return p; } int treetoarray(struct wnode **warray, struct wnode *wnode, int idx) { if (!wnode) return idx; warray[idx++] = wnode; idx = treetoarray(warray, wnode->left, idx); return treetoarray(warray, wnode->right, idx); } int compare(struct wnode *a, struct wnode *b) { return b->count - a->count; } struct wnode *walloc(void) { return (struct wnode *)malloc(sizeof(struct wnode)); } void freewnode(struct wnode *wnode) { if (wnode == NULL) return; freewnode(wnode->left); freewnode(wnode->right); free(wnode->key); free(wnode); } char *keyfrom(char *key, char *word) { char *k = key; while ((*k++ = tolower(*word++))) ; return key; } int isnoiseword(char *word) { static char *noisewords[] = { "a", "about", "all", "an", "and", "as", "at", "be", "but", "by", "do", "for", "from", "get", "go", "have", "he", "her", "his", "i", "if", "in", "it", "me", "my", "not", "of", "on", "one", "or", "out", "say", "she", "so", "that", "the", "their", "there", "they", "this", "to", "up", "we", "what", "when", "which", "who", "will", "with", "would", "you" }; static int n = (sizeof noisewords / sizeof(char *)); int cond, low, high, mid; low = 0; high = n - 1; while (low <= high) { mid = (low + high) / 2; if ((cond = strcmp(word, noisewords[mid])) < 0) high = mid - 1; else if (cond > 0) low = mid + 1; else return 1; } return 0; } char *strdup(const char *s) { char *p = (char *)malloc(strlen(s) + 1); if (p != NULL) strcpy(p, s); return p; }
Full, latest code on github
Solution by anonymous
Most of the code came from the book and I adapted it to work for this exercise. I used a pointer array of tnodes to keep track of all of the nodes and to sort them. This seemed like the most efficient way to get the job done. I updated the qsort (now called qsortd) and swap functions to process the tnode pointers instead of char array pointers. I also swapped the comparison in the function so it sorts in descending order by default (hence the d in the name). Finally, treeprint was updated to use the sorted tnode pointers for printing.
#include <stdio.h> #include <ctype.h> #include <string.h> #include <stdlib.h> /* Exercise 6-4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count. */ struct tnode // the tree node: { char *word; // points to the text int count; // number of occurrences struct tnode *left; // left child struct tnode *right; // right child }; #define MAXWORD 100 #define BUFSIZE 100 #define MAXSUPPORTEDWORDS 10000 struct tnode *addtree(struct tnode *, char *); void treeprint(void); int getword(char *, int); struct tnode *talloc(void); char *mystrdup(char *); int getword(char *word, int lim); int getch(void); void ungetch(int); void qsortd(struct tnode *v[], int left, int right); void swap(struct tnode *v[], int i, int j); char buf[BUFSIZE]; // buffer for ungetch int bufp = 0; // next free position in buf struct tnode *treeNodes[MAXSUPPORTEDWORDS] = { 0 }; // initialized to null int treeP = 0; // next free position in treeNodes // word frequency count int main() { struct tnode *root; char word[MAXWORD]; root = NULL; while (getword(word, MAXWORD) != EOF) if (isalpha(word[0]) || word[0] == '_') root = addtree(root, word); qsortd(treeNodes, 0, treeP - 1); treeprint(); return 0; } // add a node with w, at or below p struct tnode *addtree(struct tnode *p, char *w) { int result; if (p == NULL) // a new word has arrived { p = talloc(); // make a new node p->word = mystrdup(w); p->count = 1; p->left = p->right = NULL; treeNodes[treeP++] = p; } else if ((result = strcmp(w, p->word)) == 0) p->count++; // repeated word else if (result < 0) // less than into left subtree p->left = addtree(p->left, w); else // greater than into right subtree p->right = addtree(p->right, w); return p; } // in-order print of tree p void treeprint(void) { for (int i = 0; i < treeP; i++) printf("%4d %s\n", treeNodes[i]->count, treeNodes[i]->word); } struct tnode *talloc(void) { return (struct tnode *) malloc(sizeof(struct tnode)); } char *mystrdup(char *s) // make a duplicate of s { char *p = (char *) malloc(strlen(s) + 1); // + 1 for '\0' if (p != NULL) strcpy(p, s); return p; } // get next word or character from input int getword(char *word, int lim) { int c; char *w = word; while ((c = tolower(getch())) == '\t' || c == ' ') ; if (c != EOF) *w++ = c; if (!isalpha(c) && c != '_') { *w = '\0'; return c; } for ( ; --lim > 0; w++) if (!isalnum(*w = tolower(getch())) && *w != '_') { ungetch(*w); break; } *w = '\0'; return word[0]; } // get a (possibly pushed back) character int getch(void) { return (bufp > 0) ? buf[--bufp] : getchar(); } // push character back on input void ungetch(int c) { if (bufp >= BUFSIZE) printf("ungetch: too many characters\n"); else buf[bufp++] = c; } // sort v[left]...v[right] into decreasing order void qsortd(struct tnode *v[], int left, int right) { int i, last; if (left >= right) // do nothing if array contains fewer than two elements return; swap(v, left, (left + right) / 2); // move partition element to v[0] last = left; for (i = left + 1; i <= right; i++) // partition if (v[left]->count < v[i]->count) // if v[left] is less than v[i], swap them to put them in order. Note: v[i], v[left] == increasing order and v[left], v[i] == decreasing order swap(v, ++last, i); swap(v, left, last); // restore partition element qsortd(v, left, last - 1); qsortd(v, last + 1, right); } // interchange v[i] and v[j] void swap(struct tnode *v[], int i, int j) { // grab memory address of tnode and then swap where pointers are pointing struct tnode *temp = v[i]; v[i] = v[j]; v[j] = temp; }