The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 6.04 on page 143
Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.
Warning: the answers on these pages are crowdsourced, open to anybody to provide/edit after free sign-up, have not all been vetted, might not be correct, let alone optimal, and should not be assumed to be.
Solution by Bryan Williams
Bryan's solution is, as far as I can tell, Category 1 only because
he uses EXIT_SUCCESS
and EXIT_FAILURE
.
/* Chapter 6. Structures Write a program that prints out the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count. Author: Bryan Williams */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <assert.h> typedef struct WORD { char *Word; size_t Count; struct WORD *Left; struct WORD *Right; } WORD; /* Assumptions: input is on stdin, output to stdout. Plan: read the words into a tree, keeping a count of how many we have, allocate an array big enough to hold Treecount (WORD *)'s walk the tree to populate the array. qsort the array, based on size. printf the array free the array free the tree free tibet (optional) free international shipping! */ #define SUCCESS 0 #define CANNOT_MALLOC_WORDARRAY 1 #define NO_WORDS_ON_INPUT 2 #define NO_MEMORY_FOR_WORDNODE 3 #define NO_MEMORY_FOR_WORD 4 #define NONALPHA "1234567890 \v\f\n\t\r+=-*/\\,.;:'#~?<>|{}[]`!\"�$%^&()" int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input); int AddToTree(WORD **DestTree, size_t *Treecount, char *Word); int WalkTree(WORD **DestArray, WORD *Word); int CompareCounts(const void *vWord1, const void *vWord2); int OutputWords(FILE *Dest, size_t Count, WORD **WordArray); void FreeTree(WORD *W); char *dupstr(char *s); int main(void) { int Status = SUCCESS; WORD *Words = NULL; size_t Treecount = 0; WORD **WordArray = NULL; /* Read the words on stdin into a tree */ if(SUCCESS == Status) { Status = ReadInputToTree(&Words, &Treecount, stdin); } /* Sanity check for no sensible input */ if(SUCCESS == Status) { if(0 == Treecount) { Status = NO_WORDS_ON_INPUT; } } /* allocate a sufficiently large array */ if(SUCCESS == Status) { WordArray = malloc(Treecount * sizeof *WordArray); if(NULL == WordArray) { Status = CANNOT_MALLOC_WORDARRAY; } } /* Walk the tree into the array */ if(SUCCESS == Status) { Status = WalkTree(WordArray, Words); } /* qsort the array */ if(SUCCESS == Status) { qsort(WordArray, Treecount, sizeof *WordArray, CompareCounts); } /* walk down the WordArray outputting the values */ if(SUCCESS == Status) { Status = OutputWords(stdout, Treecount, WordArray); } /* free the word array */ if(NULL != WordArray) { free(WordArray); WordArray = NULL; } /* and free the tree memory */ if(NULL != Words) { FreeTree(Words); Words = NULL; } /* Error report and we are finshed */ if(SUCCESS != Status) { fprintf(stderr, "Program failed with code %d\n", Status); } return (SUCCESS == Status ? EXIT_SUCCESS : EXIT_FAILURE); } void FreeTree(WORD *W) { if(NULL != W) { if(NULL != W->Word) { free(W->Word); W->Word = NULL; } if(NULL != W->Left) { FreeTree(W->Left); W->Left = NULL; } if(NULL != W->Right) { FreeTree(W->Right); W->Right = NULL; } } } int AddToTree(WORD **DestTree, size_t *Treecount, char *Word) { int Status = SUCCESS; int CompResult = 0; /* safety check */ assert(NULL != DestTree); assert(NULL != Treecount); assert(NULL != Word); /* ok, either *DestTree is NULL or it isn't (deep huh?) */ if(NULL == *DestTree) /* this is the place to add it then */ { *DestTree = malloc(sizeof **DestTree); if(NULL == *DestTree) { /* horrible - we're out of memory */ Status = NO_MEMORY_FOR_WORDNODE; } else { (*DestTree)->Left = NULL; (*DestTree)->Right = NULL; (*DestTree)->Count = 1; (*DestTree)->Word = dupstr(Word); if(NULL == (*DestTree)->Word) { /* even more horrible - we've run out of memory in the middle */ Status = NO_MEMORY_FOR_WORD; free(*DestTree); *DestTree = NULL; } else { /* everything was successful, add one to the tree nodes count */ ++*Treecount; } } } else /* we need to make a decision */ { CompResult = strcmp(Word, (*DestTree)->Word); if(0 < CompResult) { Status = AddToTree(&(*DestTree)->Left, Treecount, Word); } else if(0 > CompResult) { Status = AddToTree(&(*DestTree)->Left, Treecount, Word); } else { /* add one to the count - this is the same node */ ++(*DestTree)->Count; } } /* end of else we need to make a decision */ return Status; } int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input) { int Status = SUCCESS; char Buf[8192] = {0}; char *Word = NULL; /* safety check */ assert(NULL != DestTree); assert(NULL != Treecount); assert(NULL != Input); /* for every line */ while(NULL != fgets(Buf, sizeof Buf, Input)) { /* strtok the input to get only alpha character words */ Word = strtok(Buf, NONALPHA); while(SUCCESS == Status && NULL != Word) { /* deal with this word by adding it to the tree */ Status = AddToTree(DestTree, Treecount, Word); /* next word */ if(SUCCESS == Status) { Word = strtok(NULL, NONALPHA); } } } return Status; } int WalkTree(WORD **DestArray, WORD *Word) { int Status = SUCCESS; static WORD **Write = NULL; /* safety check */ assert(NULL != Word); /* store the starting point if this is the first call */ if(NULL != DestArray) { Write = DestArray; } /* Now add this node and it's kids */ if(NULL != Word) { *Write = Word; ++Write; if(NULL != Word->Left) { Status = WalkTree(NULL, Word->Left); } if(NULL != Word->Right) { Status = WalkTree(NULL, Word->Right); } } return Status; } /* CompareCounts is called by qsort. This means that it gets pointers to the data items being compared. In this case the data items are pointers too. */ int CompareCounts(const void *vWord1, const void *vWord2) { int Result = 0; WORD * const *Word1 = vWord1; WORD * const *Word2 = vWord2; assert(NULL != vWord1); assert(NULL != vWord2); /* ensure the result is either 1, 0 or -1 */ if((*Word1)->Count < (*Word2)->Count) { Result = 1; } else if((*Word1)->Count > (*Word2)->Count) { Result = -1; } else { Result = 0; } return Result; } int OutputWords(FILE *Dest, size_t Count, WORD **WordArray) { int Status = SUCCESS; size_t Pos = 0; /* safety check */ assert(NULL != Dest); assert(NULL != WordArray); /* Print a header */ fprintf(Dest, "Total Words : %lu\n", (unsigned long)Count); /* Print the words in descending order */ while(SUCCESS == Status && Pos < Count) { fprintf(Dest, "%10lu %s\n", (unsigned long)WordArray[Pos]->Count, WordArray[Pos]->Word); ++Pos; } return Status; } /* dupstr: duplicate a string */ char *dupstr(char *s) { char *Result = NULL; size_t slen = 0; /* sanity check */ assert(NULL != s); /* get string length */ slen = strlen(s); /* allocate enough storage */ Result = malloc(slen + 1); /* populate string */ if(NULL != Result) { memcpy(Result, s, slen); *(Result + slen) = '\0'; } return Result; }
Solution by codybartfast
Uses quicksort to sort on frequency (as a result words with the same frequency are not in alphabetic order).
#include <ctype.h> #include <stdio.h> #include <string.h> #include "getword.h" #include "quicksort.h" #define MAXWORD 100 struct wnode { char *key; int count; struct wnode *left; struct wnode *right; }; struct wnode *addword(struct wnode *p, char *key, int *wc); int treetoarray(struct wnode **warray, struct wnode *wnode, int i); int compare(struct wnode *a, struct wnode *b); struct wnode *walloc(void); void freewnode(struct wnode *wnode); char *keyfrom(char *key, char *word); int isnoiseword(char *word); char *strdup(const char *s); int main(void) { char word[MAXWORD], key[MAXWORD]; int i, wordcount = 0; struct wnode **warray, *wnode = NULL; while (getword(&streamin, word, MAXWORD) != EOF) if (!isnoiseword(keyfrom(key, word))) if ((wnode = addword(wnode, key, &wordcount)) == NULL) return 1; warray = (struct wnode **)malloc(wordcount * sizeof(struct wnode *)); treetoarray(warray, wnode, 0); quicksort((void **)warray, 0, wordcount - 1, (VOIDCOMP)compare); for (i = 0; i < wordcount; i++) printf("%4d %s\n", warray[i]->count, warray[i]->key); freewnode(wnode); free(warray); return 0; } struct wnode *addword(struct wnode *p, char *key, int *wc) { int cond; if (p == NULL) { if ((p = walloc()) == NULL) return NULL; p->key = (char *)strdup(key); p->left = p->right = NULL; p->count = 1; (*wc)++; } else if ((cond = strcmp(key, p->key)) == 0) { p->count++; } else if (cond < 0) { if ((p->left = addword(p->left, key, wc)) == NULL) return NULL; } else { if ((p->right = addword(p->right, key, wc)) == NULL) return NULL; } return p; } int treetoarray(struct wnode **warray, struct wnode *wnode, int idx) { if (!wnode) return idx; warray[idx++] = wnode; idx = treetoarray(warray, wnode->left, idx); return treetoarray(warray, wnode->right, idx); } int compare(struct wnode *a, struct wnode *b) { return b->count - a->count; } struct wnode *walloc(void) { return (struct wnode *)malloc(sizeof(struct wnode)); } void freewnode(struct wnode *wnode) { if (wnode == NULL) return; freewnode(wnode->left); freewnode(wnode->right); free(wnode->key); free(wnode); } char *keyfrom(char *key, char *word) { char *k = key; while ((*k++ = tolower(*word++))) ; return key; } int isnoiseword(char *word) { static char *noisewords[] = { "a", "about", "all", "an", "and", "as", "at", "be", "but", "by", "do", "for", "from", "get", "go", "have", "he", "her", "his", "i", "if", "in", "it", "me", "my", "not", "of", "on", "one", "or", "out", "say", "she", "so", "that", "the", "their", "there", "they", "this", "to", "up", "we", "what", "when", "which", "who", "will", "with", "would", "you" }; static int n = (sizeof noisewords / sizeof(char *)); int cond, low, high, mid; low = 0; high = n - 1; while (low <= high) { mid = (low + high) / 2; if ((cond = strcmp(word, noisewords[mid])) < 0) high = mid - 1; else if (cond > 0) low = mid + 1; else return 1; } return 0; } char *strdup(const char *s) { char *p = (char *)malloc(strlen(s) + 1); if (p != NULL) strcpy(p, s); return p; }
Full, latest code on github
Solution by anonymous
Most of the code came from the book and I adapted it to work for this exercise. I used a pointer array of tnodes to keep track of all of the nodes and to sort them. This seemed like the most efficient way to get the job done. I updated the qsort (now called qsortd) and swap functions to process the tnode pointers instead of char array pointers. I also swapped the comparison in the function so it sorts in descending order by default (hence the d in the name). Finally, treeprint was updated to use the sorted tnode pointers for printing.
#include <stdio.h> #include <ctype.h> #include <string.h> #include <stdlib.h> /* Exercise 6-4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count. */ struct tnode // the tree node: { char *word; // points to the text int count; // number of occurrences struct tnode *left; // left child struct tnode *right; // right child }; #define MAXWORD 100 #define BUFSIZE 100 #define MAXSUPPORTEDWORDS 10000 struct tnode *addtree(struct tnode *, char *); void treeprint(void); int getword(char *, int); struct tnode *talloc(void); char *mystrdup(char *); int getword(char *word, int lim); int getch(void); void ungetch(int); void qsortd(struct tnode *v[], int left, int right); void swap(struct tnode *v[], int i, int j); char buf[BUFSIZE]; // buffer for ungetch int bufp = 0; // next free position in buf struct tnode *treeNodes[MAXSUPPORTEDWORDS] = { 0 }; // initialized to null int treeP = 0; // next free position in treeNodes // word frequency count int main() { struct tnode *root; char word[MAXWORD]; root = NULL; while (getword(word, MAXWORD) != EOF) if (isalpha(word[0]) || word[0] == '_') root = addtree(root, word); qsortd(treeNodes, 0, treeP - 1); treeprint(); return 0; } // add a node with w, at or below p struct tnode *addtree(struct tnode *p, char *w) { int result; if (p == NULL) // a new word has arrived { p = talloc(); // make a new node p->word = mystrdup(w); p->count = 1; p->left = p->right = NULL; treeNodes[treeP++] = p; } else if ((result = strcmp(w, p->word)) == 0) p->count++; // repeated word else if (result < 0) // less than into left subtree p->left = addtree(p->left, w); else // greater than into right subtree p->right = addtree(p->right, w); return p; } // in-order print of tree p void treeprint(void) { for (int i = 0; i < treeP; i++) printf("%4d %s\n", treeNodes[i]->count, treeNodes[i]->word); } struct tnode *talloc(void) { return (struct tnode *) malloc(sizeof(struct tnode)); } char *mystrdup(char *s) // make a duplicate of s { char *p = (char *) malloc(strlen(s) + 1); // + 1 for '\0' if (p != NULL) strcpy(p, s); return p; } // get next word or character from input int getword(char *word, int lim) { int c; char *w = word; while ((c = tolower(getch())) == '\t' || c == ' ') ; if (c != EOF) *w++ = c; if (!isalpha(c) && c != '_') { *w = '\0'; return c; } for ( ; --lim > 0; w++) if (!isalnum(*w = tolower(getch())) && *w != '_') { ungetch(*w); break; } *w = '\0'; return word[0]; } // get a (possibly pushed back) character int getch(void) { return (bufp > 0) ? buf[--bufp] : getchar(); } // push character back on input void ungetch(int c) { if (bufp >= BUFSIZE) printf("ungetch: too many characters\n"); else buf[bufp++] = c; } // sort v[left]...v[right] into decreasing order void qsortd(struct tnode *v[], int left, int right) { int i, last; if (left >= right) // do nothing if array contains fewer than two elements return; swap(v, left, (left + right) / 2); // move partition element to v[0] last = left; for (i = left + 1; i <= right; i++) // partition if (v[left]->count < v[i]->count) // if v[left] is less than v[i], swap them to put them in order. Note: v[i], v[left] == increasing order and v[left], v[i] == decreasing order swap(v, ++last, i); swap(v, left, last); // restore partition element qsortd(v, left, last - 1); qsortd(v, last + 1, right); } // interchange v[i] and v[j] void swap(struct tnode *v[], int i, int j) { // grab memory address of tnode and then swap where pointers are pointing struct tnode *temp = v[i]; v[i] = v[j]; v[j] = temp; }