Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 6.04 on page 143

Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.



Warning: the answers on these pages are crowdsourced, open to anybody to provide/edit after free sign-up, have not all been vetted, might not be correct, let alone optimal, and should not be assumed to be.

Solution by Bryan Williams

Bryan's solution is, as far as I can tell, Category 1 only because he uses EXIT_SUCCESS and EXIT_FAILURE .

/*

  Chapter 6. Structures

          Write a program that prints out the distinct words in its 
          input sorted into decreasing order of frequency of occurrence.
          Precede each word by its count.

  Author: Bryan Williams

*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <assert.h>


typedef struct WORD
{
  char *Word;
  size_t Count;
  struct WORD *Left;
  struct WORD *Right;
} WORD;


/*
  Assumptions: input is on stdin, output to stdout.

  Plan: read the words into a tree, keeping a count of how many we have,
        allocate an array big enough to hold Treecount (WORD *)'s 
        walk the tree to populate the array.
        qsort the array, based on size.
        printf the array
        free the array
        free the tree
        free tibet (optional)
        free international shipping!
*/

#define SUCCESS                      0
#define CANNOT_MALLOC_WORDARRAY      1
#define NO_WORDS_ON_INPUT            2
#define NO_MEMORY_FOR_WORDNODE       3
#define NO_MEMORY_FOR_WORD           4


#define NONALPHA "1234567890 \v\f\n\t\r+=-*/\\,.;:'#~?<>|{}[]`!\"�$%^&()"

int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input);
int AddToTree(WORD **DestTree, size_t *Treecount, char *Word); 
int WalkTree(WORD **DestArray, WORD *Word);
int CompareCounts(const void *vWord1, const void *vWord2);
int OutputWords(FILE *Dest, size_t Count, WORD **WordArray);
void FreeTree(WORD *W);
char *dupstr(char *s);


int main(void)
{
  int Status = SUCCESS;
  WORD *Words = NULL;
  size_t Treecount = 0;
  WORD **WordArray = NULL;

  /* Read the words on stdin into a tree */
  if(SUCCESS == Status)
  {
    Status = ReadInputToTree(&Words, &Treecount, stdin);
  }

  /* Sanity check for no sensible input */
  if(SUCCESS == Status)
  {
    if(0 == Treecount)
    {
      Status = NO_WORDS_ON_INPUT;
    }
  }

  /* allocate a sufficiently large array */
  if(SUCCESS == Status)
  {
     WordArray = malloc(Treecount * sizeof *WordArray);
     if(NULL == WordArray)
     {
       Status = CANNOT_MALLOC_WORDARRAY;
     } 
  }

  /* Walk the tree into the array */
  if(SUCCESS == Status)
  {
    Status = WalkTree(WordArray, Words);
  }

  /* qsort the array */
  if(SUCCESS == Status)
  {
    qsort(WordArray, Treecount, sizeof *WordArray, CompareCounts);
  }

  /* walk down the WordArray outputting the values */
  if(SUCCESS == Status)
  {
    Status = OutputWords(stdout, Treecount, WordArray);
  }

  /* free the word array */
  if(NULL != WordArray)
  {
    free(WordArray);
    WordArray = NULL;
  }

  /* and free the tree memory */
  if(NULL != Words)
  {
    FreeTree(Words);
    Words = NULL;
  }

  /* Error report and we are finshed */
  if(SUCCESS != Status)
  {
    fprintf(stderr, "Program failed with code %d\n", Status);
  }
  return (SUCCESS == Status ? EXIT_SUCCESS : EXIT_FAILURE);
}




void FreeTree(WORD *W)
{
  if(NULL != W)
  {
    if(NULL != W->Word)
    { 
      free(W->Word);
      W->Word = NULL;
    }
    if(NULL != W->Left)
    {
      FreeTree(W->Left);
      W->Left = NULL;
    }
    if(NULL != W->Right)
    {
      FreeTree(W->Right);
      W->Right = NULL;
    }
  }
}


int AddToTree(WORD **DestTree, size_t *Treecount, char *Word)
{
  int Status = SUCCESS;
  int CompResult = 0;

  /* safety check */
  assert(NULL != DestTree);
  assert(NULL != Treecount);
  assert(NULL != Word);

  /* ok, either *DestTree is NULL or it isn't (deep huh?) */
  if(NULL == *DestTree)  /* this is the place to add it then */
  {
    *DestTree = malloc(sizeof **DestTree);
    if(NULL == *DestTree) 
    {
      /* horrible - we're out of memory */
      Status = NO_MEMORY_FOR_WORDNODE;
    }
    else
    {
      (*DestTree)->Left = NULL;
      (*DestTree)->Right = NULL;
      (*DestTree)->Count = 1;
      (*DestTree)->Word = dupstr(Word);
      if(NULL == (*DestTree)->Word)
      {
        /* even more horrible - we've run out of memory in the middle */
        Status = NO_MEMORY_FOR_WORD; 
        free(*DestTree);
        *DestTree = NULL;
      }
      else
      {
        /* everything was successful, add one to the tree nodes count */
        ++*Treecount;
      }
    }
  }
  else  /* we need to make a decision */
  {
    CompResult = strcmp(Word, (*DestTree)->Word);
    if(0 < CompResult)
    {
      Status = AddToTree(&(*DestTree)->Left, Treecount, Word);
    }
    else if(0 > CompResult)
    {
      Status = AddToTree(&(*DestTree)->Left, Treecount, Word);
    }
    else
    {
      /* add one to the count - this is the same node */
      ++(*DestTree)->Count;
    }
  }  /* end of else we need to make a decision */

  return Status;  
}


int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input)
{
  int Status = SUCCESS;
  char Buf[8192] = {0};
  char *Word = NULL;

  /* safety check */
  assert(NULL != DestTree);
  assert(NULL != Treecount);
  assert(NULL != Input);

  /* for every line */
  while(NULL != fgets(Buf, sizeof Buf, Input))
  {
    /* strtok the input to get only alpha character words */
    Word = strtok(Buf, NONALPHA);
    while(SUCCESS == Status && NULL != Word)
    {
      /* deal with this word by adding it to the tree */
      Status = AddToTree(DestTree, Treecount, Word); 

      /* next word */
      if(SUCCESS == Status) 
      {
        Word = strtok(NULL, NONALPHA);
      }
    }
  }

  return Status;
}




int WalkTree(WORD **DestArray, WORD *Word)
{
  int Status = SUCCESS;
  static WORD **Write = NULL;
  
  /* safety check */
  assert(NULL != Word);

  /* store the starting point if this is the first call */
  if(NULL != DestArray)
  {
    Write = DestArray;
  }

  /* Now add this node and it's kids */
  if(NULL != Word)
  {
    *Write = Word;
    ++Write;
    if(NULL != Word->Left)
    {
      Status = WalkTree(NULL, Word->Left);
    }
    if(NULL != Word->Right)
    {
      Status = WalkTree(NULL, Word->Right);
    }
  }

  return Status;
}


/*
   CompareCounts is called by qsort. This means that it gets pointers to the 
   data items being compared. In this case the data items are pointers too. 
*/
int CompareCounts(const void *vWord1, const void *vWord2)
{
  int Result = 0;
  WORD * const *Word1 = vWord1;
  WORD * const *Word2 = vWord2;
 
  assert(NULL != vWord1); 
  assert(NULL != vWord2); 

  /* ensure the result is either 1, 0 or -1 */
  if((*Word1)->Count < (*Word2)->Count)
  {
    Result = 1;
  }
  else if((*Word1)->Count > (*Word2)->Count)
  {
    Result = -1;
  }
  else
  {
    Result = 0;
  }

  return Result;
}


int OutputWords(FILE *Dest, size_t Count, WORD **WordArray)
{
  int Status = SUCCESS;
  size_t Pos = 0;
 
  /* safety check */
  assert(NULL != Dest);
  assert(NULL != WordArray);

  /* Print a header */
  fprintf(Dest, "Total Words : %lu\n", (unsigned long)Count); 

  /* Print the words in descending order */
  while(SUCCESS == Status && Pos < Count) 
  {
    fprintf(Dest, "%10lu %s\n", (unsigned long)WordArray[Pos]->Count, WordArray[Pos]->Word);
    ++Pos;
  }

  return Status;
}


/*
    dupstr: duplicate a string
*/
char *dupstr(char *s)
{
  char *Result = NULL;
  size_t slen = 0;
  
  /* sanity check */
  assert(NULL != s);

  /* get string length */
  slen = strlen(s);
  
  /* allocate enough storage */
  Result = malloc(slen + 1);
 
  /* populate string */
  if(NULL != Result)
  {
    memcpy(Result, s, slen);
    *(Result + slen) = '\0';
  }

  return Result;
}


Solution by codybartfast

Category 0

Uses quicksort to sort on frequency (as a result words with the same frequency are not in alphabetic order).

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "getword.h"
#include "quicksort.h"

#define MAXWORD 100

struct wnode {
	char *key;
	int count;
	struct wnode *left;
	struct wnode *right;
};
struct wnode *addword(struct wnode *p, char *key, int *wc);
int treetoarray(struct wnode **warray, struct wnode *wnode, int i);
int compare(struct wnode *a, struct wnode *b);
struct wnode *walloc(void);
void freewnode(struct wnode *wnode);
char *keyfrom(char *key, char *word);
int isnoiseword(char *word);
char *strdup(const char *s);

int main(void)
{
	char word[MAXWORD], key[MAXWORD];
	int i, wordcount = 0;
	struct wnode **warray, *wnode = NULL;

	while (getword(&streamin, word, MAXWORD) != EOF)
		if (!isnoiseword(keyfrom(key, word)))
			if ((wnode = addword(wnode, key, &wordcount)) == NULL)
				return 1;

	warray = (struct wnode **)malloc(wordcount * sizeof(struct wnode *));
	treetoarray(warray, wnode, 0);
	quicksort((void **)warray, 0, wordcount - 1, (VOIDCOMP)compare);
	for (i = 0; i < wordcount; i++)
		printf("%4d  %s\n", warray[i]->count, warray[i]->key);

	freewnode(wnode);
	free(warray);
	return 0;
}

struct wnode *addword(struct wnode *p, char *key, int *wc)
{
	int cond;
	if (p == NULL) {
		if ((p = walloc()) == NULL)
			return NULL;
		p->key = (char *)strdup(key);
		p->left = p->right = NULL;
		p->count = 1;
		(*wc)++;
	} else if ((cond = strcmp(key, p->key)) == 0) {
		p->count++;
	} else if (cond < 0) {
		if ((p->left = addword(p->left, key, wc)) == NULL)
			return NULL;
	} else {
		if ((p->right = addword(p->right, key, wc)) == NULL)
			return NULL;
	}
	return p;
}

int treetoarray(struct wnode **warray, struct wnode *wnode, int idx)
{
	if (!wnode)
		return idx;
	warray[idx++] = wnode;
	idx = treetoarray(warray, wnode->left, idx);
	return treetoarray(warray, wnode->right, idx);
}

int compare(struct wnode *a, struct wnode *b)
{
	return b->count - a->count;
}

struct wnode *walloc(void)
{
	return (struct wnode *)malloc(sizeof(struct wnode));
}

void freewnode(struct wnode *wnode)
{
	if (wnode == NULL)
		return;
	freewnode(wnode->left);
	freewnode(wnode->right);
	free(wnode->key);
	free(wnode);
}

char *keyfrom(char *key, char *word)
{
	char *k = key;
	while ((*k++ = tolower(*word++)))
		;
	return key;
}

int isnoiseword(char *word)
{
	static char *noisewords[] = {
		"a",	 "about", "all",   "an",    "and",  "as",   "at",
		"be",	 "but",	  "by",	   "do",    "for",  "from", "get",
		"go",	 "have",  "he",	   "her",   "his",  "i",    "if",
		"in",	 "it",	  "me",	   "my",    "not",  "of",   "on",
		"one",	 "or",	  "out",   "say",   "she",  "so",   "that",
		"the",	 "their", "there", "they",  "this", "to",   "up",
		"we",	 "what",  "when",  "which", "who",  "will", "with",
		"would", "you"
	};
	static int n = (sizeof noisewords / sizeof(char *));

	int cond, low, high, mid;

	low = 0;
	high = n - 1;
	while (low <= high) {
		mid = (low + high) / 2;
		if ((cond = strcmp(word, noisewords[mid])) < 0)
			high = mid - 1;
		else if (cond > 0)
			low = mid + 1;
		else
			return 1;
	}
	return 0;
}

char *strdup(const char *s)
{
	char *p = (char *)malloc(strlen(s) + 1);
	if (p != NULL)
		strcpy(p, s);
	return p;
}

Full, latest code on github

Solution by anonymous

Most of the code came from the book and I adapted it to work for this exercise. I used a pointer array of tnodes to keep track of all of the nodes and to sort them. This seemed like the most efficient way to get the job done. I updated the qsort (now called qsortd) and swap functions to process the tnode pointers instead of char array pointers. I also swapped the comparison in the function so it sorts in descending order by default (hence the d in the name). Finally, treeprint was updated to use the sorted tnode pointers for printing.

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

/*
    Exercise 6-4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.
*/

struct tnode // the tree node:
{ 
    char *word;             // points to the text
    int count;              // number of occurrences
    struct tnode *left;     // left child
    struct tnode *right;    // right child
};

#define MAXWORD 100
#define BUFSIZE 100
#define MAXSUPPORTEDWORDS 10000

struct tnode *addtree(struct tnode *, char *);
void treeprint(void);
int getword(char *, int);
struct tnode *talloc(void);
char *mystrdup(char *);
int getword(char *word, int lim);
int getch(void);
void ungetch(int);
void qsortd(struct tnode *v[], int left, int right);
void swap(struct tnode *v[], int i, int j);

char buf[BUFSIZE];  // buffer for ungetch
int bufp = 0;       // next free position in buf
struct tnode *treeNodes[MAXSUPPORTEDWORDS] = { 0 }; // initialized to null
int treeP = 0;      // next free position in treeNodes

// word frequency count
int main()
{
    struct tnode *root;
    char word[MAXWORD];

    root = NULL;
    while (getword(word, MAXWORD) != EOF)
        if (isalpha(word[0]) || word[0] == '_')
            root = addtree(root, word);
    qsortd(treeNodes, 0, treeP - 1);
    treeprint();
    return 0;
}

// add a node with w, at or below p
struct tnode *addtree(struct tnode *p, char *w)
{
    int result;

    if (p == NULL) // a new word has arrived
    {
        p = talloc(); // make a new node
        p->word = mystrdup(w);
        p->count = 1;
        p->left = p->right = NULL;
        treeNodes[treeP++] = p;
    }
    else if ((result = strcmp(w, p->word)) == 0)
        p->count++; // repeated word
    else if (result < 0) // less than into left subtree
        p->left = addtree(p->left, w);
    else // greater than into right subtree
        p->right = addtree(p->right, w);
    return p;
}

// in-order print of tree p
void treeprint(void)
{
    for (int i = 0; i < treeP; i++)
        printf("%4d %s\n", treeNodes[i]->count, treeNodes[i]->word);
}

struct tnode *talloc(void)
{
    return (struct tnode *) malloc(sizeof(struct tnode));
}

char *mystrdup(char *s) // make a duplicate of s
{
    char *p = (char *) malloc(strlen(s) + 1); // + 1 for '\0'
    if (p != NULL)
        strcpy(p, s);
    return p;
}

// get next word or character from input
int getword(char *word, int lim)
{
    int c;
    char *w = word;
    while ((c = tolower(getch())) == '\t' || c == ' ')
        ;
    if (c != EOF)
        *w++ = c;
    if (!isalpha(c) && c != '_')
    {
        *w = '\0';
        return c;
    }
    for ( ; --lim > 0; w++)
        if (!isalnum(*w = tolower(getch())) && *w != '_')
        {
            ungetch(*w);
            break;
        }
    *w = '\0';
    return word[0];
}

// get a (possibly pushed back) character
int getch(void)
{
    return (bufp > 0) ? buf[--bufp] : getchar();
}

// push character back on input
void ungetch(int c)
{
    if (bufp >= BUFSIZE)
        printf("ungetch: too many characters\n");
    else
        buf[bufp++] = c;
}

// sort v[left]...v[right] into decreasing order
void qsortd(struct tnode *v[], int left, int right)
{
    int i, last;
    if (left >= right) // do nothing if array contains fewer than two elements
        return;
    swap(v, left, (left + right) / 2); // move partition element to v[0]
    last = left;
    for (i = left + 1; i <= right; i++) // partition
        if (v[left]->count < v[i]->count) // if v[left] is less than v[i], swap them to put them in order. Note: v[i], v[left] == increasing order and v[left], v[i] == decreasing order
            swap(v, ++last, i);
    swap(v, left, last); // restore partition element
    qsortd(v, left, last - 1);
    qsortd(v, last + 1, right);
}

// interchange v[i] and v[j]
void swap(struct tnode *v[], int i, int j)
{   // grab memory address of tnode and then swap where pointers are pointing
    struct tnode *temp = v[i]; 
    v[i] = v[j];
    v[j] = temp; 
}
Personal tools