Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 6.04 on page 143

Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.



Solution by Bryan Williams

Bryan's solution is, as far as I can tell, Category 1 only because he uses EXIT_SUCCESS and EXIT_FAILURE .

/*

  Chapter 6. Structures

          Write a program that prints out the distinct words in its 
          input sorted into decreasing order of frequency of occurrence.
          Precede each word by its count.

  Author: Bryan Williams

*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <assert.h>


typedef struct WORD
{
  char *Word;
  size_t Count;
  struct WORD *Left;
  struct WORD *Right;
} WORD;


/*
  Assumptions: input is on stdin, output to stdout.

  Plan: read the words into a tree, keeping a count of how many we have,
        allocate an array big enough to hold Treecount (WORD *)'s 
        walk the tree to populate the array.
        qsort the array, based on size.
        printf the array
        free the array
        free the tree
        free tibet (optional)
        free international shipping!
*/

#define SUCCESS                      0
#define CANNOT_MALLOC_WORDARRAY      1
#define NO_WORDS_ON_INPUT            2
#define NO_MEMORY_FOR_WORDNODE       3
#define NO_MEMORY_FOR_WORD           4


#define NONALPHA "1234567890 \v\f\n\t\r+=-*/\\,.;:'#~?<>|{}[]`!\"�$%^&()"

int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input);
int AddToTree(WORD **DestTree, size_t *Treecount, char *Word); 
int WalkTree(WORD **DestArray, WORD *Word);
int CompareCounts(const void *vWord1, const void *vWord2);
int OutputWords(FILE *Dest, size_t Count, WORD **WordArray);
void FreeTree(WORD *W);
char *dupstr(char *s);


int main(void)
{
  int Status = SUCCESS;
  WORD *Words = NULL;
  size_t Treecount = 0;
  WORD **WordArray = NULL;

  /* Read the words on stdin into a tree */
  if(SUCCESS == Status)
  {
    Status = ReadInputToTree(&Words, &Treecount, stdin);
  }

  /* Sanity check for no sensible input */
  if(SUCCESS == Status)
  {
    if(0 == Treecount)
    {
      Status = NO_WORDS_ON_INPUT;
    }
  }

  /* allocate a sufficiently large array */
  if(SUCCESS == Status)
  {
     WordArray = malloc(Treecount * sizeof *WordArray);
     if(NULL == WordArray)
     {
       Status = CANNOT_MALLOC_WORDARRAY;
     } 
  }

  /* Walk the tree into the array */
  if(SUCCESS == Status)
  {
    Status = WalkTree(WordArray, Words);
  }

  /* qsort the array */
  if(SUCCESS == Status)
  {
    qsort(WordArray, Treecount, sizeof *WordArray, CompareCounts);
  }

  /* walk down the WordArray outputting the values */
  if(SUCCESS == Status)
  {
    Status = OutputWords(stdout, Treecount, WordArray);
  }

  /* free the word array */
  if(NULL != WordArray)
  {
    free(WordArray);
    WordArray = NULL;
  }

  /* and free the tree memory */
  if(NULL != Words)
  {
    FreeTree(Words);
    Words = NULL;
  }

  /* Error report and we are finshed */
  if(SUCCESS != Status)
  {
    fprintf(stderr, "Program failed with code %d\n", Status);
  }
  return (SUCCESS == Status ? EXIT_SUCCESS : EXIT_FAILURE);
}




void FreeTree(WORD *W)
{
  if(NULL != W)
  {
    if(NULL != W->Word)
    { 
      free(W->Word);
      W->Word = NULL;
    }
    if(NULL != W->Left)
    {
      FreeTree(W->Left);
      W->Left = NULL;
    }
    if(NULL != W->Right)
    {
      FreeTree(W->Right);
      W->Right = NULL;
    }
  }
}


int AddToTree(WORD **DestTree, size_t *Treecount, char *Word)
{
  int Status = SUCCESS;
  int CompResult = 0;

  /* safety check */
  assert(NULL != DestTree);
  assert(NULL != Treecount);
  assert(NULL != Word);

  /* ok, either *DestTree is NULL or it isn't (deep huh?) */
  if(NULL == *DestTree)  /* this is the place to add it then */
  {
    *DestTree = malloc(sizeof **DestTree);
    if(NULL == *DestTree) 
    {
      /* horrible - we're out of memory */
      Status = NO_MEMORY_FOR_WORDNODE;
    }
    else
    {
      (*DestTree)->Left = NULL;
      (*DestTree)->Right = NULL;
      (*DestTree)->Count = 1;
      (*DestTree)->Word = dupstr(Word);
      if(NULL == (*DestTree)->Word)
      {
        /* even more horrible - we've run out of memory in the middle */
        Status = NO_MEMORY_FOR_WORD; 
        free(*DestTree);
        *DestTree = NULL;
      }
      else
      {
        /* everything was successful, add one to the tree nodes count */
        ++*Treecount;
      }
    }
  }
  else  /* we need to make a decision */
  {
    CompResult = strcmp(Word, (*DestTree)->Word);
    if(0 < CompResult)
    {
      Status = AddToTree(&(*DestTree)->Left, Treecount, Word);
    }
    else if(0 > CompResult)
    {
      Status = AddToTree(&(*DestTree)->Left, Treecount, Word);
    }
    else
    {
      /* add one to the count - this is the same node */
      ++(*DestTree)->Count;
    }
  }  /* end of else we need to make a decision */

  return Status;  
}


int ReadInputToTree(WORD **DestTree, size_t *Treecount, FILE *Input)
{
  int Status = SUCCESS;
  char Buf[8192] = {0};
  char *Word = NULL;

  /* safety check */
  assert(NULL != DestTree);
  assert(NULL != Treecount);
  assert(NULL != Input);

  /* for every line */
  while(NULL != fgets(Buf, sizeof Buf, Input))
  {
    /* strtok the input to get only alpha character words */
    Word = strtok(Buf, NONALPHA);
    while(SUCCESS == Status && NULL != Word)
    {
      /* deal with this word by adding it to the tree */
      Status = AddToTree(DestTree, Treecount, Word); 

      /* next word */
      if(SUCCESS == Status) 
      {
        Word = strtok(NULL, NONALPHA);
      }
    }
  }

  return Status;
}




int WalkTree(WORD **DestArray, WORD *Word)
{
  int Status = SUCCESS;
  static WORD **Write = NULL;
  
  /* safety check */
  assert(NULL != Word);

  /* store the starting point if this is the first call */
  if(NULL != DestArray)
  {
    Write = DestArray;
  }

  /* Now add this node and it's kids */
  if(NULL != Word)
  {
    *Write = Word;
    ++Write;
    if(NULL != Word->Left)
    {
      Status = WalkTree(NULL, Word->Left);
    }
    if(NULL != Word->Right)
    {
      Status = WalkTree(NULL, Word->Right);
    }
  }

  return Status;
}


/*
   CompareCounts is called by qsort. This means that it gets pointers to the 
   data items being compared. In this case the data items are pointers too. 
*/
int CompareCounts(const void *vWord1, const void *vWord2)
{
  int Result = 0;
  WORD * const *Word1 = vWord1;
  WORD * const *Word2 = vWord2;
 
  assert(NULL != vWord1); 
  assert(NULL != vWord2); 

  /* ensure the result is either 1, 0 or -1 */
  if((*Word1)->Count < (*Word2)->Count)
  {
    Result = 1;
  }
  else if((*Word1)->Count > (*Word2)->Count)
  {
    Result = -1;
  }
  else
  {
    Result = 0;
  }

  return Result;
}


int OutputWords(FILE *Dest, size_t Count, WORD **WordArray)
{
  int Status = SUCCESS;
  size_t Pos = 0;
 
  /* safety check */
  assert(NULL != Dest);
  assert(NULL != WordArray);

  /* Print a header */
  fprintf(Dest, "Total Words : %lu\n", (unsigned long)Count); 

  /* Print the words in descending order */
  while(SUCCESS == Status && Pos < Count) 
  {
    fprintf(Dest, "%10lu %s\n", (unsigned long)WordArray[Pos]->Count, WordArray[Pos]->Word);
    ++Pos;
  }

  return Status;
}


/*
    dupstr: duplicate a string
*/
char *dupstr(char *s)
{
  char *Result = NULL;
  size_t slen = 0;
  
  /* sanity check */
  assert(NULL != s);

  /* get string length */
  slen = strlen(s);
  
  /* allocate enough storage */
  Result = malloc(slen + 1);
 
  /* populate string */
  if(NULL != Result)
  {
    memcpy(Result, s, slen);
    *(Result + slen) = '\0';
  }

  return Result;
}


Solution by codybartfast

Category 0

Uses quicksort to sort on frequency (as a result words with the same frequency are not in alphabetic order).

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "getword.h"
#include "quicksort.h"

#define MAXWORD 100

struct wnode {
	char *key;
	int count;
	struct wnode *left;
	struct wnode *right;
};
struct wnode *addword(struct wnode *p, char *key, int *wc);
int treetoarray(struct wnode **warray, struct wnode *wnode, int i);
int compare(struct wnode *a, struct wnode *b);
struct wnode *walloc(void);
void freewnode(struct wnode *wnode);
char *keyfrom(char *key, char *word);
int isnoiseword(char *word);
char *strdup(const char *s);

int main(void)
{
	char word[MAXWORD], key[MAXWORD];
	int i, wordcount = 0;
	struct wnode **warray, *wnode = NULL;

	while (getword(&streamin, word, MAXWORD) != EOF)
		if (!isnoiseword(keyfrom(key, word)))
			if ((wnode = addword(wnode, key, &wordcount)) == NULL)
				return 1;

	warray = (struct wnode **)malloc(wordcount * sizeof(struct wnode *));
	treetoarray(warray, wnode, 0);
	quicksort((void **)warray, 0, wordcount - 1, (VOIDCOMP)compare);
	for (i = 0; i < wordcount; i++)
		printf("%4d  %s\n", warray[i]->count, warray[i]->key);

	freewnode(wnode);
	free(warray);
	return 0;
}

struct wnode *addword(struct wnode *p, char *key, int *wc)
{
	int cond;
	if (p == NULL) {
		if ((p = walloc()) == NULL)
			return NULL;
		p->key = (char *)strdup(key);
		p->left = p->right = NULL;
		p->count = 1;
		(*wc)++;
	} else if ((cond = strcmp(key, p->key)) == 0) {
		p->count++;
	} else if (cond < 0) {
		if ((p->left = addword(p->left, key, wc)) == NULL)
			return NULL;
	} else {
		if ((p->right = addword(p->right, key, wc)) == NULL)
			return NULL;
	}
	return p;
}

int treetoarray(struct wnode **warray, struct wnode *wnode, int idx)
{
	if (!wnode)
		return idx;
	warray[idx++] = wnode;
	idx = treetoarray(warray, wnode->left, idx);
	return treetoarray(warray, wnode->right, idx);
}

int compare(struct wnode *a, struct wnode *b)
{
	return b->count - a->count;
}

struct wnode *walloc(void)
{
	return (struct wnode *)malloc(sizeof(struct wnode));
}

void freewnode(struct wnode *wnode)
{
	if (wnode == NULL)
		return;
	freewnode(wnode->left);
	freewnode(wnode->right);
	free(wnode->key);
	free(wnode);
}

char *keyfrom(char *key, char *word)
{
	char *k = key;
	while ((*k++ = tolower(*word++)))
		;
	return key;
}

int isnoiseword(char *word)
{
	static char *noisewords[] = {
		"a",	 "about", "all",   "an",    "and",  "as",   "at",
		"be",	 "but",	  "by",	   "do",    "for",  "from", "get",
		"go",	 "have",  "he",	   "her",   "his",  "i",    "if",
		"in",	 "it",	  "me",	   "my",    "not",  "of",   "on",
		"one",	 "or",	  "out",   "say",   "she",  "so",   "that",
		"the",	 "their", "there", "they",  "this", "to",   "up",
		"we",	 "what",  "when",  "which", "who",  "will", "with",
		"would", "you"
	};
	static int n = (sizeof noisewords / sizeof(char *));

	int cond, low, high, mid;

	low = 0;
	high = n - 1;
	while (low <= high) {
		mid = (low + high) / 2;
		if ((cond = strcmp(word, noisewords[mid])) < 0)
			high = mid - 1;
		else if (cond > 0)
			low = mid + 1;
		else
			return 1;
	}
	return 0;
}

char *strdup(const char *s)
{
	char *p = (char *)malloc(strlen(s) + 1);
	if (p != NULL)
		strcpy(p, s);
	return p;
}

Full, latest code on github

Solution by anonymous

Most of the code came from the book and I adapted it to work for this exercise. I used a pointer array of tnodes to keep track of all of the nodes and to sort them. This seemed like the most efficient way to get the job done. I updated the qsort (now called qsortd) and swap functions to process the tnode pointers instead of char array pointers. I also swapped the comparison in the function so it sorts in descending order by default (hence the d in the name). Finally, treeprint was updated to use the sorted tnode pointers for printing.

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

/*
    Exercise 6-4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.
*/

struct tnode // the tree node:
{ 
    char *word;             // points to the text
    int count;              // number of occurrences
    struct tnode *left;     // left child
    struct tnode *right;    // right child
};

#define MAXWORD 100
#define BUFSIZE 100
#define MAXSUPPORTEDWORDS 10000

struct tnode *addtree(struct tnode *, char *);
void treeprint(void);
int getword(char *, int);
struct tnode *talloc(void);
char *mystrdup(char *);
int getword(char *word, int lim);
int getch(void);
void ungetch(int);
void qsortd(struct tnode *v[], int left, int right);
void swap(struct tnode *v[], int i, int j);

char buf[BUFSIZE];  // buffer for ungetch
int bufp = 0;       // next free position in buf
struct tnode *treeNodes[MAXSUPPORTEDWORDS] = { 0 }; // initialized to null
int treeP = 0;      // next free position in treeNodes

// word frequency count
int main()
{
    struct tnode *root;
    char word[MAXWORD];

    root = NULL;
    while (getword(word, MAXWORD) != EOF)
        if (isalpha(word[0]) || word[0] == '_')
            root = addtree(root, word);
    qsortd(treeNodes, 0, treeP - 1);
    treeprint();
    return 0;
}

// add a node with w, at or below p
struct tnode *addtree(struct tnode *p, char *w)
{
    int result;

    if (p == NULL) // a new word has arrived
    {
        p = talloc(); // make a new node
        p->word = mystrdup(w);
        p->count = 1;
        p->left = p->right = NULL;
        treeNodes[treeP++] = p;
    }
    else if ((result = strcmp(w, p->word)) == 0)
        p->count++; // repeated word
    else if (result < 0) // less than into left subtree
        p->left = addtree(p->left, w);
    else // greater than into right subtree
        p->right = addtree(p->right, w);
    return p;
}

// in-order print of tree p
void treeprint(void)
{
    for (int i = 0; i < treeP; i++)
        printf("%4d %s\n", treeNodes[i]->count, treeNodes[i]->word);
}

struct tnode *talloc(void)
{
    return (struct tnode *) malloc(sizeof(struct tnode));
}

char *mystrdup(char *s) // make a duplicate of s
{
    char *p = (char *) malloc(strlen(s) + 1); // + 1 for '\0'
    if (p != NULL)
        strcpy(p, s);
    return p;
}

// get next word or character from input
int getword(char *word, int lim)
{
    int c;
    char *w = word;
    while ((c = tolower(getch())) == '\t' || c == ' ')
        ;
    if (c != EOF)
        *w++ = c;
    if (!isalpha(c) && c != '_')
    {
        *w = '\0';
        return c;
    }
    for ( ; --lim > 0; w++)
        if (!isalnum(*w = tolower(getch())) && *w != '_')
        {
            ungetch(*w);
            break;
        }
    *w = '\0';
    return word[0];
}

// get a (possibly pushed back) character
int getch(void)
{
    return (bufp > 0) ? buf[--bufp] : getchar();
}

// push character back on input
void ungetch(int c)
{
    if (bufp >= BUFSIZE)
        printf("ungetch: too many characters\n");
    else
        buf[bufp++] = c;
}

// sort v[left]...v[right] into decreasing order
void qsortd(struct tnode *v[], int left, int right)
{
    int i, last;
    if (left >= right) // do nothing if array contains fewer than two elements
        return;
    swap(v, left, (left + right) / 2); // move partition element to v[0]
    last = left;
    for (i = left + 1; i <= right; i++) // partition
        if (v[left]->count < v[i]->count) // if v[left] is less than v[i], swap them to put them in order. Note: v[i], v[left] == increasing order and v[left], v[i] == decreasing order
            swap(v, ++last, i);
    swap(v, left, last); // restore partition element
    qsortd(v, left, last - 1);
    qsortd(v, last + 1, right);
}

// interchange v[i] and v[j]
void swap(struct tnode *v[], int i, int j)
{   // grab memory address of tnode and then swap where pointers are pointing
    struct tnode *temp = v[i]; 
    v[i] = v[j];
    v[j] = temp; 
}
Personal tools