Jump to: navigation, search

The C Programming Language, 2nd Edition, by Kernighan and Ritchie
Exercise 5.12 on page 118

Extend entab and detab to accept the shorthand entab -m +n to mean tab stops every n columns, starting at column m. Choose convenient (for the user) default behavior.



Solution by Franz Fritsche

/*
 * entab.c (Exercise 5-12)
 * Author   :   Franz Fritsche
 * About    :   Replaces strings of blanks with the minimum number
 *              of tabs and blanks to achieve the same spacing.
 * Usage    :   entab -m +n
 *              (tab stops every n columns, starting at column m)
 * Date     :   March 23, 2007
 */

#include <stdio.h>
#include <stdlib.h>

#define TABSIZE 4    /* default tab size */

int main(int argc, char *argv[])
{
    int c, pos, spaces, i;
    int tabsize[2] = {TABSIZE, TABSIZE};

    while (--argc > 0 && (**++argv == '-' || **argv == '+'))
        switch (**argv)
        {
            case '-':
                if ((pos = atoi(*argv + 1)) > 0)
                    tabsize[0] = pos;
                break;
            case '+':
                if ((pos = atoi(*argv + 1)) > 0)
                    tabsize[1] = pos;
                break;
        }
    
    pos = 0;
    spaces = 0;
    i = 0;
    while ((c = getchar()) != EOF)
    {
        if (c == ' ')
        {
            if (pos < tabsize[i]-1)
            {
                ++pos;
                ++spaces;
            }
            else
            {
                putchar('\t');
                pos = 0;
                spaces = 0;
                if (i < 1)
                    i++;
            }
        }
        else
        {
            if (c != '\t')
                while (spaces)
                {
                    putchar(' ');
                    --spaces;
                }
                
            putchar(c);
            
            if (c == '\n')
            {
                pos = 0;
                spaces = 0;
                i = 0;
            }
            else if  (c == '\t')
            {
                pos = 0;
                spaces = 0;
                if (i < 1)
                    i++;
            }                
            else /* normal character */
            {
                ++pos;
            
                if (pos == tabsize[i])
                {
                    pos = 0;
                    if (i < 1) 
                        i++;
                }
            }
        }
    }
    
    while (spaces) /* may be left to do */
    {
        putchar(' ');
        --spaces;
    }

    return 0;
}

/*
 * detab.c (Exercise 5-12)
 * Author   :   Franz Fritsche
 * About    :   Replaces tabs in the input with the proper number 
 *              of blanks to space to the next tab stop.
 * Usage    :   detab -m +n
 *              (tab stops every n columns, starting at column m)
 * Date     :   March 23, 2007
 */

#include <stdio.h>
#include <stdlib.h>

#define TABSIZE 4    /* default tab size */

int main(int argc, char *argv[])
{
    int c, pos, i;
    int tabsize[2] = {TABSIZE, TABSIZE};

    while (--argc > 0 && (**++argv == '-' || **argv == '+'))
        switch (**argv)
        {
            case '-':
                if ((pos = atoi(*argv + 1)) > 0)
                    tabsize[0] = pos;
                break;
            case '+':
                if ((pos = atoi(*argv + 1)) > 0)
                    tabsize[1] = pos;
                break;
        }
    
    pos = 0;
    i = 0;
    while ((c = getchar()) != EOF)
    {
        if (c == '\t')
        {
            while (pos < tabsize[i])
            {
                putchar(' ');
                ++pos;
            }
            pos = 0;
            if (i < 1)
                i++;
        }
        else  
        {
            putchar(c);
            
            if (c == '\n')
            {
                pos = 0;
                i = 0;
            }
            else /* normal character */
            {
                ++pos;
            
                if (pos == tabsize[i])
                {
                    pos = 0;
                    if (i < 1)
                        i++;
                }
            }
        }
    }

    return 0;
}


Solution by codybartfast (cat 0)

I guess the question is ambiguous whether we should extend the detab/entab from Chapter 1 or extend the detab/entab from question 5-11. I assumed we should extend 5-11. The following supports both custom tab stops and repeating tab stops. If -m is not specified it repeats tabs starting with the last custom tab or 1 if no custom tabs are specified. E.g.:

detab -59 +10 1 5 9 17 33 65 

produces:

t0  t1  t2      t3              t4                        t5    t6  t7        t8

^   ^   ^       ^               ^                         ^     ^   ^         ^
1   5   9       17              33                        59    65  69        79
c   c   c       c               c                         R     c   R         R

c = custom tab
R = repeating tab

Only tabstops.c is changed from the previous question, detab.c and entab.c are unchanged.

/* detab.c */

#include <stdio.h>
#include "tabstops.h"

int main(int argc, char *argv[])
{
	int col, c, dist;

	parsestops(argc, argv);

	col = 0;
	while ((c = getchar()) != EOF) {
		if (c == '\t') {
			for (dist = dist2stop(col); dist > 0; --dist) {
				putchar(' ');
				++col;
			}
		} else {
			putchar(c);
			if (c == '\n')
				col = 0;
			else if (c == '\b' && col > 0)
				--col;
			else
				++col;
		}
	}
	return 0;
}
/* tabstops.h */

void parsestops(int argc, char *argv[]);
int dist2stop(int col);
int istabstop(int col);
/* tabstops.c */

#include <stdio.h>
#include "tabstops.h"

#define MAXARGS 1024
#define NOTSET -1

int custstops[MAXARGS], ncust;
int firstrepeating = NOTSET;
int repeatsize = 8;

int atoi(char *s);
int max(int, int);

int dist2stop(int col)
{
	int rdist, cdist, i;

	rdist = col < firstrepeating ?
			(firstrepeating - col) :
			repeatsize - ((col - firstrepeating) % repeatsize);

	for (i = 0; i < ncust; i++) {
		if (col < custstops[i]) {
			cdist = custstops[i] - col;
			return rdist < cdist ? rdist : cdist;
		}
	}
	return rdist;
}

int istabstop(int col)
{
	return col > 0 && (dist2stop(col - 1) == 1);
}

void parsestops(int argc, char *argv[])
{
	char *vstr, first;
	int val, i;
	int maxcust = 0;

	if (argc > MAXARGS) {
		printf("error: More than %d arguments!\n", MAXARGS);
		return;
	}
	ncust = 0;
	for (i = 1; i < argc; i++) {
		vstr = argv[i];
		first = *vstr;
		if (first == '-' || first == '+')
			vstr++;
		val = atoi(vstr);
		if (first == '-') {
			firstrepeating = val - 1;
		} else if (first == '+')
			repeatsize = val;
		else
			maxcust = max(maxcust, custstops[ncust++] = val - 1);
	}
	/* if no -m then start repeating from last custom stop */
	firstrepeating = firstrepeating == NOTSET ? maxcust : firstrepeating;
}

int atoi(char *s)
{
	int i, n = 0;

	for (i = 0; s[i] >= '0' && s[i] <= '9'; ++i)
		n = 10 * n + (s[i] - '0');
	return n;
}

int max(int a, int b)
{
	return a >= b ? a : b;
}
/* entab.c */

#include <stdio.h>
#include "tabstops.h"

int main(int argc, char *argv[])
{
	int col, c, nspace;

	parsestops(argc, argv);

	col = nspace = 0;
	while ((c = getchar()) != EOF) {
		if (c == '\b' && col > 0) {
			--col;
			if (nspace > 0)
				--nspace;
			else
				putchar(c);
		} else if (nspace > 0 && istabstop(col)) {
			if (nspace == 1)
				putchar(' ');
			else
				putchar('\t');
			nspace = 0;
		}
		if (c == '\t') {
			putchar(c);
			nspace = 0;
			col = col + dist2stop(col);
		} else if (c == ' ') {
			++col;
			++nspace;
		} else if (c != '\b') {
			for (; 0 < nspace; --nspace)
				putchar(' ');
			putchar(c);
			if (c == '\n')
				col = 0;
			else
				++col;
		}
	}
	return 0;
}

Solution by anonymous

I made my initial while loop overly complicated in both entab and detab because I wanted to practice using pointers. To make up for it I added a lot of comments explaining it, but looking back, it would have been better to write things more clearly.

This solution is an extension of exercise 5-11 so it allows the -m +n shorthand and/or custom tab stops as arguments. Default tab stops are only used if the -m +n shorthand is not in the arguments. They are added to the end of any supplied custom tabs. I decided to be lazy with my logic and write a remove duplicates function to remove the dupes form the sorted tabs entries. I then added the rest of the custom tab stops until the tabs array was full and remove duplicates. Since removing duplicates shortened the array, I put this in a loop until no more duplicates were detected. While it was easier to write, it is not as efficient as it could be.

Like before, my pipes text file came in handy to test my programs as well as the ones on here, so I recommend getting a copy of it from exercise 5-11. Additionally, I explained how these two programs work in more detail on that page.

Finally, Franz Fritsche's entab and detab both didn't work if -m is set to 0. Plus both Franz Fritsche's and codybartfast's entab program added tabs even though I specified the default tab size of 8 and wanted custom tab stops at 4 using -1 +4 and -0 +4. This resulted in incorrect output since my tab stop size in my shell is 8.

Here is my entab

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>

/*
    Exercise 5-12. Extend entab and detab to accept the shorthand entab -m +n to mean tab stops every n columns, starting at column m. Choose convenient (for the user) default behavior.
*/

#define DEFAULT_TAB_STOP 8 // length of default tab stop. For entab, this needs to be the tab stop size of the shell/tool that displays the contents since tabs are in the output.
#define MAX_TABS 1000      // max supported custom tabs

void shellsort(int v[], int n);
int removeDuplicates(int v[], int n);

int main(int argc, char *argv[])
{   // i is index in tabs, j is the next custom/default tab stop, k is the distance to the natural tab stops to see if a tab will fit
    int c, i = 0, j, k, col = 0, spaces = 0, startCol = -1, customTabStop = -1;
    int tabs[MAX_TABS];
    // This while loop does a lot... It makes sure there is still arguments left by decrementing argc and seeing if it above 0. Next, the first if has *(*++argv)++) which moves the argv
    // pointer to the second argument (the ++argv part). Next, it gets the first char in the argument's string and sets it to c (the *(*...) part). Next, it moves the pointer in the
    // argument's string to the second character updating the string from something like -123 to 123 and something like +456 to 465 (the ++ at the end). This allows atoi to be used on
    // the string later for setting startCol and spacesPerTabStop. Finally, it checks to see if c is equal to - or +. The downside of this code is that the first if cannot be moved or
    // the logic will fail. All of the happens on the first two lines
    while (--argc > 0)
        if ((c = *(*++argv)++) == '-' || c == '+') // if starts with - or +
            if (**argv != '-') // if second char is not -
                if (c == '-') // if started with -
                    startCol = atoi(*argv); // *argv points to argument string without the leading -
                else // if started with +
                    customTabStop = atoi(*argv); // *argv points to argument string without the leading +
            else // second char is -
                return 3; // negative number for startCol/customTabStop not supported
        else if (isdigit(c)) // if starts with number
            if (i >= MAX_TABS) // if spaces not left in tabs
                return 1; // too many custom tab stop arguments
            else // add number to tabs
                tabs[i++] = atoi(--*argv); // note: the -- in --*argv moves argv back one to get whole int since above it was removed. I could concat c and argv, but this is easier
        else // if not digit and did not start with - or +, display usage and exit
        {
            printf("Usage: entab [-m +n] [#] [#] ... [#]\n");
            return 2; // non-valid argument
        }

    int n = 0; // this is used as a multiplier to fill tabs up with the default tab stop and/or customTabStop if no custom tab stops were provided or not enough of them were provided
    if (startCol != -1 && customTabStop != -1) // if - and + were provided as arguments
    {
        if (customTabStop == 0)
        {
            printf("Only custom tab stop lengths of 1 or greater is supported\n");
            return 4;
        }
        int oldSize;
        do
        {
            while (i < MAX_TABS) // adds the rest of the default tabs to tabs array
                tabs[i++] = startCol + n++ * customTabStop;
            shellsort(tabs, i); // puts ints in numerical order
            oldSize = i; // keep track of the old size of tabs in case removeDuplicates shortens it
            i = removeDuplicates(tabs, i); // removes duplicates and updates i to new len of tabs
        } while (oldSize != i); // if the array was shortened, repeat adding tab stops at the end until no more duplicates are added
    }
    else if (startCol != -1 || customTabStop != -1) // if only one was provided, display usage and exit
    {
        printf("Usage: entab [-m +n] [#] [#] ... [#]\n");
        return 2; // non-valid argument
    }
    else // if not using - and + for shorthand custom tab stops
    {
        if (i > 0) // if i > 0, then custom tab stops were provided as an argument
        {
            shellsort(tabs, i); // puts ints in numerical order
            n = tabs[i - 1]; // gets the largest value in tabs
            n += DEFAULT_TAB_STOP - (n % DEFAULT_TAB_STOP); // moves n to the next default tab stop
            n /= DEFAULT_TAB_STOP; // gets the base of the number to use in the next while loop
        }
        while (i < MAX_TABS) // adds the rest of the default tabs to tabs array
            tabs[i++] = n++ * DEFAULT_TAB_STOP; // uses base n to get value of next tab stop after the last largest custom one. Appends to it the end of tabs
    }
    i = 0; // resets index

    while ((c = getchar()) != EOF)
    {
        if (c != ' ') // if the char is not a space, all saved spaces need to be processed before it is printed
        {
            while (spaces > 0) // this allows the below logic to only think about things one iteration at a time
            {
                k = DEFAULT_TAB_STOP - (col % DEFAULT_TAB_STOP); // find the next default tab stop
                while (tabs[i] <= col && i < MAX_TABS) // get the next custom/default tab stop
                    i++;
                if (i < MAX_TABS)                      // but only if not out of bounds of array
                    j = tabs[i] - col;
                if (k <= j && spaces - k >= 0) // if the natural tab is less than the custom tab and there are enough spaces, substitute a tab for the spaces
                {
                    putchar('\t');
                    col += k; // updates col position
                    spaces -= k; // updates spaces used
                }
                else // if natural tab is greater than custom one, fill in the spaces until the custom tab stop is met. Keep track of col position and spaces left
                {
                    while (spaces > 0 && j-- > 0)
                    {
                        putchar(' ');
                        col++;
                        spaces--;
                    }
                }
            }
        }

        switch (c)
        {
        case ' ': // don't print the spaces, but keep track of the number of them (they are processed above)
            spaces++;
            break;
        case '\n': // reset the col and tabs index and print it
            col = 0;
            i = 0;
            putchar(c);
            break;
        case '\t': // find the next custom tab, subtract the number of spaces from it to current col and add that to spaces. These spaces will be processed the next iteration
            while (tabs[i] <= col && i < MAX_TABS)
                i++;
            if (i < MAX_TABS)
                j = tabs[i] - col;
            spaces += j;
            break;
        default: // all other chars are printed and col position is incremented by one
            putchar(c);
            col++;
            break;
        }
    }
    return 0;
}

// sort v[0]...v[n-1] into increasing order
void shellsort(int v[], int n)
{
    int gap, i, j, temp;

    for (gap = n / 2; gap > 0; gap /= 2)
        for (i = gap; i < n; i++)
            for (j = i - gap; j >= 0 && v[j] > v[j + gap]; j -= gap)
            {
                temp = v[j];
                v[j] = v[j + gap];
                v[j + gap] = temp;
            }
}

// removes duplicates from a sorted array. Returns new length
int removeDuplicates(int v[], int n)
{
    int j = 0;
    for (int i = 0; i < n - 1; i++) 
        if (v[i] != v[i + 1]) 
            v[j++] = v[i];
    v[j++] = v[n - 1];
    return j;
}

Here is my detab

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>

/*
    Exercise 5-12. Extend entab and detab to accept the shorthand entab -m +n to mean tab stops every n columns, starting at column m. Choose convenient (for the user) default behavior.
*/

#define DEFAULT_TAB_STOP 8 // length of default tab stop. For detab, this can be a custom length since no tabs will be inserted to the output
#define MAX_TABS 1000      // max supported custom tabs

void shellsort(int v[], int n);
int removeDuplicates(int v[], int n);

int main(int argc, char *argv[])
{   // i is index in tabs, j is the next custom/default tab stop
    int c, i = 0, j, col = 0, spaces = 0, startCol = -1, customTabStop = -1;
    int tabs[MAX_TABS];
    // This while loop does a lot... It makes sure there is still arguments left by decrementing argc and seeing if it above 0. Next, the first if has *(*++argv)++) which moves the argv
    // pointer to the second argument (the ++argv part). Next, it gets the first char in the argument's string and sets it to c (the *(*...) part). Next, it moves the pointer in the
    // argument's string to the second character updating the string from something like -123 to 123 and something like +456 to 465 (the ++ at the end). This allows atoi to be used on
    // the string later for setting startCol and spacesPerTabStop. Finally, it checks to see if c is equal to - or +. The downside of this code is that the first if cannot be moved or
    // the logic will fail. All of the happens on the first two lines
    while (--argc > 0)
        if ((c = *(*++argv)++) == '-' || c == '+') // if starts with - or +
            if (**argv != '-') // if second char is not -
                if (c == '-') // if started with -
                    startCol = atoi(*argv); // *argv points to argument string without the leading -
                else // if started with +
                    customTabStop = atoi(*argv); // *argv points to argument string without the leading +
            else // second char is -
                return 3; // negative number for startCol/customTabStop not supported
        else if (isdigit(c)) // if starts with number
            if (i >= MAX_TABS) // if spaces not left in tabs
                return 1; // too many custom tab stop arguments
            else // add number to tabs
                tabs[i++] = atoi(--*argv); // note: the -- in --*argv moves argv back one to get whole int since above it was removed. I could concat c and argv, but this is easier
        else // if not digit and did not start with - or +, display usage and exit
        {
            printf("Usage: entab [-m +n] [#] [#] ... [#]\n");
            return 2; // non-valid argument
        }

    int n = 0; // this is used as a multiplier to fill tabs up with the default tab stop and/or customTabStop if no custom tab stops were provided or not enough of them were provided
    if (startCol != -1 && customTabStop != -1) // if - and + were provided as arguments
    {
        if (customTabStop == 0)
        {
            printf("Only custom tab stop lengths of 1 or greater is supported\n");
            return 4;
        }
        int oldSize;
        do
        {
            while (i < MAX_TABS) // adds the rest of the default tabs to tabs array
                tabs[i++] = startCol + n++ * customTabStop;
            shellsort(tabs, i); // puts ints in numerical order
            oldSize = i; // keep track of the old size of tabs in case removeDuplicates shortens it
            i = removeDuplicates(tabs, i); // removes duplicates and updates i to new len of tabs
        } while (oldSize != i); // if the array was shortened, repeat adding tab stops at the end until no more duplicates are added
    }
    else if (startCol != -1 || customTabStop != -1) // if only one was provided, display usage and exit
    {
        printf("Usage: entab [-m +n] [#] [#] ... [#]\n");
        return 2; // non-valid argument
    }
    else // if not using - and + for shorthand custom tab stops
    {
        if (i > 0) // if i > 0, then custom tab stops were provided as an argument
        {
            shellsort(tabs, i); // puts ints in numerical order
            n = tabs[i - 1]; // gets the largest value in tabs
            n += DEFAULT_TAB_STOP - (n % DEFAULT_TAB_STOP); // moves n to the next default tab stop
            n /= DEFAULT_TAB_STOP; // gets the base of the number to use in the next while loop
        }
        while (i < MAX_TABS) // adds the rest of the default tabs to tabs array
            tabs[i++] = n++ * DEFAULT_TAB_STOP; // uses base n to get value of next tab stop after the last largest custom one. Appends to it the end of tabs
    }

    i = 0; // resets index
    while ((c = getchar()) != EOF)
    {
        if (c != ' ') // if the char is not a space, all saved spaces need to be processed before it is printed
        {
            while (spaces > 0) // this allows the below logic to only think about things one iteration at a time
            {
                while (tabs[i] <= col && i < MAX_TABS) // get the next custom/default tab stop
                    i++;
                if (i < MAX_TABS)                      // but only if not out of bounds of array
                    j = tabs[i] - col;
                while (spaces > 0 && j-- > 0) // fill in the spaces until the custom tab stop is met. Keep track of col position and spaces left
                {
                    putchar(' ');
                    col++;
                    spaces--;
                }
            }
        }

        switch (c)
        {
        case ' ': // don't print the spaces, but keep track of the number of them (they are processed above)
            spaces++;
            break;
        case '\n': // reset the col and tabs index and print it
            col = 0;
            i = 0;
            putchar(c);
            break;
        case '\t': // find the next custom tab, subtract the number of spaces from it to current col and add that to spaces. These spaces will be processed the next iteration
            while (tabs[i] <= col && i < MAX_TABS)
                i++;
            if (i < MAX_TABS)
                j = tabs[i] - col;
            spaces += j;
            break;
        default: // all other chars are printed and col position is incremented by one
            putchar(c);
            col++;
            break;
        }
    }
    return 0;
}

// sort v[0]...v[n-1] into increasing order
void shellsort(int v[], int n)
{
    int gap, i, j, temp;

    for (gap = n / 2; gap > 0; gap /= 2)
        for (i = gap; i < n; i++)
            for (j = i - gap; j >= 0 && v[j] > v[j + gap]; j -= gap)
            {
                temp = v[j];
                v[j] = v[j + gap];
                v[j + gap] = temp;
            }
}

// removes duplicates from a sorted array. Returns new length
int removeDuplicates(int v[], int n)
{
    int j = 0;
    for (int i = 0; i < n - 1; i++) 
        if (v[i] != v[i + 1]) 
            v[j++] = v[i];
    v[j++] = v[n - 1];
    return j;
}
Personal tools