`

c - word counter (binary-tree)

 
阅读更多

c - word counter (binary-tree)

 

Counts ASCII words in a file; written in C, using a binary tree.

 

 

code:

    word_counter.c:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define WORD_MAX_LEN 50
#define LAST_LETTER 1
#define LAST_OTHER 0

/**
 * count ascii words
 * @author kuchaguangjie
 * @date 2012-05-09 14:05:21
 * @mail kuchaguangjie@163.com
 */

/* One node of the binary search tree used to count words. */
struct word_count {
	char *word;               /* heap-allocated copy of the word (set in btree_add) */
	int count;                /* number of times the word has been added */
	struct word_count *left;  /* subtree of words that strcmp() orders before this one */
	struct word_count *right; /* subtree of words that strcmp() orders after this one */
};

/**
 * Read the next word from a stream.
 *
 * A word is a maximal run of ASCII letters (a-z, A-Z); every other byte
 * acts as a separator. State machine used to detect the end of a word:
 *
 *  |-------------------------------------------|
 *  | last_char     new_char      word_found    |
 *  |-------------------------------------------|
 *  | letter        other         Y             |
 *  | letter        letter        N             |
 *  | other         other         N             |
 *  | other         letter        N             |
 *  |-------------------------------------------|
 *
 * @param fp
 * 	stream to read from
 * @param word
 * 	buffer receiving the NUL-terminated word; must hold at least
 * 	limit + 1 chars
 * @param limit
 * 	maximum number of letters stored in word; the remaining letters of a
 * 	longer word are read and discarded
 *
 * @return
 * 	1 -> a word was terminated by a non-letter before EOF,
 * 	0 -> EOF (or a read error) was reached; word then holds the trailing
 * 	     word, or an empty string if there was none
 */
int getword(FILE *fp, char *word, int limit) {
	int len = 0;
	int in_word = 0; /* non-zero once at least one letter has been read */
	int c;           /* int, not char: fgetc() reports EOF out of band */

	while ((c = fgetc(fp)) != EOF) {
		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
			in_word = 1;
			/* honour the caller's limit so the buffer cannot overflow */
			if (len < limit)
				word[len++] = (char) c;
		} else if (in_word) {
			/* letter -> other transition: the word is complete */
			word[len] = '\0';
			return 1;
		}
	}

	/* reached EOF: hand back whatever was collected (possibly nothing) */
	word[len] = '\0';
	return 0;
}

/**
 * Allocate storage for one struct word_count node.
 *
 * The fields of the returned node are uninitialized; the caller must set
 * them before use.
 *
 * @return
 * 	pointer to the new node, or NULL if the allocation fails
 */
struct word_count *word_alloc(void) {
	/* sizeof *node keeps the size tied to the pointer's type; no cast needed in C */
	struct word_count *node = malloc(sizeof *node);

	return node;
}

/**
 * Add a word to the binary search tree, or bump its count if present.
 *
 * @param p
 * 	root of the (sub)tree in which to add/count the word; may be NULL
 * @param word
 * 	the NUL-terminated word to add; it is copied, so the caller may reuse
 * 	its buffer afterwards
 *
 * @return
 * 	the root of the (sub)tree after the operation: p itself when p is not
 * 	NULL, otherwise the newly created node. The caller must store this
 * 	value back, because an empty (sub)tree gets a new root. If memory
 * 	cannot be allocated for a new node the word is dropped and NULL is
 * 	returned for that empty (sub)tree, leaving the tree unchanged.
 */
struct word_count *btree_add(struct word_count *p, char *word) {
	if (p == NULL) {
		size_t size = strlen(word) + 1;
		struct word_count *node = word_alloc();

		if (node == NULL)
			return NULL;

		/* the word must be duplicated: the caller's buffer is overwritten
		 * by the next read. malloc + memcpy is used instead of strdup,
		 * which is not part of ISO C before C23. */
		node->word = malloc(size);
		if (node->word == NULL) {
			free(node);
			return NULL;
		}
		memcpy(node->word, word, size);

		node->count = 1;
		node->left = node->right = NULL;
		return node;
	}

	int cmp = strcmp(word, p->word);

	if (cmp == 0)
		p->count++;
	else if (cmp < 0)
		p->left = btree_add(p->left, word);
	else
		p->right = btree_add(p->right, word);

	return p;
}

/*
 * In-order traversal of the tree rooted at p: prints one
 * "<count>, <word>" line per node, smallest word first.
 */
void treeprint(struct word_count *p) {
	/* walk down the right spine iteratively, recursing only to the left */
	for (; p != NULL; p = p->right) {
		treeprint(p->left);
		printf("%6d, %s\n", p->count, p->word);
	}
}

/**
 * do count words from a file
 * 
 * @param fp
 * 	FILE pointer
 * @param counts
 * 	word_count array, which is ordered,
 * @param n
 * 	word counts, equals to word_count array size,
 * 
 * @return
 * 	total different word
 */
void docount(FILE *fp) {
	struct word_count *root = NULL;
	char word[WORD_MAX_LEN + 1];
	int end;
	while ((end = getword(fp, word, WORD_MAX_LEN)) == 1) {
		root = btree_add(root, word);
	}
	// last word
	if (word[0] != '\0') {
		root = btree_add(root, word);
	}
	treeprint(root);
}

/**
 * Program entry point: counts the words of a file and prints the result.
 *
 * @param argc
 * 	number of command-line arguments
 * @param argv
 * 	argv[1], when given, is the path of the file to analyse; otherwise the
 * 	built-in default path is used
 *
 * @return
 * 	EXIT_SUCCESS on success, EXIT_FAILURE if the file cannot be opened
 */
int main(int argc, char *argv[]) {
	const char *fpath = argc > 1
		? argv[1]
		: "/home/eric/workspace/c_workplace/practise/word_counter.c";
	FILE *fp = fopen(fpath, "r");

	if (fp == NULL) {
		perror(fpath);
		return EXIT_FAILURE;
	}

	docount(fp);
	fclose(fp);
	return EXIT_SUCCESS;
}
 
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics