mirror of https://github.com/TheAlgorithms/C
feat: Added a program that prints words in alphabetical order using binary tree data structure (#841)
* frequencies of words started * A program to Print words contained in a file in alphabetical order * Apply suggestions from code review Co-authored-by: David Leal <halfpacho@gmail.com> * appropriate comments added as suggested from code review * comments cleaned up * updating DIRECTORY.md * Apply suggestions from code review Co-authored-by: David Leal <halfpacho@gmail.com> * commenting re-worded * Add link to algorithm * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * main function moved to the bottom and function prototypes deleted * uint64_t data type used for non negative values * uint8_t used * all int types fixed * detailed explanation of algorithm added * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * tests and documentation added * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * documentation added * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Update data_structures/binary_trees/words_alphabetical.c Co-authored-by: David Leal <halfpacho@gmail.com> * Apply suggestions from code review Co-authored-by: David Leal <halfpacho@gmail.com> * documentation added * Apply suggestions from code review Co-authored-by: David Leal <halfpacho@gmail.com> * Add documentation Co-authored-by: David Leal <halfpacho@gmail.com> Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
parent
580bd405ea
commit
1b30b89083
|
@ -38,6 +38,7 @@
|
|||
* [Red Black Tree](https://github.com/TheAlgorithms/C/blob/master/data_structures/binary_trees/red_black_tree.c)
|
||||
* [Segment Tree](https://github.com/TheAlgorithms/C/blob/master/data_structures/binary_trees/segment_tree.c)
|
||||
* [Threaded Binary Trees](https://github.com/TheAlgorithms/C/blob/master/data_structures/binary_trees/threaded_binary_trees.c)
|
||||
* [Words Alphabetical](https://github.com/TheAlgorithms/C/blob/master/data_structures/binary_trees/words_alphabetical.c)
|
||||
* Dictionary
|
||||
* [Dict](https://github.com/TheAlgorithms/C/blob/master/data_structures/dictionary/dict.c)
|
||||
* [Dict](https://github.com/TheAlgorithms/C/blob/master/data_structures/dictionary/dict.h)
|
||||
|
|
|
@ -0,0 +1,316 @@
|
|||
/**
|
||||
* @file
|
||||
* @brief Printing the [words contained in a
|
||||
* file](http://www.dailyfreecode.com/Code/word-list-reads-text-file-makes-2050.aspx)
|
||||
* named `file.txt` in alphabetical order and also their frequencies in to
|
||||
* another file "wordcount.txt"
|
||||
* @details
|
||||
* Given a file (`file.txt`) containing words (like a publication or a novel),
|
||||
* where words are separated by a space, newline, or underscore.
|
||||
* This program prints (writes or outputs) to another file (`wordcount.txt`),
|
||||
* the individual words contained in 'file.txt' with their frequencies (number
|
||||
* of occurences) each on a newline and in alphabetical order. This program uses
|
||||
* the binary tree data structure to accomplish this task.
|
||||
* @author [Randy Kwalar](https://github.com/RandyKdev)
|
||||
*/
|
||||
|
||||
#include <assert.h> /// for assert
|
||||
#include <ctype.h> /// for type checks
|
||||
#include <inttypes.h> /// for uint64_t based types, int64_t based types
|
||||
#include <stdbool.h> /// for boolean data type
|
||||
#include <stdio.h> /// for IO operations
|
||||
#include <stdlib.h> /// for memory allocation
|
||||
#include <string.h> /// for string operations
|
||||
|
||||
/**
|
||||
* @brief structure defining a node in the binary tree
|
||||
*/
|
||||
struct Node
|
||||
{
|
||||
char *word; ///< the word (value) of the node
|
||||
uint64_t frequency; ///< number of occurences of the word
|
||||
struct Node *left; ///< pointer to the left child node
|
||||
struct Node *right; ///< pointer to the right child node
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Ends program due to an error
|
||||
* @param errorMessage the error message to be printed
|
||||
* @returns void
|
||||
*/
|
||||
void endProgramAbruptly(char *errorMessage)
|
||||
{
|
||||
fprintf(stderr, "%s\n", errorMessage);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Frees memory when program is terminating
|
||||
* @param node pointer to current node
|
||||
* @returns void
|
||||
*/
|
||||
void freeTreeMemory(struct Node *node)
|
||||
{
|
||||
if (node != NULL)
|
||||
{
|
||||
freeTreeMemory(node->left);
|
||||
freeTreeMemory(node->right);
|
||||
free(node->word); // freeing node->word because memory was allocated
|
||||
// using malloc
|
||||
free(node); // freeing node because memory was allocated using malloc
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Stores word in memory
|
||||
* @param word word to be stored in memory
|
||||
* @returns a pointer to the newly allocated word if the word IS stored successfully
|
||||
* @returns `NULL` if the word is NOT stored
|
||||
*/
|
||||
char *getPointerToWord(char *word)
|
||||
{
|
||||
char *string =
|
||||
(char *)malloc((strlen(word) + 1) * sizeof(char)); ///< pointer to string
|
||||
// + 1 is for the '\0' character
|
||||
if (string != NULL)
|
||||
{
|
||||
strcpy(string, word);
|
||||
return string;
|
||||
}
|
||||
endProgramAbruptly(
|
||||
"\nA problem occurred while reserving memory for the word\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Closes the file after reading or writing
|
||||
* @param file pointer to the file to be closed
|
||||
* @returns void
|
||||
*/
|
||||
void closeFile(FILE *file)
|
||||
{
|
||||
if (fclose(file)) {
|
||||
endProgramAbruptly("\nA Problem Occurred while closing a file\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reserves memory for new node
|
||||
* @returns a pointer to the newly allocated node if memory IS successfully reserved
|
||||
* @returns `NULL` if memory is NOT reserved
|
||||
*/
|
||||
struct Node *allocateMemoryForNode()
|
||||
{
|
||||
struct Node *node =
|
||||
(struct Node *)malloc(sizeof(struct Node)); ///< pointer to the node
|
||||
if (node != NULL)
|
||||
{
|
||||
return node;
|
||||
}
|
||||
endProgramAbruptly(
|
||||
"\nA problem occurred while reserving memory for the structure\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Writes contents of tree to another file alphabetically
|
||||
* @param node pointer to current node
|
||||
* @param file pointer to file
|
||||
* @returns void
|
||||
*/
|
||||
void writeContentOfTreeToFile(struct Node *node, FILE *file)
|
||||
{
|
||||
static uint64_t i = 1; ///< for word numbering in the write file
|
||||
if (node != NULL) // checks if the node is valid
|
||||
{
|
||||
writeContentOfTreeToFile(
|
||||
node->left,
|
||||
file); // calls `writeContentOfTreeToFile` for left sub tree
|
||||
fprintf(file, "%-5lu \t %-9lu \t %s \n", i++, node->frequency,
|
||||
node->word); // prints the word number, word frequency and word
|
||||
// in tabular format to the file
|
||||
writeContentOfTreeToFile(
|
||||
node->right,
|
||||
file); // calls `writeContentOfTreeToFile` for right sub tree
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Adds word (node) to the correct position in tree
|
||||
* @param word word to be inserted in to the tree
|
||||
* @param currentNode node which is being compared
|
||||
* @returns a pointer to the root node
|
||||
*/
|
||||
struct Node *addWordToTree(char *word, struct Node *currentNode)
|
||||
{
|
||||
if (currentNode == NULL) // checks if `currentNode` is `NULL`
|
||||
{
|
||||
struct Node *currentNode =
|
||||
allocateMemoryForNode(); // allocates memory for new node
|
||||
currentNode->word = getPointerToWord(word); // stores `word` in memory
|
||||
currentNode->frequency = 1; // initializes the word frequency to 1
|
||||
currentNode->left = NULL; // sets left node to `NULL`
|
||||
currentNode->right = NULL; // sets right node to `NULL`
|
||||
return currentNode; // returns pointer to newly created node
|
||||
}
|
||||
|
||||
int64_t compared = strcmp(word, currentNode->word); ///< holds compare state
|
||||
|
||||
if (compared > 0) {
|
||||
currentNode->right = addWordToTree(word,
|
||||
currentNode->right); // adds `word` to right sub tree if `word` is
|
||||
// alphabetically greater than `currentNode->word`
|
||||
}
|
||||
else if (compared < 0) {
|
||||
currentNode->left = addWordToTree(word,
|
||||
currentNode->left); // adds `word` to left sub tree if `word` is
|
||||
// alphabetically less than `currentNode->word`
|
||||
}
|
||||
else {
|
||||
currentNode->frequency++; // increments `currentNode` frequency if `word` is the same as `currentNode->word`
|
||||
}
|
||||
|
||||
return currentNode; // returns pointer to current node
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads words from file to tree
|
||||
* @param file file to be read from
|
||||
* @param root root node of tree
|
||||
* @returns a pointer to the root node
|
||||
*/
|
||||
struct Node *readWordsInFileToTree(FILE *file, struct Node *root)
|
||||
{
|
||||
// longest english word = 45 chars
|
||||
// +1 for '\0' = 46 chars
|
||||
char *inputString =
|
||||
(char *)malloc(46 * sizeof(char)); ///< pointer to the input string
|
||||
|
||||
char inputChar; ///< temp storage of characters
|
||||
bool isPrevCharAlpha = false; ///< bool to mark the end of a word
|
||||
uint8_t pos = 0; ///< position in inputString to place the inputChar
|
||||
|
||||
while ((inputChar = fgetc(file)) != EOF)
|
||||
{
|
||||
if (pos > 0)
|
||||
isPrevCharAlpha = isalpha(inputString[pos - 1]);
|
||||
|
||||
// checks if character is letter
|
||||
if (isalpha(inputChar))
|
||||
{
|
||||
inputString[pos++] = tolower(inputChar);
|
||||
continue;
|
||||
}
|
||||
|
||||
// checks if character is ' or - and if it is preceded by a letter eg
|
||||
// yours-not, persons' (valid)
|
||||
if ((inputChar == '\'' || inputChar == '-') && isPrevCharAlpha)
|
||||
{
|
||||
inputString[pos++] = inputChar;
|
||||
continue;
|
||||
}
|
||||
|
||||
// makes sure that there is something valid in inputString
|
||||
if (pos == 0)
|
||||
continue;
|
||||
|
||||
// if last character is not letter and is not ' then replace by \0
|
||||
if (!isPrevCharAlpha && inputString[pos - 1] != '\'')
|
||||
pos--;
|
||||
inputString[pos] = '\0';
|
||||
pos = 0;
|
||||
isPrevCharAlpha = false;
|
||||
root = addWordToTree(inputString, root);
|
||||
}
|
||||
|
||||
// this is to catch the case for the EOF being immediately after the last
|
||||
// letter or '
|
||||
if (pos > 0)
|
||||
{
|
||||
if (!isPrevCharAlpha && inputString[pos - 1] != '\'')
|
||||
pos--;
|
||||
inputString[pos] = '\0';
|
||||
root = addWordToTree(inputString, root);
|
||||
}
|
||||
|
||||
free(inputString);
|
||||
return root;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Self-test implementations
|
||||
* @returns void
|
||||
*/
|
||||
static void test()
|
||||
{
|
||||
struct Node *root = NULL; ///< pointer to the root node
|
||||
FILE *file = NULL; ///< pointer to the file
|
||||
|
||||
file = fopen("file.txt", "w"); // creates test file in write mode
|
||||
|
||||
fprintf(file,
|
||||
"hey_this, is a. test input \n to a_file"); // writes test data to
|
||||
// test file
|
||||
|
||||
closeFile(file); // closes test file
|
||||
file = fopen("file.txt", "r"); // reopens test file in read mode
|
||||
|
||||
root = readWordsInFileToTree(file,
|
||||
root); // reads words from test file to tree
|
||||
|
||||
// Tests to check if words were added to correct position in tree and also
|
||||
// if their frequencies were added correctly
|
||||
assert(strcmp(root->word, "hey") == 0);
|
||||
assert(root->frequency == 1);
|
||||
assert(strcmp(root->left->word, "a") == 0);
|
||||
assert(root->left->frequency == 2);
|
||||
assert(strcmp(root->right->word, "this") == 0);
|
||||
assert(strcmp(root->left->right->word, "file") == 0);
|
||||
assert(strcmp(root->right->left->word, "is") == 0);
|
||||
|
||||
closeFile(file); // closes test file
|
||||
remove("file.txt"); // deletes test file from storage
|
||||
|
||||
file = fopen("wordcount.txt", "a"); // creates write file
|
||||
fprintf(file, "%-5s \t %9s \t %s \n", "S/N", "FREQUENCY",
|
||||
"WORD"); // prints the heading to `wordcount.txt`
|
||||
writeContentOfTreeToFile(
|
||||
root, file); // writes content of tree to file (`wordcount.txt`)
|
||||
|
||||
// Here is how the output to `wordcount.txt` should look like
|
||||
char *correctString =
|
||||
"S/N FREQUENCY WORD \n"
|
||||
"1 2 a \n"
|
||||
"2 1 file \n"
|
||||
"3 1 hey \n"
|
||||
"4 1 input \n"
|
||||
"5 1 is \n"
|
||||
"6 1 n \n"
|
||||
"7 1 test \n"
|
||||
"8 1 this \n"
|
||||
"9 1 to \n";
|
||||
|
||||
int16_t inputChar; // holds the current character in `wordcount.txt`
|
||||
uint64_t i = 0; // holds the current index in `correctString`
|
||||
|
||||
// Checks if the content in `wordcount.txt` is as expected (the same as in
|
||||
// `correctString`)
|
||||
while ((inputChar = fgetc(file)) != EOF) {
|
||||
assert(inputChar == correctString[i++]);
|
||||
}
|
||||
|
||||
closeFile(file); // closes `wordcount.txt`
|
||||
remove("wordcount.txt"); // deletes `wordcount.txt`
|
||||
|
||||
freeTreeMemory(root); // frees memory taken up by the tree
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Main function
|
||||
* @returns 0 on exit
|
||||
*/
|
||||
int main()
|
||||
{
|
||||
test(); // run self-test implementations
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue