mirror of
https://github.com/TheAlgorithms/C
synced 2024-11-21 21:11:57 +03:00
feat: Rewrite the trie example (#927)
The trie example had some issues; * It did not follow the code convention in CONTRIBUTING.md * The createTrieNode used an inefficient zeroing method (looping over the entries) which also does not zero out holes in the structure (e.g. an alternative would be to use "*node = &(TrieNode){0}", but calloc does all that anyway * It used an inefficient and clumsy printArray method * It used strlen inside the algorithm; this new method could get rid of any strlen/strnlen usage (inserts/searches could be sanitized by snprintf) * This version can allow for a custom mapping function, e.g. if NULL is a valid separator (say that you want a trie for certain binary packages) * The previous version actually contained out-of-bounds array indexing; there were no checks for out-of-bound indexing and words in the word list did contain out of bounds words. It's a surprise it was working so well. * This version just returns 'int' to allow for error checks (instead of a printf inside the algorithm), and uses double pointers for return values (good practice) * The usage example contained unnecessary mallocs, switched that out for scanf. The example is just an example after all, in real applications you'd have better input sanitazion.
This commit is contained in:
parent
d017c3ef77
commit
2314a19586
@ -11398,7 +11398,6 @@ ancylostome
|
||||
ancylostomiasis
|
||||
ancyroid
|
||||
and
|
||||
and/or
|
||||
anda
|
||||
andabata
|
||||
andabatarian
|
||||
@ -23796,8 +23795,6 @@ azymous
|
||||
b
|
||||
bhoy
|
||||
bs
|
||||
b/l
|
||||
b/s
|
||||
ba
|
||||
baa
|
||||
baaed
|
||||
@ -41542,10 +41539,6 @@ byzants
|
||||
bz
|
||||
c
|
||||
cs
|
||||
c/d
|
||||
c/f
|
||||
c/m
|
||||
c/o
|
||||
ca
|
||||
ca
|
||||
cacanny
|
||||
@ -126581,7 +126574,6 @@ hailweed
|
||||
haily
|
||||
haimsucken
|
||||
hain
|
||||
hain"t
|
||||
haint
|
||||
hainberry
|
||||
hainch
|
||||
@ -128730,7 +128722,6 @@ hdlc
|
||||
hdqrs
|
||||
hdwe
|
||||
he
|
||||
he"ll
|
||||
hed
|
||||
hell
|
||||
hes
|
||||
@ -139010,7 +139001,6 @@ ill
|
||||
im
|
||||
is
|
||||
ive
|
||||
i/c
|
||||
ia
|
||||
iago
|
||||
iamatology
|
||||
@ -145956,7 +145946,6 @@ inpours
|
||||
inpush
|
||||
input
|
||||
inputs
|
||||
input/output
|
||||
inputfile
|
||||
inputs
|
||||
inputted
|
||||
@ -151770,7 +151759,6 @@ isuretine
|
||||
isuroid
|
||||
isz
|
||||
it
|
||||
it"ll
|
||||
itd
|
||||
itll
|
||||
its
|
||||
@ -155260,7 +155248,6 @@ kb
|
||||
kbar
|
||||
kbps
|
||||
kc
|
||||
kc/s
|
||||
kcal
|
||||
kea
|
||||
keach
|
||||
@ -156978,7 +156965,6 @@ klva
|
||||
klystron
|
||||
klystrons
|
||||
km
|
||||
km/sec
|
||||
kmel
|
||||
kmet
|
||||
kmole
|
||||
@ -158105,7 +158091,6 @@ lenvoy
|
||||
loeil
|
||||
ls
|
||||
ltre
|
||||
l/w
|
||||
la
|
||||
laager
|
||||
laagered
|
||||
@ -164745,8 +164730,6 @@ ller
|
||||
lloyds
|
||||
llyn
|
||||
lm
|
||||
lm/ft
|
||||
lm/m
|
||||
ln
|
||||
lndg
|
||||
lnr
|
||||
@ -167403,7 +167386,6 @@ lyttas
|
||||
lyxose
|
||||
m
|
||||
ms
|
||||
m/s
|
||||
ma
|
||||
maam
|
||||
maad
|
||||
@ -185958,8 +185940,6 @@ n
|
||||
ngana
|
||||
nimporte
|
||||
ns
|
||||
n/a
|
||||
n/f
|
||||
na
|
||||
naa
|
||||
naam
|
||||
@ -198979,8 +198959,6 @@ oclock
|
||||
oer
|
||||
oertop
|
||||
os
|
||||
o/c
|
||||
o/s
|
||||
oad
|
||||
oadal
|
||||
oaf
|
||||
@ -345815,8 +345793,6 @@ vyingly
|
||||
vyrnwy
|
||||
w
|
||||
ws
|
||||
w/
|
||||
w/o
|
||||
wa
|
||||
wa
|
||||
waac
|
||||
|
@ -3,6 +3,9 @@
|
||||
|
||||
/*-----character - 97 used for get the character from the ASCII value-----*/
|
||||
|
||||
// needed for strnlen
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -11,177 +14,193 @@
|
||||
#define ALPHABET_SIZE 26
|
||||
|
||||
/*--Node in the Trie--*/
|
||||
typedef struct TrieNode
|
||||
{
|
||||
struct TrieNode *children[ALPHABET_SIZE];
|
||||
char character;
|
||||
bool isEndOfWord;
|
||||
struct trie {
|
||||
struct trie *children[ALPHABET_SIZE];
|
||||
bool end_of_word;
|
||||
};
|
||||
|
||||
} TrieNode;
|
||||
|
||||
/*--Create new node--*/
|
||||
TrieNode *createTrieNode()
|
||||
/*--Create new trie node--*/
|
||||
int trie_new (
|
||||
struct trie ** trie
|
||||
)
|
||||
{
|
||||
TrieNode *node;
|
||||
node = malloc(sizeof(TrieNode));
|
||||
node->isEndOfWord = false;
|
||||
int i = 0;
|
||||
while (i < ALPHABET_SIZE)
|
||||
{
|
||||
node->children[i] = NULL;
|
||||
i++;
|
||||
*trie = calloc(1, sizeof(struct trie));
|
||||
if (NULL == *trie) {
|
||||
// memory allocation failed
|
||||
return -1;
|
||||
}
|
||||
return node;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*--Insert new word to Trie--*/
|
||||
void insert(TrieNode *root, char *word)
|
||||
int trie_insert (
|
||||
struct trie * trie,
|
||||
char *word,
|
||||
unsigned word_len
|
||||
)
|
||||
{
|
||||
/*----Addition of the word done by recurcively----*/
|
||||
int ret = 0;
|
||||
|
||||
// Check wheather word character pointer is NULL
|
||||
if ((strlen(word) - 1) != 0)
|
||||
{
|
||||
char character = *word;
|
||||
if (root->children[character - 97] == NULL)
|
||||
{
|
||||
TrieNode *node = NULL;
|
||||
node = createTrieNode();
|
||||
node->character = character;
|
||||
root->children[character - 97] = node;
|
||||
// this is the end of this word; add an end-of-word marker here and we're
|
||||
// done.
|
||||
if (0 == word_len) {
|
||||
trie->end_of_word = true;
|
||||
return 0;
|
||||
}
|
||||
word++;
|
||||
insert(root->children[character - 97], word);
|
||||
|
||||
// if you have some more complex mapping, you could introduce one here. In
|
||||
// this easy example, we just subtract 'a' (97) from it, meaning that 'a' is 0,
|
||||
// 'b' is 1, and so on.
|
||||
const unsigned int index = word[0] - 'a';
|
||||
|
||||
// this index is outside the alphabet size; indexing this would mean an
|
||||
// out-of-bound memory access (bad!). If you introduce a separate map
|
||||
// function for indexing, then you could move the out-of-bounds index in
|
||||
// there.
|
||||
if (ALPHABET_SIZE <= index) {
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
root->isEndOfWord = true;
|
||||
|
||||
// The index does not exist yet, allocate it.
|
||||
if (NULL == trie->children[index]) {
|
||||
ret = trie_new(&trie->children[index]);
|
||||
if (-1 == ret) {
|
||||
// creating new trie node failed
|
||||
return -1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// recurse into the child node
|
||||
return trie_insert(
|
||||
/* trie = */ trie->children[index],
|
||||
/* word = */ word + 1,
|
||||
/* word_len = */ word_len - 1
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
/*--Search a word in the Trie--*/
|
||||
TrieNode *search(TrieNode *root, char *word)
|
||||
int trie_search(
|
||||
struct trie * trie,
|
||||
char *word,
|
||||
unsigned word_len,
|
||||
struct trie ** result
|
||||
)
|
||||
{
|
||||
TrieNode *temp;
|
||||
while (*word != '\0')
|
||||
{
|
||||
char character = *word;
|
||||
if (root->children[character - 97] != NULL)
|
||||
{
|
||||
temp = root->children[character - 97];
|
||||
word++;
|
||||
root = temp;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("No possible words!!\n");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return root;
|
||||
// we found a match
|
||||
if (0 == word_len) {
|
||||
*result = trie;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*---Print a word in the array--*/
|
||||
void printArray(char chars[], int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
printf("%c", chars[i]);
|
||||
// same here as in trie_insert, if you have a separate index mapping, add
|
||||
// it here. In this example, we just subtract 'a'.
|
||||
const unsigned int index = word[0] - 'a';
|
||||
|
||||
// This word contains letters outside the alphabet length; it's invalid.
|
||||
// Remember to do this to prevent buffer overflows.
|
||||
if (ALPHABET_SIZE <= index) {
|
||||
return -1;
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// No match
|
||||
if (NULL == trie->children[index]) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// traverse the trie
|
||||
return trie_search(
|
||||
/* trie = */ trie->children[index],
|
||||
/* word = */ word + 1,
|
||||
/* word_len = */ word_len - 1,
|
||||
/* result = */ result
|
||||
);
|
||||
}
|
||||
|
||||
/*---Return all the related words------*/
|
||||
void printPathsRecur(TrieNode *node, char prefix[], int filledLen)
|
||||
void trie_print (
|
||||
struct trie * trie,
|
||||
char prefix[],
|
||||
unsigned prefix_len
|
||||
)
|
||||
{
|
||||
if (node == NULL)
|
||||
return;
|
||||
|
||||
prefix[filledLen] = node->character;
|
||||
filledLen++;
|
||||
|
||||
if (node->isEndOfWord)
|
||||
{
|
||||
printArray(prefix, filledLen);
|
||||
// An end-of-word marker means that this is a complete word, print it.
|
||||
if (true == trie->end_of_word) {
|
||||
printf("%.*s\n", prefix_len, prefix);
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i = 0; i < ALPHABET_SIZE; i++)
|
||||
{
|
||||
printPathsRecur(node->children[i], prefix, filledLen);
|
||||
// However, there can be longer words with the same prefix; traverse into
|
||||
// those as well.
|
||||
for (int i = 0; i < ALPHABET_SIZE; i++) {
|
||||
|
||||
// No words on this character
|
||||
if (NULL == trie->children[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If you have a separate index mapping, then you'd need the inverse of
|
||||
// the map here. Since we subtracted 'a' for the index, we can just add
|
||||
// 'a' to get the inverse map function.
|
||||
prefix[prefix_len] = i + 'a';
|
||||
|
||||
// traverse the print into the child
|
||||
trie_print(trie->children[i], prefix, prefix_len + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/*--Travel through the Trie and return words from it--*/
|
||||
void traverse(char prefix[], TrieNode *root)
|
||||
{
|
||||
TrieNode *temp = NULL;
|
||||
temp = search(root, prefix);
|
||||
int j = 0;
|
||||
while (prefix[j] != '\0')
|
||||
{
|
||||
j++;
|
||||
}
|
||||
printPathsRecur(temp, prefix, j - 1);
|
||||
}
|
||||
|
||||
/*------Demonstrate purposes uses text file called dictionary -------*/
|
||||
|
||||
#define NUMBER_OF_WORDS (354935)
|
||||
#define INPUT_WORD_SIZE (100)
|
||||
int main() {
|
||||
int ret = 0;
|
||||
struct trie * root = NULL;
|
||||
struct trie * trie = NULL;
|
||||
char word[100] = {0};
|
||||
|
||||
/*----Get input from the user------*/
|
||||
char *receiveInput(char *s)
|
||||
{
|
||||
scanf("%99s", s);
|
||||
return s;
|
||||
// Create a root trie
|
||||
ret = trie_new(&root);
|
||||
if (-1 == ret) {
|
||||
fprintf(stderr, "Could not create trie\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// Read the file dictionary
|
||||
int word_count = 0;
|
||||
char *words[NUMBER_OF_WORDS];
|
||||
// open the dictionary file
|
||||
FILE *fp = fopen("dictionary.txt", "r");
|
||||
|
||||
if (fp == 0)
|
||||
{
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Error while opening dictionary file");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
words[word_count] = malloc(INPUT_WORD_SIZE);
|
||||
|
||||
while (fgets(words[word_count], INPUT_WORD_SIZE, fp))
|
||||
{
|
||||
word_count++;
|
||||
words[word_count] = malloc(INPUT_WORD_SIZE);
|
||||
// insert all the words from the dictionary
|
||||
while (1 == fscanf(fp, "%100s\n", word)) {
|
||||
ret = trie_insert(root, word, strnlen(word, 100));
|
||||
if (-1 == ret) {
|
||||
fprintf(stderr, "Could not insert word into trie\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Push the words in to Trie
|
||||
TrieNode *root = NULL;
|
||||
root = createTrieNode();
|
||||
int i;
|
||||
for (i = 0; i < NUMBER_OF_WORDS; i++)
|
||||
{
|
||||
insert(root, words[i]);
|
||||
}
|
||||
|
||||
while (1)
|
||||
{
|
||||
while (1) {
|
||||
printf("Enter keyword: ");
|
||||
char str[100];
|
||||
receiveInput(str);
|
||||
if (1 != scanf("%100s", word)) {
|
||||
break;
|
||||
}
|
||||
|
||||
printf(
|
||||
"\n==========================================================\n");
|
||||
printf("\n********************* Possible Words ********************\n");
|
||||
|
||||
// Find the word through the Trie
|
||||
traverse(str, root);
|
||||
ret = trie_search(root, word, strnlen(word, 100), &trie);
|
||||
if (-1 == ret) {
|
||||
printf("No results\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
printf(
|
||||
"\n==========================================================\n");
|
||||
trie_print(trie, word, strnlen(word, 100));
|
||||
|
||||
printf("\n==========================================================\n");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user