feat: Rewrite the trie example (#927)

The trie example had some issues;

* It did not follow the code convention in CONTRIBUTING.md
* The createTrieNode used an inefficient zeroing method (looping over
  the entries) which also does not zero out holes in the structure (e.g.
  an alternative would be to use "*node = &(TrieNode){0}", but calloc
  does all that anyway
* It used an inefficient and clumsy printArray method
* It used strlen inside the algorithm; this new method could get rid of
  any strlen/strnlen usage (inserts/searches could be sanitized by
  snprintf)
* This version can allow for a custom mapping function, e.g. if NULL is
  a valid separator (say that you want a trie for certain binary
  packages)
* The previous version actually contained out-of-bounds array indexing;
  there were no checks for out-of-bound indexing and words in the word
  list did contain out of bounds words. It's a surprise it was working
  so well.
* This version just returns 'int' to allow for error checks (instead of
  a printf inside the algorithm), and uses double pointers for return
  values (good practice)
* The usage example contained unnecessary mallocs, switched that out for
  scanf. The example is just an example after all, in real applications
  you'd have better input sanitazion.
This commit is contained in:
Daniel Beecham 2022-03-20 02:04:18 +01:00 committed by GitHub
parent d017c3ef77
commit 2314a19586
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 146 additions and 151 deletions

View File

@ -11398,7 +11398,6 @@ ancylostome
ancylostomiasis
ancyroid
and
and/or
anda
andabata
andabatarian
@ -23796,8 +23795,6 @@ azymous
b
bhoy
bs
b/l
b/s
ba
baa
baaed
@ -41542,10 +41539,6 @@ byzants
bz
c
cs
c/d
c/f
c/m
c/o
ca
ca
cacanny
@ -126581,7 +126574,6 @@ hailweed
haily
haimsucken
hain
hain"t
haint
hainberry
hainch
@ -128730,7 +128722,6 @@ hdlc
hdqrs
hdwe
he
he"ll
hed
hell
hes
@ -139010,7 +139001,6 @@ ill
im
is
ive
i/c
ia
iago
iamatology
@ -145956,7 +145946,6 @@ inpours
inpush
input
inputs
input/output
inputfile
inputs
inputted
@ -151770,7 +151759,6 @@ isuretine
isuroid
isz
it
it"ll
itd
itll
its
@ -155260,7 +155248,6 @@ kb
kbar
kbps
kc
kc/s
kcal
kea
keach
@ -156978,7 +156965,6 @@ klva
klystron
klystrons
km
km/sec
kmel
kmet
kmole
@ -158105,7 +158091,6 @@ lenvoy
loeil
ls
ltre
l/w
la
laager
laagered
@ -164745,8 +164730,6 @@ ller
lloyds
llyn
lm
lm/ft
lm/m
ln
lndg
lnr
@ -167403,7 +167386,6 @@ lyttas
lyxose
m
ms
m/s
ma
maam
maad
@ -185958,8 +185940,6 @@ n
ngana
nimporte
ns
n/a
n/f
na
naa
naam
@ -198979,8 +198959,6 @@ oclock
oer
oertop
os
o/c
o/s
oad
oadal
oaf
@ -345815,8 +345793,6 @@ vyingly
vyrnwy
w
ws
w/
w/o
wa
wa
waac
@ -354932,4 +354908,4 @@ zymurgy
zythem
zythum
zyzzyva
zyzzyvas
zyzzyvas

View File

@ -3,6 +3,9 @@
/*-----character - 97 used for get the character from the ASCII value-----*/
// needed for strnlen
#define _POSIX_C_SOURCE 200809L
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@ -11,177 +14,193 @@
#define ALPHABET_SIZE 26
/*--Node in the Trie--*/
typedef struct TrieNode
{
struct TrieNode *children[ALPHABET_SIZE];
char character;
bool isEndOfWord;
struct trie {
struct trie *children[ALPHABET_SIZE];
bool end_of_word;
};
} TrieNode;
/*--Create new node--*/
TrieNode *createTrieNode()
/*--Create new trie node--*/
int trie_new (
struct trie ** trie
)
{
TrieNode *node;
node = malloc(sizeof(TrieNode));
node->isEndOfWord = false;
int i = 0;
while (i < ALPHABET_SIZE)
{
node->children[i] = NULL;
i++;
*trie = calloc(1, sizeof(struct trie));
if (NULL == *trie) {
// memory allocation failed
return -1;
}
return node;
return 0;
}
/*--Insert new word to Trie--*/
void insert(TrieNode *root, char *word)
int trie_insert (
struct trie * trie,
char *word,
unsigned word_len
)
{
/*----Addition of the word done by recurcively----*/
int ret = 0;
// Check wheather word character pointer is NULL
if ((strlen(word) - 1) != 0)
{
char character = *word;
if (root->children[character - 97] == NULL)
{
TrieNode *node = NULL;
node = createTrieNode();
node->character = character;
root->children[character - 97] = node;
// this is the end of this word; add an end-of-word marker here and we're
// done.
if (0 == word_len) {
trie->end_of_word = true;
return 0;
}
// if you have some more complex mapping, you could introduce one here. In
// this easy example, we just subtract 'a' (97) from it, meaning that 'a' is 0,
// 'b' is 1, and so on.
const unsigned int index = word[0] - 'a';
// this index is outside the alphabet size; indexing this would mean an
// out-of-bound memory access (bad!). If you introduce a separate map
// function for indexing, then you could move the out-of-bounds index in
// there.
if (ALPHABET_SIZE <= index) {
return -1;
}
// The index does not exist yet, allocate it.
if (NULL == trie->children[index]) {
ret = trie_new(&trie->children[index]);
if (-1 == ret) {
// creating new trie node failed
return -1;
}
word++;
insert(root->children[character - 97], word);
}
else
{
root->isEndOfWord = true;
}
return;
// recurse into the child node
return trie_insert(
/* trie = */ trie->children[index],
/* word = */ word + 1,
/* word_len = */ word_len - 1
);
}
/*--Search a word in the Trie--*/
TrieNode *search(TrieNode *root, char *word)
int trie_search(
struct trie * trie,
char *word,
unsigned word_len,
struct trie ** result
)
{
TrieNode *temp;
while (*word != '\0')
{
char character = *word;
if (root->children[character - 97] != NULL)
{
temp = root->children[character - 97];
word++;
root = temp;
}
else
{
printf("No possible words!!\n");
return NULL;
}
// we found a match
if (0 == word_len) {
*result = trie;
return 0;
}
return root;
}
/*---Print a word in the array--*/
void printArray(char chars[], int len)
{
int i;
for (i = 0; i < len; i++)
{
printf("%c", chars[i]);
// same here as in trie_insert, if you have a separate index mapping, add
// it here. In this example, we just subtract 'a'.
const unsigned int index = word[0] - 'a';
// This word contains letters outside the alphabet length; it's invalid.
// Remember to do this to prevent buffer overflows.
if (ALPHABET_SIZE <= index) {
return -1;
}
printf("\n");
// No match
if (NULL == trie->children[index]) {
return -1;
}
// traverse the trie
return trie_search(
/* trie = */ trie->children[index],
/* word = */ word + 1,
/* word_len = */ word_len - 1,
/* result = */ result
);
}
/*---Return all the related words------*/
void printPathsRecur(TrieNode *node, char prefix[], int filledLen)
void trie_print (
struct trie * trie,
char prefix[],
unsigned prefix_len
)
{
if (node == NULL)
return;
prefix[filledLen] = node->character;
filledLen++;
if (node->isEndOfWord)
{
printArray(prefix, filledLen);
// An end-of-word marker means that this is a complete word, print it.
if (true == trie->end_of_word) {
printf("%.*s\n", prefix_len, prefix);
}
int i;
for (i = 0; i < ALPHABET_SIZE; i++)
{
printPathsRecur(node->children[i], prefix, filledLen);
// However, there can be longer words with the same prefix; traverse into
// those as well.
for (int i = 0; i < ALPHABET_SIZE; i++) {
// No words on this character
if (NULL == trie->children[i]) {
continue;
}
// If you have a separate index mapping, then you'd need the inverse of
// the map here. Since we subtracted 'a' for the index, we can just add
// 'a' to get the inverse map function.
prefix[prefix_len] = i + 'a';
// traverse the print into the child
trie_print(trie->children[i], prefix, prefix_len + 1);
}
}
/*--Travel through the Trie and return words from it--*/
void traverse(char prefix[], TrieNode *root)
{
TrieNode *temp = NULL;
temp = search(root, prefix);
int j = 0;
while (prefix[j] != '\0')
{
j++;
}
printPathsRecur(temp, prefix, j - 1);
}
/*------Demonstrate purposes uses text file called dictionary -------*/
#define NUMBER_OF_WORDS (354935)
#define INPUT_WORD_SIZE (100)
int main() {
int ret = 0;
struct trie * root = NULL;
struct trie * trie = NULL;
char word[100] = {0};
/*----Get input from the user------*/
char *receiveInput(char *s)
{
scanf("%99s", s);
return s;
}
// Create a root trie
ret = trie_new(&root);
if (-1 == ret) {
fprintf(stderr, "Could not create trie\n");
exit(1);
}
int main()
{
// Read the file dictionary
int word_count = 0;
char *words[NUMBER_OF_WORDS];
// open the dictionary file
FILE *fp = fopen("dictionary.txt", "r");
if (fp == 0)
{
if (NULL == fp) {
fprintf(stderr, "Error while opening dictionary file");
exit(1);
}
words[word_count] = malloc(INPUT_WORD_SIZE);
while (fgets(words[word_count], INPUT_WORD_SIZE, fp))
{
word_count++;
words[word_count] = malloc(INPUT_WORD_SIZE);
// insert all the words from the dictionary
while (1 == fscanf(fp, "%100s\n", word)) {
ret = trie_insert(root, word, strnlen(word, 100));
if (-1 == ret) {
fprintf(stderr, "Could not insert word into trie\n");
exit(1);
}
}
// Push the words in to Trie
TrieNode *root = NULL;
root = createTrieNode();
int i;
for (i = 0; i < NUMBER_OF_WORDS; i++)
{
insert(root, words[i]);
}
while (1)
{
while (1) {
printf("Enter keyword: ");
char str[100];
receiveInput(str);
if (1 != scanf("%100s", word)) {
break;
}
printf(
"\n==========================================================\n");
printf("\n********************* Possible Words ********************\n");
// Find the word through the Trie
traverse(str, root);
ret = trie_search(root, word, strnlen(word, 100), &trie);
if (-1 == ret) {
printf("No results\n");
continue;
}
printf(
"\n==========================================================\n");
trie_print(trie, word, strnlen(word, 100));
printf("\n==========================================================\n");
}
}