diff --git a/test-pendig.c b/test-pendig.c new file mode 100644 index 0000000..869ed31 --- /dev/null +++ b/test-pendig.c @@ -0,0 +1,188 @@ +#include "Tinn.h" +#include +#include +#include +#include + +typedef struct +{ + float** in; + float** tg; + int nips; + int nops; + int rows; +} +Data; + +static int lns(FILE* const file) +{ + int ch = EOF; + int lines = 0; + int pc = '\n'; + while((ch = getc(file)) != EOF) + { + if(ch == '\n') + lines++; + pc = ch; + } + if(pc != '\n') + lines++; + rewind(file); + return lines; +} + +static char* readln(FILE* const file) +{ + int ch = EOF; + int reads = 0; + int size = 128; + char* line = (char*) malloc((size) * sizeof(char)); + while((ch = getc(file)) != '\n' && ch != EOF) + { + line[reads++] = ch; + if(reads + 1 == size) + line = (char*) realloc((line), (size *= 2) * sizeof(char)); + } + line[reads] = '\0'; + return line; +} + +static float** new2d(const int rows, const int cols) +{ + float** row = (float**) malloc((rows) * sizeof(float*)); + for(int r = 0; r < rows; r++) + row[r] = (float*) malloc((cols) * sizeof(float)); + return row; +} + +static Data ndata(const int nips, const int nops, const int rows) +{ + const Data data = { + new2d(rows, nips), new2d(rows, nops), nips, nops, rows + }; + return data; +} + +static void parse(const Data data, char* line, const int row) +{ + for(int col = 0; col < data.nips; col++) + { + const float val = atof(strtok(col == 0 ? line : NULL, ", ")); + /* Input values are 0-100 pixel coordinates; scale to 0.0-1.0 */ + data.in[row][col] = val / 100.0; + } + /* Last value is a 0-9 numeral which we need to convert + * into a size 10 vector of {0.00, 1.00} + */ + const float val = atof(strtok(NULL, ", ")); + for(int col = 0; col < data.nops; col++) { + data.tg[row][col] = 0.0; + } + data.tg[row][(int)val] = 1.0; +} + +static void dfree(const Data d) +{ + for(int row = 0; row < d.rows; row++) + { + free(d.in[row]); + free(d.tg[row]); + } + free(d.in); + free(d.tg); +} + +static void shuffle(const Data d) +{ + for(int a = 0; a < d.rows; a++) + { + const int b = rand() % d.rows; + float* ot = d.tg[a]; + float* it = d.in[a]; + // Swap output. + d.tg[a] = d.tg[b]; + d.tg[b] = ot; + // Swap input. + d.in[a] = d.in[b]; + d.in[b] = it; + } +} + +static Data build(const char* path, const int nips, const int nops) +{ + FILE* file = fopen(path, "r"); + if(file == NULL) + { + printf("Could not open %s\n", path); + printf("Get it from the machine learning database: "); + printf("wget http://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tra\n"); + exit(1); + } + const int rows = lns(file); + Data data = ndata(nips, nops, rows); + for(int row = 0; row < rows; row++) + { + char* line = readln(file); + parse(data, line, row); + free(line); + } + fclose(file); + return data; +} + +int main() +{ + // Tinn does not seed the random number generator. + srand(time(0)); + // Input and output size is harded coded here as machine learning + // repositories usually don't include the input and output size in the data itself. + const int nips = 16; + const int nops = 10; + // Hyper Parameters. + // Learning rate is annealed and thus not constant. + // It can be fine tuned along with the number of hidden layers. + // Feel free to modify the anneal rate. + // The number of iterations can be changed for stronger training. + float rate = 1.0f; + const int nhid = 28; + const float anneal = 0.99f; + const int iterations = 128; + // Load the training set. + const Data data = build("pendigits.tra", nips, nops); + // Train, baby, train. + const Tinn tinn = xtbuild(nips, nhid, nops); + for(int i = 0; i < iterations; i++) + { + shuffle(data); + float error = 0.0f; + for(int j = 0; j < data.rows; j++) + { + const float* const in = data.in[j]; + const float* const tg = data.tg[j]; + error += xttrain(tinn, in, tg, rate); + } + printf("error %.12f :: learning rate %f\n", + (double) error / data.rows, + (double) rate); + rate *= anneal; + } + // This is how you save the neural network to disk. + xtsave(tinn, "saved.tinn"); + xtfree(tinn); + // This is how you load the neural network from disk. + const Tinn loaded = xtload("saved.tinn"); + // Now we do a prediction with the neural network we loaded from disk. + // Ideally, we would also load a testing set to make the prediction with, + // but for the sake of brevity here we just reuse the training set from earlier. + // One data set is picked at random. + const int pick = rand() % data.rows; + const float* const in = data.in[pick]; + const float* const tg = data.tg[pick]; + const float* const pd = xtpredict(loaded, in); + xtprint(tg, data.nops); + xtprint(pd, data.nops); + // All done. Let's clean up. + xtfree(loaded); + dfree(data); + return 0; +}