// tinn/test.c

2018-03-29 06:55:49 +03:00
#include "Tinn.h"
2018-03-27 00:11:15 +03:00
#include <stdio.h>
2018-04-02 01:25:47 +03:00
#include <time.h>
2018-03-30 02:13:48 +03:00
#include <string.h>
#include <stdlib.h>
// Data object.
// Holds one training set: `rows` samples, each with `nips` input values and
// `nops` target values. The field order matters because the object is built
// with a positional initialiser.
typedef struct
{
    // 2D floating point array of input (rows x nips).
    float** in;
    // 2D floating point array of target (rows x nops).
    float** tg;
    // Number of inputs to neural network.
    int nips;
    // Number of outputs to neural network.
    int nops;
    // Number of rows in file (number of sets for neural network).
    int rows;
}
Data;
// Returns the number of lines in a file.
// A last line without a terminating newline is still counted, and an empty
// file yields zero. The stream is rewound to its start before returning,
// so the caller can read the file again from the top.
static int lns(FILE* const file)
{
    int lines = 0;
    // Pretend a newline was just seen so that an empty file counts zero lines.
    int pc = '\n';
    int ch;
    while((ch = getc(file)) != EOF)
    {
        if(ch == '\n')
            lines++;
        pc = ch;
    }
    // The final line had no newline of its own.
    if(pc != '\n')
        lines++;
    rewind(file);
    return lines;
}
// Reads a line from a file.
// Consumes characters up to and including the next newline (or up to end of
// file) and returns them, without the newline, as a NUL-terminated string.
// The string is heap allocated and must be released by the caller with free().
// Exits the program if memory cannot be allocated.
static char* readln(FILE* const file)
{
    size_t reads = 0;
    size_t size = 128;
    char* line = (char*) malloc(size * sizeof(char));
    if(line == NULL)
    {
        fprintf(stderr, "Could not allocate memory for a line\n");
        exit(1);
    }
    int ch;
    while((ch = getc(file)) != '\n' && ch != EOF)
    {
        line[reads++] = (char) ch;
        // Always keep one spare byte for the terminator.
        if(reads + 1 == size)
        {
            size *= 2;
            // Grow through a temporary so the old buffer is not lost on failure.
            char* const grown = (char*) realloc(line, size * sizeof(char));
            if(grown == NULL)
            {
                free(line);
                fprintf(stderr, "Could not allocate memory for a line\n");
                exit(1);
            }
            line = grown;
        }
    }
    line[reads] = '\0';
    return line;
}
// New 2D array of floats.
// Allocates a table of `rows` pointers, each pointing at its own array of
// `cols` floats. The contents are uninitialised. Every row and the table
// itself must later be released with free().
// Exits the program if memory cannot be allocated.
static float** new2d(const int rows, const int cols)
{
    float** row = (float**) malloc((rows) * sizeof(float*));
    // A zero-size request may legitimately return NULL; only a sized one is a failure.
    if(row == NULL && rows > 0)
    {
        fprintf(stderr, "Could not allocate memory for %d rows\n", rows);
        exit(1);
    }
    for(int r = 0; r < rows; r++)
    {
        row[r] = (float*) malloc((cols) * sizeof(float));
        if(row[r] == NULL && cols > 0)
        {
            fprintf(stderr, "Could not allocate memory for row %d\n", r);
            exit(1);
        }
    }
    return row;
}
2018-04-13 08:04:46 +03:00
// New data object.
2018-03-30 23:04:37 +03:00
static Data ndata(const int nips, const int nops, const int rows)
2018-03-30 02:13:48 +03:00
{
const Data data = {
2018-03-30 23:04:37 +03:00
new2d(rows, nips), new2d(rows, nops), nips, nops, rows
2018-03-30 02:13:48 +03:00
};
return data;
}
2018-03-29 08:04:47 +03:00
2018-04-13 08:04:46 +03:00
// Gets one row of inputs and outputs from a string.
2018-03-30 02:13:48 +03:00
static void parse(const Data data, char* line, const int row)
{
2018-03-30 23:04:37 +03:00
const int cols = data.nips + data.nops;
2018-03-30 02:13:48 +03:00
for(int col = 0; col < cols; col++)
{
2018-03-31 14:29:03 +03:00
const float val = atof(strtok(col == 0 ? line : NULL, " "));
2018-03-30 23:04:37 +03:00
if(col < data.nips)
data.in[row][col] = val;
2018-03-30 02:13:48 +03:00
else
2018-03-30 23:04:37 +03:00
data.tg[row][col - data.nips] = val;
2018-03-30 02:13:48 +03:00
}
}
2018-04-13 08:04:46 +03:00
// Frees a data object from the heap.
2018-03-30 02:13:48 +03:00
static void dfree(const Data d)
{
for(int row = 0; row < d.rows; row++)
{
2018-03-30 23:04:37 +03:00
free(d.in[row]);
free(d.tg[row]);
2018-03-30 02:13:48 +03:00
}
2018-03-30 23:04:37 +03:00
free(d.in);
free(d.tg);
2018-03-30 02:13:48 +03:00
}
2018-04-13 08:04:46 +03:00
// Randomly shuffles a data object.
2018-03-30 02:13:48 +03:00
static void shuffle(const Data d)
{
for(int a = 0; a < d.rows; a++)
{
const int b = rand() % d.rows;
2018-03-31 14:29:03 +03:00
float* ot = d.tg[a];
float* it = d.in[a];
2018-03-30 02:13:48 +03:00
// Swap output.
2018-03-30 23:04:37 +03:00
d.tg[a] = d.tg[b];
d.tg[b] = ot;
2018-03-30 02:13:48 +03:00
// Swap input.
2018-03-30 23:04:37 +03:00
d.in[a] = d.in[b];
d.in[b] = it;
2018-03-30 02:13:48 +03:00
}
}
2018-04-13 08:04:46 +03:00
// Parses file from path getting all inputs and outputs for the neural network. Returns data object.
2018-03-30 23:04:37 +03:00
static Data build(const char* path, const int nips, const int nops)
2018-03-30 02:13:48 +03:00
{
FILE* file = fopen(path, "r");
if(file == NULL)
{
printf("Could not open %s\n", path);
2018-03-30 23:04:37 +03:00
printf("Get it from the machine learning database: ");
printf("wget http://archive.ics.uci.edu/ml/machine-learning-databases/semeion/semeion.data\n");
2018-03-30 02:13:48 +03:00
exit(1);
}
const int rows = lns(file);
2018-03-30 23:04:37 +03:00
Data data = ndata(nips, nops, rows);
2018-03-30 02:13:48 +03:00
for(int row = 0; row < rows; row++)
{
char* line = readln(file);
parse(data, line, row);
free(line);
}
fclose(file);
return data;
}
2018-03-29 08:04:47 +03:00
2018-04-11 19:34:56 +03:00
// Learns and predicts hand written digits with 98% accuracy.
2018-03-30 23:04:37 +03:00
int main()
2018-03-29 06:55:49 +03:00
{
2018-04-02 01:25:47 +03:00
// Tinn does not seed the random number generator.
srand(time(0));
2018-03-31 02:46:16 +03:00
// Input and output size is harded coded here as machine learning
// repositories usually don't include the input and output size in the data itself.
2018-03-30 23:04:37 +03:00
const int nips = 256;
const int nops = 10;
// Hyper Parameters.
// Learning rate is annealed and thus not constant.
2018-03-31 02:46:16 +03:00
// It can be fine tuned along with the number of hidden layers.
2018-04-11 19:34:56 +03:00
// Feel free to modify the anneal rate.
// The number of iterations can be changed for stronger training.
2018-04-01 10:59:20 +03:00
float rate = 1.0f;
2018-04-11 19:34:56 +03:00
const int nhid = 28;
2018-03-31 14:33:12 +03:00
const float anneal = 0.99f;
2018-04-11 19:34:56 +03:00
const int iterations = 128;
2018-03-30 23:04:37 +03:00
// Load the training set.
const Data data = build("semeion.data", nips, nops);
2018-03-31 01:42:20 +03:00
// Train, baby, train.
2018-03-30 23:04:37 +03:00
const Tinn tinn = xtbuild(nips, nhid, nops);
2018-04-11 19:34:56 +03:00
for(int i = 0; i < iterations; i++)
2018-03-30 00:32:11 +03:00
{
2018-03-30 23:04:37 +03:00
shuffle(data);
2018-03-31 14:33:12 +03:00
float error = 0.0f;
2018-03-30 02:13:48 +03:00
for(int j = 0; j < data.rows; j++)
{
2018-03-31 14:29:03 +03:00
const float* const in = data.in[j];
const float* const tg = data.tg[j];
2018-03-30 23:04:37 +03:00
error += xttrain(tinn, in, tg, rate);
2018-03-30 02:13:48 +03:00
}
2018-04-04 01:12:36 +03:00
printf("error %.12f :: learning rate %f\n",
(double) error / data.rows,
(double) rate);
2018-03-31 02:46:16 +03:00
rate *= anneal;
2018-03-30 00:32:11 +03:00
}
2018-03-31 01:42:20 +03:00
// This is how you save the neural network to disk.
xtsave(tinn, "saved.tinn");
xtfree(tinn);
// This is how you load the neural network from disk.
const Tinn loaded = xtload("saved.tinn");
2018-03-31 02:46:16 +03:00
// Now we do a prediction with the neural network we loaded from disk.
// Ideally, we would also load a testing set to make the prediction with,
// but for the sake of brevity here we just reuse the training set from earlier.
2018-04-13 08:04:46 +03:00
// One data set is picked at random (zero index of input and target arrays is enough
// as they were both shuffled earlier).
const float* const in = data.in[0];
const float* const tg = data.tg[0];
2018-04-04 01:12:36 +03:00
const float* const pd = xtpredict(loaded, in);
2018-04-13 08:04:46 +03:00
// Prints target.
2018-04-11 19:34:56 +03:00
xtprint(tg, data.nops);
2018-04-13 08:04:46 +03:00
// Prints prediction.
2018-04-11 19:34:56 +03:00
xtprint(pd, data.nops);
// All done. Let's clean up.
2018-03-31 01:42:20 +03:00
xtfree(loaded);
2018-03-30 02:13:48 +03:00
dfree(data);
2018-03-27 00:11:15 +03:00
return 0;
}