mirror of
https://github.com/codeplea/genann
synced 2024-11-24 23:40:02 +03:00
Merge pull request #8 from amboar/speed
RFC: Increase genann performance by roughly 30%
This commit is contained in:
commit
033618b1f5
15
Makefile
15
Makefile
@ -1,9 +1,16 @@
|
|||||||
CCFLAGS = -Wall -Wshadow -O2 -g
|
CFLAGS = -Wall -Wshadow -O3 -g -march=native
|
||||||
LDLIBS = -lm
|
LDLIBS = -lm
|
||||||
|
|
||||||
|
|
||||||
all: test example1 example2 example3 example4
|
all: test example1 example2 example3 example4
|
||||||
|
|
||||||
|
sigmoid: CFLAGS += -Dgenann_act=genann_act_sigmoid_cached
|
||||||
|
sigmoid: all
|
||||||
|
|
||||||
|
threshold: CFLAGS += -Dgenann_act=genann_act_threshold
|
||||||
|
threshold: all
|
||||||
|
|
||||||
|
linear: CFLAGS += -Dgenann_act=genann_act_linear
|
||||||
|
linear: all
|
||||||
|
|
||||||
test: test.o genann.o
|
test: test.o genann.o
|
||||||
|
|
||||||
@ -21,5 +28,7 @@ example4: example4.o genann.o
|
|||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) *.o
|
$(RM) *.o
|
||||||
$(RM) *.exe
|
$(RM) test example1 example2 example3 example4 *.exe
|
||||||
$(RM) persist.txt
|
$(RM) persist.txt
|
||||||
|
|
||||||
|
.PHONY: sigmoid threshold linear clean
|
||||||
|
142
genann.c
142
genann.c
@ -32,54 +32,71 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifndef genann_act
|
||||||
|
#define genann_act_hidden genann_act_hidden_indirect
|
||||||
|
#define genann_act_output genann_act_output_indirect
|
||||||
|
#else
|
||||||
|
#define genann_act_hidden genann_act
|
||||||
|
#define genann_act_output genann_act
|
||||||
|
#endif
|
||||||
|
|
||||||
#define LOOKUP_SIZE 4096
|
#define LOOKUP_SIZE 4096
|
||||||
|
|
||||||
double genann_act_sigmoid(double a) {
|
double genann_act_hidden_indirect(const struct genann *ann, double a) {
|
||||||
|
return ann->activation_hidden(ann, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
double genann_act_output_indirect(const struct genann *ann, double a) {
|
||||||
|
return ann->activation_output(ann, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
const double sigmoid_dom_min = -15.0;
|
||||||
|
const double sigmoid_dom_max = 15.0;
|
||||||
|
double interval;
|
||||||
|
double lookup[LOOKUP_SIZE];
|
||||||
|
|
||||||
|
#define likely(x) __builtin_expect(!!(x), 1)
|
||||||
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||||
|
#define __unused __attribute__((unused))
|
||||||
|
|
||||||
|
double inline genann_act_sigmoid(const genann *ann __unused, double a) {
|
||||||
if (a < -45.0) return 0;
|
if (a < -45.0) return 0;
|
||||||
if (a > 45.0) return 1;
|
if (a > 45.0) return 1;
|
||||||
return 1.0 / (1 + exp(-a));
|
return 1.0 / (1 + exp(-a));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void genann_init_sigmoid_lookup(const genann *ann) {
|
||||||
double genann_act_sigmoid_cached(double a) {
|
const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE;
|
||||||
/* If you're optimizing for memory usage, just
|
|
||||||
* delete this entire function and replace references
|
|
||||||
* of genann_act_sigmoid_cached to genann_act_sigmoid
|
|
||||||
*/
|
|
||||||
const double min = -15.0;
|
|
||||||
const double max = 15.0;
|
|
||||||
static double interval;
|
|
||||||
static int initialized = 0;
|
|
||||||
static double lookup[LOOKUP_SIZE];
|
|
||||||
|
|
||||||
/* Calculate entire lookup table on first run. */
|
|
||||||
if (!initialized) {
|
|
||||||
interval = (max - min) / LOOKUP_SIZE;
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min);
|
||||||
for (i = 0; i < LOOKUP_SIZE; ++i) {
|
for (i = 0; i < LOOKUP_SIZE; ++i) {
|
||||||
lookup[i] = genann_act_sigmoid(min + interval * i);
|
lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
|
||||||
}
|
}
|
||||||
/* This is down here to make this thread safe. */
|
|
||||||
initialized = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int i;
|
|
||||||
i = (int)((a-min)/interval+0.5);
|
|
||||||
if (i <= 0) return lookup[0];
|
|
||||||
if (i >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE-1];
|
|
||||||
return lookup[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) {
|
||||||
|
assert(!isnan(a));
|
||||||
|
|
||||||
double genann_act_threshold(double a) {
|
if (a < sigmoid_dom_min) return lookup[0];
|
||||||
return a > 0;
|
if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];
|
||||||
|
|
||||||
|
size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);
|
||||||
|
|
||||||
|
/* Because floating point... */
|
||||||
|
if (unlikely(j < 0)) return lookup[0];
|
||||||
|
if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];
|
||||||
|
|
||||||
|
return lookup[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double inline genann_act_linear(const struct genann *ann __unused, double a) {
|
||||||
double genann_act_linear(double a) {
|
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double inline genann_act_threshold(const struct genann *ann __unused, double a) {
|
||||||
|
return a > 0;
|
||||||
|
}
|
||||||
|
|
||||||
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
|
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
|
||||||
if (hidden_layers < 0) return 0;
|
if (hidden_layers < 0) return 0;
|
||||||
@ -117,6 +134,8 @@ genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
|
|||||||
ret->activation_hidden = genann_act_sigmoid_cached;
|
ret->activation_hidden = genann_act_sigmoid_cached;
|
||||||
ret->activation_output = genann_act_sigmoid_cached;
|
ret->activation_output = genann_act_sigmoid_cached;
|
||||||
|
|
||||||
|
genann_init_sigmoid_lookup(ret);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,21 +212,41 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
|||||||
|
|
||||||
int h, j, k;
|
int h, j, k;
|
||||||
|
|
||||||
const genann_actfun act = ann->activation_hidden;
|
if (!ann->hidden_layers) {
|
||||||
const genann_actfun acto = ann->activation_output;
|
double *ret = o;
|
||||||
|
for (j = 0; j < ann->outputs; ++j) {
|
||||||
/* Figure hidden layers, if any. */
|
|
||||||
for (h = 0; h < ann->hidden_layers; ++h) {
|
|
||||||
for (j = 0; j < ann->hidden; ++j) {
|
|
||||||
double sum = *w++ * -1.0;
|
double sum = *w++ * -1.0;
|
||||||
for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) {
|
for (k = 0; k < ann->inputs; ++k) {
|
||||||
sum += *w++ * i[k];
|
sum += *w++ * i[k];
|
||||||
}
|
}
|
||||||
*o++ = act(sum);
|
*o++ = genann_act_output(ann, sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
i += (h == 0 ? ann->inputs : ann->hidden);
|
/* Figure input layer */
|
||||||
|
for (j = 0; j < ann->hidden; ++j) {
|
||||||
|
double sum = *w++ * -1.0;
|
||||||
|
for (k = 0; k < ann->inputs; ++k) {
|
||||||
|
sum += *w++ * i[k];
|
||||||
|
}
|
||||||
|
*o++ = genann_act_hidden(ann, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
i += ann->inputs;
|
||||||
|
|
||||||
|
/* Figure hidden layers, if any. */
|
||||||
|
for (h = 1; h < ann->hidden_layers; ++h) {
|
||||||
|
for (j = 0; j < ann->hidden; ++j) {
|
||||||
|
double sum = *w++ * -1.0;
|
||||||
|
for (k = 0; k < ann->hidden; ++k) {
|
||||||
|
sum += *w++ * i[k];
|
||||||
|
}
|
||||||
|
*o++ = genann_act_hidden(ann, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
i += ann->hidden;
|
||||||
}
|
}
|
||||||
|
|
||||||
double const *ret = o;
|
double const *ret = o;
|
||||||
@ -215,10 +254,10 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
|||||||
/* Figure output layer. */
|
/* Figure output layer. */
|
||||||
for (j = 0; j < ann->outputs; ++j) {
|
for (j = 0; j < ann->outputs; ++j) {
|
||||||
double sum = *w++ * -1.0;
|
double sum = *w++ * -1.0;
|
||||||
for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) {
|
for (k = 0; k < ann->hidden; ++k) {
|
||||||
sum += *w++ * i[k];
|
sum += *w++ * i[k];
|
||||||
}
|
}
|
||||||
*o++ = acto(sum);
|
*o++ = genann_act_output(ann, sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sanity check that we used all weights and wrote all outputs. */
|
/* Sanity check that we used all weights and wrote all outputs. */
|
||||||
@ -243,7 +282,8 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
|||||||
|
|
||||||
|
|
||||||
/* Set output layer deltas. */
|
/* Set output layer deltas. */
|
||||||
if (ann->activation_output == genann_act_linear) {
|
if (genann_act_output == genann_act_linear ||
|
||||||
|
ann->activation_output == genann_act_linear) {
|
||||||
for (j = 0; j < ann->outputs; ++j) {
|
for (j = 0; j < ann->outputs; ++j) {
|
||||||
*d++ = *t++ - *o++;
|
*d++ = *t++ - *o++;
|
||||||
}
|
}
|
||||||
@ -304,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
|||||||
|
|
||||||
/* Set output layer weights. */
|
/* Set output layer weights. */
|
||||||
for (j = 0; j < ann->outputs; ++j) {
|
for (j = 0; j < ann->outputs; ++j) {
|
||||||
for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
|
*w++ += *d * learning_rate * -1.0;
|
||||||
if (k == 0) {
|
for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
|
||||||
*w++ += *d * learning_rate * -1.0;
|
*w++ += *d * learning_rate * i[k-1];
|
||||||
} else {
|
|
||||||
*w++ += *d * learning_rate * i[k-1];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
++d;
|
++d;
|
||||||
@ -337,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
|||||||
|
|
||||||
|
|
||||||
for (j = 0; j < ann->hidden; ++j) {
|
for (j = 0; j < ann->hidden; ++j) {
|
||||||
for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
|
*w++ += *d * learning_rate * -1.0;
|
||||||
if (k == 0) {
|
for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
|
||||||
*w++ += *d * learning_rate * -1.0;
|
*w++ += *d * learning_rate * i[k-1];
|
||||||
} else {
|
|
||||||
*w++ += *d * learning_rate * i[k-1];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
++d;
|
++d;
|
||||||
}
|
}
|
||||||
|
16
genann.h
16
genann.h
@ -39,9 +39,9 @@ extern "C" {
|
|||||||
#define GENANN_RANDOM() (((double)rand())/RAND_MAX)
|
#define GENANN_RANDOM() (((double)rand())/RAND_MAX)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
struct genann;
|
||||||
|
|
||||||
typedef double (*genann_actfun)(double a);
|
typedef double (*genann_actfun)(const struct genann *ann, double a);
|
||||||
|
|
||||||
|
|
||||||
typedef struct genann {
|
typedef struct genann {
|
||||||
/* How many inputs, outputs, and hidden neurons. */
|
/* How many inputs, outputs, and hidden neurons. */
|
||||||
@ -70,8 +70,6 @@ typedef struct genann {
|
|||||||
|
|
||||||
} genann;
|
} genann;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Creates and returns a new ann. */
|
/* Creates and returns a new ann. */
|
||||||
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);
|
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);
|
||||||
|
|
||||||
@ -96,11 +94,11 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
|||||||
/* Saves the ann. */
|
/* Saves the ann. */
|
||||||
void genann_write(genann const *ann, FILE *out);
|
void genann_write(genann const *ann, FILE *out);
|
||||||
|
|
||||||
|
void genann_init_sigmoid_lookup(const genann *ann);
|
||||||
double genann_act_sigmoid(double a);
|
double genann_act_sigmoid(const genann *ann, double a);
|
||||||
double genann_act_sigmoid_cached(double a);
|
double genann_act_sigmoid_cached(const genann *ann, double a);
|
||||||
double genann_act_threshold(double a);
|
double genann_act_threshold(const genann *ann, double a);
|
||||||
double genann_act_linear(double a);
|
double genann_act_linear(const genann *ann, double a);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
Loading…
Reference in New Issue
Block a user