Merge pull request #8 from amboar/speed

RFC: Increase genann performance by roughly 30%
This commit is contained in:
Lewis Van Winkle 2018-04-11 08:38:44 -05:00 committed by GitHub
commit 033618b1f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 108 additions and 67 deletions

View File

@ -1,9 +1,16 @@
CCFLAGS = -Wall -Wshadow -O2 -g
CFLAGS = -Wall -Wshadow -O3 -g -march=native
LDLIBS = -lm
all: test example1 example2 example3 example4
# Convenience targets that build everything with one activation function
# fixed at compile time. Each adds -Dgenann_act=<function> to CFLAGS (as a
# target-specific variable, so it also applies to the prerequisites built
# for that target) and then builds the `all` target.
# NOTE(review): objects built earlier with different flags are not rebuilt
# automatically; presumably a `make clean` is needed when switching — confirm.
sigmoid: CFLAGS += -Dgenann_act=genann_act_sigmoid_cached
sigmoid: all
threshold: CFLAGS += -Dgenann_act=genann_act_threshold
threshold: all
linear: CFLAGS += -Dgenann_act=genann_act_linear
linear: all
test: test.o genann.o
@ -21,5 +28,7 @@ example4: example4.o genann.o
clean:
$(RM) *.o
$(RM) *.exe
$(RM) test example1 example2 example3 example4 *.exe
$(RM) persist.txt
.PHONY: sigmoid threshold linear clean

142
genann.c
View File

@ -32,54 +32,71 @@
#include <stdlib.h>
#include <string.h>
/* Select how activation functions are invoked.
 * If genann_act is not defined at build time, genann_act_hidden and
 * genann_act_output map to the *_indirect wrappers, which call through the
 * function pointers stored in the network.
 * If genann_act is defined (e.g. -Dgenann_act=genann_act_linear), both names
 * expand directly to that single function, avoiding the pointer call. */
#ifndef genann_act
#define genann_act_hidden genann_act_hidden_indirect
#define genann_act_output genann_act_output_indirect
#else
#define genann_act_hidden genann_act
#define genann_act_output genann_act
#endif
/* Number of entries in the precomputed sigmoid lookup table. */
#define LOOKUP_SIZE 4096
double genann_act_sigmoid(double a) {
/* Hidden-layer activation used when no activation was fixed at compile time:
 * forwards `a` to the activation_hidden function pointer stored in `ann`
 * and returns whatever that function returns. */
double genann_act_hidden_indirect(const struct genann *ann, double a) {
    const double activated = (*ann->activation_hidden)(ann, a);
    return activated;
}
/* Output-layer activation used when no activation was fixed at compile time:
 * forwards `a` to the activation_output function pointer stored in `ann`
 * and returns whatever that function returns. */
double genann_act_output_indirect(const struct genann *ann, double a) {
    const double activated = (*ann->activation_output)(ann, a);
    return activated;
}
/* Input range covered by the cached sigmoid table; inputs outside this
 * range are answered with the first or last table entry. */
const double sigmoid_dom_min = -15.0;
const double sigmoid_dom_max = 15.0;
/* Scale factor from an input offset to a table index: table entries per
 * unit of input. Assigned by genann_init_sigmoid_lookup. */
double interval;
/* Precomputed sigmoid values, filled by genann_init_sigmoid_lookup.
 * NOTE(review): these objects have external linkage and fairly generic
 * names (`interval`, `lookup`) — confirm nothing else links against them
 * before narrowing their scope. */
double lookup[LOOKUP_SIZE];
/* Branch-prediction hints and an unused-parameter marker.
 * The builtins/attribute are GCC/Clang extensions, so they are only used
 * when __GNUC__ is defined; other compilers get no-op fallbacks that keep
 * the same truth value, so the file still builds. */
#ifdef __GNUC__
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* Some system headers (e.g. BSD-derived <sys/cdefs.h>) already provide
 * __unused; keep their definition rather than redefining it. */
#ifndef __unused
#define __unused __attribute__((unused))
#endif
#else
#define likely(x) (!!(x))
#define unlikely(x) (!!(x))
#ifndef __unused
#define __unused
#endif
#endif
/*
 * Logistic sigmoid activation: 1 / (1 + e^-a).
 *
 * ann: unused; present so the function matches the activation signature.
 * a:   weighted input sum.
 *
 * Returns 0 for a < -45 and 1 for a > 45 without calling exp(); otherwise
 * the sigmoid of a.
 *
 * Defined as an ordinary external function (no `inline`): it is declared in
 * the public header, called from other translation units, and stored in
 * function pointers, so it must always have an external definition.
 */
double genann_act_sigmoid(const genann *ann, double a) {
    (void)ann; /* Intentionally unused. */
    if (a < -45.0) return 0;
    if (a > 45.0) return 1;
    return 1.0 / (1 + exp(-a));
}
double genann_act_sigmoid_cached(double a) {
/* If you're optimizing for memory usage, just
* delete this entire function and replace references
* of genann_act_sigmoid_cached to genann_act_sigmoid
*/
const double min = -15.0;
const double max = 15.0;
static double interval;
static int initialized = 0;
static double lookup[LOOKUP_SIZE];
/* Calculate entire lookup table on first run. */
if (!initialized) {
interval = (max - min) / LOOKUP_SIZE;
void genann_init_sigmoid_lookup(const genann *ann) {
const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE;
int i;
interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min);
for (i = 0; i < LOOKUP_SIZE; ++i) {
lookup[i] = genann_act_sigmoid(min + interval * i);
lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
}
/* This is down here to make this thread safe. */
initialized = 1;
}
int i;
i = (int)((a-min)/interval+0.5);
if (i <= 0) return lookup[0];
if (i >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE-1];
return lookup[i];
}
double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) {
assert(!isnan(a));
double genann_act_threshold(double a) {
return a > 0;
if (a < sigmoid_dom_min) return lookup[0];
if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];
size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);
/* Because floating point... */
if (unlikely(j < 0)) return lookup[0];
if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];
return lookup[j];
}
double genann_act_linear(double a) {
/*
 * Linear (identity) activation.
 *
 * ann: unused; present so the function matches the activation signature.
 * a:   weighted input sum.
 *
 * Returns a unchanged.
 *
 * Defined as an ordinary external function (no `inline`): its address is
 * compared against the network's output activation during training, so it
 * must always have an external definition.
 */
double genann_act_linear(const struct genann *ann, double a) {
    (void)ann; /* Intentionally unused. */
    return a;
}
/*
 * Step (threshold) activation.
 *
 * ann: unused; present so the function matches the activation signature.
 * a:   weighted input sum.
 *
 * Returns 1.0 when a is strictly greater than 0, otherwise 0.0.
 *
 * Defined as an ordinary external function (no `inline`) because it is
 * declared in the public header and may be stored in a function pointer.
 */
double genann_act_threshold(const struct genann *ann, double a) {
    (void)ann; /* Intentionally unused. */
    return a > 0;
}
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
if (hidden_layers < 0) return 0;
@ -117,6 +134,8 @@ genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
ret->activation_hidden = genann_act_sigmoid_cached;
ret->activation_output = genann_act_sigmoid_cached;
genann_init_sigmoid_lookup(ret);
return ret;
}
@ -193,21 +212,41 @@ double const *genann_run(genann const *ann, double const *inputs) {
int h, j, k;
const genann_actfun act = ann->activation_hidden;
const genann_actfun acto = ann->activation_output;
/* Figure hidden layers, if any. */
for (h = 0; h < ann->hidden_layers; ++h) {
for (j = 0; j < ann->hidden; ++j) {
if (!ann->hidden_layers) {
double *ret = o;
for (j = 0; j < ann->outputs; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) {
for (k = 0; k < ann->inputs; ++k) {
sum += *w++ * i[k];
}
*o++ = act(sum);
*o++ = genann_act_output(ann, sum);
}
return ret;
}
i += (h == 0 ? ann->inputs : ann->hidden);
/* Figure input layer */
for (j = 0; j < ann->hidden; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < ann->inputs; ++k) {
sum += *w++ * i[k];
}
*o++ = genann_act_hidden(ann, sum);
}
i += ann->inputs;
/* Figure hidden layers, if any. */
for (h = 1; h < ann->hidden_layers; ++h) {
for (j = 0; j < ann->hidden; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < ann->hidden; ++k) {
sum += *w++ * i[k];
}
*o++ = genann_act_hidden(ann, sum);
}
i += ann->hidden;
}
double const *ret = o;
@ -215,10 +254,10 @@ double const *genann_run(genann const *ann, double const *inputs) {
/* Figure output layer. */
for (j = 0; j < ann->outputs; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) {
for (k = 0; k < ann->hidden; ++k) {
sum += *w++ * i[k];
}
*o++ = acto(sum);
*o++ = genann_act_output(ann, sum);
}
/* Sanity check that we used all weights and wrote all outputs. */
@ -243,7 +282,8 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
/* Set output layer deltas. */
if (ann->activation_output == genann_act_linear) {
if (genann_act_output == genann_act_linear ||
ann->activation_output == genann_act_linear) {
for (j = 0; j < ann->outputs; ++j) {
*d++ = *t++ - *o++;
}
@ -304,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
/* Set output layer weights. */
for (j = 0; j < ann->outputs; ++j) {
for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
if (k == 0) {
*w++ += *d * learning_rate * -1.0;
} else {
*w++ += *d * learning_rate * i[k-1];
}
*w++ += *d * learning_rate * -1.0;
for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
*w++ += *d * learning_rate * i[k-1];
}
++d;
@ -337,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
for (j = 0; j < ann->hidden; ++j) {
for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
if (k == 0) {
*w++ += *d * learning_rate * -1.0;
} else {
*w++ += *d * learning_rate * i[k-1];
}
*w++ += *d * learning_rate * -1.0;
for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
*w++ += *d * learning_rate * i[k-1];
}
++d;
}

View File

@ -39,9 +39,9 @@ extern "C" {
#define GENANN_RANDOM() (((double)rand())/RAND_MAX)
#endif
struct genann;
typedef double (*genann_actfun)(double a);
typedef double (*genann_actfun)(const struct genann *ann, double a);
typedef struct genann {
/* How many inputs, outputs, and hidden neurons. */
@ -70,8 +70,6 @@ typedef struct genann {
} genann;
/* Creates and returns a new ann. */
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);
@ -96,11 +94,11 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
/* Saves the ann. */
void genann_write(genann const *ann, FILE *out);
double genann_act_sigmoid(double a);
double genann_act_sigmoid_cached(double a);
double genann_act_threshold(double a);
double genann_act_linear(double a);
/* Fills the lookup table read by genann_act_sigmoid_cached. genann_init
 * calls this when creating a network. */
void genann_init_sigmoid_lookup(const genann *ann);
/* Activation functions. All share the genann_actfun signature; the visible
 * implementations do not use the ann argument. */
/* Sigmoid 1/(1+exp(-a)); returns 0 below -45 and 1 above 45. */
double genann_act_sigmoid(const genann *ann, double a);
/* Sigmoid approximated from the precomputed lookup table; inputs outside
 * the table's domain return its first or last entry. */
double genann_act_sigmoid_cached(const genann *ann, double a);
/* Returns 1 when a > 0, otherwise 0. */
double genann_act_threshold(const genann *ann, double a);
/* Returns a unchanged. */
double genann_act_linear(const genann *ann, double a);
#ifdef __cplusplus

2
test.c
View File

@ -248,7 +248,7 @@ void sigmoid() {
const double d = .0001;
while (i < max) {
lfequal(genann_act_sigmoid(i), genann_act_sigmoid_cached(i));
lfequal(genann_act_sigmoid(NULL, i), genann_act_sigmoid_cached(NULL, i));
i += d;
}
}