Mirror of https://github.com/codeplea/genann
Merge pull request #8 from amboar/speed

RFC: Increase genann performance by roughly 30%

Commit 033618b1f5

Makefile (15 lines changed)
@@ -1,9 +1,16 @@
-CCFLAGS = -Wall -Wshadow -O2 -g
+CFLAGS = -Wall -Wshadow -O3 -g -march=native
 LDLIBS = -lm
 
 all: test example1 example2 example3 example4
 
+sigmoid: CFLAGS += -Dgenann_act=genann_act_sigmoid_cached
+sigmoid: all
+
+threshold: CFLAGS += -Dgenann_act=genann_act_threshold
+threshold: all
+
+linear: CFLAGS += -Dgenann_act=genann_act_linear
+linear: all
+
 test: test.o genann.o
 
@@ -21,5 +28,7 @@ example4: example4.o genann.o
 
 clean:
 	$(RM) *.o
-	$(RM) *.exe
+	$(RM) test example1 example2 example3 example4 *.exe
 	$(RM) persist.txt
+
+.PHONY: sigmoid threshold linear clean
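
Reviewer note (not part of the patch): the new sigmoid, threshold, and linear targets select an activation function at compile time by defining genann_act, e.g. running make clean && make sigmoid rebuilds everything with -Dgenann_act=genann_act_sigmoid_cached. With genann_act defined, genann.c calls that function directly instead of going through the activation_hidden/activation_output function pointers (see the #ifndef genann_act block in genann.c below); a plain make keeps the old run-time-selectable behaviour.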

genann.c (142 lines changed)

@@ -32,54 +32,71 @@
 #include <stdlib.h>
 #include <string.h>
 
+#ifndef genann_act
+#define genann_act_hidden genann_act_hidden_indirect
+#define genann_act_output genann_act_output_indirect
+#else
+#define genann_act_hidden genann_act
+#define genann_act_output genann_act
+#endif
+
 #define LOOKUP_SIZE 4096
 
-double genann_act_sigmoid(double a) {
+double genann_act_hidden_indirect(const struct genann *ann, double a) {
+    return ann->activation_hidden(ann, a);
+}
+
+double genann_act_output_indirect(const struct genann *ann, double a) {
+    return ann->activation_output(ann, a);
+}
+
+const double sigmoid_dom_min = -15.0;
+const double sigmoid_dom_max = 15.0;
+double interval;
+double lookup[LOOKUP_SIZE];
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#define __unused __attribute__((unused))
+
+double inline genann_act_sigmoid(const genann *ann __unused, double a) {
     if (a < -45.0) return 0;
     if (a > 45.0) return 1;
     return 1.0 / (1 + exp(-a));
 }
 
-
-double genann_act_sigmoid_cached(double a) {
-    /* If you're optimizing for memory usage, just
-     * delete this entire function and replace references
-     * of genann_act_sigmoid_cached to genann_act_sigmoid
-     */
-    const double min = -15.0;
-    const double max = 15.0;
-    static double interval;
-    static int initialized = 0;
-    static double lookup[LOOKUP_SIZE];
-
-    /* Calculate entire lookup table on first run. */
-    if (!initialized) {
-        interval = (max - min) / LOOKUP_SIZE;
+void genann_init_sigmoid_lookup(const genann *ann) {
+        const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE;
         int i;
+
+        interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min);
         for (i = 0; i < LOOKUP_SIZE; ++i) {
-            lookup[i] = genann_act_sigmoid(min + interval * i);
+            lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
         }
-        /* This is down here to make this thread safe. */
-        initialized = 1;
-    }
-
-    int i;
-    i = (int)((a-min)/interval+0.5);
-    if (i <= 0) return lookup[0];
-    if (i >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE-1];
-    return lookup[i];
 }
 
+double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) {
+    assert(!isnan(a));
 
-double genann_act_threshold(double a) {
-    return a > 0;
+    if (a < sigmoid_dom_min) return lookup[0];
+    if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];
+
+    size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);
+
+    /* Because floating point... */
+    if (unlikely(j < 0)) return lookup[0];
+    if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];
+
+    return lookup[j];
 }
 
-
-double genann_act_linear(double a) {
+double inline genann_act_linear(const struct genann *ann __unused, double a) {
     return a;
 }
 
+double inline genann_act_threshold(const struct genann *ann __unused, double a) {
+    return a > 0;
+}
+
 genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
     if (hidden_layers < 0) return 0;
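
Reviewer note (illustration only, not code from this patch): genann_act_sigmoid_cached above answers sigmoid queries from a 4096-entry table over [sigmoid_dom_min, sigmoid_dom_max] instead of calling exp() every time; the index is round((a - dom_min) * interval) with clamping at both ends. A self-contained sketch of the same scheme follows, with hypothetical names (sigmoid, init_lookup, sigmoid_cached). The worst-case table error is about the sigmoid's maximum slope (0.25) times half the step of 30/4096, i.e. on the order of 1e-3.

    /* Standalone sketch of the lookup-table sigmoid used by this patch. */
    #include <math.h>
    #include <stdio.h>

    #define LOOKUP_SIZE 4096

    static const double dom_min = -15.0, dom_max = 15.0;
    static double interval;               /* table entries per unit of input */
    static double lookup[LOOKUP_SIZE];

    static double sigmoid(double a) {
        if (a < -45.0) return 0;
        if (a > 45.0) return 1;
        return 1.0 / (1 + exp(-a));
    }

    static void init_lookup(void) {
        const double step = (dom_max - dom_min) / LOOKUP_SIZE;
        int i;
        interval = LOOKUP_SIZE / (dom_max - dom_min);
        for (i = 0; i < LOOKUP_SIZE; ++i)
            lookup[i] = sigmoid(dom_min + step * i);
    }

    static double sigmoid_cached(double a) {
        size_t j;
        if (a < dom_min) return lookup[0];
        if (a >= dom_max) return lookup[LOOKUP_SIZE - 1];
        j = (size_t)((a - dom_min) * interval + 0.5);
        if (j >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE - 1];  /* rounding guard */
        return lookup[j];
    }

    int main(void) {
        double a, err, worst = 0;
        init_lookup();
        for (a = -20.0; a <= 20.0; a += 0.001) {
            err = fabs(sigmoid_cached(a) - sigmoid(a));
            if (err > worst) worst = err;
        }
        printf("max abs error: %g\n", worst);   /* on the order of 1e-3 */
        return 0;
    }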
@@ -117,6 +134,8 @@ genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
     ret->activation_hidden = genann_act_sigmoid_cached;
     ret->activation_output = genann_act_sigmoid_cached;
 
+    genann_init_sigmoid_lookup(ret);
+
     return ret;
 }
 
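
Reviewer note: since genann_init now builds the sigmoid lookup table itself, existing callers need no changes. A minimal usage sketch in the spirit of the project's example1.c (the XOR data, iteration count, and learning rate here are illustrative):

    #include <stdio.h>
    #include "genann.h"

    int main(void) {
        const double in[4][2] = {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
        const double out[4] = {0, 1, 1, 0};
        int i, n;

        /* 2 inputs, 1 hidden layer of 2 neurons, 1 output.
         * genann_init also calls genann_init_sigmoid_lookup() now. */
        genann *ann = genann_init(2, 1, 2, 1);

        for (n = 0; n < 500; ++n)
            for (i = 0; i < 4; ++i)
                genann_train(ann, in[i], out + i, 3);

        for (i = 0; i < 4; ++i)
            printf("%g XOR %g -> %g\n", in[i][0], in[i][1], *genann_run(ann, in[i]));

        genann_free(ann);
        return 0;
    }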
@@ -193,21 +212,41 @@ double const *genann_run(genann const *ann, double const *inputs) {
 
     int h, j, k;
 
-    const genann_actfun act = ann->activation_hidden;
-    const genann_actfun acto = ann->activation_output;
-
-    /* Figure hidden layers, if any. */
-    for (h = 0; h < ann->hidden_layers; ++h) {
-        for (j = 0; j < ann->hidden; ++j) {
+    if (!ann->hidden_layers) {
+        double *ret = o;
+        for (j = 0; j < ann->outputs; ++j) {
             double sum = *w++ * -1.0;
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) {
+            for (k = 0; k < ann->inputs; ++k) {
                 sum += *w++ * i[k];
             }
-            *o++ = act(sum);
+            *o++ = genann_act_output(ann, sum);
         }
 
-        i += (h == 0 ? ann->inputs : ann->hidden);
-    }
+        return ret;
+    }
+
+    /* Figure input layer */
+    for (j = 0; j < ann->hidden; ++j) {
+        double sum = *w++ * -1.0;
+        for (k = 0; k < ann->inputs; ++k) {
+            sum += *w++ * i[k];
+        }
+        *o++ = genann_act_hidden(ann, sum);
+    }
+
+    i += ann->inputs;
+
+    /* Figure hidden layers, if any. */
+    for (h = 1; h < ann->hidden_layers; ++h) {
+        for (j = 0; j < ann->hidden; ++j) {
+            double sum = *w++ * -1.0;
+            for (k = 0; k < ann->hidden; ++k) {
+                sum += *w++ * i[k];
+            }
+            *o++ = genann_act_hidden(ann, sum);
+        }
+
+        i += ann->hidden;
+    }
 
     double const *ret = o;
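
Reviewer note: unrolling the old h == 0 special case into a separate input-layer pass removes a branch from the inner loops, but the rewritten passes must consume weights in exactly the same order as before: one bias weight per neuron (the *w++ * -1.0 term), then one weight per incoming value. A hypothetical helper summarizing how many weights the passes above walk (it should agree with the sanity check at the end of genann_run):

    /* Illustrative only: weights consumed by the forward pass above. */
    static int weights_walked(int inputs, int hidden_layers, int hidden, int outputs) {
        if (hidden_layers == 0)
            return outputs * (inputs + 1);                  /* inputs straight to outputs */
        return hidden * (inputs + 1)                        /* input layer                */
             + (hidden_layers - 1) * hidden * (hidden + 1)  /* hidden-to-hidden layers    */
             + outputs * (hidden + 1);                      /* output layer               */
    }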
@@ -215,10 +254,10 @@ double const *genann_run(genann const *ann, double const *inputs) {
     /* Figure output layer. */
     for (j = 0; j < ann->outputs; ++j) {
         double sum = *w++ * -1.0;
-        for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) {
+        for (k = 0; k < ann->hidden; ++k) {
             sum += *w++ * i[k];
         }
-        *o++ = acto(sum);
+        *o++ = genann_act_output(ann, sum);
     }
 
     /* Sanity check that we used all weights and wrote all outputs. */
@@ -243,7 +282,8 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 
 
     /* Set output layer deltas. */
-    if (ann->activation_output == genann_act_linear) {
+    if (genann_act_output == genann_act_linear ||
+            ann->activation_output == genann_act_linear) {
         for (j = 0; j < ann->outputs; ++j) {
             *d++ = *t++ - *o++;
         }
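
Reviewer note: only the linear branch of the output-delta computation is visible in this hunk. For reference, the two quantities being chosen between are the raw error for a linear output and, in the default sigmoid case, the error scaled by the sigmoid derivative; a hedged sketch (not code from this patch):

    /* Output delta with a linear activation: dE/dnet = target - output. */
    static double delta_linear(double target, double output) {
        return target - output;
    }

    /* Output delta with a sigmoid activation: same error times o * (1 - o). */
    static double delta_sigmoid(double target, double output) {
        return (target - output) * output * (1.0 - output);
    }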
@@ -304,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 
     /* Set output layer weights. */
     for (j = 0; j < ann->outputs; ++j) {
-        for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
-            if (k == 0) {
-                *w++ += *d * learning_rate * -1.0;
-            } else {
-                *w++ += *d * learning_rate * i[k-1];
-            }
+        *w++ += *d * learning_rate * -1.0;
+        for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
+            *w++ += *d * learning_rate * i[k-1];
         }
 
         ++d;
@@ -337,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 
 
         for (j = 0; j < ann->hidden; ++j) {
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
             }
             ++d;
         }

genann.h (16 lines changed)

@@ -39,9 +39,9 @@ extern "C" {
 #define GENANN_RANDOM() (((double)rand())/RAND_MAX)
 #endif
 
+struct genann;
 
-typedef double (*genann_actfun)(double a);
-
+typedef double (*genann_actfun)(const struct genann *ann, double a);
 
 typedef struct genann {
     /* How many inputs, outputs, and hidden neurons. */
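
Reviewer note: because genann_actfun now takes the network pointer, any user-supplied activation has to use the new signature before being assigned to activation_hidden or activation_output. An illustrative example (my_relu is hypothetical, not part of genann):

    #include "genann.h"

    /* Hypothetical user activation under the new genann_actfun signature. */
    static double my_relu(const struct genann *ann, double a) {
        (void)ann;                 /* network handle is available but unused here */
        return a > 0 ? a : 0;
    }

    void use_custom_activation(void) {
        genann *ann = genann_init(2, 1, 4, 1);
        ann->activation_hidden = my_relu;   /* run-time override, as before */
        /* ... genann_train() / genann_run() as usual ... */
        genann_free(ann);
    }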
@@ -70,8 +70,6 @@ typedef struct genann {
 
 } genann;
 
-
-
 /* Creates and returns a new ann. */
 genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);
 
@@ -96,11 +94,11 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 /* Saves the ann. */
 void genann_write(genann const *ann, FILE *out);
 
-
-double genann_act_sigmoid(double a);
-double genann_act_sigmoid_cached(double a);
-double genann_act_threshold(double a);
-double genann_act_linear(double a);
+void genann_init_sigmoid_lookup(const genann *ann);
+double genann_act_sigmoid(const genann *ann, double a);
+double genann_act_sigmoid_cached(const genann *ann, double a);
+double genann_act_threshold(const genann *ann, double a);
+double genann_act_linear(const genann *ann, double a);
 
 
 #ifdef __cplusplus