mirror of
https://github.com/codeplea/genann
synced 2024-11-21 22:11:34 +03:00
genann: Optionally resolve activation functions at link time
Shave around 94 million instructions and 10 million branches off of the execution trace of example4 if the sigmoid activation function is resolved at link-time.

Before (`make`):

```
 Performance counter stats for './example4':

         98.988806      task-clock (msec)         #    0.998 CPUs utilized
                 1      context-switches          #    0.010 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                79      page-faults               #    0.798 K/sec
       312,298,260      cycles                    #    3.155 GHz
     1,094,183,752      instructions              #    3.50  insn per cycle
       212,007,732      branches                  # 2141.734 M/sec
            62,774      branch-misses             #    0.03% of all branches

       0.099228100 seconds time elapsed
```

After:

`make`:

```
 Performance counter stats for './example4':

         97.335180      task-clock (msec)         #    0.998 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                82      page-faults               #    0.842 K/sec
       306,722,357      cycles                    #    3.151 GHz
     1,065,669,644      instructions              #    3.47  insn per cycle
       214,256,601      branches                  # 2201.225 M/sec
            60,154      branch-misses             #    0.03% of all branches

       0.097577079 seconds time elapsed
```

`make sigmoid`:

```
 Performance counter stats for './example4':

         92.629610      task-clock (msec)         #    0.997 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                78      page-faults               #    0.842 K/sec
       291,863,801      cycles                    #    3.151 GHz
     1,000,931,204      instructions              #    3.43  insn per cycle
       202,465,800      branches                  # 2185.757 M/sec
            50,949      branch-misses             #    0.03% of all branches

       0.092889789 seconds time elapsed
```

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
This commit is contained in:
parent
b1f72be243
commit
db51375bb7
11
Makefile
11
Makefile
@ -3,6 +3,15 @@ LDLIBS = -lm
|
||||
|
||||
all: test example1 example2 example3 example4
|
||||
|
||||
sigmoid: CFLAGS += -Dgenann_act=genann_act_sigmoid_cached
|
||||
sigmoid: all
|
||||
|
||||
threshold: CFLAGS += -Dgenann_act=genann_act_threshold
|
||||
threshold: all
|
||||
|
||||
linear: CFLAGS += -Dgenann_act=genann_act_linear
|
||||
linear: all
|
||||
|
||||
test: test.o genann.o
|
||||
|
||||
check: test
|
||||
@ -21,3 +30,5 @@ clean:
|
||||
$(RM) *.o
|
||||
$(RM) test example1 example2 example3 example4 *.exe
|
||||
$(RM) persist.txt
|
||||
|
||||
.PHONY: sigmoid threshold linear clean
|
||||
|
90
genann.c
90
genann.c
@ -32,61 +32,71 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef genann_act
|
||||
#define genann_act_hidden genann_act_hidden_indirect
|
||||
#define genann_act_output genann_act_output_indirect
|
||||
#else
|
||||
#define genann_act_hidden genann_act
|
||||
#define genann_act_output genann_act
|
||||
#endif
|
||||
|
||||
#define LOOKUP_SIZE 4096
|
||||
|
||||
double genann_act_sigmoid(double a) {
|
||||
double genann_act_hidden_indirect(const struct genann *ann, double a) {
|
||||
return ann->activation_hidden(ann, a);
|
||||
}
|
||||
|
||||
double genann_act_output_indirect(const struct genann *ann, double a) {
|
||||
return ann->activation_output(ann, a);
|
||||
}
|
||||
|
||||
const double sigmoid_dom_min = -15.0;
|
||||
const double sigmoid_dom_max = 15.0;
|
||||
double interval;
|
||||
double lookup[LOOKUP_SIZE];
|
||||
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#define __unused __attribute__((unused))
|
||||
|
||||
double inline genann_act_sigmoid(const genann *ann __unused, double a) {
|
||||
if (a < -45.0) return 0;
|
||||
if (a > 45.0) return 1;
|
||||
return 1.0 / (1 + exp(-a));
|
||||
}
|
||||
|
||||
|
||||
double genann_act_sigmoid_cached(double a) {
|
||||
/* If you're optimizing for memory usage, just
|
||||
* delete this entire function and replace references
|
||||
* of genann_act_sigmoid_cached to genann_act_sigmoid
|
||||
*/
|
||||
const double min = -15.0;
|
||||
const double max = 15.0;
|
||||
static double interval;
|
||||
static int initialized = 0;
|
||||
static double lookup[LOOKUP_SIZE];
|
||||
|
||||
/* Calculate entire lookup table on first run. */
|
||||
if (!initialized) {
|
||||
const double f = (max - min) / LOOKUP_SIZE;
|
||||
void genann_init_sigmoid_lookup(const genann *ann) {
|
||||
const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE;
|
||||
int i;
|
||||
interval = LOOKUP_SIZE / (max - min);
|
||||
|
||||
interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min);
|
||||
for (i = 0; i < LOOKUP_SIZE; ++i) {
|
||||
lookup[i] = genann_act_sigmoid(min + f * i);
|
||||
lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
|
||||
}
|
||||
/* This is down here to make this thread safe. */
|
||||
initialized = 1;
|
||||
}
|
||||
|
||||
double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) {
|
||||
assert(!isnan(a));
|
||||
|
||||
if (a < min) return lookup[0];
|
||||
if (a >= max) return lookup[LOOKUP_SIZE - 1];
|
||||
if (a < sigmoid_dom_min) return lookup[0];
|
||||
if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];
|
||||
|
||||
size_t j = (size_t)((a-min)*interval+0.5);
|
||||
size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);
|
||||
|
||||
if (j < 0) return lookup[0];
|
||||
if (j >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE - 1];
|
||||
/* Because floating point... */
|
||||
if (unlikely(j < 0)) return lookup[0];
|
||||
if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];
|
||||
|
||||
return lookup[j];
|
||||
}
|
||||
|
||||
|
||||
double genann_act_threshold(double a) {
|
||||
return a > 0;
|
||||
}
|
||||
|
||||
|
||||
double genann_act_linear(double a) {
|
||||
double inline genann_act_linear(const struct genann *ann __unused, double a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
double inline genann_act_threshold(const struct genann *ann __unused, double a) {
|
||||
return a > 0;
|
||||
}
|
||||
|
||||
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
|
||||
if (hidden_layers < 0) return 0;
|
||||
@ -124,6 +134,8 @@ genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
|
||||
ret->activation_hidden = genann_act_sigmoid_cached;
|
||||
ret->activation_output = genann_act_sigmoid_cached;
|
||||
|
||||
genann_init_sigmoid_lookup(ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -200,9 +212,6 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
||||
|
||||
int h, j, k;
|
||||
|
||||
const genann_actfun act = ann->activation_hidden;
|
||||
const genann_actfun acto = ann->activation_output;
|
||||
|
||||
if (!ann->hidden_layers) {
|
||||
double *ret = o;
|
||||
for (j = 0; j < ann->outputs; ++j) {
|
||||
@ -210,7 +219,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
||||
for (k = 0; k < ann->inputs; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = acto(sum);
|
||||
*o++ = genann_act_output(ann, sum);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -222,7 +231,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
||||
for (k = 0; k < ann->inputs; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = act(sum);
|
||||
*o++ = genann_act_hidden(ann, sum);
|
||||
}
|
||||
|
||||
i += ann->inputs;
|
||||
@ -234,7 +243,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
||||
for (k = 0; k < ann->hidden; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = act(sum);
|
||||
*o++ = genann_act_hidden(ann, sum);
|
||||
}
|
||||
|
||||
i += ann->hidden;
|
||||
@ -248,7 +257,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
||||
for (k = 0; k < ann->hidden; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = acto(sum);
|
||||
*o++ = genann_act_output(ann, sum);
|
||||
}
|
||||
|
||||
/* Sanity check that we used all weights and wrote all outputs. */
|
||||
@ -273,7 +282,8 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
||||
|
||||
|
||||
/* Set output layer deltas. */
|
||||
if (ann->activation_output == genann_act_linear) {
|
||||
if (genann_act_output == genann_act_linear ||
|
||||
ann->activation_output == genann_act_linear) {
|
||||
for (j = 0; j < ann->outputs; ++j) {
|
||||
*d++ = *t++ - *o++;
|
||||
}
|
||||
|
16
genann.h
16
genann.h
@ -39,9 +39,9 @@ extern "C" {
|
||||
#define GENANN_RANDOM() (((double)rand())/RAND_MAX)
|
||||
#endif
|
||||
|
||||
struct genann;
|
||||
|
||||
typedef double (*genann_actfun)(double a);
|
||||
|
||||
typedef double (*genann_actfun)(const struct genann *ann, double a);
|
||||
|
||||
typedef struct genann {
|
||||
/* How many inputs, outputs, and hidden neurons. */
|
||||
@ -70,8 +70,6 @@ typedef struct genann {
|
||||
|
||||
} genann;
|
||||
|
||||
|
||||
|
||||
/* Creates and returns a new ann. */
|
||||
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);
|
||||
|
||||
@ -96,11 +94,11 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
||||
/* Saves the ann. */
|
||||
void genann_write(genann const *ann, FILE *out);
|
||||
|
||||
|
||||
double genann_act_sigmoid(double a);
|
||||
double genann_act_sigmoid_cached(double a);
|
||||
double genann_act_threshold(double a);
|
||||
double genann_act_linear(double a);
|
||||
void genann_init_sigmoid_lookup(const genann *ann);
|
||||
double genann_act_sigmoid(const genann *ann, double a);
|
||||
double genann_act_sigmoid_cached(const genann *ann, double a);
|
||||
double genann_act_threshold(const genann *ann, double a);
|
||||
double genann_act_linear(const genann *ann, double a);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
Loading…
Reference in New Issue
Block a user