genann: Optionally resolve activation functions at link time

Shave around 93 million instructions and 10 million branches off the execution
trace of example4 when the sigmoid activation function is resolved at link time
(`make sigmoid`) rather than called through the per-network function pointer.

Before (`make`):
```
 Performance counter stats for './example4':

         98.988806      task-clock (msec)         #    0.998 CPUs utilized
                 1      context-switches          #    0.010 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                79      page-faults               #    0.798 K/sec
       312,298,260      cycles                    #    3.155 GHz
     1,094,183,752      instructions              #    3.50  insn per cycle
       212,007,732      branches                  # 2141.734 M/sec
            62,774      branch-misses             #    0.03% of all branches

       0.099228100 seconds time elapsed
```

After:

`make`:
```
 Performance counter stats for './example4':

         97.335180      task-clock (msec)         #    0.998 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                82      page-faults               #    0.842 K/sec
       306,722,357      cycles                    #    3.151 GHz
     1,065,669,644      instructions              #    3.47  insn per cycle
       214,256,601      branches                  # 2201.225 M/sec
            60,154      branch-misses             #    0.03% of all branches

       0.097577079 seconds time elapsed
```

`make sigmoid`:
```
 Performance counter stats for './example4':

         92.629610      task-clock (msec)         #    0.997 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                78      page-faults               #    0.842 K/sec
       291,863,801      cycles                    #    3.151 GHz
     1,000,931,204      instructions              #    3.43  insn per cycle
       202,465,800      branches                  # 2185.757 M/sec
            50,949      branch-misses             #    0.03% of all branches

       0.092889789 seconds time elapsed
```

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
This commit is contained in:
Andrew Jeffery 2017-12-18 13:08:30 +10:30
parent b1f72be243
commit db51375bb7
4 changed files with 72 additions and 53 deletions

View File

@ -3,6 +3,15 @@ LDLIBS = -lm
all: test example1 example2 example3 example4
# Optional targets: each one builds `all` with -Dgenann_act=<function> added to
# CFLAGS, which defines the genann_act macro to a single activation function
# for that build. A plain `make` leaves genann_act undefined.
sigmoid: CFLAGS += -Dgenann_act=genann_act_sigmoid_cached
sigmoid: all
threshold: CFLAGS += -Dgenann_act=genann_act_threshold
threshold: all
linear: CFLAGS += -Dgenann_act=genann_act_linear
linear: all
test: test.o genann.o
check: test
@ -21,3 +30,5 @@ clean:
$(RM) *.o
$(RM) test example1 example2 example3 example4 *.exe
$(RM) persist.txt
.PHONY: sigmoid threshold linear clean

View File

@ -32,61 +32,71 @@
#include <stdlib.h>
#include <string.h>
/* Select how the hidden and output activations are invoked.
 * If genann_act is not defined, both names map to the *_indirect wrappers,
 * which call through the function pointers stored in the genann struct.
 * If genann_act is defined (e.g. with -Dgenann_act=...), both names map
 * directly to that one function. */
#ifndef genann_act
#define genann_act_hidden genann_act_hidden_indirect
#define genann_act_output genann_act_output_indirect
#else
#define genann_act_hidden genann_act
#define genann_act_output genann_act
#endif
#define LOOKUP_SIZE 4096
double genann_act_sigmoid(double a) {
/* Applies the hidden-layer activation by calling the function pointer
 * ann->activation_hidden with the same (ann, a) arguments and returning
 * its result. */
double genann_act_hidden_indirect(const struct genann *ann, double a) {
return ann->activation_hidden(ann, a);
}
/* Applies the output-layer activation by calling the function pointer
 * ann->activation_output with the same (ann, a) arguments and returning
 * its result. */
double genann_act_output_indirect(const struct genann *ann, double a) {
return ann->activation_output(ann, a);
}
const double sigmoid_dom_min = -15.0;
const double sigmoid_dom_max = 15.0;
double interval;
double lookup[LOOKUP_SIZE];
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#define __unused __attribute__((unused))
/* Sigmoid activation computed directly: 1 / (1 + exp(-a)).
 * Inputs below -45 return 0 and inputs above 45 return 1 without calling
 * exp(). The ann parameter is not used by this function. */
double inline genann_act_sigmoid(const genann *ann __unused, double a) {
if (a < -45.0) return 0;
if (a > 45.0) return 1;
return 1.0 / (1 + exp(-a));
}
double genann_act_sigmoid_cached(double a) {
/* If you're optimizing for memory usage, just
* delete this entire function and replace references
* of genann_act_sigmoid_cached to genann_act_sigmoid
*/
const double min = -15.0;
const double max = 15.0;
static double interval;
static int initialized = 0;
static double lookup[LOOKUP_SIZE];
/* Calculate entire lookup table on first run. */
if (!initialized) {
const double f = (max - min) / LOOKUP_SIZE;
void genann_init_sigmoid_lookup(const genann *ann) {
const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE;
int i;
interval = LOOKUP_SIZE / (max - min);
for (i = 0; i < LOOKUP_SIZE; ++i) {
lookup[i] = genann_act_sigmoid(min + f * i);
}
/* This is down here to make this thread safe. */
initialized = 1;
}
interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min);
for (i = 0; i < LOOKUP_SIZE; ++i) {
lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
}
}
double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) {
assert(!isnan(a));
if (a < min) return lookup[0];
if (a >= max) return lookup[LOOKUP_SIZE - 1];
if (a < sigmoid_dom_min) return lookup[0];
if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];
size_t j = (size_t)((a-min)*interval+0.5);
size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);
if (j < 0) return lookup[0];
if (j >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE - 1];
/* Because floating point... */
if (unlikely(j < 0)) return lookup[0];
if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];
return lookup[j];
}
double genann_act_threshold(double a) {
return a > 0;
}
double genann_act_linear(double a) {
/* Linear (identity) activation: returns a unchanged.
 * The ann parameter is not used by this function. */
double inline genann_act_linear(const struct genann *ann __unused, double a) {
return a;
}
/* Threshold (step) activation: returns 1 when a is greater than 0,
 * otherwise 0. The ann parameter is not used by this function. */
double inline genann_act_threshold(const struct genann *ann __unused, double a) {
return a > 0;
}
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
if (hidden_layers < 0) return 0;
@ -124,6 +134,8 @@ genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
ret->activation_hidden = genann_act_sigmoid_cached;
ret->activation_output = genann_act_sigmoid_cached;
genann_init_sigmoid_lookup(ret);
return ret;
}
@ -200,9 +212,6 @@ double const *genann_run(genann const *ann, double const *inputs) {
int h, j, k;
const genann_actfun act = ann->activation_hidden;
const genann_actfun acto = ann->activation_output;
if (!ann->hidden_layers) {
double *ret = o;
for (j = 0; j < ann->outputs; ++j) {
@ -210,7 +219,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
for (k = 0; k < ann->inputs; ++k) {
sum += *w++ * i[k];
}
*o++ = acto(sum);
*o++ = genann_act_output(ann, sum);
}
return ret;
@ -222,7 +231,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
for (k = 0; k < ann->inputs; ++k) {
sum += *w++ * i[k];
}
*o++ = act(sum);
*o++ = genann_act_hidden(ann, sum);
}
i += ann->inputs;
@ -234,7 +243,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
for (k = 0; k < ann->hidden; ++k) {
sum += *w++ * i[k];
}
*o++ = act(sum);
*o++ = genann_act_hidden(ann, sum);
}
i += ann->hidden;
@ -248,7 +257,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
for (k = 0; k < ann->hidden; ++k) {
sum += *w++ * i[k];
}
*o++ = acto(sum);
*o++ = genann_act_output(ann, sum);
}
/* Sanity check that we used all weights and wrote all outputs. */
@ -273,7 +282,8 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
/* Set output layer deltas. */
if (ann->activation_output == genann_act_linear) {
if (genann_act_output == genann_act_linear ||
ann->activation_output == genann_act_linear) {
for (j = 0; j < ann->outputs; ++j) {
*d++ = *t++ - *o++;
}

View File

@ -39,9 +39,9 @@ extern "C" {
#define GENANN_RANDOM() (((double)rand())/RAND_MAX)
#endif
struct genann;
typedef double (*genann_actfun)(double a);
typedef double (*genann_actfun)(const struct genann *ann, double a);
typedef struct genann {
/* How many inputs, outputs, and hidden neurons. */
@ -70,8 +70,6 @@ typedef struct genann {
} genann;
/* Creates and returns a new ann. */
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);
@ -96,11 +94,11 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
/* Saves the ann. */
void genann_write(genann const *ann, FILE *out);
double genann_act_sigmoid(double a);
double genann_act_sigmoid_cached(double a);
double genann_act_threshold(double a);
double genann_act_linear(double a);
void genann_init_sigmoid_lookup(const genann *ann);
double genann_act_sigmoid(const genann *ann, double a);
double genann_act_sigmoid_cached(const genann *ann, double a);
double genann_act_threshold(const genann *ann, double a);
double genann_act_linear(const genann *ann, double a);
#ifdef __cplusplus

2
test.c
View File

@ -248,7 +248,7 @@ void sigmoid() {
const double d = .0001;
while (i < max) {
lfequal(genann_act_sigmoid(i), genann_act_sigmoid_cached(i));
lfequal(genann_act_sigmoid(NULL, i), genann_act_sigmoid_cached(NULL, i));
i += d;
}
}