Merge pull request #8 from amboar/speed

RFC: Increase genann performance by roughly 30%
2024-11-24 23:40:02 +03:00 · 2018-04-11 08:38:44 -05:00 · 2018-04-11 08:38:44 -05:00 · 033618b1f5
commit 033618b1f5
parent e8680aed7c d21d0f301b
4 changed files with 108 additions and 67 deletions
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,16 @@
-CCFLAGS = -Wall -Wshadow -O2 -g
+CFLAGS = -Wall -Wshadow -O3 -g -march=native
 LDLIBS = -lm
 all: test example1 example2 example3 example4
 sigmoid: CFLAGS += -Dgenann_act=genann_act_sigmoid_cached
 sigmoid: all
 threshold: CFLAGS += -Dgenann_act=genann_act_threshold
 threshold: all
 linear: CFLAGS += -Dgenann_act=genann_act_linear
 linear: all
 test: test.o genann.o
@@ -21,5 +28,7 @@ example4: example4.o genann.o
 clean:
 	$(RM) *.o
-	$(RM) *.exe
+	$(RM) test example1 example2 example3 example4 *.exe
 	$(RM) persist.txt
 .PHONY: sigmoid threshold linear clean
--- a/genann.c
+++ b/genann.c
@@ -32,54 +32,71 @@
 #include <stdlib.h>
 #include <string.h>
 #ifndef genann_act
 #define genann_act_hidden genann_act_hidden_indirect
 #define genann_act_output genann_act_output_indirect
 #else
 #define genann_act_hidden genann_act
 #define genann_act_output genann_act
 #endif
 #define LOOKUP_SIZE 4096
-double genann_act_sigmoid(double a) {
+double genann_act_hidden_indirect(const struct genann *ann, double a) {
    return ann->activation_hidden(ann, a);
 }
 double genann_act_output_indirect(const struct genann *ann, double a) {
    return ann->activation_output(ann, a);
 }
 const double sigmoid_dom_min = -15.0;
 const double sigmoid_dom_max = 15.0;
 double interval;
 double lookup[LOOKUP_SIZE];
 #define likely(x)       __builtin_expect(!!(x), 1)
 #define unlikely(x)     __builtin_expect(!!(x), 0)
 #define __unused        __attribute__((unused))
 double inline genann_act_sigmoid(const genann *ann __unused, double a) {
    if (a < -45.0) return 0;
    if (a > 45.0) return 1;
    return 1.0 / (1 + exp(-a));
 }
-
+void genann_init_sigmoid_lookup(const genann *ann) {
-double genann_act_sigmoid_cached(double a) {
+        const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE;
    /* If you're optimizing for memory usage, just
     * delete this entire function and replace references
     * of genann_act_sigmoid_cached to genann_act_sigmoid
     */
    const double min = -15.0;
    const double max = 15.0;
    static double interval;
    static int initialized = 0;
    static double lookup[LOOKUP_SIZE];
    /* Calculate entire lookup table on first run. */
    if (!initialized) {
        interval = (max - min) / LOOKUP_SIZE;
        int i;
        interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min);
        for (i = 0; i < LOOKUP_SIZE; ++i) {
-            lookup[i] = genann_act_sigmoid(min + interval * i);
+            lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
        }
        /* This is down here to make this thread safe. */
        initialized = 1;
    }
    int i;
    i = (int)((a-min)/interval+0.5);
    if (i <= 0) return lookup[0];
    if (i >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE-1];
    return lookup[i];
 }
 double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) {
    assert(!isnan(a));
-double genann_act_threshold(double a) {
+    if (a < sigmoid_dom_min) return lookup[0];
-    return a > 0;
+    if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];
    size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);
    /* Because floating point... */
    if (unlikely(j < 0)) return lookup[0];
    if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];
    return lookup[j];
 }
-
+double inline genann_act_linear(const struct genann *ann __unused, double a) {
 double genann_act_linear(double a) {
    return a;
 }
 double inline genann_act_threshold(const struct genann *ann __unused, double a) {
    return a > 0;
 }
 genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
    if (hidden_layers < 0) return 0;
@@ -117,6 +134,8 @@ genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
    ret->activation_hidden = genann_act_sigmoid_cached;
    ret->activation_output = genann_act_sigmoid_cached;
    genann_init_sigmoid_lookup(ret);
    return ret;
 }
@@ -193,21 +212,41 @@ double const *genann_run(genann const *ann, double const *inputs) {
    int h, j, k;
-    const genann_actfun act = ann->activation_hidden;
+    if (!ann->hidden_layers) {
-    const genann_actfun acto = ann->activation_output;
+        double *ret = o;
-
+        for (j = 0; j < ann->outputs; ++j) {
    /* Figure hidden layers, if any. */
    for (h = 0; h < ann->hidden_layers; ++h) {
        for (j = 0; j < ann->hidden; ++j) {
            double sum = *w++ * -1.0;
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) {
+            for (k = 0; k < ann->inputs; ++k) {
                sum += *w++ * i[k];
            }
-            *o++ = act(sum);
+            *o++ = genann_act_output(ann, sum);
        }
        return ret;
    }
-        i += (h == 0 ? ann->inputs : ann->hidden);
+    /* Figure input layer */
    for (j = 0; j < ann->hidden; ++j) {
        double sum = *w++ * -1.0;
        for (k = 0; k < ann->inputs; ++k) {
            sum += *w++ * i[k];
        }
        *o++ = genann_act_hidden(ann, sum);
    }
    i += ann->inputs;
    /* Figure hidden layers, if any. */
    for (h = 1; h < ann->hidden_layers; ++h) {
        for (j = 0; j < ann->hidden; ++j) {
            double sum = *w++ * -1.0;
            for (k = 0; k < ann->hidden; ++k) {
                sum += *w++ * i[k];
            }
            *o++ = genann_act_hidden(ann, sum);
        }
        i += ann->hidden;
    }
    double const *ret = o;
@@ -215,10 +254,10 @@ double const *genann_run(genann const *ann, double const *inputs) {
    /* Figure output layer. */
    for (j = 0; j < ann->outputs; ++j) {
        double sum = *w++ * -1.0;
-        for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) {
+        for (k = 0; k < ann->hidden; ++k) {
            sum += *w++ * i[k];
        }
-        *o++ = acto(sum);
+        *o++ = genann_act_output(ann, sum);
    }
    /* Sanity check that we used all weights and wrote all outputs. */
@@ -243,7 +282,8 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
        /* Set output layer deltas. */
-        if (ann->activation_output == genann_act_linear) {
+        if (genann_act_output == genann_act_linear ||
                ann->activation_output == genann_act_linear) {
            for (j = 0; j < ann->outputs; ++j) {
                *d++ = *t++ - *o++;
            }
@@ -304,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
        /* Set output layer weights. */
        for (j = 0; j < ann->outputs; ++j) {
-            for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
+            *w++ += *d * learning_rate * -1.0;
-                if (k == 0) {
+            for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
-                    *w++ += *d * learning_rate * -1.0;
+                *w++ += *d * learning_rate * i[k-1];
                } else {
                    *w++ += *d * learning_rate * i[k-1];
                }
            }
            ++d;
@@ -337,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
        for (j = 0; j < ann->hidden; ++j) {
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
+            *w++ += *d * learning_rate * -1.0;
-                if (k == 0) {
+            for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
-                    *w++ += *d * learning_rate * -1.0;
+                *w++ += *d * learning_rate * i[k-1];
                } else {
                    *w++ += *d * learning_rate * i[k-1];
                }
            }
            ++d;
        }
--- a/genann.h
+++ b/genann.h
@@ -39,9 +39,9 @@ extern "C" {
 #define GENANN_RANDOM() (((double)rand())/RAND_MAX)
 #endif
 struct genann;
-typedef double (*genann_actfun)(double a);
+typedef double (*genann_actfun)(const struct genann *ann, double a);
 typedef struct genann {
    /* How many inputs, outputs, and hidden neurons. */
@@ -70,8 +70,6 @@ typedef struct genann {
 } genann;
 /* Creates and returns a new ann. */
 genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);
@@ -96,11 +94,11 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 /* Saves the ann. */
 void genann_write(genann const *ann, FILE *out);
-
+void genann_init_sigmoid_lookup(const genann *ann);
-double genann_act_sigmoid(double a);
+double genann_act_sigmoid(const genann *ann, double a);
-double genann_act_sigmoid_cached(double a);
+double genann_act_sigmoid_cached(const genann *ann, double a);
-double genann_act_threshold(double a);
+double genann_act_threshold(const genann *ann, double a);
-double genann_act_linear(double a);
+double genann_act_linear(const genann *ann, double a);
 #ifdef __cplusplus
--- a/test.c
+++ b/test.c
@@ -248,7 +248,7 @@ void sigmoid() {
    const double d = .0001;
    while (i < max) {
-        lfequal(genann_act_sigmoid(i), genann_act_sigmoid_cached(i));
+        lfequal(genann_act_sigmoid(NULL, i), genann_act_sigmoid_cached(NULL, i));
        i += d;
    }
 }