Merge pull request #8 from amboar/speed

RFC: Increase genann performance by roughly 30%
2024-11-21 22:11:34 +03:00 · 2018-04-11 08:38:44 -05:00 · 2018-04-11 08:38:44 -05:00 · 033618b1f5
commit 033618b1f5
parent e8680aed7c d21d0f301b
4 changed files with 108 additions and 67 deletions
--- a/15
+++ b/15
@@ -1,9 +1,16 @@
-CCFLAGS = -Wall -Wshadow -O2 -g
+CFLAGS = -Wall -Wshadow -O3 -g -march=native
 LDLIBS = -lm

-
 all: test example1 example2 example3 example4

+sigmoid: CFLAGS += -Dgenann_act=genann_act_sigmoid_cached
+sigmoid: all  # build every target with the cached sigmoid fixed as the activation at compile time
+
+threshold: CFLAGS += -Dgenann_act=genann_act_threshold
+threshold: all  # same, with the threshold activation
+
+linear: CFLAGS += -Dgenann_act=genann_act_linear
+linear: all  # same, with the linear activation; NOTE(review): objects built for another variant are not rebuilt - presumably needs `make clean` first

 test: test.o genann.o

@@ -21,5 +28,7 @@ example4: example4.o genann.o

 clean:
 	$(RM) *.o
-	$(RM) *.exe
+	$(RM) test example1 example2 example3 example4 *.exe
 	$(RM) persist.txt
+
+.PHONY: all sigmoid threshold linear clean  # targets that never name a real file; 'all' included so a stray file called all cannot mask the build
--- a/genann.c
+++ b/genann.c
@@ -32,54 +32,71 @@
 #include <stdlib.h>
 #include <string.h>

+#if defined(genann_act) /* activation fixed at build time, e.g. -Dgenann_act=genann_act_linear */
+#define genann_act_hidden genann_act
+#define genann_act_output genann_act
+#else /* default: dispatch through the function pointers stored in the genann struct */
+#define genann_act_hidden genann_act_hidden_indirect
+#define genann_act_output genann_act_output_indirect
+#endif
+
 #define LOOKUP_SIZE 4096

-double genann_act_sigmoid(double a) {
+double genann_act_hidden_indirect(const struct genann *ann, double a) { /* Applies the hidden-layer activation stored in ann to a. */
+    return (*ann->activation_hidden)(ann, a);
+}
+
+double genann_act_output_indirect(const struct genann *ann, double a) { /* Applies the output-layer activation stored in ann to a. */
+    return (*ann->activation_output)(ann, a);
+}
+
+#define __unused        __attribute__((unused))     /* marks a parameter as intentionally unused */
+#define likely(x)       __builtin_expect(!!(x), 1)  /* branch hint: x is expected to be true */
+#define unlikely(x)     __builtin_expect(!!(x), 0)  /* branch hint: x is expected to be false */
+
+const double sigmoid_dom_min = -15.0;  /* inputs below this use the first table entry */
+const double sigmoid_dom_max = 15.0;   /* inputs at or above this use the last table entry */
+double lookup[LOOKUP_SIZE];            /* sigmoid samples, filled by genann_init_sigmoid_lookup() */
+double interval;                       /* table slots per unit of input: LOOKUP_SIZE / (max - min) */
+
+double inline genann_act_sigmoid(const genann *ann __unused, double a) { /* Logistic sigmoid of a computed with exp(), clamped to 0 / 1 beyond +-45; ann is not used. */
    if (a < -45.0) return 0;
    if (a > 45.0) return 1;
    return 1.0 / (1 + exp(-a));
 }

-
-double genann_act_sigmoid_cached(double a) {
-    /* If you're optimizing for memory usage, just
-     * delete this entire function and replace references
-     * of genann_act_sigmoid_cached to genann_act_sigmoid
-     */
-    const double min = -15.0;
-    const double max = 15.0;
-    static double interval;
-    static int initialized = 0;
-    static double lookup[LOOKUP_SIZE];
-
-    /* Calculate entire lookup table on first run. */
-    if (!initialized) {
-        interval = (max - min) / LOOKUP_SIZE;
+void genann_init_sigmoid_lookup(const genann *ann) { /* Precomputes lookup[] and interval for genann_act_sigmoid_cached(); ann is only forwarded to genann_act_sigmoid(). */
+        const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE; /* input width covered by one table slot */
        int i;
+
+        interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min); /* slots per unit of input, so the lookup multiplies instead of dividing */
        for (i = 0; i < LOOKUP_SIZE; ++i) {
-            lookup[i] = genann_act_sigmoid(min + interval * i);
+            lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
        }
-        /* This is down here to make this thread safe. */
-        initialized = 1;
-    }
-
-    int i;
-    i = (int)((a-min)/interval+0.5);
-    if (i <= 0) return lookup[0];
-    if (i >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE-1];
-    return lookup[i];
 }

+double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) { /* Table-lookup sigmoid: returns the lookup[] sample nearest to a; ann is not used. */
+    assert(!isnan(a));

-double genann_act_threshold(double a) {
-    return a > 0;
+    if (a < sigmoid_dom_min) return lookup[0];
+    if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];
+
+    size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);
+
+    /* Rounding up can make j == LOOKUP_SIZE just below sigmoid_dom_max. No lower
+     * check: j is unsigned and a >= sigmoid_dom_min here, so j < 0 cannot happen. */
+    if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];
+
+    return lookup[j];
 }

-
-double genann_act_linear(double a) {
+double inline genann_act_linear(const struct genann *ann __unused, double a) { /* Identity activation: returns a unchanged; ann is not used. */
    return a;
 }

+double inline genann_act_threshold(const struct genann *ann __unused, double a) { /* Step activation: 1 when a > 0, otherwise 0; ann is not used. */
+    return (a > 0) ? 1.0 : 0.0;
+}

 genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
    if (hidden_layers < 0) return 0;
@@ -117,6 +134,8 @@ genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
    ret->activation_hidden = genann_act_sigmoid_cached;
    ret->activation_output = genann_act_sigmoid_cached;

+    genann_init_sigmoid_lookup(ret);
+
    return ret;
 }

@@ -193,21 +212,41 @@ double const *genann_run(genann const *ann, double const *inputs) {

    int h, j, k;

-    const genann_actfun act = ann->activation_hidden;
-    const genann_actfun acto = ann->activation_output;
-
-    /* Figure hidden layers, if any. */
-    for (h = 0; h < ann->hidden_layers; ++h) {
-        for (j = 0; j < ann->hidden; ++j) {
+    if (!ann->hidden_layers) {
+        double *ret = o;
+        for (j = 0; j < ann->outputs; ++j) {
            double sum = *w++ * -1.0;
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) {
+            for (k = 0; k < ann->inputs; ++k) {
                sum += *w++ * i[k];
            }
-            *o++ = act(sum);
+            *o++ = genann_act_output(ann, sum);
        }

+        return ret;
+    }

-        i += (h == 0 ? ann->inputs : ann->hidden);
+    /* Figure input layer */
+    for (j = 0; j < ann->hidden; ++j) {
+        double sum = *w++ * -1.0;
+        for (k = 0; k < ann->inputs; ++k) {
+            sum += *w++ * i[k];
+        }
+        *o++ = genann_act_hidden(ann, sum);
+    }
+
+    i += ann->inputs;
+
+    /* Figure hidden layers, if any. */
+    for (h = 1; h < ann->hidden_layers; ++h) {
+        for (j = 0; j < ann->hidden; ++j) {
+            double sum = *w++ * -1.0;
+            for (k = 0; k < ann->hidden; ++k) {
+                sum += *w++ * i[k];
+            }
+            *o++ = genann_act_hidden(ann, sum);
+        }
+
+        i += ann->hidden;
    }

    double const *ret = o;
@@ -215,10 +254,10 @@ double const *genann_run(genann const *ann, double const *inputs) {
    /* Figure output layer. */
    for (j = 0; j < ann->outputs; ++j) {
        double sum = *w++ * -1.0;
-        for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) {
+        for (k = 0; k < ann->hidden; ++k) {
            sum += *w++ * i[k];
        }
-        *o++ = acto(sum);
+        *o++ = genann_act_output(ann, sum);
    }

    /* Sanity check that we used all weights and wrote all outputs. */
@@ -243,7 +282,8 @@ void genann_train(genann const *ann, double const *inputs, double const *desired


        /* Set output layer deltas. */
-        if (ann->activation_output == genann_act_linear) {
+        if (genann_act_output == genann_act_linear ||
+                ann->activation_output == genann_act_linear) {
            for (j = 0; j < ann->outputs; ++j) {
                *d++ = *t++ - *o++;
            }
@@ -304,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired

        /* Set output layer weights. */
        for (j = 0; j < ann->outputs; ++j) {
-            for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
            }

            ++d;
@@ -337,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired


        for (j = 0; j < ann->hidden; ++j) {
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
            }
            ++d;
        }
--- a/genann.h
+++ b/genann.h
@@ -39,9 +39,9 @@ extern "C" {
 #define GENANN_RANDOM() (((double)rand())/RAND_MAX)
 #endif

+struct genann;

-typedef double (*genann_actfun)(double a);
-
+typedef double (*genann_actfun)(const struct genann *ann, double a);

 typedef struct genann {
    /* How many inputs, outputs, and hidden neurons. */
@@ -70,8 +70,6 @@ typedef struct genann {

 } genann;

-
-
 /* Creates and returns a new ann. */
 genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs);

@@ -96,11 +94,11 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
 /* Saves the ann. */
 void genann_write(genann const *ann, FILE *out);

-
-double genann_act_sigmoid(double a);
-double genann_act_sigmoid_cached(double a);
-double genann_act_threshold(double a);
-double genann_act_linear(double a);
+void genann_init_sigmoid_lookup(const genann *ann);
+double genann_act_sigmoid(const genann *ann, double a);
+double genann_act_sigmoid_cached(const genann *ann, double a);
+double genann_act_threshold(const genann *ann, double a);
+double genann_act_linear(const genann *ann, double a);


 #ifdef __cplusplus
--- a/test.c
+++ b/test.c
@@ -248,7 +248,7 @@ void sigmoid() {
    const double d = .0001;

    while (i < max) {
-        lfequal(genann_act_sigmoid(i), genann_act_sigmoid_cached(i));
+        lfequal(genann_act_sigmoid(NULL, i), genann_act_sigmoid_cached(NULL, i));
        i += d;
    }
 }