genann: Remove branching from back-propagation inner-loop

Peeling the k == 0 bias-weight update out of the inner loops removes a branch
that is taken exactly once per loop. This saves approximately 80 million
instructions and 44 million branches in the trace of example4, shaving around
8ms off the runtime:

Before:

```
 Performance counter stats for './example4':

         92.629610      task-clock (msec)         #    0.997 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                78      page-faults               #    0.842 K/sec
       291,863,801      cycles                    #    3.151 GHz
     1,000,931,204      instructions              #    3.43  insn per cycle
       202,465,800      branches                  # 2185.757 M/sec
            50,949      branch-misses             #    0.03% of all branches

       0.092889789 seconds time elapsed
```

After:

```
 Performance counter stats for './example4':

         84.473035      task-clock (msec)         #    0.997 CPUs utilized
                 3      context-switches          #    0.036 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                81      page-faults               #    0.959 K/sec
       265,472,170      cycles                    #    3.143 GHz
       919,372,488      instructions              #    3.46  insn per cycle
       158,754,885      branches                  # 1879.356 M/sec
            65,337      branch-misses             #    0.04% of all branches

       0.084755458 seconds time elapsed
```
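As a sanity check, the quoted savings fall straight out of the two counter
sets; a throwaway C snippet doing the subtraction (values copied from the perf
output above):

```c
#include <stdio.h>

int main(void)
{
    /* Counter values copied verbatim from the perf runs above. */
    long long insns_before  = 1000931204LL, insns_before_after = 919372488LL;
    long long branch_before = 202465800LL,  branch_after = 158754885LL;
    double ms_before = 92.629610, ms_after = 84.473035;

    printf("instructions saved: %lld\n", insns_before - insns_before_after); /* ~81.6M */
    printf("branches saved:     %lld\n", branch_before - branch_after);      /* ~43.7M */
    printf("time saved:         %.3f ms\n", ms_before - ms_after);           /* ~8.2ms */
    return 0;
}
```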

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Andrew Jeffery 2017-12-18 17:39:25 +10:30
parent db51375bb7
commit d21d0f301b
1 changed file with 6 additions and 12 deletions

```diff
@@ -344,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
         /* Set output layer weights. */
         for (j = 0; j < ann->outputs; ++j) {
-            for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
             }
             ++d;
@@ -377,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
         for (j = 0; j < ann->hidden; ++j) {
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
             }
             ++d;
         }
```
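
For reference, the transformation in isolation: this is standard loop peeling,
splitting the first iteration out of the loop so the remaining iterations run
branch-free. A minimal, self-contained sketch (hypothetical arrays and
constants, not genann's real data structures):

```c
#include <stdio.h>

/* Number of input weights per neuron, excluding the bias. */
#define N 8

int main(void)
{
    double branchy[N + 1] = {0}, peeled[N + 1] = {0}, in[N];
    double delta = 0.5, learning_rate = 0.1;
    double *w;
    int k;

    for (k = 0; k < N; ++k)
        in[k] = (double)k;

    /* Branchy form: the k == 0 test runs on every iteration even
     * though it is true exactly once. */
    w = branchy;
    for (k = 0; k < N + 1; ++k) {
        if (k == 0) {
            *w++ += delta * learning_rate * -1.0;        /* bias weight */
        } else {
            *w++ += delta * learning_rate * in[k - 1];
        }
    }

    /* Peeled form: hoist the bias update out and start the loop at 1,
     * leaving a branch-free inner loop. */
    w = peeled;
    *w++ += delta * learning_rate * -1.0;                /* bias weight */
    for (k = 1; k < N + 1; ++k)
        *w++ += delta * learning_rate * in[k - 1];

    /* The two forms compute identical weight updates. */
    for (k = 0; k < N + 1; ++k)
        printf("w[%d]: branchy=%g peeled=%g\n", k, branchy[k], peeled[k]);

    return 0;
}
```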