genann: Remove branching from back-propagation inner-loop

Peeling the k == 0 bias-weight update out of the inner loops removes a branch
that is taken exactly once per loop. This saves approximately 80 million
instructions and 44 million branches in the trace of example4, shaving around
8ms off the runtime:

Before:

```
 Performance counter stats for './example4':

         92.629610      task-clock (msec)         #    0.997 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                78      page-faults               #    0.842 K/sec
       291,863,801      cycles                    #    3.151 GHz
     1,000,931,204      instructions              #    3.43  insn per cycle
       202,465,800      branches                  # 2185.757 M/sec
            50,949      branch-misses             #    0.03% of all branches

       0.092889789 seconds time elapsed
```

After:

```
 Performance counter stats for './example4':

         84.473035      task-clock (msec)         #    0.997 CPUs utilized
                 3      context-switches          #    0.036 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                81      page-faults               #    0.959 K/sec
       265,472,170      cycles                    #    3.143 GHz
       919,372,488      instructions              #    3.46  insn per cycle
       158,754,885      branches                  # 1879.356 M/sec
            65,337      branch-misses             #    0.04% of all branches

       0.084755458 seconds time elapsed
```
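As a sanity check, the quoted savings fall straight out of the two counter
sets; a throwaway C snippet doing the subtraction (values copied from the perf
output above):

```c
#include <stdio.h>

int main(void)
{
    /* Counter values copied verbatim from the perf runs above. */
    long long insns_before  = 1000931204LL, insns_before_after = 919372488LL;
    long long branch_before = 202465800LL,  branch_after = 158754885LL;
    double ms_before = 92.629610, ms_after = 84.473035;

    printf("instructions saved: %lld\n", insns_before - insns_before_after); /* ~81.6M */
    printf("branches saved:     %lld\n", branch_before - branch_after);      /* ~43.7M */
    printf("time saved:         %.3f ms\n", ms_before - ms_after);           /* ~8.2ms */
    return 0;
}
```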

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Andrew Jeffery 2017-12-18 17:39:25 +10:30
parent db51375bb7
commit d21d0f301b
1 changed file with 6 additions and 12 deletions

```diff
@@ -344,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
         /* Set output layer weights. */
         for (j = 0; j < ann->outputs; ++j) {
-            for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
             }
             ++d;
@@ -377,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
         for (j = 0; j < ann->hidden; ++j) {
-            for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
-                if (k == 0) {
-                    *w++ += *d * learning_rate * -1.0;
-                } else {
-                    *w++ += *d * learning_rate * i[k-1];
-                }
+            *w++ += *d * learning_rate * -1.0;
+            for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
+                *w++ += *d * learning_rate * i[k-1];
             }
             ++d;
         }
```
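
For reference, the transformation in isolation: this is standard loop peeling,
splitting the first iteration out of the loop so the remaining iterations run
branch-free. A minimal, self-contained sketch (hypothetical arrays and
constants, not genann's real data structures):

```c
#include <stdio.h>

/* Number of input weights per neuron, excluding the bias. */
#define N 8

int main(void)
{
    double branchy[N + 1] = {0}, peeled[N + 1] = {0}, in[N];
    double delta = 0.5, learning_rate = 0.1;
    double *w;
    int k;

    for (k = 0; k < N; ++k)
        in[k] = (double)k;

    /* Branchy form: the k == 0 test runs on every iteration even
     * though it is true exactly once. */
    w = branchy;
    for (k = 0; k < N + 1; ++k) {
        if (k == 0) {
            *w++ += delta * learning_rate * -1.0;        /* bias weight */
        } else {
            *w++ += delta * learning_rate * in[k - 1];
        }
    }

    /* Peeled form: hoist the bias update out and start the loop at 1,
     * leaving a branch-free inner loop. */
    w = peeled;
    *w++ += delta * learning_rate * -1.0;                /* bias weight */
    for (k = 1; k < N + 1; ++k)
        *w++ += delta * learning_rate * in[k - 1];

    /* The two forms compute identical weight updates. */
    for (k = 0; k < N + 1; ++k)
        printf("w[%d]: branchy=%g peeled=%g\n", k, branchy[k], peeled[k]);

    return 0;
}
```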