genann: Unroll loops via hoisting inner-loop conditions in genann_run()

This gives a reduction of roughly 27 million instructions and 11 million
branches in the execution trace of example4.

On a Lenovo X1 Carbon Gen 3 machine (Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz)
running Ubuntu 17.10 with GCC 7.2.0-8ubuntu3, using
CFLAGS="-g -O3 -march=native -DNDEBUG" I see the following change in
`perf stat`:

Before:

```
Performance counter stats for './example4':

       101.369081      task-clock (msec)         #    0.998 CPUs utilized
                1      context-switches          #    0.010 K/sec
                0      cpu-migrations            #    0.000 K/sec
               79      page-faults               #    0.779 K/sec
      320,197,883      cycles                    #    3.159 GHz
    1,121,174,423      instructions              #    3.50  insn per cycle
      223,257,752      branches                  # 2202.425 M/sec
           62,680      branch-misses             #    0.03% of all branches

      0.101595114 seconds time elapsed
```

After:

```
 Performance counter stats for './example4':

         98.988806      task-clock (msec)         #    0.998 CPUs utilized
                 1      context-switches          #    0.010 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                79      page-faults               #    0.798 K/sec
       312,298,260      cycles                    #    3.155 GHz
     1,094,183,752      instructions              #    3.50  insn per cycle
       212,007,732      branches                  # 2141.734 M/sec
            62,774      branch-misses             #    0.03% of all branches

       0.099228100 seconds time elapsed
```

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
This commit is contained in:
Andrew Jeffery 2017-12-18 08:57:58 +10:30
parent 6574bddf6b
commit b1f72be243

View File

@@ -203,18 +203,41 @@ double const *genann_run(genann const *ann, double const *inputs) {
const genann_actfun act = ann->activation_hidden;
const genann_actfun acto = ann->activation_output;
if (!ann->hidden_layers) {
double *ret = o;
for (j = 0; j < ann->outputs; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < ann->inputs; ++k) {
sum += *w++ * i[k];
}
*o++ = acto(sum);
}
return ret;
}
/* Figure input layer */
for (j = 0; j < ann->hidden; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < ann->inputs; ++k) {
sum += *w++ * i[k];
}
*o++ = act(sum);
}
i += ann->inputs;
/* Figure hidden layers, if any. */
for (h = 0; h < ann->hidden_layers; ++h) {
for (h = 1; h < ann->hidden_layers; ++h) {
for (j = 0; j < ann->hidden; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) {
for (k = 0; k < ann->hidden; ++k) {
sum += *w++ * i[k];
}
*o++ = act(sum);
}
i += (h == 0 ? ann->inputs : ann->hidden);
i += ann->hidden;
}
double const *ret = o;
@@ -222,7 +245,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
/* Figure output layer. */
for (j = 0; j < ann->outputs; ++j) {
double sum = *w++ * -1.0;
for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) {
for (k = 0; k < ann->hidden; ++k) {
sum += *w++ * i[k];
}
*o++ = acto(sum);