mirror of
https://github.com/codeplea/genann
synced 2024-11-21 22:11:34 +03:00
genann: Unroll loops via hoisting inner-loop conditions in genann_run()
This gives a reduction of roughly 27 million instructions and 11 million branches in the execution trace of example4.

On a Lenovo X1 Carbon Gen 3 machine (Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz) running Ubuntu 17.10 with GCC 7.2.0-8ubuntu3, using CFLAGS="-g -O3 -march=native -DNDEBUG", I see the following change in `perf stat`:

Before:

```
Performance counter stats for './example4':

        101.369081      task-clock (msec)         #    0.998 CPUs utilized
                 1      context-switches          #    0.010 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                79      page-faults               #    0.779 K/sec
       320,197,883      cycles                    #    3.159 GHz
     1,121,174,423      instructions              #    3.50  insn per cycle
       223,257,752      branches                  # 2202.425 M/sec
            62,680      branch-misses             #    0.03% of all branches

       0.101595114 seconds time elapsed
```

After:

```
Performance counter stats for './example4':

         98.988806      task-clock (msec)         #    0.998 CPUs utilized
                 1      context-switches          #    0.010 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                79      page-faults               #    0.798 K/sec
       312,298,260      cycles                    #    3.155 GHz
     1,094,183,752      instructions              #    3.50  insn per cycle
       212,007,732      branches                  # 2141.734 M/sec
            62,774      branch-misses             #    0.03% of all branches

       0.099228100 seconds time elapsed
```

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
This commit is contained in:
parent
6574bddf6b
commit
b1f72be243
33
genann.c
33
genann.c
@@ -203,18 +203,41 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
||||
const genann_actfun act = ann->activation_hidden;
|
||||
const genann_actfun acto = ann->activation_output;
|
||||
|
||||
/* Figure hidden layers, if any. */
|
||||
for (h = 0; h < ann->hidden_layers; ++h) {
|
||||
if (!ann->hidden_layers) {
|
||||
double *ret = o;
|
||||
for (j = 0; j < ann->outputs; ++j) {
|
||||
double sum = *w++ * -1.0;
|
||||
for (k = 0; k < ann->inputs; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = acto(sum);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Figure input layer */
|
||||
for (j = 0; j < ann->hidden; ++j) {
|
||||
double sum = *w++ * -1.0;
|
||||
for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden); ++k) {
|
||||
for (k = 0; k < ann->inputs; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = act(sum);
|
||||
}
|
||||
|
||||
i += ann->inputs;
|
||||
|
||||
i += (h == 0 ? ann->inputs : ann->hidden);
|
||||
/* Figure hidden layers, if any. */
|
||||
for (h = 1; h < ann->hidden_layers; ++h) {
|
||||
for (j = 0; j < ann->hidden; ++j) {
|
||||
double sum = *w++ * -1.0;
|
||||
for (k = 0; k < ann->hidden; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = act(sum);
|
||||
}
|
||||
|
||||
i += ann->hidden;
|
||||
}
|
||||
|
||||
double const *ret = o;
|
||||
@@ -222,7 +245,7 @@ double const *genann_run(genann const *ann, double const *inputs) {
|
||||
/* Figure output layer. */
|
||||
for (j = 0; j < ann->outputs; ++j) {
|
||||
double sum = *w++ * -1.0;
|
||||
for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs); ++k) {
|
||||
for (k = 0; k < ann->hidden; ++k) {
|
||||
sum += *w++ * i[k];
|
||||
}
|
||||
*o++ = acto(sum);
|
||||
|
Loading…
Reference in New Issue
Block a user