mirror of https://github.com/codeplea/genann
genann: Remove branching from back-propagation inner-loop
This saves approximately 80 million instructions and 44 million branches in the trace of example4, shaving off around 8ms:

Before:

```
 Performance counter stats for './example4':

         92.629610      task-clock (msec)         #    0.997 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                78      page-faults               #    0.842 K/sec
       291,863,801      cycles                    #    3.151 GHz
     1,000,931,204      instructions              #    3.43  insn per cycle
       202,465,800      branches                  # 2185.757 M/sec
            50,949      branch-misses             #    0.03% of all branches

       0.092889789 seconds time elapsed
```

After:

```
 Performance counter stats for './example4':

         84.473035      task-clock (msec)         #    0.997 CPUs utilized
                 3      context-switches          #    0.036 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                81      page-faults               #    0.959 K/sec
       265,472,170      cycles                    #    3.143 GHz
       919,372,488      instructions              #    3.46  insn per cycle
       158,754,885      branches                  # 1879.356 M/sec
            65,337      branch-misses             #    0.04% of all branches

       0.084755458 seconds time elapsed
```

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
This commit is contained in:
parent
db51375bb7
commit
d21d0f301b
18
genann.c
18
genann.c
|
@@ -344,12 +344,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
|||
|
||||
/* Set output layer weights. */
|
||||
for (j = 0; j < ann->outputs; ++j) {
|
||||
for (k = 0; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
|
||||
if (k == 0) {
|
||||
*w++ += *d * learning_rate * -1.0;
|
||||
} else {
|
||||
*w++ += *d * learning_rate * i[k-1];
|
||||
}
|
||||
*w++ += *d * learning_rate * -1.0;
|
||||
for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
|
||||
*w++ += *d * learning_rate * i[k-1];
|
||||
}
|
||||
|
||||
++d;
|
||||
|
@@ -377,12 +374,9 @@ void genann_train(genann const *ann, double const *inputs, double const *desired
|
|||
|
||||
|
||||
for (j = 0; j < ann->hidden; ++j) {
|
||||
for (k = 0; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
|
||||
if (k == 0) {
|
||||
*w++ += *d * learning_rate * -1.0;
|
||||
} else {
|
||||
*w++ += *d * learning_rate * i[k-1];
|
||||
}
|
||||
*w++ += *d * learning_rate * -1.0;
|
||||
for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
|
||||
*w++ += *d * learning_rate * i[k-1];
|
||||
}
|
||||
++d;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue