mirror of https://github.com/codeplea/genann
genann: Use reciprocal interval value to strength reduce divide to multiply
This gives a reduction of roughly 2.5 million instructions in the execution trace of example4. genann_act_sigmoid_cached() previously divided by interval to calculate the lookup index. Divide is an expensive operation, so instead use the reciprocal of the existing interval calculation to reduce the divide to a multiply. Building with the following configuration: ``` $ head /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 61 model name : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz stepping : 4 microcode : 0x25 cpu MHz : 2593.871 cache size : 4096 KB physical id : 0 $ cat /etc/os-release NAME="Ubuntu" VERSION="17.10 (Artful Aardvark)" ID=ubuntu ID_LIKE=debian PRETTY_NAME="Ubuntu 17.10" VERSION_ID="17.10" HOME_URL="https://www.ubuntu.com/" SUPPORT_URL="https://help.ubuntu.com/" BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" VERSION_CODENAME=artful UBUNTU_CODENAME=artful $ cc --version gcc (Ubuntu 7.2.0-8ubuntu3) 7.2.0 Copyright (C) 2017 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
``` on my Lenovo X1 Carbon Gen 3 machine sees the following: ``` $ make CFLAGS="-g -O3 -march=native -DNDEBUG" cc -g -O3 -march=native -DNDEBUG -c -o test.o test.c cc -g -O3 -march=native -DNDEBUG -c -o genann.o genann.c cc -g -O3 -march=native -DNDEBUG -c -o example1.o example1.c cc -g -O3 -march=native -DNDEBUG -c -o example2.o example2.c cc -g -O3 -march=native -DNDEBUG -c -o example3.o example3.c cc -g -O3 -march=native -DNDEBUG -c -o example4.o example4.c cc -g -O3 -march=native -DNDEBUG -c -o strings.o strings.c cc test.o genann.o -lm -o test cc example1.o genann.o -lm -o example1 cc example4.o genann.o -lm -o example4 cc example3.o genann.o -lm -o example3 cc example2.o genann.o -lm -o example2 cc strings.o genann.o -lm -o strings $ for i in `seq 0 10`; do ./example4 > /dev/null; done; sudo perf stat record ./example4 GENANN example 4. Train an ANN on the IRIS dataset using backpropagation. Loading 150 data points from example/iris.data Training for 5000 loops over data. 147/150 correct (98.0%). 
Performance counter stats for './example4': 101.369081 task-clock (msec) # 0.998 CPUs utilized 1 context-switches # 0.010 K/sec 0 cpu-migrations # 0.000 K/sec 79 page-faults # 0.779 K/sec 320,197,883 cycles # 3.159 GHz 1,121,174,423 instructions # 3.50 insn per cycle 223,257,752 branches # 2202.425 M/sec 62,680 branch-misses # 0.03% of all branches 0.101595114 seconds time elapsed ``` Prior to the change, we see something like: ``` $ make CFLAGS="-g -O3 -march=native" cc -g -O3 -march=native -c -o test.o test.c cc -g -O3 -march=native -c -o genann.o genann.c cc -g -O3 -march=native -c -o example1.o example1.c cc -g -O3 -march=native -c -o example2.o example2.c cc -g -O3 -march=native -c -o example3.o example3.c cc -g -O3 -march=native -c -o example4.o example4.c cc -g -O3 -march=native -c -o strings.o strings.c cc test.o genann.o -lm -o test cc example1.o genann.o -lm -o example1 cc example3.o genann.o -lm -o example3 cc example4.o genann.o -lm -o example4 cc strings.o genann.o -lm -o strings cc example2.o genann.o -lm -o example2 $ for i in `seq 0 10`; do ./example4 > /dev/null; done; sudo perf stat record ./example4 GENANN example 4. Train an ANN on the IRIS dataset using backpropagation. Loading 150 data points from example/iris.data Training for 5000 loops over data. 147/150 correct (98.0%). Performance counter stats for './example4': 104.644198 task-clock (msec) # 0.998 CPUs utilized 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.000 K/sec 79 page-faults # 0.755 K/sec 330,340,554 cycles # 3.157 GHz 1,123,669,767 instructions # 3.40 insn per cycle 215,441,809 branches # 2058.803 M/sec 62,406 branch-misses # 0.03% of all branches 0.104891323 seconds time elapsed ``` Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
This commit is contained in:
parent
b79a5ce751
commit
6574bddf6b
21
genann.c
21
genann.c
|
@ -54,20 +54,27 @@ double genann_act_sigmoid_cached(double a) {
|
|||
|
||||
/* Calculate entire lookup table on first run. */
|
||||
if (!initialized) {
|
||||
interval = (max - min) / LOOKUP_SIZE;
|
||||
const double f = (max - min) / LOOKUP_SIZE;
|
||||
int i;
|
||||
interval = LOOKUP_SIZE / (max - min);
|
||||
for (i = 0; i < LOOKUP_SIZE; ++i) {
|
||||
lookup[i] = genann_act_sigmoid(min + interval * i);
|
||||
lookup[i] = genann_act_sigmoid(min + f * i);
|
||||
}
|
||||
/* This is down here to make this thread safe. */
|
||||
initialized = 1;
|
||||
}
|
||||
|
||||
int i;
|
||||
i = (int)((a-min)/interval+0.5);
|
||||
if (i <= 0) return lookup[0];
|
||||
if (i >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE-1];
|
||||
return lookup[i];
|
||||
assert(!isnan(a));
|
||||
|
||||
if (a < min) return lookup[0];
|
||||
if (a >= max) return lookup[LOOKUP_SIZE - 1];
|
||||
|
||||
size_t j = (size_t)((a-min)*interval+0.5);
|
||||
|
||||
if (j < 0) return lookup[0];
|
||||
if (j >= LOOKUP_SIZE) return lookup[LOOKUP_SIZE - 1];
|
||||
|
||||
return lookup[j];
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue