genann/genann.c

398 lines
12 KiB
C
Raw Normal View History

2016-02-10 02:53:54 +03:00
/*
* GENANN - Minimal C Artificial Neural Network
*
* Copyright (c) 2015, 2016 Lewis Van Winkle
*
* http://CodePlea.com
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*
*/
#include "genann.h"
#include <assert.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2016-02-10 02:53:54 +03:00
2017-12-18 05:38:30 +03:00
/* Select how the activation functions are invoked by the rest of this file.
 * If the build defines genann_act, it is used directly for both hidden and
 * output neurons; otherwise calls go through the *_indirect wrappers, which
 * dispatch via the function pointers stored in the network. */
#ifndef genann_act
#define genann_act_hidden genann_act_hidden_indirect
#define genann_act_output genann_act_output_indirect
#else
#define genann_act_hidden genann_act
#define genann_act_output genann_act
#endif
2016-02-10 02:53:54 +03:00
/* Number of entries in the precomputed sigmoid lookup table. */
#define LOOKUP_SIZE 4096
2017-12-18 05:38:30 +03:00
/* Apply the hidden-layer activation by dispatching through the function
 * pointer stored in the network (ann->activation_hidden). */
double genann_act_hidden_indirect(const struct genann *ann, double a) {
    return (*ann->activation_hidden)(ann, a);
}
/* Apply the output-layer activation by dispatching through the function
 * pointer stored in the network (ann->activation_output). */
double genann_act_output_indirect(const struct genann *ann, double a) {
    return (*ann->activation_output)(ann, a);
}
/* Input range covered by the sigmoid lookup table. genann_act_sigmoid_cached()
 * returns the first entry for inputs below the minimum and the last entry for
 * inputs at or above the maximum. */
const double sigmoid_dom_min = -15.0;
const double sigmoid_dom_max = 15.0;
/* Table entries per unit of input (LOOKUP_SIZE / domain width), i.e. the
 * reciprocal of the sample spacing. Set by genann_init_sigmoid_lookup(). */
double interval;
/* Precomputed sigmoid samples, filled by genann_init_sigmoid_lookup(). */
double lookup[LOOKUP_SIZE];
/* Branch-prediction hints built on the GCC/Clang __builtin_expect builtin. */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* Marks a parameter as intentionally unused (GCC/Clang attribute syntax). */
#define __unused __attribute__((unused))
/* Logistic sigmoid 1 / (1 + e^-a), computed directly with exp().
 *
 * Inputs below -45 return exactly 0 and inputs above 45 return exactly 1
 * without calling exp(). The ann argument is not used. */
double inline genann_act_sigmoid(const genann *ann __unused, double a) {
    if (a < -45.0) return 0;
    if (a > 45.0) return 1;
    return 1.0 / (1 + exp(-a));
}
2017-12-18 05:38:30 +03:00
void genann_init_sigmoid_lookup(const genann *ann) {
const double f = (sigmoid_dom_max - sigmoid_dom_min) / LOOKUP_SIZE;
2016-02-10 02:53:54 +03:00
int i;
2017-12-18 05:38:30 +03:00
interval = LOOKUP_SIZE / (sigmoid_dom_max - sigmoid_dom_min);
2016-02-10 02:53:54 +03:00
for (i = 0; i < LOOKUP_SIZE; ++i) {
2017-12-18 05:38:30 +03:00
lookup[i] = genann_act_sigmoid(ann, sigmoid_dom_min + f * i);
2016-02-10 02:53:54 +03:00
}
2017-12-18 05:38:30 +03:00
}
2016-02-10 02:53:54 +03:00
2017-12-18 05:38:30 +03:00
/* Sigmoid approximated from the table built by genann_init_sigmoid_lookup().
 *
 * Inputs below sigmoid_dom_min return the first table entry; inputs at or
 * above sigmoid_dom_max return the last one. Otherwise the nearest sample is
 * returned. The ann argument is not used.
 *
 * NaN input is a precondition violation: it is only caught by the assert, so
 * callers must not pass NaN when asserts are compiled out (NDEBUG). */
double inline genann_act_sigmoid_cached(const genann *ann __unused, double a) {
    assert(!isnan(a));

    if (a < sigmoid_dom_min) return lookup[0];
    if (a >= sigmoid_dom_max) return lookup[LOOKUP_SIZE - 1];

    /* `interval` is the reciprocal of the sample spacing, so the index is
     * obtained with a multiply rather than a (more expensive) divide.
     * Adding 0.5 before truncation rounds to the nearest sample. */
    size_t j = (size_t)((a-sigmoid_dom_min)*interval+0.5);

    /* Because floating point... */
    if (unlikely(j >= LOOKUP_SIZE)) return lookup[LOOKUP_SIZE - 1];

    return lookup[j];
}
2017-12-18 05:38:30 +03:00
/* Linear (identity) activation: returns the input unchanged.
 * The ann argument is not used. */
double inline genann_act_linear(const struct genann *ann __unused, double a) {
    return a;
}
2017-12-18 05:38:30 +03:00
/* Step activation: 1.0 for strictly positive input, 0.0 otherwise.
 * The ann argument is not used. */
double inline genann_act_threshold(const struct genann *ann __unused, double a) {
    return (a > 0) ? 1.0 : 0.0;
}
2016-05-20 00:55:44 +03:00
2016-02-11 23:38:42 +03:00
genann *genann_init(int inputs, int hidden_layers, int hidden, int outputs) {
2016-02-10 02:53:54 +03:00
if (hidden_layers < 0) return 0;
if (inputs < 1) return 0;
if (outputs < 1) return 0;
if (hidden_layers > 0 && hidden < 1) return 0;
const int hidden_weights = hidden_layers ? (inputs+1) * hidden + (hidden_layers-1) * (hidden+1) * hidden : 0;
const int output_weights = (hidden_layers ? (hidden+1) : (inputs+1)) * outputs;
const int total_weights = (hidden_weights + output_weights);
const int total_neurons = (inputs + hidden * hidden_layers + outputs);
/* Allocate extra size for weights, outputs, and deltas. */
2016-02-11 23:38:42 +03:00
const int size = sizeof(genann) + sizeof(double) * (total_weights + total_neurons + (total_neurons - inputs));
genann *ret = malloc(size);
2016-02-10 02:53:54 +03:00
if (!ret) return 0;
ret->inputs = inputs;
ret->hidden_layers = hidden_layers;
ret->hidden = hidden;
ret->outputs = outputs;
ret->total_weights = total_weights;
ret->total_neurons = total_neurons;
/* Set pointers. */
2016-02-11 23:38:42 +03:00
ret->weight = (double*)((char*)ret + sizeof(genann));
2016-02-10 02:53:54 +03:00
ret->output = ret->weight + ret->total_weights;
ret->delta = ret->output + ret->total_neurons;
genann_randomize(ret);
ret->activation_hidden = genann_act_sigmoid_cached;
ret->activation_output = genann_act_sigmoid_cached;
2017-12-18 05:38:30 +03:00
genann_init_sigmoid_lookup(ret);
2016-02-10 02:53:54 +03:00
return ret;
}
2016-02-11 23:38:42 +03:00
genann *genann_read(FILE *in) {
2016-02-10 02:53:54 +03:00
int inputs, hidden_layers, hidden, outputs;
int rc;
errno = 0;
rc = fscanf(in, "%d %d %d %d", &inputs, &hidden_layers, &hidden, &outputs);
if (rc < 4 || errno != 0) {
perror("fscanf");
return NULL;
}
2016-02-10 02:53:54 +03:00
2016-02-11 23:38:42 +03:00
genann *ann = genann_init(inputs, hidden_layers, hidden, outputs);
2016-02-10 02:53:54 +03:00
int i;
for (i = 0; i < ann->total_weights; ++i) {
errno = 0;
rc = fscanf(in, " %le", ann->weight + i);
if (rc < 1 || errno != 0) {
perror("fscanf");
genann_free(ann);
return NULL;
}
2016-02-10 02:53:54 +03:00
}
return ann;
}
2016-02-11 23:38:42 +03:00
/* Return a newly allocated deep copy of ann, or 0 if allocation fails.
 *
 * The whole buffer (struct followed by the weight, output and delta arrays)
 * is copied in one memcpy, then the three array pointers are re-based so
 * they point into the copy rather than into the original. Release the copy
 * with genann_free(). */
genann *genann_copy(genann const *ann) {
    /* Byte count kept in size_t so it is not truncated to int. */
    const size_t size = sizeof(genann) + sizeof(double) *
        ((size_t)ann->total_weights + (size_t)ann->total_neurons + (size_t)(ann->total_neurons - ann->inputs));
    genann *ret = malloc(size);
    if (!ret) return 0;

    memcpy(ret, ann, size);

    /* Set pointers. */
    ret->weight = (double*)((char*)ret + sizeof(genann));
    ret->output = ret->weight + ret->total_weights;
    ret->delta = ret->output + ret->total_neurons;

    return ret;
}
2016-02-11 23:38:42 +03:00
/* Overwrite every weight of ann with GENANN_RANDOM() - 0.5. */
void genann_randomize(genann *ann) {
    int i;
    for (i = 0; i < ann->total_weights; ++i) {
        double r = GENANN_RANDOM();
        /* Sets weights from -0.5 to 0.5. */
        ann->weight[i] = r - 0.5;
    }
}
2016-02-11 23:38:42 +03:00
/* Release a network. A single free() suffices because the struct and its
 * arrays share one allocation. */
void genann_free(genann *ann) {
    /* The weight, output, and delta pointers go to the same buffer. */
    free(ann);
}
2016-02-11 23:38:42 +03:00
/* Run the network forward on one input vector.
 *
 * ann     network to evaluate; its output scratch array is overwritten
 * inputs  array of ann->inputs values
 *
 * Returns a pointer to the first of ann->outputs output values. The pointer
 * refers to storage inside ann, so it is overwritten by the next run and
 * must not be freed by the caller.
 *
 * Weight layout per neuron: one bias weight (multiplied by -1.0) followed by
 * one weight per input from the previous layer. */
double const *genann_run(genann const *ann, double const *inputs) {
    double const *w = ann->weight;
    double *o = ann->output + ann->inputs;
    double const *i = ann->output;

    /* Copy the inputs to the scratch area, where we also store each neuron's
     * output, for consistency. This way the first layer isn't a special case. */
    memcpy(ann->output, inputs, sizeof(double) * ann->inputs);

    int h, j, k;

    /* No hidden layers: the outputs are computed directly from the inputs. */
    if (!ann->hidden_layers) {
        double *ret = o;
        for (j = 0; j < ann->outputs; ++j) {
            double sum = *w++ * -1.0;
            for (k = 0; k < ann->inputs; ++k) {
                sum += *w++ * i[k];
            }
            *o++ = genann_act_output(ann, sum);
        }

        return ret;
    }

    /* Figure input layer */
    for (j = 0; j < ann->hidden; ++j) {
        double sum = *w++ * -1.0;
        for (k = 0; k < ann->inputs; ++k) {
            sum += *w++ * i[k];
        }
        *o++ = genann_act_hidden(ann, sum);
    }

    i += ann->inputs;

    /* Figure hidden layers, if any. */
    for (h = 1; h < ann->hidden_layers; ++h) {
        for (j = 0; j < ann->hidden; ++j) {
            double sum = *w++ * -1.0;
            for (k = 0; k < ann->hidden; ++k) {
                sum += *w++ * i[k];
            }
            *o++ = genann_act_hidden(ann, sum);
        }

        i += ann->hidden;
    }

    double const *ret = o;

    /* Figure output layer. */
    for (j = 0; j < ann->outputs; ++j) {
        double sum = *w++ * -1.0;
        for (k = 0; k < ann->hidden; ++k) {
            sum += *w++ * i[k];
        }
        *o++ = genann_act_output(ann, sum);
    }

    /* Sanity check that we used all weights and wrote all outputs. */
    assert(w - ann->weight == ann->total_weights);
    assert(o - ann->output == ann->total_neurons);

    return ret;
}
2016-02-11 23:38:42 +03:00
/* Perform one backpropagation update for a single training sample.
 *
 * ann              network to train; its weights, outputs and deltas are
 *                  modified through the pointers it holds
 * inputs           array of ann->inputs values
 * desired_outputs  array of ann->outputs target values
 * learning_rate    step size applied to every weight update
 *
 * Output deltas use the plain error when the output activation is
 * genann_act_linear, and error * o * (1 - o) otherwise. Hidden deltas always
 * use the o * (1 - o) factor. */
void genann_train(genann const *ann, double const *inputs, double const *desired_outputs, double learning_rate) {
    /* To begin with, we must run the network forward. */
    genann_run(ann, inputs);

    int h, j, k;

    /* First set the output layer deltas. */
    {
        double const *o = ann->output + ann->inputs + ann->hidden * ann->hidden_layers; /* First output. */
        double *d = ann->delta + ann->hidden * ann->hidden_layers; /* First delta. */
        double const *t = desired_outputs; /* First desired output. */

        /* Set output layer deltas. */
        if (genann_act_output == genann_act_linear ||
                ann->activation_output == genann_act_linear) {
            for (j = 0; j < ann->outputs; ++j) {
                *d++ = *t++ - *o++;
            }
        } else {
            for (j = 0; j < ann->outputs; ++j) {
                *d++ = (*t - *o) * *o * (1.0 - *o);
                ++o; ++t;
            }
        }
    }

    /* Set hidden layer deltas, start on last layer and work backwards. */
    /* Note that loop is skipped in the case of hidden_layers == 0. */
    for (h = ann->hidden_layers - 1; h >= 0; --h) {

        /* Find first output and delta in this layer. */
        double const *o = ann->output + ann->inputs + (h * ann->hidden);
        double *d = ann->delta + (h * ann->hidden);

        /* Find first delta in following layer (which may be hidden or output). */
        double const * const dd = ann->delta + ((h+1) * ann->hidden);

        /* Find first weight in following layer (which may be hidden or output). */
        double const * const ww = ann->weight + ((ann->inputs+1) * ann->hidden) + ((ann->hidden+1) * ann->hidden * (h));

        for (j = 0; j < ann->hidden; ++j) {

            double delta = 0;

            for (k = 0; k < (h == ann->hidden_layers-1 ? ann->outputs : ann->hidden); ++k) {
                const double forward_delta = dd[k];
                /* +1 skips the bias weight at the start of each neuron's row. */
                const int windex = k * (ann->hidden + 1) + (j + 1);
                const double forward_weight = ww[windex];
                delta += forward_delta * forward_weight;
            }

            *d = *o * (1.0-*o) * delta;
            ++d; ++o;
        }
    }

    /* Train the outputs. */
    {
        /* Find first output delta. */
        double const *d = ann->delta + ann->hidden * ann->hidden_layers; /* First output delta. */

        /* Find first weight to first output delta. */
        double *w = ann->weight + (ann->hidden_layers
                ? ((ann->inputs+1) * ann->hidden + (ann->hidden+1) * ann->hidden * (ann->hidden_layers-1))
                : (0));

        /* Find first output in previous layer. */
        double const * const i = ann->output + (ann->hidden_layers
                ? (ann->inputs + (ann->hidden) * (ann->hidden_layers-1))
                : 0);

        /* Set output layer weights. */
        for (j = 0; j < ann->outputs; ++j) {
            /* Bias weight: its input is the constant -1.0. */
            *w++ += *d * learning_rate * -1.0;
            for (k = 1; k < (ann->hidden_layers ? ann->hidden : ann->inputs) + 1; ++k) {
                *w++ += *d * learning_rate * i[k-1];
            }

            ++d;
        }

        assert(w - ann->weight == ann->total_weights);
    }

    /* Train the hidden layers. */
    for (h = ann->hidden_layers - 1; h >= 0; --h) {

        /* Find first delta in this layer. */
        double const *d = ann->delta + (h * ann->hidden);

        /* Find first input to this layer. */
        double const *i = ann->output + (h
                ? (ann->inputs + ann->hidden * (h-1))
                : 0);

        /* Find first weight to this layer. */
        double *w = ann->weight + (h
                ? ((ann->inputs+1) * ann->hidden + (ann->hidden+1) * (ann->hidden) * (h-1))
                : 0);

        for (j = 0; j < ann->hidden; ++j) {
            /* Bias weight: its input is the constant -1.0. */
            *w++ += *d * learning_rate * -1.0;
            for (k = 1; k < (h == 0 ? ann->inputs : ann->hidden) + 1; ++k) {
                *w++ += *d * learning_rate * i[k-1];
            }
            ++d;
        }
    }
}
2016-02-11 23:38:42 +03:00
/* Write the network to a stream as text: the four topology integers
 * (inputs, hidden_layers, hidden, outputs) followed by every weight, all
 * separated by single spaces with no trailing newline. This is the format
 * genann_read() parses. fprintf() errors are not reported. */
void genann_write(genann const *ann, FILE *out) {
    fprintf(out, "%d %d %d %d", ann->inputs, ann->hidden_layers, ann->hidden, ann->outputs);

    int i;
    for (i = 0; i < ann->total_weights; ++i) {
        fprintf(out, " %.20e", ann->weight[i]);
    }
}