Merge pull request #555 from kvedala/docs/ml

[docs] Update documentations in machine learning
This commit is contained in:
Krishna Vedala 2020-07-03 14:13:08 -04:00 committed by GitHub
commit 246f3e3f0e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 105 additions and 62 deletions

View File

@ -2,9 +2,7 @@
* \file
* \brief [Adaptive Linear Neuron
* (ADALINE)](https://en.wikipedia.org/wiki/ADALINE) implementation
*
* \author [Krishna Vedala](https://github.com/kvedala)
*
* \details
* <img
* src="https://upload.wikimedia.org/wikipedia/commons/b/be/Adaline_flow_chart.gif"
* width="200px">
@ -20,6 +18,7 @@
* computed. Computing the \f$w_j\f$ is a supervised learning algorithm wherein
* a set of features and their corresponding outputs are given and weights are
* computed using stochastic gradient descent method.
* \author [Krishna Vedala](https://github.com/kvedala)
*/
#include <assert.h>
@ -30,8 +29,15 @@
#include <stdlib.h>
#include <time.h>
/**
* @addtogroup machine_learning Machine learning algorithms
* @{
* @addtogroup adaline Adaline learning algorithm
* @{
*/
/** Maximum number of iterations to learn */
#define MAX_ITER 500 // INT_MAX
#define MAX_ADALINE_ITER 500 // INT_MAX
/** structure to hold adaline model parameters */
struct adaline
@ -41,7 +47,8 @@ struct adaline
int num_weights; /**< number of weights of the neural network */
};
#define ACCURACY 1e-5 /**< convergence accuracy \f$=1\times10^{-5}\f$ */
/** convergence accuracy \f$=1\times10^{-5}\f$ */
#define ADALINE_ACCURACY 1e-5
/**
* Default constructor
@ -77,7 +84,7 @@ struct adaline new_adaline(const int num_features, const double eta)
}
/** delete dynamically allocated memory
* \param[in] ada model from which the memory is to be freeed.
* \param[in] ada model from which the memory is to be freed.
*/
void delete_adaline(struct adaline *ada)
{
@ -91,13 +98,18 @@ void delete_adaline(struct adaline *ada)
* function](https://en.wikipedia.org/wiki/Heaviside_step_function) <img
* src="https://upload.wikimedia.org/wikipedia/commons/d/d9/Dirac_distribution_CDF.svg"
* width="200px"/>
* @param x activation function input
* @returns \f$f(x)= \begin{cases}1 & \forall\; x > 0\\ -1 & \forall\; x \le0
* \end{cases}\f$
*/
int activation(double x) { return x > 0 ? 1 : -1; }
int adaline_activation(double x) { return x > 0 ? 1 : -1; }
/**
* Operator to print the weights of the model
* @param ada model for which the values to print
* @returns pointer to a NULL terminated string of formatted weights
*/
char *get_weights_str(struct adaline *ada)
char *adaline_get_weights_str(const struct adaline *ada)
{
static char out[100]; // static so the value is persistent
@ -121,7 +133,7 @@ char *get_weights_str(struct adaline *ada)
* activation function (`NULL` to ignore)
* \returns model prediction output
*/
int predict(struct adaline *ada, const double *x, double *out)
int adaline_predict(struct adaline *ada, const double *x, double *out)
{
double y = ada->weights[ada->num_weights - 1]; // assign bias value
@ -130,7 +142,8 @@ int predict(struct adaline *ada, const double *x, double *out)
if (out) // if out variable is not NULL
*out = y;
return activation(y); // quantizer: apply ADALINE threshold function
// quantizer: apply ADALINE threshold function
return adaline_activation(y);
}
/**
@ -142,10 +155,10 @@ int predict(struct adaline *ada, const double *x, double *out)
* \param[in] y known output value
* \returns correction factor
*/
double fit_sample(struct adaline *ada, const double *x, const int y)
double adaline_fit_sample(struct adaline *ada, const double *x, const int y)
{
/* output of the model with current weights */
int p = predict(ada, x, NULL);
int p = adaline_predict(ada, x, NULL);
int prediction_error = y - p; // error in estimation
double correction_factor = ada->eta * prediction_error;
@ -168,19 +181,21 @@ double fit_sample(struct adaline *ada, const double *x, const int y)
* \param[in] y known output value for each feature vector
* \param[in] N number of training samples
*/
void fit(struct adaline *ada, double **X, const int *y, const int N)
void adaline_fit(struct adaline *ada, double **X, const int *y, const int N)
{
double avg_pred_error = 1.f;
int iter;
for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > ACCURACY); iter++)
for (iter = 0;
(iter < MAX_ADALINE_ITER) && (avg_pred_error > ADALINE_ACCURACY);
iter++)
{
avg_pred_error = 0.f;
// perform fit for each sample
for (int i = 0; i < N; i++)
{
double err = fit_sample(ada, X[i], y[i]);
double err = adaline_fit_sample(ada, X[i], y[i]);
avg_pred_error += fabs(err);
}
avg_pred_error /= N;
@ -188,15 +203,19 @@ void fit(struct adaline *ada, double **X, const int *y, const int N)
// Print updates on every iteration (the every-100th filter below is disabled)
// if (iter % 100 == 0)
printf("\tIter %3d: Training weights: %s\tAvg error: %.4f\n", iter,
get_weights_str(ada), avg_pred_error);
adaline_get_weights_str(ada), avg_pred_error);
}
if (iter < MAX_ITER)
if (iter < MAX_ADALINE_ITER)
printf("Converged after %d iterations.\n", iter);
else
printf("Did not converged after %d iterations.\n", iter);
}
/** @}
* @}
*/
/**
* test function to predict points in a 2D coordinate system above the line
* \f$x=y\f$ as +1 and others as -1.
@ -221,19 +240,19 @@ void test1(double eta)
}
printf("------- Test 1 -------\n");
printf("Model before fit: %s", get_weights_str(&ada));
printf("Model before fit: %s", adaline_get_weights_str(&ada));
fit(&ada, X, Y, N);
printf("Model after fit: %s\n", get_weights_str(&ada));
adaline_fit(&ada, X, Y, N);
printf("Model after fit: %s\n", adaline_get_weights_str(&ada));
double test_x[] = {5, -3};
int pred = predict(&ada, test_x, NULL);
int pred = adaline_predict(&ada, test_x, NULL);
printf("Predict for x=(5,-3): % d", pred);
assert(pred == -1);
printf(" ...passed\n");
double test_x2[] = {5, 8};
pred = predict(&ada, test_x2, NULL);
pred = adaline_predict(&ada, test_x2, NULL);
printf("Predict for x=(5, 8): % d", pred);
assert(pred == 1);
printf(" ...passed\n");
@ -275,10 +294,10 @@ void test2(double eta)
}
printf("------- Test 2 -------\n");
printf("Model before fit: %s", get_weights_str(&ada));
printf("Model before fit: %s", adaline_get_weights_str(&ada));
fit(&ada, X, Y, N);
printf("Model after fit: %s\n", get_weights_str(&ada));
adaline_fit(&ada, X, Y, N);
printf("Model after fit: %s\n", adaline_get_weights_str(&ada));
int N_test_cases = 5;
double test_x[2];
@ -289,7 +308,7 @@ void test2(double eta)
test_x[0] = x0;
test_x[1] = x1;
int pred = predict(&ada, test_x, NULL);
int pred = adaline_predict(&ada, test_x, NULL);
printf("Predict for x=(% 3.2f,% 3.2f): % d", x0, x1, pred);
int expected_val = (x0 + 3. * x1) > -1 ? 1 : -1;
@ -343,10 +362,10 @@ void test3(double eta)
}
printf("------- Test 3 -------\n");
printf("Model before fit: %s", get_weights_str(&ada));
printf("Model before fit: %s", adaline_get_weights_str(&ada));
fit(&ada, X, Y, N);
printf("Model after fit: %s\n", get_weights_str(&ada));
adaline_fit(&ada, X, Y, N);
printf("Model after fit: %s\n", adaline_get_weights_str(&ada));
int N_test_cases = 5;
double test_x[6];
@ -361,7 +380,7 @@ void test3(double eta)
test_x[3] = x0 * x0;
test_x[4] = x1 * x1;
test_x[5] = x2 * x2;
int pred = predict(&ada, test_x, NULL);
int pred = adaline_predict(&ada, test_x, NULL);
printf("Predict for x=(% 3.2f,% 3.2f): % d", x0, x1, pred);
int expected_val = (x0 * x0 + x1 * x1 + x2 * x2) <= 1 ? 1 : -1;

View File

@ -1,18 +1,18 @@
/**
* \file
* \author [Krishna Vedala](https://github.com/kvedala)
* \brief [Kohonen self organizing
* map](https://en.wikipedia.org/wiki/Self-organizing_map) (topological map)
*
* This example implements a powerful unsupervised learning algorithm called
* a self organizing map. The algorithm creates a connected network of weights
* that closely follows the given data points. This thus creates a topological
* map of the given data i.e., it maintains the relationship between varipus
* data points in a much higher dimesional space by creating an equivalent in a
* map of the given data i.e., it maintains the relationship between various
* data points in a much higher dimensional space by creating an equivalent in a
* 2-dimensional space.
* <img alt="Trained topological maps for the test cases in the program"
* src="https://raw.githubusercontent.com/TheAlgorithms/C/docs/images/machine_learning/kohonen/2D_Kohonen_SOM.svg"
* />
* \author [Krishna Vedala](https://github.com/kvedala)
* \warning MSVC 2019 compiler generates code that does not execute as expected.
* However, MinGW, Clang for GCC and Clang for MSVC compilers on windows perform
* as expected. Any insights and suggestions should be directed to the author.
@ -27,6 +27,13 @@
#include <omp.h>
#endif
/**
* @addtogroup machine_learning Machine learning algorithms
* @{
* @addtogroup kohonen_2d Kohonen SOM topology algorithm
* @{
*/
#ifndef max
/** shorthand for maximum value */
#define max(a, b) (((a) > (b)) ? (a) : (b))
@ -37,7 +44,7 @@
#endif
/** to store info regarding 3D arrays */
struct array_3d
struct kohonen_array_3d
{
int dim1; /**< lengths of first dimension */
int dim2; /**< lengths of second dimension */
@ -51,13 +58,13 @@ struct array_3d
* X_{i,j,k} = i\times M\times N + j\times N + k
* \f]
* where \f$L\f$, \f$M\f$ and \f$N\f$ are the 3D matrix dimensions.
* \param[in] arr pointer to ::array_3d structure
* \param[in] arr pointer to ::kohonen_array_3d structure
* \param[in] x first index
* \param[in] y second index
* \param[in] z third index
* \returns pointer to (x,y,z)^th location of data
*/
double *data_3d(const struct array_3d *arr, int x, int y, int z)
double *kohonen_data_3d(const struct kohonen_array_3d *arr, int x, int y, int z)
{
int offset = (x * arr->dim2 * arr->dim3) + (y * arr->dim3) + z;
return arr->data + offset;
@ -85,7 +92,7 @@ double _random(double a, double b)
/**
* Save a given n-dimensional data matrix to file.
*
* \param[in] fname filename to save in (gets overwriten without confirmation)
* \param[in] fname filename to save in (gets overwritten without confirmation)
* \param[in] X matrix to save
* \param[in] num_points rows in the matrix = number of points
* \param[in] num_features columns in the matrix = dimensions of points
@ -129,7 +136,7 @@ int save_2d_data(const char *fname, double **X, int num_points,
* \returns 0 if all ok
* \returns -1 if file creation failed
*/
int save_u_matrix(const char *fname, struct array_3d *W)
int save_u_matrix(const char *fname, struct kohonen_array_3d *W)
{
FILE *fp = fopen(fname, "wt");
if (!fp) // error with fopen
@ -164,8 +171,8 @@ int save_u_matrix(const char *fname, struct array_3d *W)
double d = 0.f;
for (k = 0; k < W->dim3; k++) // for each feature
{
double *w1 = data_3d(W, i, j, k);
double *w2 = data_3d(W, l, m, k);
double *w1 = kohonen_data_3d(W, i, j, k);
double *w2 = kohonen_data_3d(W, l, m, k);
d += (w1[0] - w2[0]) * (w1[0] - w2[0]);
// distance += w1[0] * w1[0];
}
@ -224,8 +231,9 @@ void get_min_2d(double **X, int N, double *val, int *x_idx, int *y_idx)
* \param[in] R neighborhood range
* \returns minimum distance of sample and trained weights
*/
double update_weights(const double *X, struct array_3d *W, double **D,
int num_out, int num_features, double alpha, int R)
double kohonen_update_weights(const double *X, struct kohonen_array_3d *W,
double **D, int num_out, int num_features,
double alpha, int R)
{
int x, y, k;
double d_min = 0.f;
@ -243,7 +251,7 @@ double update_weights(const double *X, struct array_3d *W, double **D,
// point from the current sample
for (k = 0; k < num_features; k++)
{
double *w = data_3d(W, x, y, k);
double *w = kohonen_data_3d(W, x, y, k);
D[x][y] += (w[0] - X[k]) * (w[0] - X[k]);
}
D[x][y] = sqrt(D[x][y]);
@ -283,7 +291,7 @@ double update_weights(const double *X, struct array_3d *W, double **D,
for (k = 0; k < num_features; k++)
{
double *w = data_3d(W, x, y, k);
double *w = kohonen_data_3d(W, x, y, k);
// update weights of nodes in the neighborhood
w[0] += alpha * scale_factor * (X[k] - w[0]);
}
@ -303,7 +311,7 @@ double update_weights(const double *X, struct array_3d *W, double **D,
* \param[in] num_out number of output points
* \param[in] alpha_min terminal value of alpha
*/
void kohonen_som(double **X, struct array_3d *W, int num_samples,
void kohonen_som(double **X, struct kohonen_array_3d *W, int num_samples,
int num_features, int num_out, double alpha_min)
{
int R = num_out >> 2, iter = 0;
@ -322,8 +330,8 @@ void kohonen_som(double **X, struct array_3d *W, int num_samples,
for (int sample = 0; sample < num_samples; sample++)
{
// update weights for the current input pattern sample
dmin += update_weights(X[sample], W, D, num_out, num_features,
alpha, R);
dmin += kohonen_update_weights(X[sample], W, D, num_out,
num_features, alpha, R);
}
// every 20th iteration, reduce the neighborhood range
@ -340,6 +348,11 @@ void kohonen_som(double **X, struct array_3d *W, int num_samples,
free(D);
}
/**
* @}
* @}
*/
/** Creates a random set of points distributed in four clusters in
* 3D space with centroids at the points
* * \f$(0.5, 0.5, 0.5)\f$
@ -400,7 +413,7 @@ void test1()
double **X = (double **)malloc(N * sizeof(double *));
// cluster nodes in 'x' * cluster nodes in 'y' * 2
struct array_3d W;
struct kohonen_array_3d W;
W.dim1 = num_out;
W.dim2 = num_out;
W.dim3 = features;
@ -421,7 +434,7 @@ void test1()
// preallocate with random initial weights
for (j = 0; j < features; j++)
{
double *w = data_3d(&W, i, k, j);
double *w = kohonen_data_3d(&W, i, k, j);
w[0] = _random(-5, 5);
}
}
@ -500,7 +513,7 @@ void test2()
double **X = (double **)malloc(N * sizeof(double *));
// cluster nodes in 'x' * cluster nodes in 'y' * 2
struct array_3d W;
struct kohonen_array_3d W;
W.dim1 = num_out;
W.dim2 = num_out;
W.dim3 = features;
@ -520,7 +533,7 @@ void test2()
#endif
for (j = 0; j < features; j++)
{ // preallocate with random initial weights
double *w = data_3d(&W, i, k, j);
double *w = kohonen_data_3d(&W, i, k, j);
w[0] = _random(-5, 5);
}
}
@ -601,7 +614,7 @@ void test3()
double **X = (double **)malloc(N * sizeof(double *));
// cluster nodes in 'x' * cluster nodes in 'y' * 2
struct array_3d W;
struct kohonen_array_3d W;
W.dim1 = num_out;
W.dim2 = num_out;
W.dim3 = features;
@ -622,7 +635,7 @@ void test3()
// preallocate with random initial weights
for (j = 0; j < features; j++)
{
double *w = data_3d(&W, i, k, j);
double *w = kohonen_data_3d(&W, i, k, j);
w[0] = _random(-5, 5);
}
}

View File

@ -3,13 +3,12 @@
* \brief [Kohonen self organizing
* map](https://en.wikipedia.org/wiki/Self-organizing_map) (data tracing)
*
* \author [Krishna Vedala](https://github.com/kvedala)
*
* \details
* This example implements a powerful self organizing map algorithm.
* The algorithm creates a connected network of weights that closely
* follows the given data points. This thus creates a chain of nodes that
* resembles the given input shape.
* \author [Krishna Vedala](https://github.com/kvedala)
* \see kohonen_som_topology.c
*/
#define _USE_MATH_DEFINES /**< required for MS Visual C */
@ -21,6 +20,13 @@
#include <omp.h>
#endif
/**
* @addtogroup machine_learning Machine learning algorithms
* @{
* @addtogroup kohonen_1d Kohonen SOM trace/chain algorithm
* @{
*/
#ifndef max
/** shorthand for maximum value */
#define max(a, b) (((a) > (b)) ? (a) : (b))
@ -95,7 +101,7 @@ int save_nd_data(const char *fname, double **X, int num_points,
* \param[out] val minimum value found
* \param[out] idx index where minimum value was found
*/
void get_min_1d(double const *X, int N, double *val, int *idx)
void kohonen_get_min_1d(double const *X, int N, double *val, int *idx)
{
val[0] = INFINITY; // initial min value
@ -120,8 +126,8 @@ void get_min_1d(double const *X, int N, double *val, int *idx)
* \param[in] alpha learning rate \f$0<\alpha\le1\f$
* \param[in] R neighborhood range
*/
void update_weights(double const *x, double *const *W, double *D, int num_out,
int num_features, double alpha, int R)
void kohonen_update_weights(double const *x, double *const *W, double *D,
int num_out, int num_features, double alpha, int R)
{
int j, k;
@ -138,11 +144,11 @@ void update_weights(double const *x, double *const *W, double *D, int num_out,
D[j] += (W[j][k] - x[k]) * (W[j][k] - x[k]);
}
// step 2: get closest node i.e., node with snallest Euclidian distance to
// step 2: get closest node i.e., node with smallest Euclidian distance to
// the current pattern
int d_min_idx;
double d_min;
get_min_1d(D, num_out, &d_min, &d_min_idx);
kohonen_get_min_1d(D, num_out, &d_min, &d_min_idx);
// step 3a: get the neighborhood range
int from_node = max(0, d_min_idx - R);
@ -177,7 +183,7 @@ void kohonen_som_tracer(double **X, double *const *W, int num_samples,
double alpha = 1.f;
double *D = (double *)malloc(num_out * sizeof(double));
// Loop alpha from 1 to slpha_min
// Loop alpha from 1 to alpha_min
for (; alpha > alpha_min; alpha -= 0.01, iter++)
{
// Loop for each sample pattern in the data set
@ -185,7 +191,7 @@ void kohonen_som_tracer(double **X, double *const *W, int num_samples,
{
const double *x = X[sample];
// update weights for the current input pattern sample
update_weights(x, W, D, num_out, num_features, alpha, R);
kohonen_update_weights(x, W, D, num_out, num_features, alpha, R);
}
// every 10th iteration, reduce the neighborhood range
@ -196,6 +202,11 @@ void kohonen_som_tracer(double **X, double *const *W, int num_samples,
free(D);
}
/**
* @}
* @}
*/
/** Creates a random set of points distributed *near* the circumference
* of a circle and trains an SOM that finds that circular pattern. The
* generating function is