Algorithms_in_C 1.0.0
Set of algorithms implemented in C.
Loading...
Searching...
No Matches
Kohonen SOM topology algorithm
Collaboration diagram for Kohonen SOM topology algorithm:

Data Structures

struct  kohonen_array_3d
 to store info regarding 3D arrays More...
 

Macros

#define max(a, b)   (((a) > (b)) ? (a) : (b))
 shorthand for maximum value
 
#define min(a, b)   (((a) < (b)) ? (a) : (b))
 shorthand for minimum value
 

Functions

double * kohonen_data_3d (const struct kohonen_array_3d *arr, int x, int y, int z)
 Function that returns the pointer to (x, y, z) ^th location in the linear 3D array given by:
 
double _random (double a, double b)
 Helper function to generate a random number in a given interval.
 
int save_2d_data (const char *fname, double **X, int num_points, int num_features)
 Save a given n-dimensional data martix to file.
 
int save_u_matrix (const char *fname, struct kohonen_array_3d *W)
 Create the distance matrix or U-matrix from the trained weights and save to disk.
 
void get_min_2d (double **X, int N, double *val, int *x_idx, int *y_idx)
 Get minimum value and index of the value in a matrix.
 
double kohonen_update_weights (const double *X, struct kohonen_array_3d *W, double **D, int num_out, int num_features, double alpha, int R)
 Update weights of the SOM using Kohonen algorithm.
 
void kohonen_som (double **X, struct kohonen_array_3d *W, int num_samples, int num_features, int num_out, double alpha_min)
 Apply incremental algorithm with updating neighborhood and learning rates on all samples in the given datset.
 

Detailed Description

Function Documentation

◆ _random()

double _random ( double  a,
double  b 
)

Helper function to generate a random number in a given interval.


Steps:

  1. r1 = rand() % 100 gets a random number between 0 and 99
  2. r2 = r1 / 100 converts random number to be between 0 and 0.99
  3. scale and offset the random number to given range of \([a,b)\)

    \[ y = (b - a) \times \frac{\text{(random number between 0 and RAND_MAX)} \; \text{mod}\; 100}{100} + a \]

Parameters
[in]alower limit
[in]bupper limit
Returns
random number in the range \([a,b)\)
88{
89 return ((b - a) * (rand() % 100) / 100.f) + a;
90}

◆ get_min_2d()

void get_min_2d ( double **  X,
int  N,
double *  val,
int *  x_idx,
int *  y_idx 
)

Get minimum value and index of the value in a matrix.

Parameters
[in]Xmatrix to search
[in]Nnumber of points in the vector
[out]valminimum value found
[out]x_idxx-index where minimum value was found
[out]y_idxy-index where minimum value was found
205{
206 val[0] = INFINITY; // initial min value
207
208 for (int i = 0; i < N; i++) // traverse each x-index
209 {
210 for (int j = 0; j < N; j++) // traverse each y-index
211 {
212 if (X[i][j] < val[0]) // if a lower value is found
213 { // save the value and its index
214 x_idx[0] = i;
215 y_idx[0] = j;
216 val[0] = X[i][j];
217 }
218 }
219 }
220}

◆ kohonen_data_3d()

double * kohonen_data_3d ( const struct kohonen_array_3d arr,
int  x,
int  y,
int  z 
)

Function that returns the pointer to (x, y, z) ^th location in the linear 3D array given by:

\[ X_{i,j,k} = i\times M\times N + j\times N + k \]

where \(L\), \(M\) and \(N\) are the 3D matrix dimensions.

Parameters
[in]arrpointer to kohonen_array_3d structure
[in]xfirst index
[in]ysecond index
[in]zthird index
Returns
pointer to (x,y,z)^th location of data
68{
69 int offset = (x * arr->dim2 * arr->dim3) + (y * arr->dim3) + z;
70 return arr->data + offset;
71}
int dim3
lengths of thirddimension
Definition: kohonen_som_topology.c:51
int dim2
lengths of second dimension
Definition: kohonen_som_topology.c:50
double * data
pointer to data
Definition: kohonen_som_topology.c:52

◆ kohonen_som()

void kohonen_som ( double **  X,
struct kohonen_array_3d W,
int  num_samples,
int  num_features,
int  num_out,
double  alpha_min 
)

Apply incremental algorithm with updating neighborhood and learning rates on all samples in the given datset.

Parameters
[in]Xdata set
[in,out]Wweights matrix
[in]num_samplesnumber of output points
[in]num_featuresnumber of features per input sample
[in]num_outnumber of output points
[in]alpha_minterminal value of alpha
316{
317 int R = num_out >> 2, iter = 0;
318 double **D = (double **)malloc(num_out * sizeof(double *));
319 for (int i = 0; i < num_out; i++)
320 D[i] = (double *)malloc(num_out * sizeof(double));
321
322 double dmin = 1.f; // average minimum distance of all samples
323
324 // Loop alpha from 1 to slpha_min
325 for (double alpha = 1.f; alpha > alpha_min && dmin > 1e-3;
326 alpha -= 0.001, iter++)
327 {
328 dmin = 0.f;
329 // Loop for each sample pattern in the data set
330 for (int sample = 0; sample < num_samples; sample++)
331 {
332 // update weights for the current input pattern sample
333 dmin += kohonen_update_weights(X[sample], W, D, num_out,
334 num_features, alpha, R);
335 }
336
337 // every 20th iteration, reduce the neighborhood range
338 if (iter % 100 == 0 && R > 1)
339 R--;
340
341 dmin /= num_samples;
342 printf("iter: %5d\t alpha: %.4g\t R: %d\td_min: %.4g\r", iter, alpha, R,
343 dmin);
344 }
345 putchar('\n');
346
347 for (int i = 0; i < num_out; i++) free(D[i]);
348 free(D);
349}
double kohonen_update_weights(const double *X, struct kohonen_array_3d *W, double **D, int num_out, int num_features, double alpha, int R)
Update weights of the SOM using Kohonen algorithm.
Definition: kohonen_som_topology.c:234
#define malloc(bytes)
This macro replace the standard malloc function with malloc_dbg.
Definition: malloc_dbg.h:18
#define free(ptr)
This macro replace the standard free function with free_dbg.
Definition: malloc_dbg.h:26
Here is the call graph for this function:

◆ kohonen_update_weights()

double kohonen_update_weights ( const double *  X,
struct kohonen_array_3d W,
double **  D,
int  num_out,
int  num_features,
double  alpha,
int  R 
)

Update weights of the SOM using Kohonen algorithm.

Parameters
[in]Xdata point
[in,out]Wweights matrix
[in,out]Dtemporary vector to store distances
[in]num_outnumber of output points
[in]num_featuresnumber of features per input sample
[in]alphalearning rate \(0<\alpha\le1\)
[in]Rneighborhood range
Returns
minimum distance of sample and trained weights
237{
238 int x, y, k;
239 double d_min = 0.f;
240
241#ifdef _OPENMP
242#pragma omp for
243#endif
244 // step 1: for each 2D output point
245 for (x = 0; x < num_out; x++)
246 {
247 for (y = 0; y < num_out; y++)
248 {
249 D[x][y] = 0.f;
250 // compute Euclidian distance of each output
251 // point from the current sample
252 for (k = 0; k < num_features; k++)
253 {
254 double *w = kohonen_data_3d(W, x, y, k);
255 D[x][y] += (w[0] - X[k]) * (w[0] - X[k]);
256 }
257 D[x][y] = sqrt(D[x][y]);
258 }
259 }
260
261 // step 2: get closest node i.e., node with smallest Euclidian distance to
262 // the current pattern
263 int d_min_x, d_min_y;
264 get_min_2d(D, num_out, &d_min, &d_min_x, &d_min_y);
265
266 // step 3a: get the neighborhood range
267 int from_x = max(0, d_min_x - R);
268 int to_x = min(num_out, d_min_x + R + 1);
269 int from_y = max(0, d_min_y - R);
270 int to_y = min(num_out, d_min_y + R + 1);
271
272 // step 3b: update the weights of nodes in the
273 // neighborhood
274#ifdef _OPENMP
275#pragma omp for
276#endif
277 for (x = from_x; x < to_x; x++)
278 {
279 for (y = from_y; y < to_y; y++)
280 {
281 /* you can enable the following normalization if needed.
282 personally, I found it detrimental to convergence */
283 // const double s2pi = sqrt(2.f * M_PI);
284 // double normalize = 1.f / (alpha * s2pi);
285
286 /* apply scaling inversely proportional to distance from the
287 current node */
288 double d2 =
289 (d_min_x - x) * (d_min_x - x) + (d_min_y - y) * (d_min_y - y);
290 double scale_factor = exp(-d2 / (2.f * alpha * alpha));
291
292 for (k = 0; k < num_features; k++)
293 {
294 double *w = kohonen_data_3d(W, x, y, k);
295 // update weights of nodes in the neighborhood
296 w[0] += alpha * scale_factor * (X[k] - w[0]);
297 }
298 }
299 }
300 return d_min;
301}
#define min(a, b)
shorthand for minimum value
Definition: kohonen_som_topology.c:43
double * kohonen_data_3d(const struct kohonen_array_3d *arr, int x, int y, int z)
Function that returns the pointer to (x, y, z) ^th location in the linear 3D array given by:
Definition: kohonen_som_topology.c:67
void get_min_2d(double **X, int N, double *val, int *x_idx, int *y_idx)
Get minimum value and index of the value in a matrix.
Definition: kohonen_som_topology.c:204
#define max(a, b)
shorthand for maximum value
Definition: kohonen_som_topology.c:39
Here is the call graph for this function:

◆ save_2d_data()

int save_2d_data ( const char *  fname,
double **  X,
int  num_points,
int  num_features 
)

Save a given n-dimensional data martix to file.

Parameters
[in]fnamefilename to save in (gets overwritten without confirmation)
[in]Xmatrix to save
[in]num_pointsrows in the matrix = number of points
[in]num_featurescolumns in the matrix = dimensions of points
Returns
0 if all ok
-1 if file creation failed
104{
105 FILE *fp = fopen(fname, "wt");
106 if (!fp) // error with fopen
107 {
108 char msg[120];
109 sprintf(msg, "File error (%s): ", fname);
110 perror(msg);
111 return -1;
112 }
113
114 for (int i = 0; i < num_points; i++) // for each point in the array
115 {
116 for (int j = 0; j < num_features; j++) // for each feature in the array
117 {
118 fprintf(fp, "%.4g", X[i][j]); // print the feature value
119 if (j < num_features - 1) // if not the last feature
120 fputc(',', fp); // suffix comma
121 }
122 if (i < num_points - 1) // if not the last row
123 fputc('\n', fp); // start a new line
124 }
125 fclose(fp);
126 return 0;
127}

◆ save_u_matrix()

int save_u_matrix ( const char *  fname,
struct kohonen_array_3d W 
)

Create the distance matrix or U-matrix from the trained weights and save to disk.

Parameters
[in]fnamefilename to save in (gets overwriten without confirmation)
[in]Wmodel matrix to save
Returns
0 if all ok
-1 if file creation failed
140{
141 FILE *fp = fopen(fname, "wt");
142 if (!fp) // error with fopen
143 {
144 char msg[120];
145 sprintf(msg, "File error (%s): ", fname);
146 perror(msg);
147 return -1;
148 }
149
150 int R = max(W->dim1 >> 3, 2); /* neighborhood range */
151
152 for (int i = 0; i < W->dim1; i++) // for each x
153 {
154 for (int j = 0; j < W->dim2; j++) // for each y
155 {
156 double distance = 0.f;
157 int k;
158
159 int from_x = max(0, i - R);
160 int to_x = min(W->dim1, i + R + 1);
161 int from_y = max(0, j - R);
162 int to_y = min(W->dim2, j + R + 1);
163 int l;
164#ifdef _OPENMP
165#pragma omp parallel for reduction(+ : distance)
166#endif
167 for (l = from_x; l < to_x; l++) // scan neighborhoor in x
168 {
169 for (int m = from_y; m < to_y; m++) // scan neighborhood in y
170 {
171 double d = 0.f;
172 for (k = 0; k < W->dim3; k++) // for each feature
173 {
174 double *w1 = kohonen_data_3d(W, i, j, k);
175 double *w2 = kohonen_data_3d(W, l, m, k);
176 d += (w1[0] - w2[0]) * (w1[0] - w2[0]);
177 // distance += w1[0] * w1[0];
178 }
179 distance += sqrt(d);
180 // distance += d;
181 }
182 }
183
184 distance /= R * R; // mean distance from neighbors
185 fprintf(fp, "%.4g", distance); // print the mean separation
186 if (j < W->dim2 - 1) // if not the last column
187 fputc(',', fp); // suffix comma
188 }
189 if (i < W->dim1 - 1) // if not the last row
190 fputc('\n', fp); // start a new line
191 }
192 fclose(fp);
193 return 0;
194}
int dim1
lengths of first dimension
Definition: kohonen_som_topology.c:49
Here is the call graph for this function: