Algorithms_in_C  1.0.0
Set of algorithms implemented in C.
k_means_clustering.c File Reference

K Means Clustering Algorithm implemented. More...

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
Include dependency graph for k_means_clustering.c:

Data Structures

struct  observation
 
struct  cluster
 

Macros

#define _USE_MATH_DEFINES   /* required for MS Visual C */
 

Typedefs

typedef struct observation observation
 
typedef struct cluster cluster
 

Functions

int calculateNearst (observation *o, cluster clusters[], int k)
 
void calculateCentroid (observation observations[], size_t size, cluster *centroid)
 
cluster * kMeans (observation observations[], size_t size, int k)
 
void printEPS (observation pts[], size_t len, cluster cent[], int k)
 
static void test ()
 
void test2 ()
 
int main ()
 

Detailed Description

K Means Clustering Algorithm implemented.

This file implements the K Means algorithm. It prints test output in EPS format.

Note: The clustering code works for any 2D data points and can be extended to vectors of any size by making the required changes. However, the output method, printEPS, is only suitable for polar data points, i.e. points lying in a circle, and both tests use such data.

Author
Lakhan Nad

Function Documentation

◆ main()

int main ( void  )

This function calls the test function

385 {
386  srand(time(NULL));
387  test();
388  /* test2(); */
389  return 0;
390 }
static void test()
Definition: k_means_clustering.c:321
Here is the call graph for this function:

◆ printEPS()

void printEPS ( observation  pts[],
size_t  len,
cluster  cent[],
int  k 
)

A function to print observations and clusters. The code is taken from http://rosettacode.org/wiki/K-means%2B%2B_clustering. The K Means code is also inspired by it.

Note
To write the output to a file, use shell output redirection (the > operator):
./k_means_clustering > image.eps
Parameters
pts    observations array
len    size of observation array
cent    array of cluster centroids
k    size of cent array
238 {
239  int W = 400, H = 400;
240  double min_x = DBL_MAX, max_x = DBL_MIN, min_y = DBL_MAX, max_y = DBL_MIN;
241  double scale = 0, cx = 0, cy = 0;
242  double* colors = (double*)malloc(sizeof(double) * (k * 3));
243  int i;
244  size_t j;
245  double kd = k * 1.0;
246  for (i = 0; i < k; i++)
247  {
248  *(colors + 3 * i) = (3 * (i + 1) % k) / kd;
249  *(colors + 3 * i + 1) = (7 * i % k) / kd;
250  *(colors + 3 * i + 2) = (9 * i % k) / kd;
251  }
252 
253  for (j = 0; j < len; j++)
254  {
255  if (max_x < pts[j].x)
256  {
257  max_x = pts[j].x;
258  }
259  if (min_x > pts[j].x)
260  {
261  min_x = pts[j].x;
262  }
263  if (max_y < pts[j].y)
264  {
265  max_y = pts[j].y;
266  }
267  if (min_y > pts[j].y)
268  {
269  min_y = pts[j].y;
270  }
271  }
272  scale = W / (max_x - min_x);
273  if (scale > (H / (max_y - min_y)))
274  {
275  scale = H / (max_y - min_y);
276  };
277  cx = (max_x + min_x) / 2;
278  cy = (max_y + min_y) / 2;
279 
280  printf("%%!PS-Adobe-3.0 EPSF-3.0\n%%%%BoundingBox: -5 -5 %d %d\n", W + 10,
281  H + 10);
282  printf(
283  "/l {rlineto} def /m {rmoveto} def\n"
284  "/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def\n"
285  "/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath "
286  " gsave 1 setgray fill grestore gsave 3 setlinewidth"
287  " 1 setgray stroke grestore 0 setgray stroke }def\n");
288  for (int i = 0; i < k; i++)
289  {
290  printf("%g %g %g setrgbcolor\n", *(colors + 3 * i),
291  *(colors + 3 * i + 1), *(colors + 3 * i + 2));
292  for (j = 0; j < len; j++)
293  {
294  if (pts[j].group != i)
295  {
296  continue;
297  }
298  printf("%.3f %.3f c\n", (pts[j].x - cx) * scale + W / 2,
299  (pts[j].y - cy) * scale + H / 2);
300  }
301  printf("\n0 setgray %g %g s\n", (cent[i].x - cx) * scale + W / 2,
302  (cent[i].y - cy) * scale + H / 2);
303  }
304  printf("\n%%%%EOF");
305 
306  // free accquired memory
307  free(colors);
308 }
#define malloc(bytes)
This macro replaces the standard malloc function with malloc_dbg.
Definition: malloc_dbg.h:18
#define free(ptr)
This macro replaces the standard free function with free_dbg.
Definition: malloc_dbg.h:26
double x
abscissa of 2D data point
Definition: k_means_clustering.c:40
double y
ordinate of 2D data point
Definition: k_means_clustering.c:41

◆ test()

static void test ( void  )
static

A function to test the kMeans function. Generates 100000 points in a circle of radius 20.0 with center at (0,0) and clusters them into 5 clusters.

Output for 100000 points divided in 5 clusters

Returns
None
322 {
323  size_t size = 100000L;
324  observation* observations =
325  (observation*)malloc(sizeof(observation) * size);
326  double maxRadius = 20.00;
327  double radius = 0;
328  double ang = 0;
329  size_t i = 0;
330  for (; i < size; i++)
331  {
332  radius = maxRadius * ((double)rand() / RAND_MAX);
333  ang = 2 * M_PI * ((double)rand() / RAND_MAX);
334  observations[i].x = radius * cos(ang);
335  observations[i].y = radius * sin(ang);
336  }
337  int k = 5; // No of clusters
338  cluster* clusters = kMeans(observations, size, k);
339  printEPS(observations, size, clusters, k);
340  // Free the accquired memory
341  free(observations);
342  free(clusters);
343 }
cluster * kMeans(observation observations[], size_t size, int k)
Definition: k_means_clustering.c:134
void printEPS(observation pts[], size_t len, cluster cent[], int k)
Definition: k_means_clustering.c:237
Definition: k_means_clustering.c:53
Definition: k_means_clustering.c:39
Here is the call graph for this function:

◆ test2()

void test2 ( )

A function to test the kMeans function. Generates 1000000 points in a circle of radius 20.0 with center at (0,0) and clusters them into 11 clusters.

Output for 1000000 points divided in 11 clusters

Returns
None
357 {
358  size_t size = 1000000L;
359  observation* observations =
360  (observation*)malloc(sizeof(observation) * size);
361  double maxRadius = 20.00;
362  double radius = 0;
363  double ang = 0;
364  size_t i = 0;
365  for (; i < size; i++)
366  {
367  radius = maxRadius * ((double)rand() / RAND_MAX);
368  ang = 2 * M_PI * ((double)rand() / RAND_MAX);
369  observations[i].x = radius * cos(ang);
370  observations[i].y = radius * sin(ang);
371  }
372  int k = 11; // No of clusters
373  cluster* clusters = kMeans(observations, size, k);
374  printEPS(observations, size, clusters, k);
375  // Free the accquired memory
376  free(observations);
377  free(clusters);
378 }
Here is the call graph for this function: