Algorithms_in_C 1.0.0
Set of algorithms implemented in C.
Loading...
Searching...
No Matches
k_means_clustering.c File Reference

K Means Clustering Algorithm implemented. More...

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
Include dependency graph for k_means_clustering.c:

Data Structures

struct  observation
 
struct  cluster
 

Macros

#define _USE_MATH_DEFINES   /* required for MS Visual C */
 

Typedefs

typedef struct observation observation
 
typedef struct cluster cluster
 

Functions

int calculateNearst (observation *o, cluster clusters[], int k)
 
void calculateCentroid (observation observations[], size_t size, cluster *centroid)
 
cluster * kMeans (observation observations[], size_t size, int k)
 
void printEPS (observation pts[], size_t len, cluster cent[], int k)
 
static void test ()
 
void test2 ()
 
int main ()
 

Detailed Description

K Means Clustering Algorithm implemented.

This file implements the K-Means algorithm. It prints its test output in EPS format.

Note: The clustering code works for any 2D data points and can be extended to vectors of any size by making the required changes. However, the output method, printEPS, is only suitable for polar data points, i.e. points lying within a circle, and both tests use such data.

Author
Lakhan Nad

Function Documentation

◆ main()

int main ( void  )

This function calls the test function

385{
386 srand(time(NULL));
387 test();
388 /* test2(); */
389 return 0;
390}
static void test()
Definition: k_means_clustering.c:321
Here is the call graph for this function:

◆ printEPS()

void printEPS ( observation  pts[],
size_t  len,
cluster  cent[],
int  k 
)

A function to print observations and clusters. The code is taken from http://rosettacode.org/wiki/K-means%2B%2B_clustering. The K-Means code is also inspired by it.

Note
To write the output to a file, use shell output redirection:
./k_means_clustering > image.eps
Parameters
pts: observations array
len: size of the observations array
cent: array of cluster centroids
k: size of the cent array
238{
239 int W = 400, H = 400;
240 double min_x = DBL_MAX, max_x = DBL_MIN, min_y = DBL_MAX, max_y = DBL_MIN;
241 double scale = 0, cx = 0, cy = 0;
242 double* colors = (double*)malloc(sizeof(double) * (k * 3));
243 int i;
244 size_t j;
245 double kd = k * 1.0;
246 for (i = 0; i < k; i++)
247 {
248 *(colors + 3 * i) = (3 * (i + 1) % k) / kd;
249 *(colors + 3 * i + 1) = (7 * i % k) / kd;
250 *(colors + 3 * i + 2) = (9 * i % k) / kd;
251 }
252
253 for (j = 0; j < len; j++)
254 {
255 if (max_x < pts[j].x)
256 {
257 max_x = pts[j].x;
258 }
259 if (min_x > pts[j].x)
260 {
261 min_x = pts[j].x;
262 }
263 if (max_y < pts[j].y)
264 {
265 max_y = pts[j].y;
266 }
267 if (min_y > pts[j].y)
268 {
269 min_y = pts[j].y;
270 }
271 }
272 scale = W / (max_x - min_x);
273 if (scale > (H / (max_y - min_y)))
274 {
275 scale = H / (max_y - min_y);
276 };
277 cx = (max_x + min_x) / 2;
278 cy = (max_y + min_y) / 2;
279
280 printf("%%!PS-Adobe-3.0 EPSF-3.0\n%%%%BoundingBox: -5 -5 %d %d\n", W + 10,
281 H + 10);
282 printf(
283 "/l {rlineto} def /m {rmoveto} def\n"
284 "/c { .25 sub exch .25 sub exch .5 0 360 arc fill } def\n"
285 "/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath "
286 " gsave 1 setgray fill grestore gsave 3 setlinewidth"
287 " 1 setgray stroke grestore 0 setgray stroke }def\n");
288 for (int i = 0; i < k; i++)
289 {
290 printf("%g %g %g setrgbcolor\n", *(colors + 3 * i),
291 *(colors + 3 * i + 1), *(colors + 3 * i + 2));
292 for (j = 0; j < len; j++)
293 {
294 if (pts[j].group != i)
295 {
296 continue;
297 }
298 printf("%.3f %.3f c\n", (pts[j].x - cx) * scale + W / 2,
299 (pts[j].y - cy) * scale + H / 2);
300 }
301 printf("\n0 setgray %g %g s\n", (cent[i].x - cx) * scale + W / 2,
302 (cent[i].y - cy) * scale + H / 2);
303 }
304 printf("\n%%%%EOF");
305
306 // free accquired memory
307 free(colors);
308}
#define malloc(bytes)
This macro replace the standard malloc function with malloc_dbg.
Definition: malloc_dbg.h:18
#define free(ptr)
This macro replace the standard free function with free_dbg.
Definition: malloc_dbg.h:26
double x
abscissa of 2D data point
Definition: k_means_clustering.c:40
double y
ordinate of 2D data point
Definition: k_means_clustering.c:41

◆ test()

static void test ( void  )
static

A function to test the kMeans function. Generates 100000 points in a circle of radius 20.0 with center at (0,0) and clusters them into 5 clusters.

Output for 100000 points divided in 5 clusters

Returns
None
322{
323 size_t size = 100000L;
324 observation* observations =
325 (observation*)malloc(sizeof(observation) * size);
326 double maxRadius = 20.00;
327 double radius = 0;
328 double ang = 0;
329 size_t i = 0;
330 for (; i < size; i++)
331 {
332 radius = maxRadius * ((double)rand() / RAND_MAX);
333 ang = 2 * M_PI * ((double)rand() / RAND_MAX);
334 observations[i].x = radius * cos(ang);
335 observations[i].y = radius * sin(ang);
336 }
337 int k = 5; // No of clusters
338 cluster* clusters = kMeans(observations, size, k);
339 printEPS(observations, size, clusters, k);
340 // Free the accquired memory
341 free(observations);
342 free(clusters);
343}
cluster * kMeans(observation observations[], size_t size, int k)
Definition: k_means_clustering.c:134
void printEPS(observation pts[], size_t len, cluster cent[], int k)
Definition: k_means_clustering.c:237
cluster
Definition: k_means_clustering.c:53
observation
Definition: k_means_clustering.c:39
Here is the call graph for this function:

◆ test2()

void test2 ( )

A function to test the kMeans function. Generates 1000000 points in a circle of radius 20.0 with center at (0,0) and clusters them into 11 clusters.

Output for 1000000 points divided in 11 clusters

Returns
None
357{
358 size_t size = 1000000L;
359 observation* observations =
360 (observation*)malloc(sizeof(observation) * size);
361 double maxRadius = 20.00;
362 double radius = 0;
363 double ang = 0;
364 size_t i = 0;
365 for (; i < size; i++)
366 {
367 radius = maxRadius * ((double)rand() / RAND_MAX);
368 ang = 2 * M_PI * ((double)rand() / RAND_MAX);
369 observations[i].x = radius * cos(ang);
370 observations[i].y = radius * sin(ang);
371 }
372 int k = 11; // No of clusters
373 cluster* clusters = kMeans(observations, size, k);
374 printEPS(observations, size, clusters, k);
375 // Free the accquired memory
376 free(observations);
377 free(clusters);
378}
Here is the call graph for this function: