Collaboration diagram for K-Means Clustering Algorithm:

Data Structures
struct	observation

struct	cluster

Typedefs
typedef struct observation	observation

typedef struct cluster	cluster

Functions
int	calculateNearst (observation *o, cluster clusters[], int k)

void	calculateCentroid (observation observations[], size_t size, cluster *centroid)

cluster *	kMeans (observation observations[], size_t size, int k)

Detailed Description

Function Documentation

◆ calculateCentroid()

void calculateCentroid	(	observation	observations[],
		size_t	size,
		cluster *	centroid
	)

Calculate the centroid of the observations and store it in the given cluster variable

Parameters

observations	an array of observations whose centroid is calculated
size	size of the observations array
centroid	a reference to cluster object to store information of centroid

{
    /*
     * Compute the mean (x, y) of `size` observations and store it, together
     * with the observation count, in *centroid. Every visited observation is
     * also assigned to group 0.
     *
     * observations: array of at least `size` observations
     * size:         number of observations to average
     * centroid:     output cluster; x, y and count are overwritten
     */
    centroid->x = 0;
    centroid->y = 0;
    centroid->count = size;
    if (size == 0)
    {
        /* Nothing to average: leave the centroid at the origin with a zero
           count instead of dividing by zero below. */
        return;
    }
    for (size_t i = 0; i < size; i++)
    {
        centroid->x += observations[i].x;
        centroid->y += observations[i].y;
        observations[i].group = 0;
    }
    /* Turn the accumulated coordinate sums into the mean. */
    centroid->x /= centroid->count;
    centroid->y /= centroid->count;
}

◆ calculateNearst()

int calculateNearst	(	observation *	o,
		cluster	clusters[],
		int	k
	)

Returns the index of centroid nearest to given observation

Parameters

o	observation
clusters	array of cluster having centroids coordinates
k	size of clusters array

Returns: the index of nearest centroid for given observation

{
    /*
     * Linear scan over the k centroids, remembering the one with the smallest
     * squared Euclidean distance to *o. Squared distances are compared
     * directly, so no square root is needed. Returns -1 when no centroid
     * compares closer than DBL_MAX (for example when k <= 0).
     */
    int nearest = -1;
    double best = DBL_MAX;
    for (int c = 0; c < k; c++)
    {
        /* Squared distance from the observation to centroid c */
        double candidate =
            (clusters[c].x - o->x) * (clusters[c].x - o->x) +
            (clusters[c].y - o->y) * (clusters[c].y - o->y);
        if (!(candidate < best))
        {
            continue; /* not strictly closer than the best so far */
        }
        best = candidate;
        nearest = c;
    }
    return nearest;
}

◆ kMeans()

cluster * kMeans	(	observation	observations[],
		size_t	size,
		int	k
	)

–K Means Algorithm–

Assign each observation to one of k groups creating a random initial clustering
Find the centroid of observations for each cluster to form new centroids
Find the centroid which is nearest for each observation among the calculated centroids
Assign the observation to its nearest centroid to create a new clustering.
Repeat steps 2, 3 and 4 until the clustering no longer changes, i.e. the current clustering is the same as the previous one.

Parameters

observations	an array of observations to cluster
size	size of observations array
k	number of clusters to be made

Returns: pointer to cluster object

{
    /*
     * Cluster `size` observations into k groups and return a newly allocated
     * array of clusters (centroid coordinates plus member count). Each
     * observation's `group` field is set to the index of its cluster.
     *
     * Returns NULL if the cluster array cannot be allocated; in that case the
     * observations are left untouched. The array is heap-allocated here and
     * returned to the caller.
     */
    cluster* clusters = NULL;
    if (k <= 1)
    {
        /*
        If we have to cluster them only in one group
        then calculate centroid of observations and
        that will be a single cluster
        */
        clusters = calloc(1, sizeof *clusters);
        if (clusters == NULL)
        {
            return NULL;
        }
        calculateCentroid(observations, size, clusters);
    }
    else if ((size_t)k < size) /* k > 1 here, so the cast is value-preserving */
    {
        clusters = calloc((size_t)k, sizeof *clusters);
        if (clusters == NULL)
        {
            return NULL;
        }
        /* STEP 1: random initial assignment of every observation */
        for (size_t j = 0; j < size; j++)
        {
            observations[j].group = rand() % k;
        }
        size_t changed = 0;
        size_t minAcceptedError =
            size /
            10000;  // Do until 99.99 percent points are in correct cluster
        int t = 0;
        do
        {
            /* Reset the accumulators before recomputing the centroids */
            for (int i = 0; i < k; i++)
            {
                clusters[i].x = 0;
                clusters[i].y = 0;
                clusters[i].count = 0;
            }
            /* STEP 2: sum the coordinates of each cluster's members ... */
            for (size_t j = 0; j < size; j++)
            {
                t = observations[j].group;
                clusters[t].x += observations[j].x;
                clusters[t].y += observations[j].y;
                clusters[t].count++;
            }
            /* ... and divide by the member count to get the centroid */
            for (int i = 0; i < k; i++)
            {
                if (clusters[i].count == 0)
                {
                    /* Empty cluster: skip the division by zero. Its centroid
                       stays at the origin with count 0 for this pass. */
                    continue;
                }
                clusters[i].x /= clusters[i].count;
                clusters[i].y /= clusters[i].count;
            }
            /* STEP 3 and 4: move each observation to its nearest centroid */
            changed = 0;  // number of observations that switched cluster
            for (size_t j = 0; j < size; j++)
            {
                t = calculateNearst(observations + j, clusters, k);
                if (t != observations[j].group)
                {
                    changed++;
                    observations[j].group = t;
                }
            }
        } while (changed > minAcceptedError);  // Keep on grouping until we have
                                               // got almost best clustering
    }
    else
    {
        /* If no of clusters is more than observations
           each observation can be its own cluster;
           the remaining clusters stay zero-initialized
        */
        clusters = calloc((size_t)k, sizeof *clusters);
        if (clusters == NULL)
        {
            return NULL;
        }
        for (size_t j = 0; j < size; j++)
        {
            clusters[j].x = observations[j].x;
            clusters[j].y = observations[j].y;
            clusters[j].count = 1;
            observations[j].group = (int)j;
        }
    }
    return clusters;
}

Here is the call graph for this function:

Data Structures

Typedefs

Functions

Detailed Description

Function Documentation

◆ calculateCentroid()

◆ calculateNearst()

◆ kMeans()