fix documentation

This commit is contained in:
Krishna Vedala 2020-08-08 12:22:16 -04:00
parent 05d9af45f3
commit 814f9077b7
No known key found for this signature in database
GPG Key ID: BA19ACF8FC8792F7

View File

@ -35,7 +35,8 @@
 * the name observation is used to denote
 * a random point in the plane
*/
typedef struct observation {
typedef struct observation
{
double x; /**< abscissa of 2D data point */
double y; /**< ordinate of 2D data point */
int group; /**< the number of the group (cluster) this observation is assigned to */
@ -48,13 +49,14 @@ typedef struct observation {
* stores the count of observations
* belonging to this cluster
*/
typedef struct cluster {
typedef struct cluster
{
double x; /**< abscissa centroid of this cluster */
double y; /**< ordinate of centroid of this cluster */
size_t count; /**< count of observations present in this cluster */
} cluster;
/*! @fn calculateNearest
/*!
* Returns the index of centroid nearest to
* given observation
*
@ -64,16 +66,19 @@ typedef struct cluster {
*
* @returns the index of nearest centroid for given observation
*/
int calculateNearst(observation* o, cluster clusters[], int k) {
int calculateNearst(observation* o, cluster clusters[], int k)
{
double minD = DBL_MAX;
double dist = 0;
int index = -1;
int i = 0;
for (; i < k; i++) {
for (; i < k; i++)
{
/* Calculate Squared Distance*/
dist = (clusters[i].x - o->x) * (clusters[i].x - o->x) +
(clusters[i].y - o->y) * (clusters[i].y - o->y);
if (dist < minD) {
if (dist < minD)
{
minD = dist;
index = i;
}
@ -81,7 +86,7 @@ int calculateNearst(observation* o, cluster clusters[], int k) {
return index;
}
/*! @fn calculateCentroid
/*!
 * Calculate the centroid and assign it to the cluster variable
*
* @param observations an array of observations whose centroid is calculated
@ -90,12 +95,14 @@ int calculateNearst(observation* o, cluster clusters[], int k) {
* centroid
*/
void calculateCentroid(observation observations[], size_t size,
cluster* centroid) {
cluster* centroid)
{
size_t i = 0;
centroid->x = 0;
centroid->y = 0;
centroid->count = size;
for (; i < size; i++) {
for (; i < size; i++)
{
centroid->x += observations[i].x;
centroid->y += observations[i].y;
observations[i].group = 0;
@ -104,7 +111,7 @@ void calculateCentroid(observation observations[], size_t size,
centroid->y /= centroid->count;
}
/*! @fn kMeans
/*!
* --K Means Algorithm--
* 1. Assign each observation to one of k groups
* creating a random initial clustering
@ -117,15 +124,18 @@ void calculateCentroid(observation observations[], size_t size,
 * 5. Repeat steps 2, 3 and 4 until the current
 * clustering is the same as the last
 * clustering.
*
* @param observations an array of observations to cluster
* @param size size of observations array
 * @param k number of clusters to be made
*
* @returns pointer to cluster object
*/
cluster* kMeans(observation observations[], size_t size, int k) {
cluster* kMeans(observation observations[], size_t size, int k)
{
cluster* clusters = NULL;
if (k <= 1) {
if (k <= 1)
{
/*
If we have to cluster them only in one group
then calculate centroid of observations and
@ -134,53 +144,66 @@ cluster* kMeans(observation observations[], size_t size, int k) {
clusters = (cluster*)malloc(sizeof(cluster));
memset(clusters, 0, sizeof(cluster));
calculateCentroid(observations, size, clusters);
} else if (k < size) {
}
else if (k < size)
{
clusters = malloc(sizeof(cluster) * k);
memset(clusters, 0, k * sizeof(cluster));
/* STEP 1 */
for (size_t j = 0; j < size; j++) {
for (size_t j = 0; j < size; j++)
{
observations[j].group = rand() % k;
}
size_t changed = 0;
size_t minAcceptedError =
size / 10000; // Do until 99.99 percent points are in correct cluster
size /
10000; // Repeat until 99.99 percent of points are in the correct cluster
int t = 0;
do {
do
{
/* Initialize clusters */
for (int i = 0; i < k; i++) {
for (int i = 0; i < k; i++)
{
clusters[i].x = 0;
clusters[i].y = 0;
clusters[i].count = 0;
}
/* STEP 2*/
for (size_t j = 0; j < size; j++) {
for (size_t j = 0; j < size; j++)
{
t = observations[j].group;
clusters[t].x += observations[j].x;
clusters[t].y += observations[j].y;
clusters[t].count++;
}
for (int i = 0; i < k; i++) {
for (int i = 0; i < k; i++)
{
clusters[i].x /= clusters[i].count;
clusters[i].y /= clusters[i].count;
}
/* STEP 3 and 4 */
changed = 0; // this variable stores change in clustering
for (size_t j = 0; j < size; j++) {
for (size_t j = 0; j < size; j++)
{
t = calculateNearst(observations + j, clusters, k);
if (t != observations[j].group) {
if (t != observations[j].group)
{
changed++;
observations[j].group = t;
}
}
} while (changed > minAcceptedError); // Keep on grouping until we have
// got almost best clustering
} else {
}
else
{
/* If the number of clusters is greater than the number of
observations, each observation can be its own cluster
*/
clusters = (cluster*)malloc(sizeof(cluster) * k);
memset(clusters, 0, k * sizeof(cluster));
for (int j = 0; j < size; j++) {
for (int j = 0; j < size; j++)
{
clusters[j].x = observations[j].x;
clusters[j].y = observations[j].y;
clusters[j].count = 1;
@ -195,21 +218,24 @@ cluster* kMeans(observation observations[], size_t size, int k) {
* @}
*/
/*! @fn printEPS
/*!
* A function to print observations and clusters
* The code is taken from
* @link http://rosettacode.org/wiki/K-means%2B%2B_clustering
* its C implementation
* http://rosettacode.org/wiki/K-means%2B%2B_clustering.
* Even the K Means code is also inspired from it
*
* Note: To print in a file use pipeline operator ( ./a.out > image.eps )
 * @note To print to a file, use the output redirection operator
* ```sh
* ./k_means_clustering > image.eps
* ```
*
* @param observations observations array
* @param len size of observation array
* @param cent clusters centroid's array
* @param k size of cent array
*/
void printEPS(observation pts[], size_t len, cluster cent[], int k) {
void printEPS(observation pts[], size_t len, cluster cent[], int k)
{
int W = 400, H = 400;
double min_x = DBL_MAX, max_x = DBL_MIN, min_y = DBL_MAX, max_y = DBL_MIN;
double scale = 0, cx = 0, cy = 0;
@ -217,20 +243,27 @@ void printEPS(observation pts[], size_t len, cluster cent[], int k) {
int i;
size_t j;
double kd = k * 1.0;
for (i = 0; i < k; i++) {
for (i = 0; i < k; i++)
{
*(colors + 3 * i) = (3 * (i + 1) % k) / kd;
*(colors + 3 * i + 1) = (7 * i % k) / kd;
*(colors + 3 * i + 2) = (9 * i % k) / kd;
}
for (j = 0; j < len; j++) {
if (max_x < pts[j].x) max_x = pts[j].x;
if (min_x > pts[j].x) min_x = pts[j].x;
if (max_y < pts[j].y) max_y = pts[j].y;
if (min_y > pts[j].y) min_y = pts[j].y;
for (j = 0; j < len; j++)
{
if (max_x < pts[j].x)
max_x = pts[j].x;
if (min_x > pts[j].x)
min_x = pts[j].x;
if (max_y < pts[j].y)
max_y = pts[j].y;
if (min_y > pts[j].y)
min_y = pts[j].y;
}
scale = W / (max_x - min_x);
if (scale > (H / (max_y - min_y))) {
if (scale > (H / (max_y - min_y)))
{
scale = H / (max_y - min_y);
};
cx = (max_x + min_x) / 2;
@ -244,11 +277,14 @@ void printEPS(observation pts[], size_t len, cluster cent[], int k) {
"/s { moveto -2 0 m 2 2 l 2 -2 l -2 -2 l closepath "
" gsave 1 setgray fill grestore gsave 3 setlinewidth"
" 1 setgray stroke grestore 0 setgray stroke }def\n");
for (int i = 0; i < k; i++) {
printf("%g %g %g setrgbcolor\n", *(colors + 3 * i), *(colors + 3 * i + 1),
*(colors + 3 * i + 2));
for (j = 0; j < len; j++) {
if (pts[j].group != i) continue;
for (int i = 0; i < k; i++)
{
printf("%g %g %g setrgbcolor\n", *(colors + 3 * i),
*(colors + 3 * i + 1), *(colors + 3 * i + 2));
for (j = 0; j < len; j++)
{
if (pts[j].group != i)
continue;
printf("%.3f %.3f c\n", (pts[j].x - cx) * scale + W / 2,
(pts[j].y - cy) * scale + H / 2);
}
@ -261,7 +297,7 @@ void printEPS(observation pts[], size_t len, cluster cent[], int k) {
free(colors);
}
/*! @fn test
/*!
* A function to test the kMeans function
* Generates 100000 points in a circle of
* radius 20.0 with center at (0,0)
@ -270,15 +306,19 @@ void printEPS(observation pts[], size_t len, cluster cent[], int k) {
* <img alt="Output for 100000 points divided in 5 clusters" src=
* "https://raw.githubusercontent.com/TheAlgorithms/C/docs/images/machine_learning/k_means_clustering/kMeansTest1.png"
* width="400px" heiggt="400px">
* @returns None
*/
static void test() {
static void test()
{
size_t size = 100000L;
observation* observations = (observation*)malloc(sizeof(observation) * size);
observation* observations =
(observation*)malloc(sizeof(observation) * size);
double maxRadius = 20.00;
double radius = 0;
double ang = 0;
size_t i = 0;
for (; i < size; i++) {
for (; i < size; i++)
{
radius = maxRadius * ((double)rand() / RAND_MAX);
ang = 2 * M_PI * ((double)rand() / RAND_MAX);
observations[i].x = radius * cos(ang);
@ -292,7 +332,7 @@ static void test() {
free(clusters);
}
/*! @fn test2
/*!
* A function to test the kMeans function
* Generates 1000000 points in a circle of
* radius 20.0 with center at (0,0)
@ -301,15 +341,19 @@ static void test() {
* <img alt="Output for 1000000 points divided in 11 clusters" src=
* "https://raw.githubusercontent.com/TheAlgorithms/C/docs/images/machine_learning/k_means_clustering/kMeansTest2.png"
* width="400px" heiggt="400px">
* @returns None
*/
void test2() {
void test2()
{
size_t size = 1000000L;
observation* observations = (observation*)malloc(sizeof(observation) * size);
observation* observations =
(observation*)malloc(sizeof(observation) * size);
double maxRadius = 20.00;
double radius = 0;
double ang = 0;
size_t i = 0;
for (; i < size; i++) {
for (; i < size; i++)
{
radius = maxRadius * ((double)rand() / RAND_MAX);
ang = 2 * M_PI * ((double)rand() / RAND_MAX);
observations[i].x = radius * cos(ang);
@ -323,11 +367,12 @@ void test2() {
free(clusters);
}
/*! @fn main
/*!
* This function calls the test
* function
*/
int main() {
int main()
{
srand(time(NULL));
test();
/* test2(); */