2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -30,4 +30,4 @@ target_link_libraries(pearl-example pearl m)
add_test(NAME test COMMAND pearl-test)

set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
-add_compile_options(-O2)
+add_compile_options(-O3 -Wall -Wextra -pedantic)
144 changes: 139 additions & 5 deletions src/pearl_layer.c
@@ -1,5 +1,8 @@
#include <pearl_layer.h>
#include <omp.h>
#include <stdlib.h> // For rand(), RAND_MAX
#include <time.h> // For time(), if seeding rand() via srand(time(NULL))
#include <assert.h> // For assert()

pearl_layer *pearl_layer_create()
{
@@ -91,6 +94,59 @@ void pearl_layer_destroy(pearl_layer **layer)
}
}

void pearl_layer_backward_dropout(pearl_layer **child_layer, pearl_layer **parent_layer)
{
pearl_layer_data_dropout *dropout_data = (pearl_layer_data_dropout *)(*child_layer)->layer_data;
pearl_tensor *current_layer_da = (*child_layer)->da;

assert(dropout_data != NULL);
assert(dropout_data->weights != NULL);
assert(current_layer_da != NULL);

assert(current_layer_da->dimension == 1 || current_layer_da->dimension == 2);
if ((*parent_layer)->da == NULL) {
if (current_layer_da->dimension == 1) {
(*parent_layer)->da = pearl_tensor_create(1, current_layer_da->size[0]);
} else {
(*parent_layer)->da = pearl_tensor_create(2, current_layer_da->size[0], current_layer_da->size[1]);
}
} else {
assert((*parent_layer)->da->dimension == current_layer_da->dimension);
assert((*parent_layer)->da->size[0] == current_layer_da->size[0]);
if (current_layer_da->dimension == 2) {
assert((*parent_layer)->da->size[1] == current_layer_da->size[1]);
}
}

assert(dropout_data->weights->dimension == current_layer_da->dimension);
assert(dropout_data->weights->size[0] == current_layer_da->size[0]);
if (current_layer_da->dimension == 2) {
assert(dropout_data->weights->size[1] == current_layer_da->size[1]);
}

float rate = dropout_data->rate;
assert(rate >= 0.0f && rate <= 1.0f); // A rate of 1.0 drops every activation.
float scale = (rate < 1.0f) ? 1.0f / (1.0f - rate) : 0.0f;

unsigned int total_elements = 1;
for (unsigned int i = 0; i < current_layer_da->dimension; i++) {
total_elements *= current_layer_da->size[i];
}

for (unsigned int i = 0; i < total_elements; i++) {
(*parent_layer)->da->data[i] = current_layer_da->data[i] * dropout_data->weights->data[i] * scale;
}
}
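
/*
 * Illustrative note on inverted dropout: each activation survives with
 * probability (1 - rate) and survivors are rescaled by 1 / (1 - rate),
 * so the expected activation is unchanged:
 *   E[a_out] = (1 - rate) * (a_in / (1 - rate)) + rate * 0 = a_in
 * The backward pass above multiplies da by the same mask and scale because
 * that product is the elementwise derivative of the forward transform.
 */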

PEARL_API void pearl_layer_add_child(pearl_layer **parent, pearl_layer **child)
{
if (*parent != NULL) {
@@ -170,24 +226,99 @@ pearl_layer *pearl_layer_create_dropout(unsigned int num_neurons)
layer->num_neurons = num_neurons;
pearl_layer_data_dropout *data = calloc(1, sizeof(pearl_layer_data_dropout));
data->rate = 0.5f;
-data->weights = pearl_tensor_create(1, layer->num_neurons);
+data->weights = NULL; // The mask is allocated lazily in the forward pass, sized to the input.
layer->layer_data = data;
return layer;
}

-void pearl_layer_forward(pearl_layer **parent_layer, pearl_layer **child_layer)
+void pearl_layer_forward_dropout(pearl_layer **parent_layer, pearl_layer **child_layer, bool is_training)
{
pearl_tensor *input_a = (*parent_layer)->a;
pearl_layer_data_dropout *dropout_data = (pearl_layer_data_dropout *)(*child_layer)->layer_data;

assert(input_a != NULL);
assert(dropout_data != NULL);

// Manage child's activation tensor (*child_layer)->a
assert(input_a->dimension == 1 || input_a->dimension == 2);
if ((*child_layer)->a == NULL) {
if (input_a->dimension == 1) {
(*child_layer)->a = pearl_tensor_create(1, input_a->size[0]);
} else {
(*child_layer)->a = pearl_tensor_create(2, input_a->size[0], input_a->size[1]);
}
} else {
assert((*child_layer)->a->dimension == input_a->dimension);
assert((*child_layer)->a->size[0] == input_a->size[0]);
if (input_a->dimension == 2) {
assert((*child_layer)->a->size[1] == input_a->size[1]);
}
}

// Manage dropout mask tensor dropout_data->weights
if (dropout_data->weights == NULL) {
if (input_a->dimension == 1) {
dropout_data->weights = pearl_tensor_create(1, input_a->size[0]);
} else {
dropout_data->weights = pearl_tensor_create(2, input_a->size[0], input_a->size[1]);
}
} else {
assert(dropout_data->weights->dimension == input_a->dimension);
assert(dropout_data->weights->size[0] == input_a->size[0]);
if (input_a->dimension == 2) {
assert(dropout_data->weights->size[1] == input_a->size[1]);
}
}

float rate = dropout_data->rate;
// rate == 0.0 drops nothing; rate == 1.0 drops every neuron.
assert(rate >= 0.0f && rate <= 1.0f);
float scale = (rate < 1.0f) ? 1.0f / (1.0f - rate) : 0.0f;

unsigned int total_elements = 1;
for (unsigned int i = 0; i < input_a->dimension; i++) {
total_elements *= input_a->size[i];
}

// Note: For proper operation, ensure that rand() is seeded once at program start, e.g., with srand(time(NULL)).
if (is_training) {
for (unsigned int i = 0; i < total_elements; i++) {
float random_val = (float)rand() / RAND_MAX;
if (random_val < rate) {
dropout_data->weights->data[i] = 0.0f;
} else {
dropout_data->weights->data[i] = 1.0f;
}
(*child_layer)->a->data[i] = input_a->data[i] * dropout_data->weights->data[i] * scale;
}
} else {
// Inference: copy activations through without masking or scaling.
for (unsigned int i = 0; i < total_elements; i++) {
(*child_layer)->a->data[i] = input_a->data[i];
}
}
}
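
/*
 * Design note: this is "inverted" dropout. The 1 / (1 - rate) scaling is
 * applied at training time, so inference (is_training == false) is a plain
 * copy that needs no knowledge of the dropout rate. The classical
 * alternative, scaling activations by (1 - rate) at inference, would couple
 * prediction code to a training hyperparameter.
 */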

void pearl_layer_forward(pearl_layer **parent_layer, pearl_layer **child_layer, bool is_training)
{
switch ((*child_layer)->type) {
case pearl_layer_type_input:
break;
case pearl_layer_type_fully_connected:
pearl_layer_forward_fully_connected(parent_layer, child_layer);
break;
case pearl_layer_type_dropout:
+pearl_layer_forward_dropout(parent_layer, child_layer, is_training);
break;
}
for (unsigned int i = 0; i < (*child_layer)->num_child_layers; i++) {
-pearl_layer_forward(child_layer, &(*child_layer)->child_layers[i]);
+pearl_layer_forward(child_layer, &(*child_layer)->child_layers[i], is_training);
}
}

@@ -243,7 +374,8 @@ void pearl_layer_backward(pearl_layer **child_layer, pearl_layer **parent_layer)
case pearl_layer_type_fully_connected:
pearl_layer_backward_fully_connected(child_layer, parent_layer);
break;
case pearl_layer_type_dropout:
+pearl_layer_backward_dropout(child_layer, parent_layer);
break;
}
for (unsigned int i = 0; i < (*parent_layer)->num_parent_layers; i++) {
@@ -357,6 +489,8 @@ void pearl_layer_update_fully_connected(pearl_layer **child_layer, float learning_rate)
assert(data->weights->dimension == 2);
assert(data->weights->size[0] == data->dw->size[0]);
assert(data->weights->size[1] == data->dw->size[1]);

#pragma omp parallel for // Parallelise the row loop of the weight update.
for (unsigned int i = 0; i < data->weights->size[0]; i++) {
for (unsigned int j = 0; j < data->weights->size[1]; j++) {
data->weights->data[ARRAY_IDX_2D(i, j, data->weights->size[1])] -= learning_rate * data->dw->data[ARRAY_IDX_2D(i, j, data->dw->size[1])];
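
A minimal sketch of how the new is_training flag is exercised at the layer level. Illustrative only: it assumes the input layer's activation tensor a can be populated directly and that pearl_layer_destroy releases child layers as well.

#include <pearl_layer.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
    srand((unsigned int)time(NULL)); // Seed once; the dropout mask is drawn with rand().

    pearl_layer *input = pearl_layer_create_input(4);
    pearl_layer *dropout = pearl_layer_create_dropout(4);
    pearl_layer_add_child(&input, &dropout);

    input->a = pearl_tensor_create(1, 4);
    for (unsigned int i = 0; i < input->a->size[0]; i++) {
        input->a->data[i] = 1.0f;
    }

    // Training pass: roughly half the activations become 0.0 and the
    // survivors become 2.0 (rate defaults to 0.5, so scale = 2).
    pearl_layer_forward(&input, &dropout, true);

    // Inference pass: activations are copied through unchanged.
    pearl_layer_forward(&input, &dropout, false);

    pearl_layer_destroy(&input); // Assumed to tear down the child layer too.
    return 0;
}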
3 changes: 2 additions & 1 deletion src/pearl_layer.h
@@ -2,6 +2,7 @@
#define PEARL_LAYER_H

#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <pearl_activation.h>
#include <pearl_tensor.h>
@@ -52,7 +53,7 @@ PEARL_API pearl_layer *pearl_layer_create_input(unsigned int num_neurons);
PEARL_API pearl_layer *pearl_layer_create_fully_connected(unsigned int num_neurons, unsigned int num_neurons_prev_layer);
pearl_layer *pearl_layer_create_fully_connected_blank(unsigned int num_neurons);
PEARL_API pearl_layer *pearl_layer_create_dropout(unsigned int num_neurons);
-void pearl_layer_forward(pearl_layer **parent_layer, pearl_layer **child_layer);
+void pearl_layer_forward(pearl_layer **parent_layer, pearl_layer **child_layer, bool is_training);
void pearl_layer_forward_fully_connected(pearl_layer **parent_layer, pearl_layer **child_layer);
void pearl_layer_backward(pearl_layer **child_layer, pearl_layer **parent_layer);
void pearl_layer_backward_fully_connected(pearl_layer **child_layer, pearl_layer **parent_layer);
13 changes: 7 additions & 6 deletions src/pearl_network.c
@@ -11,25 +11,25 @@ PEARL_API pearl_network *pearl_network_create()
network->version.minor = PEARL_NETWORK_VERSION_MINOR;
network->version.revision = PEARL_NETWORK_VERSION_REVISION;
network->input_layer = NULL;
network->is_training = true; // Default to training; the train/inference entry points set this explicitly.
return network;
}

PEARL_API void pearl_network_destroy(pearl_network **network)
{
if (*network != NULL) {
if ((*network)->input_layer != NULL) {
-pearl_layer_destroy(&(*network)->input_layer);
-free((*network)->input_layer);
-(*network)->input_layer = NULL;
+pearl_layer_destroy(&((*network)->input_layer)); // Frees the layer and nulls the pointer; no separate free() needed.
}

-free(*network);
+free(*network); // Free the network struct itself.
*network = NULL;
}
}

PEARL_API float pearl_network_train_epoch(pearl_network **network, const pearl_tensor *input, const pearl_tensor *output)
{
(*network)->is_training = true; // Sample fresh dropout masks during this epoch.
// Forward
pearl_network_forward(network, input);

@@ -65,7 +65,7 @@ void pearl_network_forward(pearl_network **network, const pearl_tensor *input)

/* Recursive forward other layers */
for (unsigned int i = 0; i < (*network)->input_layer->num_child_layers; i++) {
-pearl_layer_forward(&(*network)->input_layer, &(*network)->input_layer->child_layers[i]);
+pearl_layer_forward(&(*network)->input_layer, &(*network)->input_layer->child_layers[i], (*network)->is_training);
}
}

Expand Down Expand Up @@ -93,6 +93,7 @@ void pearl_network_backward(pearl_network **network, const pearl_tensor *output)

PEARL_API pearl_tensor *pearl_network_calculate(pearl_network **network, const pearl_tensor *input)
{
(*network)->is_training = false; // Inference: dropout layers pass activations through unchanged.
pearl_network_forward(network, input);
pearl_tensor *output = pearl_tensor_copy((*network)->output_layer->a);
return output;
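
From the caller's side, the train/inference toggle works as sketched below. This is a call-pattern sketch rather than a runnable program: the wiring of layers into the network is not shown in this diff, so it is elided, and the tensor shapes are placeholders.

#include <pearl_network.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
    srand((unsigned int)time(NULL)); // Dropout masks rely on rand().

    pearl_network *network = pearl_network_create();
    /* ... attach input / fully connected / dropout layers and
       fill the input and expected tensors here ... */
    pearl_tensor *input = pearl_tensor_create(2, 4, 10);    // placeholder shape
    pearl_tensor *expected = pearl_tensor_create(2, 1, 10); // placeholder shape

    // Sets is_training = true internally, so fresh dropout masks are sampled.
    float loss = pearl_network_train_epoch(&network, input, expected);

    // Sets is_training = false internally, so dropout is a pass-through.
    pearl_tensor *prediction = pearl_network_calculate(&network, input);

    (void)loss;
    (void)prediction;
    pearl_network_destroy(&network);
    return 0;
}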
2 changes: 2 additions & 0 deletions src/pearl_network.h
@@ -2,6 +2,7 @@
#define PEARL_NETWORK_H

#include <stdlib.h>
#include <stdbool.h>
#include <pearl_global.h>
#include <pearl_layer.h>
#include <pearl_optimiser.h>
@@ -22,6 +23,7 @@ typedef struct {
pearl_loss loss;
float learning_rate;
pearl_version version;
bool is_training;
} pearl_network;

PEARL_API pearl_network *pearl_network_create();
22 changes: 12 additions & 10 deletions src/pearl_tensor.c
@@ -1,4 +1,5 @@
#include <pearl_tensor.h>
#include <string.h> // For memcpy()

PEARL_API pearl_tensor *pearl_tensor_create(const int num_args, ...)
{
@@ -39,21 +40,22 @@
pearl_tensor *result = calloc(1, sizeof(pearl_tensor));
result->dimension = x->dimension;
result->size = calloc(x->dimension, sizeof(unsigned int));
-int alloc = 1;
+unsigned int total_elements = 1;
for (unsigned int i = 0 ; i < x->dimension; i++) {
result->size[i] = x->size[i];
-alloc *= x->size[i];
+total_elements *= x->size[i];
}
-result->data = calloc(alloc, sizeof(float));
-unsigned int num_data = 1;
-for (unsigned int i = 0; i < x->dimension; i++) {
-num_data *= x->size[i];
-}
-for (unsigned int i = 0; i < num_data; i++) {
-result->data[i] = x->data[i];
-}
+result->data = calloc(total_elements, sizeof(float));
+if (x->data != NULL && result->data != NULL && total_elements > 0) {
+memcpy(result->data, x->data, total_elements * sizeof(float));
+}
+// A zero-element tensor may legitimately carry a NULL data pointer; when
+// x->data is NULL but total_elements > 0, calloc has already zeroed result->data.

return result;
}

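A quick illustration of the memcpy-based deep copy. The manual frees mirror the calloc calls above and assume no dedicated tensor destructor is used here.

#include <pearl_tensor.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    pearl_tensor *original = pearl_tensor_create(2, 2, 3);
    original->data[0] = 42.0f;

    pearl_tensor *copy = pearl_tensor_copy(original);
    original->data[0] = 0.0f;        // Mutate the source...

    printf("%.1f\n", copy->data[0]); // ...prints 42.0: the copy is deep.

    free(copy->data); free(copy->size); free(copy);
    free(original->data); free(original->size); free(original);
    return 0;
}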