From dbbdb5179079f24a1eca63e3bc60276776297063 Mon Sep 17 00:00:00 2001
From: NomadicPython <8770011+NomadicPython@users.noreply.github.com>
Date: Mon, 28 Apr 2025 18:55:54 +0200
Subject: [PATCH 1/5] add self.save_data()

---
 community.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/community.py b/community.py
index fec0d27..644234d 100644
--- a/community.py
+++ b/community.py
@@ -72,6 +72,7 @@ def load_data(self, data_path: str | None = None) -> None:
     def create_data(self, num_species: int, num_resources: int) -> None:
         """
         Generate experimental setup for a new experiment.
+        TODO: Refactor using save_data, BUG: metabolic matrices
 
         :param num_species: Number of species in the system.
         :param num_resources: Number of resources in the system.
@@ -129,6 +130,50 @@ def create_data(self, num_species: int, num_resources: int) -> None:
         }
         with open(os.path.join(data_path, "parameters.json"), "w") as f:
             json.dump(param_dict, f, indent=4)
+        
+    def save_data(self, path):
+        """
+        Save the community's data to the specified path.
+
+        Writes parameters.json, metabolic_matrices.csv, consumer_preference.csv
+        and leakage_coefficients.csv into the directory, creating it if needed.
+
+        :param path: Path to the directory where data will be saved.
+        """
+        os.makedirs(path, exist_ok=True)
+
+        # json cannot encode numpy arrays or numpy scalars (e.g. np.int64), so
+        # convert both to plain Python objects before dumping
+        serializable_params = {
+            key: (
+                value.tolist()
+                if isinstance(value, (np.ndarray, np.generic))
+                else value
+            )
+            for key, value in self.params.items()
+        }
+        with open(os.path.join(path, "parameters.json"), "w") as f:
+            json.dump(serializable_params, f, indent=4)
+
+        # Stack the per-species matrices in self.D row-wise; the index repeats each
+        # species name once per resource and "resource" labels the stacked rows
+        num_species = len(self.species_names)
+        metabolic_matrices = pd.DataFrame(
+            np.concatenate(self.D),
+            index=np.repeat(self.species_names, len(self.resource_names)),
+            columns=self.resource_names,
+        )
+        # np.tile cycles the names whether they are held in a list or an array
+        metabolic_matrices["resource"] = np.tile(self.resource_names, num_species)
+        metabolic_matrices.to_csv(os.path.join(path, "metabolic_matrices.csv"))
+
+        # C and l are both written as species (rows) x resources (columns) tables
+        labels = {"index": self.species_names, "columns": self.resource_names}
+        for matrix, file_name in (
+            (self.C, "consumer_preference.csv"),
+            (self.l, "leakage_coefficients.csv"),
+        ):
+            pd.DataFrame(matrix, **labels).to_csv(os.path.join(path, file_name))
 
     def __str__(self) -> str:
         """Prints the community object with the data loaded"""

From 753b8cccfb5e7d9e1c134ac6355c9643c6cddf39 Mon Sep 17 00:00:00 2001
From: NomadicPython <8770011+NomadicPython@users.noreply.github.com>
Date: Mon, 28 Apr 2025 19:09:53 +0200
Subject: [PATCH 2/5] added dirichlet sampling for metabolic conversion matrix

---
 utilities.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities.py b/utilities.py
index 310a9b1..ceffd88 100644
--- a/utilities.py
+++ b/utilities.py
@@ -78,5 +78,5 @@ def extract_d_matrices(combined_metabolic_csv: str) -> np.ndarray:
     """
     data = pd.read_csv(combined_metabolic_csv, header=0, index_col=0)
     species_list = data.index.unique()
-    D = [data.loc[species].set_index("resource") for species in species_list]
+    D = [create_d_matrix(data.loc[species].set_index("resource")) for species in species_list]
     return np.array(D)

From 3884ff0baa812e9f4ac306fb3703c7ed1903bebf Mon Sep 17 00:00:00 2001
From: NomadicPython <8770011+NomadicPython@users.noreply.github.com>
Date: Wed, 30 Apr 2025 17:13:36 +0200
Subject: [PATCH 3/5] add randomize argument in self.load_data() to optionally
 load data without any random sampling and reproduce results

---
 community.py | 14 ++++++++++----
 utilities.py | 12 ++++++++++--
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/community.py b/community.py
index 644234d..cf394fe 100644
--- a/community.py
+++ b/community.py
@@ -30,7 +30,7 @@ def __init__(self, experiment_name: str):
         "Intialize the community with the data that is same for an experiment"
         self.exp = experiment_name
 
-    def load_data(self, data_path: str | None = None) -> None:
+    def load_data(self, data_path: str | None = None, randomize: bool = True) -> None:
         """
         Load experimental data from the specified path.
 
@@ -51,17 +51,23 @@ def load_data(self, data_path: str | None = None) -> None:
                 self.params[key] = np.array(value)
 
         # load consumer preference
+        if randomize:
+            cv = self.params["cv"]
+        else:
+            cv = 0
         self.C, self.species_names, self.resource_names = create_c_matrix(
             pd.read_csv(
                 os.path.join(data_path, "consumer_preference.csv"),
                 index_col=0,
                 header=0,
             ),
-            cv=self.params["cv"],
+            cv=cv,
         )
 
         # load metabolic matrices
-        self.D = extract_d_matrices(os.path.join(data_path, "metabolic_matrices.csv"))
+        self.D = extract_d_matrices(
+            os.path.join(data_path, "metabolic_matrices.csv"), use_dirichlet=randomize
+        )
 
         # load leakage coefficients
         self.l = pd.read_csv(
@@ -130,7 +136,7 @@ def create_data(self, num_species: int, num_resources: int) -> None:
         }
         with open(os.path.join(data_path, "parameters.json"), "w") as f:
             json.dump(param_dict, f, indent=4)
-        
+
     def save_data(self, path):
         """
         Save the community's data to the specified path.
diff --git a/utilities.py b/utilities.py
index ceffd88..463f180 100644
--- a/utilities.py
+++ b/utilities.py
@@ -70,7 +70,9 @@ def create_d_matrix(metabolic_df: pd.DataFrame) -> np.ndarray:
     )
 
 
-def extract_d_matrices(combined_metabolic_csv: str) -> np.ndarray:
+def extract_d_matrices(
+    combined_metabolic_csv: str, use_dirichlet: bool = True
+) -> np.ndarray:
     """Extracts species specific DF from a single csv file
 
     :param combined_metabolic_csv: path to the csv file
@@ -78,5 +80,11 @@ def extract_d_matrices(combined_metabolic_csv: str) -> np.ndarray:
     """
     data = pd.read_csv(combined_metabolic_csv, header=0, index_col=0)
     species_list = data.index.unique()
-    D = [create_d_matrix(data.loc[species].set_index("resource")) for species in species_list]
+    if use_dirichlet:
+        D = [
+            create_d_matrix(data.loc[species].set_index("resource"))
+            for species in species_list
+        ]
+    else:
+        D = [data.loc[species].set_index("resource") for species in species_list]
     return np.array(D)

From 45f81b9e39c3eaf298bed3354feb38ce6c0f2dd1 Mon Sep 17 00:00:00 2001
From: NomadicPython <8770011+NomadicPython@users.noreply.github.com>
Date: Thu, 1 May 2025 19:26:37 +0200
Subject: [PATCH 4/5] add optimized_integrate() and next_experiment_path()

---
 community.py | 32 ++++++++++++++++++++++++++++++++
 utilities.py | 24 ++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/community.py b/community.py
index cf394fe..9e6865f 100644
--- a/community.py
+++ b/community.py
@@ -35,6 +35,7 @@ def load_data(self, data_path: str | None = None, randomize: bool = True) -> Non
         Load experimental data from the specified path.
 
         :param data_path: Path to the data folder (optional, defaults to the experiment's data folder if None).
+        :param randomize: If true, consumer preference and metabolic matrices use random sampling from existing data.
         """
         if data_path == None:
             data_path = os.path.abspath(
@@ -382,3 +383,34 @@ def integrate(
             args=(self.C, self.D, self.l, self.params),
             **kwargs,
         )
+
+    def optimized_integrate(
+        self,
+        time: int | float,
+        y0: np.ndarray | None = None,
+        time_step: int = 1000,  # duration of each integration batch
+        threshold: float = 10 ** (-4),  # species not above this are zeroed per batch
+        **kwargs,
+    ) -> scipy.integrate._ivp.ivp.OdeResult:
+        """
+        Numerically integrate the community over the provided timespan using consumer-resource dynamics.
+
+        :param time: Time duration for integration.
+        :param y0: Initial state vector of species and resources (optional).
+        :param kwargs: Additional arguments forwarded unchanged to every self.integrate call.
+        :return: OdeResult of the final (leftover-time) self.integrate call only, not the whole timespan.
+        """
+        # default to the initial species (N0) and resource (R0) values stored in params
+        if y0 is None:
+            y0 = np.concatenate((self.params["N0"], self.params["R0"]))
+        num_steps, leftover_time = time // time_step, time % time_step
+        # simulate in batches of time_step duration
+        for i in range(num_steps):
+            sol = self.integrate(time_step, y0, **kwargs)
+            y0 = sol.y[:, -1]
+            y0[: len(self.species_names)] = y0[: len(self.species_names)] * (
+                y0[: len(self.species_names)] > threshold
+            )
+        # simulate for any remaining time
+        sol = self.integrate(leftover_time, y0, **kwargs)
+        return sol
diff --git a/utilities.py b/utilities.py
index 463f180..d26ee11 100644
--- a/utilities.py
+++ b/utilities.py
@@ -8,6 +8,7 @@
 """
 import numpy as np
 import pandas as pd
+import os
 
 
 def load_data(csv_file: str) -> pd.DataFrame:
@@ -42,9 +43,10 @@ def create_c_matrix(
 ) -> np.ndarray:
     """Generate a stochastic consumer preference matrix from template
 
-    :param consumer_df: pandas dataframe with integer values for consumer preferences
-                        serve as the mean for the randomly sampled value.
+    :param consumer_pref: pandas dataframe with integer values for consumer preferences
+                          serve as the mean for the randomly sampled value.
     :param cv: A single coefficient of variation used for random sampling
+    :param replacement_type: 'perturbation' (default) | 'minimum', determines how zeros are replaced
     :return c_matrix: numpy matrix
     """
     species_names = consumer_pref.index.to_list()
@@ -88,3 +90,21 @@ def extract_d_matrices(
     else:
         D = [data.loc[species].set_index("resource") for species in species_list]
     return np.array(D)
+
+
+def next_experiment_path(log_folder: str) -> str:
+    """Return the path of the next numbered experiment folder in the log folder
+
+    :param log_folder: path to the log folder
+    :return: path to the next experiment folder (number zero-padded to 4 digits)
+    """
+    # Compare folder numbers numerically and ignore non-numeric folders: sorting
+    # the names as strings misorders them once they outgrow the 4-digit padding.
+    experiment_ids = [
+        int(subdir)
+        for subdir in os.listdir(log_folder)
+        if subdir.isdecimal() and os.path.isdir(os.path.join(log_folder, subdir))
+    ]
+    if not experiment_ids:
+        return os.path.join(log_folder, "0001")
+    return os.path.join(log_folder, str(max(experiment_ids) + 1).zfill(4))

From e3c33439543d08e025e8ada24758c081aed63e63 Mon Sep 17 00:00:00 2001
From: NomadicPython <8770011+NomadicPython@users.noreply.github.com>
Date: Mon, 12 May 2025 12:37:10 +0200
Subject: [PATCH 5/5] early steady state detection

---
 community.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/community.py b/community.py
index 9e6865f..261ca35 100644
--- a/community.py
+++ b/community.py
@@ -394,6 +394,7 @@ def optimized_integrate(
     ) -> scipy.integrate._ivp.ivp.OdeResult:
         """
         Numerically integrate the community over the provided timespan using consumer-resource dynamics.
+        Sets species with really low presence to 0.
 
         :param time: Time duration for integration.
         :param y0: Initial state vector of species and resources (optional).
@@ -406,11 +407,17 @@ def optimized_integrate(
-        num_steps, leftover_time = time // time_step, time % time_step
+        # floor division returns a float for a float `time`; range() needs an int
+        num_steps, leftover_time = int(time // time_step), time % time_step
         # simulate in batches of time_step duration
         for i in range(num_steps):
+            prev_y0 = y0
             sol = self.integrate(time_step, y0, **kwargs)
             y0 = sol.y[:, -1]
+            # set species with low presence to 0
             y0[: len(self.species_names)] = y0[: len(self.species_names)] * (
                 y0[: len(self.species_names)] > threshold
             )
+            # if the solution has not changed, break the loop
+            if (np.round(prev_y0, 4) == np.round(y0, 4)).all():
+                break
         # simulate for any remaining time
         sol = self.integrate(leftover_time, y0, **kwargs)
         return sol