From ccc8598613abf11c7e98738d756282f62905b5fe Mon Sep 17 00:00:00 2001
From: bastien-mva <bastien.batardiere@gmail.com>
Date: Wed, 26 Jun 2024 11:01:04 +0200
Subject: [PATCH 1/2] add summary on the latent variables.

---
 pyPLNmodels/microcosm.py |  4 +--
 pyPLNmodels/models.py    | 54 ++++++++++++++++++++++++++++++++++++++++
 tests/test_common.py     |  9 +++----
 3 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/pyPLNmodels/microcosm.py b/pyPLNmodels/microcosm.py
index 7f2575cc..95d63cf4 100644
--- a/pyPLNmodels/microcosm.py
+++ b/pyPLNmodels/microcosm.py
@@ -26,9 +26,9 @@ def load_microcosm(
     Parameters
     ----------
     n_samples : int, optional
-        Number of samples, by default max_samples.
+        Number of samples, by default 300.
     dim : int, optional
-        Dimension, by default max_dim.
+        Dimension, by default 200.
     get_affil: bool, optional (keyword-only)
         If True, will return the affiliations also. Default to False .
     cov_list: list, optional (keyword-only).
diff --git a/pyPLNmodels/models.py b/pyPLNmodels/models.py
index a8deabcc..182c5992 100644
--- a/pyPLNmodels/models.py
+++ b/pyPLNmodels/models.py
@@ -13,6 +13,7 @@ import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
 import matplotlib
 from scipy import stats
+from statsmodels.api import OLS
 
 from pyPLNmodels._closed_forms import (
     _closed_formula_coef,
@@ -459,6 +460,51 @@ class _model(ABC):
         self._print_end_of_fitting_message(stop_condition, tol)
         self._fitted = True
 
+    def summary(
+        self,
+        variable_number,
+        yname: str = None,
+        xname: list[str] = None,
+        title: str = None,
+        alpha: float = 0.05,
+        slim: bool = False,
+    ):
+        """
+        Summary from statsmodels on the latent variables.
+
+        Parameters
+        ----------
+        variable_number : int
+            Index of the latent variable (column) used as the response.
+        yname : str, Optional
+            Name of endogenous (response) variable. The Default is y.
+        xname : list[str], Optional
+            Names for the exogenous variables. Default is var_## for ## in the
+            number of regressors. Must match the number of parameters in the model.
+        title : str, Optional
+            Title for the top table. If not None, then this replaces the default title.
+        alpha : float, optional
+            The significance level for the confidence intervals.
+        slim: bool, Optional
+            Flag indicating to produce reduced set of diagnostic information. Default is False.
+        """
+        if self.exog is None:
+            print("No exog in the model, can not perform a summary.")
+        else:
+            ols = self._fit_ols(variable_number)
+            return ols.summary(
+                yname=yname, xname=xname, title=title, alpha=alpha, slim=slim
+            )
+
+    # TODO: complete the docstring of the summary function.
+
+    def _fit_ols(self, variable_number):
+        return OLS(
+            self.latent_variables.numpy()[:, variable_number],
+            self.exog.numpy(),
+            hasconst=True,
+        ).fit()
+
     @property
     def fitted(self) -> bool:
         """
@@ -4577,6 +4623,14 @@ class ZIPln(_model):
         variables = self.latent_prob
         return self._viz_variables(variables, colors=colors, ax=ax, show_cov=False)
 
+    def _fit_ols(self, variable_number):
+        latent_variables, _ = self.latent_variables
+        return OLS(
+            latent_variables.numpy()[:, variable_number],
+            self.exog.numpy(),
+            hasconst=True,
+        ).fit()
+
 
 class Brute_ZIPln(ZIPln):
     @property
diff --git a/tests/test_common.py b/tests/test_common.py
index f4f05d61..450790df 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -12,10 +12,8 @@ pln_and_plnpca = ["Pln", "PlnPCA"]
 single_models = ["Pln", "PlnPCA", "ZIPln"]
 
 
-# @pytest.mark.parametrize("any_model", dict_fixtures["loaded_and_fitted_model"])
-# @filter_models(single_models)
-@pytest.mark.parametrize("any_model", dict_fixtures["loaded_model"])
-@filter_models(["ZIPln"])
+@pytest.mark.parametrize("any_model", dict_fixtures["loaded_and_fitted_model"])
+@filter_models(single_models)
 def test_properties(any_model):
     assert hasattr(any_model, "latent_parameters")
     assert hasattr(any_model, "latent_variables")
@@ -25,7 +23,7 @@ def test_properties(any_model):
 
 @pytest.mark.parametrize("sim_model", dict_fixtures["loaded_and_fitted_model"])
 @filter_models(pln_and_plnpca)
-def test_predict_simulated(sim_model):
+def test_predict_simulated_and_summary(sim_model):
     if sim_model.nb_cov == 0:
         assert sim_model.predict() is None
         with pytest.raises(AttributeError):
@@ -35,6 +33,7 @@ def test_predict_simulated(sim_model):
         prediction = sim_model.predict(X)
         expected = X @ sim_model.coef
         assert torch.all(torch.eq(expected, prediction))
+    print(sim_model.summary(variable_number=2))
 
 
 @pytest.mark.parametrize("any_instance_model", dict_fixtures["instances"])
-- 
GitLab


From 2912a069a3cc52b052c85a62b0db8a5339221b5f Mon Sep 17 00:00:00 2001
From: bastien-mva <bastien.batardiere@gmail.com>
Date: Wed, 26 Jun 2024 11:07:19 +0200
Subject: [PATCH 2/2] add statsmodels.

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ae5c5f4f..507915cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ dependencies = [
 "scikit-learn",
 "patsy",
 "tqdm",
+"statsmodels",
 ]
 [project.optional-dependencies]
 tests = ["pytest","pytest_lazy_fixture"]
@@ -70,5 +71,3 @@ include-package-data = true
 [tool.setuptools.packages.find]
 include = ["pyPLNmodels", "pyPLNmodels.*"]
 namespaces = false
-
-
-- 
GitLab