From ccc8598613abf11c7e98738d756282f62905b5fe Mon Sep 17 00:00:00 2001
From: bastien-mva <bastien.batardiere@gmail.com>
Date: Wed, 26 Jun 2024 11:01:04 +0200
Subject: [PATCH 1/2] add summary on the latent variables.

---
 pyPLNmodels/microcosm.py |  4 +--
 pyPLNmodels/models.py    | 54 ++++++++++++++++++++++++++++++++++++++++
 tests/test_common.py     |  9 +++----
 3 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/pyPLNmodels/microcosm.py b/pyPLNmodels/microcosm.py
index 7f2575cc..95d63cf4 100644
--- a/pyPLNmodels/microcosm.py
+++ b/pyPLNmodels/microcosm.py
@@ -26,9 +26,9 @@ def load_microcosm(
     Parameters
     ----------
     n_samples : int, optional
-        Number of samples, by default max_samples.
+        Number of samples, by default 300.
     dim : int, optional
-        Dimension, by default max_dim.
+        Dimension, by default 200.
     get_affil: bool, optional (keyword-only)
         If True, will return the affiliations also. Default to False .
     cov_list: list, optional (keyword-only).
diff --git a/pyPLNmodels/models.py b/pyPLNmodels/models.py
index a8deabcc..182c5992 100644
--- a/pyPLNmodels/models.py
+++ b/pyPLNmodels/models.py
@@ -13,6 +13,7 @@ import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
 import matplotlib
 from scipy import stats
+from statsmodels.api import OLS
 
 from pyPLNmodels._closed_forms import (
     _closed_formula_coef,
@@ -459,6 +460,51 @@ class _model(ABC):
         self._print_end_of_fitting_message(stop_condition, tol)
         self._fitted = True
 
+    def summary(
+        self,
+        variable_number,
+        yname: str = None,
+        xname: list[str] = None,
+        title: str = None,
+        alpha: float = 0.05,
+        slim: bool = False,
+    ):
+        """
+        Statsmodels OLS summary of one latent variable regressed on the exogenous variables.
+
+        Parameters
+        ----------
+        variable_number : int
+            Index of the latent variable (column) to summarize.
+        yname : str, optional
+            Name of the endogenous (response) variable. Default is 'y'.
+        xname : list of str, optional
+            Names for the exogenous variables. Default is 'var_##' for ##
+            in the number of regressors.
+            Must match the number of parameters in the model.
+
+        title : str, optional
+            Title for the top table. If not None, this replaces the default title.
+        alpha : float, optional
+            The significance level for the confidence intervals.
+        slim : bool, optional
+            Flag indicating whether to produce a reduced set of summary information. Default is False.
+        """
+        if self.exog is None:
+            print("No exog in the model, cannot perform a summary.")
+        else:
+            ols = self._fit_ols(variable_number)
+            return ols.summary(
+                yname=yname, xname=xname, title=title, alpha=alpha, slim=slim
+            )
+
+    def _fit_ols(self, variable_number):
+        return OLS(
+            self.latent_variables.numpy()[:, variable_number],
+            self.exog.numpy(),
+            hasconst=True,
+        ).fit()
+
     @property
     def fitted(self) -> bool:
         """
@@ -4577,6 +4623,14 @@ class ZIPln(_model):
         variables = self.latent_prob
         return self._viz_variables(variables, colors=colors, ax=ax, show_cov=False)
 
+    def _fit_ols(self, variable_number):
+        latent_variables, _ = self.latent_variables
+        return OLS(
+            latent_variables.numpy()[:, variable_number],
+            self.exog.numpy(),
+            hasconst=True,
+        ).fit()
+
 
 class Brute_ZIPln(ZIPln):
     @property
diff --git a/tests/test_common.py b/tests/test_common.py
index f4f05d61..450790df 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -12,10 +12,8 @@ pln_and_plnpca = ["Pln", "PlnPCA"]
 single_models = ["Pln", "PlnPCA", "ZIPln"]
 
 
-# @pytest.mark.parametrize("any_model", dict_fixtures["loaded_and_fitted_model"])
-# @filter_models(single_models)
-@pytest.mark.parametrize("any_model", dict_fixtures["loaded_model"])
-@filter_models(["ZIPln"])
+@pytest.mark.parametrize("any_model", dict_fixtures["loaded_and_fitted_model"])
+@filter_models(single_models)
 def test_properties(any_model):
     assert hasattr(any_model, "latent_parameters")
     assert hasattr(any_model, "latent_variables")
@@ -25,7 +23,7 @@
 
 @pytest.mark.parametrize("sim_model", dict_fixtures["loaded_and_fitted_model"])
 @filter_models(pln_and_plnpca)
-def test_predict_simulated(sim_model):
+def test_predict_simulated_and_summary(sim_model):
     if sim_model.nb_cov == 0:
         assert sim_model.predict() is None
         with pytest.raises(AttributeError):
@@ -35,6 +33,7 @@ def test_predict_simulated(sim_model):
         prediction = sim_model.predict(X)
         expected = X @ sim_model.coef
         assert torch.all(torch.eq(expected, prediction))
+        print(sim_model.summary(variable_number=2))
 
 
 @pytest.mark.parametrize("any_instance_model", dict_fixtures["instances"])
-- 
GitLab

From 2912a069a3cc52b052c85a62b0db8a5339221b5f Mon Sep 17 00:00:00 2001
From: bastien-mva <bastien.batardiere@gmail.com>
Date: Wed, 26 Jun 2024 11:07:19 +0200
Subject: [PATCH 2/2] add statsmodels.

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ae5c5f4f..507915cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,7 @@ dependencies = [
 "scikit-learn",
 "patsy",
 "tqdm",
+"statsmodels",
 ]
 [project.optional-dependencies]
 tests = ["pytest","pytest_lazy_fixture"]
@@ -70,5 +71,3 @@ include-package-data = true
 [tool.setuptools.packages.find]
 include = ["pyPLNmodels", "pyPLNmodels.*"]
 namespaces = false
-
-
-- 
GitLab
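A minimal usage sketch of the summary() method added by PATCH 1/2. This is illustrative only: it assumes load_microcosm() is exposed at the package top level and returns its counts under the key "endog" together with a covariate named "site"; variable_number=2 mirrors the call added in tests/test_common.py.

# Fit a Pln model on the microcosm data, then print the statsmodels OLS
# summary of latent variable 2 regressed on the model's exogenous design.
# "site" is an assumed covariate name; replace it with a column actually
# returned by load_microcosm().
from pyPLNmodels import Pln, load_microcosm

micro = load_microcosm(n_samples=300, dim=100)
pln = Pln.from_formula("endog ~ 1 + site", micro)
pln.fit()
print(pln.summary(variable_number=2, alpha=0.05))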