diff --git a/.Rbuildignore b/.Rbuildignore
index a4115536511ff1959906f59fdce2f44be87ee2e1..24bc3f5ea600a4920f8db840be2173a2d68d619a 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -13,3 +13,4 @@ $run_dev.*
 ^Dockerfile$
 ^\.dockerignore$
 ^\.gitlab-ci.yml$
+^header\.R?md$
diff --git a/R/mod_modalTransfo.R b/R/mod_modalTransfo.R
index 6b00032030e584c41dd3b9a6cc56d9e85ebe76c1..351679a7861a0f68babce19ce1133f5794aff9bc 100644
--- a/R/mod_modalTransfo.R
+++ b/R/mod_modalTransfo.R
@@ -90,7 +90,7 @@ mod_modalTransfo_server <- function(id, r) {
           if (module$fun_transfo == "prune_samples") {
             glue::glue("Filter based on sample names (to keep: {str_flatten_comma(module$var_transfo)})")
           } else if (module$fun_transfo == "tax_glom") {
-            glue::glue("Agglomerate taxa ok taxonomic rank *{module$var_transfo}*")
+            glue::glue("Agglomerate taxa on taxonomic rank *{module$var_transfo}*")
           } else if (module$fun_transfo == "tax_spread") {
             glue::glue("Spread taxonomy according to last known taxa, to remove unknown and multi-affiliations (pattern: {str_flatten_comma(module$var_transfo)})")
           } else if (module$fun_transfo == "rarefy_even_depth") {
diff --git a/README.Rmd b/README.Rmd
index 807653eb88fad7eb02b464a57d3d0982c0d2390d..c9d5ad3e8c692da334b5f3cf5f5a0b47542e406c 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -1,10 +1,16 @@
 ---
-output: github_document
+title: "Easy16S"
+output:
+  github_document:
+    includes:
+      in_header: header.md
+    toc: true
+    toc_depth: 5
 ---
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->
 
-```{r, include = FALSE}
+```{r, include=FALSE}
 knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>",
@@ -13,19 +19,16 @@ knitr::opts_chunk$set(
 )
 ```
 
-# easy16S
+The goal of easy16S is to provide a user-friendly interactive web-application with convenient functions and default settings to explore, visualize and analyze metabarcoding data.
 
-<!-- badges: start -->
-[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
-<!-- badges: end -->
-
-The goal of easy16S is to ...
+It builds upon the [phyloseq](https://joey711.github.io/phyloseq/) package and its extensions and is tightly integrated with the [FROGS](https://frogs.toulouse.inrae.fr/index.html) sequence processing suite.
 
 ## Installation
 
 You can install the development version of easy16S like so:
 
 ``` r
+# install.packages("remotes")
 remotes::install_gitlab(
   repo = "cedric.midoux/easy16S@main",
   host = "forgemia.inra.fr"
@@ -34,11 +37,210 @@ remotes::install_gitlab(
 
 ## Run the Shiny Application
 
+To run the Shiny application, execute the following code in your R environment:
+
 ```{r run_app, eval=FALSE}
 easy16S::run_app()
-easy16S::run_app(phyloseq.extended::food)
+easy16S::run_app(physeq = phyloseq.extended::food) # directly load your data
 ```
 
+## Docker Integration
+
+For each release, a [CI/CD pipeline](.gitlab-ci.yml) builds a Docker image using a [Dockerfile](Dockerfile). This process is particularly useful for deploying with ShinyProxy.
+
+The Docker images are available in the [container registry](https://forgemia.inra.fr/cedric.midoux/easy16s/container_registry).
+
+The image exposes port `3838` and launches the app using `easy16S::run_app(options = list(launch.browser = FALSE))`.
+
+Therefore, to run the application, execute the following commands:
+
+``` bash
+docker pull registry.forgemia.inra.fr/cedric.midoux/easy16s:v23.11
+docker run --rm --publish 3838:3838 registry.forgemia.inra.fr/cedric.midoux/easy16s:v23.11
+```
+
+The app will be accessible at <http://localhost:3838/>.
+
+## Usage
+
+### Load data
+
+Users can load a phyloseq object directly when launching the app using the following syntax: `easy16S::run_app(physeq = phyloseq.extended::food)`.
+
+Alternatively, there are three ways to load data when the app is launched:
+
+-   Use one of the demo datasets provided with the application.
+-   Upload flat files to build a phyloseq object:
+    -   a BIOM file ([Standard format](https://biom-format.org/) or [FROGS format](https://frogs.toulouse.inrae.fr/index.html)) [mandatory].
+    -   a metadata table with variables (in columns) and samples (in rows). Ensure that sample names (1st column) are spelled exactly as in the BIOM file. The delimiter and format of columns can be specified.
+    -   a phylogenetic tree in Newick format.
+    -   a FASTA file with representative sequences.
+-   Upload a phyloseq object as:
+    -   an RDS file.
+    -   a RData file containing a phyloseq object named `data`.
+
+Additionally, an RDS object can be provided directly from a URL: `https://shiny.migale.inrae.fr/app/easy16S/?rds=https://mywebsite.com/path/to/my/data.rds`
+
+### Preprocess data
+
+Before doing any analysis, it is customary to preprocess the data to refine and clean the raw data. The following operations are available and can be applied iteratively to achieve rich selections:
+
+-   Select samples based on their name.
+-   Filter samples based on the sample variables available in the metadata table.
+-   Aggregate taxa at a specified taxonomic rank (e.g. Genus, Family, etc.).
+-   Spread taxonomy to remove unknown and multi-affiliations by spreading the last known rank to further ranks (e.g. "Bacillus;multi-affiliation" would become "Bacillus; unknown Bacillus species").
+-   Rarefaction (resample the abundance table to ensure that all samples have the same depth, set as the minimum one among samples).
+-   Transform the abundances in the abundance table, using one of the following: `prop` (change abundances to proportions / relative abundances), `sqrt` (square root), `sqrt_prop` (square root of relative abundances), `clr` ([centered log-ratio](https://en.wikipedia.org/wiki/Compositional_data#Center_log_ratio_transform), after adding a pseudo-count of 1).
+
+Once the desired operations are selected, users can seamlessly switch between the raw and preprocessed data to assess the impact of the applied transformations.
+
+#### A few words about rarefaction
+
+For many analyses (notably all those based on presence / absence data and more generally [diversity analyses](https://doi.org/10.1101/2023.06.23.546312)), it is recommended to normalize the samples by rarefying to account for variations in sequencing effort and ensure that the detection probability is comparable across samples. Rarefying involves subsampling each sample to the same depth, ensuring a more equitable comparison of microbial diversity across samples. It is however [not advised](https://doi.org/10.1371/journal.pcbi.1003531) for differential abundance analyses as it decreases statistical power.
+
+### Explore and Analyse Data
+
+#### Tables
+
+Users can visualize and explore key tables constituting the phyloseq object under study:
+
+-   **OTU/ASV Table:** Abundance of each OTU/ASV in all samples.
+-   **Taxonomy Table:** Taxonomic affiliation of each OTU/ASV at different taxonomic ranks (e.g. Phylum to Species).
+-   **Agglomerate OTU/ASV Table:** Same as OTU/ASV Table but after merging all ASV/OTU sharing the same taxonomic affiliation up to a user-specified rank.
+-   **Sample Data Table:** Metadata associated with each sample, as provided by the user during the import process (metadata table).
+
+For a deeper understanding of how phyloseq objects function, refer to the [phyloseq documentation on data import](https://joey711.github.io/phyloseq/import-data.html#phyloseq-ize_data_already_in_r).
+
+#### Metadata
+
+This section provides access to the sample data table for use with the [esquisse addin](https://dreamrs.github.io/esquisse/index.html). It is useful to explore and assess associations between sample variables (but not metabarcoding data).
+
+> This addin allows you to interactively explore your data by visualizing it with the ggplot2 package. It allows you to draw bar plots, curves, scatter plots, histograms, boxplot and sf objects, then export the graph or retrieve the code to reproduce the graph.
+
+#### Barplot
+
+Used to create composition graphs (stacked barplots of relative abundances), based on the `phyloseq.extended::plot_composition()` function. This feature provides users with the option to:
+
+-   Specify the taxonomic rank used for aggregation and coloring.
+-   Filter and display results for a specific taxon.
+-   Group samples based on metadata.
+
+Composition barplots show the relative abundance of all or part of the sample diversity.
+
+See also [bar plots on phyloseq documentation](https://joey711.github.io/phyloseq/plot_bar-examples.html).
+
+#### Rarefaction
+
+Used to create rarefaction curves, based on the `phyloseq.extended::ggrare()` function. These settings provide users with the option to:
+
+-   Color, annotate and group samples based on metadata.
+-   Display a minimum sample threshold.
+
+Rarefaction curves are used to evaluate the relationship between richness and sampling effort (number of reads, or sequencing depth) in each sample. This curve shows the expected number of OTUs/ASVs observed in each sample based on the sequencing depth. Rarefaction curves generally grow rapidly at first, as the most common OTUs/ASVs are found, but the curves plateau as the diversity saturates as only the rarest ones remain to be observed.
+
+#### Heatmap
+
+To create an ecologically-organized heatmap, use the `phyloseq::plot_heatmap()` function. These settings provide users with the option to:
+
+-   Select only the *n* most abundant taxa for display.
+-   Agglomerate taxa at a user-specified taxonomic rank.
+-   Group, annotate and order samples based on metadata.
+-   Display the affiliation of each OTU/ASV at a user-specified taxonomic rank.
+
+Heatmaps can be used to investigate the structuring of sample communities, ordered using a "NMDS" ordination (samples ordered by increasing angle between the x-axis and their projection). It can also be used to observe core and condition-specific microbiota.
+
+See also [heatmap plots in the phyloseq documentation](https://joey711.github.io/phyloseq/plot_heatmap-examples).
+
+#### $\alpha$-Diversity
+
+$\alpha$-diversity measures richness within a sample. Detailed information on this concept and the different metrics available in easy16S can be found in the [alpha diversity section of the phyloseq documentation](https://joey711.github.io/phyloseq/plot_richness-examples.html).
+
+##### Table
+
+Compute the main alpha diversity estimators using the `phyloseq::estimate_richness()` function. If a sample data table is available, it is included in the table for further analyses (e.g. ANOVA, regression, etc.).
+
+##### Plot
+
+Visualize the previously calculated metrics with the `phyloseq::plot_richness()` function. Users can customize the arrangement of samples along the x-axis (`X`), color and shape of samples based on metadata. Additionally, diversity data can be displayed as boxplots instead of points.
+
+##### ANOVA
+
+This section performs *ANOVA* on the diversity table enriched with the metadata to assess the impact of a covariate of interest on the alpha-diversity. For categorical variables, a post-hoc pairwise comparison table is also provided to identify levels of the variable with significantly different diversities.
+
+#### $\beta$-diversity
+
+$\beta$-diversity measures the dissimilarity between samples, capturing richness variations. The selection of a distance metric is crucial, and detailed information is available in the [phyloseq documentation](https://joey711.github.io/phyloseq/distance.html) or in the [gusta.me website](https://sites.google.com/site/mb3gustame/reference/dissimilarity-distance). These functions can be compositional or qualitative, phylogenetic or not, and the choice depends on the features of interest.
+
+> Different distances capture different features of the samples. There is no "one size fits all." However, choosing an appropriate measure is essential as it will strongly affect how your data is treated during analysis and what kind of interpretations are meaningful.
+
+##### Table
+
+Compute distances between each pair of samples using the `phyloseq::distance()` function and the chosen distance metric.
+
+##### Samples heatmap
+
+Plot matrix of pairwise distances using the `phyloseq.extended::plot_dist_as_heatmap()` function. Users can customize sample order based on metadata to highlight patterns (e.g. lower within-group than between-group distances).
+
+##### Samples clustering
+
+Use the distance matrix and a user-specified linkage method (e.g. Ward, complete, average, etc.) to compute and plot a hierarchical clustering tree of the samples with the `phyloseq.extended::plot_clust()` function. Users can color leaves of the tree (i.e. samples) according to a categorical metadata variable to identify the variables along which the samples separate.
+
+##### MultiDimensional Scaling
+
+Use the distance matrix to ordinate the samples (i.e. project them while preserving at best their pairwise distances) in a low-dimensional space with the `phyloseq::ordination()` function, and visualize this ordination with the `phyloseq::plot_ordination()` function. In addition to selecting the ordination method ([MDS/PCoA](https://sites.google.com/site/mb3gustame/dissimilarity-based-methods/principal-coordinates-analysis), [NMDS](https://sites.google.com/site/mb3gustame/dissimilarity-based-methods/non-metric-multidimensional-scaling), etc.), users can customize color, shape and labels of samples based on metadata. Additionally, ellipses can be added to group samples in the same category of a variable (e.g. healthy versus diseased individuals). By default, the ordination represents the principal plane (axes 1 and 2) of the projection but further axes can be used for plotting.
+
+These graphs serve as powerful tools for exploring and interpreting factors structuring the microbial community structures.
+
+For more examples and details, refer to [ordination plots on phyloseq documentation](https://joey711.github.io/phyloseq/plot_ordination-examples.html) or [GUSTA ME](https://sites.google.com/site/mb3gustame/dissimilarity-based-methods/principal-coordinates-analysis).
+
+##### Multivariate ANOVA
+
+Use *Permutational Multivariate ANOVA* to assess the impact of one or several covariates on community structure with `vegan::adonis2(by = 'terms', perm = 9999)`. The test compares the structure given by sample data with 9999 randomly generated structures. *Permutational Multivariate ANOVA* (also called [non parametric multivariate ANOVA](https://sites.google.com/site/mb3gustame/hypothesis-tests/manova/npmanova) or npmanova) accommodates complex designs, but it tests only location effects (e.g. are the typical communities similar in groups A and B?) and assumes equal dispersions (i.e. same biological variability in both groups).
+
+Users should specify up to 3 covariates and their potential interactions to be included in the model.
+
+#### PCA
+
+Perform PCA using `stats::prcomp()` on the abundance matrix. While *MultiDimensional Scaling* (MDS) is often recommended for microbiome analysis, *Principal Component Analysis* (PCA) after appropriate data transformation can be an alternative. The transformed abundances can be centered and/or scaled during the analysis. Users can customize color, shape and labels of samples based on metadata, add ellipses to group samples from the same category, and select the axes of the projection like in Multidimensional Scaling. Loadings (OTU/ASV) of the principal axes can also be incorporated to understand the individual contributions of taxa to each axis.
+
+#### Differential abundance
+
+This section is dedicated to the identification of over- or under-abundant OTU/ASVs based on an experimental variable (categorical or numeric). The main tool for this analysis is the `DESeq2` package (with the `sfType = "poscounts"` used by default to ignore null values when computing scale factors), utilized through the `phyloseq::phyloseq_to_deseq2()` function (refer to [the accompanying vignette](https://joey711.github.io/phyloseq-extensions/DESeq2.html)).
+
+However, note that while `DESeq2` was developed for transcriptomics data using negative binomial models, amplicon metagenomics data are typically very sparse, and how well these models handle such sparsity, even with `sfType = "poscounts"`, is not clear.
+
+To proceed with differential abundance analysis, users need to
+
+-   select an experimental design model
+-   select a contrast of two covariates (for categorical variables).
+
+An interactive volcano plot representing the differentially abundant OTUs is then shown (clicking on any OTU/ASV displays a barplot representing its relative abundance across the samples) alongside an interactive table with detailed information on the differential abundance statistics (p-value, effect size, etc.) and the taxonomy of each OTU.
+
+This analysis allows the user to identify and visualize the taxa that exhibit significant differences in abundance between two conditions, providing valuable insights into the impact of experimental variables on individual microbes.
+
+### Export data, plot, and results
+
+Users can export their (potentially preprocessed) **data** with the "download" icons. The export options include:
+
+-   Exporting data in `.biom` format. Note that if a phylogenetic tree is present, it will not be included in the exported biom file. This format facilitates compatibility with other tools.
+-   Exporting the constructed phyloseq object in `.rds` format. This enables further analysis within R or for use in Easy16S.
+
+For results **tables**, users can easily export them using the `CSV`, `Copy` (to clipboard) or `Excel` buttons.
+
+To export a **plot**, click on the camera button located at the top right of each plot. Global export parameters, such as height, width, scale, and format, can be configured through the menu at the top right of the header. This functionality allows users to resize plots as needed before export.
+
+These export features enhance the usability and accessibility of both data and results, allowing users to seamlessly integrate Easy16S with their preferred analysis tools and workflows.
+
+## Issues and Support
+
+If you encounter any bugs or have suggestions for improvement, please use the [issue tracker](https://forgemia.inra.fr/cedric.midoux/easy16s/-/issues).
+
+## Cite us
+
+We appreciate acknowledgment from research teams using Easy16S. Please include the following sentence in the publications of your analyses: *"We are grateful to the INRAE MIGALE bioinformatics facility (MIGALE, INRAE, 2020. Migale bioinformatics Facility, doi: 10.15454/1.5572390655343293E12) for providing help and computing resources"*.
+
+A scientific publication related to Easy16S is currently in preparation.
+
 ## Code of Conduct
 
 Please note that the easy16S project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms.
diff --git a/README.md b/README.md
index d0551de568e1d624e41818d2290f0e1016dea356..a313c557dd04d27fb897509743eadbe5e58e7b7d 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,453 @@
-
-<!-- README.md is generated from README.Rmd. Please edit that file -->
-
-# easy16S
-
-<!-- badges: start -->
-
-[![Lifecycle:
-experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
-<!-- badges: end -->
-
-The goal of easy16S is to …
-
-## Installation
-
-You can install the development version of easy16S like so:
-
-``` r
-remotes::install_gitlab(
-  repo = "cedric.midoux/easy16S@main",
-  host = "forgemia.inra.fr"
-)
-```
-
-## Run the Shiny Application
-
-``` r
-easy16S::run_app()
-easy16S::run_app(phyloseq.extended::food)
-```
-
-## Code of Conduct
-
-Please note that the easy16S project is released with a [Contributor
-Code of
-Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html).
-By contributing to this project, you agree to abide by its terms.
+Easy16S
+================
+
+
+<!-- header.md is generated from header.Rmd. Please edit that file -->
+<!-- badges: start -->
+
+[![Lifecycle:
+experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
+[![SWH](https://archive.softwareheritage.org/badge/swh:1:dir:1828e5d3e367f8dbb972a1dc6684fc4aebe04825/)](https://archive.softwareheritage.org/swh:1:dir:1828e5d3e367f8dbb972a1dc6684fc4aebe04825)
+[![HAL](https://img.shields.io/badge/-hal--04304559-1E198E?style=flat&logo=HAL)](https://hal.science/hal-04304559)
+
+<!-- badges: end -->
+
+Easy16S is designed to facilitate the exploration, visualization and
+analysis of microbiome data.
+
+- [Installation](#installation)
+- [Run the Shiny Application](#run-the-shiny-application)
+- [Docker Integration](#docker-integration)
+- [Usage](#usage)
+  - [Load data](#load-data)
+  - [Preprocess data](#preprocess-data)
+    - [A few words about rarefaction](#a-few-words-about-rarefaction)
+  - [Explore and Analyse Data](#explore-and-analyse-data)
+    - [Tables](#tables)
+    - [Metadata](#metadata)
+    - [Barplot](#barplot)
+    - [Rarefaction](#rarefaction)
+    - [Heatmap](#heatmap)
+    - [$\alpha$-Diversity](#alpha-diversity)
+      - [Table](#table)
+      - [Plot](#plot)
+      - [ANOVA](#anova)
+    - [$\beta$-diversity](#beta-diversity)
+      - [Table](#table-1)
+      - [Samples heatmap](#samples-heatmap)
+      - [Samples clustering](#samples-clustering)
+      - [MultiDimensional Scaling](#multidimensional-scaling)
+      - [Multivariate ANOVA](#multivariate-anova)
+    - [PCA](#pca)
+    - [Differential abundance](#differential-abundance)
+  - [Export data, plot, and results](#export-data-plot-and-results)
+- [Issues and Support](#issues-and-support)
+- [Cite us](#cite-us)
+- [Code of Conduct](#code-of-conduct)
+
+<!-- README.md is generated from README.Rmd. Please edit that file -->
+
+The goal of easy16S is to provide a user-friendly interactive
+web-application with convenient functions and default settings to
+explore, visualize and analyze metabarcoding data.
+
+It builds upon the [phyloseq](https://joey711.github.io/phyloseq/)
+package and its extensions and is tightly integrated with the
+[FROGS](https://frogs.toulouse.inrae.fr/index.html) sequence processing
+suite.
+
+## Installation
+
+You can install the development version of easy16S like so:
+
+``` r
+# install.packages("remotes")
+remotes::install_gitlab(
+  repo = "cedric.midoux/easy16S@main",
+  host = "forgemia.inra.fr"
+)
+```
+
+## Run the Shiny Application
+
+To run the Shiny application, execute the following code in your R
+environment:
+
+``` r
+easy16S::run_app()
+easy16S::run_app(physeq = phyloseq.extended::food) # directly load your data
+```
+
+## Docker Integration
+
+For each release, a [CI/CD pipeline](.gitlab-ci.yml) builds a Docker
+image using a [Dockerfile](Dockerfile). This process is particularly
+useful for deploying with ShinyProxy.
+
+The Docker images are available in the [container
+registry](https://forgemia.inra.fr/cedric.midoux/easy16s/container_registry).
+
+The image exposes port `3838` and launches the app using
+`easy16S::run_app(options = list(launch.browser = FALSE))`
+
+Therefore, to run the application, execute the following commands:
+
+``` bash
+docker pull registry.forgemia.inra.fr/cedric.midoux/easy16s:v23.11
+docker run --rm --publish 3838:3838 registry.forgemia.inra.fr/cedric.midoux/easy16s:v23.11
+```
+
+The app will be accessible at <http://localhost:3838/>.
+
+## Usage
+
+### Load data
+
+Users can load a phyloseq object directly when launching the app using
+the following syntax:
+`easy16S::run_app(physeq = phyloseq.extended::food)`.
+
+Alternatively, there are three ways to load data when the app is
+launched:
+
+- Use one of the demo datasets provided with the application.
+- Upload flat files to build a phyloseq object:
+  - a BIOM file ([Standard format](https://biom-format.org/) or [FROGS
+    format](https://frogs.toulouse.inrae.fr/index.html)) \[mandatory\].
+  - a metadata table with variables (in columns) and samples (in rows).
+    Ensure that sample names (1st column) are spelled exactly as in the
+    BIOM file. The delimiter and format of columns can be specified.
+  - a phylogenetic tree in Newick format.
+  - a FASTA file with representative sequences.
+- Upload a phyloseq object as:
+  - an RDS file.
+  - a RData file containing a phyloseq object named `data`.
+
+Additionally, an RDS object can be provided directly from a URL:
+`https://shiny.migale.inrae.fr/app/easy16S/?rds=https://mywebsite.com/path/to/my/data.rds`
+
+### Preprocess data
+
+Before doing any analysis, it is customary to preprocess the data to
+refine and clean the raw data. The following operations are available
+and can be applied iteratively to achieve rich selections:
+
+- Select samples based on their name.
+- Filter samples based on the sample variables available in the metadata
+  table.
+- Aggregate taxa at a specified taxonomic rank (e.g. Genus, Family, etc.).
+- Spread taxonomy to remove unknown and multi-affiliations by spreading
+  the last known rank to further ranks
+  (e.g. “Bacillus;multi-affiliation” would become “Bacillus; unknown
+  Bacillus species”).
+- Rarefaction (resample the abundance table to ensure that all samples
+  have the same depth, set as the minimum one among samples).
+- Transform the abundances in the abundance table, using one of the
+  following: `prop` (change abundances to proportions / relative
+  abundances), `sqrt` (square root), `sqrt_prop` (square root of
+  relative abundances), `clr` ([centered
+  log-ratio](https://en.wikipedia.org/wiki/Compositional_data#Center_log_ratio_transform),
+  after adding a pseudo-count of 1).
+
+Once the desired operations are selected, users can seamlessly switch
+between the raw and preprocessed data to assess the impact of the
+applied transformations.
+
+#### A few words about rarefaction
+
+For many analyses (notably all those based on presence / absence data
+and more generally [diversity
+analyses](https://doi.org/10.1101/2023.06.23.546312)), it is recommended
+to normalize the samples by rarefying to account for variations in
+sequencing effort and ensure that the detection probability is
+comparable across samples. Rarefying involves subsampling each sample
+to the same depth, ensuring a more equitable comparison of microbial
+diversity across samples. It is however [not
+advised](https://doi.org/10.1371/journal.pcbi.1003531) for differential
+abundance analyses as it decreases statistical power.
+
+### Explore and Analyse Data
+
+#### Tables
+
+Users can visualize and explore key tables constituting the phyloseq
+object under study:
+
+- **OTU/ASV Table:** Abundance of each OTU/ASV in all samples.
+- **Taxonomy Table:** Taxonomic affiliation of each OTU/ASV at different
+  taxonomic ranks (e.g. Phylum to Species).
+- **Agglomerate OTU/ASV Table:** Same as OTU/ASV Table but after merging
+  all ASV/OTU sharing the same taxonomic affiliation up to a
+  user-specified rank.
+- **Sample Data Table:** Metadata associated with each sample, as
+  provided by the user during the import process (metadata table).
+
+For a deeper understanding of how phyloseq objects function, refer to
+the [phyloseq documentation on data
+import](https://joey711.github.io/phyloseq/import-data.html#phyloseq-ize_data_already_in_r).
+
+#### Metadata
+
+This section provides access to the sample data table for use with the
+[esquisse addin](https://dreamrs.github.io/esquisse/index.html). It is
+useful to explore and assess associations between sample variables (but
+not metabarcoding data).
+
+> This addin allows you to interactively explore your data by
+> visualizing it with the ggplot2 package. It allows you to draw bar
+> plots, curves, scatter plots, histograms, boxplot and sf objects, then
+> export the graph or retrieve the code to reproduce the graph.
+
+#### Barplot
+
+Used to create composition graphs (stacked barplots of relative
+abundances), based on the `phyloseq.extended::plot_composition()`
+function. This feature provides users with the option to:
+
+- Specify the taxonomic rank used for aggregation and coloring.
+- Filter and display results for a specific taxon.
+- Group samples based on metadata.
+
+Composition barplots show the relative abundance of all or part of the
+sample diversity.
+
+See also [bar plots on phyloseq
+documentation](https://joey711.github.io/phyloseq/plot_bar-examples.html).
+
+#### Rarefaction
+
+Used to create rarefaction curves, based on the
+`phyloseq.extended::ggrare()` function. These settings provide users
+with the option to:
+
+- Color, annotate and group samples based on metadata.
+- Display a minimum sample threshold.
+
+Rarefaction curves are used to evaluate the relationship between
+richness and sampling effort (number of reads, or sequencing depth) in
+each sample. This curve shows the expected number of OTUs/ASVs observed
+in each sample based on the sequencing depth. Rarefaction curves
+generally grow rapidly at first, as the most common OTUs/ASVs are found,
+but the curves plateau as the diversity saturates as only the rarest
+ones remain to be observed.
+
+#### Heatmap
+
+To create an ecologically-organized heatmap, use the
+`phyloseq::plot_heatmap()` function. These settings provide users with
+the option to:
+
+- Select only the *n* most abundant taxa for display.
+- Agglomerate taxa at a user-specified taxonomic rank.
+- Group, annotate and order samples based on metadata.
+- Display the affiliation of each OTU/ASV at a user-specified taxonomic
+  rank.
+
+Heatmaps can be used to investigate the structuring of sample
+communities, ordered using a “NMDS” ordination (samples ordered by
+increasing angle between the x-axis and their projection). It can also
+be used to observe core and condition-specific microbiota.
+
+See also [heatmap plots in the phyloseq
+documentation](https://joey711.github.io/phyloseq/plot_heatmap-examples).
+
+#### $\alpha$-Diversity
+
+$\alpha$-diversity measures richness within a sample. Detailed
+information on this concept and the different metrics available in
+easy16S can be found in the [alpha diversity section of the phyloseq
+documentation](https://joey711.github.io/phyloseq/plot_richness-examples.html).
+
+##### Table
+
+Compute the main alpha diversity estimators using the
+`phyloseq::estimate_richness()` function. If a sample data table is
+available, it is included in the table for further analyses (e.g. ANOVA,
+regression, etc.).
+
+##### Plot
+
+Visualize the previously calculated metrics with the
+`phyloseq::plot_richness()` function. Users can customize the
+arrangement of samples along the x-axis (`X`), color and shape of
+samples based on metadata. Additionally, diversity data can be displayed
+as boxplots instead of points.
+
+##### ANOVA
+
+This section performs *ANOVA* on the diversity table enriched with the
+metadata to assess the impact of a covariate of interest on the
+alpha-diversity. For categorical variables, a post-hoc pairwise
+comparison table is also provided to identify levels of the variable
+with significantly different diversities.
+
+#### $\beta$-diversity
+
+$\beta$-diversity measures the dissimilarity between samples, capturing
+richness variations. The selection of a distance metric is crucial, and
+detailed information is available in the [phyloseq
+documentation](https://joey711.github.io/phyloseq/distance.html) or in
+the [gusta.me
+website](https://sites.google.com/site/mb3gustame/reference/dissimilarity-distance).
+These functions can be compositional or qualitative, phylogenetic or
+not, and the choice depends on the features of interest.
+
+> Different distances capture different features of the samples. There
+> is no “one size fits all.” However, choosing an appropriate measure is
+> essential as it will strongly affect how your data is treated during
+> analysis and what kind of interpretations are meaningful.
+
+##### Table
+
+Compute distances between each pair of samples using the
+`phyloseq::distance()` function and the chosen distance metric.
+
+##### Samples heatmap
+
+Plot matrix of pairwise distances using the
+`phyloseq.extended::plot_dist_as_heatmap()` function. Users can
+customize sample order based on metadata to highlight patterns
+(e.g. lower within-group than between-group distances).
+
+##### Samples clustering
+
+Use the distance matrix and a user-specified linkage method (e.g. Ward,
+complete, average, etc.) to compute and plot a hierarchical clustering
+tree of the samples with the `phyloseq.extended::plot_clust()` function.
+Users can color leaves of the tree (i.e. samples) according to a
+categorical metadata variable to identify the variables along which the
+samples separate.
+
+##### MultiDimensional Scaling
+
+Use the distance matrix to ordinate the samples (i.e. project them while
+preserving at best their pairwise distances) in a low-dimensional space
+with the `phyloseq::ordinate()` function, and visualize this
+ordination with the `phyloseq::plot_ordination()` function. In addition
+to selecting the ordination method
+([MDS/PCoA](https://sites.google.com/site/mb3gustame/dissimilarity-based-methods/principal-coordinates-analysis),
+[NMDS](https://sites.google.com/site/mb3gustame/dissimilarity-based-methods/non-metric-multidimensional-scaling),
+etc.), users can customize color, shape and labels of samples based on
+metadata. Additionally, ellipses can be added to group samples in the
+same category of a variable (e.g. healthy versus diseased
+individuals). By default, the ordination represents the principal plane
+(axes 1 and 2) of the projection but further axes can be used for
+plotting.
+
+These graphs serve as powerful tools for exploring and interpreting
+factors structuring the microbial community structures.
+
+For more examples and details, refer to [ordination plots on phyloseq
+documentation](https://joey711.github.io/phyloseq/plot_ordination-examples.html)
+or [GUSTA
+ME](https://sites.google.com/site/mb3gustame/dissimilarity-based-methods/principal-coordinates-analysis).
+
+##### Multivariate ANOVA
+
+Use *Permutational Multivariate ANOVA* to assess the impact of one or
+several covariates on community structure with
+`vegan::adonis2(by = 'terms', perm = 9999)`. The test compares the
+structure given by sample data with 9999 randomly generated structures.
+*Permutational Multivariate ANOVA* (also called [non parametric
+multivariate
+ANOVA](https://sites.google.com/site/mb3gustame/hypothesis-tests/manova/npmanova)
+or npmanova) accommodates complex designs, but it tests only location
+effects (e.g. are the typical communities similar in groups A and B?)
+and assumes equal dispersions (i.e. same biological variability in both
+groups).
+
+Users should specify up to 3 covariates and their potential interactions
+to be included in the model.
+
+#### PCA
+
+Perform PCA using `stats::prcomp()` on the abundance matrix. While
+*MultiDimensional Scaling* (MDS) is often recommended for microbiome
+analysis, *Principal Component Analysis* (PCA) after appropriate data
+transformation can be an alternative. The transformed abundances can be
+centered and/or scaled during the analysis. Users can customize color,
+shape and labels of samples based on metadata, add ellipses to group
+samples from the same category, and select the axes of the projection
+like in Multidimensional Scaling. Loadings (OTU/ASV) of the principal
+axes can also be incorporated to understand the individual contributions
+of taxa to each axis.
+
+#### Differential abundance
+
+This section is dedicated to the identification of over- or
+under-abundant OTU/ASVs based on an experimental variable (categorical
+or numeric). The main tool for this analysis is the `DESeq2` package
+(with the `sfType = "poscounts"` used by default to ignore null values
+when computing scale factors), utilized through the
+`phyloseq::phyloseq_to_deseq2()` function (refer to [the accompanying
+vignette](https://joey711.github.io/phyloseq-extensions/DESeq2.html)).
+
+However, note that while `DESeq2` was developed for transcriptomics data
+using negative binomial models, amplicon metagenomics data are typically
+very sparse, and how well these models handle such sparsity, even with
+`sfType = "poscounts"`, is not clear.
+
+To proceed with differential abundance analysis, users need to
+
+- select an experimental design model
+- select a contrast of two covariates (for categorical variables).
+
+An interactive volcano plot representing the differentially abundant
+OTUs is then shown (clicking on any OTU/ASV displays a barplot
+representing its relative abundance across the samples) alongside an
+interactive table with detailed information on the differential
+abundance statistics (p-value, effect size, etc.) and the taxonomy of
+each OTU.
+
+This analysis allows the user to identify and visualize the taxa that
+exhibit significant differences in abundance between two conditions,
+providing valuable insights into the impact of experimental variables on
+individual microbes.
+
+### Export data, plot, and results
+
+Users can export their (potentially preprocessed) **data** with the
+“download” icons. The export options include:
+
+- Exporting data in `.biom` format. Note that if a phylogenetic tree is
+  present, it will not be included in the exported biom file. This
+  format facilitates compatibility with other tools.
+- Exporting the constructed phyloseq object in `.rds` format. This
+  enables further analysis within R or for use in Easy16S.
+
+For results **tables**, users can easily export them using the `CSV`,
+`Copy` (to clipboard) or `Excel` buttons.
+
+To export a **plot**, click on the camera button located at the top
+right of each plot. Global export parameters, such as height, width,
+scale, and format, can be configured through the menu at the top right
+of the header. This functionality allows users to resize plots as
+needed before export.
+
+These export features enhance the usability and accessibility of both
+data and results, allowing users to seamlessly integrate Easy16S with
+their preferred analysis tools and workflows.
+
+## Issues and Support
+
+If you encounter any bugs or have suggestions for improvement, please
+use the [issue
+tracker](https://forgemia.inra.fr/cedric.midoux/easy16s/-/issues).
+
+## Cite us
+
+We appreciate acknowledgment from research teams using Easy16S. Please
+include the following sentence in the publications of your analyses:
+*“We are grateful to the INRAE MIGALE bioinformatics facility (MIGALE,
+INRAE, 2020. Migale bioinformatics Facility, doi:
+10.15454/1.5572390655343293E12) for providing help and computing
+resources”*.
+
+A scientific publication related to Easy16S is currently in preparation.
+
+## Code of Conduct
+
+Please note that the easy16S project is released with a [Contributor
+Code of
+Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html).
+By contributing to this project, you agree to abide by its terms.
diff --git a/header.Rmd b/header.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..6d7bfe21dc50519b8b9f035fbe5cd2c50f8dc90a
--- /dev/null
+++ b/header.Rmd
@@ -0,0 +1,22 @@
+---
+output: github_document
+---
+
+<!-- header.md is generated from header.Rmd. Please edit that file -->
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.path = "man/figures/README-",
+  out.width = "100%"
+)
+```
+
+<!-- badges: start -->
+
+[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![SWH](https://archive.softwareheritage.org/badge/swh:1:dir:1828e5d3e367f8dbb972a1dc6684fc4aebe04825/)](https://archive.softwareheritage.org/swh:1:dir:1828e5d3e367f8dbb972a1dc6684fc4aebe04825) [![HAL](https://img.shields.io/badge/-hal--04304559-1E198E?style=flat&logo=HAL)](https://hal.science/hal-04304559)
+
+<!-- badges: end -->
+
+Easy16S is designed to facilitate the exploration, visualization and analysis of microbiome data.
diff --git a/header.md b/header.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f3b7118b5ce3ed6702ba2fc456ecff06fdd3f7b
--- /dev/null
+++ b/header.md
@@ -0,0 +1,13 @@
+
+<!-- header.md is generated from header.Rmd. Please edit that file -->
+<!-- badges: start -->
+
+[![Lifecycle:
+experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
+[![SWH](https://archive.softwareheritage.org/badge/swh:1:dir:1828e5d3e367f8dbb972a1dc6684fc4aebe04825/)](https://archive.softwareheritage.org/swh:1:dir:1828e5d3e367f8dbb972a1dc6684fc4aebe04825)
+[![HAL](https://img.shields.io/badge/-hal--04304559-1E198E?style=flat&logo=HAL)](https://hal.science/hal-04304559)
+
+<!-- badges: end -->
+
+Easy16S is designed to facilitate the exploration, visualization and
+analysis of microbiome data.
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 4b58ce2a01ae8235809b05c073ddc9db8763c98f..dc51d2c539290e5dd2a446b9e565439bfb51fb89 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,3 +1,44 @@
+ASV
+ASVs
+Analyse
+BIOM
+FASTA
+GUSTA
+INRAE
 Lifecycle
+Loadings
+MDS
+MIGALE
+Migale
+MultiDimensional
+NMDS
+Newick
+OTU
+OTUs
+PCoA
+Permutational
+Preprocess
+RData
+SWH
+ShinyProxy
+addin
+amplicon
+bioinformatics
+biom
+compositional
+dispersions
+doi
+esquisse
+ggplot
 golem
+gusta
+iteratively
+metabarcoding
+metagenomics
 microbiome
+microbiota
+npmanova
+phyloseq
+preprocess
+preprocessed
+transcriptomics