diff --git a/sup_lrn/comp_stat_classif.ipynb b/sup_lrn/comp_stat_classif.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..d61554f232e8f65f46187857f883a258f8f18e73
--- /dev/null
+++ b/sup_lrn/comp_stat_classif.ipynb
@@ -0,0 +1,507 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "00cde4c7",
+   "metadata": {},
+   "source": [
+    "This notebook compares statistical testing and classification on synthetic data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c9c8432",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import librairies\n",
+    "\n",
+    "import numpy as np\n",
+    "import scipy.signal as spsg\n",
+    "import pandas as pd\n",
+    "\n",
+    "import scipy.stats as stt\n",
+    "import statsmodels.api as sm\n",
+    "import statsmodels.formula.api as smf\n",
+    "\n",
+    "from sklearn.model_selection import StratifiedShuffleSplit\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.pipeline import Pipeline, make_pipeline\n",
+    "\n",
+    "%matplotlib inline\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sb\n",
+    "\n",
+    "font = {'family' : 'DejaVu Sans',\n",
+    "        'weight' : 'regular',\n",
+    "        'size'   : 18}\n",
+    "mpl.rc('font', **font)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0d12a052",
+   "metadata": {},
+   "source": [
+    "## Generate synthetic data to classify\n",
+    "\n",
+    "We generate synthetic data with 2 classes to separate (`s0` and `s1` samples, respectively). The input dimensionality corresponds `m` features. See the notebooks on supervised learning for details."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4673d31d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create synthetic dataset where 2 classes of s0+s1 samples of m-dimensional inputs with controlled contrast\n",
+    "def gen_inputs(m,        # input dimensionality\n",
+    "               s0,       # number of samples for class 0\n",
+    "               s1,       # number of samples for class 1\n",
+    "               scaling,  # scaling factor to separate classes\n",
+    "               type):    # type of scaling (0=bimodal or 1=linear)\n",
+    "\n",
+    "    # labels\n",
+    "    lbl = np.zeros([s0+s1], dtype=int)\n",
+    "    # inputs\n",
+    "    X = np.zeros([s0+s1,m])\n",
+    "\n",
+    "    # create s0 and s1 samples for the 2 classes\n",
+    "    for i in range(s0+s1):\n",
+    "        # label\n",
+    "        lbl[i] = int(i<s0)\n",
+    "        # inputs are random noise plus a shift\n",
+    "        for j in range(m):\n",
+    "            # positive/negative shift for 1st/2nd class, only for indices j larger than m/2\n",
+    "            if type==0:\n",
+    "                if j>=m/2:\n",
+    "                    if i<s0:\n",
+    "                        a = -scaling\n",
+    "                    else:\n",
+    "                        a = scaling\n",
+    "                else:\n",
+    "                    a = 0.0\n",
+    "            else:\n",
+    "                if i<s0:\n",
+    "                    a = -j / m * scaling\n",
+    "                else:\n",
+    "                    a = j / m * scaling\n",
+    "            # the shift linearly depends on the feature index j\n",
+    "            X[i,j] = a + np.random.randn()\n",
+    "            \n",
+    "    return X, lbl"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2052e9ec-c51e-429a-8ec0-d67eb9ec24ed",
+   "metadata": {},
+   "source": [
+    "For features with indices smaller than $m/2$, there is no contrast between the two classes (in red and blue). For indices larger than $m/2$, the contrast is determined by the scaling parameters (separation between the two disributions). Each feature involve independent noise in the generation of the samples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "970d0a6e-d894-45f5-a1a0-5752ee222688",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# input properties\n",
+    "m = 1 # input dimensionality (try 1 and 10)\n",
+    "s0 = 50 # number of samples for class 0\n",
+    "s1 = 50 # number of samples for class 1\n",
+    "scaling = 0.5 # class contrast\n",
+    "\n",
+    "# generate inputs\n",
+    "X, y = gen_inputs(m, s0, s1, scaling, 0)"
+   ]
+  },
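+  {
+   "cell_type": "markdown",
+   "id": "a1e5c9b2",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check of the generative structure (a small addition for illustration), we can compare the per-class means of each feature: by construction of the bimodal case used here, only features with index $j \\geq m/2$ should show a mean difference between the two classes, of about twice the scaling factor."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b2f6d0c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# per-class mean of each feature (class 0 = first s0 samples, class 1 = last s1 samples)\n",
+    "print('class 0 means:', X[:s0,:].mean(axis=0))\n",
+    "print('class 1 means:', X[s0:,:].mean(axis=0))\n",
+    "# the difference should be close to 2*scaling for the informative features\n",
+    "print('difference:   ', X[s0:,:].mean(axis=0) - X[:s0,:].mean(axis=0))"
+   ]
+  },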
+  {
+   "cell_type": "markdown",
+   "id": "a2b44140-a4fb-4f9c-9fd1-ff3934a61c3c",
+   "metadata": {},
+   "source": [
+    "We see that  the left half of vertical columns in the matrix (sample, feature) have similar values for the red and blue classes (top and bottom half of rows, respectively). However, the right columns show differentiated values for the two classes: lower values for the red class than for the blue class. Importantly, note that these data involve noise: for each sample, not the same input feature (among high indices) allows for a clear prediction of the class of the sample."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1d2470e-44a2-4fbd-834e-e898eb7f649b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=[8,6])\n",
+    "plt.axes([0.2,0.2,0.55,0.7])\n",
+    "plt.imshow(X, aspect=0.2, interpolation='nearest')\n",
+    "plt.xticks(range(0,m,2))\n",
+    "plt.xlabel('feature')\n",
+    "plt.ylabel('sample')\n",
+    "plt.axes([0.6,0.2,0.3,0.7])\n",
+    "plt.imshow(y.reshape([s0+s1,1]), aspect=0.2, interpolation='nearest', cmap='bwr')\n",
+    "plt.xticks([0],[' '])\n",
+    "plt.xlabel('label')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9089a6d5-ff78-4c1c-9bd0-add0f7997e43",
+   "metadata": {},
+   "source": [
+    "## Statistical testing\n",
+    "\n",
+    "The goal of statistical testing is to compare two distrbutions, each for each class. We can start with comparing features individually."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fcdbcf9f-73ba-4010-893f-7a3083450c99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# bins for istograms\n",
+    "vbins = np.linspace(-3,3,30)\n",
+    "\n",
+    "# plot\n",
+    "plt.figure(figsize=[6,7])\n",
+    "for j, i in enumerate(np.unique([0, int((m-1)*0.33), int((m-1)*0.66), m-1])):\n",
+    "    plt.subplot(4,1,j+1)\n",
+    "    plt.hist(X[:s0,i], histtype='step', bins=vbins, color='r')\n",
+    "    plt.hist(X[s0:,i], histtype='step', bins=vbins, color='b')\n",
+    "    plt.axis(xmin=-3, xmax=3)\n",
+    "    plt.legend(['class 0', 'class 1'], fontsize=10)\n",
+    "    plt.title('input {}'.format(i), loc='left')\n",
+    "plt.xlabel('X values')\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dea3710d-4929-456a-95e2-23a1445e3b03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p_val_indiv = np.zeros([m])\n",
+    "for i in range(m):\n",
+    "    # results of t tests\n",
+    "    print(stt.ttest_ind(X[:s0,i], X[s0:,i], equal_var=True))\n",
+    "    p_val_indiv[i] = stt.ttest_ind(X[:s0,i], X[s0:,i], equal_var=True)[1]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8b834ae4-32e2-48b8-aebb-e1d4a62c803b",
+   "metadata": {},
+   "source": [
+    "We can build a linear model that take all features together. To each feature corresponds a regression coefficient that evaluates its contribution in a multivariate context. Note that this is equivalent to predicting the class index from all features together."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3fb8fe3c-7ff4-4e33-b565-113bdd97cb9c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a dataframe with predicted variable as class index and all features\n",
+    "df = pd.DataFrame()\n",
+    "df['y'] = y\n",
+    "for i in range(m):\n",
+    "    tag = 'x{}'.format(i)\n",
+    "    df[tag] = X[:,i]\n",
+    "\n",
+    "# define linear model\n",
+    "if m==1:\n",
+    "    lm = smf.ols('y ~ x0', df)\n",
+    "else:\n",
+    "    lm = smf.ols('y ~ x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9', df)\n",
+    "\n",
+    "# fit model to data\n",
+    "lmf = lm.fit()\n",
+    "\n",
+    "# summary\n",
+    "print(lmf.summary())"
+   ]
+  },
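+  {
+   "cell_type": "markdown",
+   "id": "c3a7e1d4",
+   "metadata": {},
+   "source": [
+    "As a small check of the equivalence mentioned above (added for illustration): for `m=1`, the p-value of the regression coefficient of `x0` should match the p-value of the individual t test, since both assess the same association between the feature and the class index."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4b8f2e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for m=1, the two p-values should coincide (up to numerical precision)\n",
+    "if m==1:\n",
+    "    print('t test p-value:', p_val_indiv[0])\n",
+    "    print('OLS p-value:   ', lmf.pvalues['x0'])"
+   ]
+  },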
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14b15b16-d7ee-40ec-b39f-da6cd11dea68",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(sm.stats.anova_lm(lmf, typ=2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3cdb2b17-ae91-4c68-9048-a704d26a0e41",
+   "metadata": {},
+   "source": [
+    " Importantly, the p-values evaluated in a multivariate context differ from those calculated individually (compare `m=1` and `m=10`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f88dbf8-05fa-4182-bff0-4409bdd19c2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p_val_multi = lmf.pvalues[1:]\n",
+    "\n",
+    "plt.figure()\n",
+    "plt.scatter(-np.log(p_val_indiv)/np.log(10), -np.log(p_val_multi)/np.log(10))\n",
+    "plt.xlabel('p-value (log 10) indiv')\n",
+    "plt.ylabel('p-value (log 10) multi')\n",
+    "plt.tight_layout()"
+   ]
+  },
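+  {
+   "cell_type": "markdown",
+   "id": "e5c9a3f6",
+   "metadata": {},
+   "source": [
+    "To complement the scatter plot, we can tabulate the two sets of p-values per feature (a small addition for illustration). The individual and multivariate analyses typically single out the same informative features, but the magnitudes of the p-values differ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6d0b4a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# side-by-side table of individual and multivariate p-values\n",
+    "p_comp = pd.DataFrame({'p_indiv': p_val_indiv,\n",
+    "                       'p_multi': np.asarray(p_val_multi)},\n",
+    "                      index=['x{}'.format(i) for i in range(m)])\n",
+    "print(p_comp)"
+   ]
+  },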
+  {
+   "cell_type": "markdown",
+   "id": "089b6b07-3866-4c89-ab04-c7fe8c32ad2b",
+   "metadata": {},
+   "source": [
+    "## Classification\n",
+    "\n",
+    "The goal of classification is to extract the relevant information to perform the classification. Here we use cross-validationwith 80%-20% for training-testing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1187b5f5-1078-4241-83d3-1717edac5ee1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# number of split repetitions\n",
+    "n_rep = 10\n",
+    "\n",
+    "# outer cross-validation scheme\n",
+    "cvs = StratifiedShuffleSplit(n_splits=n_rep, test_size=0.2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f2a4e24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# default value fhyperparameter for regularization\n",
+    "C = 10000.0\n",
+    "\n",
+    "# classifier in pipeline and wrapper for RFE\n",
+    "clf = Pipeline([('scl',StandardScaler()),\n",
+    "                ('mlr',LogisticRegression(C=C))])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "07cdf8f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# results: accuracy and trained weights\n",
+    "d_acc = {'type': [], 'score': []}\n",
+    "d_weights = {'type': [], 'feat_idx': [], 'value': []}\n",
+    "\n",
+    "# repeat classification\n",
+    "for train_ind, test_ind in cvs.split(X, y):\n",
+    "    \n",
+    "    # train the classifier\n",
+    "    clf.fit(X[train_ind,:], y[train_ind])\n",
+    "    # accuracy on train set\n",
+    "    score = clf.score(X[train_ind,:], y[train_ind])\n",
+    "    d_acc['type'] += ['train']\n",
+    "    d_acc['score'] += [score]\n",
+    "    # accuracy on test set\n",
+    "    score = clf.score(X[test_ind,:], y[test_ind])\n",
+    "    d_acc['type'] += ['test']\n",
+    "    d_acc['score'] += [score]\n",
+    "    # trained weights\n",
+    "    for idx in range(m):\n",
+    "        d_weights['type'] += ['train']\n",
+    "        d_weights['feat_idx'] += [idx]\n",
+    "        d_weights['value'] += [clf[-1].coef_[0,idx]]\n",
+    "    \n",
+    "    # shuffling labels to evaluate the chance-level classification, retrain and test\n",
+    "    train_ind_rand = np.random.permutation(train_ind)\n",
+    "    clf.fit(X[train_ind,:], y[train_ind_rand])\n",
+    "    score = clf.score(X[test_ind,:], y[test_ind])\n",
+    "    d_acc['type'] += ['shuf']\n",
+    "    d_acc['score'] += [score]\n",
+    "    # trained weights\n",
+    "    for idx in range(m):\n",
+    "        d_weights['type'] += ['shuf']\n",
+    "        d_weights['feat_idx'] += [idx]\n",
+    "        d_weights['value'] += [clf[-1].coef_[0,idx]]\n",
+    "\n",
+    "# transform lists of dictionnaries in dataframes\n",
+    "acc = pd.DataFrame(d_acc, columns=['type', 'score'])\n",
+    "weights = pd.DataFrame(d_weights, columns=['type', 'feat_idx', 'value'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2702e677",
+   "metadata": {},
+   "source": [
+    "When plotting the accuracy results, we see that the classifier performs very well on the training set as illustrated by the accuracy in brown. In comparison the accuracy on the testing set in orange is lower, but still way above the chance level in gray (for the scaling factor of $0.7$ used in generating the samples). The comparison between these three accuracy distributions (because we use several repetitions of the splitting scheme) provides a quantitative manner to understand whether the classifier training is successful. A more complete check-up would consider the evolution of these accuracies during the iterative training of the classifier.\n",
+    "\n",
+    "Here we have two classes with the same number of samples in each, so the chance-level is in theory 50%. The point of evaluating the chance level from the data is to quantify the variability of the corresponding accuracy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "03237341",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chance_level = 0.5\n",
+    "\n",
+    "plt.figure()\n",
+    "sb.violinplot(data=acc, y='score', x='type', hue='type', cut=1.0,\n",
+    "              palette=['brown','orange','gray'], density_norm='width', inner=None, legend=False)\n",
+    "sb.swarmplot(data=acc, y='score', x='type', hue='type',\n",
+    "              palette=['black','black','black'])\n",
+    "plt.plot([-1,3], [chance_level]*2, '--k')\n",
+    "plt.yticks([0,1])\n",
+    "plt.xlabel('')\n",
+    "plt.ylabel('accuracy')\n",
+    "plt.tight_layout()\n",
+    "plt.title('train-test accuracies')\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
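+  {
+   "cell_type": "markdown",
+   "id": "a7e1c5b8",
+   "metadata": {},
+   "source": [
+    "To go beyond the visual comparison, here is a minimal sketch of a quantification (added for illustration): summarize each accuracy distribution by its mean and standard deviation, and test whether the test accuracies lie above the chance-level (shuffled) accuracies, for instance with a one-sided Mann-Whitney U test."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b8f2d6c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# summary statistics of the accuracy distributions across the n_rep splits\n",
+    "print(acc.groupby('type')['score'].agg(['mean', 'std']))\n",
+    "\n",
+    "# one-sided test: are the test accuracies above the shuffled (chance-level) accuracies?\n",
+    "acc_test = acc[acc['type']=='test']['score']\n",
+    "acc_shuf = acc[acc['type']=='shuf']['score']\n",
+    "print(stt.mannwhitneyu(acc_test, acc_shuf, alternative='greater'))"
+   ]
+  },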
+  {
+   "cell_type": "markdown",
+   "id": "4dead486-aa61-42fe-af61-12862fce1100",
+   "metadata": {},
+   "source": [
+    "The `weights` dataframe has to the trained weights for the classifier in the two conditions ('train' and 'shuf'), together with the index of the feature. We can plot them to see which features are used by the trained classifier, indicating informative features.\n",
+    "\n",
+    "We indeed see large weights (in absolute value) for high features indices and small weights for low feature indices for the trained classifier. In contrast, the classifier trained with shuffled labels has all weights close to zero, indicating an absence of specialization."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0427e876-3a2d-4df4-9c7f-3105e8ca339d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "plt.figure()\n",
+    "sb.lineplot(data=weights, y='value', x='feat_idx', hue='type')\n",
+    "plt.xlabel('feature index')\n",
+    "plt.ylabel('weight value')\n",
+    "plt.tight_layout()\n",
+    "plt.title('trained weights', fontsize=16)\n",
+    "plt.legend(fontsize=14, loc='lower left')\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "173c52a4-52af-4a20-afa2-0e6f958d8995",
+   "metadata": {},
+   "source": [
+    "## Compare p-values and trained weights"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ab07ed6-8fab-4ed5-816b-2d5c8c3b9dcd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "abs_weight = np.abs(weights[weights['type']=='train'].groupby('feat_idx')['value'].apply(func=np.mean))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c03b4ffa-375d-48b3-b9bb-d27f2fb72bc2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure()\n",
+    "plt.scatter(-np.log(p_val_indiv)/np.log(10), abs_weight)\n",
+    "plt.xlabel('p-value (log 10)')\n",
+    "plt.ylabel('absolute weight')\n",
+    "plt.title('individual')\n",
+    "plt.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7dc35c7-3689-4c2c-bafc-d70a0baa5f77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure()\n",
+    "plt.scatter(-np.log(p_val_multi)/np.log(10), abs_weight)\n",
+    "plt.xlabel('p-value (log 10)')\n",
+    "plt.ylabel('absolute weight')\n",
+    "plt.title('multivariate')\n",
+    "plt.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c0f55fdc-d11a-4188-9e3a-3d793431ed54",
+   "metadata": {},
+   "source": [
+    "## Exercices:\n",
+    "- change from $m=1$ to $10$ inputs\n",
+    "- modify the values of the scaling factor to increase/decrease the contrast between the two classes\n",
+    "- modify the value of the regularization parameter to see the effect of the trained weights\n",
+    "- change the input structure: `gen_inputs(m, s0, s1, scaling, 1)` instead of `gen_inputs(m, s0, s1, scaling, 0)`"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}