diff --git a/sup_lrn/nb_suplearn4_feature_selection.ipynb b/sup_lrn/nb_suplearn4_feature_selection.ipynb
index bae4b3cdc7dc1de74fbde00ff3dcdeb6d4455a5e..f106e2a54fe4a8b68e17310c95a5faf9029abf70 100644
--- a/sup_lrn/nb_suplearn4_feature_selection.ipynb
+++ b/sup_lrn/nb_suplearn4_feature_selection.ipynb
@@ -87,6 +87,16 @@
     "    return X, lbl"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "9d0e394b-eb3e-4140-99e7-9d6123960711",
+   "metadata": {},
+   "source": [
+    "## Toy example\n",
+    "\n",
+    "How to interpret the trained weights?"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -95,10 +105,126 @@
    "outputs": [],
    "source": [
     "# generate inputs and labels\n",
-    "m = 50 # input dimensionality\n",
+    "m = 10 # input dimensionality\n",
     "s0 = 100 # number of samples for class 0\n",
     "s1 = 100 # number of samples for class 1\n",
-    "X, y = gen_inputs(m, s0, s1, scaling=0.5) # try 0.2"
+    "X, y = gen_inputs(m, s0, s1, scaling=0.7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a45c553-630d-4d95-94d5-6735e9fe0ee2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create matrix rgb\n",
+    "vbins = np.linspace(-3,3,30) # bins for istograms\n",
+    "mat_rgb = np.zeros([m, vbins.size-1, 3])\n",
+    "for i in range(m):\n",
+    "    mat_rgb[i,:,0] = np.histogram(X[:s0,i], bins=vbins)[0]\n",
+    "    mat_rgb[i,:,2] = np.histogram(X[s0:,i], bins=vbins)[0]\n",
+    "mat_rgb /= mat_rgb.max() / 2.0\n",
+    "\n",
+    "plt.figure(figsize=[6,7])\n",
+    "plt.imshow(mat_rgb)\n",
+    "plt.xlabel('values')\n",
+    "plt.ylabel('input index')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d712a2fb-86b6-46df-a869-bc05475e79d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# train classifier\n",
+    "clf = LogisticRegression()\n",
+    "clf.fit(X,y)\n",
+    "\n",
+    "# plot weights\n",
+    "plt.figure()\n",
+    "plt.plot(range(m), clf.coef_.flatten())\n",
+    "plt.xlabel('input index')\n",
+    "plt.ylabel('weight')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc00e090-8aad-4bf5-9e7a-29a92e7e50fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# copy X and rescale an input\n",
+    "X1 = np.copy(X)\n",
+    "i = 8\n",
+    "X1[:,i] *= 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2777f72d-b304-4e12-ae53-5e0df531d85c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot\n",
+    "plt.figure(figsize=[6,7])\n",
+    "plt.subplot(211)\n",
+    "plt.hist(X[:s0,i], histtype='step', bins=40, color='r')\n",
+    "plt.hist(X[s0:,i], histtype='step', bins=40, color='b')\n",
+    "plt.legend(['class 0', 'class 1'], fontsize=10)\n",
+    "plt.title('input {} before'.format(i), loc='left')\n",
+    "plt.subplot(212)\n",
+    "plt.hist(X1[:s0,i], histtype='step', bins=40, color='r')\n",
+    "plt.hist(X1[s0:,i], histtype='step', bins=40, color='b')\n",
+    "plt.legend(['class 0', 'class 1'], fontsize=10)\n",
+    "plt.title('input {} after'.format(i), loc='left')\n",
+    "plt.tight_layout()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "56bb23cb-598a-4164-97f9-af9c1300eaf8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clf1 = LogisticRegression()\n",
+    "clf1.fit(X1,y)\n",
+    "\n",
+    "plt.figure()\n",
+    "plt.plot(range(m), clf.coef_.flatten(), label='orig')\n",
+    "plt.plot(range(m), clf1.coef_.flatten()/np.std(X,axis=0), label='rescaled')\n",
+    "plt.xlabel('input index')\n",
+    "plt.ylabel('weight')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "341418f1-2c2b-4e40-8711-1278e2af2948",
+   "metadata": {},
+   "source": [
+    "## Using the RFE object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d55d9cb-1f2a-4da6-b443-9713f75f6cf5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# generate inputs and labels\n",
+    "m = 10 # input dimensionality\n",
+    "s0 = 100 # number of samples for class 0\n",
+    "s1 = 100 # number of samples for class 1\n",
+    "X, y = gen_inputs(m, s0, s1, scaling=0.7)"
    ]
   },
   {
@@ -113,6 +239,123 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "1f5113c4-b6c5-462f-84f0-5ba38872d420",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# create matrix rgb\n",
+    "vbins = np.linspace(-3,3,30) # bins for istograms\n",
+    "mat_rgb = np.zeros([m, vbins.size-1, 3])\n",
+    "for i in range(m):\n",
+    "    mat_rgb[i,:,0] = np.histogram(X[:s0,i], bins=vbins)[0]\n",
+    "    mat_rgb[i,:,2] = np.histogram(X[s0:,i], bins=vbins)[0]\n",
+    "mat_rgb /= mat_rgb.max() / 2.0\n",
+    "\n",
+    "plt.figure(figsize=[6,7])\n",
+    "plt.imshow(mat_rgb)\n",
+    "plt.xlabel('values')\n",
+    "plt.ylabel('input index')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d355c4af-54a7-4668-8181-f69f9a9b466e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# classifier in pipeline\n",
+    "clf = Pipeline([('scl',StandardScaler()),\n",
+    "                ('mlr',LogisticRegression(C=1.0))])\n",
+    "\n",
+    "# number of repetitions \n",
+    "n_rep = 1 # change for 10\n",
+    "\n",
+    "# outer cross-validation scheme\n",
+    "cvs = StratifiedShuffleSplit(n_splits=n_rep, test_size=0.2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4fe3c14c-06ae-4cea-91ff-50b56dbe2f07",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# check names of parameters for pipeline (for grid search)\n",
+    "print(clf.get_params())\n",
+    "\n",
+    "# quick fix to get coefficients from mlr estimator in pipeline\n",
+    "def get_coef(clf_pipeline):\n",
+    "    return clf_pipeline['mlr'].coef_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "876b966b-5222-452e-b643-58d7ab638f87",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# repeat classification\n",
+    "for train_ind, test_ind in cvs.split(X, y):\n",
+    "    \n",
+    "    # wrap classifier to be fitted to data to calculate the ranking\n",
+    "    feature_select = RFE(clf, n_features_to_select=1, step=1,\n",
+    "                         importance_getter=get_coef)\n",
+    "    \n",
+    "    # perform RFE\n",
+    "    feature_select.fit(X[train_ind,:], y[train_ind])\n",
+    "    print(feature_select.ranking_)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e044c2ed-1cf9-4337-ab91-41a6c239b7b0",
+   "metadata": {},
+   "source": [
+    "Exercises:\n",
+    "- test the variability of the ranking across train-test splits\n",
+    "- change the granularity of the output ranking with `n_features_to_select` and `step`\n",
+    "- modify the scaling for the input generation to see how the ranking changes."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c26064a9-17a9-44d6-9c66-b85676c94fc8",
+   "metadata": {},
+   "source": [
+    "## Back to general case and combining all options for the classification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc559465-2c51-4e62-9b40-d683b91f1eac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# generate inputs and labels\n",
+    "m = 50 # input dimensionality\n",
+    "s0 = 100 # number of samples for class 0\n",
+    "s1 = 100 # number of samples for class 1\n",
+    "X, y = gen_inputs(m, s0, s1, scaling=0.5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6febb8bf-a89c-4c1d-8383-feffd43f98c2",
+   "metadata": {},
+   "source": [
+    "Here the inputs are modified compared to the previous notebooks, with a gradient of contrast that increases with the feature index $j$."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "edb43b31-62f0-4d0f-98d8-ac3e3c062405",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -136,8 +379,6 @@
    "id": "0024c6aa",
    "metadata": {},
    "source": [
-    "## Parameterization of classifier\n",
-    "\n",
     "We then build a pipeline for the classifier. The outer cross-validation corresponds to the train-test splitting as before.\n",
     "\n",
     "The inner crosss-validation corresponds to the optimization of the hyperparameter `C` of the classifier (logistic regression in the pipeline)."
@@ -153,7 +394,7 @@
     "# hyperparameter for regularization\n",
     "Cs = [0.01,0.1,1.0,10.0,100.0]\n",
     "\n",
-    "# classifier in pipeline and wrapper for RFE\n",
+    "# classifier in pipeline\n",
     "clf = Pipeline([('scl',StandardScaler()),\n",
     "                ('mlr',LogisticRegression())])\n",
     "\n",
@@ -174,9 +415,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# check names of parameters for pipeline (for grid search)\n",
-    "print(clf.get_params())\n",
-    "\n",
     "# quick fix to get coefficients from mlr estimator in pipeline\n",
     "def get_coef(clf_pipeline):\n",
     "    return clf_pipeline['mlr'].coef_"
@@ -187,8 +425,6 @@
    "id": "e3f096d7",
    "metadata": {},
    "source": [
-    "## Optimization involving the tuning of hyperparameter\n",
-    "\n",
     "We use `GridSearchCV` to optimize the hyperparameter, the use the best classifier pipeline on the test set and perform recursive feature elimination (RFE) to identify informative features that contribute to the correct classification. The latter gives a ranking where low ranks correspond to informative features."
    ]
   },
@@ -230,7 +466,7 @@
     "         'score': [score],\n",
     "         'ranking': [ranking]}\n",
     "    acc = pd.concat((acc, pd.DataFrame(data=d)), ignore_index=True)\n",
-    "    \n",
+    "\n",
     "    # shuffling\n",
     "    train_ind_rand = np.random.permutation(train_ind)\n",
     "\n",
@@ -265,8 +501,8 @@
     "chance_level = 0.5\n",
     "\n",
     "plt.figure()\n",
-    "sb.violinplot(data=acc, y='score', x='type', \n",
-    "              palette=['brown','orange'], scale='width')\n",
+    "sb.violinplot(data=acc, y='score', x='type', hue='type',\n",
+    "              palette=['brown','orange'], density_norm='width')\n",
     "plt.plot([-1,2], [chance_level]*2, '--k')\n",
     "plt.yticks([0,1])\n",
     "plt.ylabel('accuracy')\n",
@@ -277,8 +513,8 @@
     "acc2 = acc[acc['type']=='test']\n",
     "\n",
     "plt.figure()\n",
-    "sb.violinplot(data=acc2, y='log C', x='type', \n",
-    "              palette=['orange'], scale='width')\n",
+    "sb.violinplot(data=acc2, y='log C', x='type', hue='type', \n",
+    "              palette=['orange'], density_norm='width')\n",
     "plt.tight_layout()\n",
     "plt.title('best C (log$_{10}$ scale)')\n",
     "\n",
@@ -302,8 +538,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "sel_test = acc['type']=='test'\n",
+    "sel_shuf = acc['type']=='shuf'\n",
+    "\n",
     "plt.figure()\n",
-    "plt.plot(acc['ranking'].mean())\n",
+    "plt.plot(acc[sel_test]['ranking'].mean(), label='test')\n",
+    "plt.plot(acc[sel_shuf]['ranking'].mean(), label='shuf')\n",
     "plt.xlabel('input index')\n",
     "plt.ylabel('mean ranking across CV splits')\n",
     "plt.tight_layout()\n",
@@ -314,10 +554,20 @@
   },
   {
    "cell_type": "markdown",
-   "id": "bcf0b3dc",
+   "id": "9468ad24-29f5-4688-92cb-4821854051ef",
+   "metadata": {},
+   "source": [
+    "The ranking estimated from classification can be compared to the \"ground truth\", which is in theory the order of input indices. Note that the randomness in the generation of the inputs adds some noise that affects the precision of the estimated ranking."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ee2f8f7-3f55-41bd-b991-fefed23fe20f",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "Modify the scaling for the input generation to see how the ranking changes."
+    "print(np.argsort(acc[sel_test]['ranking'].mean()))"
    ]
   }
  ],
@@ -337,7 +587,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.11.7"
   }
  },
  "nbformat": 4,