Added cross-validation + generalization of criteria

2bcaecc8 · paul_pvc · 10d9a6c2 · 2bcaecc8 · 2bcaecc8
Commit 2bcaecc8 authored 4 months ago by paul_pvc
--- a/Main.py
+++ b/Main.py
 from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import SVC
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neural_network import MLPClassifier
 from xgboost import XGBClassifier

+
 import TP

 path1_t = "./Init/Mer"
@@ -13,3 +17,5 @@ print()
 print("Erreur empirique щ(ºДºщ):", TP.computeError(S_train), "erreurs")
 print("Erreur réelle ( ͡° _ʖ ͡°):", TP.computeError(S_test), "erreurs")
 print("Taux de réussite (╯°□°)╯︵ ┻━┻ : ", TP.computeScore(S_test), "%")
+print(TP.get_cross_val_score(classifier, S_train, S_test, y_train, y_test))
+#TP.computePredictionFile(classifier, TP.fetch_images_to_dict("./Init/Data CC2"))
\ No newline at end of file
--- a/TP.py
+++ b/TP.py
@@ -6,6 +6,8 @@ import numpy as np
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 from skimage.feature import graycomatrix, graycoprops
+from sklearn.model_selection import cross_val_score
+import math
 from sklearn.naive_bayes import GaussianNB

 MAX_SIZE = (224, 224)
@@ -64,11 +66,39 @@ def compute_glcm_caracteristics(image_gl):
    """
    image_arr= np.array(image_gl)
    #print(image_arr.shape)
-    glcm = graycomatrix(image_arr, distances=[5], angles=[0], levels=256,
+    glcm = graycomatrix(image_arr, distances=[1], angles=[0], levels=256,
                        symmetric=True, normed=True)
    return [graycoprops(glcm, 'dissimilarity')[0, 0], graycoprops(glcm, 'correlation')[0, 0], graycoprops(glcm, 'contrast')[0, 0],
            graycoprops(glcm, 'energy')[0, 0], graycoprops(glcm, 'homogeneity')[0, 0]]

+# NOTE: the triple-quoted string below wraps compute_4_histos, compute_4_glcm and
+# summer, so they are an inert string literal here, not defined functions.
+"""
+def compute_4_histos(resized):
+    image = resized.copy()
+    histos = []
+    histos += computeHisto(image.crop((0,0, 112, 112)))
+    histos += computeHisto(image.crop((112,0, 224, 112)))
+    histos += computeHisto(image.crop((0,112, 112, 224)))
+    histos += computeHisto(image.crop((112, 112, 224, 224)))
+    return histos
+
+
+def compute_4_glcm(resized):
+    image = resized.copy()
+    glcms = []
+    glcms = compute_glcm_caracteristics(image.crop((0, 0, 112, 112)))
+    glcms += compute_glcm_caracteristics(image.crop((0, 112, 112, 224)))
+    glcms += compute_glcm_caracteristics(image.crop((112, 0, 224, 112)))
+
+    glcms += compute_glcm_caracteristics(image.crop((112, 112, 224, 224)))
+    return glcms
+
+
+def summer(glcms, image, croped):
+    a = compute_glcm_caracteristics(image.crop(croped))
+    for i in range(len(a)):
+        glcms[i] += a[i]
+
+"""

 def computeDict(image_path, path, y_true_value, max_size: tuple):
    """
@@ -94,6 +124,8 @@ def computeDict(image_path, path, y_true_value, max_size: tuple):
            "X_pixelbw": computePixelBW_histo(resized),
            #"X_glcm_data": extract_data_glcm(compute_glcm(resized)),
            "X_glcm_data": compute_glcm_caracteristics(image_gl),
+            #"X_splitted_histo": compute_4_histos(resized),
+            #"X_splitted_glcm": compute_4_glcm(image_gl),
            "y_true_class": y_true_value,
            "y_predicted_class": None}

@@ -118,6 +150,10 @@ def computeHisto(image: PIL.Image.Image):
    return image.histogram()


+def extract_relevant_data(l: dict) -> list:
+    """
+    Build the feature vector of one sample from its dictionary.
+    :param l: sample dictionary holding the "X_histo" and "X_glcm_data" entries
+    :return: the "X_histo" values followed by the "X_glcm_data" values
+    """
+    histogram = l["X_histo"]
+    glcm_features = l["X_glcm_data"]
+    return histogram + glcm_features
+# NOTE: 78% was noted for l["X_histo"] + l["X_glcm_data"] (presumably the success rate; confirm).
+
 def fitFromHisto(S, algo):
    """
    Fit the given algorithm (classifier) With the sample S, We cut in train/test lists.
@@ -130,9 +166,9 @@ def fitFromHisto(S, algo):

    y = np.array(df["y_true_class"])

-    S_train, S_test, y_train, y_test = train_test_split(S, y, test_size=0.2, random_state=42)
+    S_train, S_test, y_train, y_test = train_test_split(S, y, test_size=0.2)

-    X_train = np.array([np.array(l["X_histo"]+l["X_glcm_data"]) for l in S_train])
+    X_train = np.array([np.array(extract_relevant_data(l)) for l in S_train])
    #X_train = df[["X_histo", "X_pixelbw"]]
    #print(X_train)
    #print(len(X_train[0]))
@@ -151,7 +187,7 @@ def predictFromHisto(S, model, list_dict=True):
    :param list_dict: is the sample in list(dict)
    :return: None
    """
-    tab = model.predict(np.array([x["X_histo"]+x["X_glcm_data"] for x in S]))
+    tab = model.predict(np.array([extract_relevant_data(x) for x in S]))
    if list_dict:
        for i in range(len(S)):
            S[i]["y_predicted_class"] = tab[i]
@@ -216,3 +252,12 @@ def computePredictionFile(classifier, images_test=None):
    file.write("# EE = "+ str(computeError(S_train))+"\n")
    file.write("# ER = "+ str(computeError(S_test))+"\n")
    file.close()
+
+
+def get_cross_val_score(classifier, S_train, S_test, y_train, y_test, cv=20):
+    """
+    Cross-validate the classifier on the train and test samples merged together.
+    :param classifier: the estimator handed to cross_val_score
+    :param S_train: train samples, each one a dictionary accepted by extract_relevant_data
+    :param S_test: test samples, same format as S_train
+    :param y_train: true classes of S_train
+    :param y_test: true classes of S_test
+    :param cv: cross-validation setting forwarded to cross_val_score (20 folds by default)
+    :return: the mean of the scores returned by cross_val_score
+    """
+    # Build the feature matrix in a single pass over both splits: an empty split
+    # then contributes no row instead of a 1-D array that cannot be concatenated.
+    samples = [*S_train, *S_test]
+    X = np.array([extract_relevant_data(sample) for sample in samples])
+    y = np.concatenate([y_train, y_test])
+    scores = cross_val_score(classifier, X, y, cv=cv)
+    return np.mean(scores)
\ No newline at end of file