Commit 605b76d8 authored by paul_pvc's avatar paul_pvc

cleaned the main and the TP

parent 9ab7e696
@@ -9,10 +9,21 @@ from xgboost import XGBClassifier
 import TP
 
-path1_t = "./Init/Mer"
-path2_t = "./Init/Ailleurs"
-
-"""S = TP.buildSampleFromPath(path1_t, path2_t)
+PATH_SEA = "./Init/Mer"  # PATH TO THE IMAGES CONTAINING THE SEA
+PATH_WITHOUT_SEA = "./Init/Ailleurs"  # PATH TO THE IMAGES WITHOUT THE SEA
+
+
+def compute_images_dictionnary(path_sea: str, path_without_sea: str, size=0) -> list[dict]:
+    """
+    Call the utility function that fetches the images from the two given folders (with
+    and without the sea) and builds the associated dictionaries, applying the
+    pre-treatment pipeline to each image.
+    The size parameter restricts the length of the result: if size != 0, the list has
+    2 * size entries (size images taken from each folder).
+    :param path_sea: path (relative or absolute) of the folder containing images with the sea
+    :param path_without_sea: path (relative or absolute) of the folder containing images without the sea
+    :param size: number of images taken from each of the two folders
+    """
+    return TP.buildSampleFromPath(path_sea, path_without_sea, size)
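Editor's aside: a quick usage sketch of the helper above, with a made-up size value — with size=50, 50 images are taken from each folder, so the returned list has 100 entries.

S_small = compute_images_dictionnary(PATH_SEA, PATH_WITHOUT_SEA, size=50)
print(len(S_small))  # 100 = 2 * 50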
"""S = TP.buildSampleFromPath(PATH_SEA, PATH_WITHOUT_SEA)
classifier, S_test, y_test, S_train, y_train = TP.fitFromHisto(S, SVC()) classifier, S_test, y_test, S_train, y_train = TP.fitFromHisto(S, SVC())
TP.predictFromHisto(S, classifier)""" TP.predictFromHisto(S, classifier)"""
"""print() """print()
@@ -20,11 +31,48 @@ print("Erreur empirique щ(ºДºщ):", TP.computeError(S_train), "erreurs")
 print("Erreur réelle ( ͡° _ʖ ͡°):", TP.computeError(S_test), "erreurs")
 print("Taux de réussite (╯°□°)╯︵ ┻━┻ : ", TP.computeScore(S_test), "%")"""
 
-svc_params = {"kernel": ('linear', 'rbf'), "C": [1,10]}
+
+def get_SVC_model_with_best_parameters() -> GridSearchCV:
+    """
+    Instantiate an SVC classification model wrapped in a GridSearchCV that tunes the
+    kernel and the C constant. The model is not trained in this function, so it is
+    returned unfitted.
+    """
+    svc_params = {"kernel": ('linear', 'rbf'), "C": [1, 10]}
+    svc_model = GridSearchCV(SVC(), svc_params)
+    return svc_model
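For intuition, here is a minimal, self-contained sketch of what the returned GridSearchCV does once it is fitted; the toy data is generated for illustration and is not the project's image features.

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = make_classification(n_samples=100, n_features=10, random_state=0)  # toy data
svc_params = {"kernel": ('linear', 'rbf'), "C": [1, 10]}
search = GridSearchCV(SVC(), svc_params)  # 2 kernels x 2 C values = 4 candidates, cross-validated
search.fit(X, y)
print(search.best_params_, search.best_score_)  # best (kernel, C) pair and its mean CV score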
+
+
+def test_model_on_single_train(model, S):
+    """
+    Test the given model (preferably unfitted) on a single random split: 80% of the
+    sample images are used for training and 20% for testing. Also prints the empirical
+    error, the real error and the score obtained on this train/test split.
+    """
+    classifier, S_test, y_test, S_train, y_train = TP.fit_algorithm(S, model)
+    print("Erreur empirique :", TP.computeError(S_train), "erreurs")
+    print("Erreur réelle :", TP.computeError(S_test), "erreurs")
+    print("Taux de réussite : ", TP.computeScore(S_test), "%")
+
+
+def cross_validation_on_model(model, S):
+    """
+    Compute the cross-validation score of the given model (preferably unfitted): the
+    sample is split several times into randomized 80%/20% train/test sets and the test
+    scores are averaged, giving a much more reliable estimate than a single split.
+    """
+    print("Taux de réussite en cross-validation: ", TP.get_cross_val_score(model, S), "%")
+
+
+## EXAMPLE RUN OF THE PROJECT
+# INITIALISATION
+S = compute_images_dictionnary(PATH_SEA, PATH_WITHOUT_SEA)
+svc_model = get_SVC_model_with_best_parameters()
+
+# TESTS
+cross_validation_on_model(svc_model, S)
+
-result = GridSearchCV(SVC(), svc_params)
-
-S = TP.buildSampleFromPath(path1_t, path2_t)
+"""S = TP.buildSampleFromPath(PATH_SEA, PATH_WITHOUT_SEA)
 #classifier, S_test, y_test, S_train, y_train = TP.fitFromHisto(S, result)
 #TP.predictFromHisto(S, classifier)
 #print("Taux de réussite (╯°□°)╯︵ ┻━┻ : ", TP.computeScore(S_test), "%")
@@ -33,4 +81,4 @@ print("Taux de réussite en cross validation SVC: ", TP.get_cross_val_score(resu
 #print("Taux de réussite en cross validation XGBOOST: ", TP.get_cross_val_score(XGBClassifier(), S_train, S_test, y_train, y_test), "%")
 #print("Taux de réussite en cross validation randomForest: ", TP.get_cross_val_score(GridSearchCV(RandomForestClassifier(), rand_forest_params), S_train, S_test, y_train, y_test), "%")
 #print("Taux de réussite en cross validation KNeighbors: ", TP.get_cross_val_score(GridSearchCV(KNeighborsClassifier(), knn_params), S_train, S_test, y_train, y_test), "%")
-#TP.computePredictionFile(classifier, TP.fetch_images_to_dict("./Init/Data CC2"))
+#TP.computePredictionFile(classifier, TP.fetch_images_to_dict("./Init/Data CC2"))"""
\ No newline at end of file \ No newline at end of file
@@ -89,37 +89,58 @@ def computeDict(image_path, path, y_true_value, max_size: tuple):
     image = image.convert("RGB")
     resized = resizeImage(image, *max_size)
-    #print(np.asarray(resized))
-    a = np.uint8(unsharp_mask(np.asarray(resized), radius=10, amount=1)*255)
-    #print(a)
-    resized = Image.fromarray(a)
-    #a.save("ttt.png")  # we do not store the resized image, we compute everything before discarding it
-    #image_gl = resized.convert("L")
+    unsharp_resized = apply_unsharp_mask(resized)
+    resized = Image.fromarray(unsharp_resized)
+
     rotated = [resized]  #, resized.rotate(90), resized.rotate(180), resized.rotate(270)]
     #rotated_gl = [im.convert("L") for im in rotated]
     rotated_gl = [resized.convert('L')]
-    #rotated_gl += [rotated_gl[0].rotate(90), rotated_gl[0].rotate(180), rotated_gl[0].rotate(270)]
     histogram = computeHisto(resized)
     result = []
-    #create_dictionnary_image(full_path, histogram, rotated_gl[0], y_true_value, result)
+    #create_dictionary_image(full_path, histogram, rotated_gl[0], y_true_value, result)
+    threaded_dictionary_creation(full_path, histogram, result, rotated_gl, y_true_value)
+    return result
+
+
+def threaded_dictionary_creation(full_path, histogram, result, rotated_gl, y_true_value):
+    """
+    Append to the list "result" one dictionary per image in "rotated_gl", a list of
+    grey-level versions of the same image rotated at different angles, delegating the
+    computation of the criteria used to train the model to one thread per image.
+    The true Y value (whether the sea is in the image) is attributed when the user knows it.
+    :param full_path: the full path of the image
+    :param histogram: the color histogram of the original resized image
+    :param result: the list to which the dictionaries of the rotated images are added
+    :param rotated_gl: the list of the SAME image rotated at different angles
+    :param y_true_value: int telling whether the sea is in the image; used to train the model and assess its accuracy
+    """
     threads = []
     for image_gl in rotated_gl:
-        thread = Thread(target=create_dictionnary_image, args=(full_path, histogram, image_gl, y_true_value, result))
+        thread = Thread(target=create_dictionary_image, args=(full_path, histogram, image_gl, y_true_value, result))
         threads.append(thread)
         thread.start()
     for thread in threads:
         thread.join()
-    return result
-    #print(computePixelBW_histo(resized))
-    #return [create_dictionnary_image(full_path, histogram, image_gl, y_true_value) for image_gl in rotated_gl]
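As an aside, the fan-out/join pattern used here reduces to a small self-contained sketch; it relies on the fact that list.append is atomic under CPython's GIL, so the worker threads can safely share one result list:

from threading import Thread

results = []

def work(n):
    results.append(n * n)  # append is atomic in CPython, safe to share

threads = [Thread(target=work, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(sorted(results))  # [0, 1, 4, 9]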
+
+
+def apply_unsharp_mask(image):
+    """
+    Apply the unsharp-mask filter to the image. This filter enhances some textures;
+    during our tests it was a huge improvement for detecting the sea texture and for
+    telling it apart from the sky.
+    :param image: the image on which to apply the unsharp mask
+    :return: the filtered image as an np.uint8 array
+    """
+    return np.uint8(unsharp_mask(np.asarray(image), radius=10, amount=1) * 255)
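Standalone, the filter can be previewed as below (hypothetical file names; skimage's unsharp_mask rescales the input to floats in [0, 1], hence the multiplication by 255 before casting back to uint8):

import numpy as np
from PIL import Image
from skimage.filters import unsharp_mask

image = Image.open("example.jpg").convert("RGB")  # hypothetical input file
arr = unsharp_mask(np.asarray(image), radius=10, amount=1)  # float result in [0, 1]
Image.fromarray(np.uint8(arr * 255)).save("example_sharpened.jpg")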
+
+
-def create_dictionnary_image(full_path, histogram, image_gl, y_true_value, liste):
+def create_dictionary_image(full_path, histogram, image_gl, y_true_value, liste):
+    """
+    Append to "liste" the dictionary containing the potential criteria used to train
+    a model, such as the color histogram of the image, its path, and other extracted data.
+    """
     liste.append({"name_path": full_path,
                   # "resized_image": resized,
                   "X_histo": histogram,
@@ -152,6 +173,11 @@ def computeHisto(image: PIL.Image.Image):
     return image.histogram()
 
 def get_gabor_filters(image):
+    """
+    Apply Gabor filters to the given image. This yields two arrays, the real and the
+    imaginary parts of the filtered response, on which we compute the mean, the
+    variance and the standard deviation.
+    """
     image_arr = np.asarray(image)
     #print(image_arr.shape, image_arr)
     #frequencies = [0.2]
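The statistics named in the docstring can be sketched as follows; the random array stands in for a grey-level image, and frequency=0.2 is taken from the commented-out line above:

import numpy as np
from skimage.filters import gabor

gray = (np.random.rand(64, 64) * 255).astype(np.uint8)  # stand-in grey-level image
real, imag = gabor(gray, frequency=0.2)  # real and imaginary filter responses
features = [real.mean(), real.var(), real.std(),
            imag.mean(), imag.var(), imag.std()]
print(features)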
@@ -179,10 +205,15 @@ def get_gabor_filters(image):
 
 def extract_relevant_data(l: dict) -> list:
+    """
+    Extract the data from an image dictionary, so that the model can then be fitted
+    on the extracted information.
+    :param l: the dictionary associated with an image
+    :return: a list concatenating the criteria relevant for training the model
+    """
     return l["X_histo"] + l["gabor_features"] + l["X_glcm_data"]
     #78% l["X_histo"] + l["X_glcm_data"]
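With a made-up image dictionary, the concatenation looks like this:

image_dict = {"X_histo": [12, 3, 0],         # color histogram (made up, truncated)
              "gabor_features": [0.4, 0.1],  # Gabor statistics (made up)
              "X_glcm_data": [5.2]}          # GLCM features (made up)
print(extract_relevant_data(image_dict))  # [12, 3, 0, 0.4, 0.1, 5.2]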
 
-def fitFromHisto(S, algo):
+def fit_algorithm(S, algo):
     """
     Fit the given algorithm (classifier) with the sample S, which we split into train/test lists.
     We use the scikit-learn model syntax for this method.
@@ -263,7 +294,7 @@ def computePredictionFile(classifier, images_test=None):
         file.write("# Concaténation: histogramme de couleurs, niveaux de gris \n")
         S = buildSampleFromPath("./Init/Mer", "./Init/Ailleurs")
-        classifier, S_test, y_test, S_train, y_train = fitFromHisto(S, classifier)
+        classifier, S_test, y_test, S_train, y_train = fit_algorithm(S, classifier)
         predictFromHisto(S, classifier)
         predictFromHisto(images_test, classifier)
@@ -283,6 +314,13 @@ def computePredictionFile(classifier, images_test=None):
 
 def get_cross_val_score(classifier, S):
+    """
+    Interface for computing the cross-validation score of the given classifier
+    (preferably unfitted): the sample list is split randomly several times into
+    80% training / 20% testing, and the scores obtained on each split are averaged.
+    :param classifier: the classifier (preferably unfitted) to evaluate
+    :param S: the sample list, containing the dictionaries of the images
+    """
     df = pd.DataFrame(S)
     y = np.array(df["y_true_class"])
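Since cross-validation needs a flat feature matrix and label vector, the list of image dictionaries is first converted through pandas; a toy version of that step, with made-up dictionaries:

import numpy as np
import pandas as pd

S = [{"y_true_class": 1, "X_histo": [3, 0]},  # made-up image dictionaries
     {"y_true_class": 0, "X_histo": [1, 2]}]
df = pd.DataFrame(S)
y = np.array(df["y_true_class"])  # array([1, 0])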