Refactored the code to show the pipeline

TODO: remove Main.py, TP.py and the other test files if they are no longer needed.

Refactored the code to show the pipeline
cc2aa019 · paul_pvc · 5e4f1c7e · cc2aa019 · cc2aa019 · cc2aa019
Commit cc2aa019 authored 3 months ago by paul_pvc
--- a/Main.py
+++ b/Main.py
@@ -5,7 +5,7 @@ from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import GridSearchCV
 from sklearn.neural_network import MLPClassifier
 from xgboost import XGBClassifier
+import time
 import TP
@@ -69,6 +69,7 @@ svc_model = get_SVC_model_with_best_parameters()
 #TESTS
 cross_validation_on_model(svc_model, S)
+#test_model_on_single_train(svc_model, S)

--- a/Main/MainProjet.py
+++ b/Main/MainProjet.py
+from Pipeline.ImageFolderManager import buildSampleFromPath
+from Pipeline.Model import get_SVC_model_with_best_parameters
+from Pipeline.Evaluation import cross_validation_on_model
+# Entry script: build the sample from the two image folders, then print the
+# cross-validation score of the grid-searched SVC on it.
+# Both paths are relative, so they are resolved against the current working directory.
+PATH_SEA = "../Init/Mer"  # folder of the images containing the sea
+PATH_WITHOUT_SEA = "../Init/Ailleurs"  # folder of the images without the sea
+# PATH_SEA is passed first, PATH_WITHOUT_SEA second.
+S = buildSampleFromPath(PATH_SEA, PATH_WITHOUT_SEA)
+svc_model = get_SVC_model_with_best_parameters()
+# The score is printed to the console; nothing is returned or stored.
+cross_validation_on_model(svc_model, S)
\ No newline at end of file
--- a/Pipeline/AlgoTraining.py
+++ b/Pipeline/AlgoTraining.py
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from ImageDictionaryManager import extract_relevant_data
+def fit_algorithm(S, algo):
+    """
+    Fit a scikit-learn style classifier on a random train/test split of the sample.
+    :param S: the sample, a list of image dictionaries holding a "y_true_class" entry
+    :param algo: the classifier to fit; its fit(X, y) method is called
+    :return: the tuple (algo, S_test, y_test, S_train, y_train), algo being fitted on the train part
+    """
+    labels = np.array(pd.DataFrame(S)["y_true_class"])
+    # test_size=0.2 keeps 20% of the images aside for testing.
+    train_images, test_images, train_labels, test_labels = train_test_split(S, labels, test_size=0.2)
+    train_features = np.array([np.array(extract_relevant_data(image)) for image in train_images])
+    algo.fit(train_features, train_labels)
+    return algo, test_images, test_labels, train_images, train_labels
+def predictFromHisto(S, model, list_dict=True):
+    """
+    Predict the class of every image of the sample with the given fitted model.
+    :param S: the sample to predict on (list of image dictionaries)
+    :param model: the fitted model; its predict method is called on the extracted features
+    :param list_dict: when True the predictions are written into each dictionary under
+        "y_predicted_class"; when False they are returned instead
+    :return: None when list_dict is True, otherwise the predictions returned by model.predict
+    """
+    features = np.array([extract_relevant_data(image) for image in S])
+    predictions = model.predict(features)
+    if not list_dict:
+        return predictions
+    # Store each prediction next to the image it belongs to.
+    for image, predicted in zip(S, predictions):
+        image["y_predicted_class"] = predicted
\ No newline at end of file
--- a/Pipeline/Evaluation.py
+++ b/Pipeline/Evaluation.py
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import cross_val_score
+from sklearn.metrics import accuracy_score
+from ImageDictionaryManager import extract_relevant_data
+from AlgoTraining import fit_algorithm
+def computeError(S):
+    """
+    Compute the share of images whose predicted class differs from the true class.
+    :param S: the sample to check, each entry holding "y_true_class" and "y_predicted_class"
+    :return: the error rate (between 0 and 1), rounded to two decimals
+    """
+    mismatches = sum(1 for image in S if image["y_true_class"] != image["y_predicted_class"])
+    return round(mismatches / len(S), 2)
+def computeScore(S):
+    """
+    Compute the accuracy of the stored predictions on the given sample.
+    :param S: the sample to check, each entry holding "y_true_class" and "y_predicted_class"
+    :return: the accuracy as a percentage, rounded to the nearest whole percent (float)
+    """
+    y_true_classes = [image["y_true_class"] for image in S]
+    y_predicted_classes = [image["y_predicted_class"] for image in S]
+    # Scale before rounding: round(x, 2) * 100 leaks float noise
+    # (0.57 * 100 == 56.99999999999999), which then shows up in the console output.
+    return float(round(accuracy_score(y_true_classes, y_predicted_classes) * 100))
+def get_cross_val_score(classifier, S):
+    """
+    Compute the mean cross-validation score of a classifier over the sample.
+    The folds are produced by scikit-learn's cross_val_score with its default splitting strategy.
+    :param classifier: the classifier to evaluate (preferably not trained yet)
+    :param S: the sample list, containing the dictionaries of the images
+    :return: the mean of the fold scores, multiplied by 100
+    """
+    labels = np.array(pd.DataFrame(S)["y_true_class"])
+    features = np.array([np.array(extract_relevant_data(image)) for image in S])
+    # n_jobs=-1 asks scikit-learn to use all available processors.
+    fold_scores = cross_val_score(classifier, features, labels, n_jobs=-1)
+    return np.mean(fold_scores) * 100
+def test_model_on_single_train(model, S):
+    """
+    Fit the model on one random train/test split of the sample and print, on the console,
+    the error on the training part, the error on the test part and the score on the test part.
+    :param model: the model to evaluate (preferably not trained yet)
+    :param S: the sample list, containing the dictionaries of the images
+    :return: None
+    """
+    # Local import: this module only imports fit_algorithm from AlgoTraining at file level.
+    from AlgoTraining import predictFromHisto
+    classifier, S_test, y_test, S_train, y_train = fit_algorithm(S, model)
+    # Fill "y_predicted_class" on both parts first; without these calls the entries keep
+    # whatever value they had before (None for freshly built samples), so the printed
+    # errors and score would not reflect the fitted model.
+    predictFromHisto(S_train, classifier)
+    predictFromHisto(S_test, classifier)
+    print("Erreur empirique :", computeError(S_train), "erreurs")
+    print("Erreur réelle :", computeError(S_test), "erreurs")
+    print("Taux de réussite : ", computeScore(S_test), "%")
+def cross_validation_on_model(model, S):
+    """
+    Print the cross-validation score of the given model on the sample.
+    The score comes from get_cross_val_score, i.e. a mean over several train/test splits.
+    :param model: the model to evaluate (preferably not trained yet)
+    :param S: the sample list, containing the dictionaries of the images
+    :return: None
+    """
+    score = get_cross_val_score(model, S)
+    print("Taux de réussite en cross-validation: ", score, "%")
\ No newline at end of file
--- a/Pipeline/GLCM.py
+++ b/Pipeline/GLCM.py
+import numpy as np
+from skimage.feature import graycomatrix, graycoprops
+def compute_glcm_caracteristics(image_gl):
+    """
+    Compute the GLCM matrix of the image with scikit-image, then read 5 characteristics from it:
+    - the dissimilarity
+    - the correlation
+    - the contrast
+    - the energy
+    - the homogeneity
+    :param image_gl: the gray level image
+    :return: the list of the 5 characteristics, in the order listed above
+    """
+    pixels = np.array(image_gl)
+    # A single distance (10) and a single angle (3) are used, hence the [0, 0] lookup below.
+    # NOTE(review): the angle value 3 is presumably in radians - confirm it is intentional.
+    glcm = graycomatrix(pixels, distances=[10], angles=[3], levels=256,
+                        symmetric=True, normed=True)
+    property_names = ('dissimilarity', 'correlation', 'contrast', 'energy', 'homogeneity')
+    return [graycoprops(glcm, name)[0, 0] for name in property_names]
\ No newline at end of file
--- a/Pipeline/Gabor.py
+++ b/Pipeline/Gabor.py
+import numpy as np
+from skimage.filters import gabor
+def get_gabor_filters(image):
+    """
+    Apply one Gabor filter (frequency 0.2, theta 0) to the image and summarise its response.
+    :param image: the image to filter, converted with np.asarray
+    :return: the list [imaginary mean, imaginary variance, real mean, real variance,
+        real max - real min]
+    """
+    pixels = np.asarray(image)
+    real_part, imag_part = gabor(pixels, frequency=0.2, theta=0)
+    return [
+        imag_part.mean(),
+        imag_part.var(),
+        real_part.mean(),
+        real_part.var(),
+        np.max(real_part) - np.min(real_part),
+    ]
--- a/Pipeline/Histogram.py
+++ b/Pipeline/Histogram.py
+import PIL
+from PIL import Image  
+def computeHisto(image: PIL.Image.Image):
+    """
+    Compute the histogram of the image through its own histogram() method.
+    :param image: the Pillow image to analyse
+    :return: whatever image.histogram() returns
+    """
+    color_histogram = image.histogram()
+    return color_histogram
+def computePixelBW_histo(image_gl):
+    """
+    Compute the histogram of a gray level image through its own histogram() method.
+    :param image_gl: the image in gray level
+    :return: whatever image_gl.histogram() returns
+    """
+    gray_histogram = image_gl.histogram()
+    return gray_histogram
\ No newline at end of file
--- a/Pipeline/ImageDictionaryManager.py
+++ b/Pipeline/ImageDictionaryManager.py
+import os
+from PIL import Image
+from ImageResizing import resizeImage
+from Unsharp_Mask import apply_unsharp_mask
+from Histogram import computeHisto
+from GLCM import compute_glcm_caracteristics
+from Gabor import get_gabor_filters
+from threading import Thread
+def computeDict(image_path, path, y_true_value, max_size: tuple):
+    """
+    Build the dictionary entries describing one image: load it, resize it, apply the
+    unsharp mask, then delegate the feature extraction to threaded_dictionary_creation.
+    :param image_path: relative path of the image in the folder
+    :param path: path of the folder containing the image
+    :param y_true_value: the known class of the image, stored under "y_true_class"
+    :param max_size: the two sizes forwarded, in order, to resizeImage
+    :return: a list of dicts representing the image (one per gray level variant, currently one)
+    """
+    full_path = os.path.join(path, image_path)
+    # The context manager closes the underlying file once the pixels have been converted,
+    # so a large folder does not pile up open file handles.
+    with Image.open(full_path) as image:
+        rgb_image = image.convert("RGB")
+    resized = resizeImage(rgb_image, *max_size)
+    sharpened = Image.fromarray(apply_unsharp_mask(resized))
+    # Only the un-rotated gray level image is used for now.
+    rotated_gl = [sharpened.convert('L')]
+    histogram = computeHisto(sharpened)
+    result = []
+    threaded_dictionary_creation(full_path, histogram, result, rotated_gl, y_true_value)
+    return result
+def create_dictionary_image(full_path, histogram, image_gl, y_true_value, liste):
+    """
+    Append to "liste" the dictionary describing one image: its path, its color histogram,
+    its GLCM data, its Gabor features, its true class and an empty predicted class.
+    :param full_path: the full path of the image
+    :param histogram: the color histogram stored under "X_histo"
+    :param image_gl: the gray level image given to the GLCM and Gabor extractors
+    :param y_true_value: the value stored under "y_true_class"
+    :param liste: the list receiving the new dictionary
+    :return: None
+    """
+    glcm_data = compute_glcm_caracteristics(image_gl)
+    gabor_data = get_gabor_filters(image_gl)
+    entry = {
+        "name_path": full_path,
+        "X_histo": histogram,
+        "X_glcm_data": glcm_data,
+        "gabor_features": gabor_data,
+        "y_true_class": y_true_value,
+        "y_predicted_class": None,
+    }
+    liste.append(entry)
+def extract_relevant_data(l: dict) -> list:
+    """
+    Gather the features of an image dictionary used to fit or query the model.
+    :param l: the dictionary associated with an image
+    :return: the concatenation of "X_histo", "gabor_features" and "X_glcm_data", in that order
+    """
+    histogram = l["X_histo"]
+    gabor_features = l["gabor_features"]
+    glcm_data = l["X_glcm_data"]
+    return histogram + gabor_features + glcm_data
+def threaded_dictionary_creation(full_path, histogram, result, rotated_gl, y_true_value):
+    """
+    Run create_dictionary_image in one thread per gray level image of "rotated_gl" and wait
+    for all of them; each thread appends its dictionary to "result".
+    :param full_path: the full path of the image
+    :param histogram: the color histogram of the original resized image
+    :param result: the list in which the dictionaries are appended
+    :param rotated_gl: the list of gray level versions of the SAME image
+    :param y_true_value: the true class stored in every dictionary
+    :return: None
+    """
+    workers = [
+        Thread(target=create_dictionary_image, args=(full_path, histogram, image_gl, y_true_value, result))
+        for image_gl in rotated_gl
+    ]
+    for worker in workers:
+        worker.start()
+    # Block until every dictionary has been appended.
+    for worker in workers:
+        worker.join()
\ No newline at end of file
--- a/Pipeline/ImageFolderManager.py
+++ b/Pipeline/ImageFolderManager.py
+import os
+from ImageDictionaryManager import computeDict
+MAX_SIZE = (224, 224)
+def buildSampleFromPath(path1, path2, size=0):
+    """
+    Build the sample list, a list of dictionaries representing the images
+    used to train and test the model.
+    :param path1: path of the good images, labelled with class 1
+    :param path2: path of the bad images, labelled with class 0
+    :param size: optional limit forwarded to each folder; 0 (default) keeps the current
+        unrestricted behaviour
+    :return: the list of image dictionaries, good images first
+    """
+    S = []
+    # Forward size: it was documented but previously ignored.
+    S += fetch_images_to_dict(path1, size=size, y_true_class=1)
+    S += fetch_images_to_dict(path2, size=size, y_true_class=0)
+    return S
+def fetch_images_to_dict(path, size=0, y_true_class=-1):
+    """
+    Produce the list of dictionaries describing the images of one folder.
+    :param path: path to the images folder
+    :param size: maximum number of folder entries to use; 0 or less means all of them
+    :param y_true_class: class given to every image, keep at -1 if it is unknown or unused
+    :return: list of images (dicts representing them)
+    """
+    file_names = os.listdir(path)
+    # Slice only when a limit is requested: the former "[:-1]" fallback silently
+    # dropped the last file of the folder.
+    if size > 0:
+        file_names = file_names[:size]
+    images = []
+    for image_path in file_names:
+        images += computeDict(image_path, path, y_true_class, MAX_SIZE)
+    return images
--- a/Pipeline/ImageResizing.py
+++ b/Pipeline/ImageResizing.py
+from PIL import Image
+def resizeImage(i, h, l):
+    """
+    Resize the image with the LANCZOS resampling filter.
+    :param i: the image to resize
+    :param h: first component of the size tuple given to i.resize
+    :param l: second component of the size tuple given to i.resize
+    :return: the resized image
+    NOTE(review): Pillow documents the size tuple as (width, height), so h is used as the
+    width here - confirm the intended order before passing a non-square size.
+    """
+    target_size = (h, l)
+    return i.resize(target_size, Image.LANCZOS)
\ No newline at end of file
--- a/Pipeline/Model.py
+++ b/Pipeline/Model.py
+from sklearn.model_selection import GridSearchCV
+from sklearn.svm import SVC
+def get_SVC_model_with_best_parameters() -> GridSearchCV:
+    """
+    Create an untrained grid search over an SVC classifier, exploring the kernel
+    ('linear' or 'rbf') and the C constant (1 or 10).
+    :return: the GridSearchCV object, not fitted
+    """
+    search_space = {"kernel": ('linear', 'rbf'), "C": [1, 10]}
+    return GridSearchCV(SVC(), search_space)
--- a/Pipeline/Unsharp_Mask.py
+++ b/Pipeline/Unsharp_Mask.py
+import numpy as np
+from skimage.filters import unsharp_mask
+def apply_unsharp_mask(image):
+    """
+    Apply scikit-image's unsharp mask (radius 10, amount 1) to the image. According to the
+    authors' tests, this filter greatly helps telling the sea texture from the sky.
+    :param image: the image on which we want to apply the unsharp mask
+    :return: the filtered image, multiplied by 255 and cast to uint8, as a numpy array
+    """
+    pixels = np.asarray(image)
+    sharpened = unsharp_mask(pixels, radius=10, amount=1)
+    return np.uint8(sharpened * 255)
--- a/TP.py
+++ b/TP.py
@@ -223,9 +223,6 @@ def fit_algorithm(S, algo):
    S_train, S_test, y_train, y_test = train_test_split(S, y, test_size=0.2)
    X_train = np.array([np.array(extract_relevant_data(l)) for l in S_train])
-    #X_train = df[["X_histo", "X_pixelbw"]]
-    #print(X_train)
-    #print(len(X_train[0]))
    algo.fit(X_train, y_train)
@@ -293,7 +290,6 @@ def computePredictionFile(classifier, images_test=None):
    predictFromHisto(S, classifier)
    predictFromHisto(images_test, classifier)
-    #predictFromHisto(S, classifier)
    images = S if images_test is None else images_test
    for image in images:
        image_name = os.path.split(image["name_path"])[1]

--- a/test_for_me.py
+++ b/test_for_me.py
+import numpy as np
+from PIL import Image
+import os
+import TP
+# Scratch script: resize one image through TP.resizeImage, display it, and save it
+# both as-is and rotated by 90 degrees.
+path = './Init/Mer'
+images_path = os.listdir(path)
+# Use the first entry returned by os.listdir.
+full_path = os.path.join(path, images_path[0])
+image = Image.open(full_path)
+# Convert to RGB before resizing with the sizes 224, 224.
+image = TP.resizeImage(image.convert("RGB"), 224, 224)
+image.show()
+image.save("test_no_rotate.png")
+# Rotated copy, saved next to the un-rotated one.
+image_r = image.rotate(90)
+image_r.save("test_rotate.png")