Skip to content
Snippets Groups Projects
Commit cc2aa019 authored by paul_pvc's avatar paul_pvc
Browse files

refactored the code to show the pipeline

Still need to remove main, TP.py and the other test files if they are no longer needed
parent 5e4f1c7e
No related branches found
No related tags found
No related merge requests found
...@@ -5,7 +5,7 @@ from sklearn.ensemble import RandomForestClassifier ...@@ -5,7 +5,7 @@ from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier from xgboost import XGBClassifier
import time
import TP import TP
...@@ -69,6 +69,7 @@ svc_model = get_SVC_model_with_best_parameters() ...@@ -69,6 +69,7 @@ svc_model = get_SVC_model_with_best_parameters()
#TESTS #TESTS
cross_validation_on_model(svc_model, S) cross_validation_on_model(svc_model, S)
#test_model_on_single_train(svc_model, S)
......
from Pipeline.ImageFolderManager import buildSampleFromPath
from Pipeline.Model import get_SVC_model_with_best_parameters
from Pipeline.Evaluation import cross_validation_on_model
# Entry script of the pipeline: build the labelled sample from the two image
# folders, create the grid-searched SVC and print its cross-validation score.
PATH_SEA = "../Init/Mer" # Folder of the images that contain the sea
PATH_WITHOUT_SEA = "../Init/Ailleurs" # Folder of the images that do not contain the sea
# S is the sample: a list of image dictionaries built from both folders.
S = buildSampleFromPath(PATH_SEA, PATH_WITHOUT_SEA)
# The model is created here and handed, still unfitted by this script, to the evaluation.
svc_model = get_SVC_model_with_best_parameters()
# Prints the cross-validation score of the model on S.
cross_validation_on_model(svc_model, S)
\ No newline at end of file
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from ImageDictionaryManager import extract_relevant_data
def fit_algorithm(S, algo):
    """
    Fit a classifier on a random 80%/20% train/test split of the sample S.

    The labels are read from the "y_true_class" key of each image dictionary and
    the feature vectors are built with extract_relevant_data. Only the training
    part of the split is used to fit the classifier.
    :param S: the sample, a list of image dictionaries
    :param algo: the classifier to fit; it must expose fit(X, y) like the sklearn
                 models and is fitted in place
    :return: the tuple (algo, S_test, y_test, S_train, y_train)
    """
    # The labels are read straight from the dictionaries; no intermediate table
    # of the whole sample is needed for that.
    y = np.array([image["y_true_class"] for image in S])
    S_train, S_test, y_train, y_test = train_test_split(S, y, test_size=0.2)
    X_train = np.array([np.array(extract_relevant_data(image)) for image in S_train])
    algo.fit(X_train, y_train)
    return algo, S_test, y_test, S_train, y_train
def predictFromHisto(S, model, list_dict=True):
    """
    Run the fitted model on the feature vector of every image of the sample S.

    :param S: the sample to predict on, a list of image dictionaries
    :param model: the fitted model, exposing predict(X)
    :param list_dict: when True the predictions are written into each dictionary
                      under "y_predicted_class"; when False they are returned
    :return: None when list_dict is True, otherwise the predictions of the model
    """
    features = np.array([extract_relevant_data(image) for image in S])
    predictions = model.predict(features)
    if not list_dict:
        return predictions
    # Store each prediction next to the true class of the image it belongs to.
    for index, image in enumerate(S):
        image["y_predicted_class"] = predictions[index]
\ No newline at end of file
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from ImageDictionaryManager import extract_relevant_data
from AlgoTraining import fit_algorithm
def computeError(S):
    """
    Compute the share of misclassified images in the given sample.

    An image counts as an error when its "y_true_class" differs from its
    "y_predicted_class".
    :param S: the sample to test, a non-empty list of image dictionaries
    :return: the error rate, between 0 and 1, rounded to two decimals
    """
    misclassified = sum(
        1 for image in S if image["y_true_class"] != image["y_predicted_class"]
    )
    return round(misclassified / len(S), 2)
def computeScore(S):
    """
    Compute the accuracy of the model on the given sample, as a percentage.

    The true and predicted classes are read from the "y_true_class" and
    "y_predicted_class" keys of each image dictionary.
    :param S: the sample to test, a list of image dictionaries
    :return: the accuracy in percent, rounded to a whole percent, as a float
    """
    y_true_classes = [image["y_true_class"] for image in S]
    y_predicted_classes = [image["y_predicted_class"] for image in S]
    accuracy = accuracy_score(y_true_classes, y_predicted_classes)
    # Scale first, round last: multiplying an already rounded fraction by 100
    # brings back floating-point noise (0.57 * 100 == 56.99999999999999).
    return float(round(accuracy * 100))
def get_cross_val_score(classifier, S):
    """
    Compute the mean cross-validation score of a classifier on the sample S.

    The splitting, fitting and scoring are delegated to sklearn's cross_val_score,
    called with its default "cv" strategy and all available cores (n_jobs=-1).
    NOTE(review): in current scikit-learn the default is 5 stratified, unshuffled
    folds for a classifier (80% training / 20% testing per fold) - confirm for
    the installed version.
    :param classifier: the classifier (preferably empty, without training) to test
    :param S: the sample list, containing the dictionary of the images
    :return: the mean of the fold scores, multiplied by 100
    """
    # The labels are read straight from the dictionaries; no intermediate table
    # of the whole sample is needed for that.
    y = np.array([image["y_true_class"] for image in S])
    X = np.array([np.array(extract_relevant_data(image)) for image in S])
    scores = cross_val_score(classifier, X, y, n_jobs=-1)
    return np.mean(scores) * 100
def test_model_on_single_train(model, S):
    """
    Fit the given model (preferably empty, with no training) on one single random
    split of the sample and print its results.

    The split comes from fit_algorithm (80% of the images for training, 20% for
    testing). The function prints the error on the training part, the error on
    the testing part and the score on the testing part.
    :param model: the classifier to fit and evaluate
    :param S: the sample, a list of image dictionaries; the predictions are
              written into the dictionaries under "y_predicted_class"
    :return: None
    """
    classifier, S_test, y_test, S_train, y_train = fit_algorithm(S, model)
    # fit_algorithm only fits the model, while computeError and computeScore read
    # "y_predicted_class": the predictions have to be stored before measuring.
    for subset in (S_train, S_test):
        predictions = classifier.predict(
            np.array([extract_relevant_data(image) for image in subset])
        )
        for image, predicted in zip(subset, predictions):
            image["y_predicted_class"] = predicted
    print("Erreur empirique :", computeError(S_train), "erreurs")
    print("Erreur réelle :", computeError(S_test), "erreurs")
    print("Taux de réussite : ", computeScore(S_test), "%")
def cross_validation_on_model(model, S):
    """
    Print the cross-validation score of the given model on the sample S.

    The score itself is computed by get_cross_val_score.
    :param model: the classifier to evaluate (preferably empty, with no training)
    :param S: the sample, a list of image dictionaries
    :return: None
    """
    mean_score = get_cross_val_score(model, S)
    print("Taux de réussite en cross-validation: ", mean_score, "%")
\ No newline at end of file
import numpy as np
from skimage.feature import graycomatrix, graycoprops
def compute_glcm_caracteristics(image_gl):
    """
    Compute the GLCM matrix of the image with skimage, then read 5 texture
    characteristics from it:
    - the dissimilarity
    - the correlation
    - the contrast
    - the energy
    - the homogeneity
    :param image_gl: the gray level image
    :return: the list of the 5 characteristics, in the order listed above
    """
    pixels = np.array(image_gl)
    # A single distance (10) and a single angle (3) are used, so the only entry
    # of each property matrix is [0, 0].
    # NOTE(review): skimage expects the angles in radians; 3 is an unusual value,
    # confirm it is intended.
    glcm = graycomatrix(pixels, distances=[10], angles=[3], levels=256,
                        symmetric=True, normed=True)
    property_names = ('dissimilarity', 'correlation', 'contrast', 'energy', 'homogeneity')
    return [graycoprops(glcm, name)[0, 0] for name in property_names]
\ No newline at end of file
import numpy as np
from skimage.filters import gabor
def get_gabor_filters(image):
    """
    Apply one Gabor filter (frequency 0.2, theta 0) on the given image and
    summarise its real and imaginary responses.

    :param image: the image to filter
    :return: the list [mean of the imaginary response, variance of the imaginary
             response, mean of the real response, variance of the real response,
             max - min of the real response]
    """
    pixels = np.asarray(image)
    real_part, imag_part = gabor(pixels, frequency=0.2, theta=0)
    return [
        imag_part.mean(),
        imag_part.var(),
        real_part.mean(),
        real_part.var(),
        np.max(real_part) - np.min(real_part),
    ]
import PIL
from PIL import Image
def computeHisto(image: PIL.Image.Image):
    """
    Compute the color histogram of the image with Pillow's histogram method.

    :param image: the image to analyse
    :return: the histogram, exactly as returned by image.histogram()
    """
    color_histogram = image.histogram()
    return color_histogram
def computePixelBW_histo(image_gl):
    """
    Compute the histogram of an image in gray level.

    :param image_gl: the gray level image, exposing a histogram() method
    :return: the histogram, exactly as returned by image_gl.histogram()
    """
    gray_histogram = image_gl.histogram()
    return gray_histogram
\ No newline at end of file
import os
from PIL import Image
from ImageResizing import resizeImage
from Unsharp_Mask import apply_unsharp_mask
from Histogram import computeHisto
from GLCM import compute_glcm_caracteristics
from Gabor import get_gabor_filters
from threading import Thread
def computeDict(image_path, path, y_true_value, max_size: tuple):
    """
    Build the dictionary describing one image: open it, convert it to RGB, resize
    it, apply the unsharp mask, then compute its histogram and the other criteria
    by calling the dedicated functions.

    :param image_path: relative path of the image in the folder
    :param path: path of the folder containing the image
    :param y_true_value: the true class of the image, stored under "y_true_class"
    :param max_size: the size to resize the image, unpacked into resizeImage
    :return: a list holding the dictionary that represents the image
    """
    full_path = os.path.join(path, image_path)
    # The context manager releases the file as soon as the RGB copy exists.
    with Image.open(full_path) as source:
        image = source.convert("RGB")
    resized = resizeImage(image, *max_size)
    # apply_unsharp_mask returns an array, so it is wrapped back into a PIL image.
    resized = Image.fromarray(apply_unsharp_mask(resized))
    # Only the unrotated image is used, in gray level, for the texture criteria.
    rotated_gl = [resized.convert('L')]
    histogram = computeHisto(resized)
    result = []
    threaded_dictionary_creation(full_path, histogram, result, rotated_gl, y_true_value)
    return result
def create_dictionary_image(full_path, histogram, image_gl, y_true_value, liste):
    """
    Build the dictionary of one image and append it to the given list.

    The dictionary holds the criteria that can be used to train a model: the
    color histogram, the GLCM characteristics and the Gabor features, together
    with the path of the image, its true class and a predicted class left empty.
    :param full_path: the full path of the image
    :param histogram: the color histogram of the image
    :param image_gl: the image in gray level, used for the GLCM and Gabor criteria
    :param y_true_value: the true class of the image
    :param liste: the list in which the dictionary is appended
    :return: None
    """
    image_record = {
        "name_path": full_path,
        "X_histo": histogram,
        "X_glcm_data": compute_glcm_caracteristics(image_gl),
        "gabor_features": get_gabor_filters(image_gl),
        "y_true_class": y_true_value,
        "y_predicted_class": None,
    }
    liste.append(image_record)
def extract_relevant_data(l: dict) -> list:
    """
    Build the feature vector of an image from its dictionary, the purpose being
    to fit the model on this vector.

    :param l: the dictionary associated with an image
    :return: the concatenation of the color histogram, the Gabor features and the
             GLCM characteristics, in this order
    """
    histogram = l["X_histo"]
    gabor_features = l["gabor_features"]
    glcm_data = l["X_glcm_data"]
    return histogram + gabor_features + glcm_data
def threaded_dictionary_creation(full_path, histogram, result, rotated_gl, y_true_value):
    """
    Add to the list "result" one dictionary per image of "rotated_gl", each one
    being computed by create_dictionary_image in its own thread. The function
    returns once every thread has finished.

    NOTE(review): with several images the order of the dictionaries in "result"
    presumably depends on the thread scheduling.
    :param full_path: the full_path of the image
    :param histogram: the color histogram of the original resized image
    :param result: the list in which the dictionaries are added
    :param rotated_gl: the list of the SAME image in gray level, rotated in different angles
    :param y_true_value: int that represents if there is the sea in the image
    :return: None
    """
    workers = [
        Thread(target=create_dictionary_image,
               args=(full_path, histogram, image_gl, y_true_value, result))
        for image_gl in rotated_gl
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
\ No newline at end of file
import os
from ImageDictionaryManager import computeDict
MAX_SIZE = (224, 224)
def buildSampleFromPath(path1, path2, size=0):
    """
    Build the sample list, a list of dictionaries representing the images
    used to train and test the model.

    :param path1: path of the folder of the good images, labelled 1
    :param path2: path of the folder of the bad images, labelled 0
    :param size: optional, restricts the number of images taken from each folder;
                 0 (the default) applies no restriction chosen by the caller
    :return: the list of the dictionaries of the first folder followed by those
             of the second folder
    """
    S = []
    # The restriction is forwarded so that it applies to both folders.
    S += fetch_images_to_dict(path1, size=size, y_true_class=1)
    S += fetch_images_to_dict(path2, size=size, y_true_class=0)
    return S
def fetch_images_to_dict(path, size=0, y_true_class=-1):
    """
    Produce the list of dictionaries representing the images of a folder.

    :param path: path to the images folder
    :param size: maximum number of images to take from the folder; a value of 0
                 or less takes every image
    :param y_true_class: value defined by the user for the classifier, keep at -1
                         if it won't be used or is not known
    :return: list of images (dict representing them)
    """
    # No limit is expressed with None: slicing up to -1 would drop the last file.
    limit = size if size > 0 else None
    images = []
    for image_path in os.listdir(path)[:limit]:
        images += computeDict(image_path, path, y_true_class, MAX_SIZE)
    return images
from PIL import Image
def resizeImage(i, h, l):
    """
    Resize the image with the LANCZOS filter of Pillow.

    NOTE(review): Pillow documents the size tuple of resize as (width, height),
    so "h" presumably acts as the width and "l" as the height - confirm. The
    callers visible in this file pass equal values.
    :param i: the image to resize
    :param h: the first component of the size tuple given to resize
    :param l: the second component of the size tuple given to resize
    :return: the resized image (PIL.Image.Image)
    """
    target_size = (h, l)
    return i.resize(target_size, Image.LANCZOS)
\ No newline at end of file
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
def get_SVC_model_with_best_parameters() -> GridSearchCV:
    """
    Instantiate an SVC classification model wrapped in a GridSearchCV.

    The grid explores the kernel ('linear' or 'rbf') and the C constant (1 or 10).
    The model is not trained in this function, so it is returned empty.
    :return: the unfitted grid search over the SVC
    """
    param_grid = {"kernel": ('linear', 'rbf'), "C": [1, 10]}
    return GridSearchCV(SVC(), param_grid)
import numpy as np
from skimage.filters import unsharp_mask
def apply_unsharp_mask(image):
    """
    Apply the unsharp mask filter of skimage (radius 10, amount 1) on the image.

    The filtered values are multiplied by 255 and cast to uint8.
    NOTE(review): this presumably relies on unsharp_mask returning floats in
    [0, 1] - confirm against the installed skimage version.
    :param image: the image on which we want to apply the unsharp mask
    :return: the filtered image as a numpy array of uint8
    """
    pixels = np.asarray(image)
    sharpened = unsharp_mask(pixels, radius=10, amount=1)
    return np.uint8(sharpened * 255)
...@@ -223,9 +223,6 @@ def fit_algorithm(S, algo): ...@@ -223,9 +223,6 @@ def fit_algorithm(S, algo):
S_train, S_test, y_train, y_test = train_test_split(S, y, test_size=0.2) S_train, S_test, y_train, y_test = train_test_split(S, y, test_size=0.2)
X_train = np.array([np.array(extract_relevant_data(l)) for l in S_train]) X_train = np.array([np.array(extract_relevant_data(l)) for l in S_train])
#X_train = df[["X_histo", "X_pixelbw"]]
#print(X_train)
#print(len(X_train[0]))
algo.fit(X_train, y_train) algo.fit(X_train, y_train)
...@@ -293,7 +290,6 @@ def computePredictionFile(classifier, images_test=None): ...@@ -293,7 +290,6 @@ def computePredictionFile(classifier, images_test=None):
predictFromHisto(S, classifier) predictFromHisto(S, classifier)
predictFromHisto(images_test, classifier) predictFromHisto(images_test, classifier)
#predictFromHisto(S, classifier)
images = S if images_test is None else images_test images = S if images_test is None else images_test
for image in images: for image in images:
image_name = os.path.split(image["name_path"])[1] image_name = os.path.split(image["name_path"])[1]
......
import numpy as np
from PIL import Image
import os
import TP
# Scratch script: resize the first image of the folder, display it, then save
# it both as is and rotated by 90 degrees.
path = './Init/Mer'
images_path = os.listdir(path)
# Only the first entry returned by os.listdir is used.
full_path = os.path.join(path, images_path[0])
image = Image.open(full_path)
# Converted to RGB, then resized to 224 x 224 by TP.resizeImage.
image = TP.resizeImage(image.convert("RGB"), 224, 224)
image.show()
image.save("test_no_rotate.png")
# Rotated copy, written to its own file.
image_r = image.rotate(90)
image_r.save("test_rotate.png")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment