TP Régression linéaire

try:
    import sklearn, numpy, matplotlib, ipywidgets, IPython 
except ModuleNotFoundError: 
    !pip3 install --quiet sklearn numpy matplotlib ipywidgets IPython
import numpy as np
import matplotlib.pyplot as plt

from ipywidgets import interact, interactive, IntSlider, Layout, interact_manual
import ipywidgets as widgets
from IPython.display import display

from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

Régression linéaire : premier exemple

On s’intéresse ici à la régression d’une droite sur un nuage de points, bruité par un bruit normal \(\mathcal{N}(m,\sigma^2)\), dont l’amplitude peut varier.

# Number of noisy samples drawn for the experiment.
nb_points = 50

# Intercept (b0) and slope (b1) of the "true line" y = b0 + b1*x.
b0 = 0.4
b1 = 2.5

# Regular grid on [0, 10] holding ten candidates per sample; the abscissas
# are drawn at random from that grid.
x1 = np.linspace(0, 10, num=10 * nb_points)
x = np.random.choice(x1, size=nb_points)



def F(test_size,amp_bruit,var_bruit,moyenne_bruit):
    """Fit a straight line to noisy samples of y = b0 + b1*x and plot the result.

    Reads the module-level globals ``x``, ``b0``, ``b1`` and ``nb_points``.
    Fresh noise is drawn on every call; only the train/test split is fixed
    (``random_state=42``).

    Parameters
    ----------
    test_size : float
        Forwarded to ``train_test_split`` as ``test_size``.
    amp_bruit : float
        Factor multiplying the noise samples.
    var_bruit : float
        Forwarded to ``np.random.normal`` as ``scale``, i.e. it is used as
        the standard deviation of the noise despite its name.
    moyenne_bruit : float
        Mean (``loc``) of the noise.

    Returns
    -------
    None
        The function creates a new matplotlib figure and overwrites
        ``regression.png`` in the current working directory.
    """
    y_true = b0 + b1 * x
    noise = np.random.normal(loc=moyenne_bruit, scale=var_bruit, size=nb_points)
    y = y_true + amp_bruit * noise

    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=42)
    # scikit-learn estimators expect a 2-D feature matrix.
    X_train = X_train.reshape(-1, 1)
    X_test = X_test.reshape(-1, 1)
    # Mean point of the training set, marked on the figure below.
    xmean = np.mean(X_train)
    ymean = np.mean(y_train)

    lr = LinearRegression()
    lr.fit(X_train, y_train)
    beta0, beta1 = lr.intercept_, lr.coef_[0]

    train_pred = np.array(lr.predict(X_train))
    test_pred = np.array(lr.predict(X_test))
    train_score = lr.score(X_train, y_train)
    test_score = lr.score(X_test, y_test)
    RMSE_train = np.sqrt(np.mean(np.square(train_pred - y_train)))
    RMSE_test = np.sqrt(np.mean(np.square(test_pred - y_test)))

    plt.figure(figsize=(12,6))
    plt.title("Score Entrainement : {0:3.4f}, Test : {1:3.4f} --  RMSE Entrainement : {2:3.4f}, Test : {3:3.4f}".format(train_score,test_score,RMSE_train,RMSE_test),fontsize=16)
    plt.xlabel("x")
    plt.ylabel("y")
    plt.plot(X_train,train_pred,'r', label='Régression')
    plt.plot(x,y_true,c='black', label='Vraie droite')

    plt.scatter(X_train,y_train,c='b',label='Entrainement')
    plt.scatter(X_test,y_test,edgecolors='k',marker='x',c='g',s=100,label='test')
    plt.scatter(X_test,test_pred,edgecolors='k',marker='x',c='magenta',s=100,label='prédit')

    plt.text(6, 4, "Droite : $y = {0:3.4f}+{1:3.4f}x$".format(b0,b1),fontsize=16)
    plt.text(6, 2, "Régression : $y = {0:3.4f}+{1:3.4f}x$".format(beta0,beta1),fontsize=16)
    # Correlation coefficient computed on the whole sample (train and test).
    plt.text(6, 0, "$R={:3.4f}$".format(np.corrcoef(x,y)[0,1]),fontsize=16)

    # Field {1} is ymean: the second coordinate used to repeat {0} and so
    # displayed xmean twice.
    plt.text(6, 6, "Centre de masse $X=({0:3.3f},{1:3.3f})^T$".format(xmean,ymean),fontsize=16)

    plt.text(xmean,ymean,"X",fontsize=20,label='Centre de masse')

    plt.grid(True)
    plt.legend(loc='best')

    plt.tight_layout()
    plt.savefig('regression.png',dpi=100)
# `interactive` only turns keyword arguments that match a parameter of F into
# widgets; any other keyword (such as `disabled=` or `continuous_update=`) is
# silently ignored.  The sliders are therefore built explicitly so that
# continuous_update=False really applies: F is re-run (new fit, new
# regression.png) only when a slider is released.  Ranges, steps and initial
# values are the ones the (min, max, step) abbreviations produce.
m = interactive(
    F,
    test_size=widgets.RadioButtons(options={"10%":0.1,"30%":0.3,"50%":0.5},description="Test"),
    amp_bruit=IntSlider(min=0,max=5,step=1,value=2,continuous_update=False),
    var_bruit=widgets.FloatSlider(min=0,max=2,step=0.1,value=1.0,continuous_update=False),
    moyenne_bruit=widgets.FloatSlider(min=-3,max=3,step=0.5,value=0.0,continuous_update=False),
)
display(m)

Linéaire ne veut pas dire droite…

# Second experiment: same number of samples, abscissas now taken in [-5, 5].
nb_points = 50
# Regular grid holding ten candidates per sample; func_fit also draws its
# curves on this grid.
x1 = np.linspace(-5, 5, num=10 * nb_points)
x = np.random.choice(x1, size=nb_points)
def func_fit(model_type,test_size,degree,amp_bruit,var_bruit,moyenne_bruit):
    """Fit a polynomial regression to noisy samples of a non-linear curve and plot it.

    The target is ``2x - 0.6x^2 + 0.2x^3 + 18 sin(x)``, sampled at the
    module-level abscissas ``x`` (``nb_points`` values) and drawn on the
    grid ``x1``.  The model is the pipeline
    ``StandardScaler -> PolynomialFeatures(degree) -> regressor``.
    Fresh noise is drawn on every call; only the train/test split is fixed
    (``random_state=55``).

    Parameters
    ----------
    model_type : str
        ``'Linéaire'`` (LinearRegression), ``'LASSO'`` (Lasso) or
        ``'Ridge'`` (Ridge), each with default hyper-parameters.
    test_size : float
        Forwarded to ``train_test_split`` as ``test_size``.
    degree : int
        Degree given to ``PolynomialFeatures``.
    amp_bruit : float
        Factor multiplying the noise samples.
    var_bruit : float
        Forwarded to ``np.random.normal`` as ``scale``, i.e. it is used as
        the standard deviation of the noise despite its name.
    moyenne_bruit : float
        Mean (``loc``) of the noise.

    Returns
    -------
    tuple
        ``(train_score, test_score)`` as returned by ``model.score``.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the three supported names.
    """
    # Validate first: with three independent `if`s an unknown name used to
    # leave `model` unbound and fail later with an UnboundLocalError.
    if model_type not in ('Linéaire', 'LASSO', 'Ridge'):
        raise ValueError(
            f"unknown model_type {model_type!r}; "
            "expected 'Linéaire', 'LASSO' or 'Ridge'"
        )

    y=2*x-0.6*x**2+0.2*x**3+18*np.sin(x)
    # Noise-free curve on the full grid, for display only.
    y1=2*x1-0.6*x1**2+0.2*x1**3+18*np.sin(x1)
    y= y+amp_bruit*np.random.normal(loc=moyenne_bruit,scale=var_bruit,size=nb_points)

    X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=test_size,random_state=55)

    if model_type == 'Linéaire':
        regressor = LinearRegression()
    elif model_type == 'LASSO':
        regressor = Lasso()
    else:
        regressor = Ridge()
    model = make_pipeline(StandardScaler(),PolynomialFeatures(degree,interaction_only=False),regressor)

    # scikit-learn estimators expect a 2-D feature matrix.
    X_train=X_train.reshape(-1,1)
    X_test=X_test.reshape(-1,1)

    model.fit(X_train,y_train)

    train_pred = np.array(model.predict(X_train))
    train_score = model.score(X_train,y_train)

    test_pred = np.array(model.predict(X_test))
    test_score = model.score(X_test,y_test)

    RMSE_train=np.sqrt(np.mean(np.square(train_pred-y_train)))
    RMSE_test=np.sqrt(np.mean(np.square(test_pred-y_test)))

    plt.figure(figsize=(14,6))
    plt.title("Score Entrainement : {0:3.4f}, Test : {1:3.4f} --  RMSE Entrainement : {2:3.4f}, Test : {3:3.4f}".format(train_score,test_score,RMSE_train,RMSE_test),fontsize=16)
    plt.xlabel("x")
    plt.ylabel("y")
    plt.scatter(X_train,y_train,c='blue',label='Entraînement')
    plt.scatter(X_test,y_test,marker='x',c='g',s=100,label='test')
    plt.scatter(X_test,test_pred,marker='x',c='magenta',s=100,label='predit')
    plt.plot(x1,y1,c='k',lw=2,label='Vraie courbe')
    # Fitted model evaluated on the whole grid.
    y2 = model.predict(x1.reshape(-1,1))
    plt.plot(x1,y2,c='r',lw=2,label='Courbe prédite')

    plt.grid(True)
    plt.legend(loc='best')

    plt.tight_layout()

    return (train_score,test_score)
# Controls for func_fit.  Only the degree slider defers its update until it
# is released; the noise sliders use the (min, max, step) abbreviation.
# The "30%" label used to carry a stray trailing space.
m = interactive(
    func_fit,
    model_type=widgets.RadioButtons(options=['Linéaire','LASSO', 'Ridge'],description = "Choose Model",layout=Layout(width='250px')),
    test_size=widgets.RadioButtons(options={"10%":0.1,"30%":0.3,"50%":0.5},description="Test"),
    degree=widgets.IntSlider(min=1,max=30,step=1,description= 'Degré',continuous_update=False),
    amp_bruit=(0,5,1),
    var_bruit=(0,2,0.1),
    moyenne_bruit=(-3,3,0.5),
)
display(m)