import os
import pandas as pd
os.chdir('C:/Users/Alejandro/Documents/')
df = pd.read_csv('jobsat3881.csv')
df.info()
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
Store the heatmap object in a variable so it is easy to access later when you want to add more features (for example, the title). Set the range of values displayed in the plot from -1 to 1, and set the annotation to True so the correlation values are shown on the heatmap.
plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(df.corr(), vmin=-1, vmax=1, annot=True, cmap='coolwarm')
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':12}, pad=12)
plt.savefig('Correlation_matrix.png', dpi=300, bbox_inches='tight')
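As a complement to the heatmap, the correlations with the target can also be listed numerically; the sketch below assumes the target column is named Satisfaction, as it is later in the script.
# Sketch: rank the features by their absolute correlation with the target (assumed to be 'Satisfaction')
target_corr = df.corr()['Satisfaction'].drop('Satisfaction')
print(target_corr.reindex(target_corr.abs().sort_values(ascending=False).index))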
X = df.drop('Satisfaction',axis=1)
y = df['Satisfaction']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,
                                                    random_state=1234)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
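An equivalent, and often less error-prone, way to combine the scaling and the model is a scikit-learn Pipeline, which applies the same transformation automatically at prediction time; this is only a sketch of that alternative, not part of the original workflow.
# Sketch: bundle StandardScaler and Ridge in one Pipeline (alternative to scaling by hand)
from sklearn.pipeline import Pipeline
pipe = Pipeline([('scaler', StandardScaler()), ('ridge', Ridge())])
pipe.fit(X_train, y_train)
print(pipe.score(X_test, y_test))   # R^2 on the raw test features; scaling happens inside the pipeline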
model = Ridge()                      # default regularization strength, alpha = 1.0
model.fit(X_train_scaled, y_train)   # fit on the standardized training features
from sklearn.linear_model import RidgeCV
alphas = [0.1, 1.0, 10.0, 100.0]
ridge_cv_model = RidgeCV(alphas=alphas, store_cv_results=True)
ridge_cv_model.fit(X_train_scaled, y_train)
print(f"Optimal lambda: {ridge_cv_model.alpha_}")
y_pred = model.predict(X_test_scaled)
from sklearn.metrics import (root_mean_squared_error, mean_absolute_percentage_error,
                             mean_absolute_error, r2_score, explained_variance_score)
mape = mean_absolute_percentage_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
explained_var = explained_variance_score(y_test, y_pred)
print("MAPE, mean absolute percentage error:", mape)
print("MSE, Mean squared error:", mse)
print("RMSE, Root mean squared error:", rmse)
print("MAE, Mean absolute error:", mae)
print("R2, R-squared:", r2)
print("Explained variance:", explained_var)
MAPE, mean absolute percentage error: 0.3386349991569121
MSE, Mean squared error: 0.9269658387736589
RMSE, Root mean squared error: 0.9627906515819827
MAE, Mean absolute error: 0.687484673325361
R2, R-squared: 0.6014297918574099
Explained variance: 0.6044428373606303
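To see which predictors drive the fit, the ridge coefficients can be paired with the original column names; a minimal sketch, assuming the model was trained on the scaled features in the same column order as X.
# Sketch: pair the fitted coefficients with the feature names, largest magnitude first
coef = pd.Series(model.coef_, index=X.columns)
print(coef.reindex(coef.abs().sort_values(ascending=False).index))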
import matplotlib.pyplot as plt
import yellowbrick
from yellowbrick.regressor import PredictionError
visualizer = PredictionError(model)
visualizer.fit(X_train_scaled, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test_scaled, y_test)    # Evaluate the model on the test data
visualizer.show()                          # Finalize and render the figure
from yellowbrick.regressor import ResidualsPlot
visualizer = ResidualsPlot(model)
visualizer.fit(X_train_scaled, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test_scaled, y_test)    # Evaluate the model on the test data
visualizer.show()                          # Finalize and render the figure