# -*- coding: utf-8 -*-
"""Untitled11.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1k64KEyZu9c-acfC9LCUOxu3iLV_z0g1o
"""

# Install libraries.
# The notebook used the IPython shell magic `!pip install ...`, which is a
# syntax error in a plain .py file. Run this in a shell (or a notebook cell)
# instead:
#   pip install pandas matplotlib seaborn scikit-learn

# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# Create data directly: 27 runs, one list entry per run in every column.
data = {
    'Thickness (mm)': [
        1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3,
    ],
    'Rotation Speed (Rpm)': [
        1000, 1500, 2000, 1200, 1500, 2000, 1000, 1500, 2000,
        1200, 1500, 2000, 1000, 1500, 2000, 1000, 1500, 2000,
        1000, 1500, 2000, 1000, 1500, 2000, 1200, 1500, 2000,
    ],
    'Dwelling Time (s)': [
        3, 3, 3, 6, 6, 6, 9, 9, 9,
        3, 3, 3, 6, 6, 6, 9, 9, 9,
        3, 3, 3, 6, 6, 6, 9, 9, 9,
    ],
    'Max Temperature (C)': [
        74, 102, 112, 93, 139, 166, 125, 147, 176,
        85, 115, 128, 96, 133, 165, 132, 153, 179,
        97, 126, 138, 118, 148, 187, 136, 180, 215,
    ],
    'Lap-Shear fracture load (KN)': [
        1.23, 1.42, 1.61, 1.33, 1.52, 1.73, 1.55, 2, 2.2,
        1.2, 1.34, 1.53, 1.65, 1.84, 2.19, 2.26, 2.33, 2.59,
        1.51, 1.82, 2.11, 1.84, 2.14, 2.35, 2.13, 2.36, 2.54,
    ],
}

# Convert data to DataFrame. The notebook rebuilt this identical frame a
# second time before the regression section; one copy is enough.
df = pd.DataFrame(data)

# Display first 5 rows
print("First 5 rows of the data:")
print(df.head())

# Data information
print("\nData Information:")
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would append a stray "None" line.
df.info()

# Statistical description
print("\nStatistical Description:")
print(df.describe())

# Plot distribution for each variable
df.hist(bins=20, figsize=(15, 10), edgecolor='black')
plt.suptitle('Distribution of Each Feature', fontsize=20)
plt.show()

# Plot correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap', fontsize=18)
plt.show()

# Pairplot of variables
sns.pairplot(df, diag_kind='kde')
plt.suptitle('Pairplot of Features', fontsize=20)
plt.show()


# Function to compute and display linear regression
def simple_linear_regression(x, y, xlabel, ylabel):
    """Fit a one-variable linear regression, print its equation, and plot it.

    Args:
        x: pandas Series holding the predictor values.
        y: pandas Series holding the response values.
        xlabel: Predictor name used in the printed equation and on the x-axis.
        ylabel: Response name used in the printed equation and on the y-axis.

    Returns:
        None. The equation and R^2 are printed and a figure is shown.
    """
    # scikit-learn expects a 2-D feature matrix, hence the column reshape.
    X = x.values.reshape(-1, 1)
    Y = y.values.reshape(-1, 1)

    # Create and fit the model
    model = LinearRegression()
    model.fit(X, Y)

    # In-sample prediction (the model is scored on the data it was fit to)
    Y_pred = model.predict(X)

    # Y is 2-D, so coef_ has shape (1, 1) and intercept_ has shape (1,).
    slope = model.coef_[0][0]
    intercept = model.intercept_[0]
    r2 = r2_score(Y, Y_pred)

    # Print equation
    print(f"\nLinear Regression Equation for {xlabel} vs {ylabel}:")
    print(f"{ylabel} = {slope:.4f} * {xlabel} + {intercept:.4f}")
    print(f"R^2: {r2:.4f}")

    # Plot
    plt.figure(figsize=(8, 5))
    plt.scatter(X, Y, color='blue', label='Actual Data')
    plt.plot(X, Y_pred, color='red', linewidth=2, label='Regression Line')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(f'{ylabel} vs {xlabel}')
    plt.legend()
    plt.grid(True)
    plt.show()


# Compute regressions: fracture load against each process parameter in turn.
response_name = 'Lap-Shear fracture load (KN)'
for predictor_name in ('Rotation Speed (Rpm)',
                       'Thickness (mm)',
                       'Dwelling Time (s)'):
    simple_linear_regression(df[predictor_name], df[response_name],
                             predictor_name, response_name)
# ---------------------------------------------------------------------------
# Feature importance (standardized coefficients) - Lap-Shear fracture load
# ---------------------------------------------------------------------------
# Column order here fixes the order of the fitted coefficients below.
feature_columns = ['Rotation Speed (Rpm)', 'Dwelling Time (s)', 'Thickness (mm)']

# The exported notebook used X and y here without ever assigning them, which
# raises NameError when the script runs top to bottom. Define them from the
# DataFrame built above; the target matches this section's plot title.
X = df[feature_columns]
y = df['Lap-Shear fracture load (KN)']

# Standardize data so the coefficients are comparable across features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Linear regression on scaled data
model_scaled = LinearRegression()
model_scaled.fit(X_scaled, y)

# Extract coefficients
importance = model_scaled.coef_

# Create importance table; names come from the same list used to build X so
# they cannot drift out of step with the coefficient order.
feature_importance = pd.DataFrame({
    'Feature': feature_columns,
    'Importance': importance,
})

# Sort descending by absolute value
feature_importance = feature_importance.sort_values(
    by='Importance', key=abs, ascending=False)

print("Feature Importance (sorted):")
print(feature_importance)

# Plot
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; confirm the installed version before changing this call.
plt.figure(figsize=(8, 5))
sns.barplot(x='Importance', y='Feature', data=feature_importance,
            palette='viridis')
plt.title('Feature Importance (Linear Regression) - Lap-Shear fracture load')
plt.grid()
plt.show()

# ---------------------------------------------------------------------------
# Feature importance (standardized coefficients) - Max Temperature
# ---------------------------------------------------------------------------
# Reload data (this frame deliberately omits the fracture-load column)
data = {
    'Thickness (mm)': [
        1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3,
    ],
    'Rotation Speed (Rpm)': [
        1000, 1500, 2000, 1200, 1500, 2000, 1000, 1500, 2000,
        1200, 1500, 2000, 1000, 1500, 2000, 1000, 1500, 2000,
        1000, 1500, 2000, 1000, 1500, 2000, 1200, 1500, 2000,
    ],
    'Dwelling Time (s)': [
        3, 3, 3, 6, 6, 6, 9, 9, 9,
        3, 3, 3, 6, 6, 6, 9, 9, 9,
        3, 3, 3, 6, 6, 6, 9, 9, 9,
    ],
    'Max Temperature (C)': [
        74, 102, 112, 93, 139, 166, 125, 147, 176,
        85, 115, 128, 96, 133, 165, 132, 153, 179,
        97, 126, 138, 118, 148, 187, 136, 180, 215,
    ],
}
df = pd.DataFrame(data)

# inputs and outputs
X = df[feature_columns]
y = df['Max Temperature (C)']

# Standardize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Linear regression
model_scaled = LinearRegression()
model_scaled.fit(X_scaled, y)
importance = model_scaled.coef_

feature_importance = pd.DataFrame({
    'Feature': feature_columns,
    'Importance': importance,
})
feature_importance = feature_importance.sort_values(
    by='Importance', key=abs, ascending=False)

print("Feature Importance (sorted) for Max Temperature:")
print(feature_importance)

plt.figure(figsize=(8, 5))
sns.barplot(x='Importance', y='Feature', data=feature_importance,
            palette='plasma')
plt.title('Feature Importance (Linear Regression) - Max Temperature')
plt.grid()
plt.show()

# ---------------------------------------------------------------------------
# Linear regression equation for temperature (unscaled features)
# ---------------------------------------------------------------------------
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)

intercept = model.intercept_
coefficients = model.coef_  # same order as feature_columns
r2 = r2_score(y, y_pred)

print("Multiple Linear Regression Equation for Max Temperature:")
print(f"Max Temperature (C) = {intercept:.4f} + ({coefficients[0]:.4f}) * Rotation Speed (Rpm) + ({coefficients[1]:.4f}) * Dwelling Time (s) + ({coefficients[2]:.4f}) * Thickness (mm)")
print(f"\nR² = {r2:.4f}")

plt.figure(figsize=(8, 6))
plt.scatter(y, y_pred, color='blue', edgecolor='k')
# Dashed diagonal = perfect prediction
plt.plot([y.min(), y.max()], [y.min(), y.max()], '--r', linewidth=2)
plt.xlabel('Actual Max Temperature (C)')
plt.ylabel('Predicted Max Temperature (C)')
plt.title('Actual vs Predicted Max Temperature')
plt.grid(True)
plt.show()

# ---------------------------------------------------------------------------
# Machine learning model comparison
# ---------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import HuberRegressor, BayesianRidge, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# The notebook re-created the same temperature dataset, X and y here; the
# values assigned above are identical, so they are reused as-is.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# NOTE(review): the models are fit on unscaled features whose ranges differ
# by orders of magnitude (rpm in the thousands vs. single-digit thickness and
# dwell time). SVR, HuberRegressor and ElasticNet are scale-sensitive;
# consider wrapping them in a StandardScaler pipeline. Left unchanged so the
# reported numbers stay the same.
models = {
    "Support Vector Regression": SVR(),
    "Random Forest Regression": RandomForestRegressor(random_state=42),
    "Robust Regression": HuberRegressor(),
    "Polynomial Regression (Non-Linear)": make_pipeline(
        PolynomialFeatures(degree=2), LinearRegression()),
    "Bayesian Ridge Regression": BayesianRidge(),
    "Elastic Net Regression": ElasticNet(random_state=42),
}


def evaluate_model(name, model, X_train, X_test, y_train, y_test):
    """Fit `model` on the training split and score it on both splits.

    Args:
        name: Label stored under the "Model" key of the result.
        model: Estimator exposing `fit(X, y)` and `predict(X)`.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        dict with "Model" plus MAE, MSE, RMSE and R2, each reported for the
        train and the test split ("Train MAE", "Test MAE", ...).
    """
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Compute each MSE once; RMSE is derived from it.
    train_mse = mean_squared_error(y_train, y_train_pred)
    test_mse = mean_squared_error(y_test, y_test_pred)

    return {
        "Model": name,
        "Train MAE": mean_absolute_error(y_train, y_train_pred),
        "Test MAE": mean_absolute_error(y_test, y_test_pred),
        "Train MSE": train_mse,
        "Test MSE": test_mse,
        "Train RMSE": np.sqrt(train_mse),
        "Test RMSE": np.sqrt(test_mse),
        "Train R2": r2_score(y_train, y_train_pred),
        "Test R2": r2_score(y_test, y_test_pred),
    }


results = [
    evaluate_model(name, model, X_train, X_test, y_train, y_test)
    for name, model in models.items()
]

results_df = pd.DataFrame(results)
print(results_df)

# Test R2 on its own
results_df.set_index('Model')[['Test R2']].plot(
    kind='barh', figsize=(10, 6), legend=True, title="Test R2 Score by Model")
plt.grid(True)
plt.show()

metrics = ['Test MAE', 'Test MSE', 'Test RMSE', 'Test R2']

# One plain horizontal bar chart per test metric
for metric in metrics:
    results_df.set_index('Model')[[metric]].plot(
        kind='barh', figsize=(10, 6), legend=True)
    plt.title(f'{metric} Comparison Across Models')
    plt.xlabel(metric)
    plt.ylabel('Model')
    plt.grid(True)
    plt.show()

# Same charts with the numeric value written next to each bar
for metric in metrics:
    ax = results_df.set_index('Model')[[metric]].plot(
        kind='barh', figsize=(10, 6), legend=True)
    plt.title(f'{metric} Comparison Across Models')
    plt.xlabel(metric)
    plt.ylabel('Model')
    plt.grid(True)
    # Bar i sits at y-position i; nudge the text just past the bar end.
    for i, value in enumerate(results_df[metric]):
        ax.text(value + 0.01, i, f"{value:.3f}", va='center', fontsize=10)
    plt.show()

# Same charts with the value embedded in each model's tick label
for metric in metrics:
    temp_df = results_df.copy()
    if metric == 'Test R2':
        # R2 is shown as a percentage on this chart.
        temp_df[metric] = temp_df[metric] * 100
        value_text = temp_df[metric].round(2).astype(str) + "%"
        # The axis now carries percent values, so say so in the label.
        axis_label = f'{metric} (%)'
    else:
        value_text = temp_df[metric].round(3).astype(str)
        axis_label = metric
    temp_df['Model_with_value'] = temp_df['Model'] + " (" + value_text + ")"

    temp_df.set_index('Model_with_value')[[metric]].plot(
        kind='barh', figsize=(12, 7), legend=True)
    plt.title(f'{metric} Comparison Across Models')
    plt.xlabel(axis_label)
    plt.ylabel('Model')
    plt.grid(True)
    plt.show()