from math import sqrt
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import validation_curve, learning_curve
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold

import numpy as np
import matplotlib.pyplot as plt

def plot_vorhersage_verteilung(y_test, modell, color, name):
    """Show a scatter plot of predicted values against reference values.

    Args:
        y_test: Reference values, drawn on the x-axis.
        modell: Predicted values for the same samples, drawn on the y-axis
            (despite the parameter name, this is the prediction array, not
            an estimator object).
        color: Matplotlib colour used for the scatter markers.
        name: Legend label of the scatter series.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    figure, axes = plt.subplots(figsize=(6, 8))

    axes.scatter(y_test, modell, s=150, alpha=0.5, color=color, label=name)

    # Dashed red reference line "prediction == reference". Its extent is
    # fixed to 10..1000 and does not adapt to the plotted data.
    diagonal = [10, 1000]
    axes.plot(diagonal, diagonal, 'r--')

    axes.set_xlabel('Simulated Stress [MPa]', fontsize=21)
    axes.set_ylabel('Predicted Stress [MPa]', fontsize=21)
    axes.legend(fontsize=20)

    plt.show()


def _as_1d_target(y):
    """Return a single-column 2-D target as a flat 1-D array.

    Any other input (already 1-D, or genuinely multi-output) is returned
    unchanged.
    """
    values = np.asarray(y)
    if values.ndim == 2 and values.shape[1] == 1:
        return values.ravel()
    return y


def vorhersage(X, y, a, b):
    """Fit four regressors on a train/test split and report their quality.

    The models are a decision tree (DTR), a linear regression (LR), a random
    forest (RFR) and gradient boosting (GBR), all with default settings.
    For each model the function shows a predicted-vs-reference plot on the
    test split, prints predictions for a fixed list of new samples, and
    finally prints RMSE and R2 on the test split.  Two additional figures
    show the train/test data and the new-sample predictions over the
    'äußere Belastung' column.

    Args:
        X: Feature table.  It must support ``X['äußere Belastung']``; the
            hard-coded new samples use the columns 'äußere Belastung',
            'Infillgeometrie', 'Wandstärke', 'Winkel' and 'Prozent', so X is
            presumably a DataFrame with exactly these columns.
        y: Target values; a single-column DataFrame is accepted.
        a: Forwarded to ``train_test_split`` as ``test_size``.
        b: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        None.  Results are printed and plotted.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=a, random_state=b)

    # A single-column 2-D target makes the ensemble regressors emit a
    # DataConversionWarning and makes LinearRegression return 2-D
    # predictions; fitting on the flattened target avoids both.
    y_fit = _as_1d_target(y_train)

    # Insertion order defines the order of all plots and printouts below.
    models = {
        'DTR': DecisionTreeRegressor(),
        'LR': LinearRegression(),
        'RFR': RandomForestRegressor(),
        'GBR': GradientBoostingRegressor(),
    }
    for model in models.values():
        model.fit(X_train, y_fit)

    test_pred = {key: model.predict(X_test) for key, model in models.items()}

    parity_colors = {'DTR': 'magenta', 'LR': 'blue', 'RFR': 'black', 'GBR': 'green'}
    for key, prediction in test_pred.items():
        plot_vorhersage_verteilung(y_test, prediction, parity_colors[key], key)

    new_data = [
        {"äußere Belastung": 50000, "Infillgeometrie": 0, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 10000, "Infillgeometrie": 1, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 30000, "Infillgeometrie": 0, "Wandstärke": 2, "Winkel": 90, "Prozent": 60},
        {"äußere Belastung": 10000, "Infillgeometrie": 1, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 20000, "Infillgeometrie": 0, "Wandstärke": 2, "Winkel": 60, "Prozent": 60},
        {"äußere Belastung": 50000, "Infillgeometrie": 1, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 10000, "Infillgeometrie": 0, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 30000, "Infillgeometrie": 1, "Wandstärke": 2, "Winkel": 90, "Prozent": 60},
        {"äußere Belastung": 40000, "Infillgeometrie": 0, "Wandstärke": 1, "Winkel": 30, "Prozent": 60},
        {"äußere Belastung": 60000, "Infillgeometrie": 1, "Wandstärke": 2, "Winkel": 90, "Prozent": 60},
        {"äußere Belastung": 50000, "Infillgeometrie": 0, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 70000, "Infillgeometrie": 1, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 30000, "Infillgeometrie": 0, "Wandstärke": 2, "Winkel": 90, "Prozent": 60},
        {"äußere Belastung": 80000, "Infillgeometrie": 1, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 90000, "Infillgeometrie": 1, "Wandstärke": 1, "Winkel": 0, "Prozent": 60},
        {"äußere Belastung": 100000, "Infillgeometrie": 1, "Wandstärke": 1, "Winkel": 0, "Prozent": 60}
    ]

    # Build the frame once and reuse it for every model and for plotting.
    new_frame = pd.DataFrame(new_data)
    new_pred = {}
    for key, model in models.items():
        new_pred[key] = model.predict(new_frame)
        print(f"New Data {key} Vorhersage:", new_pred[key])

    # Training and test samples over the external load.
    plt.figure(figsize=(6, 8))
    plt.scatter(X_train['äußere Belastung'], y_train, color='red', alpha=0.5,
                label='Training Data', s=150)
    plt.scatter(X_test['äußere Belastung'], y_test, color='blue', alpha=0.5,
                label='Test Data', s=150)
    plt.xlabel('External Load [N]', fontsize=21)
    plt.ylabel('Stress [MPa]', fontsize=21)
    plt.grid(True)
    plt.legend(fontsize=20)
    plt.show()

    # Predictions of all four models for the new samples over the load.
    new_colors = {'DTR': 'black', 'LR': 'green', 'RFR': 'magenta', 'GBR': 'yellow'}
    belastung_new_data = new_frame['äußere Belastung']
    plt.figure(figsize=(6, 8))
    for key, prediction in new_pred.items():
        plt.scatter(belastung_new_data, prediction, color=new_colors[key],
                    alpha=0.5, label=key, s=200)
    plt.xlabel('External Load [N]', fontsize=21)
    plt.ylabel('Stress [MPa]', fontsize=21)
    plt.grid(True)
    plt.legend()
    plt.show()

    # The printed value is the square root of the MSE, so every label says
    # RMSE (the gradient-boosting line used to be labelled "MSE").
    rmse_labels = {
        'DTR': "Decision Tree Regression RMSE:",
        'LR': "Linear Regression RMSE:",
        'RFR': "Random Forest Regressor RMSE:",
        'GBR': "Gradient Boost Regressor RMSE:",
    }
    for key, label in rmse_labels.items():
        print(label, sqrt(mean_squared_error(y_test, test_pred[key])))

    r2_labels = {
        'DTR': 'Decision Tree Regression R2-Wert:',
        'LR': 'Linear Regression R2-Wert:',
        'RFR': 'Random Forest Regressor R2-Wert:',
        'GBR': 'Gradient Boosting Regressor R2-Wert:',
    }
    for key, label in r2_labels.items():
        print(label, r2_score(y_test, test_pred[key]))

def ModellTraining(training_data, labels, model):
    """Fit *model* on standardized features and estimate its RMSE by CV.

    The previous implementation called ``np.ScalarType(X1)``.
    ``np.ScalarType`` is a tuple of types, so that call raised ``TypeError``,
    and it read the global ``X1`` instead of ``training_data``.
    NOTE(review): standard scaling is assumed to be the original intent
    (the variable was named ``training_data_scaled``) -- confirm.

    Scaling is done inside a pipeline so that each cross-validation fold is
    scaled with statistics from its own training part only.

    Args:
        training_data: Feature matrix.
        labels: Target values; a single-column 2-D target is flattened.
        model: Unfitted scikit-learn regressor.  It becomes the last step of
            the returned pipeline and is fitted in place.

    Returns:
        Tuple ``(regressor, cv_rmse, cv_rmse_std)``: the fitted
        scaler+model pipeline, the mean RMSE over all folds of a
        10-fold, 5-times repeated cross-validation, and the standard
        deviation of the fold scores.
    """
    # Imported locally so this function does not depend on edits to the
    # file's import block.
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    target = np.asarray(labels)
    if target.ndim == 2 and target.shape[1] == 1:
        # Estimators expect shape (n_samples,) for a single target.
        target = target.ravel()

    regressor = make_pipeline(StandardScaler(), model)
    regressor.fit(training_data, target)

    cv = RepeatedKFold(n_splits=10, n_repeats=5, random_state=1)
    # cross_val_score works on clones, so the fitted pipeline is untouched.
    scores = cross_val_score(regressor, training_data, target,
                             scoring="neg_root_mean_squared_error", cv=cv)
    cv_rmse = -scores.mean()  # scorer returns negated RMSE
    cv_rmse_std = scores.std()
    return regressor, cv_rmse, cv_rmse_std


if __name__ == '__main__':
    # Semicolon-separated training table; the columns used below must exist.
    data = pd.read_csv('Training1.csv', delimiter=';')

    X1 = data[['äußere Belastung', 'Infillgeometrie', 'Wandstärke', 'Winkel', 'Prozent']]
    y1 = data[['Spannung']]
    # 20 % of the rows are held out for testing; 42 seeds the split.
    vorhersage(X1, y1, 0.2, 42)
