# Import pandas
import pandas as pd
data = pd.read_csv("year_genre_region.csv")
data
data.shape  # shape of the DataFrame: number of rows and columns
data.index  # inspect the index (start, stop, step)
data.info()  # column names, dtypes, and non-null counts
data['region'].value_counts()
data['genre'].value_counts()
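# Optional sanity checks (a sketch, using the DataFrame loaded above):
# confirm there are no missing values and look at basic statistics.
data.isna().sum()
data.describe()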
# Re-load the raw CSV into a fresh DataFrame for one-hot encoding
df = pd.read_csv("year_genre_region.csv")
df
# One-hot encode the categorical "region" column
df_new = pd.get_dummies(df, columns=["region"], prefix="region")
df_new
# One-hot encode the categorical "genre" column
df_new = pd.get_dummies(df_new, columns=["genre"], prefix="genre")
df_new
# Save the encoded DataFrame for the modeling steps below
df_new.to_csv("filename_modified.csv", index=False)
df_new.info()
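# Sketch: list the dummy columns created by get_dummies, assuming the
# prefixes "region_" and "genre_" used above.
[col for col in df_new.columns if col.startswith(("region_", "genre_"))]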
from sklearn.model_selection import train_test_split
import pandas as pd
# Load the one-hot encoded data from the previous step
data = pd.read_csv('filename_modified.csv')
# Separate the features (X) and labels (y)
X = data.drop('sales', axis=1)  # 'sales' is the target column; everything else is a feature
y = data['sales']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=38)
X
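# Sketch: verify the 80/20 split by checking the shapes of the resulting sets.
print("Train:", X_train.shape, y_train.shape)
print("Test: ", X_test.shape, y_test.shape)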
import numpy as np
# Baseline: predict the mean of the training target for every test instance
baseline_prediction = np.mean(y_train)
# Generate predictions using the baseline value for all instances in the testing set
baseline_predictions = np.full(len(y_test), baseline_prediction, dtype=float)  # one float entry per test instance
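# The baseline is not scored yet; a sketch of evaluating it with the same
# metrics used for the models below, so they have a reference point to beat.
# The baseline_* variable names are new helpers.
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
baseline_mse = mean_squared_error(y_test, baseline_predictions)
baseline_mae = mean_absolute_error(y_test, baseline_predictions)
baseline_r2 = r2_score(y_test, baseline_predictions)
print("Baseline (mean of y_train):")
print("  MSE:", baseline_mse)
print("  MAE:", baseline_mae)
print("  R-squared:", baseline_r2)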
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
# Train several regression models on the training data
# Linear Regression
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)
# Decision Tree Regressor
decision_tree = DecisionTreeRegressor()
decision_tree.fit(X_train, y_train)
# Random Forest Regressor
random_forest = RandomForestRegressor()
random_forest.fit(X_train, y_train)
# Gradient Boosting Regressor
gradient_boosting = GradientBoostingRegressor()
gradient_boosting.fit(X_train, y_train)
# Support Vector Regression (SVR)
svr = SVR()
svr.fit(X_train, y_train)
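# SVR is sensitive to feature scale, and the features here mix a year column
# with 0/1 dummies. A sketch of a scaled variant using a Pipeline; the name
# `svr_scaled` is a new helper and is not used in the comparison below.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
svr_scaled = make_pipeline(StandardScaler(), SVR())
svr_scaled.fit(X_train, y_train)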
# Make predictions on the test set with each trained model
linear_reg_predictions = linear_reg.predict(X_test)
decision_tree_predictions = decision_tree.predict(X_test)
random_forest_predictions = random_forest.predict(X_test)
gradient_boosting_predictions = gradient_boosting.predict(X_test)
svr_predictions = svr.predict(X_test)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Compute MSE, MAE, and R-squared for each model on the test set
linear_reg_mse = mean_squared_error(y_test, linear_reg_predictions)
linear_reg_mae = mean_absolute_error(y_test, linear_reg_predictions)
linear_reg_r2 = r2_score(y_test, linear_reg_predictions)
decision_tree_mse = mean_squared_error(y_test, decision_tree_predictions)
decision_tree_mae = mean_absolute_error(y_test, decision_tree_predictions)
decision_tree_r2 = r2_score(y_test, decision_tree_predictions)
random_forest_mse = mean_squared_error(y_test, random_forest_predictions)
random_forest_mae = mean_absolute_error(y_test, random_forest_predictions)
random_forest_r2 = r2_score(y_test, random_forest_predictions)
gradient_boosting_mse = mean_squared_error(y_test, gradient_boosting_predictions)
gradient_boosting_mae = mean_absolute_error(y_test, gradient_boosting_predictions)
gradient_boosting_r2 = r2_score(y_test, gradient_boosting_predictions)
svr_mse = mean_squared_error(y_test, svr_predictions)
svr_mae = mean_absolute_error(y_test, svr_predictions)
svr_r2 = r2_score(y_test, svr_predictions)
# Print evaluation results for each model
print("Linear Regression:")
print(" MSE:", linear_reg_mse)
print(" MAE:", linear_reg_mae)
print(" R-squared:", linear_reg_r2)
print()
print("Decision Tree Regressor:")
print(" MSE:", decision_tree_mse)
print(" MAE:", decision_tree_mae)
print(" R-squared:", decision_tree_r2)
print()
print("Random Forest Regressor:")
print(" MSE:", random_forest_mse)
print(" MAE:", random_forest_mae)
print(" R-squared:", random_forest_r2)
print()
print("Gradient Boosting Regressor:")
print(" MSE:", gradient_boosting_mse)
print(" MAE:", gradient_boosting_mae)
print(" R-squared:", gradient_boosting_r2)
print()
print("SVR:")
print(" MSE:", svr_mse)
print(" MAE:", svr_mae)
print(" R-squared:", svr_r2)
print()
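# Sketch: collect the metrics computed above into a single DataFrame so the
# models can be compared side by side; `results` is a new helper variable.
results = pd.DataFrame(
    {
        "MSE": [linear_reg_mse, decision_tree_mse, random_forest_mse,
                gradient_boosting_mse, svr_mse],
        "MAE": [linear_reg_mae, decision_tree_mae, random_forest_mae,
                gradient_boosting_mae, svr_mae],
        "R-squared": [linear_reg_r2, decision_tree_r2, random_forest_r2,
                      gradient_boosting_r2, svr_r2],
    },
    index=["Linear Regression", "Decision Tree", "Random Forest",
           "Gradient Boosting", "SVR"],
)
print(results.sort_values("R-squared", ascending=False))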
import matplotlib.pyplot as plt
# Scatter plot of predicted vs. true values for all models in one figure
plt.scatter(y_test, linear_reg_predictions, color='blue', label='Linear Regression')
plt.scatter(y_test, decision_tree_predictions, color='red', label='Decision Tree')
plt.scatter(y_test, random_forest_predictions, color='green', label='Random Forest')
plt.scatter(y_test, gradient_boosting_predictions, color='orange', label='Gradient Boosting')
plt.scatter(y_test, svr_predictions, color='purple', label='SVR')
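# Sketch: a dashed y = x reference line makes deviations from a perfect
# prediction visible at a glance; `lims` is a new helper variable.
lims = [y_test.min(), y_test.max()]
plt.plot(lims, lims, color='black', linestyle='--', label='Perfect prediction')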
# Add labels and a legend to the plot
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.legend()
# Show the plot
plt.show()
import matplotlib.pyplot as plt
# Separate scatter plots of predictions vs. true values for each model
# Create a scatter plot for Linear Regression predictions
plt.scatter(y_test, linear_reg_predictions, color='blue')
plt.xlabel('True Values')
plt.ylabel('Linear Regression Predictions')
plt.title('Linear Regression Predictions vs. True Values')
plt.show()
# Create a scatter plot for Decision Tree Regressor predictions
plt.scatter(y_test, decision_tree_predictions, color='red')
plt.xlabel('True Values')
plt.ylabel('Decision Tree Predictions')
plt.title('Decision Tree Predictions vs. True Values')
plt.show()
# Create a scatter plot for Random Forest Regressor predictions
plt.scatter(y_test, random_forest_predictions, color='green')
plt.xlabel('True Values')
plt.ylabel('Random Forest Predictions')
plt.title('Random Forest Predictions vs. True Values')
plt.show()
# Create a scatter plot for Gradient Boosting Regressor predictions
plt.scatter(y_test, gradient_boosting_predictions, color='orange')
plt.xlabel('True Values')
plt.ylabel('Gradient Boosting Predictions')
plt.title('Gradient Boosting Predictions vs. True Values')
plt.show()
# Create a scatter plot for SVR predictions
plt.scatter(y_test, svr_predictions, color='purple')
plt.xlabel('True Values')
plt.ylabel('SVR Predictions')
plt.title('SVR Predictions vs. True Values')
plt.show()
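# Sketch: residuals (true minus predicted) are another useful diagnostic;
# shown here for the Random Forest model as an example.
residuals = y_test - random_forest_predictions
plt.scatter(random_forest_predictions, residuals, color='green')
plt.axhline(0, color='black', linestyle='--')
plt.xlabel('Random Forest Predictions')
plt.ylabel('Residuals (True - Predicted)')
plt.title('Random Forest Residuals')
plt.show()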