IE6400_Quiz18_Day 21

pdf

School

Northeastern University *

*We aren’t endorsed by this school

Course

6400

Subject

Industrial Engineering

Date

Feb 20, 2024

Type

pdf

Pages

1

Uploaded by ColonelStraw13148

Report
"""Synthetic daily-sales forecasting comparison (IE6400 Quiz 18).

Generates ten-plus years of synthetic daily revenue, aggregates it to monthly
totals, and compares four forecasters (neural network, random forest,
decision tree, ARIMA) on a chronological hold-out of the last 20% of months.

The modelling and plotting libraries (scikit-learn, statsmodels, matplotlib)
are imported inside ``main`` so the data helpers below can be imported and
tested with only numpy and pandas installed.
"""
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# Parameters for the synthetic data.
START_DATE = datetime(2012, 1, 1)
END_DATE = datetime(2022, 12, 31)
BASE_SALES = 10000      # average daily sales
SALES_STD_DEV = 1000    # standard deviation of the daily variation
RANDOM_SEED = 42        # makes the synthetic data, and so every metric, reproducible
ARIMA_ORDER = (5, 1, 0)
FORECAST_HORIZON = 6    # months predicted beyond the end of the data
RECENT_MONTHS = 12      # months shown in the final comparison plot


def generate_daily_sales(start_date, end_date, base_sales=BASE_SALES,
                         sales_std_dev=SALES_STD_DEV, seed=None):
    """Return a DataFrame of synthetic daily revenue.

    Args:
        start_date: first day (inclusive), a ``datetime``.
        end_date: last day (inclusive), a ``datetime``.
        base_sales: mean of the normal distribution the values are drawn from.
        sales_std_dev: standard deviation of that distribution.
        seed: seed for the random generator; ``None`` gives a fresh draw.

    Returns:
        DataFrame with columns ``'Date'`` and ``'Total Revenue'``, one row per
        day; revenue is rounded to 2 decimals and never negative.

    Raises:
        ValueError: if ``end_date`` is before ``start_date``.
    """
    num_days = (end_date - start_date).days + 1
    if num_days <= 0:
        raise ValueError('end_date must not be earlier than start_date')
    dates = [start_date + timedelta(days=offset) for offset in range(num_days)]
    rng = np.random.default_rng(seed)
    sales = rng.normal(base_sales, sales_std_dev, num_days).round(2)
    sales = np.where(sales < 0, 0, sales)  # ensure sales are non-negative
    return pd.DataFrame({'Date': dates, 'Total Revenue': sales})


def aggregate_monthly(sales_data, base_year=None):
    """Aggregate daily revenue to monthly totals and number the months.

    Args:
        sales_data: DataFrame with ``'Date'`` and ``'Total Revenue'`` columns.
            It is not modified.
        base_year: year whose January gets ``MonthNumber`` 1. Defaults to the
            earliest year present in the data.

    Returns:
        DataFrame with columns ``Year``, ``Month``, ``Total Revenue`` and
        ``MonthNumber`` (``(Year - base_year) * 12 + Month``), sorted
        chronologically.
    """
    dates = pd.to_datetime(sales_data['Date'])
    monthly = (
        sales_data.assign(Year=dates.dt.year, Month=dates.dt.month)
        .groupby(['Year', 'Month'], as_index=False)['Total Revenue']
        .sum()
    )
    if monthly.empty:
        monthly['MonthNumber'] = pd.Series(dtype='int64')
        return monthly
    if base_year is None:
        base_year = int(monthly['Year'].min())
    monthly['MonthNumber'] = (monthly['Year'] - base_year) * 12 + monthly['Month']
    return monthly


def regression_errors(actual, predicted):
    """Return ``(rmse, mae)`` for two equally long numeric sequences.

    Raises:
        ValueError: if the inputs differ in shape or are empty.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError('actual and predicted must have the same shape')
    if actual.size == 0:
        raise ValueError('cannot score empty predictions')
    residuals = actual - predicted
    rmse = float(np.sqrt(np.mean(residuals ** 2)))
    mae = float(np.mean(np.abs(residuals)))
    return rmse, mae


def best_model_by(model_performance, metric):
    """Return the ``'Model'`` name with the lowest value in column ``metric``.

    The lookup uses the metric column itself, so it does not depend on how
    the table happens to be sorted.

    Raises:
        ValueError: if the table has no rows.
    """
    if model_performance.empty:
        raise ValueError('model_performance has no rows')
    return model_performance.loc[model_performance[metric].idxmin(), 'Model']


def main():
    """Run the full notebook: data, plots, model fitting and comparison."""
    # Import the necessary third-party libraries.
    import matplotlib.pyplot as plt
    from sklearn.compose import TransformedTargetRegressor
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.neural_network import MLPRegressor
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.tree import DecisionTreeRegressor
    from statsmodels.tsa.arima.model import ARIMA

    features = ['MonthNumber']
    target = 'Total Revenue'

    # ---- Generate and plot the synthetic daily sales data ----
    sales_data = generate_daily_sales(
        START_DATE, END_DATE, BASE_SALES, SALES_STD_DEV, seed=RANDOM_SEED
    )

    plt.figure(figsize=(12, 6))
    plt.plot(sales_data['Date'], sales_data['Total Revenue'], label='Total Revenue')
    plt.title('Time Series Plot of Daily Sales Data')
    plt.xlabel('Date')
    plt.ylabel('Total Revenue')
    plt.grid(True)
    plt.legend()
    plt.show()
    print(sales_data.head())

    # ---- Data Transformation ----
    # Monthly totals plus a running month number (1 to 132 for 2012-2022).
    monthly_sales = aggregate_monthly(sales_data)
    print(monthly_sales)

    # ---- Model Development and Comparison: Machine Learning Models ----
    # Chronological split: the last 20% of months form the test set.
    train, test = train_test_split(monthly_sales, test_size=0.2, shuffle=False)

    # The MLP is trained on standardised inputs and targets; on the raw scale
    # (targets around 3e5) it does not converge within max_iter.
    nn_model = TransformedTargetRegressor(
        regressor=make_pipeline(
            StandardScaler(),
            MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),
        ),
        transformer=StandardScaler(),
    )
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    dt_model = DecisionTreeRegressor(random_state=42)
    models = {
        'Neural Network': nn_model,
        'Random Forest': rf_model,
        'Decision Tree': dt_model,
    }

    test_predictions = {}
    for name, model in models.items():
        model.fit(train[features], train[target])
        test_predictions[name] = model.predict(test[features])

    # ARIMA is fitted on the training months only and then forecast over the
    # test window, so it is scored out-of-sample like the other models.
    arima_results = ARIMA(train[target].to_numpy(), order=ARIMA_ORDER).fit()
    test_predictions['ARIMA'] = np.asarray(arima_results.forecast(steps=len(test)))

    # ---- Monthly Total Revenue Prediction for Next 6 Months ----
    plt.figure(figsize=(12, 6))
    plt.plot(monthly_sales['MonthNumber'], monthly_sales[target], marker='o',
             linestyle='-', color='green', label='Historical Data', alpha=0.7)

    last_month = int(monthly_sales['MonthNumber'].max())
    future_months = list(range(last_month + 1, last_month + 1 + FORECAST_HORIZON))
    future_months_data = pd.DataFrame({'MonthNumber': future_months})

    # NOTE: the scikit-learn models are still the ones fitted on the training
    # months; only ARIMA is refitted on the full history for this forecast.
    for name, model in models.items():
        plt.plot(future_months, model.predict(future_months_data[features]),
                 label=f'{name} Predictions')

    arima_full = ARIMA(monthly_sales[target].to_numpy(), order=ARIMA_ORDER).fit()
    arima_future_predictions = arima_full.forecast(steps=FORECAST_HORIZON)
    plt.plot(future_months, arima_future_predictions, label='ARIMA Predictions')

    plt.title('Monthly Total Revenue Prediction for Next 6 Months (Including Historical)')
    plt.xlabel('Month Number')
    plt.ylabel('Total Revenue')
    plt.grid(True)
    plt.legend()
    plt.show()

    # ---- Comparison with ARIMA ----
    scores = {
        name: regression_errors(test[target], predictions)
        for name, predictions in test_predictions.items()
    }
    model_performance = pd.DataFrame({
        'Model': list(scores),
        'RMSE': [rmse for rmse, _ in scores.values()],
        'MAE': [mae for _, mae in scores.values()],
    })

    # Pick the winner per metric before any sorting or rounding.
    best_rmse_model = best_model_by(model_performance, 'RMSE')
    best_mae_model = best_model_by(model_performance, 'MAE')

    # Sort by RMSE in descending order (worst first) and keep two decimals.
    model_performance = model_performance.sort_values(by='RMSE', ascending=False)
    model_performance['RMSE'] = model_performance['RMSE'].round(2)
    model_performance['MAE'] = model_performance['MAE'].round(2)
    print('Model Performance Comparison')
    print(model_performance)

    print('Recommended Best Model:')
    print(f'Based on RMSE: {best_rmse_model}')
    print(f'Based on MAE: {best_mae_model}')

    # ---- Monthly Total Revenue Prediction Comparison ----
    # The last months of the test window, compared against each model's
    # out-of-sample prediction for the same months.
    recent_months = test.tail(RECENT_MONTHS)
    plt.figure(figsize=(12, 6))
    plt.plot(recent_months['MonthNumber'], recent_months[target], marker='o',
             linestyle='-', label='Actual Revenue')
    for name, predictions in test_predictions.items():
        plt.plot(recent_months['MonthNumber'], predictions[-len(recent_months):],
                 label=f'{name} Predictions')
    plt.title('Monthly Total Revenue Prediction Comparison')
    plt.xlabel('Month Number')
    plt.ylabel('Total Revenue')
    plt.grid(True)
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# Recorded output of the ORIGINAL notebook run (unseeded, before the fixes
# above: ARIMA was fitted on the full series and the MLP was unscaled).
# Kept for reference only; a fresh run will print different numbers.
#
# sales_data.head():
#          Date  Total Revenue
#   0 2012-01-01       10805.14
#   1 2012-01-02        9779.27
#   2 2012-01-03       10554.37
#   3 2012-01-04        9587.57
#   4 2012-01-05       11120.40
#
# monthly_sales (132 rows x 4 columns):
#        Year  Month  Total Revenue  MonthNumber
#   0    2012      1      317708.47            1
#   1    2012      2      286907.41            2
#   2    2012      3      303435.68            3
#   3    2012      4      305924.18            4
#   4    2012      5      311490.48            5
#   ...
#   127  2022      8      310212.28          128
#   128  2022      9      295225.62          129
#   129  2022     10      309116.39          130
#   130  2022     11      293850.55          131
#   131  2022     12      315075.06          132
#
# Warnings emitted by the original code:
#   sklearn ConvergenceWarning: Stochastic Optimizer: Maximum iterations
#     (1000) reached and the optimization hasn't converged yet.
#   statsmodels FutureWarning: Unknown keyword arguments: dict_keys(['typ']).
#     Passing unknown keyword arguments will raise a TypeError beginning in
#     version 0.15.
#
# Model Performance Comparison
#               Model       RMSE        MAE
#   0  Neural Network  288687.98  288525.02
#   3           ARIMA   13564.39   10798.53
#   2   Decision Tree   12825.98   10978.91
#   1   Random Forest   10165.63    8094.33
#
# Recommended Best Model:
#   Based on RMSE: Random Forest
#   Based on MAE: Random Forest
# ---------------------------------------------------------------------------
Discover more documents: Sign up today!
Unlock a world of knowledge! Explore tailored content for a richer learning experience. Here's what you'll get:
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help