IE6400_Quiz18_Day 21

pdf

School

Northeastern University *

*We aren’t endorsed by this school

Course

6400

Subject

Industrial Engineering

Date

Feb 20, 2024

Type

pdf

Pages

Uploaded by ColonelStraw13148

"""Synthetic daily sales -> monthly aggregation -> forecasting model comparison.

Generates random daily revenue for 2012-2022, aggregates it by month, fits
four models (neural network, random forest, decision tree, ARIMA) on the
first 80% of the months, scores them on the held-out 20%, and plots a
six-month forecast.

Comment blocks marked "Recorded output" were captured from the original,
unseeded run of this notebook; the numbers will not match a fresh run.
"""

from datetime import datetime, timedelta
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from statsmodels.tsa.arima.model import ARIMA

RANDOM_SEED = 42
FEATURES = ['MonthNumber']
TARGET = 'Total Revenue'
ARIMA_ORDER = (5, 1, 0)
FORECAST_HORIZON = 6  # months to forecast past the end of the data


def _rmse_and_mae(actual, predicted):
    """Return ``(RMSE, MAE)`` of *predicted* measured against *actual*."""
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    return rmse, mae


# ---------------------------------------------------------------------------
# Synthetic data
# ---------------------------------------------------------------------------

# Define the parameters for the synthetic data
start_date = datetime(2012, 1, 1)
end_date = datetime(2022, 12, 31)  # Updated end date to December 2022
num_days = (end_date - start_date).days + 1
base_sales = 10000  # Average daily sales
sales_std_dev = 1000  # Standard deviation for sales variation

# Generate synthetic daily sales data.  A seeded generator is used so the
# data (and therefore every metric below) is reproducible, in line with the
# fixed random_state already passed to the models.
rng = np.random.default_rng(RANDOM_SEED)
dates = [start_date + timedelta(days=x) for x in range(num_days)]
sales = rng.normal(base_sales, sales_std_dev, num_days).round(2)
sales = np.clip(sales, 0, None)  # Ensure sales are non-negative

# Create a DataFrame for the daily sales data
sales_data = pd.DataFrame({'Date': dates, 'Total Revenue': sales})

# Plotting the time series
plt.figure(figsize=(12, 6))
plt.plot(sales_data['Date'], sales_data['Total Revenue'], label='Total Revenue')
plt.title('Time Series Plot of Daily Sales Data')
plt.xlabel('Date')
plt.ylabel('Total Revenue')
plt.grid(True)
plt.legend()
plt.show()

# print() so the preview is shown when run as a script, not only in a notebook.
print(sales_data.head())
# Recorded output (original run):
#          Date  Total Revenue
# 0  2012-01-01       10805.14
# 1  2012-01-02        9779.27
# 2  2012-01-03       10554.37
# 3  2012-01-04        9587.57
# 4  2012-01-05       11120.40

# ---------------------------------------------------------------------------
# Data Transformation
# ---------------------------------------------------------------------------

# Convert the 'Date' column to datetime format
sales_data['Date'] = pd.to_datetime(sales_data['Date'])

# Extract year and month from the 'Date' column
sales_data['Year'] = sales_data['Date'].dt.year
sales_data['Month'] = sales_data['Date'].dt.month

# Aggregate daily sales to monthly sales
monthly_sales = (
    sales_data
    .groupby(['Year', 'Month'], as_index=False)['Total Revenue']
    .sum()
)

# Feature engineering: add a column for the month number
# (1 to 132: eleven years, January 2012 through December 2022)
monthly_sales['MonthNumber'] = (
    (monthly_sales['Year'] - start_date.year) * 12 + monthly_sales['Month']
)

print(monthly_sales)
# Recorded output (original run):
#      Year  Month  Total Revenue  MonthNumber
# 0    2012      1      317708.47            1
# 1    2012      2      286907.41            2
# 2    2012      3      303435.68            3
# 3    2012      4      305924.18            4
# 4    2012      5      311490.48            5
# ..    ...    ...            ...          ...
# 127  2022      8      310212.28          128
# 128  2022      9      295225.62          129
# 129  2022     10      309116.39          130
# 130  2022     11      293850.55          131
# 131  2022     12      315075.06          132
# [132 rows x 4 columns]

# ---------------------------------------------------------------------------
# Model Development and Comparison: Machine Learning Models
# ---------------------------------------------------------------------------

# Chronological train-test split (no shuffling: the last 20% is the test set)
train, test = train_test_split(monthly_sales, test_size=0.2, shuffle=False)

# Neural Network model.
# The original run trained the MLP on the raw month number and raw revenue
# (~3e5); it emitted a ConvergenceWarning after 1000 iterations and scored an
# RMSE of ~288k, i.e. it never reached the scale of the target.  Standardising
# both the feature and the target inside the model object addresses that
# while keeping the same fit()/predict() interface.
nn_model = TransformedTargetRegressor(
    regressor=make_pipeline(
        StandardScaler(),
        MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000,
                     random_state=RANDOM_SEED),
    ),
    transformer=StandardScaler(),
)
nn_model.fit(train[FEATURES], train[TARGET])
nn_predictions = nn_model.predict(test[FEATURES])
nn_rmse, nn_mae = _rmse_and_mae(test[TARGET], nn_predictions)

# Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=RANDOM_SEED)
rf_model.fit(train[FEATURES], train[TARGET])
rf_predictions = rf_model.predict(test[FEATURES])
rf_rmse, rf_mae = _rmse_and_mae(test[TARGET], rf_predictions)

# Decision Tree model
dt_model = DecisionTreeRegressor(random_state=RANDOM_SEED)
dt_model.fit(train[FEATURES], train[TARGET])
dt_predictions = dt_model.predict(test[FEATURES])
dt_rmse, dt_mae = _rmse_and_mae(test[TARGET], dt_predictions)

# ARIMA model.
# For a fair comparison ARIMA must be scored out-of-sample like the other
# models: fit on the training months only and forecast the test horizon.
# (The original fitted on the full series and scored in-sample predictions.)
arima_eval_results = ARIMA(train[TARGET], order=ARIMA_ORDER).fit()
arima_predictions = arima_eval_results.forecast(steps=len(test))
arima_rmse, arima_mae = _rmse_and_mae(test[TARGET], arima_predictions)

# Refit on every available month; this model is used only to forecast
# beyond the end of the data.
arima_model = ARIMA(monthly_sales[TARGET], order=ARIMA_ORDER)
arima_results = arima_model.fit()

# ---------------------------------------------------------------------------
# Plotting Monthly Total Revenue Prediction for Next 6 Months
# ---------------------------------------------------------------------------
plt.figure(figsize=(12, 6))

# Plot historical data
plt.plot(monthly_sales['MonthNumber'], monthly_sales[TARGET],
         marker='o', linestyle='-', color='green',
         label='Historical Data', alpha=0.7)

# Month numbers for the forecast horizon
last_month_number = int(monthly_sales['MonthNumber'].max())
future_months = range(last_month_number + 1,
                      last_month_number + 1 + FORECAST_HORIZON)
future_months_data = pd.DataFrame({'MonthNumber': future_months})

# Neural Network predictions
nn_future_predictions = nn_model.predict(future_months_data[FEATURES])
plt.plot(future_months, nn_future_predictions,
         label='Neural Network Predictions')

# Random Forest predictions
rf_future_predictions = rf_model.predict(future_months_data[FEATURES])
plt.plot(future_months, rf_future_predictions,
         label='Random Forest Predictions')

# Decision Tree predictions
dt_future_predictions = dt_model.predict(future_months_data[FEATURES])
plt.plot(future_months, dt_future_predictions,
         label='Decision Tree Predictions')

# ARIMA predictions.
# The original passed typ='levels', which this ARIMA implementation does not
# accept: the recorded run emitted "FutureWarning: Unknown keyword arguments:
# dict_keys(['typ'])" and warned it will become a TypeError.
arima_future_predictions = arima_results.forecast(steps=FORECAST_HORIZON)
plt.plot(future_months, arima_future_predictions, label='ARIMA Predictions')

plt.title('Monthly Total Revenue Prediction for Next 6 Months (Including Historical)')
plt.xlabel('Month Number')
plt.ylabel('Total Revenue')
plt.grid(True)
plt.legend()
plt.show()

# ---------------------------------------------------------------------------
# Comparison with ARIMA
# ---------------------------------------------------------------------------

# Collect RMSE and MAE for each model
model_performance = pd.DataFrame({
    'Model': ['Neural Network', 'Random Forest', 'Decision Tree', 'ARIMA'],
    'RMSE': [nn_rmse, rf_rmse, dt_rmse, arima_rmse],
    'MAE': [nn_mae, rf_mae, dt_mae, arima_mae],
})

# Sort the DataFrame based on RMSE in descending order (best model last)
model_performance = model_performance.sort_values(by='RMSE', ascending=False)

# Format values to keep two decimal points
model_performance['RMSE'] = model_performance['RMSE'].round(2)
model_performance['MAE'] = model_performance['MAE'].round(2)

print('Model Performance Comparison')
print(model_performance)
# Recorded output (original run, before the fixes in this script):
# Model Performance Comparison
#             Model       RMSE        MAE
# 0  Neural Network  288687.98  288525.02
# 3           ARIMA   13564.39   10798.53
# 2   Decision Tree   12825.98   10978.91
# 1   Random Forest   10165.63    8094.33

# Determine the best model per metric.  Each metric is minimised on its own
# column: the RMSE ordering is not guaranteed to match the MAE ordering (in
# the recorded table ARIMA beats Decision Tree on MAE but not on RMSE).
best_rmse_model = model_performance.loc[model_performance['RMSE'].idxmin(), 'Model']
best_mae_model = model_performance.loc[model_performance['MAE'].idxmin(), 'Model']

# Print recommendations
print('Recommended Best Model:')
print(f'Based on RMSE: {best_rmse_model}')
print(f'Based on MAE: {best_mae_model}')
# Recorded output (original run):
# Recommended Best Model:
# Based on RMSE: Random Forest
# Based on MAE: Random Forest

# ---------------------------------------------------------------------------
# Monthly Total Revenue Prediction Comparison
# ---------------------------------------------------------------------------
recent_months = monthly_sales.tail(12)  # Consider the last 12 months for comparison
plt.figure(figsize=(12, 6))

# Plot actual revenue
plt.plot(recent_months['MonthNumber'], recent_months[TARGET],
         marker='o', linestyle='-', label='Actual Revenue')

# Plot predictions by different models
plt.plot(recent_months['MonthNumber'],
         nn_model.predict(recent_months[FEATURES]),
         label='Neural Network Predictions')
plt.plot(recent_months['MonthNumber'],
         rf_model.predict(recent_months[FEATURES]),
         label='Random Forest Predictions')
plt.plot(recent_months['MonthNumber'],
         dt_model.predict(recent_months[FEATURES]),
         label='Decision Tree Predictions')
# The last 12 months lie inside the held-out test span, so reuse the
# out-of-sample ARIMA forecast; the other three curves are out-of-sample too.
plt.plot(recent_months['MonthNumber'],
         arima_predictions.tail(len(recent_months)),
         label='ARIMA Predictions')

plt.title('Monthly Total Revenue Prediction Comparison')
plt.xlabel('Month Number')
plt.ylabel('Total Revenue')
plt.grid(True)
plt.legend()
plt.show()

# Recorded figures (original run), as far as the exported axis labels show:
# - 'Time Series Plot of Daily Sales Data': y ticks 7000-14000, x axis 2012-2022.
# - 'Monthly Total Revenue Prediction Comparison': y ticks 50000-300000,
#   x axis month numbers 122-132.

Discover more documents: Sign up today!

Unlock a world of knowledge! Explore tailored content for a richer learning experience. Here's what you'll get:

Access to all documents
Unlimited textbook solutions
24/7 expert homework help

IE6400_Quiz18_Day 21

Related Documents