FDA_Quiz19

pdf

School

Northeastern University *

*We aren’t endorsed by this school

Course

6400

Subject

Aerospace Engineering

Date

Dec 6, 2023

Type

pdf

Pages

8

Uploaded by DeaconTurkey3670

Report
# Notebook cell In [76] (page 1/8 of the export): build a synthetic daily
# sales series for 2012-2022 and plot it.
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Define the parameters for the synthetic data
start_date = datetime(2012, 1, 1)
end_date = datetime(2022, 12, 31)  # inclusive; updated end date to December 2022
num_days = (end_date - start_date).days + 1
base_sales = 10000  # Average daily sales
sales_std_dev = 1000  # Standard deviation for sales variation
RANDOM_SEED = 42  # fixed so that re-running the notebook reproduces the same series


def generate_daily_sales(start, end, mean=10000, std_dev=1000, seed=None):
    """Return a DataFrame of synthetic daily revenue between two dates.

    Args:
        start: First day of the series (inclusive).
        end: Last day of the series (inclusive).
        mean: Mean of the normal distribution the daily revenue is drawn from.
        std_dev: Standard deviation of that distribution.
        seed: Seed for the random generator; ``None`` draws a fresh,
            non-reproducible series.

    Returns:
        A DataFrame with one row per day and the columns ``'Date'`` and
        ``'Total Revenue'`` (rounded to 2 decimals, never negative).

    Raises:
        ValueError: If ``end`` is earlier than ``start``.
    """
    n_days = (end - start).days + 1
    if n_days <= 0:
        raise ValueError("end must not be earlier than start")

    rng = np.random.default_rng(seed)
    revenue = rng.normal(mean, std_dev, n_days).round(2)
    revenue = np.clip(revenue, 0, None)  # Ensure sales are non-negative

    return pd.DataFrame({
        'Date': pd.date_range(start=start, periods=n_days, freq='D'),
        'Total Revenue': revenue,
    })


# Create a DataFrame for the daily sales data
sales_data = generate_daily_sales(
    start_date, end_date, base_sales, sales_std_dev, seed=RANDOM_SEED
)

# Plain-sequence views of the two columns, kept under their original names
# in case other cells reference them.
dates = sales_data['Date'].tolist()
sales = sales_data['Total Revenue'].to_numpy()

if __name__ == "__main__":
    # Plotting is a side effect, so it only runs when the notebook/script is
    # executed directly. The import stays at module scope (not inside a
    # function) so that `plt` remains available to the following cells.
    import matplotlib.pyplot as plt

    # Plotting the time series
    plt.figure(figsize=(12, 6))
    plt.plot(sales_data['Date'], sales_data['Total Revenue'], label='Total Revenue')
    plt.title('Time Series Plot of Daily Sales Data')
    plt.xlabel('Date')
    plt.ylabel('Total Revenue')
    plt.grid(True)
    plt.legend()
    plt.show()

sales_data.head()
11/28/23, 1:34 PM FDA_Quiz19 localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false 2/8 Date Total Revenue 0 2012-01-01 10162.47 1 2012-01-02 11042.80 2 2012-01-03 10327.36 3 2012-01-04 10065.52 4 2012-01-05 11191.54 Out[76]: In [77]: # Convert the 'Date' column to datetime type sales_data [ 'Date' ] = pd . to_datetime ( sales_data [ 'Date' ]) # Create monthly bins sales_data [ 'MonthBin' ] = pd . to_datetime ( sales_data [ 'Date' ] . dt . year . astype ( str ) + '-' + sales_data [ 'Date' ] . dt . month . asty
# Page 3/8 of the export: monthly aggregation cell, followed by the start of
# notebook cell In [84].

# Aggregate daily sales into one total per calendar month ('MonthBin')
monthly_sales = sales_data.groupby('MonthBin')['Total Revenue'].sum().reset_index()

# Handle partial data for the current month
current_month = pd.to_datetime('today')  # Assuming today's date for the current month
days_passed = current_month.day
# Length of the current calendar month. (Adding one month and reading `.day`
# only returns today's day-of-month again, so the scale factor was ~1.)
total_days_in_month = current_month.days_in_month

# Proportional adjustment for the current month. Compare at month resolution:
# 'today' carries a day and a time, so an exact equality test against a
# month bin would never select a row.
is_current_month = (
    monthly_sales['MonthBin'].dt.to_period('M') == current_month.to_period('M')
)
monthly_sales.loc[is_current_month, 'Total Revenue'] *= (
    total_days_in_month / days_passed
)

# Remember the real month of every row before dropping the column, so the
# forecasting cell below can restore a genuine date index.
month_bins = monthly_sales['MonthBin']

# Drop the 'MonthBin' column so only the revenue column is printed
monthly_sales = monthly_sales.drop('MonthBin', axis=1)

# Print or use the resulting 'monthly_sales' DataFrame for forecasting
print(monthly_sales)
# Output recorded in the notebook export:
#      Total Revenue
# 0        312512.92
# 1        282454.69
# 2        301827.22
# 3        299456.64
# 4        306032.10
# ..             ...
# 127      318132.81
# 128      295871.11
# 129      304823.23
# 130      302211.22
# 131      305599.23
# [132 rows x 1 columns]

# Notebook cell In [84]
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Restore the 'MonthBin' column as real month dates. Building it from
# `monthly_sales.index` would convert the integer positions 0..131 into
# timestamps a few nanoseconds after 1970-01-01, which is what made
# statsmodels report an inferred frequency of "N" (nanoseconds).
monthly_sales['MonthBin'] = pd.to_datetime(month_bins)
Your preview ends here
Eager to read the complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
# Page 4/8 of the export: continuation of notebook cell In [84] --
# chronological train/test split, model fitting and test-set forecasts.
# Scaling helpers for the neural network; imported here because only this
# cell needs them.
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Set 'MonthBin' as the index
monthly_sales = monthly_sales.set_index('MonthBin')

# Create a range of integers for the x-axis
x_axis_values = np.arange(len(monthly_sales))

# Split the data into training and testing sets (no shuffling: the test set
# is the final 20% of the months)
train_size = int(len(monthly_sales) * 0.8)  # 80% for training
train, test = monthly_sales.iloc[:train_size], monthly_sales.iloc[train_size:]

# The scikit-learn models use the month position as their only feature.
X_train = x_axis_values[:len(train)].reshape(-1, 1)
X_test = x_axis_values[len(train):].reshape(-1, 1)
y_train = train.values.flatten()

# ARIMA Model
order = (1, 1, 1)  # Example order parameters, tune based on your data
seasonal_order = (1, 1, 1, 12)  # Example seasonal order parameters, tune based on your data
arima_model = SARIMAX(train, order=order, seasonal_order=seasonal_order)
arima_results = arima_model.fit()

# Neural Network Model
# The raw inputs (0..N) and targets (~3e5) are far from the unit scale an
# MLP trains well on; unscaled, the optimizer stopped at max_iter without
# converging and the forecast error was about as large as the series itself.
# Standardize the feature and the target; TransformedTargetRegressor maps
# predictions back to revenue units, so `nn_model.predict` is used unchanged.
nn_model = TransformedTargetRegressor(
    regressor=make_pipeline(
        StandardScaler(),
        # Example parameters, tune based on your data; random_state pinned
        # like the tree models below so reruns give the same forecast.
        MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),
    ),
    transformer=StandardScaler(),
)
nn_model.fit(X_train, y_train)

# Random Forest Model
# NOTE(review): tree-based regressors predict piecewise-constant values, so
# for month positions beyond the training range they cannot follow a trend.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)  # Example parameters, tune based on your data
rf_model.fit(X_train, y_train)

# Decision Tree Model
dt_model = DecisionTreeRegressor(random_state=42)  # Example parameters, tune based on your data
dt_model.fit(X_train, y_train)

# Make predictions on the test set
arima_forecast = arima_results.get_forecast(steps=len(test))
nn_forecast = nn_model.predict(X_test)
rf_forecast = rf_model.predict(X_test)
dt_forecast = dt_model.predict(X_test)
# Page 5/8 of the export. Warnings emitted by the model-fitting cell, as
# recorded in the notebook output:
#   C:\Users\galra\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473:
#     ValueWarning: No frequency information was provided, so inferred
#     frequency N will be used.  self._init_dates(dates, freq)   (emitted twice)
#   C:\Users\galra\anaconda3\Lib\site-packages\statsmodels\base\model.py:607:
#     ConvergenceWarning: Maximum Likelihood optimization failed to converge.
#     Check mle_retvals  warnings.warn("Maximum Likelihood optimization failed to "
#   C:\Users\galra\anaconda3\Lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691:
#     ConvergenceWarning: Stochastic Optimizer: Maximum iterations (1000) reached
#     and the optimization hasn't converged yet.  warnings.warn(

# Notebook cell In [85]: training data and the four forecasts, plotted
# against the integer month position.
# Plot the results
plt.figure(figsize=(12, 6))

train_positions = x_axis_values[:len(train)]
train_revenue = monthly_sales.values.flatten()[:len(train)]
test_positions = x_axis_values[len(train):]

# Plot the area between the original data and the x-axis in green
plt.fill_between(train_positions, train_revenue, color='green', alpha=0.3)
# NOTE(review): this call was cut off after color='green' in the export;
# any further keyword arguments it had are unknown -- confirm against the
# notebook.
plt.plot(train_positions, train_revenue, label='Original Data', color='green')

# ARIMA
# NOTE(review): the export cut this call off at "marker ="; 'o' is assumed
# because every other forecast line uses it -- confirm against the notebook.
plt.plot(test_positions, arima_forecast.predicted_mean, label='ARIMA Forecast', linestyle='dashed', marker='o')

# Neural Network
plt.plot(test_positions, nn_forecast, label='Neural Network Forecast', linestyle='dashed', marker='o')

# Random Forest
plt.plot(test_positions, rf_forecast, label='Random Forest Forecast', linestyle='dashed', marker='o')

# Decision Tree
plt.plot(test_positions, dt_forecast, label='Decision Tree Forecast', linestyle='dashed', marker='o')

plt.title('Monthly Sales Forecasting Comparison')
plt.xlabel('Months (from 0 to 140)')
plt.legend()
plt.show()
# Page 6/8 of the export. Notebook cell In [92]: actual test data and the
# four forecasts plotted against calendar dates. The title, axis labels and
# plt.show() for this figure follow in the continuation of the cell.
# One month-end date per row of monthly_sales, starting from start_date.
date_range = pd.date_range(start=start_date, periods=len(monthly_sales), freq='M')
test_dates = date_range[len(train):]

plt.figure(figsize=(12, 6))

plt.plot(test_dates, test.values.flatten(), label='Actual Test Data', color='purple', marker='o')

# All forecast lines share the same dashed style; only data and label differ.
# (The ARIMA call was missing its closing parenthesis in the export.)
forecast_lines = (
    ('ARIMA Forecast', arima_forecast.predicted_mean),
    ('Neural Network Forecast', nn_forecast),
    ('Random Forest Forecast', rf_forecast),
    ('Decision Tree Forecast', dt_forecast),
)
for forecast_label, forecast_values in forecast_lines:
    plt.plot(test_dates, forecast_values, label=forecast_label, linestyle='dashed', marker='o')
Your preview ends here
Eager to read the complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
# Page 7/8 of the export: end of notebook cell In [92] (figure decoration),
# then the start of cell In [88] (error metrics).
plt.title('Monthly Sales Forecasting Comparison')
plt.xlabel('Date')
plt.ylabel('Total Revenue')
plt.legend()
plt.grid(True)
plt.show()


# Notebook cell In [88]
def _rmse_and_mae(actual, predicted):
    """Return (RMSE, MAE) of `predicted` measured against `actual`."""
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    return rmse, mae


# Score each forecast against the held-out test months.
arima_rmse, arima_mae = _rmse_and_mae(test, arima_forecast.predicted_mean)
nn_rmse, nn_mae = _rmse_and_mae(test, nn_forecast)
rf_rmse, rf_mae = _rmse_and_mae(test, rf_forecast)
# Page 8/8 of the export: end of notebook cell In [88] -- decision-tree
# metrics and the printed comparison of all four models.
dt_squared_error = mean_squared_error(test, dt_forecast)
dt_rmse = np.sqrt(dt_squared_error)
dt_mae = mean_absolute_error(test, dt_forecast)

print(f'ARIMA RMSE: {arima_rmse}, MAE: {arima_mae}')
print(f'Neural Network RMSE: {nn_rmse}, MAE: {nn_mae}')
print(f'Random Forest RMSE: {rf_rmse}, MAE: {rf_mae}')
print(f'Decision Tree RMSE: {dt_rmse}, MAE: {dt_mae}')
# Output recorded in the notebook export:
#   ARIMA RMSE: 8196.049738738793, MAE: 6532.41523262475
#   Neural Network RMSE: 288578.9689683112, MAE: 288366.5125517087
#   Random Forest RMSE: 11036.319723462368, MAE: 7499.340266666666
#   Decision Tree RMSE: 12179.83513797034, MAE: 9861.055185185183
#
# Based on these results, the ARIMA model provides more accurate predictions
# compared to the machine learning models.