FDA_Quiz19
pdf
keyboard_arrow_up
School
Northeastern University *
*We aren’t endorsed by this school
Course
6400
Subject
Aerospace Engineering
Date
Dec 6, 2023
Type
Pages
8
Uploaded by DeaconTurkey3670
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
1/8
In [76]:
import
pandas
as
pd
import
numpy
as
np
from
datetime
import
datetime
,
timedelta
import
matplotlib.pyplot
as
plt
# Define the parameters for the synthetic data
start_date
=
datetime
(
2012
,
1
,
1
)
end_date
=
datetime
(
2022
,
12
,
31
)
# Updated end date to December 2022
num_days
=
(
end_date
-
start_date
)
.
days
+
1
base_sales
=
10000
# Average daily sales
sales_std_dev
=
1000
# Standard deviation for sales variation
# Generate synthetic daily sales data
dates
=
[
start_date
+
timedelta
(
days
=
x
)
for
x
in
range
(
num_days
)]
sales
=
np
.
random
.
normal
(
base_sales
,
sales_std_dev
,
num_days
)
.
round
(
2
)
sales
=
np
.
where
(
sales
<
0
,
0
,
sales
)
# Ensure sales are non-negative
# Create a DataFrame for the daily sales data
sales_data
=
pd
.
DataFrame
({
'Date'
:
dates
,
'Total Revenue'
:
sales
})
# Plotting the time series
plt
.
figure
(
figsize
=
(
12
,
6
))
plt
.
plot
(
sales_data
[
'Date'
],
sales_data
[
'Total Revenue'
],
label
=
'Total Revenue'
)
plt
.
title
(
'Time Series Plot of Daily Sales Data'
)
plt
.
xlabel
(
'Date'
)
plt
.
ylabel
(
'Total Revenue'
)
plt
.
grid
(
True
)
plt
.
legend
()
plt
.
show
()
sales_data
.
head
()
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
2/8
Date
Total Revenue
0
2012-01-01
10162.47
1
2012-01-02
11042.80
2
2012-01-03
10327.36
3
2012-01-04
10065.52
4
2012-01-05
11191.54
Out[76]:
In [77]:
# Convert the 'Date' column to datetime type
sales_data
[
'Date'
]
=
pd
.
to_datetime
(
sales_data
[
'Date'
])
# Create monthly bins
sales_data
[
'MonthBin'
]
=
pd
.
to_datetime
(
sales_data
[
'Date'
]
.
dt
.
year
.
astype
(
str
)
+
'-'
+
sales_data
[
'Date'
]
.
dt
.
month
.
asty
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
3/8
Total Revenue
0
312512.92
1
282454.69
2
301827.22
3
299456.64
4
306032.10
..
...
127
318132.81
128
295871.11
129
304823.23
130
302211.22
131
305599.23
[132 rows x 1 columns]
# Aggregate daily sales up to the 25th of each month
monthly_sales
=
sales_data
.
groupby
(
'MonthBin'
)[
'Total Revenue'
]
.
sum
()
.
reset_index
()
# Handle partial data for the current month
current_month
=
pd
.
to_datetime
(
'today'
)
# Assuming today's date for the current month
days_passed
=
current_month
.
day
total_days_in_month
=
pd
.
to_datetime
(
current_month
+
pd
.
DateOffset
(
months
=
1
))
.
day
# Proportional adjustment for the current month
monthly_sales
.
loc
[
monthly_sales
[
'MonthBin'
]
==
current_month
,
'Total Revenue'
]
*=
(
total_days_in_month
/
days_passed
)
# Drop the 'MonthBin' column if not needed for further analysis
monthly_sales
=
monthly_sales
.
drop
(
'MonthBin'
,
axis
=
1
)
# Print or use the resulting 'monthly_sales' DataFrame for forecasting
print
(
monthly_sales
)
In [84]:
import
pandas
as
pd
import
numpy
as
np
from
statsmodels.tsa.statespace.sarimax
import
SARIMAX
from
sklearn.neural_network
import
MLPRegressor
from
sklearn.ensemble
import
RandomForestRegressor
from
sklearn.tree
import
DecisionTreeRegressor
from
sklearn.model_selection
import
train_test_split
from
sklearn.metrics
import
mean_absolute_error
,
mean_squared_error
import
matplotlib.pyplot
as
plt
# Convert the 'MonthBin' column to datetime type
monthly_sales
[
'MonthBin'
]
=
pd
.
to_datetime
(
monthly_sales
.
index
)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
4/8
# Set 'MonthBin' as the index
monthly_sales
=
monthly_sales
.
set_index
(
'MonthBin'
)
# Create a range of integers for the x-axis
x_axis_values
=
np
.
arange
(
len
(
monthly_sales
))
# Split the data into training and testing sets
train_size
=
int
(
len
(
monthly_sales
)
*
0.8
)
# 80% for training
train
,
test
=
monthly_sales
.
iloc
[:
train_size
],
monthly_sales
.
iloc
[
train_size
:]
# ARIMA Model
order
=
(
1
,
1
,
1
)
# Example order parameters, tune based on your data
seasonal_order
=
(
1
,
1
,
1
,
12
)
# Example seasonal order parameters, tune based on your data
arima_model
=
SARIMAX
(
train
,
order
=
order
,
seasonal_order
=
seasonal_order
)
arima_results
=
arima_model
.
fit
()
# Neural Network Model
nn_model
=
MLPRegressor
(
hidden_layer_sizes
=
(
100
,),
max_iter
=
1000
)
# Example parameters, tune based on your data
nn_model
.
fit
(
x_axis_values
[:
len
(
train
)]
.
reshape
(
-
1
,
1
),
train
.
values
.
flatten
())
# Random Forest Model
rf_model
=
RandomForestRegressor
(
n_estimators
=
100
,
random_state
=
42
)
# Example parameters, tune based on your data
rf_model
.
fit
(
x_axis_values
[:
len
(
train
)]
.
reshape
(
-
1
,
1
),
train
.
values
.
flatten
())
# Decision Tree Model
dt_model
=
DecisionTreeRegressor
(
random_state
=
42
)
# Example parameters, tune based on your data
dt_model
.
fit
(
x_axis_values
[:
len
(
train
)]
.
reshape
(
-
1
,
1
),
train
.
values
.
flatten
())
# Make predictions on the test set
arima_forecast
=
arima_results
.
get_forecast
(
steps
=
len
(
test
))
nn_forecast
=
nn_model
.
predict
(
x_axis_values
[
len
(
train
):]
.
reshape
(
-
1
,
1
))
rf_forecast
=
rf_model
.
predict
(
x_axis_values
[
len
(
train
):]
.
reshape
(
-
1
,
1
))
dt_forecast
=
dt_model
.
predict
(
x_axis_values
[
len
(
train
):]
.
reshape
(
-
1
,
1
))
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
5/8
C:\Users\galra\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency informatio
n was provided, so inferred frequency N will be used.
self._init_dates(dates, freq)
C:\Users\galra\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency informatio
n was provided, so inferred frequency N will be used.
self._init_dates(dates, freq)
C:\Users\galra\anaconda3\Lib\site-packages\statsmodels\base\model.py:607: ConvergenceWarning: Maximum Likelihood optimi
zation failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
C:\Users\galra\anaconda3\Lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: St
ochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.
warnings.warn(
In [85]:
# Plot the results
plt
.
figure
(
figsize
=
(
12
,
6
))
# Plot the area between the original data and the x-axis in green
plt
.
fill_between
(
x_axis_values
[:
len
(
train
)],
monthly_sales
.
values
.
flatten
()[:
len
(
train
)],
color
=
'green'
,
alpha
=
0.3
)
plt
.
plot
(
x_axis_values
[:
len
(
train
)],
monthly_sales
.
values
.
flatten
()[:
len
(
train
)],
label
=
'Original Data'
,
color
=
'green'
,
# ARIMA
plt
.
plot
(
x_axis_values
[
len
(
train
):],
arima_forecast
.
predicted_mean
,
label
=
'ARIMA Forecast'
,
linestyle
=
'dashed'
,
marker
=
# Neural Network
plt
.
plot
(
x_axis_values
[
len
(
train
):],
nn_forecast
,
label
=
'Neural Network Forecast'
,
linestyle
=
'dashed'
,
marker
=
'o'
)
# Random Forest
plt
.
plot
(
x_axis_values
[
len
(
train
):],
rf_forecast
,
label
=
'Random Forest Forecast'
,
linestyle
=
'dashed'
,
marker
=
'o'
)
# Decision Tree
plt
.
plot
(
x_axis_values
[
len
(
train
):],
dt_forecast
,
label
=
'Decision Tree Forecast'
,
linestyle
=
'dashed'
,
marker
=
'o'
)
plt
.
title
(
'Monthly Sales Forecasting Comparison'
)
plt
.
xlabel
(
'Months (from 0 to 140)'
)
plt
.
legend
()
plt
.
show
()
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
6/8
In [92]:
date_range
=
pd
.
date_range
(
start
=
start_date
,
periods
=
len
(
monthly_sales
),
freq
=
'M'
)
plt
.
figure
(
figsize
=
(
12
,
6
))
plt
.
plot
(
date_range
[
len
(
train
):],
test
.
values
.
flatten
(),
label
=
'Actual Test Data'
,
color
=
'purple'
,
marker
=
'o'
)
# ARIMA
plt
.
plot
(
date_range
[
len
(
train
):],
arima_forecast
.
predicted_mean
,
label
=
'ARIMA Forecast'
,
linestyle
=
'dashed'
,
marker
=
'o'
# Neural Network
plt
.
plot
(
date_range
[
len
(
train
):],
nn_forecast
,
label
=
'Neural Network Forecast'
,
linestyle
=
'dashed'
,
marker
=
'o'
)
# Random Forest
plt
.
plot
(
date_range
[
len
(
train
):],
rf_forecast
,
label
=
'Random Forest Forecast'
,
linestyle
=
'dashed'
,
marker
=
'o'
)
# Decision Tree
plt
.
plot
(
date_range
[
len
(
train
):],
dt_forecast
,
label
=
'Decision Tree Forecast'
,
linestyle
=
'dashed'
,
marker
=
'o'
)
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
7/8
plt
.
title
(
'Monthly Sales Forecasting Comparison'
)
plt
.
xlabel
(
'Date'
)
plt
.
ylabel
(
'Total Revenue'
)
plt
.
legend
()
plt
.
grid
(
True
)
plt
.
show
()
In [88]:
arima_rmse
=
np
.
sqrt
(
mean_squared_error
(
test
,
arima_forecast
.
predicted_mean
))
arima_mae
=
mean_absolute_error
(
test
,
arima_forecast
.
predicted_mean
)
nn_rmse
=
np
.
sqrt
(
mean_squared_error
(
test
,
nn_forecast
))
nn_mae
=
mean_absolute_error
(
test
,
nn_forecast
)
rf_rmse
=
np
.
sqrt
(
mean_squared_error
(
test
,
rf_forecast
))
rf_mae
=
mean_absolute_error
(
test
,
rf_forecast
)
11/28/23, 1:34 PM
FDA_Quiz19
localhost:8888/nbconvert/html/Downloads/FDA/Quiz/FDA_Quiz19.ipynb?download=false
8/8
ARIMA RMSE: 8196.049738738793, MAE: 6532.41523262475
Neural Network RMSE: 288578.9689683112, MAE: 288366.5125517087
Random Forest RMSE: 11036.319723462368, MAE: 7499.340266666666
Decision Tree RMSE: 12179.83513797034, MAE: 9861.055185185183
Based on these results, the ARIMA model provides more accurate predictions compared to the machine learning models.
dt_rmse
=
np
.
sqrt
(
mean_squared_error
(
test
,
dt_forecast
))
dt_mae
=
mean_absolute_error
(
test
,
dt_forecast
)
print
(
f'ARIMA RMSE: {
arima_rmse
}, MAE: {
arima_mae
}'
)
print
(
f'Neural Network RMSE: {
nn_rmse
}, MAE: {
nn_mae
}'
)
print
(
f'Random Forest RMSE: {
rf_rmse
}, MAE: {
rf_mae
}'
)
print
(
f'Decision Tree RMSE: {
dt_rmse
}, MAE: {
dt_mae
}'
)