project_2

py

School

Brigham Young University, Idaho *

*We aren’t endorsed by this school

Course

250

Subject

Mechanical Engineering

Date

Apr 3, 2024

Type

py

Pages

2

Uploaded by BaronMonkey12666

Report
#%%
# Flight-delay exploration: load flights_missing.json, normalise its
# missing-value placeholders, then summarise delays by airport and month and
# estimate how many delayed flights were caused by weather.
import numpy as np
import pandas as pd
import plotly.express as px

# Placeholder values that stand in for missing data. The original script only
# listed the string forms; the numeric -999 is included as well so the sentinel
# is caught whether it was parsed as text or as a number.
# NOTE(review): confirm against the raw file which form actually occurs.
MISSING_MARKERS = [-999, '-999', '1500+', '']

# Months in which 40% of NAS delays are attributed to weather; every other
# month uses 65%.
LOW_NAS_WEATHER_MONTHS = ['April', 'May', 'June', 'July', 'August']

#%%
df = pd.read_json('flights_missing.json')

#%%
df

#%%
df.info()

#%%
# Load the dataset into a pandas DataFrame (reloaded so this cell can be run
# on its own).
df = pd.read_json('flights_missing.json')

# Replace missing-value placeholders with NaN.
df = df.replace(MISSING_MARKERS, np.nan)

# Convert numerical columns to a numeric data type; anything that still fails
# to parse becomes NaN.
numerical_columns = [
    'num_of_delays_late_aircraft',
    'num_of_delays_total',
    'minutes_delayed_late_aircraft',
    'minutes_delayed_total',
]
df[numerical_columns] = df[numerical_columns].apply(pd.to_numeric, errors='coerce')

# Store the month column with the generic object dtype.
df['month'] = df['month'].astype('object')

# Convert the remaining count/minute columns to the nullable integer dtype so
# missing entries can coexist with integer values.
integer_columns = [
    'num_of_flights_total',
    'num_of_delays_nas',
    'num_of_delays_security',
    'num_of_delays_weather',
    'minutes_delayed_carrier',
    'minutes_delayed_nas',
    'minutes_delayed_security',
    'minutes_delayed_weather',
]
df[integer_columns] = df[integer_columns].astype('Int64')

df.info()

#%%
# Mean total delay minutes per airport, worst first. Only the numeric column is
# aggregated: averaging the text column airport_name raises on pandas 2.x.
delay = (
    df.groupby('airport_code')[['minutes_delayed_total']]
    .mean()
    .sort_values(by='minutes_delayed_total', ascending=False)
)
delay

#%%
# Total number of delays per month, largest first.
month_delays = df.groupby('month')['num_of_delays_total'].sum()
month_delays = month_delays.sort_values(ascending=False)
month_delays

#%%
df

#%%
# Work on a copy so the derived column does not leak back into df.
df2 = df.copy()
df2 = df2.replace(MISSING_MARKERS, np.nan)

# Share of NAS delays attributed to weather, chosen per row by month.
is_low_nas_month = df2['month'].isin(LOW_NAS_WEATHER_MONTHS)
nas_weather_share = np.where(is_low_nas_month, 0.4, 0.65)

# Total number of flights delayed by weather:
#   100% of delays in the Weather category
# +  30% of delays in the Late-Arriving aircraft category
# +  40% (April-August) or 65% (other months) of delays in the NAS category
# Rows with a missing input stay missing in the result.
# NOTE(review): decide whether missing late-aircraft counts should be imputed
# before this step.
df2['flights_delayed_weather'] = (
    df2['num_of_delays_weather']
    + 0.3 * df2['num_of_delays_late_aircraft']
    + df2['num_of_delays_nas'] * nas_weather_share
)

df2

#%%
# Plot the new column for each airport.
fig = px.bar(
    df2,
    x='airport_code',
    y='flights_delayed_weather',
)
fig.show()  # Show the plot

# %%
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help