# coding: utf-8 # ### Section 6 Homework - Fill in the blanks # Import the packages needed to perform the analysis import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import warnings warnings.filterwarnings('ignore') # Import the data mov = pd._('P4-Section6-Homework-Dataset.csv', encoding = 'latin1') # check columns in dataset and rename the column without spaces mov._=[] # Explore the dataset mov._() # Check the summary of the dataframe mov._() # Check the structure of the dataframe mov._() # Explore the categorical variable Studio, used in the assignment mov.Studio._() # Find the number of categories in Studio len (mov.Studio._) # Explore the categorical variable Studio, used in the assignment mov.Genre._() # Find the number of categories in Studio len (mov.Genre._) # convert Studio,Movie Title,Genre,Director,Day of Week to categorical variable mov.Studio=mov.Studio.astype('category') mov.Genre=mov._._ mov._ =mov._._ mov._ =mov._._ # Filter the dataframe by genre mov2 = mov[(mov._ == 'action') | (mov._ == 'adventure') | (mov._ == 'animation') | (mov._ == 'comedy') | (mov._ == 'drama')] # Filter the dataframe by studio mov3 = mov2[(mov2._ == 'Buena Vista Studios') | (mov2._ == 'Fox') | (mov2._ == 'Paramount Pictures') | (mov2._ == 'Sony') | (mov2._ == 'Universal') | (mov2._ == 'WB')] # Check how the filters worked for mov3 print (_.Genre.unique()) print (_.Studio.unique()) print (len(_)) # Define the style _.set(style="darkgrid", palette="muted", color_codes=True) # Plot the boxsplots x =genre y =gross ax = sns._(data=_, x='_', y='_', orient='v', color='lightgray', showfliers=False) plt.setp(ax.artists, alpha=0.5) # Add in points to show each observation this plot is new but you can try it ok :-) sns.stripplot(x='_', y='_', data=_, jitter=True, size=6, linewidth=0, hue = 'Studio', alpha=0.7) # titel =Demostic Gross % by Gener ax.axes.set_title('_',fontsize=30) # x label = Gener ax.set_xlabel('_',fontsize=20) # y label =Gross 
# NOTE(review): the statement below appears to be the tail of the flattened,
# single-line copy of this script that sits on the preceding line. It runs
# before the imports and before `ax` is assigned in the plotting section --
# confirm it is an extraction leftover and remove it if so.
ax.set_ylabel('_',fontsize=20) # Define where to place the legend ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# coding: utf-8
# ### Section 6 Homework - Fill in the blanks
#
# This is a template: every `_` used as a name, attribute or string below is
# a blank that has to be replaced before the script can run. The hints in the
# comments are reviewer guesses unless they restate something the code itself
# shows; confirm each one against the dataset and the course material.

# Import the packages needed to perform the analysis
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence all warnings for the rest of the script.
warnings.filterwarnings('ignore')

# Import the data. The file is read with latin1 encoding.
# Blank: the pandas loader function (presumably the CSV reader, given the
# .csv file name -- confirm).
mov = pd._('P4-Section6-Homework-Dataset.csv', encoding = 'latin1')

# Check the columns in the dataset and rename them so the names contain no
# spaces. Blank: the DataFrame attribute that holds the column labels; the
# empty list must be filled with one new name per column.
# NOTE(review): the actual column names are not visible in this file.
mov._=[]

# Explore the dataset (blank: a method that previews the rows).
mov._()

# Check the summary of the dataframe (blank: a summary method).
mov._()

# Check the structure of the dataframe (blank: a structure/dtype method).
mov._()

# Explore the categorical variable Studio, used in the assignment
mov.Studio._()

# Find the number of categories in Studio
len (mov.Studio._)

# Explore the categorical variable Genre, used in the assignment
# (the original comment said "Studio", but the code inspects Genre).
mov.Genre._()

# Find the number of categories in Genre
# (the original comment said "Studio", but the code inspects Genre).
len (mov.Genre._)

# Convert Studio, Movie Title, Genre, Director and Day of Week to categorical
# variables. Studio is already done and shows the pattern to follow.
# NOTE(review): the comment names five columns but only four assignments are
# provided; which two columns the last two lines target cannot be told from
# this file.
mov.Studio=mov.Studio.astype('category')
mov.Genre=mov._._
mov._ =mov._._
mov._ =mov._._

# Filter the dataframe by genre: keep rows whose value equals one of
# 'action', 'adventure', 'animation', 'comedy' or 'drama'.
# Blank: the column being compared (presumably Genre -- confirm).
mov2 = mov[(mov._ == 'action') | (mov._ == 'adventure') | (mov._ == 'animation') | (mov._ == 'comedy') | (mov._ == 'drama')]

# Filter the genre-filtered dataframe by studio: keep rows whose value equals
# one of the six studio names listed.
# Blank: the column being compared (presumably Studio -- confirm).
mov3 = mov2[(mov2._ == 'Buena Vista Studios') | (mov2._ == 'Fox') | (mov2._ == 'Paramount Pictures') | (mov2._ == 'Sony') | (mov2._ == 'Universal') | (mov2._ == 'WB')]

# Check how the filters worked for mov3: print the distinct genres, the
# distinct studios and the number of rows.
# Blank: the dataframe to inspect (mov3, per the original comment).
print (_.Genre.unique())
print (_.Studio.unique())
print (len(_))

# Define the plot style: dark grid, muted palette, colour codes enabled.
# Blank: the module providing `set` (presumably the seaborn alias -- confirm).
_.set(style="darkgrid", palette="muted", color_codes=True)

# Plot the box plots with x = genre and y = gross. The plot is vertical,
# light gray, and drawn without outlier markers (showfliers=False).
# Blanks: the seaborn plotting function, the dataframe, and the x / y column
# names. NOTE(review): the exact name of the gross column depends on the
# rename step above and is not visible here.
ax = sns._(data=_, x='_', y='_', orient='v', color='lightgray', showfliers=False)
# Apply alpha=0.5 to the artists attached to the axes.
# NOTE(review): presumably meant to make the boxes semi-transparent; whether
# the boxes are exposed through `ax.artists` depends on the seaborn and
# matplotlib versions in use -- verify.
plt.setp(ax.artists, alpha=0.5)

# Overlay one point per observation (strip plot), jittered, coloured by
# Studio and drawn with alpha=0.7. This plot type is new in the course
# material. Blanks: the same x / y columns and dataframe as the box plot
# (presumably -- confirm).
sns.stripplot(x='_', y='_', data=_, jitter=True, size=6, linewidth=0, hue = 'Studio', alpha=0.7)

# Title: "Domestic Gross % by Genre"
ax.axes.set_title('_',fontsize=30)

# x label: "Genre"
ax.set_xlabel('_',fontsize=20)

# y label: "Gross"
# NOTE(review): the title suggests a percentage; confirm the intended wording.
ax.set_ylabel('_',fontsize=20)

# Place the legend: its upper-left corner (loc=2) is anchored at axes
# coordinates (1.05, 1), i.e. just outside the right edge of the plot, with
# no padding between the anchor and the legend border.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
Trending now
This is a popular solution!
Step by step
Solved in 4 steps with 3 images