PYTHON import pandas as pd from datetime import date import sys from sklearn.preprocessing import OrdinalEncoder def series_report( series, is_ordinal=False, is_continuous=False, is_categorical=False ): print(f"{series.name}: {series.dtype}") ###### Your code here ###### # Check command line arguments if len(sys.argv) < 2: print(f"Usage: python3 {sys.argv[0]} ") exit(1) # Read in the data df = pd.read_csv( sys.argv[1], index_col="employee_id" ) # Convert strings to dates for dob and death df['dob'] = df['dob'].apply(lambda x: date.fromisoformat(x)) df['death'] = df['death'].apply(lambda x: date.fromisoformat(x)) # Show the shape of the dataframe (row_count, col_count) = df.shape print(f"*** Basics ***") print(f"Rows: {row_count:,}") print(f"Columns: {col_count}") # Do a report for each column print(f"\n*** Columns ***") series_report(df.index, is_ordinal=True) series_report(df["gender"], is_categorical=True) series_report(df["height"], is_ordinal=True, is_continuous=True) series_report(df["waist"], is_ordinal=True, is_continuous=True) series_report(df["salary"], is_ordinal=True, is_continuous=True) series_report(df["dob"], is_ordinal=True) series_report(df["death"], is_ordinal=True) When you fill in the missing lines, you will be able to run it like this: python3 make_report.py employees.csv Then, it will print a report like this: *** Basics *** Rows: 10,000 Columns: 6 *** Columns *** employee_id: int64 Range: 1712 - 9998838 gender: object Missing in 82 rows (0.8%) 4917: m 4907: f 36: F 23: M 19: male 16: female height: float64 Range: 1.34 - 2.07 Mean: 1.71 Standard deviation: 0.11 Median: 1.71 waist: float64 Range: 0.47 - 2.18 Mean: 1.21 Standard deviation: 0.23 Median: 1.19 salary: float64 Missing in 70 rows (0.7%) 3 Range: 297.0 - 140902.0 Mean: 63033.98 Standard deviation: 20093.83 Median: 63078.50 dob: object Range: 1945-01-01 - 1984-12-21 death: object Range: 1960-03-20 - 2022-06-12
PYTHON
import pandas as pd
from datetime import date
import sys
from sklearn.preprocessing import OrdinalEncoder
def series_report(
    series, is_ordinal=False, is_continuous=False, is_categorical=False
):
    """Print a short text summary of one column (or the index) of a dataframe.

    The first line is always ``<name>: <dtype>``.  The remaining lines
    follow the layout of the sample report shown in the assignment text:

    * ``Missing in N rows (P%)`` -- only when at least one value is missing.
    * ``Range: <min> - <max>`` -- when ``is_ordinal`` is true.
    * ``Mean`` / ``Standard deviation`` / ``Median`` (two decimals) -- when
      ``is_continuous`` is true.
    * ``<count>: <value>`` per distinct value, most frequent first -- when
      ``is_categorical`` is true.

    Args:
        series: A pandas ``Series`` or ``Index``; anything providing
            ``name``, ``dtype``, ``isna``, ``dropna``, ``min``, ``max``,
            ``mean``, ``std``, ``median`` and ``value_counts``.
        is_ordinal: The values can be ordered, so a range is meaningful.
        is_continuous: The values are numeric, so mean, standard deviation
            and median are meaningful.
        is_categorical: The values are labels, so a frequency table is
            meaningful.
    """
    print(f"{series.name}: {series.dtype}")

    # Report missing values only when there are some, as in the sample.
    row_count = len(series)
    missing_count = int(series.isna().sum())
    if missing_count > 0:
        print(
            f"Missing in {missing_count} rows "
            f"({missing_count / row_count:.1%})"
        )

    # All statistics are computed on the values that are actually present.
    present = series.dropna()
    if len(present) == 0:
        return

    if is_ordinal:
        print(f"Range: {present.min()} - {present.max()}")

    if is_continuous:
        print(f"Mean: {present.mean():.2f}")
        # pandas' default sample standard deviation (ddof=1).
        print(f"Standard deviation: {present.std():.2f}")
        print(f"Median: {present.median():.2f}")

    if is_categorical:
        # value_counts() sorts by descending frequency.
        for value, count in present.value_counts().items():
            print(f"{count}: {value}")
# Check command line arguments: the CSV path is the only required argument.
if len(sys.argv) < 2:
    print(f"Usage: python3 {sys.argv[0]} <input_file>")
    # sys.exit rather than the bare exit(), which is only installed by the
    # site module and is not guaranteed to exist.
    sys.exit(1)

# Read in the data, using the employee_id column as the row index
df = pd.read_csv(sys.argv[1], index_col="employee_id")

# Convert ISO-format date strings to datetime.date objects for dob and death
df["dob"] = df["dob"].apply(date.fromisoformat)
df["death"] = df["death"].apply(date.fromisoformat)

# Show the shape of the dataframe
row_count, col_count = df.shape
print("*** Basics ***")
print(f"Rows: {row_count:,}")
print(f"Columns: {col_count}")

# Do a report for each column, starting with the index itself
print("\n*** Columns ***")
series_report(df.index, is_ordinal=True)
series_report(df["gender"], is_categorical=True)
series_report(df["height"], is_ordinal=True, is_continuous=True)
series_report(df["waist"], is_ordinal=True, is_continuous=True)
series_report(df["salary"], is_ordinal=True, is_continuous=True)
series_report(df["dob"], is_ordinal=True)
series_report(df["death"], is_ordinal=True)
When you fill in the missing lines, you will be able to run it like this:

    python3 make_report.py employees.csv

It will then print a report like this:
*** Basics ***
Rows: 10,000
Columns: 6
*** Columns ***
employee_id: int64
Range: 1712 - 9998838
gender: object
Missing in 82 rows (0.8%)
4917: m
4907: f
36: F
23: M
19: male
16: female
height: float64
Range: 1.34 - 2.07
Mean: 1.71
Standard deviation: 0.11
Median: 1.71
waist: float64
Range: 0.47 - 2.18
Mean: 1.21
Standard deviation: 0.23
Median: 1.19
salary: float64
Missing in 70 rows (0.7%)
Range: 297.0 - 140902.0
Mean: 63033.98
Standard deviation: 20093.83
Median: 63078.50
dob: object
Range: 1945-01-01 - 1984-12-21
death: object
Range: 1960-03-20 - 2022-06-12
![](/static/compass_v2/shared-icons/check-mark.png)
Trending now
This is a popular solution!
Step by step
Solved in 3 steps
![Blurred answer](/static/compass_v2/solution-images/blurred-answer.jpg)
![Database System Concepts](https://www.bartleby.com/isbn_cover_images/9780078022159/9780078022159_smallCoverImage.jpg)
![Starting Out with Python (4th Edition)](https://www.bartleby.com/isbn_cover_images/9780134444321/9780134444321_smallCoverImage.gif)
![Digital Fundamentals (11th Edition)](https://www.bartleby.com/isbn_cover_images/9780132737968/9780132737968_smallCoverImage.gif)
![Database System Concepts](https://www.bartleby.com/isbn_cover_images/9780078022159/9780078022159_smallCoverImage.jpg)
![Starting Out with Python (4th Edition)](https://www.bartleby.com/isbn_cover_images/9780134444321/9780134444321_smallCoverImage.gif)
![Digital Fundamentals (11th Edition)](https://www.bartleby.com/isbn_cover_images/9780132737968/9780132737968_smallCoverImage.gif)
![C How to Program (8th Edition)](https://www.bartleby.com/isbn_cover_images/9780133976892/9780133976892_smallCoverImage.gif)
![Database Systems: Design, Implementation, & Manag…](https://www.bartleby.com/isbn_cover_images/9781337627900/9781337627900_smallCoverImage.gif)
![Programmable Logic Controllers](https://www.bartleby.com/isbn_cover_images/9780073373843/9780073373843_smallCoverImage.gif)