PYTHON import pandas as pd from datetime import date import sys from sklearn.preprocessing import OrdinalEncoder def series_report( series, is_ordinal=False, is_continuous=False, is_categorical=False ): print(f"{series.name}: {series.dtype}") ###### Your code here ###### # Check command line arguments if len(sys.argv) < 2: print(f"Usage: python3 {sys.argv[0]} <input_file>") exit(1) # Read in the data df = pd.read_csv( sys.argv[1], index_col="employee_id" ) # Convert strings to dates for dob and death df['dob'] = df['dob'].apply(lambda x: date.fromisoformat(x)) df['death'] = df['death'].apply(lambda x: date.fromisoformat(x)) # Show the shape of the dataframe (row_count, col_count) = df.shape print(f"*** Basics ***") print(f"Rows: {row_count:,}") print(f"Columns: {col_count}") # Do a report for each column print(f"\n*** Columns ***") series_report(df.index, is_ordinal=True) series_report(df["gender"], is_categorical=True) series_report(df["height"], is_ordinal=True, is_continuous=True) series_report(df["waist"], is_ordinal=True, is_continuous=True) series_report(df["salary"], is_ordinal=True, is_continuous=True) series_report(df["dob"], is_ordinal=True) series_report(df["death"], is_ordinal=True) When you fill in the missing lines, you will be able to run it like this: python3 make_report.py employees.csv Then, it will print a report like this: *** Basics *** Rows: 10,000 Columns: 6 *** Columns *** employee_id: int64 Range: 1712 - 9998838 gender: object Missing in 82 rows (0.8%) 4917: m 4907: f 36: F 23: M 19: male 16: female height: float64 Range: 1.34 - 2.07 Mean: 1.71 Standard deviation: 0.11 Median: 1.71 waist: float64 Range: 0.47 - 2.18 Mean: 1.21 Standard deviation: 0.23 Median: 1.19 salary: float64 Missing in 70 rows (0.7%) 3 Range: 297.0 - 140902.0 Mean: 63033.98 Standard deviation: 20093.83 Median: 63078.50 dob: object Range: 1945-01-01 - 1984-12-21 death: object Range: 1960-03-20 - 2022-06-12

PYTHON import pandas as pd from datetime import date import sys from sklearn.preprocessing import OrdinalEncoder def series_report( series, is_ordinal=False, is_continuous=False, is_categorical=False ): print(f"{series.name}: {series.dtype}") ###### Your code here ###### # Check command line arguments if len(sys.argv) < 2: print(f"Usage: python3 {sys.argv[0]} <input_file>") exit(1) # Read in the data df = pd.read_csv( sys.argv[1], index_col="employee_id" ) # Convert strings to dates for dob and death df['dob'] = df['dob'].apply(lambda x: date.fromisoformat(x)) df['death'] = df['death'].apply(lambda x: date.fromisoformat(x)) # Show the shape of the dataframe (row_count, col_count) = df.shape print(f"*** Basics ***") print(f"Rows: {row_count:,}") print(f"Columns: {col_count}") # Do a report for each column print(f"\n*** Columns ***") series_report(df.index, is_ordinal=True) series_report(df["gender"], is_categorical=True) series_report(df["height"], is_ordinal=True, is_continuous=True) series_report(df["waist"], is_ordinal=True, is_continuous=True) series_report(df["salary"], is_ordinal=True, is_continuous=True) series_report(df["dob"], is_ordinal=True) series_report(df["death"], is_ordinal=True) When you fill in the missing lines, you will be able to run it like this: python3 make_report.py employees.csv Then, it will print a report like this: *** Basics *** Rows: 10,000 Columns: 6 *** Columns *** employee_id: int64 Range: 1712 - 9998838 gender: object Missing in 82 rows (0.8%) 4917: m 4907: f 36: F 23: M 19: male 16: female height: float64 Range: 1.34 - 2.07 Mean: 1.71 Standard deviation: 0.11 Median: 1.71 waist: float64 Range: 0.47 - 2.18 Mean: 1.21 Standard deviation: 0.23 Median: 1.19 salary: float64 Missing in 70 rows (0.7%) 3 Range: 297.0 - 140902.0 Mean: 63033.98 Standard deviation: 20093.83 Median: 63078.50 dob: object Range: 1945-01-01 - 1984-12-21 death: object Range: 1960-03-20 - 2022-06-12

Database System Concepts

7th Edition

ISBN:9780078022159

Author:Abraham Silberschatz Professor, Henry F. Korth, S. Sudarshan

Publisher:Abraham Silberschatz Professor, Henry F. Korth, S. Sudarshan

Chapter1: Introduction

Section: Chapter Questions

Problem 1PE

See similar textbooks

Related questions

Question

PYTHON

import pandas as pd
from datetime import date
import sys

from sklearn.preprocessing import OrdinalEncoder

def series_report(
    series, is_ordinal=False, is_continuous=False, is_categorical=False
):
    """Print a plain-text summary of one column (or the index) of a DataFrame.

    The header line ``<name>: <dtype>`` is always printed. The remaining
    lines depend on the flags and on the data, in this order:

    * ``Missing in N rows (P%)`` -- only when at least one value is missing.
    * ``<count>: <value>`` for each distinct value, most frequent first --
      only when ``is_categorical`` is true.
    * ``Range: <min> - <max>`` -- only when ``is_ordinal`` is true.
    * ``Mean``, ``Standard deviation`` and ``Median`` (two decimals each) --
      only when ``is_continuous`` is true.

    Args:
        series: A pandas ``Series`` or ``Index``. The continuous statistics
            additionally require ``mean``/``std``/``median`` methods, so pass
            a ``Series`` when ``is_continuous`` is true.
        is_ordinal: The values can be ordered, so a min/max range is shown.
        is_continuous: The values are numeric measurements, so summary
            statistics are shown.
        is_categorical: The values are labels, so per-value counts are shown.
    """
    print(f"{series.name}: {series.dtype}")

    # Only mention missing data when there is some, as in the sample report.
    row_count = len(series)
    missing_count = int(series.isna().sum())
    if missing_count > 0:
        percent_missing = 100.0 * missing_count / row_count
        print(f"Missing in {missing_count} rows ({percent_missing:.1f}%)")

    # Work on the non-missing values so that object columns (e.g. dates mixed
    # with NaN) can still be compared for min/max.
    present = series.dropna()

    if is_categorical:
        # value_counts() is already sorted by descending frequency.
        for value, count in present.value_counts().items():
            print(f"{count}: {value}")

    # Nothing left to summarise if every value is missing (or there are none).
    if len(present) == 0:
        return

    if is_ordinal:
        print(f"Range: {present.min()} - {present.max()}")

    if is_continuous:
        print(f"Mean: {present.mean():.2f}")
        print(f"Standard deviation: {present.std():.2f}")
        print(f"Median: {present.median():.2f}")

# Script body: load an employee CSV named on the command line and print a
# per-column report using series_report().

# Check command line arguments
if len(sys.argv) < 2:
    print(f"Usage: python3 {sys.argv[0]} <input_file>")
    # sys.exit() rather than the interactive-only exit() helper from `site`,
    # which is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(1)

# Read in the data; employee_id becomes the index, not a regular column
df = pd.read_csv(sys.argv[1], index_col="employee_id")

# Convert strings to dates for dob and death
# NOTE(review): assumes every row holds a valid ISO date string in both
# columns; a missing value would make date.fromisoformat raise -- confirm.
df["dob"] = df["dob"].apply(date.fromisoformat)
df["death"] = df["death"].apply(date.fromisoformat)

# Show the shape of the dataframe
(row_count, col_count) = df.shape
print("*** Basics ***")
print(f"Rows: {row_count:,}")
print(f"Columns: {col_count}")

# Do a report for each column
print("\n*** Columns ***")
series_report(df.index, is_ordinal=True)
series_report(df["gender"], is_categorical=True)
series_report(df["height"], is_ordinal=True, is_continuous=True)
series_report(df["waist"], is_ordinal=True, is_continuous=True)
series_report(df["salary"], is_ordinal=True, is_continuous=True)
series_report(df["dob"], is_ordinal=True)
series_report(df["death"], is_ordinal=True)

When you fill in the missing lines, you will be able to run
it like this:
python3 make_report.py employees.csv
Then, it will print a report like this:
*** Basics ***
Rows: 10,000
Columns: 6
*** Columns ***
employee_id: int64
Range: 1712 - 9998838
gender: object
Missing in 82 rows (0.8%)
4917: m
4907: f
36: F
23: M
19: male
16: female
height: float64
Range: 1.34 - 2.07
Mean: 1.71
Standard deviation: 0.11
Median: 1.71
waist: float64
Range: 0.47 - 2.18
Mean: 1.21
Standard deviation: 0.23
Median: 1.19
salary: float64
Missing in 70 rows (0.7%)
3
Range: 297.0 - 140902.0
Mean: 63033.98
Standard deviation: 20093.83
Median: 63078.50
dob: object
Range: 1945-01-01 - 1984-12-21
death: object
Range: 1960-03-20 - 2022-06-12

Expert Solution

Trending now

This is a popular solution!

Step by step

Solved in 3 steps

SEE SOLUTION Check out a sample Q&A here

Knowledge Booster

Learn more about

Need a deep-dive on the concept behind this application? Look no further. Learn more about this topic, computer-science and related others by exploring similar questions and additional content below.