vertopal
.txt
keyboard_arrow_up
School
Kenyatta University *
*We aren’t endorsed by this school
Course
503
Subject
Statistics
Date
Nov 24, 2024
Type
txt
Pages
7
Uploaded by CoachDolphinMaster782
# Heart Disease Data Analysis using Pandas
# The import has to be its own statement: fused into the comment line above
# it never runs, and every later use of `pd` raises NameError.
import pandas as pd

# Load the dataset (path is relative to the current working directory)
data = pd.read_csv('heart.csv')

# Task 1: Average Age of Patients
# Mean of the 'age' column over all loaded rows.
average_age = data['age'].mean()
print("Report 1: Average Age of Patients")
print(f"Average age of patients: {average_age:.2f} years")
# Task 2: Average Cholesterol Levels in Age Intervals
# Consecutive edges define half-open bins [lower, upper).
age_intervals = [20, 30, 40, 50, 60, 100]
# Collected as (interval label, mean cholesterol) pairs, in bin order.
avg_chol_by_age = []
for lower_age, upper_age in zip(age_intervals, age_intervals[1:]):
    in_interval = (data['age'] >= lower_age) & (data['age'] < upper_age)
    # Row filter and column selection must be a single expression; split over
    # two statements the ['chol'] part is parsed as a stand-alone list literal.
    # NOTE(review): this takes the mean of 'chol' as loaded; the column is
    # only cleaned further down, under Task 3 - confirm it is numeric here.
    avg_chol = data.loc[in_interval, 'chol'].mean()
    avg_chol_by_age.append((f'[{lower_age}-{upper_age})', avg_chol))

print("Report 2: Average Cholesterol Levels in Age Intervals")
for age_interval, avg_chol in avg_chol_by_age:
    print(f"Average cholesterol for age interval {age_interval}: {avg_chol:.2f} mg/dl")
# Task 3: Average Resting Blood Pressure in Extreme Cholesterol Levels
# Render 'chol' as text and strip everything except digits and the decimal
# point. The '.' has to survive: a float column renders as e.g. '233.0', and
# removing the point would silently turn that into 2330.
data['chol'] = data['chol'].astype(str).str.replace(r'[^0-9.]', '', regex=True)
# Convert the cleaned text to numbers; anything that still cannot be parsed
# (including the empty string left behind by 'nan' or '?') becomes NaN
data['chol'] = pd.to_numeric(data['chol'], errors='coerce')
# Remove rows with NaN values in the 'chol' column. This rebinds `data`, so
# every later step in the script works on the filtered rows only.
data = data.dropna(subset=['chol'])

# Calculate the 30th and 70th percentiles for cholesterol levels
chol_30th_percentile = data['chol'].quantile(0.3)
chol_70th_percentile = data['chol'].quantile(0.7)

# Patients strictly below the 30th percentile / strictly above the 70th;
# rows exactly on a cut-off value belong to neither group.
low_chol_patients = data[data['chol'] < chol_30th_percentile]
high_chol_patients = data[data['chol'] > chol_70th_percentile]

# Calculate average resting blood pressure for patients with low and high cholesterol levels
average_trestbps_low_chol = low_chol_patients['trestbps'].mean()
average_trestbps_high_chol = high_chol_patients['trestbps'].mean()

print("Report 3: Average Resting Blood Pressure in Extreme Cholesterol Levels")
print(f"Average resting blood pressure for patients with low cholesterol: {average_trestbps_low_chol:.2f} mm Hg")
print(f"Average resting blood pressure for patients with high cholesterol: {average_trestbps_high_chol:.2f} mm Hg")
# Task 4: Percentage of Men and Women with Positive Heart Disease Diagnosis
# Boolean row masks: 'sex' is 0 for women and 1 for men; a 1 in the 'num '
# column marks a positive heart disease diagnosis.
is_woman = data['sex'] == 0
is_man = data['sex'] == 1
has_disease = data['num '] == 1

# Head counts per group, kept as plain Python ints.
women_with_heart_disease = int((is_woman & has_disease).sum())
men_with_heart_disease = int((is_man & has_disease).sum())
total_women = int(is_woman.sum())
total_men = int(is_man.sum())

# Share of each group with a positive diagnosis, as a percentage.
# An empty group makes the division raise ZeroDivisionError.
percentage_women_with_heart_disease = (women_with_heart_disease / total_women) * 100
percentage_men_with_heart_disease = (men_with_heart_disease / total_men) * 100

print("Report 4: Percentage of Men and Women with Positive Heart Disease Diagnosis")
print(f"Percentage of women with heart disease: {percentage_women_with_heart_disease:.2f}%")
print(f"Percentage of men with heart disease: {percentage_men_with_heart_disease:.2f}%")
Report 1: Average Age of Patients
Average age of patients: 47.83 years
Report 2: Average Cholesterol Levels in Age Intervals
Average cholesterol for age interval [20-30): 187.50 mg/dl
Average cholesterol for age interval [30-40): 239.63 mg/dl
Average cholesterol for age interval [40-50): 250.13 mg/dl
Average cholesterol for age interval [50-60): 256.21 mg/dl
Average cholesterol for age interval [60-100): 261.10 mg/dl
Report 3: Average Resting Blood Pressure in Extreme Cholesterol Levels
Average resting blood pressure for patients with low cholesterol: 133.01 mm Hg
Average resting blood pressure for patients with high cholesterol: 136.04 mm Hg
Report 4: Percentage of Men and Women with Positive Heart Disease Diagnosis
Percentage of women with heart disease: 16.22%
Percentage of men with heart disease: 45.18%
# Exploratory Data Analysis of Environmental Sensor Measurements
# The import has to be its own statement: fused into the comment line above
# it never runs, and every later use of `pd` raises NameError.
import pandas as pd

# 1. Data Loading and Initial Inspection
# Load the dataset (path is relative to the current working directory)
data = pd.read_csv("sensor_data.csv")

# Display the data types of each column
data_types = data.dtypes
print("Data Types of Each Column:")
print(data_types)

# Display the number of unique values in the 'time' column
unique_time_values = data['time'].nunique()
print(f"Number of Unique Values in the 'time' Column: {unique_time_values}")
# 2. Data Preprocessing
# Parse the 'time' column into datetime values.
parsed_time = pd.to_datetime(data['time'])
data['time'] = parsed_time
# Remove fully duplicated rows, modifying the frame in place.
data.drop_duplicates(inplace=True)

# 3. Data Analysis
# Mean power, split by weekday vs. weekend.
# dayofweek numbers Monday as 0 through Sunday as 6, so 5 and 6 are the weekend.
data['day_of_week'] = data['time'].dt.dayofweek
weekday_rows = data['day_of_week'] < 5
weekend_rows = data['day_of_week'] >= 5
average_power_weekdays = data.loc[weekday_rows, 'power'].mean()
average_power_weekends = data.loc[weekend_rows, 'power'].mean()
print(f"Average Power Consumption on Weekdays: {average_power_weekdays:.2f} watts")
print(f"Average Power Consumption on Weekends: {average_power_weekends:.2f} watts")
# Create a new column named 'CO2_category' with categories 'Low', 'Medium', and 'High' based on CO2 concentration
def categorize_CO2(co2_value):
    """Return the category label for a single CO2 concentration reading.

    Args:
        co2_value: Numeric CO2 concentration, in the same unit as the fixed
            thresholds 400 and 1000 (presumably ppm - confirm).

    Returns:
        'Low' for values below 400, 'Medium' for values from 400 up to but
        not including 1000, and 'High' for everything else.
    """
    if co2_value < 400:
        return 'Low'
    # Reaching this point already implies co2_value >= 400 for ordinary
    # numbers, so only the upper bound needs checking.
    if co2_value < 1000:
        return 'Medium'
    # NOTE: a NaN reading fails both comparisons and also ends up here.
    return 'High'
# Label every reading in the 'CO2' column and store the labels as a new column.
co2_labels = data['CO2'].apply(categorize_CO2)
data['CO2_category'] = co2_labels
# Preview the frame with the new column (head() returns the first 5 rows).
print("First Few Rows of the Updated Dataset:")
preview = data.head()
print(preview)
# Parse the 'time' column into datetime values (repeats the preprocessing
# already done under "2. Data Preprocessing").
parsed_time = pd.to_datetime(data['time'])
data['time'] = parsed_time
# Remove fully duplicated rows, modifying the frame in place.
data.drop_duplicates(inplace=True)
# Create a new column 'CO2_category' based on CO2 concentration
def categorize_CO2(co2_value):
    """Return the category label for a single CO2 concentration reading.

    Args:
        co2_value: Numeric CO2 concentration, in the same unit as the fixed
            thresholds 400 and 1000 (presumably ppm - confirm).

    Returns:
        'Low' for values below 400, 'Medium' for values from 400 up to but
        not including 1000, and 'High' for everything else.
    """
    if co2_value < 400:
        return 'Low'
    # Reaching this point already implies co2_value >= 400 for ordinary
    # numbers, so only the upper bound needs checking.
    if co2_value < 1000:
        return 'Medium'
    # NOTE: a NaN reading fails both comparisons and also ends up here.
    return 'High'
# Label every reading in the 'CO2' column and store the labels as a new column.
co2_labels = data['CO2'].apply(categorize_CO2)
data['CO2_category'] = co2_labels

# 3. Data Analysis
# Average the 'power' column within each CO2 category.
power_by_category = data.groupby('CO2_category')['power']
mean_power_by_CO2 = power_by_category.mean()
print("Mean Power Consumption for Each CO2 Category:")
print(mean_power_by_CO2)
# 4. Data Manipulation
# Rolling sum of power consumption over the current row and the two before it.
# NOTE(review): an integer window counts rows, not elapsed time, so this is a
# 3-hour sum only if rows are hourly and in time order - confirm.
window_size = 3  # intended as 3 hours
rolling_power = data['power'].rolling(window=window_size)
# The first window_size - 1 rows have no complete window and come out as NaN.
data['power_cumulative_sum'] = rolling_power.sum()
# 4. Data Manipulation
# Calculate the cumulative sum of power consumption on a rolling window of 3 hours
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
Related Questions
What kind of charts are used to describe HTS data?
arrow_forward
Create a model for the data using the 1st and 3rd points in the data set.
1909
90.490
1st point^
1929
121.767
3 rd point ^
y= years since 1909
P=population of the United States (in millions)
arrow_forward
Duration is what type of data?
arrow_forward
Differentiate between nominal data and ordinal data.
Give at least two examples of nominal and ordinal data.
arrow_forward
What are the best measures used for data variation or dispersion?
arrow_forward
What type of Data is being shown
arrow_forward
Define moving averages and how they’re useful in forecasting
arrow_forward
This data type data is non-numbers, OR numbers that do not represent quantities.
arrow_forward
how do I describe the "measures of center and variability" of a chart?
arrow_forward
The most commonly used measure of central tendency
arrow_forward
What type of data would you use for collecting data on lap swim time?
arrow_forward
Please don't provide answers in image format thank you.
arrow_forward
A coffee shop manager wants to visualize the shape of daily sales of coffee (# of cups) in the past year. He can use a histogram. Is it true or false?
arrow_forward
What type of data: The weight of cars crossing the Golden Gate Bridge.
Qualitative
Quantitative Discrete
Quantitative Continuous
arrow_forward
Which are the graphs for quantitative data?
arrow_forward
Determine the type of variation model that best fits the data in the attached image.
arrow_forward
Hashim Motors Sdn. Bhd. specializes in selling a secondhand car. Currently, the company
has 12 used cars for sale. The owner of the company wants to investigate the relationship
between the age of the car and the mileage of the car. The data were collected and
analyzed using SPSS. The results as follow.
Car's age (years) | 6 4 2 2.5 3
Mileage ('000 km) | 93 60 33 36 53 59 48 77
4
5 5.5 4.5 4.5 3.5
79 61 63 50
3
Model Summary
Adjusted R
Std. Error of the
Model
R
R Square
Square
Estimate
1
.975
.951
.946
4.036
a. Predictors: (Constant), Age
arrow_forward
What is the graphical interpretation of the IVT?
arrow_forward
indicator for gender (=1 if male, =0 if female). Your data set has 200 observations.
donation = 1.57 + 0.12age + 0.08age² – 0.43 gender + 0.07(gender * age), R² = 0.33
(0.15)*
(0.67)
(0.11)
(0.01)
(0.06)
donation = 1.59
(0.68)
0.46 gender, R² = 0.30
(0.17)
1. Assuming homoscedastic errors, test the hypothesis that age affects charitable donations at
the 5% level.
2. Interpret the coefficient on gender interacted with age.
3. How would you test that the difference in giving between men and women is independent of
age? If you can run the test, run it. If you cannot, explain what you need.
arrow_forward
ONLY DO LETTER D
I'LL UPVOTE IF COMPLETE, ORIGINAL, CORRECT. THANK YOU VERY MUCH
Burning fuels in power plants and motor vehicles emit carbon dioxide (CO2), contributing to global warming. The table at the right displays CO2 emissions per person from countries with at least 20 million populations.
(a) What is the variable of interest? How do you classify it?
(b) Make a histogram of the data using classes of width 2, starting at 0.
Show the frequency distribution table and create the Histogram of the data. Be mindful of submitting a correct graph.
(c) Describe the shape, center, and spread of the distribution. Which countries are outliers?
For a description of the shape, center, and spread, put your response in paragraph form.
Select the most appropriate measure of center and measure of spread to describe the distribution. Also, do not forget to interpret the values based on the given context.
Lastly, do not forget to include the unit of measurements.
For outliers, show your method…
arrow_forward
Describe about the Error Variability?
arrow_forward
Google "covid 19 california" and choose statistics from menu on left.
Select data for "all time" period.
In the discussion editor, click on 3 dots, insert table using editor (3 columns and 10 rows)
Label the table with headings Day (t), Cases (y), Rate of change ()
Fill column 1 with alternate Sunday dates starting from May 3 ending Aug 16
Fill in column 2 with number of daily cases from Google statistics page.
For each row (except first), find rate of change in cases per fortnight (2wk). Δt = 14
For example
day
rate of change
cases
May 3
5707
(8658-5707)/14= ? cases per
fortnight
May 10
8658
May 17
arrow_forward
Indicate that they are qualitative and quantitative data, give 2 specific examples and indicate when it is used in each case.
arrow_forward
What are the similarities between a parameter and a statistic?
arrow_forward
Differentiate
- Parameter vs statistic
arrow_forward
please don’t reject the question
arrow_forward
SEE MORE QUESTIONS
Recommended textbooks for you
Glencoe Algebra 1, Student Edition, 9780079039897...
Algebra
ISBN:9780079039897
Author:Carter
Publisher:McGraw Hill
Big Ideas Math A Bridge To Success Algebra 1: Stu...
Algebra
ISBN:9781680331141
Author:HOUGHTON MIFFLIN HARCOURT
Publisher:Houghton Mifflin Harcourt
Holt Mcdougal Larson Pre-algebra: Student Edition...
Algebra
ISBN:9780547587776
Author:HOLT MCDOUGAL
Publisher:HOLT MCDOUGAL
Related Questions
- what kind of charts are used in to describe HTS dataarrow_forwardCreate a model for the data using the 1st and 3rd points in the data set. 1909 90.490 1st point^ 1929 121.767 3 rd point ^ y= years since 1909 P=population of the United States (in millions)arrow_forwardDuration is what typye of data?arrow_forward
arrow_back_ios
SEE MORE QUESTIONS
arrow_forward_ios
Recommended textbooks for you
- Glencoe Algebra 1, Student Edition, 9780079039897...AlgebraISBN:9780079039897Author:CarterPublisher:McGraw HillBig Ideas Math A Bridge To Success Algebra 1: Stu...AlgebraISBN:9781680331141Author:HOUGHTON MIFFLIN HARCOURTPublisher:Houghton Mifflin HarcourtHolt Mcdougal Larson Pre-algebra: Student Edition...AlgebraISBN:9780547587776Author:HOLT MCDOUGALPublisher:HOLT MCDOUGAL
Glencoe Algebra 1, Student Edition, 9780079039897...
Algebra
ISBN:9780079039897
Author:Carter
Publisher:McGraw Hill
Big Ideas Math A Bridge To Success Algebra 1: Stu...
Algebra
ISBN:9781680331141
Author:HOUGHTON MIFFLIN HARCOURT
Publisher:Houghton Mifflin Harcourt
Holt Mcdougal Larson Pre-algebra: Student Edition...
Algebra
ISBN:9780547587776
Author:HOLT MCDOUGAL
Publisher:HOLT MCDOUGAL