EE102MATH
pdf
keyboard_arrow_up
School
San Jose State University *
*We aren’t endorsed by this school
Course
102
Subject
Statistics
Date
Jan 9, 2024
Type
Pages
8
Uploaded by DoctorStarSnake182
In [31]:
import
pandas
as
pd
from
pandas
import
read_excel
from
pandas.plotting
import
scatter_matrix
from
plotly.subplots
import
make_subplots
import
matplotlib.pyplot
as
plt
import
numpy
as
np
from
scipy.stats
import
pearsonr
from
sklearn
import
linear_model
# Workbook file names for the two data splits.
Test = 'Test_Data.xlsx'
Training = 'Training_Data.xlsx'

# The test split is 20% of the 9568 rows = 1913 rows (the first 1913 rows).
# Choose the data set to analyse below.
file_name = Training
df = pd.read_excel(file_name)
In [32]:
# Mean of each column
data_mean = df.mean()
print(data_mean)
In [33]:
# Median of each column
data_median = df.median()
print(data_median)
In [34]:
# Mode of each column
data_mode = df.mode()
print(data_mode)
In [35]:
# Minimum of each column
data_min = df.min()
print(data_min)
In [36]:
# Maximum of each column
data_max = df.max()
print(data_max)
AT
19.613493
V
54.259261
AP
1013.284266
RH
73.348454
PE
454.475793
dtype: float64
AT
20.325
V
52.080
AP
1012.960
RH
74.955
PE
451.670
dtype: float64
AT
V
AP
RH
PE
0
25.21
41.17
1010.99
100.09
468.8
AT
2.58
V
25.36
AP
992.89
RH
25.56
PE
420.26
dtype: float64
AT
37.11
V
81.56
AP
1033.29
RH
100.16
PE
495.76
dtype: float64
In [37]:
# Variance of each column
data_var = df.var()
print(data_var)
In [38]:
# Standard deviation of each column
data_std = df.std()
print(data_std)
In [39]:
# One summary table per column (count, mean, std, min, quartiles, max),
# i.e. most of the statistics printed one by one in the cells above.
data_des = df.describe()
print(data_des)
In [40]:
# Scatter plot of one input column vs. PE
def scatterPlot(name):
    """Show a scatter plot of column ``name`` (x axis) against ``PE`` (y axis).

    Reads both columns from the module-level ``df`` and draws with pyplot.

    Args:
        name: Label of the ``df`` column to put on the x axis.
    """
    x_values = df[name]
    y_values = df['PE']
    plt.scatter(x_values, y_values)
    plt.title('Scatter Plot')
    plt.xlabel(name)
    plt.ylabel('PE')
    plt.show()


scatterPlot('AP')
AT
55.772378
V
161.743253
AP
35.616324
RH
210.611384
PE
293.430633
dtype: float64
AT
7.468091
V
12.717832
AP
5.967941
RH
14.512456
PE
17.129817
dtype: float64
AT
V
AP
RH
PE
count
7656.000000
7656.000000
7656.000000
7656.000000
7656.000000
mean
19.613493
54.259261
1013.284266
73.348454
454.475793
std
7.468091
12.717832
5.967941
14.512456
17.129817
min
2.580000
25.360000
992.890000
25.560000
420.260000
25%
13.450000
41.670000
1009.127500
63.430000
439.820000
50%
20.325000
52.080000
1012.960000
74.955000
451.670000
75%
25.692500
66.510000
1017.342500
84.750000
468.625000
max
37.110000
81.560000
1033.290000
100.160000
495.760000
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
In [43]:
def scaleTest(name, data=None):
    """Min-max scale one column and return it as a column vector.

    Every value ``x`` of the column is mapped to
    ``(x - column_min) / (column_max - column_min)``, so the smallest value
    becomes 0 and the largest becomes 1.

    Args:
        name: Label of the column to scale.
        data: Optional DataFrame to read the column from. When omitted, the
            module-level ``df`` is used, which is what existing callers rely on.

    Returns:
        A NumPy array of shape ``(n_rows, 1)`` holding the scaled values.

    Note:
        A column whose minimum equals its maximum divides by zero and yields
        NaN values; this is not guarded against here.
    """
    frame = df if data is None else data
    column = frame[name]

    # Named low/high rather than min/max so the builtins are not shadowed.
    low = column.min()
    high = column.max()

    # One array expression replaces the former row-by-row loop.
    scaled = (column.values - low) / (high - low)
    return np.array(scaled).reshape(-1, 1)
In [44]:
def MSE1(num):
    """Return the mean squared error of a one-input linear fit to ``PE``.

    The input column selected by ``num`` and the ``PE`` column are both scaled
    with ``scaleTest``, a ``LinearRegression`` is fitted on them, and the
    squared differences between actual and predicted scaled ``PE`` are
    averaged over all rows.

    Args:
        num: Position of the input in ``["AT", "V", "AP", "RH"]`` (0 to 3).

    Returns:
        A NumPy array of shape ``(1, 1)`` containing the MSE.

    Raises:
        IndexError: If ``num`` is not one of the valid positions.
    """
    names = ["AT", "V", "AP", "RH"]
    # Fail with a clear message instead of leaving the column name unbound.
    if num not in range(len(names)):
        raise IndexError(
            "num must be between 0 and {}, got {!r}".format(len(names) - 1, num)
        )
    name = names[int(num)]

    i = scaleTest(name)
    o = scaleTest('PE')

    reg = linear_model.LinearRegression()
    reg.fit(i, o)

    # predict() evaluates coef_ * x + intercept_ for every row at once.
    residuals = o - reg.predict(i)
    # keepdims preserves the (1, 1) result shape that callers print.
    return np.mean(residuals ** 2, axis=0, keepdims=True)
In [45]:
#Used to find the MSE for two inputs
def MSE2(num1, num2):
    """Return the mean squared error of a two-input linear fit to ``PE``.

    Both selected input columns and the ``PE`` column are scaled with
    ``scaleTest``, a ``LinearRegression`` is fitted on the stacked inputs, and
    the squared differences between actual and predicted scaled ``PE`` are
    averaged over all rows.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.

    Returns:
        A NumPy array of shape ``(1,)`` containing the MSE.

    Raises:
        IndexError: If either position is outside 0 to 3.
    """
    names = ["AT", "V", "AP", "RH"]
    # Fail with a clear message instead of leaving a column name unbound.
    for num in (num1, num2):
        if num not in range(len(names)):
            raise IndexError(
                "column index must be between 0 and {}, got {!r}".format(
                    len(names) - 1, num
                )
            )
    name1 = names[int(num1)]
    name2 = names[int(num2)]

    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    o = scaleTest('PE')
    i = np.column_stack((i1, i2))

    reg = linear_model.LinearRegression()
    reg.fit(i, o)

    # predict() evaluates the fitted linear equation for every row at once.
    residuals = o - reg.predict(i)
    # Averaging over rows leaves the (1,) result shape that callers print.
    return np.mean(residuals ** 2, axis=0)
In [46]:
#Used to find the MSE for three inputs
def MSE3(num1, num2, num3):
    """Return the mean squared error of a three-input linear fit to ``PE``.

    The three selected input columns and the ``PE`` column are scaled with
    ``scaleTest``, a ``LinearRegression`` is fitted on the stacked inputs, and
    the squared differences between actual and predicted scaled ``PE`` are
    averaged over all rows.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.
        num3: Position of the third input in the same list.

    Returns:
        A NumPy array of shape ``(1,)`` containing the MSE.

    Raises:
        IndexError: If any position is outside 0 to 3.
    """
    names = ["AT", "V", "AP", "RH"]
    # Fail with a clear message instead of leaving a column name unbound.
    for num in (num1, num2, num3):
        if num not in range(len(names)):
            raise IndexError(
                "column index must be between 0 and {}, got {!r}".format(
                    len(names) - 1, num
                )
            )
    name1 = names[int(num1)]
    name2 = names[int(num2)]
    name3 = names[int(num3)]

    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    i3 = scaleTest(name3)
    o = scaleTest('PE')
    i = np.column_stack((i1, i2, i3))

    reg = linear_model.LinearRegression()
    reg.fit(i, o)

    # predict() evaluates the fitted linear equation for every row at once.
    residuals = o - reg.predict(i)
    # Averaging over rows leaves the (1,) result shape that callers print.
    return np.mean(residuals ** 2, axis=0)
In [47]:
#Shows linear regression for 1 variable
def lineRegression(num):
    """Fit and print a one-input linear regression on min-max scaled data.

    The selected input column and ``PE`` are scaled with ``scaleTest`` before
    fitting, so the printed coefficient and intercept are in scaled units.
    Prints the input name, the fitted equation, the MSE from ``MSE1`` and the
    R^2 score.

    NOTE: a later cell defines another function with this same name that fits
    the unscaled columns; once that cell has run, it replaces this definition.

    Args:
        num: Position of the input in ``["AT", "V", "AP", "RH"]`` (0 to 3).

    Raises:
        IndexError: If ``num`` is not one of the valid positions.
    """
    #In my project these were the input names
    names = ["AT", "V", "AP", "RH"]
    # Fail with a clear message instead of leaving the column name unbound.
    if num not in range(len(names)):
        raise IndexError(
            "num must be between 0 and {}, got {!r}".format(len(names) - 1, num)
        )
    name = names[int(num)]

    i = scaleTest(name)
    o = scaleTest('PE')

    reg = linear_model.LinearRegression()
    reg.fit(i, o)
    MS = MSE1(num)

    print("For {}".format(name))
    print("{0} * {1} + {2} = PE".format(reg.coef_, name, reg.intercept_))
    print("MSE is {}".format(MS))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


#Place column number here
lineRegression(0)
lineRegression(1)
lineRegression(2)
In [48]:
def lineRegression(num):
    """Fit and print a one-input linear regression on the unscaled data.

    The input is read from ``df`` by column position ``num`` and the target
    from column position 4, so the printed coefficient and intercept are in
    the columns' original units. Prints the input name, the fitted equation
    and the R^2 score.

    Args:
        num: Position of the input in ``["AT", "V", "AP", "RH"]`` (0 to 3);
            the same number is used as the column position in ``df``.

    Raises:
        IndexError: If ``num`` is not one of the valid positions.
    """
    names = ["AT", "V", "AP", "RH"]
    # Fail with a clear message instead of leaving the column name unbound.
    if num not in range(len(names)):
        raise IndexError(
            "num must be between 0 and {}, got {!r}".format(len(names) - 1, num)
        )
    name = names[int(num)]

    # Column vectors of shape (n_rows, 1), as LinearRegression.fit expects 2-D X.
    i = df.iloc[:, num].values.reshape(-1, 1)
    # The printed equation labels column position 4 as PE.
    o = df.iloc[:, 4].values.reshape(-1, 1)

    reg = linear_model.LinearRegression()
    reg.fit(i, o)

    print("For {}".format(name))
    print("{0} * {1} + {2} = PE".format(reg.coef_, name, reg.intercept_))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


#Place column number here
lineRegression(0)
lineRegression(1)
lineRegression(2)
For AT
[[-0.99439472]] * AT + [0.94371972] = PE
MSE is [[0.00522272]]
R^2 is equal to 0.8985289476296636
For V
[[-0.87219495]] * V + [0.9016909] = PE
MSE is [[0.0125186]]
R^2 is equal to 0.7567791000458868
For AP
[[0.79108548]] * AP + [0.05384258] = PE
MSE is [[0.03781556]]
R^2 is equal to 0.2652904661868859
For AT
[[-2.17424852]] * AT + [497.12040026] = PE
R^2 is equal to 0.8985289476296636
For V
[[-1.17172098]] * V + [518.05250671] = PE
R^2 is equal to 0.7567791000458868
For AP
[[1.47838995]] * AP + [-1043.55348408] = PE
R^2 is equal to 0.2652904661868859
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
- Access to all documents
- Unlimited textbook solutions
- 24/7 expert homework help
In [49]:
#Linear Regression with two variables
def lineRegression2(num1, num2):
    """Fit and print a two-input linear regression on min-max scaled data.

    Both selected input columns and ``PE`` are scaled with ``scaleTest`` before
    fitting, so the printed coefficients and intercept are in scaled units.
    Prints the input names, the fitted equation, the MSE from ``MSE2`` and the
    R^2 score.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.

    Raises:
        IndexError: If either position is outside 0 to 3.
    """
    names = ["AT", "V", "AP", "RH"]
    # Fail with a clear message instead of leaving a column name unbound.
    for num in (num1, num2):
        if num not in range(len(names)):
            raise IndexError(
                "column index must be between 0 and {}, got {!r}".format(
                    len(names) - 1, num
                )
            )
    name1 = names[int(num1)]
    name2 = names[int(num2)]

    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    o = scaleTest('PE')
    i = np.column_stack((i1, i2))

    # The `normalize` argument was removed from LinearRegression in
    # scikit-learn 1.2 and raises TypeError there. Ordinary least squares
    # with an intercept gives the same fit without it.
    reg = linear_model.LinearRegression(fit_intercept=True)
    reg.fit(i, o)
    MS = MSE2(num1, num2)

    print("For {} and {}".format(name1, name2))
    print("{} * {} + {} * {} + {} = PE".format(
        reg.coef_[0][0], name1,
        reg.coef_[0][1], name2,
        reg.intercept_,
    ))
    print("MSE is {}".format(MS))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


#Place Column number here
lineRegression2(0, 1)
lineRegression2(0, 2)
lineRegression2(1, 2)
For AT and V
-0.7790623675371776 * AT + -0.2438803594302339 * V + [0.96290583] = PE
MSE is [0.00434592]
R^2 is equal to 0.9155641083484705
For AT and AP
-0.968042347342456 * AT + 0.07652724685959143 * AP + [0.89208861] = PE
MSE is [0.00512742]
R^2 is equal to 0.900380513348008
For V and AP
-0.7938698983450982 * V + 0.29291108245293396 * AP + [0.71355043] = PE
MSE is [0.01096074]
R^2 is equal to 0.7870463609907999
In [50]:
# Shows linear regression for 3 inputs
def lineRegression3(num1, num2, num3):
    """Fit and print a three-input linear regression on min-max scaled data.

    The three selected input columns and ``PE`` are scaled with ``scaleTest``
    before fitting, so the printed coefficients and intercept are in scaled
    units. Prints the input names, the fitted equation, the MSE from ``MSE3``
    and the R^2 score.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.
        num3: Position of the third input in the same list.

    Raises:
        IndexError: If any position is outside 0 to 3.
    """
    names = ["AT", "V", "AP", "RH"]
    # Fail with a clear message instead of leaving a column name unbound.
    for num in (num1, num2, num3):
        if num not in range(len(names)):
            raise IndexError(
                "column index must be between 0 and {}, got {!r}".format(
                    len(names) - 1, num
                )
            )
    name1 = names[int(num1)]
    name2 = names[int(num2)]
    name3 = names[int(num3)]

    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    i3 = scaleTest(name3)
    i = np.column_stack((i1, i2, i3))
    o = scaleTest('PE')

    # The `normalize` argument was removed from LinearRegression in
    # scikit-learn 1.2 and raises TypeError there. Ordinary least squares
    # with an intercept gives the same fit without it.
    reg = linear_model.LinearRegression(fit_intercept=True)
    reg.fit(i, o)
    MS = MSE3(num1, num2, num3)

    print("For {}, {}, and {}".format(name1, name2, name3))
    print("{} * {} + {} * {} + {} * {} + {} = PE".format(
        reg.coef_[0][0], name1,
        reg.coef_[0][1], name2,
        reg.coef_[0][2], name3,
        reg.intercept_,
    ))
    print("MSE is {}".format(MS))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


#Put Column numbers here
lineRegression3(0, 1, 2)
For AT, V, and AP
-0.7472739666494168 * AT + -0.2469104757097978 * V + 0.0845440529227739 * AP + [0.90610434] = PE
MSE is [0.00422974]
R^2 is equal to 0.917821294618563
Related Documents
Related Questions
Suppose a research study found that students who majored in math, engineering, or computer science scored higher on tests measuring "problem-solving" ability at the end of 4 years of college than students who did not major in these fields.
What is the dependent variable in this study? Is the dependent variable clearly defined?
arrow_forward
College GPA and Salary. Do students with higher college grade point averages (GPAs) earn more than those graduates with lower GPAs (CivicScience)? Consider the college GPA and salary data (10 years after graduation) provided in the file GPASalary.
Develop a scatter diagram for these data with college GPA as the independent variable. PLEASE MAKE SIMPLE GRAPH. What does the scatter diagram indicate about the relationship between the two variables?
Use these data to develop an estimated regression equation that can be used to predict annual salary 10 years after graduation given college GPA.
At the .05 level of significance, does there appear to be a significant statistical relationship between the two variables?
GPA
Salary
2.21
71000
2.28
49000
2.56
71000
2.58
63000
2.76
87000
2.85
97000
3.11
134000
3.35
130000
3.67
156000
3.69
161000
arrow_forward
A researcher collected data on how much extra sleep subjects got after being placed into one of two groups. Each group received a different set of mediation exercises. The dataset they collected is called df and the first 6 rows are shown here:
Which lines of code are required to make the plot above? Click all that apply.
a. geom_boxplot(alpha = .3)
b. geom_jitter(width=.1, size = 2, color = "navy")
c. geom_boxplot(fill = "dodgerblue", alpha = .3)
d. geom_jitter(color = "navy")
e. ggplot(sleep, aes(x = group, y = extra))
f. theme_classic()
arrow_forward
A company provides maintenance service for water-filtration systems throughout southern Florida. Customers contact the company with requests for maintenance service on their water-filtration systems. To estimate the service time and the service cost, the company's managers want to predict the repair time necessary for each maintenance request. Hence, repair time in hours is the dependent variable. Repair time is believed to be related to three factors, the number of months since the last maintenance service, the type of repair problem (mechanical or electrical), and the repairperson who performed the service. Data for a sample of 10 service calls are reported in the table below.
Repair Time in Hours
Months Since Last Service
Type of Repair
Repairperson
2.9
2
Electrical
Dave Newton
3.0
6
Mechanical
Dave Newton
4.8
8
Electrical
Bob Jones
1.8
3
Mechanical
Dave Newton
2.5
2
Electrical
Dave Newton
4.9
7
Electrical
Bob Jones
4.6
9
Mechanical
Bob Jones
4.8
8
Mechanical
Bob…
arrow_forward
A company provides maintenance service for water-filtration systems throughout southern Florida. Customers contact the company with requests for maintenance service on their water-filtration systems. To estimate the service time and the service cost, the company's
managers want to predict the repair time necessary for each maintenance request. Hence, repair time in hours is the dependent variable. Repair time is believed to be related to three factors, the number of months since the last maintenance service, the type of repair
problem (mechanical or electrical), and the repairperson who performed the service. Data for a sample of 10 service calls are reported in the table below.
Repair Time Months Since
in Hours
Last Service
2.9
3.0
4.8
1.8
2.5
4.9
4.2
4.8
4.4
4.5
2
6
8
3
2
7
9
8
4
6
Type of Repair Repairperson
Electrical
Mechanical
Electrical
Mechanical
Electrical
Electrical
Mechanical
Mechanical
Electrical
Electrical
Dave Newton
ŷ =
X
Check which variable(s)/term(s) should be in your…
arrow_forward
A researcher plans to do a study to see whether people who eat an all carbohydrate diet have different cholesterol levels than people in the general public. what will be The dependent variable in this study?
arrow_forward
Download the file Golf.jmp. Use JMP to develop a multiple linear regression model to predict the Earnings/Event using the data found in Golf.jmp. Consider the four independent variables listed in the table below. Find the best model and check assumptions.
EARNINGS
Average Earnings per Event
SCORE
Average Score
DRIVE_D
Average Drive Distance
DRIVE_A
Average Drive Accuracy
PUTTS
Average Putts per Round
[1] Create a correlation matrix for the variables EARNINGS, SCORE, DRIVE_D, DRIVE_A, and PUTTS using JMP.
[2] What is the correlation coefficient for EARNINGS and SCORE? Interpret the linear relationship between the two variables.
Correlation
Coefficient
Interpret the linear relationship
-0.5471
There is a negative correlation between the two variables. The strength of the correlation is moderate.
[3] Does the correlation matrix indicate a potential multicollinearity problem? If so, which…
arrow_forward
please do with rstudio and provide all the codes.
arrow_forward
The mean of the data set $\{9, 5, y, 2, x\}$ is twice the mean of the data set $\{8, x, 4, 1, 3\}$. What is $(y - x)^2$?
arrow_forward
Please solve
arrow_forward
A music critic was interested in whether particular variables measured on a song change over time. Two variables the critic considered were a song’s Tempo (in bpm) and a song’s Danceability. We will use the songs written before the year 2000 from the original SpotifySample data set. The data set that you will use to complete this investigation is called SpotifyB2000 and consists of 483 songs.
Write the least-squares regression line equation describing Year and Danceability using proper notation and values.
Interpret the slope of the regression line for Year and Danceability in context.
Would the interpretation of the y-intercept for Year and Danceability be meaningful? If so, interpret it. If not, state why not in one sentence.
Calculate and record the coefficient of determination value $r^2$ for Year and Danceability and interpret this value in context.
State the hypotheses for the test of the slope.
Write the p-value found in the output from (n), and use the p-value provided in the…
arrow_forward
The data set hemorrhage.sav contains a sample of 100 low birth weight infants born in Boston, Massachusetts. Germinal matrix hemorrhage (gmh) is a dichotomous variable that is coded as 1 if the baby had a hemorrhage and 0 if the baby did not have a hemorrhage. (See the attached image)
You wish to know if gestational age, systolic blood pressure (sbp), and gender predicts hemorrhage.
What are the null and alternative hypotheses for this analysis? (hint: there should be 4 sets)
Is the model as a whole significant? How do you know?
Regardless if the variables are significant interpret the odds ratio for gender, sbp, and gestational age.
arrow_forward
What are Independent variables?
arrow_forward
The r code for side by side boxplot of vitamind v newage and vitamin d v country.
Scatterplot code for relationship between vitamin d level and age.
arrow_forward
Why would the male lifespan not be the dependent variable?
arrow_forward
Which equation is the best fit for the
data in the table?
X
Y
3
12
5
27
8
48
11
O y=-.0026x^2+.33x+1.16
O y=1.94x+2.140
O y=1.732√x-1
O y=1.732x-1
arrow_forward
Explain why a 3 way interaction will have more regression lines than a 2 way interaction.
arrow_forward
Why won't the whiteboard image show up?
arrow_forward
SEE MORE QUESTIONS
Recommended textbooks for you

Related Questions
- Suppose that in a research it was discovered that those who majored in math, engineering, or computer science scored higher on tests measuring "problem-solving" ability at the end of 4 years of college than did those students who did not major in these fields. What is the dependent variable in this study? Is the dependent variable clearly defined?arrow_forwardCollege GPA and Salary. Do students with higher college grade point averages (GPAs) earn more than those graduates with lower GPAs (CivicScience)? Consider the college GPA and salary data (10 years after graduation) provided in the file GPASalary. Develop a scatter diagram for these data with college GPA as the independent variable. PLEASE MAKE SIMPLE GRAPH. What does the scatter diagram indicate about the relationship between the two variables? Use these data to develop an estimated regression equation that can be used to predict annual salary 10 years after graduation given college GPA. At the .05 level of significance, does there appear to be a significant statistical relationship between the two variables? GPA Salary 2.21 71000 2.28 49000 2.56 71000 2.58 63000 2.76 87000 2.85 97000 3.11 134000 3.35 130000 3.67 156000 3.69 161000arrow_forwardA researcher collected data on how much extra sleep subjects got after being placed into one of two groups. Each group received a different set of mediation exercises. The dataset they collected is called df and the first 6 rows are shown here: Which lines of code are required to make the plot above? Click all that apply. a. geom_boxplot(alpha = .3) b. geom_jitter(width=.1, size = 2, color = "navy") c. geom_boxplot(fill = "dodgerblue", alpha = .3) d. geom_jitter(color = "navy") e. ggplot(sleep, aes(x = group, y = extra)) f. theme_classic()arrow_forward
- A company provides maintenance service for water-filtration systems throughout southern Florida. Customers contact the company with requests for maintenance service on their water-filtration systems. To estimate the service time and the service cost, the company's managers want to predict the repair time necessary for each maintenance request. Hence, repair time in hours is the dependent variable. Repair time is believed to be related to three factors, the number of months since the last maintenance service, the type of repair problem (mechanical or electrical), and the repairperson who performed the service. Data for a sample of 10 service calls are reported in the table below. Repair Timein Hours Months SinceLast Service Type of Repair Repairperson 2.9 2 Electrical Dave Newton 3.0 6 Mechanical Dave Newton 4.8 8 Electrical Bob Jones 1.8 3 Mechanical Dave Newton 2.5 2 Electrical Dave Newton 4.9 7 Electrical Bob Jones 4.6 9 Mechanical Bob Jones 4.8 8 Mechanical Bob…arrow_forwardA company provides maintenance service for water-filtration systems throughout southern Florida. Customers contact the company with requests for maintenance service on their water-filtration systems. To estimate the service time and the service cost, the company's managers want to predict the repair time necessary for each maintenance request. Hence, repair time in hours is the dependent variable. Repair time is believed to be related to three factors, the number of months since the last maintenance service, the type of repair problem (mechanical or electrical), and the repairperson who performed the service. Data for a sample of 10 service calls are reported in the table below. 
Repair Time Months Since in Hours Last Service 2.9 3.0 4.8 1.8 2.5 4.9 4.2 4.8 4.4 4.5 2 6 8 3 2 7 9 8 4 6 Type of Repair Repairperson Electrical Mechanical Electrical Mechanical Electrical Electrical Mechanical Mechanical Electrical Electrical Dave Newton ŷ = X Check which variable(s)/term(s) should be in your…arrow_forwardA researcher plans to do a study to see whether people who eat an all carbohydrate diet have different cholesterol levels than people in the general public. what will be The dependent variable in this study?arrow_forward
- Download the file Golf.jmp. Use JMP to develop a multiple linear regression model to predict the Earnings/Event using the data found in Golf.jmp. Consider the four independent variables listed in the table below. Find the best model and check assumptions. EARNINGS Average Earnings per Event SCORE Average Score DRIVE_D Average Drive Distance DRIVE_A Average Drive Accuracy PUTTS Average Putts per Round [1] Create a correlation matrix for the variables EARNINGS, SCORE, DRIVE_D, DRIVE_A, and PUTTS using JMP. [2] What is the correlation coefficient for EARNINGS and SCORE? Interpret the linear relationship between the two variables. Correlation Coefficient Interpret the linear relationship -0.5471 There is a negative correlation between the two variables. The strength of the correlation is moderate. [3] Does the correlation matrix indicate a potential multicollinearity problem? If so, which…arrow_forwardplease do with rstudio and provide all the codes.arrow_forwardThe mean of the data set {9,5, y, 2, x} is twice the data set {8, x, 4,1,3}. What is (y- x)2?arrow_forward
- Please solvearrow_forwardA music critic was interested in whether particular variables measured on a song change over time. Two variables the critic considered were a song’s Tempo (in bpm) and a song’s Danceability. We will use the songs written before the year 2000 from the original SpotifySample data set. The data set that you will use to complete this investigation is called SpotifyB2000 and consists of 483 songs. Write the least-squares regression line equation describing Year and Danceability usingproper notation and values. Interpret the slope of the regression line for Year and Danceability in context. Would the interpretation of the y-intercept for Year and Danceability be meaningful? Ifso, interpret it. If not, state why not in one sentence. Calculate and record the coefficient of determination value r2for Year and Danceabilityand interpret this value in context. State the hypotheses for the test of the slope. Write the p-value found in the output from (n), and use the p-value provided in the…arrow_forwardThe data set hemorrhage.sav contains a sample of 100 low birth weight infants born in Boston, Massachusetts. Germinal matrix hemorrhage (gmh) is a dichotomos variable that coded as 1 if the baby had a hemorrhage and 0 if the baby did not have a hemorrhage. (See the attached image) You wish to know if gestational age, systolic blood pressure (sbp), and gender predicts hemorrhage. What are the null and alternative hypotheses for this analysis? (hint: there should be 4 sets) Is the model as a whole significant? How do you know? Regardless if the variables are significant interpret the odds ratio for gender, sbp, and gestational age.arrow_forward
arrow_back_ios
SEE MORE QUESTIONS
arrow_forward_ios
Recommended textbooks for you
