EE102MATH

pdf

School

San Jose State University *

*We aren’t endorsed by this school

Course

102

Subject

Statistics

Date

Jan 9, 2024

Type

pdf

Pages

8

Uploaded by DoctorStarSnake182

Report
# In [31]:
import pandas as pd
from pandas import read_excel
from pandas.plotting import scatter_matrix
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr
from sklearn import linear_model

# File names of the test and training workbooks.
Test = 'Test_Data.xlsx'
Training = 'Training_Data.xlsx'
# Test data uses 20% of 9568 = 1913 (the first 1913 rows are used).
# Choose the data set below.
file_name = Training
# The rest of the notebook reads this module-level frame.
df = pd.read_excel(file_name)

# In [32]:
# Mean of data
data_mean = df.mean()
print(data_mean)
# Recorded output:
# AT      19.613493
# V       54.259261
# AP    1013.284266
# RH      73.348454
# PE     454.475793
# dtype: float64

# In [33]:
# Median of data
data_median = df.median()
print(data_median)
# Recorded output:
# AT      20.325
# V       52.080
# AP    1012.960
# RH      74.955
# PE     451.670
# dtype: float64

# In [34]:
# Mode of data
data_mode = df.mode()
print(data_mode)
# Recorded output:
#       AT      V       AP      RH     PE
# 0  25.21  41.17  1010.99  100.09  468.8

# In [35]:
# Minimum of data
data_min = df.min()
print(data_min)
# Recorded output:
# AT      2.58
# V      25.36
# AP    992.89
# RH     25.56
# PE    420.26
# dtype: float64

# In [36]:
# Maximum of data
data_max = df.max()
print(data_max)
# Recorded output:
# AT      37.11
# V       81.56
# AP    1033.29
# RH     100.16
# PE     495.76
# dtype: float64
# In [37]:
# Variance of data
data_var = df.var()
print(data_var)
# Recorded output:
# AT     55.772378
# V     161.743253
# AP     35.616324
# RH    210.611384
# PE    293.430633
# dtype: float64

# In [38]:
# Standard deviation of data
data_std = df.std()
print(data_std)
# Recorded output:
# AT     7.468091
# V     12.717832
# AP     5.967941
# RH    14.512456
# PE    17.129817
# dtype: float64

# In [39]:
# Basically everything above in one call
data_des = df.describe()
print(data_des)
# Recorded output:
#                 AT            V           AP           RH           PE
# count  7656.000000  7656.000000  7656.000000  7656.000000  7656.000000
# mean     19.613493    54.259261  1013.284266    73.348454   454.475793
# std       7.468091    12.717832     5.967941    14.512456    17.129817
# min       2.580000    25.360000   992.890000    25.560000   420.260000
# 25%      13.450000    41.670000  1009.127500    63.430000   439.820000
# 50%      20.325000    52.080000  1012.960000    74.955000   451.670000
# 75%      25.692500    66.510000  1017.342500    84.750000   468.625000
# max      37.110000    81.560000  1033.290000   100.160000   495.760000


# In [40]:
# Scatter plot of input vs. PE
def scatterPlot(name, target='PE'):
    """Show a scatter plot of one column of ``df`` against another.

    Args:
        name: Label of the column drawn on the x axis.
        target: Label of the column drawn on the y axis. Defaults to
            ``'PE'``, so one-argument calls plot the input against PE.
    """
    plt.scatter(df[name], df[target])
    plt.xlabel(name)
    plt.ylabel(target)
    plt.title('Scatter Plot')
    plt.show()


scatterPlot('AP')
Your preview ends here
Eager to read the complete document? Join Bartleby Learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
# In [43]:
def scaleTest(name, data=None):
    """Min-max scale one column to the range [0, 1].

    Args:
        name: Label of the column to scale.
        data: DataFrame to read the column from. Defaults to the
            module-level ``df`` so one-argument calls keep working.

    Returns:
        A NumPy array of shape ``(rows, 1)`` holding
        ``(x - min) / (max - min)`` for every value ``x`` in the column.
        A constant column has a zero range, so the division yields NaN
        (NumPy semantics) rather than raising.
    """
    frame = df if data is None else data
    column = frame[name]
    low = column.min()
    high = column.max()
    values = np.asarray(column.values)
    # One vectorised expression replaces the per-row loop.
    return ((values - low) / (high - low)).reshape(-1, 1)


# In [44]:
def MSE1(num):
    """Return the mean squared error of a one-input linear fit.

    The input column and ``PE`` are both min-max scaled with
    ``scaleTest`` before a ``LinearRegression`` is fitted.

    Args:
        num: Position of the input in ``["AT", "V", "AP", "RH"]``.

    Returns:
        Array of shape ``(1, 1)`` with the MSE between the scaled ``PE``
        values and the model's predictions.

    Raises:
        IndexError: If ``num`` is outside the list of input names.
    """
    names = ["AT", "V", "AP", "RH"]
    name = names[num]
    i = scaleTest(name)
    o = scaleTest('PE')
    reg = linear_model.LinearRegression()
    reg.fit(i, o)
    residuals = o - reg.predict(i)
    # Shape (1, 1) is kept so code that prints the value sees "[[...]]".
    return np.mean(residuals ** 2, axis=0).reshape(1, 1)


# In [45]:
# Used to find the MSE for two inputs
def MSE2(num1, num2):
    """Return the mean squared error of a two-input linear fit.

    Both inputs and ``PE`` are min-max scaled with ``scaleTest`` before a
    ``LinearRegression`` is fitted.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.

    Returns:
        Array of shape ``(1,)`` with the MSE between the scaled ``PE``
        values and the model's predictions.

    Raises:
        IndexError: If an argument is outside the list of input names.
    """
    names = ["AT", "V", "AP", "RH"]
    name1 = names[num1]
    name2 = names[num2]
    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    o = scaleTest('PE')
    i = np.column_stack((i1, i2))
    reg = linear_model.LinearRegression()
    reg.fit(i, o)
    residuals = o - reg.predict(i)
    return np.mean(residuals ** 2, axis=0)
# In [46]:
# Used to find the MSE for three inputs
def MSE3(num1, num2, num3):
    """Return the mean squared error of a three-input linear fit.

    The three inputs and ``PE`` are min-max scaled with ``scaleTest``
    before a ``LinearRegression`` is fitted.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.
        num3: Position of the third input in the same list.

    Returns:
        Array of shape ``(1,)`` with the MSE between the scaled ``PE``
        values and the model's predictions.

    Raises:
        IndexError: If an argument is outside the list of input names.
    """
    names = ["AT", "V", "AP", "RH"]
    name1 = names[num1]
    name2 = names[num2]
    name3 = names[num3]
    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    i3 = scaleTest(name3)
    o = scaleTest('PE')
    i = np.column_stack((i1, i2, i3))
    reg = linear_model.LinearRegression()
    reg.fit(i, o)
    # predict() applies the coefficients and intercept to every row.
    residuals = o - reg.predict(i)
    return np.mean(residuals ** 2, axis=0)
# In [47]:
# Shows linear regression for 1 variable
def lineRegression(num):
    """Fit and print a one-input linear regression on min-max scaled data.

    Prints the fitted equation, the MSE from ``MSE1`` and the R^2 score.

    Args:
        num: Position of the input in ``["AT", "V", "AP", "RH"]``.

    Raises:
        IndexError: If ``num`` is outside the list of input names.
    """
    # In my project these were the input names
    names = ["AT", "V", "AP", "RH"]
    name = names[num]
    i = scaleTest(name)
    o = scaleTest('PE')
    reg = linear_model.LinearRegression()
    reg.fit(i, o)
    MS = MSE1(num)
    print("For {}".format(name))
    print("{0} * {1} + {2} = PE".format(reg.coef_, name, reg.intercept_))
    print("MSE is {}".format(MS))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


# Place column number here
lineRegression(0)
lineRegression(1)
lineRegression(2)
# Recorded output:
# For AT
# [[-0.99439472]] * AT + [0.94371972] = PE
# MSE is [[0.00522272]]
# R^2 is equal to 0.8985289476296636
#
# For V
# [[-0.87219495]] * V + [0.9016909] = PE
# MSE is [[0.0125186]]
# R^2 is equal to 0.7567791000458868
#
# For AP
# [[0.79108548]] * AP + [0.05384258] = PE
# MSE is [[0.03781556]]
# R^2 is equal to 0.2652904661868859


# In [48]:
# NOTE: this redefines lineRegression; the calls below, and any later
# ones, use this version, which fits the raw (unscaled) columns.
def lineRegression(num):
    """Fit and print a one-input linear regression on the raw data.

    Prints the fitted equation and the R^2 score.

    Args:
        num: Position of the input in ``["AT", "V", "AP", "RH"]``.

    Raises:
        IndexError: If ``num`` is outside the list of input names.
    """
    names = ["AT", "V", "AP", "RH"]
    name = names[num]
    # Select by label so the printed name always matches the fitted data.
    i = df[name].values.reshape(-1, 1)
    o = df['PE'].values.reshape(-1, 1)
    reg = linear_model.LinearRegression()
    reg.fit(i, o)
    print("For {}".format(name))
    print("{0} * {1} + {2} = PE".format(reg.coef_, name, reg.intercept_))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


# Place column number here
lineRegression(0)
lineRegression(1)
lineRegression(2)
# Recorded output:
# For AT
# [[-2.17424852]] * AT + [497.12040026] = PE
# R^2 is equal to 0.8985289476296636
#
# For V
# [[-1.17172098]] * V + [518.05250671] = PE
# R^2 is equal to 0.7567791000458868
#
# For AP
# [[1.47838995]] * AP + [-1043.55348408] = PE
# R^2 is equal to 0.2652904661868859
Your preview ends here
Eager to read the complete document? Join Bartleby Learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
# In [49]:
# Linear regression with two variables
def lineRegression2(num1, num2):
    """Fit and print a two-input linear regression on min-max scaled data.

    Prints the fitted equation, the MSE from ``MSE2`` and the R^2 score.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.

    Raises:
        IndexError: If an argument is outside the list of input names.
    """
    names = ["AT", "V", "AP", "RH"]
    name1 = names[num1]
    name2 = names[num2]
    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    o = scaleTest('PE')
    i = np.column_stack((i1, i2))
    # `normalize` was removed from LinearRegression in scikit-learn 1.2 and
    # passing it raises TypeError there. The inputs are already scaled by
    # scaleTest, so the argument is dropped.
    reg = linear_model.LinearRegression(fit_intercept=True)
    reg.fit(i, o)
    MS = MSE2(num1, num2)
    print("For {} and {}".format(name1, name2))
    print("{} * {} + {} * {} + {} = PE".format(
        reg.coef_[0][0], name1, reg.coef_[0][1], name2, reg.intercept_))
    print("MSE is {}".format(MS))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


# Place column numbers here
lineRegression2(0, 1)
lineRegression2(0, 2)
lineRegression2(1, 2)
# Recorded output:
# For AT and V
# -0.7790623675371776 * AT + -0.2438803594302339 * V + [0.96290583] = PE
# MSE is [0.00434592]
# R^2 is equal to 0.9155641083484705
#
# For AT and AP
# -0.968042347342456 * AT + 0.07652724685959143 * AP + [0.89208861] = PE
# MSE is [0.00512742]
# R^2 is equal to 0.900380513348008
#
# For V and AP
# -0.7938698983450982 * V + 0.29291108245293396 * AP + [0.71355043] = PE
# MSE is [0.01096074]
# R^2 is equal to 0.7870463609907999
# In [50]:
# Shows linear regression for 3 inputs
def lineRegression3(num1, num2, num3):
    """Fit and print a three-input linear regression on min-max scaled data.

    Prints the fitted equation, the MSE from ``MSE3`` and the R^2 score.

    Args:
        num1: Position of the first input in ``["AT", "V", "AP", "RH"]``.
        num2: Position of the second input in the same list.
        num3: Position of the third input in the same list.

    Raises:
        IndexError: If an argument is outside the list of input names.
    """
    names = ["AT", "V", "AP", "RH"]
    name1 = names[num1]
    name2 = names[num2]
    name3 = names[num3]
    i1 = scaleTest(name1)
    i2 = scaleTest(name2)
    i3 = scaleTest(name3)
    i = np.column_stack((i1, i2, i3))
    o = scaleTest('PE')
    # `normalize` was removed from LinearRegression in scikit-learn 1.2 and
    # passing it raises TypeError there. The inputs are already scaled by
    # scaleTest, so the argument is dropped.
    reg = linear_model.LinearRegression(fit_intercept=True)
    reg.fit(i, o)
    MS = MSE3(num1, num2, num3)
    print("For {}, {}, and {}".format(name1, name2, name3))
    print("{} * {} + {} * {} + {} * {} + {} = PE".format(
        reg.coef_[0][0], name1,
        reg.coef_[0][1], name2,
        reg.coef_[0][2], name3,
        reg.intercept_))
    print("MSE is {}".format(MS))
    print("R^2 is equal to {0}\n".format(reg.score(i, o)))


# Put column numbers here
lineRegression3(0, 1, 2)
# Recorded output:
# For AT, V, and AP
# -0.7472739666494168 * AT + -0.2469104757097978 * V + 0.0845440529227739 * AP + [0.90610434] = PE
# MSE is [0.00422974]
# R^2 is equal to 0.917821294618563