HW2_R code_SJ

docx

School

Westcliff University *

*We aren’t endorsed by this school

Course

504

Subject

Statistics

Date

Apr 3, 2024

Type

docx

Pages

10

Uploaded by BrigadierNarwhalMaster988

Report
HW 2 Q1a. Bring the file into R HW<- read.csv ( "pepsicokes.csv" ) HWdata<- data.frame (HW) View (HWdata) ## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/ ## R.framework/Resources/modules/R_de.so'' had status 1 Q1b. Create a dummy variable whitecollar ( 1 if white collar, 0 otherwise), representing stores with white collar percent > 60 and calculate frequencies for this variable; the white collar percentages on the dataset are represented by the variable WhiteCollarPctPen. library (dplyr) ## ## Attaching package: 'dplyr' ## The following objects are masked from 'package:stats': ## ## filter, lag ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union pepsicokes<- mutate (HWdata, whitecollar= ifelse (WhiteCollarPctPen> 60 , 1 , 0 )) View (pepsicokes) ## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/ ## R.framework/Resources/modules/R_de.so'' had status 1 count (pepsicokes, c (whitecollar)) ## # A tibble: 2 × 2 ## `c(whitecollar)` n ## <dbl> <int> ## 1 0 87 ## 2 1 49
Q1c. Do a crosstab of Prizm cluster (the 16 social groups) with Chain; and another crosstab of Prizm cluster with whitecollar library (gtable) table1<- table (pepsicokes$Chain, pepsicokes$Prizm_Cluster) View (table1) ## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/ ## R.framework/Resources/modules/R_de.so'' had status 1 table2<- table (pepsicokes$whitecollar,pepsicokes$Prizm_Cluster) View (table2) ## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/ ## R.framework/Resources/modules/R_de.so'' had status 1 Q1d. Calculate descriptive statistics for any 5 continuous variables in the dataset. library (psych) ## Warning: package 'psych' was built under R version 3.3.2 describe ( select (pepsicokes,Corp_Pep_Volume_Sales,Corp_Coke_Volume_Sales,Corp_Pep_Price,Corp_Coke_Price,Prizm_Cluster)) ## vars n mean sd median trimmed ## Corp_Pep_Volume_Sales 1 136 69978.45 34613.79 64290.97 67037.20 ## Corp_Coke_Volume_Sales 2 136 40126.02 29009.98 32740.59 36594.37 ## Corp_Pep_Price 3 136 0.00 1.00 -0.06 0.02 ## Corp_Coke_Price 4 136 0.00 1.00 -0.32 -0.05 ## Prizm_Cluster* 5 136 6.57 3.23 6.00 6.43 ## mad min max range skew kurtosis ## Corp_Pep_Volume_Sales 27941.09 9514.52 193510.58 183996.05 1.05 1.61 ## Corp_Coke_Volume_Sales 24858.14 3999.10 140015.28 136016.18 1.10 0.63 ## Corp_Pep_Price 1.09 -2.78 2.63 5.41 -0.19 0.04 ## Corp_Coke_Price 0.99 -2.26 2.16 4.42 0.41 -0.91
## Prizm_Cluster* 4.45 1.00 13.00 12.00 0.31 -0.88 ## se ## Corp_Pep_Volume_Sales 2968.11 ## Corp_Coke_Volume_Sales 2487.59 ## Corp_Pep_Price 0.09 ## Corp_Coke_Price 0.09 ## Prizm_Cluster* 0.28 Q1e. Regress Corp_Pep_Volume_per__MM_ACV (million dollars All Commodity Volume = Total Store Sales) on a set of any 6 independent variables. (use lm() for linear regression) Regression<- lm (Corp_Pep_Volume_per__MM_ACV~Corp_Pep_Volume_Sales+Corp_Coke_Volume_Sales+Corp_Pep_Price+CSD_Volume_per__MM_ACV+CSD_Price+Corp_Coke_Price, data= pepsicokes) summary (Regression) ## ## Call: ## lm(formula = Corp_Pep_Volume_per__MM_ACV ~ Corp_Pep_Volume_Sales + ## Corp_Coke_Volume_Sales + Corp_Pep_Price + CSD_Volume_per__MM_ACV + ## CSD_Price + Corp_Coke_Price, data = pepsicokes) ## ## Residuals: ## Min 1Q Median 3Q Max ## -0.7273 -0.1781 -0.0249 0.1698 0.6004 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -1.163e+01 7.673e-01 -15.156 < 2e-16 *** ## Corp_Pep_Volume_Sales 1.239e-06 7.190e-07 1.723 0.0873 . ## Corp_Coke_Volume_Sales -7.485e-07 1.284e-06 -0.583 0.5611 ## Corp_Pep_Price -2.784e-01 3.649e-02 -7.630 4.57e-12 *** ## CSD_Volume_per__MM_ACV 4.660e-04 1.768e-05 26.357 < 2e-16 *** ## CSD_Price 1.574e+00 1.391e-01 11.314 < 2e-16 *** ## Corp_Coke_Price 3.184e-03 2.590e-02 0.123 0.9023 ## ---
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.2503 on 129 degrees of freedom ## Multiple R-squared: 0.9401, Adjusted R-squared: 0.9373 ## F-statistic: 337.5 on 6 and 129 DF, p-value: < 2.2e-16 Q1f Run a Hierarchical Cluster Analysis using the variables Pepsi Volume per MM ACV, Pepsi Price per MM ACV, Coke Volume per MM ACV, Coke Price per MM ACV. These variables were all standardized to mean 0, standard deviation 1 (Zscores) before clustering. Generate a Scree plot and find the kink point to determine the optimal number of clusters. pepsicokes_use<-pepsicokes[, c ( 36 , 37 , 39 , 40 )] scaled_pepsicokes<- scale (pepsicokes_use, center= TRUE , scale= TRUE ) View (scaled_pepsicokes) ## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/ ## R.framework/Resources/modules/R_de.so'' had status 1 pepsicokes_dist<- dist (scaled_pepsicokes, method = "euclidean" ) pepsicokes_hclust<- hclust (pepsicokes_dist, method= "complete" ) summary (pepsicokes_hclust) ## Length Class Mode ## merge 270 -none- numeric ## height 135 -none- numeric ## order 136 -none- numeric ## labels 0 -none- NULL ## method 1 -none- character ## call 3 -none- call ## dist.method 1 -none- character plot (pepsicokes_hclust)
plot (pepsicokes_hclust, label= pepsicokes$TDLinx)
pepsicokes_hclust$height ## [1] 0.1386584 0.1396047 0.1631199 0.1671819 0.1725537 0.2093707 0.2136051 ## [8] 0.2148434 0.2169044 0.2264955 0.2350235 0.2414639 0.2458700 0.2512878 ## [15] 0.2605308 0.2642873 0.2765192 0.2772353 0.2809642 0.2848664 0.2913083 ## [22] 0.3031009 0.3107680 0.3188676 0.3194838 0.3220497 0.3317241 0.3341909 ## [29] 0.3407441 0.3471289 0.3824845 0.3842518 0.3857692 0.4052727 0.4096676 ## [36] 0.4096954 0.4146928 0.4179864 0.4398404 0.4435938 0.4625907 0.4696793 ## [43] 0.4714327 0.4741556 0.4928168 0.4999340 0.5045603 0.5071533 0.5138479 ## [50] 0.5186018 0.5286290 0.5339792 0.5356408 0.5608411 0.5741377 0.5767585 ## [57] 0.5769589 0.5846019 0.5870278 0.5970021 0.6284375 0.6359602 0.6647388 ## [64] 0.6675389 0.6711418 0.6757199 0.6989639 0.7024853 0.7113948 0.7334023 ## [71] 0.7504464 0.7522828 0.7757182 0.7794874 0.8166034
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
0.8207481 0.8216293 ## [78] 0.8292620 0.8311296 0.8529778 0.8792595 0.8822876 0.8951340 0.9143303 ## [85] 0.9182950 0.9205618 0.9396085 0.9538322 0.9644107 1.0058766 1.0164154 ## [92] 1.0295904 1.0479720 1.0735370 1.0840720 1.1018983 1.1283187 1.1427112 ## [99] 1.1778581 1.2029170 1.2308914 1.2843263 1.2881743 1.3136084 1.3149742 ## [106] 1.3430558 1.3446066 1.3554758 1.4109480 1.4166235 1.5335090 1.7497358 ## [113] 1.7634136 1.8420395 1.8874882 1.9327043 1.9705814 1.9982353 2.0039138 ## [120] 2.0143144 2.0690615 2.1516141 2.2145405 2.3907431 2.7105632 2.9240202 ## [127] 3.1327974 3.2017816 3.6972456 3.8005397 3.9446471 4.3989273 4.9564989 ## [134] 5.3277902 6.4419282 plot (pepsicokes_hclust$height) pepsicokes_hclust_scree<- rev (pepsicokes_hclust$height) plot (pepsicokes_hclust_scree)
plot (pepsicokes_hclust_scree, main= "Scree Plot" , type= "l" ) rect.hclust (pepsicokes_hclust, h= 2.5 )
clusters<- cutree (pepsicokes_hclust, 4 ) table (clusters) ## clusters ## 1 2 3 4 ## 16 53 39 28 pepsicokes_cluster<- data.frame (pepsicokes,clusters) table ( cutree (pepsicokes_hclust, 4 ), cutree (pepsicokes_hclust, 3 )) ## ## 1 2 3 ## 1 16 0 0 ## 2 0 53 0 ## 3 0 39 0 ## 4 0 0 28 View (pepsicokes) ## Warning: running command ''/usr/bin/otool' -L '/Library/Frameworks/ ## R.framework/Resources/modules/R_de.so'' had status 1
Your preview ends here
Eager to read complete document? Join bartleby learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help
Q1g Run a K-means Cluster Analysis using the number of clusters determined in Q1f k_cluster<- kmeans (pepsicokes_use, 4 ) k_cluster ## K-means clustering with 4 clusters of sizes 31, 39, 45, 21 ## ## Cluster means: ## Corp_Pep_Volume_per__MM_ACV Corp_Pep_Price Corp_Coke_Volume_per__MM_ACV ## 1 0.3042666 -0.55226207 -1.2223726 ## 2 -0.9092102 1.14747095 0.8399118 ## 3 -0.1669167 -0.05773223 -0.1685815 ## 4 1.5970566 -1.19206155 0.6058648 ## Corp_Coke_Price ## 1 -0.7113567 ## 2 -0.8676354 ## 3 1.1737700 ## 4 0.1461995 ## ## Clustering vector: ## [1] 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 3 2 2 3 2 2 3 2 2 2 3 2 2 2 4 3 4 ## [36] 1 1 4 3 1 1 3 3 1 3 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 3 1 3 1 1 3 1 3 1 1 ## [71] 3 1 1 1 3 1 3 1 1 3 3 2 2 2 2 3 2 2 3 3 3 2 3 2 3 2 3 2 3 2 2 3 3 2 2 ## [106] 3 2 2 3 2 2 3 2 3 2 2 2 3 3 2 2 3 2 3 3 4 3 3 2 2 2 3 3 2 3 3 ## ## Within cluster sum of squares by cluster: ## [1] 27.35084 29.36915 64.34683 47.89759 ## (between_SS / total_SS = 68.7 %) ## ## Available components: ## ## [1] "cluster" "centers" "totss" "withinss" ## [5] "tot.withinss" "betweenss" "size" "iter" ## [9] "ifault"