#HDMD Function Calls #Requires packages: scatterplot3d, MASS, psych ### Data ######## AA54_data = read.csv("/StatAnalysis/subset54_AAattributes/AA54_standardized.csv", header=TRUE) AA54 = AA54_data[,2:ncol(AA54_data)] rownames(AA54) = AA54_data[,1] p = ncol(AA54) ################################## #### PRINCIPAL COMPONENTS ######## ################################## AA54_PCA = princomp(AA54, covmat = cov.wt(AA54)) AA54_PCA$loadings screeplot(AA54_PCA, type="lines", npcs=length(AA54_PCA$sdev), main="Principal Components Scree Plot") PTV = AA54_PCA$sdev^2 / sum(AA54_PCA$sdev^2) CTV = cumsum(PTV) TV = rbind(Lambda=round(AA54_PCA$sdev^2, digits=5), PTV=round(PTV, digits=5), CTV=round(CTV, digits=5)) TV ### Plots ######### require(scatterplot3d) PC3d =scatterplot3d(AA54_PCA$scores[,1:3], pch = AminoAcids, main="Principal Component Scores", box = F, grid=F) PC3d$plane3d(c(0,0,0), col="grey") PC3d$points3d(c(0,0), c(0,0), c(-3,2), lty="solid", type="l" ) PC3d$points3d(c(0,0), c(-1.5,2), c(0,0), lty="solid", type="l" ) PC3d$points3d(c(-1.5,2), c(0,0), c(0,0), lty="solid", type="l" ) PC3d$points3d(AA54_PCA$scores[hydrophobic,1:3], col="blue", cex = 2.7, lwd=1.5) PC3d$points3d(AA54_PCA$scores[polar,1:3], col="green", cex = 3.3, lwd=1.5) PC3d$points3d(AA54_PCA$scores[small,1:3], col="orange", cex = 3.9, lwd=1.5) legend(x=5, y=4.5, legend=c("hydrophobic", "polar", "small"), col=c("blue", "green", "orange"), pch=21, box.lty =0) ########################## #### FACTOR ANALYSIS ##### ########################## require(psych) VSS.scree(AA54, main="Subset of 54 AA attributes Scree Plot") Factor54 = factor.pa.ginv(AA54, nfactors = 5, m=3, prerotate=TRUE, rotate="Promax", scores="regression") Factor54$loadings ### Plots ### ############# Factor3d =scatterplot3d(Factor54$scores[,1:3], pch = AminoAcids, main="Factor Scores", box = FALSE, grid=F, xlab="pah", ylab="pss", zlab="ms") Factor3d$plane3d(c(0,0,0), col="grey") Factor3d$points3d(c(0,0), c(0,0), c(-3,2), lty="solid", type="l" ) Factor3d$points3d(c(0,0), c(-1.5,2), c(0,0), lty="solid", type="l" ) Factor3d$points3d(c(-1.5,2), c(0,0), c(0,0), lty="solid", type="l" ) Factor3d$points3d(Factor54$scores[hydrophobic,1:3], col="blue", cex = 2.7, lwd=1.5) Factor3d$points3d(Factor54$scores[polar,1:3], col="green", cex = 3.3, lwd=1.5) Factor3d$points3d(Factor54$scores[small,1:3], col="orange", cex = 3.9, lwd=1.5) legend(x=5, y=4.5, legend=c("hydrophobic", "polar", "small"), col=c("blue", "green", "orange"), pch=21, box.lty =0) #################################### ####### FACTOR TRANSFORMATION ###### #################################### AASeqs = read.table("/StatAnalysis/AA_bHLHSeqs_grouped.tab", header = TRUE) AA54_MetricList_Factor1 = FactorTransform(as.vector(AASeqs[,3]), SeqName=AASeqs[,1]) AA54_MetricFactor1 = matrix(unlist(AA54_MetricList_Factor1), nrow = length(AA54_MetricList_Factor1), byrow = TRUE, dimnames = list(names(AA54_MetricList_Factor1))) AA54_MetricFactor1[c(20:25, 137:147, 190:196, 220:229, 264:273),1:8] ############################################## ####### LINEAR DISCRIMINANT ANALYSIS ######### ############################################## #bHLH_classes = read.csv("/StatAnalysis/AA_bHLHSeqs_grouped.csv", header = TRUE) require(MASS) bHLH_Seq = as.vector(AASeqs[,3]) grouping = t(AASeqs[,2]) g = as.factor(grouping) p = ncol(AA54_MetricFactor1) group.means <- tapply(AA54_MetricFactor1, list(rep(g, p), col(AA54_MetricFactor1)), mean) letter_grouping= vector(length=length(grouping)) for(i in 1:length(grouping)){ if(grouping[i]==1) {letter_grouping[i]="a"} else if (grouping[i]==2){letter_grouping[i]="b"} else if (grouping[i]==3){letter_grouping[i]="c"} else if (grouping[i]==4){letter_grouping[i]="d"} else if (grouping[i]==5){letter_grouping[i]="e"} } AA54_Metric1 = FactorTransform(bHLH_Seq, Replace=Factor54$scores, Factor=1) AA54_Metric2 = FactorTransform(bHLH_Seq, Replace=Factor54$scores, Factor=2) AA54_Metric3 = FactorTransform(bHLH_Seq, Replace=Factor54$scores, Factor=3) AA54_Metric4 = FactorTransform(bHLH_Seq, Replace=Factor54$scores, Factor=4) AA54_Metric5 = FactorTransform(bHLH_Seq, Replace=Factor54$scores, Factor=5) AA54_MetricFactor1 = matrix(unlist(AA54_Metric1), nrow = length(AA54_Metric1), byrow = TRUE, dimnames = list(names(AA54_Metric1))) AA54_MetricFactor2 = matrix(unlist(AA54_Metric2), nrow = length(AA54_Metric2), byrow = TRUE, dimnames = list(names(AA54_Metric2))) AA54_MetricFactor3 = matrix(unlist(AA54_Metric3), nrow = length(AA54_Metric3), byrow = TRUE, dimnames = list(names(AA54_Metric3))) AA54_MetricFactor4 = matrix(unlist(AA54_Metric4), nrow = length(AA54_Metric4), byrow = TRUE, dimnames = list(names(AA54_Metric4))) AA54_MetricFactor5 = matrix(unlist(AA54_Metric5), nrow = length(AA54_Metric5), byrow = TRUE, dimnames = list(names(AA54_Metric5))) ### LDA on R Factor Transformation ### ######################################## AA54_lda_Metric1 = lda(AA54_MetricFactor1, grouping) AA54_lda_RawMetric1 = as.matrix(AA54_MetricFactor1) %*% AA54_lda_Metric1$scaling AA54_lda_RawMetric1Centered = scale(AA54_lda_RawMetric1, center = TRUE, scale = FALSE) plot(-1*AA54_lda_RawMetric1Centered[,1], -1*AA54_lda_RawMetric1Centered[,2], pch = letter_grouping, xlab="Canonical Variate 1", ylab="Canonical Variate 2", main="DA Scores (Centered Raw Coefficients)\nusing Factor1 (pah) from R transformation") lines(c(0,0), c(-15,15), lty="dashed") lines(c(-35,25), c(0,0), lty="dashed") AA54_lda_Metric2 = lda(AA54_MetricFactor2, grouping) AA54_lda_RawMetric2 = as.matrix(AA54_MetricFactor2) %*% AA54_lda_Metric2$scaling AA54_lda_RawMetric2Centered = scale(AA54_lda_RawMetric2, center = TRUE, scale = FALSE) plot(AA54_lda_RawMetric2Centered[,1], -1*AA54_lda_RawMetric2Centered[,2], pch = letter_grouping, xlab="Canonical Variate 1", ylab="Canonical Variate 2", main="DA Scores (Centered Raw Coefficients)\nusing Factor2 (pss) from R transformation") lines(c(0,0), c(-15,15), lty="dashed") lines(c(-35,25), c(0,0), lty="dashed") AA54_lda_Metric3 = lda(AA54_MetricFactor3, grouping) AA54_lda_RawMetric3 = as.matrix(AA54_MetricFactor3) %*% AA54_lda_Metric3$scaling AA54_lda_RawMetric3Centered = scale(AA54_lda_RawMetric3, center = TRUE, scale = FALSE) plot(AA54_lda_RawMetric3Centered[,1], -1*AA54_lda_RawMetric3Centered[,2], pch = letter_grouping, xlab="Canonical Variate 1", ylab="Canonical Variate 2", main="DA Scores (Centered Raw Coefficients)\nusing Factor3 (ms) from R transformation") lines(c(0,0), c(-15,15), lty="dashed") lines(c(-35,25), c(0,0), lty="dashed") AA54_lda_Metric4 = lda(AA54_MetricFactor4, grouping) AA54_lda_RawMetric4 = as.matrix(AA54_MetricFactor4) %*% AA54_lda_Metric4$scaling AA54_lda_RawMetric4Centered = scale(AA54_lda_RawMetric4, center = TRUE, scale = FALSE) plot(-1*AA54_lda_RawMetric4Centered[,1], -1*AA54_lda_RawMetric4Centered[,2], pch = letter_grouping, xlab="Canonical Variate 1", ylab="Canonical Variate 2", main="DA Scores (Centered Raw Coefficients)\nusing Factor4 (cc) from R transformation") lines(c(0,0), c(-15,15), lty="dashed") lines(c(-35,25), c(0,0), lty="dashed") AA54_lda_Metric5 = lda(AA54_MetricFactor5, grouping) AA54_lda_RawMetric5 = as.matrix(AA54_MetricFactor5) %*% AA54_lda_Metric5$scaling AA54_lda_RawMetric5Centered = scale(AA54_lda_RawMetric5, center = TRUE, scale = FALSE) plot(-1*AA54_lda_RawMetric5Centered[,1], -1*AA54_lda_RawMetric5Centered[,2], pch = letter_grouping, xlab="Canonical Variate 1", ylab="Canonical Variate 2", main="DA Scores (Centered Raw Coefficients)\nusing Factor5 (ec) from R transformation") lines(c(0,0), c(-15,15), lty="dashed") lines(c(-35,25), c(0,0), lty="dashed")