install.packages("psych") require(MASS) require(scatterplot3d) require(psych) AminoAcids = c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") small = c(1,2,3,6,12,13,16,17,18) polar = c(2,3,4,7,9,12,14,15,16,17,19,20) hydrophobic = c(1, 2,5,6,7,8,9,10,11,17,18,19,20) AA54_MetricCoeff = matrix( c( -0.59145974, -1.30209266, -0.7330651, 1.5703918, -0.14550842, -1.34267179, 0.46542300, -0.8620345, -1.0200786, -0.25516894, 1.05015062, 0.30242411, -3.6559147, -0.2590236, -3.24176791, 1.35733226, -1.45275578, 1.4766610, 0.1129444, -0.83715681, -1.00610084, -0.59046634, 1.8909687, -0.3966186, 0.41194139, -0.38387987, 1.65201497, 1.3301017, 1.0449765, 2.06385566, 0.33616543, -0.41662780, -1.6733690, -1.4738898, -0.07772917, -1.23936304, -0.54652238, 2.1314349, 0.3931618, 0.81630366, 1.83146558, -0.56109831, 0.5332237, -0.2771101, 1.64762794, -1.01895162, -0.98693471, -1.5046185, 1.2658296, -0.91181195, -0.66312569, -1.52353917, 2.2194787, -1.0047207, 1.21181214, 0.94535614, 0.82846219, 1.2991286, -0.1688162, 0.93339498, 0.18862522, 2.08084151, -1.6283286, 0.4207004, -1.39177378, 0.93056541, -0.17926549, -3.0048731, -0.5025910, -1.85303476, 1.53754853, -0.05472897, 1.5021086, 0.4403185, 2.89744417, -0.22788299, 1.39869991, -4.7596375, 0.6701745, -2.64747356, -0.03181782, 0.32571153, 2.2134612, 0.9078985, 1.31337035, -1.33661279, -0.27854634, -0.5440132, 1.2419935, -1.26225362, -0.59533918, 0.00907760, 0.6719274, -2.1275244, -0.18358096, 0.25999617, 0.82992312, 3.0973596, -0.8380164, 1.51150958 ), nrow=20, ncol=5, byrow=TRUE) colnames(AA54_MetricCoeff) = c("pah", "pss", "ms", "cc", "ec") rownames(AA54_MetricCoeff) = AminoAcids FactorTransform = function(Source, Search= AminoAcids, Replace = AA54_MetricCoeff, Factor = 1, bycol = TRUE, SeqName = NULL, ...){ if(is.matrix(Replace)){ if(bycol){ if(ncol(Replace) < Factor) stop("Column for Factor is greater than size of Replacement Matrix") Replace = as.vector(t(Replace[,Factor])) } else{ if(nrow(Replace) < Factor) stop("Row for Factor is greater than size of Replacement Matrix") Replace = as.vector(Replace[Factor,]) } } if(is.list(Source)){ if(!missing(SeqName)){ if(length(SeqName) != length(Source)){ print("Length of SeqName not the same as length of Source. Using default.") SeqName = NULL } else{ SeqNames = SeqName} } else if (is.null(SeqName)){ if(is.null(names(Source))) SeqNames = apply(array(1:length(Source)), 1, function(i) paste("Seq",i, sep="")) else SeqNames = names(Source) } result = FactorTransform.vector(as.character(Source), Search, Replace, ...) names(result) = SeqNames } else if(is.vector(Source)){ if(!missing(SeqName)){ if(length(SeqName) != length(Source)){ print("Length of SeqName not the same as length of Source. Using default.") SeqName = NULL } else{ SeqNames = SeqName} } else if (is.null(SeqName)){ SeqNames = apply(array(1:length(Source)), 1, function(i) paste("Seq",i, sep="")) } result = FactorTransform.vector(Source, Search, Replace, ...) if(length(SeqNames)==1) result = list(Seq1=result) else names(result) = SeqNames } else if(is.array(Source)){ result = FactorTransform.vector(Source, Search, Replace, ...) rownames(result) = rownames(Source) colnames(result) = colnames(Source) } else if(is.matrix(Source)){ result= matrix(nrow= nrow(Source), ncol = ncol(Source)) for(row in 1:nrow(Source)){ Seq = Source[row,] result[row,] = FactorTransform.default(Seq, Search, Replace, ...) } rownames(result) = rownames(Source) colnames(result) = colnames(Source) } else{ stop("Source must be a list, vector, matrix, or array") } result } FactorTransform.vector = function(Source, ...){ if(length(Source) ==1){ Source = unlist(strsplit(Source, NULL, fixed=TRUE)) SeqMetric = FactorTransform.default(Source, ...) } else{ SeqMetric = lapply(Source, FactorTransform.vector, ...) #transform each seq in vector } SeqMetric } FactorTransform.default= function(Source, Search=AminoAcids, Replace=AA54_MetricCoeff, ...) { if(!is.vector(Source)) stop("Source is not of type vector\n") if (length(Search) != length(Replace)) stop("Search and Replace Must Have Equal Number of Items\n") Changed <- as.character(Source) for (i in 1:length(Search)) Changed <- replace(Changed, Changed == Search[i], Replace[i]) as.numeric(Changed) } ## adapted from Marc Schwartz code at https://stat.ethz.ch/pipermail/r-help/2006-July/108829.html factor.pa.ginv = function (r, nfactors = 1, residuals = FALSE, prerotate= FALSE, rotate = "varimax", m=4, n.obs = NA, scores = c("none", "regression", "Bartlett"), SMC = TRUE, missing = FALSE, impute = "median", min.err = 0.001, digits = 2, max.iter = 50, symmetric = TRUE, warnings = TRUE ) { if(missing(scores)) scores = "none" n <- dim(r)[2] if (n != dim(r)[1]) { n.obs <- dim(r)[1] if (scores !="none") { x.matrix <- r if (missing) { miss <- which(is.na(x.matrix), arr.ind = TRUE) if (impute == "mean") { item.means <- colMeans(x.matrix, na.rm = TRUE) x.matrix[miss] <- item.means[miss[, 2]] } else { item.med <- apply(x.matrix, 2, median, na.rm = TRUE) x.matrix[miss] <- item.med[miss[, 2]] } } } r <- cor(r, use = "pairwise") } else{ if (!is.matrix(r)) { r <- as.matrix(r) } sds <- sqrt(diag(r)) r <- r/(sds %o% sds) # convert to correlation matrix } if (!residuals) { result <- list(values = c(rep(0, n)), rotation = rotate, n.obs = n.obs, communality = c(rep(0, n)), loadings = matrix(rep(0, n * n), ncol = n), fit = 0) } else { result <- list(values = c(rep(0, n)), rotation = rotate, n.obs = n.obs, communality = c(rep(0, n)), loadings = matrix(rep(0, n * n), ncol = n), residual = matrix(rep(0, n * n), ncol = n), fit = 0) } r.mat <- r Phi <- NULL colnames(r.mat) <- rownames(r.mat) <- colnames(r) if (SMC) { if (nfactors < n/2) { diag(r.mat) <- smc(r) } else { if (warnings) message("too many factors requested for this number of variables to use SMC, 1s used instead") } } orig <- diag(r) comm <- sum(diag(r.mat)) err <- comm i <- 1 comm.list <- list() while (err > min.err) { eigens <- eigen(r.mat, symmetric = symmetric) if (nfactors > 1) { loadings <- eigens$vectors[, 1:nfactors] %*% diag(sqrt(eigens$values[1:nfactors])) } else { loadings <- eigens$vectors[, 1] * sqrt(eigens$values[1]) } model <- loadings %*% t(loadings) new <- diag(model) comm1 <- sum(new) diag(r.mat) <- new err <- abs(comm - comm1) if (is.na(err)) { warning("imaginary eigen value condition encountered in factor.pa,\n Try again with SMC=FALSE \n exiting factor.pa") break } comm <- comm1 comm.list[[i]] <- comm1 i <- i + 1 if (i > max.iter) { if (warnings) { message("maximum iteration exceeded") } err <- 0 } } if (!is.real(loadings)) { warning("the matrix has produced imaginary results -- proceed with caution") loadings <- matrix(as.real(loadings), ncol = nfactors) } if (nfactors > 1) { sign.tot <- vector(mode = "numeric", length = nfactors) sign.tot <- colSums(loadings) sign.tot = sign(sign.tot) loadings <- loadings %*% diag(sign.tot) } else { if (sum(loadings) < 0) { loadings <- -as.matrix(loadings) } else { loadings <- as.matrix(loadings) } colnames(loadings) <- "F1" } colnames(loadings) <- paste("F", 1:nfactors, sep = "") rownames(loadings) <- rownames(r) loadings[loadings == 0] <- 10^-15 rotmat = NULL model <- loadings %*% t(loadings) result$communality <- round(diag(model), digits) result$uniquenesses <- round(diag(r - model), digits) if (rotate != "none") { if (nfactors > 1) { if(prerotate){ varmax = varimax(loadings) loadings <- varmax$loadings rotmat = varmax$rotmat Phi <- NULL } if ((rotate == "varimax" | rotate == "Varimax") && !prerotate) { varmax = varimax(loadings) loadings <- varmax$loadings rotmat = varmax$rotmat Phi <- NULL } else { if ((rotate == "promax") | (rotate == "Promax")) { pro <- Promax.only(loadings, m) loadings <- pro$loadings rotmat = pro$rotmat Phi <- pro$Phi } else { if (rotate == "oblimin") { if (!require(GPArotation)) { warning("I am sorry, to do oblimin rotations requires the GPArotation package to be installed") Phi <- NULL } else { ob <- oblimin(loadings) loadings <- ob$loadings Phi <- ob$Phi } } } } } } if (nfactors > 1) { ev.rotated <- diag(t(loadings) %*% loadings) ev.order <- order(ev.rotated, decreasing = TRUE) loadings <- loadings[, ev.order] } colnames(loadings) <- paste("F", 1:nfactors, sep = "") rownames(loadings) <- colnames(r) if (!is.null(Phi)) { Phi <- Phi[ev.order, ev.order] } class(loadings) <- "loadings" if (nfactors < 1) nfactors <- n residual <- r - model r2 <- sum(r * r) rstar2 <- sum(residual * residual) if (residuals) { result$residual <- round(residual, digits) } r2.off <- r diag(r2.off) <- 0 r2.off <- sum(r2.off^2) diag(residual) <- 0 rstar.off <- sum(residual^2) result$fit <- round(1 - rstar2/r2, digits) result$fit.off <- round(1 - rstar.off/r2.off, digits) result$values <- round(eigens$values, digits) diag(model) <- 1 model.inv <- solve(model) m.inv.r <- model.inv %*% r if (is.na(n.obs)) { result$n.obs = NA result$PVAL = NA } else { result$n.obs = n.obs } result$rotmat = rotmat result$dof <- n * (n - 1)/2 - n * nfactors + (nfactors * (nfactors - 1)/2) result$objective <- sum(diag((m.inv.r))) - log(det(m.inv.r)) - n result$criteria <- c(objective = result$objective, NA, NA) if (!is.na(n.obs)) { result$STATISTIC <- result$objective * ((n.obs - 1) - (2 * n + 5)/6 - (2 * nfactors)/3) if (!is.nan(result$STATISTIC)) if (result$STATISTIC < 0) { result$STATISTIC <- 0 } if (result$dof > 0) { result$PVAL <- pchisq(result$STATISTIC, result$dof, lower.tail = FALSE) } else result$PVAL <- NA } result$loadings <- round(loadings, digits) if (!is.null(Phi)) { result$Phi <- Phi } result$communality.iterations <- round(unlist(comm.list), digits) if (scores != "none") { Lambda <- result$loadings zz <- scale(x.matrix, TRUE, TRUE) switch(scores, regression = { InvCov = try(solve(r, Lambda), TRUE) if(length(InvCov)<=1){ cat("Could not solve for inverse correlation. Using general inverse ginv(r) \n") InvCov = ginv(r) %*% Lambda } sc <- zz %*% InvCov if (!is.null(Phi)) { sc <- sc %*% Phi } }, Bartlett = { d <- 1/(result$uniquenesses) tmp <- t(Lambda * d) sc <- t(solve(tmp %*% Lambda, tmp %*% t(zz))) }) rownames(sc) <- rownames(x.matrix) colnames(sc) <- colnames(Lambda) result$scores <- sc } result$factors <- nfactors result$fn <- "factor.pa" class(result) <- c("psych", "fa") return(result) } Promax.only = function (x, m = 4) { if (!is.matrix(x) & !is.data.frame(x)) { if (!is.null(x$loadings)) x <- as.matrix(x$loadings) } else { x <- x } if (ncol(x) < 2) return(x) dn <- dimnames(x) xx <- varimax(x) ev.rotated <- diag(t(xx$loadings) %*% xx$loadings) ev.order <- order(ev.rotated, decreasing = TRUE) loadings <- xx$loadings[, ev.order] colnames(loadings) <- colnames(loadings[,ev.order]) Varimax_rotmat <- xx$rotmat[, ev.order] x <- loadings Q <- x * abs(x)^(m - 1) U <- lm.fit(x, Q)$coefficients d <- diag(solve(t(U) %*% U)) U <- U %*% diag(sqrt(d)) dimnames(U) <- NULL z <- x %*% U U <- xx$rotmat %*% U ui <- solve(U) Phi <- ui %*% t(ui) dimnames(z) <- dn class(z) <- "loadings" result <- list(loadings = z, rotmat = U, Phi = Phi) class(result) <- c("psych", "fa") return(result) } pairwise.mahalanobis = function(x, grouping=NULL, cov =NULL, inverted=FALSE, ...) { x <- if (is.vector(x)) #standardize input data as matrix matrix(x, ncol = length(x)) else as.matrix(x) if(!is.matrix(x)) stop("x could not be forced into a matrix") if(length(grouping) ==0){ #no group assigned, uses first col grouping = t(x[1]) x = x[2:dim(x)[2]] cat("assigning grouping\n") print(grouping) } n <- nrow(x) p <- ncol(x) if (n != length(grouping)){ #grouping and matrix do not correspond cat(paste("n: ", n, "and groups: ", length(grouping), "\n")) stop("nrow(x) and length(grouping) are different") } g <- as.factor(grouping) g lev <- lev1 <- levels(g) # Groups counts <- as.vector(table(g)) # No. of elements in each group if (any(counts == 0)) { # Remove grouping if not represented in data empty <- lev[counts == 0] warning(sprintf(ngettext(length(empty), "group %s is empty", "groups %s are empty"), paste(empty, collapse = " ")), domain = NA) lev1 <- lev[counts > 0] g <- factor(g, levels = lev1) counts <- as.vector(table(g)) } ng = length(lev1) group.means <- tapply(x, list(rep(g, p), col(x)), mean) # g x p matrix of group means from x if(missing(cov)){ #create covariance matrix inverted = FALSE cov = cor(x) #standardize into correlation mtx } else{ #check cov of correct dimension if(dim(cov) != c(p,p)) stop("cov matrix not of dim = (p,p)\n") } Distance = matrix(nrow=ng, ncol=ng) #initialize distance matrix Means = group.means Cov = cov for(i in 1:ng){ Distance[i,]= mahalanobis(group.means, group.means[i,], cov, inverted) } result <- list(means = group.means, cov = cov, distance = Distance) class(result) <- c("MYPACKAGE") result } lda.TotalSampleCoefficients = function(x, lda, scaling, subset, na.action){ if (!missing(scaling)) { scaling = as.matrix(scaling); } else{ if(missing(lda)) stop("Must provide either lda object or scaling coefficients") scaling = lda$scaling } if(is.data.frame(x)){ x = structure(data.matrix(x), class = "matrix") } else if(class(x) == "formula"){ m <- match.call(expand.dots = FALSE) m$... <- NULL m[[1L]] <- as.name("model.frame") m <- eval.parent(m) Terms <- attr(m, "terms") grouping <- model.response(m) x <- model.matrix(Terms, m) xint <- match("(Intercept)", colnames(x), nomatch = 0L) if (xint > 0) x <- x[, -xint, drop = FALSE] } if(is.matrix(x)){ if (!missing(subset)) { x <- x[subset, , drop = FALSE] grouping <- grouping[subset] } if (!missing(na.action)) { dfr <- na.action(structure(list(g = grouping, x = x), class = "data.frame")) grouping <- dfr$g x <- dfr$x } } if (is.null(dim(x))) stop("'x' is not a matrix") x <- as.matrix(x) if (any(!is.finite(x))) stop("infinite, NA or NaN values in 'x'") zz = scale(x, center = TRUE, scale = TRUE); Q <- x %*% scaling TotalSample_scaling <- lm.fit(zz, Q)$coefficients TotalSample_scaling } ########### psych PACKAGE functions ##### smc = function (R) { failed = FALSE p <- dim(R)[2] if (dim(R)[1] != p) { R <- cor(R, use = "pairwise") } else { R <- cov2cor(R) if (!is.matrix(R)) R <- as.matrix(R) } R.inv <- try(solve(R), TRUE) if (class(R.inv) == as.character("try-error")) { smc <- rep(1, p) warning("Correlation matrix not invertible, smc's returned as 1s") } else { smc <- 1 - 1/diag(R.inv) } return(smc) } VSS.scree = function (rx, main = "scree plot") { nvar <- dim(rx)[2] if (nvar != dim(rx)[1]) { rx <- cor(rx, use = "pairwise") } values <- eigen(rx)$values plot(values, type = "b", main = main) abline(h = 1) }