# Load packages ----------------------------------------------------------------
# load packages
library(PCovR) # for data
library(pls) # for pls algorithm
library(plsdof) # for pls algorithm
# Load data
data(alexithymia)
# Divide in X and y
X <- alexithymia$X
y <- alexithymia$Y$RSE

# Count the number of variables and observations
p <- ncol(X)
n <- nrow(X)

# Define the number of directions we will use
M <- 10
Introduction
Many data analysts face the problem of analyzing data sets with many, often highly correlated, variables. Partial Least Squares Regression (PLSR) is a regression method that uses linear combinations of the original predictors to reduce their dimensionality. Like principal component regression (PCR), PLSR uses derived inputs; however, it differs from PCR in how the linear combinations are constructed.
Given a set of predictors \(X_{n \times p}\) and a vector of dependent variable scores \(y_{n \times 1}\), the least-squares solution for the multiple linear regression
\[ y = X \beta + \epsilon\text{, with } \epsilon \sim N(0, \sigma^2) \]
is
\[ \hat{\beta} = (X'X)^{-1}X'y \]
where \(X'\) is the transpose of the data matrix \(X\), and \((\cdot)^{-1}\) denotes the matrix inverse. When collinearity is present in \(X\) or \(p > n\), \(X'X\) is singular and its inverse cannot be computed. Derived-input regression methods like PCR and PLSR bypass this problem by taking linear combinations of the columns of the original \(X\) and regressing \(y\) on just a few of these linear combinations. The peculiarity of PLSR is that it includes information on both \(X\) and \(y\) in the definition of the linear combinations. In this post, we look at two algorithms to estimate PLSR to get a better understanding of the method.
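To make the problem concrete, here is a minimal sketch with simulated data (the objects n_toy, p_toy, and X_toy are only for this illustration, not part of the analysis below): when \(p > n\), the cross-product matrix \(X'X\) cannot have full rank, so solve() refuses to invert it.
# Minimal sketch: X'X is rank-deficient when p > n
set.seed(1)
n_toy <- 20
p_toy <- 30
X_toy <- matrix(rnorm(n_toy * p_toy), ncol = p_toy)
qr(crossprod(X_toy))$rank    # at most n_toy, so smaller than p_toy
try(solve(crossprod(X_toy))) # error: the system is computationally singular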
Popular algorithms to implement PLS
Here, I describe informally the algorithm steps:
- Preprocessing the data - the columns of the input matrix \(X\) are standardized to have mean 0 and variance 1.
- The cross-product of every column of \(X\) and \(y\) is computed: \(\rho_{1j} = x_{j}' y\) for \(j = 1, \dots, p\).
- Compute the first partial-least-square direction \(z_1\) - The cross-products \(\rho_{1j}\) are used as weights to obtain a linear combination of the columns: \(z_1 = \sum \rho_{1j} x_{j}\). This implies that the contribution of each column to \(z_1\) is weighted by its univariate relationship with the dependent variable \(y\).
- Regression of \(y\) on \(z_1\) - The outcome variable \(y\) is regressed on this first direction \(z_1\) to obtain \(\hat{\theta}_1\).
- Orthogonalization of \(X\) - All columns of \(X\) are orthogonalized with respect to \(z_1\).
- The cross-product of every column of \(X\) and \(y\) is computed again: \(\rho_{2j} = x_{j}' y\) for \(j = 1, \dots, p\).
- Compute the second partial-least-square direction \(z_2\) - The cross-products \(\rho_{2j}\) are used as weights to obtain a linear combination of the columns: \(z_2 = \sum \rho_{2j} x_{j}\). Notice that the columns \(x_j\) we are using now are orthogonal to the previous partial least square direction \(z_1\).
- Regression of \(y\) on \(z_2\) - The outcome variable \(y\) is regressed on the second direction \(z_2\) to obtain \(\hat{\theta}_2\).
- Orthogonalization of \(X\) - All columns of \(X\) are orthogonalized with respect to \(z_2\).
The procedure continues until \(M\) partial least square directions have been computed. The result is a set of independent directions that have both high variance and high correlation with the dependent variable, in contrast to PCR which finds a set of independent directions that have high variance.
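As a quick illustration of the first four steps, here is a small sketch using the X and y loaded in the first code block (the names Xs1, rho_1, z_1, and theta_1 are only for this sketch):
# Sketch of the first PLS direction
Xs1 <- scale(as.matrix(X))                           # standardize the predictors
rho_1 <- drop(crossprod(Xs1, y))                     # cross-products x_j'y
z_1 <- Xs1 %*% rho_1                                 # first direction z1
theta_1 <- drop(crossprod(z_1, y) / crossprod(z_1))  # regression of y on z1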
We now report pseudo-code for the implementation of the PLS algorithm (inspired by Algorithm 3.3, p. 103, in Hastie, Tibshirani, and Wainwright (2015)). We will use it to write the R code in the next section.
- Standardize each \(x_j\) to have mean 0 and variance 1
- Set:
- \(\hat{y}^{(0)} = \bar{y}1\)
- \(x_{j}^{(0)} = x_{j}\)
- For \(m = 1, 2, \dots, M\)
- \(z_m = \sum_{j = 1}^{p} \rho_{mj}x_{j}^{(m-1)}\), with \(\rho_{mj} = x_{j}^{(m-1)'} y\)
- \(\hat{\theta}_m = \frac{z_m'y}{z_m' z_m}\)
- \(\hat{y}^{(m)} = \hat{y}^{(m-1)} + \hat{\theta}_m z_m\)
- for \(j = 1, \dots, p\), orthogonalize \(x_{j}^{(m-1)}\) with respect to \(z_m\): \(x_{j}^{(m)} = x_{j}^{(m-1)} - \frac{z_m' x_{j}^{(m-1)}}{z_m' z_m}z_m\)
- Output the sequence of fitted vectors \(\hat{y}^{(1)}, \dots, \hat{y}^{(M)}\)
Learn by coding
We start by loading a package already implementing the PLS algorithm and some data to test our code. We load the PCovR package to use the alexithymia data, which reports the scores of 122 Belgian psychology students on three scales:
- 20-item Toronto Alexithymia Scale (TAS-20) measuring the inability to recognize emotions.
- Center for Epidemiological Studies Depression Scale (CES-D) measuring depression.
- Rosenberg Self-Esteem Scale (RSE).
The main objective of the data collection was to assess how well TAS-20 can predict depression and self-esteem.¹
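Before fitting anything, a quick look at the objects we will use (a small sketch; X holds the TAS-20 items and, as used below, the self-esteem scores live in alexithymia$Y):
# Quick look at the data: 122 students, TAS-20 items in X, criterion scales in Y
dim(alexithymia$X)
str(alexithymia$Y)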
Coding the PLS algorithm manually
Let’s go through the steps described in the pseudo-code above. First, we want to standardize the predictors.
# 1. Standardize the data ------------------------------------------------------
# Scale the predictors
Xstd <- scale(X)
# Means of X are now 0
cbind(
X = head(colMeans(X)),
Xstd = round(head(colMeans(Xstd)), 5)
)
X Xstd
confused 1.8196721 0
right words 1.7950820 0
sensations 0.5983607 0
describe 2.2213115 0
analyze problems 2.5081967 0
upset 1.6065574 0
# SD of X are now 1
cbind(
X = head(apply(X, 2, sd)),
Xstd = round(head(apply(Xstd, 2, sd)), 5)
)
X Xstd
confused 1.178431 1
right words 1.278807 1
sensations 1.073034 1
describe 1.216364 1
analyze problems 1.077539 1
upset 1.295621 1
Then we want to set the initial values.
# 2. Set initial values --------------------------------------------------------
# 2a: Set the initial prediction for y_hat to the mean of y
# Create an empty matrix to store the initial value and future predictions
yhat_m <- matrix(rep(NA, n * (M + 1)), nrow = n)

# Replace the initial prediction with the mean of y
yhat_m[, 1] <- mean(y)

# 2b: Set every xj0 to xj
# Create an empty list of X values
Xm <- lapply(1:(M + 1), matrix, nrow = n, ncol = p)

# Place X as initial value for Xm
Xm[[1]] <- as.matrix(Xstd)
Finally, we can move to the estimation step. First, we create the container objects to store the results.
# 3. Estimation ----------------------------------------------------------------
# Preparing objects
z <- matrix(NA, nrow = n, ncol = (M + 1))  # container for directions
W <- matrix(NA, nrow = p, ncol = (M + 1))  # container for weights
theta_hat <- rep(NA, (M + 1))              # container for thetas
Then we move to the proper estimation.
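The loop calls a small orthogonalize() helper (the same function reported in the TL;DR section at the end of the post); we define it here so the chunk runs top to bottom:
# Orthogonalize vec1 with respect to vec2 (remove the projection of vec1 on vec2)
orthogonalize <- function(vec1, vec2) {
  v <- vec1
  u <- vec2
  newv <- v - drop(t(u) %*% v / (t(u) %*% u)) * u
  return(newv)
}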
# PLS Algorithm following HastieEtAl2017 p 81 (Algorithm 3.3)
for (m in 2:(M + 1)) {
  # 3a: Compute zm
  W[, m] <- t(Xm[[m - 1]]) %*% y   # inner products / covariances
  z[, m] <- Xm[[m - 1]] %*% W[, m] # compute the direction zm

  # 3b: Compute regression coefficient for y ~ Zm
  theta_hat[m] <- drop(t(z[, m]) %*% y / t(z[, m]) %*% z[, m])

  # 3c: Compute predicted y with the current m directions
  yhat_m[, m] <- yhat_m[, m - 1] + theta_hat[m] * z[, m]

  # 3d: Orthogonalize all columns of X with respect to zm
  for (j in 1:p) {
    Xm[[m]][, j] <- orthogonalize(Xm[[m - 1]][, j], z[, m])
  }
}
# Fit PLSR model w/ pls package
pls_fit_pls <- pls::plsr(
  y ~ as.matrix(X),
  ncomp = M,
  method = "oscorespls",
  validation = "none",
  scale = TRUE
)

# Fit PLSR model w/ plsdof package
pls_fit_plsdof <- plsdof::pls.model(as.matrix(Xstd), y)

# Compare predictions using up to a given m
m <- 3
head(
  data.frame(
    pls = round(as.data.frame(fitted(pls_fit_pls)), 3)[, m],
    plsdof = round(pls_fit_plsdof$Yhat, 3)[, m + 1],
    man = round(yhat_m, 3)[, m + 1]
  )
)
pls plsdof man
1 36.819 36.819 36.819
2 29.243 29.243 29.243
3 31.336 31.336 31.336
4 30.767 30.767 30.767
5 34.869 34.869 34.869
6 24.876 24.876 24.876
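As an extra check (a small sketch, not part of the original comparison), all.equal() can confirm that the pls and plsdof fitted values agree up to numerical tolerance for the chosen number of directions m:
# Should return TRUE if the two implementations match numerically;
# otherwise the mean difference is reported
all.equal(
  drop(fitted(pls_fit_pls)[, , m]),
  drop(pls_fit_plsdof$Yhat[, m + 1]),
  check.attributes = FALSE
)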
Types of dependent variables
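The pls package also accepts a multivariate dependent variable. In the oliveoil data shipped with pls, sensory is a matrix of sensory panel scores and chemical a matrix of chemical quality measurements, so the pcr() and plsr() calls below fit one model for all sensory outcomes at once.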
# Types of DVs -----------------------------------------------------------------
data(oliveoil)
sens.pcr <- pls::pcr(sensory ~ chemical, ncomp = 4, scale = TRUE, data = oliveoil)
sens.pls <- pls::plsr(sensory ~ chemical, ncomp = 4, scale = TRUE, data = oliveoil)

oliveoil$sensory
yellow green brown glossy transp syrup
G1 21.4 73.4 10.1 79.7 75.2 50.3
G2 23.4 66.3 9.8 77.8 68.7 51.7
G3 32.7 53.5 8.7 82.3 83.2 45.4
G4 30.2 58.3 12.2 81.1 77.1 47.8
G5 51.8 32.5 8.0 72.4 65.3 46.5
I1 40.7 42.9 20.1 67.7 63.5 52.2
I2 53.8 30.4 11.5 77.8 77.3 45.2
I3 26.4 66.5 14.2 78.7 74.6 51.8
I4 65.7 12.1 10.3 81.6 79.6 48.3
I5 45.0 31.9 28.4 75.7 72.9 52.8
S1 70.9 12.2 10.8 87.7 88.1 44.5
S2 73.5 9.7 8.3 89.9 89.7 42.3
S3 68.1 12.0 10.8 78.4 75.1 46.4
S4 67.6 13.9 11.9 84.6 83.8 48.5
S5 71.4 10.6 10.8 88.1 88.5 46.7
S6 71.4 10.0 11.4 89.5 88.5 47.2
Prediction of new data
How do we predict new data? Let's start by generating data from scratch.
# Prediction -------------------------------------------------------------------
n <- 50 # number of observations
p <- 15 # number of variables
X <- matrix(rnorm(n * p), ncol = p)
y <- 100 + rnorm(n)
M <- 10 # number of "components"

ntest <- 200 # number of test observations
Xtest <- matrix(rnorm(ntest * p), ncol = p) # test data
ytest <- rnorm(ntest) # test data

# Fit alternative PLSs
out_pls <- pls::plsr(
  y ~ X,
  ncomp = M,
  scale = TRUE,
  center = TRUE,
  method = "oscorespls",
  validation = "none",
  x = TRUE,
  model = TRUE
)
out_plsdof <- plsdof::pls.model(X, y, compute.DoF = TRUE, Xtest = Xtest, ytest = NULL)
# Obtain predictions on new data
head(
  round(
    cbind(
      PLS = predict(out_pls, newdata = Xtest)[, , M],
      PLSdof = out_plsdof$prediction[, M + 1]
    ),
    5
  )
)
PLS PLSdof
[1,] 99.00044 99.00044
[2,] 101.55959 101.55959
[3,] 99.71676 99.71676
[4,] 99.58406 99.58406
[5,] 99.34923 99.34923
[6,] 100.52233 100.52233
# Make sure scale of prediction is correct
out_pls_cF <- plsr(
  y ~ X,
  ncomp = M,
  scale = TRUE,
  center = FALSE,
  method = "oscorespls",
  validation = "none"
)

Xs <- scale(X, center = TRUE, scale = FALSE)
ys <- scale(y, center = FALSE, scale = FALSE)

out_pls_cF <- plsr(
  ys ~ Xs,
  ncomp = M,
  scale = TRUE,
  center = FALSE,
  method = "oscorespls",
  validation = "none"
)

head(
  round(
    cbind(
      PLS = predict(out_pls, newdata = Xtest)[, , M],
      PLS_cf = mean(y) + predict(out_pls_cF, newdata = Xtest)[, , M],
      PLSdof = out_plsdof$prediction[, M]
    ),
    5
  )
)
PLS PLS_cf PLSdof
[1,] 99.00044 99.07823 99.00042
[2,] 101.55959 101.63738 101.55947
[3,] 99.71676 99.79455 99.71674
[4,] 99.58406 99.66185 99.58385
[5,] 99.34923 99.42702 99.34921
[6,] 100.52233 100.60012 100.52225
Degrees of freedom of the residuals
# PLS degrees of freedom -------------------------------------------------------
library(plsdof)
set.seed(1234)
# Generate data
n <- 100 # number of observations
p <- 15  # number of variables
m <- 15
X <- matrix(rnorm(n * p), ncol = p)
y <- rnorm(n)

# Fit model with package
outpls <- pls.model(X, y, compute.DoF = TRUE)
outpls$DoF
outpls.internal <- linear.pls.fit(X, y, m, DoF.max = min(n - 1, p + 1))

# Fit model with the manual PLS function
outpls_man <- pls.manual(ivs = X, dv = y, m = m)

# Fit model with plsr function from pls
pls_fit_pls <- plsr(
  y ~ X,
  ncomp = m,
  scale = FALSE,
  center = FALSE,
  method = "oscorespls",
  validation = "none"
)

# Y hats
round(outpls_man$Yhat - outpls$Yhat, 5)

# T scores
j <- 1
cbind(
  PLSR = apply(scores(pls_fit_pls), 2, function(j) j / sqrt(sum(j^2)))[, j],
  PLSTT = outpls.internal$TT[, j],
  manualTs = outpls_man$Ts[, j],
  manualTsn = outpls_man$Tsn[, j]
)

# Degrees of freedom PLSR
predi <- sapply(1:m, function(j) {
  predict(pls_fit_pls, ncomp = j)
})
DoF_plsr <- dofPLS(
  X,
  y,
  TT = apply(scores(pls_fit_pls), 2, function(j) j / sqrt(sum(j^2))),
  Yhat = predi,
  DoF.max = m + 1
)

# Degrees of freedom manual
DoF_manual <- dofPLS(
  X,
  y,
  TT = outpls_man$Tsn,
  Yhat = outpls_man$Yhat[, 2:(m + 1)],
  DoF.max = m + 1
)

# Single DoF
DoF_single <- c(1, sapply(1:m, function(i) {
  dofPLS_single(
    X,
    y,
    TT = outpls_man$Tsn,
    Yhat = outpls_man$Yhat[, (i + 1)],
    q = i
  )
}))

# Compare degrees of freedom
cbind(
  PLSR = DoF_plsr,
  PLSdof = outpls$DoF,
  PLS_manual = DoF_manual,
  diff = round(outpls$DoF - DoF_manual, 5),
  DoF_single = DoF_single
)
TL;DR, just give me the code!
# Functions needed ----------------------------------------------------------------
# Degrees of freedom for supervised derived input models
dofPLS <- function(X, y, TT, Yhat, m = ncol(X), DoF.max = ncol(X) + 1) {
  # Example inputs
  # X = scale(mtcars[, -1])
  # y = mtcars[, 1]
  # m = ncol(X)
  # DoF.max = m + 1
  # TT <- linear.pls.fit(X, y, m, DoF.max = DoF.max)$TT # normalized component scores
  # Yhat <- linear.pls.fit(X, y, m, DoF.max = DoF.max)$Yhat[, 2:(m + 1)]

  # Body
  n <- nrow(X)

  # Scale data
  mean.X <- apply(X, 2, mean)
  sd.X <- apply(X, 2, sd)
  sd.X[sd.X == 0] <- 1
  X <- X - rep(1, nrow(X)) %*% t(mean.X)
  X <- X / (rep(1, nrow(X)) %*% t(sd.X))
  K <- X %*% t(X)

  # pls.dof
  DoF.max <- DoF.max - 1
  TK <- matrix(, m, m)
  KY <- krylov(K, K %*% y, m)
  lambda <- eigen(K)$values
  tr.K <- vector(length = m)
  for (i in 1:m) {
    tr.K[i] <- sum(lambda^i)
  }
  BB <- t(TT) %*% KY
  BB[row(BB) > col(BB)] <- 0
  b <- t(TT) %*% y
  DoF <- vector(length = m)
  Binv <- backsolve(BB, diag(m))
  tkt <- rep(0, m)
  ykv <- rep(0, m)
  KjT <- array(dim = c(m, n, m))
  dummy <- TT
  for (i in 1:m) {
    dummy <- K %*% dummy
    KjT[i, , ] <- dummy
  }
  trace.term <- rep(0, m)

  for (i in 1:m) {
    Binvi <- Binv[1:i, 1:i, drop = FALSE]
    ci <- Binvi %*% b[1:i]
    Vi <- TT[, 1:i, drop = FALSE] %*% t(Binvi)
    trace.term[i] <- sum(ci * tr.K[1:i])
    ri <- y - Yhat[, i]
    for (j in 1:i) {
      KjTj <- KjT[j, , ]
      tkt[i] <- tkt[i] + ci[j] * tr(t(TT[, 1:i, drop = FALSE]) %*% KjTj[, 1:i, drop = FALSE])
      ri <- K %*% ri
      ykv[i] <- ykv[i] + sum(ri * Vi[, j])
    }
  }

  DoF <- trace.term + 1:m - tkt + ykv

  DoF[DoF > DoF.max] <- DoF.max
  DoF <- c(0, DoF) + 1
  # TODO: check that it is correct to add the 1 after checking the DoF max condition
  DoF
}
# Extract single factor --------------------------------------------------------
dofPLS_single <- function(X, y, q = 1, TT, Yhat) {
  # Example inputs
  # X = scale(mtcars[, -1])
  # y = mtcars[, 1]
  # m = ncol(X)
  # DoF.max = m + 1
  # TT <- linear.pls.fit(X, y, m, DoF.max = DoF.max)$TT # normalized component scores
  # q <- 3 # desired component / latent variable
  # Yhat <- linear.pls.fit(X, y, m, DoF.max = DoF.max)$Yhat[, (q + 1)]

  # Body
  n <- nrow(X)
  m <- ncol(X)
  DoF.max <- ncol(X) + 1

  # Scale data
  mean.X <- apply(X, 2, mean)
  sd.X <- apply(X, 2, sd)
  sd.X[sd.X == 0] <- 1
  X <- X - rep(1, nrow(X)) %*% t(mean.X)
  X <- X / (rep(1, nrow(X)) %*% t(sd.X))
  K <- X %*% t(X)

  # pls.dof
  DoF.max <- DoF.max - 1
  TK <- matrix(, m, m)
  KY <- krylov(K, K %*% y, m)
  lambda <- eigen(K)$values
  tr.K <- vector(length = m)
  for (i in 1:m) {
    tr.K[i] <- sum(lambda^i)
  }
  BB <- t(TT) %*% KY
  BB[row(BB) > col(BB)] <- 0
  b <- t(TT) %*% y
  DoF <- vector(length = m)
  Binv <- backsolve(BB, diag(m))
  tkt <- 0
  ykv <- 0
  KjT <- array(dim = c(q, n, m))
  dummy <- TT
  for (i in 1:q) {
    dummy <- K %*% dummy
    KjT[i, , ] <- dummy
  }
  trace.term <- 0

  Binvi <- Binv[1:q, 1:q, drop = FALSE]
  ci <- Binvi %*% b[1:q]
  Vi <- TT[, 1:q, drop = FALSE] %*% t(Binvi)
  trace.term <- sum(ci * tr.K[1:q])
  ri <- y - Yhat
  for (j in 1:q) {
    KjTj <- KjT[j, , ]
    tkt <- tkt + ci[j] * tr(t(TT[, 1:q, drop = FALSE]) %*% KjTj[, 1:q, drop = FALSE])
    ri <- K %*% ri
    ykv <- ykv + sum(ri * Vi[, j])
  }

  DoF <- trace.term + q - tkt + ykv
  DoF <- ifelse(DoF > DoF.max, DoF.max, DoF)
  DoF <- DoF + 1

  DoF
}
# PLS function manual
pls.manual <- function(ivs, dv, m = 1L) {

  # Parms
  # M <- 10 # number of "components"
  # ivs <- yarn[[1]]
  # dv <- yarn[[2]]

  # Scale data
  M <- m + 1
  p <- ncol(ivs)
  n <- nrow(ivs)
  X <- lapply(1:M, matrix, nrow = n, ncol = p)
  X[[1]] <- scale(ivs)
  y <- dv
  y_hat <- cbind(
    mean(y),
    matrix(rep(NA, n * (M - 1)), nrow = n)
  )
  z <- matrix(NA, nrow = n, ncol = M)
  theta_hat <- rep(NA, M)
  W <- matrix(nrow = ncol(ivs), ncol = M)

  # PLS Algorithm following HastieEtAl2017 p 81 (Algorithm 3.3)
  for (m in 2:M) {
    # 3a: Compute zm
    store_2a <- matrix(NA, nrow = n, ncol = p)
    for (j in 1:p) {
      rho_hat_mj <- t(X[[m - 1]][, j]) %*% y
      store_2a[, j] <- rho_hat_mj %*% X[[m - 1]][, j]
    }
    z[, m] <- rowSums(store_2a)

    # 3b: Compute regression coefficient for y ~ Zm
    theta_hat[m] <- drop(t(z[, m]) %*% y / t(z[, m]) %*% z[, m])

    # 3c: Compute predicted y with the current m directions
    y_hat[, m] <- y_hat[, m - 1] + theta_hat[m] * z[, m]

    # 3d: Orthogonalize all columns of X with respect to zm
    for (j in 1:p) {
      X[[m]][, j] <- orthogonalize(X[[m - 1]][, j], z[, m])
    }
  }

  # Normalize the component scores
  Tsn <- apply(z[, -1], 2, function(j) j / sqrt(sum(j^2)))

  # Return
  return(
    list(
      Ts = z[, -1],
      Tsn = Tsn,
      Yhat = y_hat,
      W = W[, -1]
    )
  )
}
# Orthogonalize two vectors
orthogonalize <- function(vec1, vec2) {
  v <- vec1
  u <- vec2

  newv <- v - drop(t(u) %*% v / (t(u) %*% u)) * u

  return(newv)
}
# Load packages ----------------------------------------------------------------
# load packages
library(PCovR) # for data
library(pls) # for pls algorithm
library(plsdof) # for pls algorithm
# Load data
data(alexithymia)
# Divide in X and y
X <- alexithymia$X
y <- alexithymia$Y$RSE

# Count the number of variables and observations
p <- ncol(X)
n <- nrow(X)

# Define the number of directions we will use
M <- 10
# 1. Standardize the data ------------------------------------------------------
# Scale the predictors
Xstd <- scale(X)
# Means of X are now 0
cbind(
X = head(colMeans(X)),
Xstd = round(head(colMeans(Xstd)), 5)
)
# SD of X are now 1
cbind(
X = head(apply(X, 2, sd)),
Xstd = round(head(apply(Xstd, 2, sd)), 5)
)
# 2. Set initial values --------------------------------------------------------
# 2a: Set the initial prediction for y_hat to the mean of y
# Create an empty matrix to store the initial value and future predictions
yhat_m <- matrix(rep(NA, n * (M + 1)), nrow = n)

# Replace the initial prediction with the mean of y
yhat_m[, 1] <- mean(y)

# 2b: Set every xj0 to xj
# Create an empty list of X values
Xm <- lapply(1:(M + 1), matrix, nrow = n, ncol = p)

# Place X as initial value for Xm
Xm[[1]] <- as.matrix(Xstd)
# 3. Estimation ----------------------------------------------------------------
# Preparing objects
z <- matrix(NA, nrow = n, ncol = (M + 1))  # container for directions
W <- matrix(NA, nrow = p, ncol = (M + 1))  # container for weights
theta_hat <- rep(NA, (M + 1))              # container for thetas
# PLS Algorithm following HastieEtAl2017 p 81 (Algorithm 3.3)
for (m in 2:(M + 1)) {
  # 3a: Compute zm
  W[, m] <- t(Xm[[m - 1]]) %*% y   # inner products / covariances
  z[, m] <- Xm[[m - 1]] %*% W[, m] # compute the direction zm

  # 3b: Compute regression coefficient for y ~ Zm
  theta_hat[m] <- drop(t(z[, m]) %*% y / t(z[, m]) %*% z[, m])

  # 3c: Compute predicted y with the current m directions
  yhat_m[, m] <- yhat_m[, m - 1] + theta_hat[m] * z[, m]

  # 3d: Orthogonalize all columns of X with respect to zm
  for (j in 1:p) {
    Xm[[m]][, j] <- orthogonalize(Xm[[m - 1]][, j], z[, m])
  }
}
# Fit PLSR model w/ pls package
pls_fit_pls <- pls::plsr(
  y ~ as.matrix(X),
  ncomp = M,
  method = "oscorespls",
  validation = "none",
  scale = TRUE
)

# Fit PLSR model w/ plsdof package
pls_fit_plsdof <- plsdof::pls.model(as.matrix(Xstd), y)

# Compare predictions using up to a given m
m <- 3
head(
  data.frame(
    pls = round(as.data.frame(fitted(pls_fit_pls)), 3)[, m],
    plsdof = round(pls_fit_plsdof$Yhat, 3)[, m + 1],
    man = round(yhat_m, 3)[, m + 1]
  )
)
# Types of DVs -----------------------------------------------------------------
data(oliveoil)
sens.pcr <- pls::pcr(sensory ~ chemical, ncomp = 4, scale = TRUE, data = oliveoil)
sens.pls <- pls::plsr(sensory ~ chemical, ncomp = 4, scale = TRUE, data = oliveoil)

oliveoil$sensory
# Prediction -------------------------------------------------------------------
n <- 50 # number of observations
p <- 15 # number of variables
X <- matrix(rnorm(n * p), ncol = p)
y <- 100 + rnorm(n)
M <- 10 # number of "components"

ntest <- 200 # number of test observations
Xtest <- matrix(rnorm(ntest * p), ncol = p) # test data
ytest <- rnorm(ntest) # test data

# Fit alternative PLSs
out_pls <- pls::plsr(
  y ~ X,
  ncomp = M,
  scale = TRUE,
  center = TRUE,
  method = "oscorespls",
  validation = "none",
  x = TRUE,
  model = TRUE
)
out_plsdof <- plsdof::pls.model(X, y, compute.DoF = TRUE, Xtest = Xtest, ytest = NULL)

# Obtain predictions on new data
head(
  round(
    cbind(
      PLS = predict(out_pls, newdata = Xtest)[, , M],
      PLSdof = out_plsdof$prediction[, M + 1]
    ),
    5
  )
)
# Make sure scale of prediction is correct
out_pls_cF <- plsr(
  y ~ X,
  ncomp = M,
  scale = TRUE,
  center = FALSE,
  method = "oscorespls",
  validation = "none"
)

Xs <- scale(X, center = TRUE, scale = FALSE)
ys <- scale(y, center = FALSE, scale = FALSE)

out_pls_cF <- plsr(
  ys ~ Xs,
  ncomp = M,
  scale = TRUE,
  center = FALSE,
  method = "oscorespls",
  validation = "none"
)

head(
  round(
    cbind(
      PLS = predict(out_pls, newdata = Xtest)[, , M],
      PLS_cf = mean(y) + predict(out_pls_cF, newdata = Xtest)[, , M],
      PLSdof = out_plsdof$prediction[, M]
    ),
    5
  )
)
# PLS degrees of freedom -------------------------------------------------------
library(plsdof)
set.seed(1234)
# Generate data
n <- 100 # number of observations
p <- 15  # number of variables
m <- 15
X <- matrix(rnorm(n * p), ncol = p)
y <- rnorm(n)

# Fit model with package
outpls <- pls.model(X, y, compute.DoF = TRUE)
outpls$DoF
outpls.internal <- linear.pls.fit(X, y, m, DoF.max = min(n - 1, p + 1))

# Fit model with the manual PLS function
outpls_man <- pls.manual(ivs = X, dv = y, m = m)

# Fit model with plsr function from pls
pls_fit_pls <- plsr(
  y ~ X,
  ncomp = m,
  scale = FALSE,
  center = FALSE,
  method = "oscorespls",
  validation = "none"
)

# Y hats
round(outpls_man$Yhat - outpls$Yhat, 5)

# T scores
j <- 1
cbind(
  PLSR = apply(scores(pls_fit_pls), 2, function(j) j / sqrt(sum(j^2)))[, j],
  PLSTT = outpls.internal$TT[, j],
  manualTs = outpls_man$Ts[, j],
  manualTsn = outpls_man$Tsn[, j]
)

# Degrees of freedom PLSR
predi <- sapply(1:m, function(j) {
  predict(pls_fit_pls, ncomp = j)
})
DoF_plsr <- dofPLS(
  X,
  y,
  TT = apply(scores(pls_fit_pls), 2, function(j) j / sqrt(sum(j^2))),
  Yhat = predi,
  DoF.max = m + 1
)

# Degrees of freedom manual
DoF_manual <- dofPLS(
  X,
  y,
  TT = outpls_man$Tsn,
  Yhat = outpls_man$Yhat[, 2:(m + 1)],
  DoF.max = m + 1
)

# Single DoF
DoF_single <- c(1, sapply(1:m, function(i) {
  dofPLS_single(
    X,
    y,
    TT = outpls_man$Tsn,
    Yhat = outpls_man$Yhat[, (i + 1)],
    q = i
  )
}))

# Compare degrees of freedom
cbind(
  PLSR = DoF_plsr,
  PLSdof = outpls$DoF,
  PLS_manual = DoF_manual,
  diff = round(outpls$DoF - DoF_manual, 5),
  DoF_single = DoF_single
)
References
Footnotes
¹ Check the help file for the alexithymia data in the PCovR package for more information.