Per the suggestion by @robmaz, RSpectra::svds() now has two new parameters center and scale, to support implicit centering and scaling of matrices in partial SVD. The minimum version for this new feature is RSpectra >= 0.16-0.

These two parameters are very useful for principal component analysis (PCA) based on the covariance or correlation matrix, without actually forming them. Below we simulate a random data matrix, and use both R’s built-in prcomp() and the svds() function in RSpectra to compute PCA.

library(RSpectra)
library(Matrix)

# Simulate data matrix
set.seed(123)
n = 2000
p = 5000
k = 10
x = matrix(rnorm(n * p), n)

# R's built-in function
system.time(res1 <- prcomp(x, center = TRUE, scale. = FALSE, rank. = k))

##    user  system elapsed
##   6.918   0.135   7.053

# svds()
system.time(res2 <- svds(x, k, nu = 0, opts = list(center = TRUE, scale = FALSE)))

##    user  system elapsed
##   1.432   0.000   1.432

# Check explained variances
head(res1$sdev, k)

##  [1] 2.581690 2.569330 2.562992 2.560865 2.558364 2.552452 2.547646 2.545435
##  [9] 2.542844 2.538125

# Here we need to normalize the result by sqrt(n-1) if scale = FALSE
res2$d / sqrt(n - 1)

##  [1] 2.581690 2.569330 2.562992 2.560865 2.558364 2.552452 2.547646 2.545435
##  [9] 2.542844 2.538125

# Check factor loadings (eigenvectors)
head(res1$rotation)

##               PC1          PC2          PC3          PC4           PC5
## [1,]  0.002955287  0.009469244  0.001791091 -0.004472325  0.0008397064
## [2,]  0.039091388  0.002317985 -0.016254929 -0.030014596 -0.0037889699
## [3,]  0.011089497 -0.014628014  0.024707183 -0.022815585  0.0142715032
## [4,]  0.019378279  0.003213703  0.011944642  0.005600938  0.0256555170
## [5,]  0.011824996 -0.025775262 -0.008988052 -0.008098504  0.0393693188
## [6,] -0.001385724 -0.014513030  0.035388546 -0.011516888  0.0003066008
##               PC6         PC7          PC8          PC9         PC10
## [1,]  0.015071818  0.00461462  0.018306793 -0.005177352 -0.017118890
## [2,]  0.010304461 -0.01921043 -0.015591374 -0.012862728  0.019834698
## [3,] -0.001323470 -0.01548052  0.009874774 -0.006201647  0.035196107
## [4,]  0.019699390  0.01027901  0.008836943  0.020698740 -0.003105694
## [5,]  0.011788053 -0.02337262  0.058207087 -0.010045558  0.006709223
## [6,] -0.003844848  0.01498251  0.012312399 -0.011870723  0.032744347

head(res2$v)

##              [,1]         [,2]         [,3]         [,4]          [,5]
## [1,]  0.002955287 -0.009469244 -0.001791091 -0.004472325 -0.0008397064
## [2,]  0.039091388 -0.002317985  0.016254929 -0.030014596  0.0037889699
## [3,]  0.011089497  0.014628014 -0.024707183 -0.022815585 -0.0142715032
## [4,]  0.019378279 -0.003213703 -0.011944642  0.005600938 -0.0256555170
## [5,]  0.011824996  0.025775262  0.008988052 -0.008098504 -0.0393693188
## [6,] -0.001385724  0.014513030 -0.035388546 -0.011516888 -0.0003066008
##              [,6]        [,7]         [,8]         [,9]        [,10]
## [1,]  0.015071818 -0.00461462 -0.018306793 -0.005177352 -0.017118890
## [2,]  0.010304461  0.01921043  0.015591374 -0.012862728  0.019834698
## [3,] -0.001323470  0.01548052 -0.009874774 -0.006201647  0.035196107
## [4,]  0.019699390 -0.01027901 -0.008836943  0.020698740 -0.003105694
## [5,]  0.011788053  0.02337262 -0.058207087 -0.010045558  0.006709223
## [6,] -0.003844848 -0.01498251 -0.012312399 -0.011870723  0.032744347


We can see that the two methods generate the same results (note that eigenvectors are identical up to signs), but svds() is much faster than prcomp() since it only computes the leading singular values.

The performance advantage of svds() is more evident if the input matrix is also sparse. We repeat the experiment above, but on a different input:

# Simulate data matrix
set.seed(123)
n = 2000
p = 5000
k = 10
xsp = rnorm(n * p)
# 90% of the values are zero
xsp[sample(n * p, n * p * 0.9)] = 0
xsp = Matrix(xsp, n, p, sparse = TRUE)

# R's built-in function
system.time(res1 <- prcomp(xsp, center = TRUE, scale. = FALSE, rank. = k))

##    user  system elapsed
##   6.656   0.159   6.815

# svds()
system.time(res2 <- svds(xsp, k, nu = 0, opts = list(center = TRUE, scale = FALSE)))

##    user  system elapsed
##    0.47    0.00    0.47

# Check explained variances
head(res1$sdev, k)

##  [1] 0.8185087 0.8153621 0.8149581 0.8130531 0.8114726 0.8096282 0.8080887
##  [8] 0.8076944 0.8058630 0.8052317

# Here we need to normalize the result by sqrt(n-1) if scale = FALSE
res2$d / sqrt(n - 1)

##  [1] 0.8185087 0.8153621 0.8149581 0.8130531 0.8114726 0.8096282 0.8080887
##  [8] 0.8076944 0.8058630 0.8052317

# Check factor loadings (eigenvectors)
head(res1$rotation)

##               PC1          PC2           PC3          PC4         PC5
## [1,] -0.014078976  0.009977389 -0.0048860045  0.036143696 0.020856379
## [2,]  0.005891397 -0.006337634  0.0086567191 -0.024204012 0.006019543
## [3,] -0.004839200  0.006800550  0.0057230097  0.001902374 0.009481416
## [4,]  0.012187115  0.007566681 -0.0007558701  0.037469119 -0.016597866
## [5,]  0.017071907 -0.010494435 -0.0074782229  0.015558749  0.004890078
## [6,] -0.013842080  0.007694404  0.0018808309 -0.013330489 -0.017356824
##                PC6          PC7          PC8          PC9         PC10
## [1,] -6.246730e-03  0.001227333  0.005002885 -0.019607028  0.005792201
## [2,]  1.020680e-02  0.007968355 -0.028889050  0.008189175 -0.006268807
## [3,] -4.108971e-02  0.011208912  0.005501149 -0.007212123 -0.021556508
## [4,]  2.665231e-02 -0.004031854 -0.010210344 -0.011379201  0.004734222
## [5,]  7.927843e-04 -0.003725163 -0.005354887  0.004972582  0.005005228
## [6,] -5.537012e-06  0.010373699 -0.019280452  0.019368409 -0.014930834

head(res2$v)

##              [,1]         [,2]          [,3]         [,4]         [,5]
## [1,]  0.014078976 -0.009977389  0.0048860045 -0.036143696 -0.020856379
## [2,] -0.005891397  0.006337634 -0.0086567191  0.024204012 -0.006019543
## [3,]  0.004839200 -0.006800550 -0.0057230097 -0.001902374 -0.009481416
## [4,] -0.012187115 -0.007566681  0.0007558701 -0.037469119  0.016597866
## [5,] -0.017071907  0.010494435  0.0074782229 -0.015558749 -0.004890078
## [6,]  0.013842080 -0.007694404 -0.0018808309  0.013330489  0.017356824
##               [,6]         [,7]         [,8]         [,9]        [,10]
## [1,]  6.246730e-03  0.001227333  0.005002885  0.019607028  0.005792201
## [2,] -1.020680e-02  0.007968355 -0.028889050 -0.008189175 -0.006268807
## [3,]  4.108971e-02  0.011208912  0.005501149  0.007212123 -0.021556508
## [4,] -2.665231e-02 -0.004031854 -0.010210344  0.011379201  0.004734222
## [5,] -7.927843e-04 -0.003725163 -0.005354887 -0.004972582  0.005005228
## [6,]  5.537012e-06  0.010373699 -0.019280452 -0.019368409 -0.014930834


The above code demonstrates a roughly 15x speedup.

To perform PCA based on the correlation matrix, simply specify scale = TRUE. Below is an example of two different ways that lead to the same output.

# Simulate data matrix
set.seed(123)
n = 2000
p = 5000
k = 10
xsp = rnorm(n * p)
# 90% of the values are zero
xsp[sample(n * p, n * p * 0.9)] = 0
xsp = Matrix(xsp, n, p, sparse = TRUE)

# R's built-in function
system.time(res1 <- prcomp(xsp, center = TRUE, scale. = TRUE, rank. = k))

##    user  system elapsed
##   7.084   0.205   7.291

# svds()
system.time(res2 <- svds(xsp, k, nu = 0, opts = list(center = TRUE, scale = TRUE)))

##    user  system elapsed
##   0.587   0.000   0.587

# Check explained variances
head(res1$sdev, k)

##  [1] 2.576689 2.566099 2.562755 2.560604 2.551354 2.547781 2.546969 2.540066
##  [9] 2.536446 2.534298

res2$d

##  [1] 2.576689 2.566099 2.562755 2.560604 2.551354 2.547781 2.546969 2.540066
##  [9] 2.536446 2.534298

# Check factor loadings (eigenvectors)
head(res1$rotation)

##               PC1           PC2          PC3          PC4          PC5
## [1,] -0.006483247 -0.0009410683 -0.019834553  0.032506226 -0.002916057
## [2,]  0.012595033  0.0011791356  0.010536472 -0.020026913  0.025265776
## [3,] -0.001758559  0.0074203003 -0.009177566 -0.001530424  0.007236185
## [4,]  0.010485835  0.0102749040 -0.017666064  0.018482750 -0.027623466
## [5,]  0.019636988 -0.0051696089 -0.004308241  0.019228107 -0.001763454
## [6,] -0.013690684 -0.0025113870 -0.003252875 -0.030508385 -0.003294067
##               PC6           PC7           PC8          PC9         PC10
## [1,] -0.001658184 -7.242177e-03 -0.0008240643 -0.007626584 -0.0006768329
## [2,] -0.014348978  7.286501e-03  0.0121723469  0.007891901  0.0271362784
## [3,] -0.012574501 -1.400828e-02 -0.0353942993 -0.009328341  0.0040393007
## [4,]  0.007793619  4.733246e-03  0.0230159209 -0.006839061 -0.0027831289
## [5,] -0.002541560 -2.382465e-05  0.0130737963  0.003924594 -0.0073279903
## [6,] -0.008978843 -4.013950e-03  0.0138156553  0.013757868  0.0160054867

head(res2$v)

##              [,1]          [,2]         [,3]         [,4]         [,5]
## [1,]  0.006483247 -0.0009410683  0.019834553  0.032506226  0.002916057
## [2,] -0.012595033  0.0011791356 -0.010536472 -0.020026913 -0.025265776
## [3,]  0.001758559  0.0074203003  0.009177566 -0.001530424 -0.007236185
## [4,] -0.010485835  0.0102749040  0.017666064  0.018482750  0.027623466
## [5,] -0.019636988 -0.0051696089  0.004308241  0.019228107  0.001763454
## [6,]  0.013690684 -0.0025113870  0.003252875 -0.030508385  0.003294067
##              [,6]          [,7]          [,8]         [,9]         [,10]
## [1,] -0.001658184 -7.242177e-03 -0.0008240643  0.007626584  0.0006768329
## [2,] -0.014348978  7.286501e-03  0.0121723469 -0.007891901 -0.0271362784
## [3,] -0.012574501 -1.400828e-02 -0.0353942993  0.009328341 -0.0040393007
## [4,]  0.007793619  4.733246e-03  0.0230159209  0.006839061  0.0027831289
## [5,] -0.002541560 -2.382465e-05  0.0130737963 -0.003924594  0.0073279902
## [6,] -0.008978843 -4.013950e-03  0.0138156553 -0.013757868 -0.0160054867