A collection of utility functions used within the anglemania package for manipulating FBMs, calculating statistics, and selecting genes.
Replaces all NaN and Inf values in a numeric vector with NA.
sparse_to_fbm(s_mat)
replace_with_na(v)
normalize_matrix(
x_mat,
normalization_method = "divide_by_total_counts",
verbose = TRUE
)
get_anglemania_genes(sce)
get_anglemania_stats_df(sce)
A sparse matrix.
A numeric vector.
A bigstatsr::FBM
object containing the matrix to normalize
(typically genes x cells).
A character string specifying the normalization
method to use. One of "divide_by_total_counts"
(default) or
"find_residuals"
.
"divide_by_total_counts"
normalizes each cell by its total expression
count and applies log1p.
"find_residuals"
computes log1p-transformed residuals after regressing
out total expression.
A SingleCellExperiment or SummarizedExperiment object
An FBM
object from the bigstatsr
package.
A numeric vector with NaN and Inf values replaced with NA.
The input FBM
object with normalized values written back in place.
This function modifies the input x_mat
by reference.
A character vector of gene names that have been selected by the anglemania algorithm
A data frame of gene pairs from which the anglemania genes were selected
sparse_to_fbm()
: Convert a sparse matrix into
a file-backed matrix
(FBM
) with efficient memory usage.
replace_with_na()
: replace Nan and Inf values with NA
normalize_matrix()
: normalize matrix
Normalize a Filebacked Big Matrix (FBM
) using either total-count
scaling or residuals from a linear model. Intended for use with
single-cell RNA-seq gene expression data.
get_anglemania_genes()
: Utility function to extract the genes that
have been selected by the anglemania algorithm.
get_anglemania_stats_df()
: Utility function to extract the stats of the
gene pairs from which the anglemania genes were selected.
s_mat <- Matrix::rsparsematrix(nrow = 10, ncol = 5, density = 0.3)
fbm_mat <- sparse_to_fbm(s_mat)
fbm_mat
#> A Filebacked Big Matrix of type 'double' with 10 rows and 5 columns.
v <- c(1, 2, 3, 4, 5, 6, 7, Inf, 9, NA)
v <- replace_with_na(v)
v
#> [1] 1 2 3 4 5 6 7 NA 9 NA
library(bigstatsr)
set.seed(42)
mat <- matrix(rpois(1000, lambda = 5), nrow = 100, ncol = 10)
fbm <- as_FBM(mat)
normalize_matrix(
fbm,
normalization_method = "divide_by_total_counts"
)[1:5, 1:5]
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 5.050104 4.789858 5.149273 4.605269 3.107610
#> [2,] 5.167175 4.104990 4.682746 4.605269 4.680626
#> [3,] 4.363345 4.104990 5.016571 3.703866 4.861401
#> [4,] 4.917488 4.388541 4.682746 4.384622 4.459799
#> [5,] 4.764556 5.192548 4.178070 5.071516 5.147147
normalize_matrix(
fbm,
normalization_method = "find_residuals"
)[1:5, 1:5]
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.11434410 0.05550312 0.10903721 0.01912041 -0.2880443664
#> [2,] 0.09742030 -0.06396060 0.05569234 0.03692842 0.0434047229
#> [3,] 0.01356898 -0.05693503 0.09785461 -0.14442728 0.0808357808
#> [4,] 0.04243604 -0.01456288 0.05508198 -0.00548117 -0.0007333372
#> [5,] 0.04158057 0.08936337 -0.10027542 0.06324354 0.0815543115
sce <- sce_example()
sce <- anglemania(sce, batch_key = "batch")
#> No dataset_key specified.
#> Assuming that all samples belong to the same dataset and are separated by batch_key: batch
#> Preparing input...
#> Filtering each batch to at least 1 cells per gene...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Using the intersection of filtered genes from all batches...
#> Number of genes in intersected set: 300
#> Extracting count matrices...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing angles and transforming to z-scores...
#>
| | 0 % elapsed=00s
#> Creating directory "/tmp/RtmpwM6D04/file1f8e2689c158" which didn't exist..
#>
|========================= | 50% elapsed=00s, remaining~00s
#> Creating directory "/tmp/RtmpwM6D04/file1f8ef53808a" which didn't exist..
#>
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing statistics...
#> Weighting matrix_list...
#> Calculating mean...
#> Calculating sds...
#> Pre-filtering features...
#> Extracting filtered features...
#> Selected 25 genes for integration.
anglemania_genes <- get_anglemania_genes(sce)
head(anglemania_genes)
#> [1] "gene43" "gene207" "gene119" "gene130" "gene25" "gene209"
length(anglemania_genes)
#> [1] 25
sce <- sce_example()
sce <- anglemania(sce, batch_key = "batch")
#> No dataset_key specified.
#> Assuming that all samples belong to the same dataset and are separated by batch_key: batch
#> Preparing input...
#> Filtering each batch to at least 1 cells per gene...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Using the intersection of filtered genes from all batches...
#> Number of genes in intersected set: 300
#> Extracting count matrices...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing angles and transforming to z-scores...
#>
| | 0 % elapsed=00s
#> Creating directory "/tmp/RtmpwM6D04/file1f8e3c63454c" which didn't exist..
#>
|========================= | 50% elapsed=00s, remaining~00s
#> Creating directory "/tmp/RtmpwM6D04/file1f8e80cb4ba" which didn't exist..
#>
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing statistics...
#> Weighting matrix_list...
#> Calculating mean...
#> Calculating sds...
#> Pre-filtering features...
#> Extracting filtered features...
#> Selected 25 genes for integration.
anglemania_stats_df <- get_anglemania_stats_df(sce)
head(anglemania_stats_df)
#> geneA geneB sn_zscore mean_zscore
#> 1 gene43 gene207 61.159210 3.345794
#> 2 gene119 gene130 6.350243 3.014981
#> 3 gene25 gene209 3.352897 2.983193
#> 4 gene69 gene184 2.621413 2.858989
#> 5 gene43 gene63 3.107353 2.735689
#> 6 gene71 gene172 2.876975 2.691607
length(anglemania_stats_df)
#> [1] 4