A collection of utility functions used within the anglemania package for manipulating FBMs, calculating statistics, and selecting genes.
Replaces all NaN and Inf values in a numeric vector with NA.
sparse_to_fbm(s_mat)
replace_with_na(v)
normalize_matrix(
x_mat,
normalization_method = "divide_by_total_counts",
verbose = TRUE
)
get_anglemania_genes(sce)
get_anglemania_stats_df(sce)
A sparse matrix.
A numeric vector.
A bigstatsr::FBM
object containing the matrix to normalize
(typically genes x cells).
A character string specifying the normalization
method to use. One of "divide_by_total_counts"
(default) or
"find_residuals"
.
"divide_by_total_counts"
normalizes each cell by its total expression
count and applies log1p.
"find_residuals"
computes log1p-transformed residuals after regressing
out total expression.
A SingleCellExperiment or SummarizedExperiment object
An FBM
object from the bigstatsr
package.
A numeric vector with NaN and Inf values replaced with NA.
The input FBM
object with normalized values written back in place.
This function modifies the input x_mat
by reference.
A character vector of gene names that have been selected by the anglemania algorithm
A data frame of gene pairs from which the anglemania genes were selected
sparse_to_fbm()
: Convert a sparse matrix into
a file-backed matrix
(FBM
) with efficient memory usage.
replace_with_na()
: replace Nan and Inf values with NA
normalize_matrix()
: normalize matrix
Normalize a Filebacked Big Matrix (FBM
) using either total-count
scaling or residuals from a linear model. Intended for use with
single-cell RNA-seq gene expression data.
get_anglemania_genes()
: Utility function to extract the genes that
have been selected by the anglemania algorithm.
get_anglemania_stats_df()
: Utility function to extract the stats of the
gene pairs from which the anglemania genes were selected.
s_mat <- Matrix::rsparsematrix(nrow = 10, ncol = 5, density = 0.3)
fbm_mat <- sparse_to_fbm(s_mat)
fbm_mat
#> A Filebacked Big Matrix of type 'double' with 10 rows and 5 columns.
v <- c(1, 2, 3, 4, 5, 6, 7, Inf, 9, NA)
v <- replace_with_na(v)
v
#> [1] 1 2 3 4 5 6 7 NA 9 NA
library(bigstatsr)
set.seed(42)
mat <- matrix(rpois(1000, lambda = 5), nrow = 100, ncol = 10)
fbm <- as_FBM(mat)
normalize_matrix(
fbm,
normalization_method = "divide_by_total_counts"
)[1:5, 1:5]
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 5.050104 4.789858 5.149273 4.605269 3.107610
#> [2,] 5.167175 4.104990 4.682746 4.605269 4.680626
#> [3,] 4.363345 4.104990 5.016571 3.703866 4.861401
#> [4,] 4.917488 4.388541 4.682746 4.384622 4.459799
#> [5,] 4.764556 5.192548 4.178070 5.071516 5.147147
normalize_matrix(
fbm,
normalization_method = "find_residuals"
)[1:5, 1:5]
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.11434410 0.05550312 0.10903721 0.01912041 -0.2880443664
#> [2,] 0.09742030 -0.06396060 0.05569234 0.03692842 0.0434047229
#> [3,] 0.01356898 -0.05693503 0.09785461 -0.14442728 0.0808357808
#> [4,] 0.04243604 -0.01456288 0.05508198 -0.00548117 -0.0007333372
#> [5,] 0.04158057 0.08936337 -0.10027542 0.06324354 0.0815543115
sce <- sce_example()
sce <- anglemania(sce, batch_key = "batch")
#> No dataset_key specified.
#> Assuming that all samples belong to the same dataset and are separated by batch_key: batch
#> Preparing input...
#> Filtering each batch to at least 1 cells per gene...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Using the intersection of filtered genes from all batches...
#> Number of genes in intersected set: 300
#> Extracting count matrices...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing angles and transforming to z-scores...
#>
| | 0 % elapsed=00s
#> Creating directory "/tmp/RtmpqekGwf/file2fd6a8dc90f" which didn't exist..
#>
|========================= | 50% elapsed=00s, remaining~00s
#> Creating directory "/tmp/RtmpqekGwf/file2fd6690342f3" which didn't exist..
#>
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing statistics...
#> Weighting matrix_list...
#> Calculating mean...
#> Calculating sds...
#> Pre-filtering features...
#> Extracting filtered features...
#> 2000 is larger than the number of intersected genes.Setting max_n_genes to 300
#> Selected 300 genes for integration.
anglemania_genes <- get_anglemania_genes(sce)
head(anglemania_genes)
#> [1] "gene270" "gene294" "gene66" "gene122" "gene18" "gene161"
length(anglemania_genes)
#> [1] 300
sce <- sce_example()
sce <- anglemania(sce, batch_key = "batch")
#> No dataset_key specified.
#> Assuming that all samples belong to the same dataset and are separated by batch_key: batch
#> Preparing input...
#> Filtering each batch to at least 1 cells per gene...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Using the intersection of filtered genes from all batches...
#> Number of genes in intersected set: 300
#> Extracting count matrices...
#>
| | 0 % elapsed=00s
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing angles and transforming to z-scores...
#>
| | 0 % elapsed=00s
#> Creating directory "/tmp/RtmpqekGwf/file2fd634c99a0b" which didn't exist..
#>
|========================= | 50% elapsed=00s, remaining~00s
#> Creating directory "/tmp/RtmpqekGwf/file2fd6353d0a10" which didn't exist..
#>
|==================================================| 100% elapsed=00s, remaining~00s
#> Computing statistics...
#> Weighting matrix_list...
#> Calculating mean...
#> Calculating sds...
#> Pre-filtering features...
#> Extracting filtered features...
#> 2000 is larger than the number of intersected genes.Setting max_n_genes to 300
#> Selected 300 genes for integration.
anglemania_stats_df <- get_anglemania_stats_df(sce)
head(anglemania_stats_df)
#> geneA geneB sn_zscore mean_zscore sd_zscore rank
#> 9468 gene88 gene284 0.5005326 -0.5307601 1.060391 18986
#> 12460 gene126 gene132 0.5008629 -0.5407590 1.079655 18985
#> 12932 gene132 gene160 0.5019395 0.5093796 1.014823 18984
#> 13290 gene137 gene235 0.5021455 0.5057122 1.007103 18983
#> 10046 gene95 gene242 0.5027604 0.5283980 1.050994 18982
#> 11525 gene113 gene223 0.5009884 -0.5695345 1.136822 18981
length(anglemania_stats_df)
#> [1] 6