Source code for svgbit.core.moran

from __future__ import annotations

from functools import partial
from multiprocessing import Pool, cpu_count
from typing import Tuple

import numba
import pandas as pd
from libpysal.weights import W as libpysal_W
from pysal.explore import esda


[docs]def local_moran( gene_expression_df: pd.DataFrame, weights: libpysal_W, transformation: str = 'r', permutation: int = 999, cores: int = cpu_count(), ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: """ Calculate local moran for hotspot identification. Parameters ========== gene_expression_df : pd.DataFrame Expression matrix for Spatial Transcriptomics Data. weights : libpysal.weights.W Spatial weight for calculating Moran's I. transformation : str, default 'r' Weights transformation passed to esda.moran.Moran_Local permutation : int, default 999 Number of random permutations for calculation of pseudo-p_values. cores : int Number of threads to run svgbit. Use all available cpus by default. Returns ======= hotspot : pd.DataFrame Identified hotspots. i_value : pd.DataFrame Local Moran's I values. p_value : pd.DataFrame Local Moran's I p values. """ partial_func = partial( _local_moran, weights=weights, transformation=transformation, permutation=permutation, ) pool = Pool(processes=cores) result_lists = pool.map( partial_func, [gene_expression_df[i] for i in gene_expression_df.columns], ) pool.close() pool.join() hotspot = pd.concat([i[0] for i in result_lists], axis=1) i_value = pd.concat([i[1] for i in result_lists], axis=1) p_value = pd.concat([i[2] for i in result_lists], axis=1) return hotspot, i_value, p_value
def _local_moran( gene_expression_series: pd.Series, weights: libpysal_W, transformation: str = "r", permutation: int = 999, ) -> Tuple[pd.Series, pd.Series, pd.Series]: """ Calculate local moran for hotspot identification for a single gene. Parameters ========== gene_expression_series : pd.Series Expression series for Spatial Transcriptomics Data. weights : libpysal.weights.W Spatial weight for calculating Moran's I. transformation : str, default 'r' Weights transformation passed to esda.moran.Moran_Local permutation : int, default 999 Number of random permutations for calculation of pseudo-p_values. Returns ======= hotspot : pd.DataFrame Identified hotspots. i_value : pd.DataFrame Local Moran's I values. p_value : pd.DataFrame Local Moran's I p values. """ gene = gene_expression_series.name spots = gene_expression_series.index numba.config.THREADING_LAYER = "workqueue" gene_lisa = esda.moran.Moran_Local( gene_expression_series, weights, transformation=transformation, permutations=permutation, n_jobs=1, ) # select the significant hotspots (p < 0.05) local_moran_hotspot = 1 * ((gene_lisa.p_sim < 0.05) & (gene_lisa.q == 1)) hotspot = pd.Series( local_moran_hotspot, index=spots, name=gene, ) if (hotspot == 1).all(): hotspot = 0 i_value = pd.Series( gene_lisa.EI_sim, index=spots, name=gene, ) p_value = pd.Series( gene_lisa.p_sim, index=spots, name=gene, ) return hotspot, i_value, p_value
[docs]def global_moran( gene_expression_df: pd.DataFrame, weights: libpysal_W, transformation: str = 'r', permutation: int = 999, cores: int = cpu_count(), ) -> pd.DataFrame: """ Calculate global Moran's I value. Parameters ========== gene_expression_df : pd.DataFrame Expression matrix for Spatial Transcriptomics Data. weights : libpysal.weights.W Spatial weight for calculating Moran's I. transformation : str, default 'r' Weights transformation passed to esda.moran.Moran permutation : int, default 999 Number of random permutations for calculation of pseudo-p_values. cores : int Number of threads to run svgbit. Use all available cpus by default. Returns ======= global_moran_df : pd.DataFrame Global Moran's I result. """ partial_func = partial( _global_moran, weights=weights, transformation=transformation, permutation=permutation, ) pool = Pool(processes=cores) result_lists = pool.map(partial_func, gene_expression_df.columns) pool.close() pool.join() result_df = pd.concat(result_lists, axis=0) global_moran_df = result_df global_moran_df.sort_values( by="p_value_sim", inplace=False, ascending=True, ) return global_moran_df
def _global_moran( gene_expression_series: pd.Series, weights: libpysal_W, transformation: str = 'r', permutation: int = 999, ) -> pd.DataFrame: """ Calculate global Moran's I value for a single gene. Parameters ========== gene_expression_series : pd.Series Expression series for Spatial Transcriptomics Data. weights : libpysal.weights.W Spatial weight for calculating Moran's I. transformation : str, default 'r' Weights transformation passed to esda.moran.Moran permutation : int, default 999 Number of random permutations for calculation of pseudo-p_values. Returns ======= global_moran_df : pd.DataFrame Global Moran's I result. """ gene = gene_expression_series.name global_moran_df = pd.DataFrame( columns=[ 'I_value', 'p_value_sim', 'p_value_rand', 'p_value_z_sim', 'p_value_norm', ], index=[gene], ) moran_value = esda.moran.Moran( gene_expression_series, weights, transformation=transformation, permutations=permutation, ) p_value_sim = moran_value.p_sim moranI = moran_value.I p_value_norm = moran_value.p_norm p_value_rand = moran_value.p_rand p_value_z_sim = moran_value.p_z_sim global_moran_df['p_value_sim'][gene] = p_value_sim global_moran_df['I_value'][gene] = moranI global_moran_df['p_value_rand'][gene] = p_value_rand global_moran_df['p_value_z_sim'] = p_value_z_sim global_moran_df['p_value_norm'] = p_value_norm return global_moran_df if __name__ == "__main__": pass