from __future__ import annotations
from functools import partial
from multiprocessing import Pool, cpu_count
from typing import Tuple
import numba
import pandas as pd
from libpysal.weights import W as libpysal_W
from pysal.explore import esda
[docs]def local_moran(
gene_expression_df: pd.DataFrame,
weights: libpysal_W,
transformation: str = 'r',
permutation: int = 999,
cores: int = cpu_count(),
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
"""
Calculate local moran for hotspot identification.
Parameters
==========
gene_expression_df : pd.DataFrame
Expression matrix for Spatial Transcriptomics Data.
weights : libpysal.weights.W
Spatial weight for calculating Moran's I.
transformation : str, default 'r'
Weights transformation passed to esda.moran.Moran_Local
permutation : int, default 999
Number of random permutations for calculation of pseudo-p_values.
cores : int
Number of threads to run svgbit. Use all available cpus by default.
Returns
=======
hotspot : pd.DataFrame
Identified hotspots.
i_value : pd.DataFrame
Local Moran's I values.
p_value : pd.DataFrame
Local Moran's I p values.
"""
partial_func = partial(
_local_moran,
weights=weights,
transformation=transformation,
permutation=permutation,
)
pool = Pool(processes=cores)
result_lists = pool.map(
partial_func,
[gene_expression_df[i] for i in gene_expression_df.columns],
)
pool.close()
pool.join()
hotspot = pd.concat([i[0] for i in result_lists], axis=1)
i_value = pd.concat([i[1] for i in result_lists], axis=1)
p_value = pd.concat([i[2] for i in result_lists], axis=1)
return hotspot, i_value, p_value
def _local_moran(
gene_expression_series: pd.Series,
weights: libpysal_W,
transformation: str = "r",
permutation: int = 999,
) -> Tuple[pd.Series, pd.Series, pd.Series]:
"""
Calculate local moran for hotspot identification for a single gene.
Parameters
==========
gene_expression_series : pd.Series
Expression series for Spatial Transcriptomics Data.
weights : libpysal.weights.W
Spatial weight for calculating Moran's I.
transformation : str, default 'r'
Weights transformation passed to esda.moran.Moran_Local
permutation : int, default 999
Number of random permutations for calculation of pseudo-p_values.
Returns
=======
hotspot : pd.DataFrame
Identified hotspots.
i_value : pd.DataFrame
Local Moran's I values.
p_value : pd.DataFrame
Local Moran's I p values.
"""
gene = gene_expression_series.name
spots = gene_expression_series.index
numba.config.THREADING_LAYER = "workqueue"
gene_lisa = esda.moran.Moran_Local(
gene_expression_series,
weights,
transformation=transformation,
permutations=permutation,
n_jobs=1,
)
# select the significant hotspots (p < 0.05)
local_moran_hotspot = 1 * ((gene_lisa.p_sim < 0.05) & (gene_lisa.q == 1))
hotspot = pd.Series(
local_moran_hotspot,
index=spots,
name=gene,
)
if (hotspot == 1).all():
hotspot = 0
i_value = pd.Series(
gene_lisa.EI_sim,
index=spots,
name=gene,
)
p_value = pd.Series(
gene_lisa.p_sim,
index=spots,
name=gene,
)
return hotspot, i_value, p_value
[docs]def global_moran(
gene_expression_df: pd.DataFrame,
weights: libpysal_W,
transformation: str = 'r',
permutation: int = 999,
cores: int = cpu_count(),
) -> pd.DataFrame:
"""
Calculate global Moran's I value.
Parameters
==========
gene_expression_df : pd.DataFrame
Expression matrix for Spatial Transcriptomics Data.
weights : libpysal.weights.W
Spatial weight for calculating Moran's I.
transformation : str, default 'r'
Weights transformation passed to esda.moran.Moran
permutation : int, default 999
Number of random permutations for calculation of pseudo-p_values.
cores : int
Number of threads to run svgbit. Use all available cpus by default.
Returns
=======
global_moran_df : pd.DataFrame
Global Moran's I result.
"""
partial_func = partial(
_global_moran,
weights=weights,
transformation=transformation,
permutation=permutation,
)
pool = Pool(processes=cores)
result_lists = pool.map(partial_func, gene_expression_df.columns)
pool.close()
pool.join()
result_df = pd.concat(result_lists, axis=0)
global_moran_df = result_df
global_moran_df.sort_values(
by="p_value_sim",
inplace=False,
ascending=True,
)
return global_moran_df
def _global_moran(
gene_expression_series: pd.Series,
weights: libpysal_W,
transformation: str = 'r',
permutation: int = 999,
) -> pd.DataFrame:
"""
Calculate global Moran's I value for a single gene.
Parameters
==========
gene_expression_series : pd.Series
Expression series for Spatial Transcriptomics Data.
weights : libpysal.weights.W
Spatial weight for calculating Moran's I.
transformation : str, default 'r'
Weights transformation passed to esda.moran.Moran
permutation : int, default 999
Number of random permutations for calculation of pseudo-p_values.
Returns
=======
global_moran_df : pd.DataFrame
Global Moran's I result.
"""
gene = gene_expression_series.name
global_moran_df = pd.DataFrame(
columns=[
'I_value',
'p_value_sim',
'p_value_rand',
'p_value_z_sim',
'p_value_norm',
],
index=[gene],
)
moran_value = esda.moran.Moran(
gene_expression_series,
weights,
transformation=transformation,
permutations=permutation,
)
p_value_sim = moran_value.p_sim
moranI = moran_value.I
p_value_norm = moran_value.p_norm
p_value_rand = moran_value.p_rand
p_value_z_sim = moran_value.p_z_sim
global_moran_df['p_value_sim'][gene] = p_value_sim
global_moran_df['I_value'][gene] = moranI
global_moran_df['p_value_rand'][gene] = p_value_rand
global_moran_df['p_value_z_sim'] = p_value_z_sim
global_moran_df['p_value_norm'] = p_value_norm
return global_moran_df
if __name__ == "__main__":
pass