Demo 8: ARTISTA ΔAnalysis

[ ]:
import os, sys, warnings, torch, CAST
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
warnings.filterwarnings('ignore')
plt.rcParams.update({'pdf.fonttype':42})
plt.set_loglevel('ERROR')

work_dir = '$demo_path' #### input the demo path

Introduction

This axolotl brain dataset contains coronal slices of the axolotl brain with experimentally introduced injuries on one hemisphere while the other hemisphere remained intact and healthy as the control at different days post injury (DPI) along the brain regeneration process.

In Demo 7, we have demonstrated how the injured hemisphere of the axolotl brain is aligned to the healthy hemisphere. In this demo, we will take the aligned brain samples generated by Demo 7 as input, and focus on the demonstration of delta sample analysis (∆Analysis).

Load Data

  • CAST Delta Analysis (∆Analysis) method only require the following data modalities:

    1. gene expression raw counts (loaded from data.h5ad)

    2. spatial coordinates of the cells after CAST Stack alignment (loaded from coords_final_scaleback.data)

[ ]:
output_path = f'{work_dir}/demo8_ARTISTA_delta_Analysis/demo_output'
input_path = f'{work_dir}/demo8_ARTISTA_delta_Analysis/data'
os.makedirs(output_path, exist_ok=True)
scale_estimated = np.round(1.068 / 2.482,3) # scale_estimated about 0.43 µm/px
coords_final = torch.load(f'{input_path}/coords_final_scaleback.data')
adata = sc.read(f'{input_path}/data.h5ad')
[ ]:
# the sample to calculate ∆Analysis
sample_t = '2DPI_1'

# specify genes of interest
goi = np.array(['C1qb', 'Ctsl', 'Vim', 'Atf3', 'Dclk1', 'Gap43', 'Nptx1', 'Neurod6', 'Tnc', 'Sdc1', 'Nes', 'Krt18', 'S100a10', 'Ankrd1', 'Stmn4', 'Cdkn1a', 'Cdkn1c'])

Extract Data

[ ]:
from CAST.visualize import dsplot
from CAST.utils import delta_exp_cal, delta_cell_cal
from tqdm import tqdm

var_gene_names = np.array([i.split('|')[0].strip().capitalize() for i in adata.var_names])
screen_gene_list_preprocess = np.array([f'|{i}'.split('|')[-2].strip().capitalize().replace('[nr]','').replace('[hs]','') for i in adata.var_names])
gene_list = np.array(adata.var_names[np.isin(screen_gene_list_preprocess, goi)])
gene_list_name = screen_gene_list_preprocess[np.isin(screen_gene_list_preprocess, goi)]

cmap_t = sns.diverging_palette(243, 12, s = 68,l = 57, n=9,as_cmap=True)

sample_tgt = f'{sample_t}_right'
sample_ref = f'{sample_t}_left'

coords_tgt = coords_final[sample_tgt]
coords_ref = coords_final[sample_ref]

coords_tgt = coords_tgt.cpu().numpy() if isinstance(coords_tgt, torch.Tensor) else coords_tgt
coords_ref = coords_ref.cpu().numpy() if isinstance(coords_ref, torch.Tensor) else coords_ref

ctype_tgt = adata.obs.loc[adata.obs['sample_half'] == sample_tgt, 'Annotation']
ctype_ref = adata.obs.loc[adata.obs['sample_half'] == sample_ref, 'Annotation']

exp_tgt = adata[adata.obs['sample_half'] == sample_tgt].layers['log2_norm1e4'].toarray()
exp_ref = adata[adata.obs['sample_half'] == sample_ref].layers['log2_norm1e4'].toarray()

∆Analysis

∆Cell

∆Cell is the difference of the cell type abundance in each comparison

[ ]:
for radius_px in [100]: # we use 100px, about 43 µm
    radius_um = radius_px * scale_estimated
    output_path_t1 = f'{output_path}/{radius_px}'
    output_path_t2 = f'{output_path_t1}/delta_ctype/delta_ctype_{sample_t}_all_cells'
    os.makedirs(output_path_t2, exist_ok=True)
    df_delta_cell_tgt,df_delta_cell_ref,df_delta_cell = delta_cell_cal(coords_tgt,coords_ref,ctype_tgt,ctype_ref,radius_px)
    torch.save((coords_tgt, coords_ref, df_delta_cell_tgt, df_delta_cell_ref, df_delta_cell), f'{output_path_t2}/../{sample_t}_delta_ctype.data')
    #### plot with selected cells
    t = tqdm(df_delta_cell.columns, desc='∆Cell', leave=True)
    for ctype_t in t:
        t.set_description(f'∆Cell {ctype_t}')
        v_max_t = np.nanmax([np.nanmax(np.abs(df_delta_cell_tgt[ctype_t])),np.nanmax(np.abs(df_delta_cell_ref[ctype_t])),np.nanmax(np.abs(df_delta_cell[ctype_t]))])
        dsplot(coords_tgt,None,s_cell=30,s_plaque=None,col_cell=df_delta_cell[ctype_t],col_plaque='darkslategrey',alpha = 1,cmap_t = cmap_t, title=f'{ctype_t} vmax = {str(v_max_t)}',vmax_t=v_max_t, output_path_t = f'{output_path_t2}/{ctype_t}_dcell.pdf', coords0_mask=None, scale_bar_200=radius_px)
        t.update()

∆Exp

∆Exp is the difference of the average gene expression (log2_norm1e4) in each comparison

[ ]:
for radius_px in [100]: # we use 100px, about 43 µm
    radius_um = radius_px * scale_estimated
    output_path_t1 = f'{output_path}/{radius_px}'
    output_path_t3 = f'{output_path_t1}/delta_exp/delta_exp_{sample_t}_all_cells'
    os.makedirs(output_path_t3, exist_ok=True)
    delta_exp_tgt, delta_exp_ref, delta_exp = delta_exp_cal(coords_tgt,coords_ref,exp_tgt,exp_ref,radius_px)
    torch.save((coords_tgt, coords_ref, delta_exp_tgt, delta_exp_ref, delta_exp), f'{output_path_t3}/../{sample_t}_delta_expr.data')
    t = tqdm(enumerate(gene_list), desc='∆Exp', leave=True, total=len(gene_list))
    for gene_idx, gene_t in t:
        t.set_description(f'∆Exp {gene_t}')
        gene_name_t = gene_list_name[gene_idx]
        idx_gene = adata.var.index == gene_t
        v_max_t = np.nanmax([np.nanmax(np.abs(delta_exp_tgt[:,idx_gene])),np.nanmax(np.abs(delta_exp_ref[:,idx_gene])),np.nanmax(np.abs(delta_exp[:,idx_gene]))])
        dsplot(coords_tgt,None,s_cell=30,s_plaque=None,col_cell=np.array(delta_exp[:,idx_gene]),col_plaque='darkslategrey',alpha = 1,cmap_t = cmap_t, title=f'{gene_name_t} vmax = {str(round(v_max_t,2))}',vmax_t=v_max_t, output_path_t = f'{output_path_t3}/{gene_name_t}_dexp.pdf', coords0_mask=None, scale_bar_200=radius_px)
        t.update()