{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Demo 8: ARTISTA ΔAnalysis "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, sys, warnings, torch, CAST\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import scanpy as sc\n",
    "warnings.filterwarnings('ignore')\n",
    "plt.rcParams.update({'pdf.fonttype':42}) \n",
    "plt.set_loglevel('ERROR')\n",
    "\n",
    "work_dir = '$demo_path' #### input the demo path "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Introduction\n",
    "This axolotl brain dataset contains coronal slices of the axolotl brain with experimentally introduced injuries on one hemisphere while the other hemisphere remained intact and healthy as the control at different days post injury (DPI) along the brain regeneration process.\n",
    "\n",
    "In Demo 7, we have demonstrated how the injured hemisphere of the axolotl brain is aligned to the healthy hemisphere. In this demo, we will take the **aligned brain samples generated by Demo 7** as input, and focus on the demonstration of delta sample analysis (∆Analysis).\n",
    "\n",
    "### Load Data\n",
    "- CAST Delta Analysis (∆Analysis) method only require the following data modalities:\n",
    "    1. gene expression raw counts (loaded from `data.h5ad`)\n",
    "    2. spatial coordinates of the cells **after CAST Stack alignment** (loaded from `coords_final_scaleback.data`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output_path = f'{work_dir}/demo8_ARTISTA_delta_Analysis/demo_output'\n",
    "input_path = f'{work_dir}/demo8_ARTISTA_delta_Analysis/data'\n",
    "os.makedirs(output_path, exist_ok=True)\n",
    "scale_estimated = np.round(1.068 / 2.482,3) # scale_estimated about 0.43 µm/px\n",
    "coords_final = torch.load(f'{input_path}/coords_final_scaleback.data')\n",
    "adata = sc.read(f'{input_path}/data.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the sample to calculate ∆Analysis\n",
    "sample_t = '2DPI_1'\n",
    "\n",
    "# specify genes of interest\n",
    "goi = np.array(['C1qb', 'Ctsl', 'Vim', 'Atf3', 'Dclk1', 'Gap43', 'Nptx1', 'Neurod6', 'Tnc', 'Sdc1', 'Nes', 'Krt18', 'S100a10', 'Ankrd1', 'Stmn4', 'Cdkn1a', 'Cdkn1c'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Extract Data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from CAST.visualize import dsplot\n",
    "from CAST.utils import delta_exp_cal, delta_cell_cal\n",
    "from tqdm import tqdm\n",
    "\n",
    "var_gene_names = np.array([i.split('|')[0].strip().capitalize() for i in adata.var_names])\n",
    "screen_gene_list_preprocess = np.array([f'|{i}'.split('|')[-2].strip().capitalize().replace('[nr]','').replace('[hs]','') for i in adata.var_names])\n",
    "gene_list = np.array(adata.var_names[np.isin(screen_gene_list_preprocess, goi)])\n",
    "gene_list_name = screen_gene_list_preprocess[np.isin(screen_gene_list_preprocess, goi)]\n",
    "\n",
    "cmap_t = sns.diverging_palette(243, 12, s = 68,l = 57, n=9,as_cmap=True)\n",
    "\n",
    "sample_tgt = f'{sample_t}_right'\n",
    "sample_ref = f'{sample_t}_left'\n",
    "\n",
    "coords_tgt = coords_final[sample_tgt]\n",
    "coords_ref = coords_final[sample_ref]\n",
    "\n",
    "coords_tgt = coords_tgt.cpu().numpy() if isinstance(coords_tgt, torch.Tensor) else coords_tgt\n",
    "coords_ref = coords_ref.cpu().numpy() if isinstance(coords_ref, torch.Tensor) else coords_ref\n",
    "\n",
    "ctype_tgt = adata.obs.loc[adata.obs['sample_half'] == sample_tgt, 'Annotation']\n",
    "ctype_ref = adata.obs.loc[adata.obs['sample_half'] == sample_ref, 'Annotation']\n",
    "\n",
    "exp_tgt = adata[adata.obs['sample_half'] == sample_tgt].layers['log2_norm1e4'].toarray()\n",
    "exp_ref = adata[adata.obs['sample_half'] == sample_ref].layers['log2_norm1e4'].toarray()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ∆Analysis\n",
    "#### ∆Cell\n",
    "∆Cell is the difference of the cell type abundance in each comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for radius_px in [100]: # we use 100px, about 43 µm\n",
    "    radius_um = radius_px * scale_estimated\n",
    "    output_path_t1 = f'{output_path}/{radius_px}'\n",
    "    output_path_t2 = f'{output_path_t1}/delta_ctype/delta_ctype_{sample_t}_all_cells'\n",
    "    os.makedirs(output_path_t2, exist_ok=True)\n",
    "    df_delta_cell_tgt,df_delta_cell_ref,df_delta_cell = delta_cell_cal(coords_tgt,coords_ref,ctype_tgt,ctype_ref,radius_px)\n",
    "    torch.save((coords_tgt, coords_ref, df_delta_cell_tgt, df_delta_cell_ref, df_delta_cell), f'{output_path_t2}/../{sample_t}_delta_ctype.data')\n",
    "    #### plot with selected cells\n",
    "    t = tqdm(df_delta_cell.columns, desc='∆Cell', leave=True)\n",
    "    for ctype_t in t:\n",
    "        t.set_description(f'∆Cell {ctype_t}')\n",
    "        v_max_t = np.nanmax([np.nanmax(np.abs(df_delta_cell_tgt[ctype_t])),np.nanmax(np.abs(df_delta_cell_ref[ctype_t])),np.nanmax(np.abs(df_delta_cell[ctype_t]))])\n",
    "        dsplot(coords_tgt,None,s_cell=30,s_plaque=None,col_cell=df_delta_cell[ctype_t],col_plaque='darkslategrey',alpha = 1,cmap_t = cmap_t, title=f'{ctype_t} vmax = {str(v_max_t)}',vmax_t=v_max_t, output_path_t = f'{output_path_t2}/{ctype_t}_dcell.pdf', coords0_mask=None, scale_bar_200=radius_px)\n",
    "        t.update()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### ∆Exp\n",
    "∆Exp is the difference of the average gene expression (log2_norm1e4) in each comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for radius_px in [100]: # we use 100px, about 43 µm\n",
    "    radius_um = radius_px * scale_estimated\n",
    "    output_path_t1 = f'{output_path}/{radius_px}'\n",
    "    output_path_t3 = f'{output_path_t1}/delta_exp/delta_exp_{sample_t}_all_cells'\n",
    "    os.makedirs(output_path_t3, exist_ok=True)\n",
    "    delta_exp_tgt, delta_exp_ref, delta_exp = delta_exp_cal(coords_tgt,coords_ref,exp_tgt,exp_ref,radius_px)\n",
    "    torch.save((coords_tgt, coords_ref, delta_exp_tgt, delta_exp_ref, delta_exp), f'{output_path_t3}/../{sample_t}_delta_expr.data')\n",
    "    t = tqdm(enumerate(gene_list), desc='∆Exp', leave=True, total=len(gene_list))\n",
    "    for gene_idx, gene_t in t:\n",
    "        t.set_description(f'∆Exp {gene_t}')\n",
    "        gene_name_t = gene_list_name[gene_idx]\n",
    "        idx_gene = adata.var.index == gene_t\n",
    "        v_max_t = np.nanmax([np.nanmax(np.abs(delta_exp_tgt[:,idx_gene])),np.nanmax(np.abs(delta_exp_ref[:,idx_gene])),np.nanmax(np.abs(delta_exp[:,idx_gene]))])\n",
    "        dsplot(coords_tgt,None,s_cell=30,s_plaque=None,col_cell=np.array(delta_exp[:,idx_gene]),col_plaque='darkslategrey',alpha = 1,cmap_t = cmap_t, title=f'{gene_name_t} vmax = {str(round(v_max_t,2))}',vmax_t=v_max_t, output_path_t = f'{output_path_t3}/{gene_name_t}_dexp.pdf', coords0_mask=None, scale_bar_200=radius_px)\n",
    "        t.update()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}