Commit a9d2d57d authored by Carlos de Lannoy's avatar Carlos de Lannoy
Browse files

update tool comparison

parent da099f7a
......@@ -30,9 +30,10 @@ Directories `simulated` and `in_vitro` are structured as follows:
## Rerunning analysis
To re-run the data evaluation, download this repo and cd to the root folder.
First install the included conda environment:
First install and activate the included conda environment:
```
conda env create -f scripts/env.yml
conda activate FRETboard_evaluation
```
The analysis can then be rerun (from the root folder) from the command line:
```
......
#!/bin/bash
set -euo pipefail
# EXECUTE_ANALYSIS.sh
#
# description: Analyze FRET trace analyses generated by FRETboard and generate figures as shown in
# the FRETboard paper.
# usage: In the repo main directory, execute this script using bash. ensure that the conda
# environment in scripts/env.yml is installed first!
source activate FRETboard_evaluation
# Analyze simulated data classification --> works
for f in simulated/data/*; do
bn=$(basename $f)
python scripts/evaluate_traces.py --fb "simulated/eval/${bn}_fb/" --manual "${f}/dats_labeled" \
--target-states $(cat "${f}/target_states.txt") \
--outdir "simulated/eval/${bn}_eval/"
done
python scripts/evaluate_traces.py --fb "simulated/eval_supplementary/5_10_state_equidist_fb/" \
--manual "simulated/data/4_10_state_equidist/dats_labeled/" \
--target-states 1 2 3 4 5 6 7 8 9 10 11 \
--outdir "simulated/eval_supplementary/5_10_state_equidist_eval/"
# environment in scripts/env.yml is installed and activated first!
# Analyze simulated data classification
#for f in simulated/data/*; do
# bn=$(basename $f)
# echo "Evaluating traces for ${bn}..."
# python scripts/evaluate_traces.py --fb "simulated/eval/${bn}_fb/" --manual "${f}/dats_labeled" \
# --target-states $(cat "${f}/target_states.txt") \
# --outdir "simulated/eval/${bn}_eval/"
# echo "Done"
#done
#
#echo "evaluating traces for 5_10_state_equidist_fb..."
#python scripts/evaluate_traces.py --fb "simulated/eval_supplementary/5_10_state_equidist_fb/" \
# --manual "simulated/data/4_10_state_equidist/dats_labeled/" \
# --target-states 1 2 3 4 5 6 7 8 9 10 11 \
# --outdir "simulated/eval_supplementary/5_10_state_equidist_eval/"
#echo "Done"
# Generate fig. 3
python scripts/generate_sim_fig.py --eval-dirs simulated/eval/*_eval --cat-names 1_2_state_don_eval 2_3_state_don 3_3_state_kinetic 4_10_state_equidist --out-svg "simulated/eval/sim_figure.svg"
......@@ -28,8 +31,6 @@ python scripts/generate_sim_fig.py --eval-dirs simulated/eval/*_eval --cat-names
# Generate supplementary figure S3
python scripts/generate_sim_fig.py --eval-dirs simulated/eval_supplementary/*_eval --cat-names 5_10_state_equidist --out-svg "simulated/eval_supplementary/sim_10_state_equidist_vanilla.svg"
# Analyze in vitro data classification
for f in in_vitro/data/*; do
bn=$(basename $f)
......
import os, sys, argparse
from os.path import splitext, basename
import numpy as np
from sklearn.cluster import DBSCAN
import pandas as pd
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
sys.path.append(__location__)
from helper_functions import parse_input_path, parse_output_dir
def bg_filter_trace(tr, eps, f_aex=None):
    """Subtract an estimated background intensity level from a fluorescence trace.

    The background is estimated by DBSCAN-clustering the lowest 20% of
    intensity values and taking the minimum of the cluster medians. When no
    cluster is found at all, the overall minimum intensity is used instead.

    :param tr: 1D numpy array of fluorescence intensities
    :param eps: DBSCAN eps parameter; pass NaN to skip filtering entirely
    :param f_aex: optional second trace (acceptor-excitation channel), pooled
        with tr for background estimation and corrected by the same value
    :return: background-corrected copy of tr, or a (tr, f_aex) tuple when
        f_aex is given
    """
    if np.isnan(eps):
        # No filtering requested: return copies so callers can mutate the
        # result without touching the inputs. (Fix: the f_aex branch
        # previously returned the original, uncopied arrays, inconsistent
        # with the single-trace branch.)
        if f_aex is None:
            return np.copy(tr)
        return np.copy(tr), np.copy(f_aex)
    min_clust = 10  # DBSCAN min_samples: smallest point group treated as a cluster
    # Pool both channels when f_aex is given, so one shared background is found
    if f_aex is not None:
        tr_joined = np.concatenate((tr, f_aex))
    else:
        tr_joined = tr
    # Only the lowest 20% of intensities are background candidates
    # (renamed from pc10, which wrongly suggested the 10th percentile)
    low_cutoff = np.percentile(tr_joined, 20)
    tr_low = tr_joined[tr_joined < low_cutoff]
    clust = DBSCAN(eps=eps, min_samples=min_clust).fit(tr_low.reshape(-1, 1)).labels_
    if np.sum(np.unique(clust) != -1) == 0:
        # Everything was labeled noise (-1): fall back to the global minimum
        bg = np.min(tr_joined)
    else:
        # NOTE(review): the noise label (-1) is included in the median list
        # here, matching the original behavior — confirm this is intended.
        med_list = np.array([np.median(tr_low[clust == lab]) for lab in np.unique(clust)])
        bg = np.min(med_list)
    if f_aex is None:
        return tr - bg
    return tr - bg, f_aex - bg
# Command-line entry point: background-filter every .dat trace found under
# --in-dir with bg_filter_trace and write the corrected traces to --out-dir.
# (Fix: the two help-text fragments concatenated to "toolsthat" — a space
# was missing at the join point.)
parser = argparse.ArgumentParser(description='Apply DBSCAN filter and save traces. Used to generate data for tools '
                                             'that do not have a background intensity filter.')
parser.add_argument('--in-dir', type=str, required=True)
parser.add_argument('--eps', type=float, default=15)
parser.add_argument('--add-fret-trace', action='store_true')
parser.add_argument('--out-dir', type=str, required=True)
args = parser.parse_args()

dat_list = parse_input_path(args.in_dir, pattern='*.dat')
out_dir = parse_output_dir(args.out_dir)

for dat_fn in dat_list:
    # Traces are header-less tab-separated files: time, donor, acceptor
    original_df = pd.read_csv(dat_fn, header=None, names=['time', 'f_dex_dem_raw', 'f_dex_aem_raw'], sep='\t')
    original_df.f_dex_dem_raw = bg_filter_trace(original_df.f_dex_dem_raw.to_numpy(), eps=args.eps)
    original_df.f_dex_aem_raw = bg_filter_trace(original_df.f_dex_aem_raw.to_numpy(), eps=args.eps)
    if args.add_fret_trace:
        # Proximity ratio E = I_A / (I_A + I_D); 0/0 frames yield NaN, zeroed below
        original_df.loc[:, 'e_fret'] = original_df.f_dex_aem_raw / (original_df.f_dex_aem_raw + original_df.f_dex_dem_raw)
        original_df.fillna(0, inplace=True)
    original_df.to_csv(f'{out_dir}{basename(dat_fn)}', header=False, index=False, sep='\t')
......@@ -12,24 +12,32 @@ __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file
sys.path.append(__location__)
from helper_functions import parse_input_path, parse_output_dir
from sklearn.cluster import KMeans
colors = ['#1b9e77', '#d95f02', '#7570b3', '#e7298a', '#66a61e']
def plot_tool_tr_bars(df, ax):
def plot_tool_tr_bars(df, ax, color_dict):
manual_df = df.query('tool == "manual"').copy()
plot_df = df.query('tool != "manual"').copy()
tool_list = plot_df.tool.unique()
tool_list = list(plot_df.tool.unique())
tool_list.sort()
tool_list.remove('FRETboard')
tool_list = tool_list + ['FRETboard']
nb_tools = len(tool_list)
tridx_dict = {tr: it for it, tr in enumerate(plot_df.transition.unique())}
plot_dists = np.linspace(-1 * (nb_tools // 2), nb_tools // 2, nb_tools) * 0.1
tn_dict = {tn: plot_dists[it] for it, tn in enumerate(tool_list)}
plot_df.loc[:, 'x'] = plot_df.apply(lambda x: tridx_dict[x.transition] + tn_dict[x.tool], axis=1)
color_dict = {tn: colors[it] for it, tn in enumerate(tool_list)}
# color_dict = {tn: colors[it] for it, tn in enumerate(tool_list)}
plot_df.loc[:, 'color'] = plot_df.tool.apply(lambda x: color_dict[x])
manual_df.loc[:, 'lb'] = manual_df.transition.apply(lambda x: plot_df.query(f'transition == "{x}"').loc[:, 'x'].min()) - 0.05
manual_df.loc[:, 'rb'] = manual_df.transition.apply(lambda x: plot_df.query(f'transition == "{x}"').loc[:, 'x'].max()) + 0.05
manual_df.loc[:, 'lb'] = manual_df.transition.apply(lambda x: tridx_dict[x] + plot_dists[0] - 0.2)
manual_df.loc[:, 'rb'] = manual_df.transition.apply(lambda x: tridx_dict[x] + plot_dists[-1] + 0.2)
# manual_df.loc[:, 'lb'] = plot_dists[0] + np.arange(len(manual_df)) - 0.05
# manual_df.loc[:, 'rb'] = plot_dists[-1] + np.arange(len(manual_df)) + 0.05
if exp == '4_kinetic_fingerprint':
cp=1
# fig, ax = plt.subplots(nrows=1)
ax.errorbar(x=plot_df.x, y=plot_df.tr,
......@@ -43,7 +51,7 @@ def plot_tool_tr_bars(df, ax):
ax.set_xticks(ticks=list(range(len(tridx_dict))))
ax.set_xticklabels(labels=[tr.replace('_', '') for tr in tridx_dict])
def plot_efret_dists(df, ax, style='box'):
def plot_efret_dists(df, ax, color_dict, style='box'):
"""
Plot E_FRET distributions in histograms or boxplots. If Histograms, [ax] should be a list of
axes with len(ax) == nb states.
......@@ -51,32 +59,37 @@ def plot_efret_dists(df, ax, style='box'):
df = df.loc[np.invert(np.logical_or(np.abs(df.efret) == np.inf, df.efret.isna())), :].copy()
manual_df = df.query('tool == "manual"').copy()
plot_df = df.query('tool != "manual"').copy()
tool_list = plot_df.tool.unique()
tool_list.sort()
tool_list = list(tool_list[tool_list != 'FRETboard']) + ['FRETboard']
c_list = [color_dict[tool] for tool in tool_list]
if style == 'box':
box_colors = colors[:len(df.tool.unique())-1] + ['#ffffff']
# box_colors = [(0,0,0,1)] * len(df.tool.unique())
sns.boxplot(x='state', y='efret', hue='tool', data=df, palette=sns.color_palette(box_colors),
c_list = c_list + ['#ffffff']
tool_list = tool_list + ['manual']
sns.boxplot(x='state', y='efret', hue='tool',
hue_order=tool_list, palette=sns.color_palette(c_list),
data=df,
width=0.3, showfliers=False, ax=ax)
ax.set_ylim((-1, 2))
ax.set_xlabel('')
ax.set_ylabel('$E_{PR}$')
elif style == 'hist':
hist_colors = colors[:len(df.tool.unique()) - 1]
states = df.state.unique()
for ax_id, st in enumerate(states):
if ax_id >= len(ax): break
sns.histplot(x='efret', color='grey', data=manual_df.query(f'state == {st}'),
stat='count', bins=np.arange(0,1,0.01),
element='step', fill=True, ax=ax[ax_id]
)
sns.histplot(x='efret', hue='tool', data=plot_df.query(f'state == {st}'),
stat='count', bins=np.arange(0,1,0.01), palette=sns.color_palette(hist_colors),
stat='count', bins=np.arange(0,1,0.01), palette=sns.color_palette(c_list),
element='step', fill=False, ax=ax[ax_id]
)
ax[ax_id].set_xlim((0,1))
ax[ax_id].set_xlabel('$E_{PR}$')
def parse_fretboard_results(in_dir):
def parse_fretboard_results(in_dir, soi):
# --- parse transition rates ---
ci_limits = np.load(f'{in_dir}/summary_stats/transitions.tsv.npy')[0, :, :].reshape(-1)
trdf = pd.read_csv(f'{in_dir}/summary_stats/transitions.tsv', sep='\t', header=0, names=['FRETboard', 'manual'])
......@@ -86,6 +99,8 @@ def parse_fretboard_results(in_dir):
trdf.loc[trdf.tool == 'FRETboard', 'ci_low'] = ci_limits[:len(ci_limits) // 2]
trdf.loc[trdf.tool == 'FRETboard', 'ci_high'] = ci_limits[len(ci_limits) // 2:]
trdf.loc[trdf.tool == 'manual', ('ci_low', 'ci_high')] = 0.0
if exp == '4_kinetic_fingerprint':
cp=1
# --- parse efret values ---
efdf_list = []
......@@ -103,15 +118,16 @@ def parse_fretboard_results(in_dir):
efdf_manual.loc[:, 'tool'] = 'manual'
efdf = pd.concat((efdf, efdf_manual))
# todo Quick fix: remove trash states
states_of_interest = set(chain.from_iterable([tr.split('_') for tr in trdf.transition.unique()]))
states_of_interest = [int(s) for s in states_of_interest]
efdf = efdf.query(f'state in {str(states_of_interest)}')
efdf = efdf.query(f'state in {str(soi)}').copy()
# # todo Quick fix: remove trash states
# states_of_interest = set(chain.from_iterable([tr.split('_') for tr in trdf.transition.unique()]))
# states_of_interest = [int(s) for s in states_of_interest]
# efdf = efdf.query(f'state in {str(states_of_interest)}')
return efdf, trdf
def parse_ebfret_results(in_dir, framerate):
def parse_ebfret_results(in_dir, framerate, soi):
# --- parse original summary ---
with open(f'{in_dir}/ebFRET_analyzed_summary.csv') as fh:
......@@ -119,9 +135,11 @@ def parse_ebfret_results(in_dir, framerate):
eb_params_txt = re.search('Parameters[\sa-zA-Z.,0-9_+-]+', eb_txt).group(0)
nb_states = int(re.search('(?<=Num_States,)[0-9]+', eb_params_txt).group(0))
tr_names = [f'{tr[0]}_{tr[1]}' for tr in permutations(np.arange(1, nb_states + 1), 2)]
eb_means_txt = re.search('(?<=Center)[\sa-zA-Z.,0-9_+-]+(?=Precision)', eb_params_txt).group(0).strip().replace(' ', '')
efret_df = pd.read_csv(StringIO(eb_means_txt), names=list(range(1,nb_states + 1))).T
efret_dict[exp]['ebFRET'] = {ri: {'mean': r.loc['Mean'], 'sd': r.loc['Std']} for ri, r in efret_df.iterrows()}
# eb_means_txt = re.search('(?<=Center)[\sa-zA-Z.,0-9_+-]+(?=Precision)', eb_params_txt).group(0).strip().replace(' ', '')
# efret_df = pd.read_csv(StringIO(eb_means_txt), names=list(range(1,nb_states + 1))).T
# efret_dict[exp]['ebFRET'] = {ri: {'mean': r.loc['Mean'], 'sd': r.loc['Std']} for ri, r in efret_df.iterrows()}
eb_trans_txt = re.search('(?<=Transition_Matrix)[\sa-zA-Z.,0-9_+-]+', eb_params_txt).group(0).strip().replace(' ', '')
eb_trans_means_txt = re.search('(?<=Mean,)[\sa-zA-Z.,0-9_+-]+(?=Std)', eb_trans_txt).group(0).replace('\n,', '\n')
transition_probs = np.genfromtxt(StringIO(eb_trans_means_txt), delimiter=',')
......@@ -133,22 +151,29 @@ def parse_ebfret_results(in_dir, framerate):
with open(f'{in_dir}/ebFRET_analyzed_summary_bootstrapped.csv') as fh:
eb_bs_txt = fh.read()
tr_bs_txt = re.search('(?<=bootstrap_tr\n\s{4}tr)[\sa-zA-Z.,0-9_+-]+', eb_bs_txt).group(0)
bs_df = pd.read_csv(StringIO(tr_bs_txt), names=tr_names)
bs_df = pd.read_csv(StringIO(tr_bs_txt), names=[f'{tr[1]}_{tr[0]}' for tr in permutations(np.arange(1, nb_states + 1), 2)]) # order of columns was accidentally reversed for bootstrap script!
for cn in bs_df.columns:
cur_mean, cur_sd = bs_df.loc[:, cn].mean(), bs_df.loc[:, cn].std()
ci_low, ci_high = cur_mean - 2 * cur_sd, cur_mean + 2 * cur_sd
trdf.loc[cn, 'ci_low'] = trdf.loc[cn, 'tr'] - ci_low
trdf.loc[cn, 'ci_high'] = ci_high - trdf.loc[cn, 'tr']
trdf.reset_index(inplace=True)
# --- parse classified traces ---
edf = pd.read_csv(f'{in_dir}/ebFRET_analyzed.dat', sep='\s+', names=['trace_nb', 'd', 'a', 'state'])
edf.state = edf.state.astype(int)
edf = edf.query(f'state in {str(soi)}').copy()
edf.loc[:, 'efret'] = edf.a / (edf.d + edf.a)
edf.drop(['d', 'a', 'trace_nb'], inplace=True, axis=1)
edf.loc[:, 'tool'] = 'ebFRET'
return edf, trdf
def parse_mashfret_results(in_dir):
cp=1
def parse_mashfret_results(in_dir, soi):
"""
Parse contents of MASH-FRET results directory. Special attention given to:
- kinetic fingerprinting experiment: transition rates need to be separated out
- 3-state E_FRET difference experiment: stasi results need a clustering step
"""
# parse transition rates
fit_fn_list = parse_input_path(f'{in_dir}/kinetics', pattern='*.fit')
fit_dict = {tuple(int(a) for a in re.search('[0-9]+to[0-9]+', fit_fn).group(0).split('to')): fit_fn for fit_fn in
......@@ -156,45 +181,71 @@ def parse_mashfret_results(in_dir):
unique_fret_values = np.unique(np.array(list(fit_dict)).reshape(-1))
unique_fret_values.sort()
efret2num_dict = {ufv: i + 1 for i, ufv in enumerate(unique_fret_values)} # fret states sorted low to high
dt_df = pd.DataFrame(0.0, index=pd.MultiIndex.from_tuples(list(permutations(unique_fret_values, 2))),
tr_indices = list(permutations(unique_fret_values, 2))
if exp == '4_kinetic_fingerprint':
tr_indices = list(chain.from_iterable([[list(tr) + [0], list(tr) + [1]] for tr in tr_indices]))
dt_df = pd.DataFrame(0.0, index=pd.MultiIndex.from_tuples(tr_indices),
columns=['mean', 'sd'])
for fit_tup in fit_dict:
if fit_tup[0] == fit_tup[1]: continue
with open(fit_dict[fit_tup], 'r') as fh:
block_reached = False
for line in fh.readlines():
if 'fitting results (bootstrap)' in line: block_reached = True
if block_reached and '(s):' in line:
dt_df.loc[fit_tup] = [float(a.strip()) for a in line.split('\t')[-2:]]
break
fit_txt = fh.read()
fit_txt_sub = re.search('(?<=fitting results \(bootstrap\):)[\sa-zA-Z.,0-9_+-:\(\)]+', fit_txt).group(0).strip().replace('\n\t', '\n').replace(':', '')
fit_df = pd.read_csv(StringIO(fit_txt_sub), sep='\t').set_index('parameter')
if exp == '4_kinetic_fingerprint':
dt_df.loc[tuple(list(fit_tup) + [0])] = list(fit_df.loc['b_1(s)', :])
dt_df.loc[tuple(list(fit_tup) + [1])] = list(fit_df.loc['b_2(s)', :])
else:
dt_df.loc[fit_tup] = list(fit_df.loc['b_1(s)', :])
trdf = pd.DataFrame(index=dt_df.index, columns=['tr', 'ci_low', 'ci_high'])
trdf.loc[:, 'tr'] = 1 / dt_df.loc[:, 'mean']
trdf.loc[:, 'ci_high'] = 1 / (dt_df.loc[:, 'mean'] - dt_df.loc[:, 'sd']) - trdf.tr
trdf.loc[:, 'ci_low'] = trdf.tr - 1 / (dt_df.loc[:, 'mean'] + dt_df.loc[:, 'sd'])
trdf.loc[:, 'transition'] = trdf.apply(lambda x: f'{efret2num_dict[x.name[0]]}_{efret2num_dict[x.name[1]]}', axis=1).to_list()
if exp == '3_tandem_10nt':
cp=1
if exp == '4_kinetic_fingerprint':
trdf.reset_index(2, inplace=True)
trdf.rename({'level_2': 'b'}, inplace=True, axis=1)
departure_state = np.array(list(trdf.index))[:, 0]
trdf.loc[:, 'bmod'] = departure_state == departure_state.min()
trdf.loc[trdf.bmod, 'transition'] = trdf.loc[trdf.bmod, :].apply(lambda x: f'{efret2num_dict[x.name[0]]}_{int(efret2num_dict[x.name[1]] + x.b)}', axis=1).to_list()
trdf.loc[np.invert(trdf.bmod), 'transition'] = trdf.loc[np.invert(trdf.bmod), :].apply(lambda x: f'{int(efret2num_dict[x.name[0]] + x.b) }_{efret2num_dict[x.name[1]]}', axis=1).to_list()
else:
trdf.loc[:, 'transition'] = trdf.apply(lambda x: f'{efret2num_dict[x.name[0]]}_{efret2num_dict[x.name[1]]}', axis=1).to_list()
trdf.loc[:, 'tool'] = 'MASH-FRET'
trdf.reset_index(drop=True, inplace=True)
# parse efret from backsimulation results
# parse efret states from stasi results
trace_list = []
sl_list = []
for fn in parse_input_path(f'{in_dir}/traces_ASCII', pattern='*.txt'):
trace_list.append(pd.read_csv(fn, skiprows=1, sep='\t', usecols=['FRET', 'state sequence']))
tr = pd.read_csv(fn, sep='\t', usecols=['FRET_1>2', 'discr.FRET_1>2'])
tr.rename({'FRET_1>2': 'efret', 'discr.FRET_1>2': 'ed'}, axis=1, inplace=True)
state_levels = tr.ed.unique()
sl_list.append(state_levels)
if exp != '3_tandem_10nt':
state_levels.sort()
st_dict = {sl:si + 1 for si, sl in enumerate(state_levels)}
tr.loc[:,'state'] = tr.ed.apply(lambda x: st_dict[x])
tr.drop('ed', axis=1, inplace=True)
trace_list.append(tr)
efdf = pd.concat(trace_list)
efdf.rename({'FRET': 'efret', 'state sequence': 'state'}, inplace=True, axis=1)
if exp == '3_tandem_10nt':
sl_list = np.concatenate(sl_list).reshape(-1, 1)
km = KMeans(n_clusters=max(soi)).fit(sl_list)
ef2state_dict = {ef: lab + 1 for ef, lab in zip(list(sl_list.reshape(-1)), list(km.labels_))}
efdf.loc[:, 'state'] = efdf.ed.apply(lambda x: ef2state_dict[x])
efdf = efdf.query(f'state in {str(soi)}').copy()
efdf.loc[:, 'tool'] = 'MASH-FRET'
efdf = efdf.query('state != -1').copy()
# reorder states in order of mean, to match transition rate state numbering
st_mean_dict = {}
for st, sdf in efdf.groupby('state'):
st_mean_dict[st] = sdf.efret.mean()
st_order = list(st_mean_dict)
st_order.sort(key=lambda x: st_mean_dict[x])
st_order_dict = {so+1: sn for so, sn in enumerate(st_order)}
efdf.state = efdf.state.apply(lambda x: st_order_dict[x])
return efdf, trdf
# def parse_deepfret_results(ed, framerate):
# with open()
#
# return efdf, trdf
parser = argparse.ArgumentParser(description='Plot performance of different tools for side-by-side comparison')
parser.add_argument('--eval-dir', type=str, required=True)
......@@ -202,12 +253,18 @@ parser.add_argument('--experiments', type=str, nargs='+', required=True,
help='String to identify directories for the same dataset analyzed by different tools')
parser.add_argument('--framerate', type=float, default=10.0,
help='recording frame raterequired to translate transition probs to transition rates')
parser.add_argument('--data-dir',type=str, required=True)
parser.add_argument('--data-dir', type=str, required=True)
parser.add_argument('--out-dir', type=str, required=True)
args = parser.parse_args()
out_dir = parse_output_dir(args.out_dir)
# Collect states of interest
soi_dict = {}
for exp in args.experiments:
with open(f'{args.data_dir}/{exp}/target_states.txt') as fh:
soi_dict[exp] = [int(s) for s in fh.read().strip().split(' ')]
eval_dirs = [ed[0] for ed in os.walk(args.eval_dir)]
trdf_dict = {}
efret_df_dict = {}
......@@ -220,22 +277,28 @@ for en in args.experiments:
efret_dict[en] = {}
# Collect E_FRET mean,sd, transition rates
tool_list = []
for exp in exp_dict:
for ed in exp_dict[exp]:
if ed.endswith('_eval'): # the fretboard dir
efdf, trdf = parse_fretboard_results(ed)
efdf, trdf = parse_fretboard_results(ed, soi_dict[exp])
elif ed.endswith('_mash'):
efdf, trdf = parse_mashfret_results(ed)
# if exp == '3_tandem_10nt':
# continue # Analysis failed
efdf, trdf = parse_mashfret_results(ed, soi_dict[exp])
elif ed.endswith('_ebFRET'):
efdf, trdf = parse_ebfret_results(ed, args.framerate)
efdf, trdf = parse_ebfret_results(ed, args.framerate, soi_dict[exp])
# elif ed.endswith('_DeepFRET'):
# pass
# efdf, trdf = parse_deepfret_results(ed, args.framerate)
else:
continue
tool_list.append(trdf.loc[0, 'tool'])
trdf_dict[exp].append(trdf)
efret_df_dict[exp].append(efdf)
tool_list = set(tool_list)
# --- plotting ---
color_dict = {tool: colors[ti] for ti, tool in enumerate(tool_list)}
nb_exp = len(exp_dict)
fig = plt.figure(constrained_layout=False, figsize=(48/2.54, 40/2.54))
gs = gridspec.GridSpec(2, nb_exp, figure=fig, wspace=0.2, hspace=0.30)
......@@ -261,12 +324,12 @@ for cidx, exp in enumerate(exp_dict):
gs_hists = gridspec.GridSpec(nb_states, 1, figure=fig_hists,
hspace=0.30)
ax_list = [fig_hists.add_subplot(gs_hists[ii, 0]) for ii in range(nb_states)]
plot_efret_dists(pd.concat(efret_df_dict[exp]), ax_list, 'hist')
plot_efret_dists(pd.concat(efret_df_dict[exp]), ax_list, color_dict, 'hist')
fig_hists.savefig(f'{out_dir}efret_hists_{exp}.svg')
# add plots to composition figures
plot_efret_dists(pd.concat(efret_df_dict[exp]), ax_dict['efret'])
plot_tool_tr_bars(pd.concat(trdf_dict[exp]), ax_dict['transition'])
plot_efret_dists(pd.concat(efret_df_dict[exp]), ax_dict['efret'], color_dict, 'box')
plot_tool_tr_bars(pd.concat(trdf_dict[exp]), ax_dict['transition'], color_dict)
fig.savefig(f'{out_dir}tool_comparison_composed.svg')
import os, sys, argparse
from os.path import splitext, basename
import numpy as np
import pandas as pd
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
sys.path.append(__location__)
from helper_functions import parse_input_path, parse_output_dir
# Stack all FRETboard-format .dat traces found under --in-dir into the single
# space-separated file that ebFRET reads, and write a companion CSV mapping
# each trace number back to its original file name.
parser = argparse.ArgumentParser(description='Convert multiple dats in FRETboard format'
                                             ' to single file of stacked dat format read by ebFRET')
parser.add_argument('--in-dir', type=str, required=True)
parser.add_argument('--out-dir', type=str, required=True)
args = parser.parse_args()

dat_fn_list = parse_input_path(args.in_dir, pattern='*.dat')
out_dir = parse_output_dir(args.out_dir)

num2fn_df = pd.DataFrame(index=np.arange(1, len(dat_fn_list) + 1), columns=['dat_fn'])
stacked_frames = []
for trace_nb, dat_fn in enumerate(dat_fn_list, start=1):
    # Trace numbering is 1-based to match ebFRET's labeling
    num2fn_df.loc[trace_nb] = basename(dat_fn)
    with open(dat_fn, 'r') as fh:
        # Drop the time column (index 0); keep donor and acceptor intensities
        dat_array = np.array([line.strip().split('\t')[1:] for line in fh.readlines()]).astype(float)
    stacked_frames.append(pd.DataFrame({'label': trace_nb,
                                        'D': dat_array[:, 0], 'A': dat_array[:, 1]}))

df = pd.concat(stacked_frames)
df.to_csv(f'{out_dir}ebFRET_stacked.dat', header=False, index=False, sep=' ')
num2fn_df.to_csv(f'{out_dir}ebFRET_numdict.csv', header=True, index=True)
......@@ -359,7 +359,7 @@ for cat, df in tdp_df.groupby('category'):
for lab in efret_means[cat]:
plt.axvline(efret_means[cat][lab], color='black', ls='--')
plt.axhline(efret_means[cat][lab], color='black', ls='--')
sns.kdeplot(df.departure_efret, df.arrival_efret, shade=True, cmap="coolwarm", ax=ax)
sns.kdeplot(x=df.departure_efret, y=df.arrival_efret, bw_method=0.1, cmap="coolwarm", ax=ax)
# ax.collections[0].set_color('#3b4cc0')
ax.set_facecolor('#4961d2')
ax.set_xlabel('$E_{PR}$ before')
......@@ -378,7 +378,7 @@ confusion_df = confusion_df.rename_axis(['category', 'state']).reset_index()
confusion_df.sort_values(['category', 'state'], inplace=True)
confusion_df.to_csv(f'{summary_dir}/confusion.tsv', header=True, index=True, sep='\t')
pr = sns.scatterplot(x='recall', y='precision', style='state', hue='category',
markers=('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd', 'P', 'X'),
markers=('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd', 'P', 'X')[:len(confusion_df.state.unique())],
data=confusion_df)
# pr.legend_.remove()
plt.xlim(0, 1)
......@@ -488,8 +488,8 @@ for cat in args.categories:
edf[edf < 0] = 0.0
col = colmap(ts / max(args.target_states))
sns.kdeplot(epdf, color=col, legend=False, bw=0.1)
sns.kdeplot(edf, color=col, linestyle='--', legend=False, bw=0.1)
sns.kdeplot(epdf, color=col, legend=False, bw_method=0.1)
sns.kdeplot(edf, color=col, linestyle='--', legend=False, bw_method=0.1)
ttest_df.loc[(cat, ts), ('nb_events_pred', 'nb_events_true')] = len(epdf), len(edf)
if len(edf) and len(epdf):
ttest_df.loc[(cat, ts), 'ttost_p'] = ttost_ind(epdf, edf,
......
import os, sys, argparse
from io import StringIO
from os.path import splitext, basename
import pandas as pd
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
sys.path.append(__location__)
from helper_functions import parse_input_path, parse_output_dir
# Recompute raw and discretized FRET-efficiency columns from the donor and
# acceptor intensity columns of MASH-FRET ASCII trace exports, writing the
# reduced tables to --out-dir.
parser = argparse.ArgumentParser(description='Calculate discretized fret from discretized don/acc')
parser.add_argument('--ascii-dir', type=str, required=True)
parser.add_argument('--out-dir', type=str, required=True)
args = parser.parse_args()

out_dir = parse_output_dir(args.out_dir)
trace_fn_list = parse_input_path(args.ascii_dir, pattern='*.txt')

for tr_fn in trace_fn_list:
    new_fn = out_dir + basename(tr_fn)
    with open(tr_fn, 'r') as fh:
        tr_txt = fh.read()
    # Exports carry a trailing tab per row; strip it so pandas does not
    # create a spurious empty column.
    trace_df = pd.read_csv(StringIO(tr_txt.replace('\t\n', '\n')), sep='\t', index_col=None)
    don = trace_df.loc[:, 'I_1 at 376nm(counts)']
    acc = trace_df.loc[:, 'I_2 at 376nm(counts)']
    don_d = trace_df.loc[:, 'discr.I_1 at 376nm(counts)']
    acc_d = trace_df.loc[:, 'discr.I_2 at 376nm(counts)']
    # Proximity ratio: acceptor / (donor + acceptor), raw and discretized
    trace_df.loc[:, 'FRET_1>2'] = acc / (don + acc)
    trace_df.loc[:, 'discr.FRET_1>2'] = acc_d / (don_d + acc_d)
    trace_df = trace_df.loc[:, ('time at 376nm', 'I_1 at 376nm(counts)', 'I_2 at 376nm(counts)', 'FRET_1>2', 'discr.FRET_1>2')]
    trace_df.to_csv(new_fn, sep='\t', index=False)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment