
v0.1.7 #91

Merged · 17 commits · Aug 6, 2021
7 changes: 7 additions & 0 deletions .gitignore
@@ -19,3 +19,10 @@ dandelion/logging/version.py
reassigned_all/
reassigned_filtered/

data/pbmc3k_raw.h5ad
test_mouse/test_mouse_heavy_igblast_db-pass.tsv
test_mouse/test_mouse_heavy_igblast_db-pass_germ-pass.tsv
test_mouse/test_mouse_light_igblast_db-pass.tsv
test_mouse/test_mouse_light_igblast_db-pass_germ-pass.tsv
container/database/blast/human/human_BCR_C.fasta.nin
container/database/blast/mouse/mouse_BCR_C.fasta.nin
8 changes: 2 additions & 6 deletions dandelion/preprocessing/_preprocessing.py
@@ -2,7 +2,7 @@
# @Author: kt16
# @Date: 2020-05-12 17:56:02
# @Last Modified by: Kelvin
# @Last Modified time: 2021-08-04 10:44:53
# @Last Modified time: 2021-08-05 23:28:58

import os
import pandas as pd
@@ -2002,7 +2002,6 @@ def filter_contigs(data: Union[Dandelion, pd.DataFrame, str],
filter_missing: bool = True,
productive_only: bool = True,
simple: bool = False,
locus: Optional[Literal['ig', 'tr-ab', 'tr-gd']] = None,
save: Optional[str] = None,
**kwargs) -> Tuple[Dandelion, AnnData]:
"""
@@ -2074,9 +2073,6 @@ def filter_contigs(data: Union[Dandelion, pd.DataFrame, str],
else:
dat = dat_.copy()

if locus is None:
locus = best_guess_locus(dat)

adata_ = adata.copy()

if 'cell_id' not in dat.columns:
@@ -2235,7 +2231,7 @@ def filter_contigs(data: Union[Dandelion, pd.DataFrame, str],
adata_.obs['contig_QC_pass'] = pd.Series(contig_check['contig_QC_pass'])

print('Initializing Dandelion object')
out_dat = Dandelion(data=_dat, locus=locus, **kwargs)
out_dat = Dandelion(data=_dat, **kwargs)
if data.__class__ == Dandelion:
out_dat.germline = data.germline

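With this change, filter_contigs no longer accepts a `locus` keyword; the chain type is worked out when the Dandelion object is initialized. A minimal usage sketch after v0.1.7 (assumptions: `import dandelion as ddl`, an igblast-annotated AIRR tsv, and a matching gene-expression AnnData passed as the second argument; file names are placeholders):

import dandelion as ddl
import scanpy as sc

adata = sc.read("pbmc_gex.h5ad")  # hypothetical gene-expression object

# filter_contigs accepts a Dandelion object, a pandas DataFrame, or a path to an AIRR tsv;
# `locus=...` is no longer a valid keyword after this PR
vdj, adata = ddl.pp.filter_contigs("filtered_contig_igblast_db-pass.tsv", adata)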
22 changes: 2 additions & 20 deletions dandelion/tools/_diversity.py
@@ -2,7 +2,7 @@
# @Author: Kelvin
# @Date: 2020-08-13 21:08:53
# @Last Modified by: Kelvin
# @Last Modified time: 2021-07-31 22:16:53
# @Last Modified time: 2021-08-06 00:04:38

import pandas as pd
import numpy as np
@@ -128,7 +128,6 @@ def clone_diversity(
expanded_only: bool = False,
use_contracted: bool = False,
key_added: Optional[str] = None,
locus: Optional[Literal['ig', 'tr-ab', 'tr-gd']] = None,
**kwargs,
) -> Union[pd.DataFrame, Dandelion, AnnData]:
"""
@@ -174,8 +173,6 @@
This is to try and preserve the single-cell properties of the network.
key_added : str, list, Optional
column names for output.
locus : str
Mode of data. Only used for method = 'gini', Accepts one of 'ig', 'tr-ab' or 'tr-gd'. None defaults to 'ig'.
**kwargs
passed to dandelion.tl.generate_network
Returns
@@ -185,12 +182,6 @@
if downsample is not None:
resample = True

if locus is None:
if self.__class__ == Dandelion:
locus = best_guess_locus(self.data)
else:
locus = 'ig'

if method == 'gini':
if update_obs_meta:
diversity_gini(self,
@@ -206,7 +197,6 @@
expanded_only=expanded_only,
use_contracted=use_contracted,
key_added=key_added,
locus=locus,
**kwargs)
else:
return (diversity_gini(self,
@@ -222,7 +212,6 @@
expanded_only=expanded_only,
use_contracted=use_contracted,
key_added=key_added,
locus=locus,
**kwargs))
if method == 'chao1':
if update_obs_meta:
@@ -382,7 +371,6 @@ def diversity_gini(self: Union[Dandelion, AnnData],
expanded_only: bool = False,
use_contracted: bool = False,
key_added: Optional[str] = None,
locus: Optional[Literal['ig', 'tr-ab', 'tr-gd']] = None,
**kwargs) -> Union[pd.DataFrame, Dandelion]:
"""
Compute B cell clones Gini indices.
@@ -423,8 +411,6 @@
This is to try and preserve the single-cell properties of the network.
key_added : str, list, Optional
column names for output.
locus : str
Mode of data. Accepts one of 'ig', 'tr-ab' or 'tr-gd'. None defaults to 'ig'.
**kwargs
passed to dandelion.tl.generate_network
Returns
@@ -444,7 +430,6 @@ def gini_indices(self: Dandelion,
expanded_only: bool = False,
contracted: bool = False,
key_added: Optional[str] = None,
locus: Optional[Literal['ig', 'tr-ab', 'tr-gd']] = None,
**kwargs) -> pd.DataFrame:
if self.__class__ == AnnData:
raise TypeError('Only Dandelion class object accepted.')
@@ -541,7 +526,7 @@ def gini_indices(self: Dandelion,
# clone size distribution
_dat = metadata[metadata[groupby] == g]
_data = data[data['cell_id'].isin(list(_dat.index))]
ddl_dat = Dandelion(_data, metadata=_dat, locus=locus)
ddl_dat = Dandelion(_data, metadata=_dat)
if resample:
sizelist = []
if self.__class__ == Dandelion:
@@ -552,7 +537,6 @@
clone_key=clonekey,
downsample=minsize,
verbose=False,
locus=locus,
**kwargs)
if met == 'clone_network':
n_n, v_s, c_s = clone_networkstats(
@@ -685,7 +669,6 @@ def gini_indices(self: Dandelion,
generate_network(ddl_dat,
clone_key=clonekey,
verbose=False,
locus=locus,
**kwargs)
n_n, v_s, c_s = clone_networkstats(
ddl_dat,
@@ -794,7 +777,6 @@ def transfer_gini_indices(self: Dandelion, gini_results: pd.DataFrame,
expanded_only=expanded_only,
contracted=use_contracted,
key_added=key_added,
locus=locus,
**kwargs)

if diversity_key is None:
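The same keyword disappears from clone_diversity and diversity_gini, so a Gini-index run reduces to the remaining arguments. A hedged sketch (`vdj` is assumed to be a Dandelion object with clones already assigned, e.g. via find_clones, and `sample_id` is a placeholder metadata column):

import dandelion as ddl

# write per-group Gini indices back into vdj.metadata
ddl.tl.clone_diversity(vdj, groupby="sample_id", method="gini")

# or return the results instead of updating the object
gini_df = ddl.tl.clone_diversity(vdj, groupby="sample_id", method="gini",
                                 update_obs_meta=False)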
39 changes: 8 additions & 31 deletions dandelion/tools/_network.py
@@ -2,7 +2,7 @@
# @Author: Kelvin
# @Date: 2020-08-12 18:08:04
# @Last Modified by: Kelvin
# @Last Modified time: 2021-07-31 22:11:33
# @Last Modified time: 2021-08-06 00:24:57

import pandas as pd
import numpy as np
@@ -28,8 +28,6 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
min_size: int = 2,
downsample: Optional[int] = None,
verbose: bool = True,
locus: Optional[Literal['ig', 'tr-ab',
'tr-gd']] = None,
**kwargs) -> Dandelion:
"""
Generates a Levenshtein distance network based on full length VDJ sequence alignments for heavy and light chain(s).
@@ -49,8 +47,6 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
whether or not to downsample the number of cells prior to construction of network. If provided, cells will be randomly sampled to the integer provided. A new Dandelion class will be returned.
verbose : bool
whether or not to print the progress bars.
locus : str, Optional
Mode of data. Accepts one of 'ig', 'tr-ab' or 'tr-gd'. None defaults to 'ig'.
**kwargs
additional kwargs passed to options specified in `networkx.drawing.layout.spring_layout`.

@@ -81,9 +77,6 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
raise ValueError(
'Data does not contain clone information. Please run find_clones.')

if locus is None:
locus = best_guess_locus(dat)

dat = sanitize_data(dat, ignore=clonekey)

# calculate distance
@@ -108,21 +101,8 @@

# So first, create a data frame to hold all possible (full) sequences split by
# heavy (only 1 possible for now) and light (multiple possible)
try:
dat_seq = retrieve_metadata(dat_,
query=key_,
split=True,
collapse=False,
locus=locus,
ignore=clonekey)
except:
dat_seq = retrieve_metadata(dat_,
query=key_,
split=True,
collapse=False,
locus=locus,
ignore=clonekey,
**kwargs)
querier = Query(dat_)
dat_seq = querier.retrieve(query = key_, retrieve_mode = 'split')
dat_seq.columns = [re.sub(key_ + '_', '', i) for i in dat_seq.columns]

# calculate a distance matrix for all vs all and this can be referenced later on to
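The try/except around the two retrieve_metadata calls is collapsed into a single Query helper. An annotated restatement of the new path, using the names from the hunk above (the import location of Query within dandelion's utilities is an assumption; it is not shown in this diff):

import re
from dandelion.utilities import Query  # assumed import path

# dat_ is the contig-level AIRR table; key_ names the sequence column used for
# the Levenshtein distance calculation (e.g. 'sequence_alignment')
querier = Query(dat_)
dat_seq = querier.retrieve(query=key_, retrieve_mode='split')  # cell-indexed, one column per chain
# strip the leading '<key_>_' prefix so columns are labelled by chain only
dat_seq.columns = [re.sub(key_ + '_', '', i) for i in dat_seq.columns]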
@@ -158,9 +138,9 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
if self.__class__ == Dandelion:
out = self.copy()
if downsample is not None:
out = Dandelion(dat_, locus=locus)
out = Dandelion(dat_)
else: # re-initiate a Dandelion class object
out = Dandelion(dat_, locus=locus)
out = Dandelion(dat_)

tmp_totaldist = pd.DataFrame(total_dist,
index=dat_seq.index,
@@ -351,7 +331,7 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
deep=('Updated Dandelion object: \n'
' \'data\', contig-indexed clone table\n'
' \'metadata\', cell-indexed clone table\n'
' \'distance\', heavy and light chain distance matrices\n'
' \'distance\', distance matrices for VDJ- and VJ- chains\n'
' \'edges\', network edges\n'
' \'layout\', network layout\n'
' \'graph\', network'))
@@ -371,8 +351,7 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
edges=edge_list_final,
layout=(lyt, lyt_),
graph=(g, g_),
germline=germline_,
locus=locus)
germline=germline_)
out.threshold = threshold_
return (out)
else:
@@ -383,7 +362,6 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
layout=(lyt, lyt_),
graph=(g, g_),
germline=germline_,
locus=locus,
initialize=False)
self.threshold = threshold_
else:
@@ -393,8 +371,7 @@ def generate_network(self: Union[Dandelion, pd.DataFrame, str],
edges=edge_list_final,
layout=(lyt, lyt_),
graph=(g, g_),
clone_key=clone_key,
locus=locus)
clone_key=clone_key)
return (out)


Expand Down
Loading