Commit 86bd2cbc authored by Jorge Navarro Muñoz
Browse files

GCC: annotated GCF label instead of GCF number in the clans .tsv file

parent 1146bf36
...@@ -1689,6 +1689,7 @@ def clusterJsonBatch(bgcs, pathBase, className, matrix, pos_alignments, cutoffs= ...@@ -1689,6 +1689,7 @@ def clusterJsonBatch(bgcs, pathBase, className, matrix, pos_alignments, cutoffs=
except UnboundLocalError: except UnboundLocalError:
# Noticed this could happen if the sequences are exactly # Noticed this could happen if the sequences are exactly
# the same and all distances == 0 # the same and all distances == 0
+ if verbose:
print(" Warning: Unable to root at midpoint file {}".format(newick_file_path)) print(" Warning: Unable to root at midpoint file {}".format(newick_file_path))
pass pass
newick = tree.format("newick") newick = tree.format("newick")
...@@ -1699,7 +1700,7 @@ def clusterJsonBatch(bgcs, pathBase, className, matrix, pos_alignments, cutoffs= ...@@ -1699,7 +1700,7 @@ def clusterJsonBatch(bgcs, pathBase, className, matrix, pos_alignments, cutoffs=
newick_trees[exemplar_idx] = newick newick_trees[exemplar_idx] = newick
- ### Use the 0.5 distance cutoff to cluster clans by default
+ ### - - - GCC - - -
bs_similarity_families = [] bs_similarity_families = []
if clusterClans and cutoff == clanClassificationCutoff: if clusterClans and cutoff == clanClassificationCutoff:
# Detect if there's only 1 GCF. It makes pySAPC crash # Detect if there's only 1 GCF. It makes pySAPC crash
...@@ -1750,7 +1751,8 @@ def clusterJsonBatch(bgcs, pathBase, className, matrix, pos_alignments, cutoffs= ...@@ -1750,7 +1751,8 @@ def clusterJsonBatch(bgcs, pathBase, className, matrix, pos_alignments, cutoffs=
# affinity propagation can fail in some circumstances (e.g. only singletons) # affinity propagation can fail in some circumstances (e.g. only singletons)
if exemplarsClans is not None: if exemplarsClans is not None:
- clanLabels = [exemplarsClans[labelsClans[i]] for i in range(len(familyIdx))]
+ # translate and record GCF label instead of GCF number
+ clanLabels = [familyIdx[exemplarsClans[labelsClans[i]]] for i in range(len(familyIdx))]
else: else:
clanLabels = [] clanLabels = []
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment