### PopPoly method for diploid & polyploid haplotyping in F1 populations

parents
bamprocess.py 0 → 100755
This diff is collapsed.
branchprune.py 0 → 100755
This diff is collapsed.
This diff is collapsed.
genotypes.py 0 → 100755
This diff is collapsed.
haplotypes.py 0 → 100755
This diff is collapsed.
logprob.c 0 → 100755
This diff is collapsed.
logprob.pyx 0 → 100755
 # Written by Ehsan Motazedi, Wageningen UR, 05-02-2017. # Last Updated: 01-09-2017 import math import sys cpdef int _xor(x, y): return 0 if str(x)==str(y) else 1 cpdef Hamming(a, b, null_lst = [".", "-"]): """ Hamming distance and similarity between two strings or lists or tuples. (Hamming, Richard W., Error detecting and error correcting codes, Bell System technical journal 29.2 (1950), pp. 147-160).""" if isinstance(a, int): a = [a] if isinstance(b, int): b = [b] if len(a) != len(b): raise ValueError('The length of the two inputs must be equal for comparison!') if isinstance(a, str): a = list(a) if isinstance(b, str): b = list(b) hamvec = [_xor(x, y) for x, y in zip(a,b) if (str(x) not in null_lst) and (str(y) not in null_lst)] hamgelijk = hamvec.count(0) hamafstand = hamvec.count(1) return hamafstand, hamgelijk cpdef diffVec(a, b, null_lst = [".","-"]): """ Obtain differences between two lists. Return two vectors: one specifying positions of similarity and one positions of difference.""" if isinstance(a, int): a = [a] if isinstance(b, int): b = [b] if len(a) != len(b): raise ValueError('The length of the two inputs must be equal for comparison!') if isinstance(a, str): a = list(a) if isinstance(b, str): b = list(b) ongelijk, gelijk = [], [] for n, xy in enumerate(zip(a,b)): if (str(xy) in null_lst) or (str(xy) in null_lst): continue if _xor(*xy)==1: ongelijk.append(n) else: gelijk.append(n) return ongelijk, gelijk
logprob.so 0 → 100755
logprob2.c 0 → 100755
This diff is collapsed.
logprob2.pyx 0 → 100755
 # Written by Ehsan Motazedi, Wageningen UR, 06-02-2017. # Last Updated: 21-02-2017 import sys from logprob import diffVec, Hamming from haplotypes import Haplotypes from libc.math cimport log, exp from reads import Read cdef extern from "math.h": float INFINITY cpdef double loge(double x): """ Return log x in base e.""" try: return log(x) except ValueError as e: garbage = sys.stderr.write("WARNING: {0}\n".format(e.args)) return -1*INFINITY def veclog(vec): """ Logarithm function defined on vector space.""" try: return map(loge, vec) except TypeError: return loge(vec) def GetlogProb(r, Vset, eps, qual = None, counts = False, min_r_length=2): """ Return the log probability of a read conditional on a presumed haplotype and variant error rate, i.e. P(r|Vset, eps) (Berger et al. 2014, p. 4). If counts if True, assign the read to the most compatible homologue (count 1 for that homologue and zero of the others) to later calculate the number of reads compatible with each homologue for a set of reads by the upstream functions.""" ar = r.GetalleleSet(Vset.GetStart(), Vset.GetStop()) if len([_allele for _allele in ar if _allele not in set(['-','.'])])
logprob2.so 0 → 100755