Commit 6d997453 authored by Schoorlemmer, Joran
Browse files

Replace extractcounts.py

parent 4fa1914c
......@@ -33,7 +33,7 @@ def parse_gtf(in_file, read_length = 100):
trans_len = abs(int(line_cont[4])-int(line_cont[3]))
gene_info = line_cont[8].split(' ')
if len(gene_info) == 16:
gene_name = gene_info[5].lstrip('"').rstrip('";')
gene_name = gene_info[9].lstrip('"').rstrip('";')
cov = float(gene_info[11].lstrip('"').rstrip('";'))
genes[gene_name] = round(cov * trans_len /read_length)
return genes
......@@ -52,9 +52,8 @@ def group_replicates(sample_names):
for f in f_names:
f = f.strip("\n")
# strip redundant info out of string
rep_dict_key = f[:-5]
#fn = f[:-4].split('-')
#rep_dict_key = fn[0] + fn[1][0:3]+ fn[1][4:]
fn = f[:-4].split('-')
rep_dict_key = fn[0] + fn[1][0:3]+ fn[1][4:]
if not rep_dict_key in rep_dict:
rep_dict[rep_dict_key] = {}
rep_dict[rep_dict_key][f] = parse_gtf('samples/'+ str(f))
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment