400 lines
16 KiB
Diff
400 lines
16 KiB
Diff
diff --git a/src/cuffmerge b/src/cuffmerge
|
|
index e12f232..452df65 100755
|
|
--- a/src/cuffmerge
|
|
+++ b/src/cuffmerge
|
|
@@ -1,4 +1,4 @@
|
|
-#!/usr/bin/env python
|
|
+#!/usr/bin/python3
|
|
# encoding: utf-8
|
|
"""
|
|
cuffmerge.py
|
|
@@ -6,6 +6,8 @@ cuffmerge.py
|
|
Created by Cole Trapnell on 2011-03-17.
|
|
Copyright (c) 2011 Cole Trapnell. All rights reserved.
|
|
"""
|
|
+from __future__ import print_function
|
|
+from __future__ import absolute_import
|
|
|
|
import sys
|
|
import getopt
|
|
@@ -17,6 +19,9 @@ import os
|
|
import tempfile
|
|
import warnings
|
|
import types
|
|
+from operator import itemgetter
|
|
+from tempfile import mktemp
|
|
+
|
|
|
|
help_message = '''
|
|
cuffmerge takes two or more Cufflinks GTF files and merges them into a
|
|
@@ -92,7 +97,7 @@ class TestParams:
|
|
"num-threads=",
|
|
"keep-tmp",
|
|
"min-isoform-fraction="])
|
|
- except getopt.error, msg:
|
|
+ except getopt.error as msg:
|
|
raise Usage(msg)
|
|
|
|
self.system_params.parse_options(opts)
|
|
@@ -104,7 +109,7 @@ class TestParams:
|
|
# option processing
|
|
for option, value in opts:
|
|
if option in ("-v", "--version"):
|
|
- print "merge_cuff_asms v%s" % (get_version())
|
|
+ print("merge_cuff_asms v%s" % (get_version()))
|
|
exit(0)
|
|
if option in ("-h", "--help"):
|
|
raise Usage(help_message)
|
|
@@ -128,17 +133,17 @@ def right_now():
|
|
|
|
def prepare_output_dir():
|
|
|
|
- print >> sys.stderr, "[%s] Preparing output location %s" % (right_now(), output_dir)
|
|
+ print("[%s] Preparing output location %s" % (right_now(), output_dir), file=sys.stderr)
|
|
if os.path.exists(output_dir):
|
|
pass
|
|
else:
|
|
os.makedirs(output_dir)
|
|
|
|
- #print >> sys.stderr, "Checking for %s", logging_dir
|
|
+ #print("Checking for %s", logging_dir, file=sys.stderr)
|
|
if os.path.exists(logging_dir):
|
|
pass
|
|
else:
|
|
- #print >> sys.stderr, "Creating %s", logging_dir
|
|
+ #print("Creating %s", logging_dir, file=sys.stderr)
|
|
os.makedirs(logging_dir)
|
|
|
|
if os.path.exists(tmp_dir):
|
|
@@ -158,7 +163,7 @@ def tmp_name(prefix):
|
|
pass
|
|
else:
|
|
os.mkdir(tmp_root)
|
|
- return tmp_root + prefix + os.tmpnam().split('/')[-1]
|
|
+ return tmp_root + prefix + mktemp().split(os.sep)[-1]
|
|
|
|
def cufflinks(out_dir,
|
|
sam_file,
|
|
@@ -168,9 +173,9 @@ def cufflinks(out_dir,
|
|
lsf=False,
|
|
curr_queue=None):
|
|
if gtf_file != None:
|
|
- print >> sys.stderr, "[%s] Quantitating transcripts" % (right_now())
|
|
+ print("[%s] Quantitating transcripts" % (right_now()), file=sys.stderr)
|
|
else:
|
|
- print >> sys.stderr, "[%s] Assembling transcripts" % (right_now())
|
|
+ print("[%s] Assembling transcripts" % (right_now()), file=sys.stderr)
|
|
|
|
cmd = ["cufflinks"]
|
|
|
|
@@ -191,20 +196,20 @@ def cufflinks(out_dir,
|
|
cmd.append(sam_file)
|
|
|
|
try:
|
|
- print >> run_log, " ".join(cmd)
|
|
+ print(" ".join(cmd), file=run_log)
|
|
ret = subprocess.call(cmd)
|
|
if ret != 0:
|
|
- print >> sys.stderr, fail_str, "Error: could not execute cufflinks"
|
|
+ print(fail_str, "Error: could not execute cufflinks", file=sys.stderr)
|
|
exit(1)
|
|
# cufflinks not found
|
|
- except OSError, o:
|
|
+ except OSError as o:
|
|
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
|
|
- print >> sys.stderr, fail_str, "Error: cufflinks not found on this system. Did you forget to include it in your PATH?"
|
|
+ print(fail_str, "Error: cufflinks not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
|
|
exit(1)
|
|
|
|
def cuffcompare(prefix, ref_gtf, fasta, cuff_gtf):
|
|
|
|
- print >> sys.stderr, "[%s] Comparing reference %s to assembly %s" % (right_now(), ref_gtf, cuff_gtf)
|
|
+ print("[%s] Comparing reference %s to assembly %s" % (right_now(), ref_gtf, cuff_gtf), file=sys.stderr)
|
|
cmd = ["cuffcompare"]
|
|
|
|
if prefix != None:
|
|
@@ -213,22 +218,22 @@ def cuffcompare(prefix, ref_gtf, fasta, cuff_gtf):
|
|
cmd.extend(["-r", ref_gtf])
|
|
if fasta != None:
|
|
cmd.extend(["-s", fasta])
|
|
- if type(cuff_gtf) == types.ListType:
|
|
+ if isinstance(cuff_gtf, list):
|
|
for g in cuff_gtf:
|
|
cmd.extend([g])
|
|
else:
|
|
cmd.extend([cuff_gtf])
|
|
|
|
try:
|
|
- print >> run_log, " ".join(cmd)
|
|
+ print(" ".join(cmd), file=run_log)
|
|
ret = subprocess.call(cmd)
|
|
if ret != 0:
|
|
- print >> sys.stderr, fail_str, "Error: could not execute cuffcompare"
|
|
+ print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr)
|
|
exit(1)
|
|
# cuffcompare not found
|
|
- except OSError, o:
|
|
+ except OSError as o:
|
|
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
|
|
- print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?"
|
|
+ print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
|
|
exit(1)
|
|
|
|
def gtf_to_sam(gtf_filename):
|
|
@@ -240,15 +245,15 @@ def gtf_to_sam(gtf_filename):
|
|
cmd.append(gtf_filename)
|
|
cmd.append(sam_out)
|
|
try:
|
|
- print >> run_log, " ".join(cmd)
|
|
+ print(" ".join(cmd), file=run_log)
|
|
ret = subprocess.call(cmd)
|
|
if ret != 0:
|
|
- print >> sys.stderr, fail_str, "Error: could not execute gtf_to_sam"
|
|
+ print(fail_str, "Error: could not execute gtf_to_sam", file=sys.stderr)
|
|
exit(1)
|
|
# gtf_to_sam not found
|
|
- except OSError, o:
|
|
+ except OSError as o:
|
|
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
|
|
- print >> sys.stderr, fail_str, "Error: gtf_to_sam not found on this system. Did you forget to include it in your PATH?"
|
|
+ print(fail_str, "Error: gtf_to_sam not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
|
|
exit(1)
|
|
return sam_out
|
|
|
|
@@ -268,9 +273,9 @@ def test_input_files(filename_list):
|
|
g = open(line,"r")
|
|
input_files.append(line)
|
|
|
|
- except OSError, o:
|
|
+ except OSError as o:
|
|
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
|
|
- print >> sys.stderr, fail_str, "Error: could not open %s" % line
|
|
+ print(fail_str, "Error: could not open %s" % line, file=sys.stderr)
|
|
OK = False
|
|
if not OK:
|
|
sys.exit(1)
|
|
@@ -279,16 +284,16 @@ def test_input_files(filename_list):
|
|
def convert_gtf_to_sam(gtf_filename_list):
|
|
"""This function takes a list of GTF files, converts them all to
|
|
temporary SAM files, and returns the list of temporary file names."""
|
|
- print >> sys.stderr, "[%s] Converting GTF files to SAM" % (right_now())
|
|
+ print("[%s] Converting GTF files to SAM" % (right_now()), file=sys.stderr)
|
|
OK = True
|
|
sam_input_filenames = []
|
|
for line in gtf_filename_list:
|
|
try:
|
|
sam_out = gtf_to_sam(line)
|
|
sam_input_filenames.append(sam_out)
|
|
- except OSError, o:
|
|
+ except OSError as o:
|
|
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
|
|
- print >> sys.stderr, fail_str, "Error: could not open %s" % line
|
|
+ print(fail_str, "Error: could not open %s" % line, file=sys.stderr)
|
|
OK = False
|
|
if not OK:
|
|
sys.exit(1)
|
|
@@ -299,12 +304,12 @@ def merge_sam_inputs(sam_input_list, header):
|
|
|
|
sorted_map = open(sorted_map_name, "w")
|
|
|
|
- #print header
|
|
+ #print(header)
|
|
|
|
# The header was built from a dict keyed by chrom, so
|
|
# the records will be lexicographically ordered and
|
|
# should match the BAM after the sort below.
|
|
- print >> sorted_map, header,
|
|
+ print(header, end=' ', file=sorted_map)
|
|
|
|
sorted_map.close()
|
|
sort_cmd =["sort",
|
|
@@ -315,48 +320,49 @@ def merge_sam_inputs(sam_input_list, header):
|
|
"--temporary-directory="+tmp_dir]
|
|
sort_cmd.extend(sam_input_list)
|
|
|
|
- print >> run_log, " ".join(sort_cmd), ">", sorted_map_name
|
|
+ print(" ".join(sort_cmd), ">", sorted_map_name, file=run_log)
|
|
subprocess.call(sort_cmd,
|
|
stdout=open(sorted_map_name, "a"))
|
|
return sorted_map_name
|
|
|
|
def compare_to_reference(meta_asm_gtf, ref_gtf, fasta):
|
|
- print >> sys.stderr, "[%s] Comparing against reference file %s" % (right_now(), ref_gtf)
|
|
+ global tmp_dir
|
|
+ print("[%s] Comparing against reference file %s" % (right_now(), ref_gtf), file=sys.stderr)
|
|
ref_str = ""
|
|
if ref_gtf != None:
|
|
ref_str = " -r %s " % ref_gtf
|
|
|
|
if fasta != None:
|
|
- comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s -s %s %s''' % (ref_str, fasta, meta_asm_gtf)
|
|
+ comp_cmd = '''cuffcompare -o %s -C -G %s -s %s %s''' % (tmp_dir, ref_str, fasta, meta_asm_gtf)
|
|
else:
|
|
- comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s %s''' % (ref_str, meta_asm_gtf)
|
|
+ comp_cmd = '''cuffcompare -o %s -C -G %s %s''' % (tmp_dir, ref_str, meta_asm_gtf)
|
|
|
|
#cmd = bsub_cmd(comp_cmd, "/gencode_cmp", True, job_mem=8)
|
|
cmd = comp_cmd
|
|
|
|
try:
|
|
- print >> run_log, cmd
|
|
+ print(cmd, file=run_log)
|
|
ret = subprocess.call(cmd,shell=True)
|
|
if ret != 0:
|
|
- print >> sys.stderr, fail_str, "Error: could not execute cuffcompare"
|
|
+ print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr)
|
|
exit(1)
|
|
#tmap_out = meta_asm_gtf.split("/")[-1] + ".tmap"
|
|
tfpath, tfname = os.path.split(meta_asm_gtf)
|
|
if tfpath: tfpath+='/'
|
|
- tmap_out = tfpath+'tmp_meta_asm.'+tfname+".tmap"
|
|
+ tmap_out = tfpath+'.'+tfname+".tmap"
|
|
return tmap_out
|
|
# cuffcompare not found
|
|
- except OSError, o:
|
|
+ except OSError as o:
|
|
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
|
|
- print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?"
|
|
+ print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
|
|
exit(1)
|
|
|
|
def select_gtf(gtf_in_filename, ids, gtf_out_filename):
|
|
f_gtf = open(gtf_in_filename)
|
|
- #print >> sys.stderr, "Select GTF: Ids are: "
|
|
- #print >> sys.stderr, ids
|
|
- #print >> sys.stderr, "reference gtf file name:"
|
|
- #print >> sys.stderr, gtf_in_filename
|
|
+ #print("Select GTF: Ids are: ", file=sys.stderr)
|
|
+ #print(ids, file=sys.stderr)
|
|
+ #print("reference gtf file name:", file=sys.stderr)
|
|
+ #print(gtf_in_filename, file=sys.stderr)
|
|
out_gtf = open(gtf_out_filename, "w")
|
|
for line in f_gtf:
|
|
line = line.strip()
|
|
@@ -370,13 +376,13 @@ def select_gtf(gtf_in_filename, ids, gtf_out_filename):
|
|
first_quote = col.find('"')
|
|
last_quote = col.find('"', first_quote + 1)
|
|
transcript = col[first_quote + 1:last_quote]
|
|
- #print >> sys.stderr, transcript
|
|
+ #print(transcript, file=sys.stderr)
|
|
if transcript in ids:
|
|
- print >> out_gtf, line
|
|
+ print(line, file=out_gtf)
|
|
|
|
|
|
def merge_gtfs(gtf_filenames, merged_gtf, ref_gtf=None):
|
|
- print >> sys.stderr, "[%s] Merging linc gtf files with cuffcompare" % (right_now())
|
|
+ print("[%s] Merging linc gtf files with cuffcompare" % (right_now()), file=sys.stderr)
|
|
cmd = ["cuffcompare"]
|
|
|
|
cmd.extend(["-o", merged_gtf])
|
|
@@ -388,24 +394,24 @@ def merge_gtfs(gtf_filenames, merged_gtf, ref_gtf=None):
|
|
#cmd = bsub_cmd(cmd, "/merge_gtf", True, job_mem=8)
|
|
|
|
try:
|
|
- print >> run_log, cmd
|
|
+ print(cmd, file=run_log)
|
|
ret = subprocess.call(cmd, shell=True)
|
|
if ret != 0:
|
|
- print >> sys.stderr, fail_str, "Error: could not execute cuffcompare"
|
|
+ print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr)
|
|
exit(1)
|
|
return merged_gtf + ".combined.gtf"
|
|
# cuffcompare not found
|
|
- except OSError, o:
|
|
+ except OSError as o:
|
|
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
|
|
- print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?"
|
|
+ print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
|
|
exit(1)
|
|
|
|
def compare_meta_asm_against_ref(ref_gtf, fasta_file, gtf_input_file, class_codes=["c", "i", "r", "p", "e"]):
|
|
- #print >> sys.stderr, "Cuffcmpare all assemblies GTFs"
|
|
+ #print("Cuffcmpare all assemblies GTFs", file=sys.stderr)
|
|
|
|
tmap = compare_to_reference(gtf_input_file, ref_gtf, fasta_file)
|
|
|
|
- #print >> sys.stderr, "Cuffcmpare all assemblies GTFs : filter %s" % ",".join(class_codes)
|
|
+ #print("Cuffcmpare all assemblies GTFs : filter %s" % ",".join(class_codes), file=sys.stderr)
|
|
selected_ids= set([])
|
|
f_tmap = open(tmap)
|
|
#out = open("tmp_meta_asm_selectedIds.txt", "w")
|
|
@@ -434,15 +440,10 @@ def compare_meta_asm_against_ref(ref_gtf, fasta_file, gtf_input_file, class_code
|
|
if os.path.exists(mtmap.split(".tmap")[0]+".refmap"):
|
|
os.remove(mtmap.split(".tmap")[0]+".refmap")
|
|
|
|
- shutil.move("tmp_meta_asm.combined.gtf", output_dir + "/merged.gtf")
|
|
+ global tmp_dir
|
|
+ shutil.move(tmp_dir+".combined.gtf", output_dir + "/merged.gtf")
|
|
|
|
# os.remove("tmp_meta_asm.combined.gtf")
|
|
- if os.path.exists("tmp_meta_asm.loci"):
|
|
- os.remove("tmp_meta_asm.loci")
|
|
- if os.path.exists("tmp_meta_asm.tracking"):
|
|
- os.remove("tmp_meta_asm.tracking")
|
|
- if os.path.exists("tmp_meta_asm.stats"):
|
|
- os.remove("tmp_meta_asm.stats")
|
|
if os.path.exists(tmap):
|
|
os.remove(tmap)
|
|
if os.path.exists(tmap.split(".tmap")[0]+".refmap"):
|
|
@@ -484,9 +485,8 @@ def get_gtf_chrom_info(gtf_filename, known_chrom_info=None):
|
|
|
|
def header_for_chrom_info(chrom_info):
|
|
header_strs = ["""@HD\tVN:1.0\tSO:coordinate"""]
|
|
- chrom_list = [(chrom, limits) for chrom, limits in chrom_info.iteritems()]
|
|
- chrom_list.sort(lambda x,y: cmp(x[0],y[0]))
|
|
- #print chrom_list
|
|
+ chrom_list = sorted(chrom_info.items(), key=itemgetter(0))
|
|
+ #print(chrom_list)
|
|
for chrom, limits in chrom_list:
|
|
line = "@SQ\tSN:%s\tLN:\t%d" % (chrom, limits[1])
|
|
header_strs.append(line)
|
|
@@ -510,29 +510,29 @@ def main(argv=None):
|
|
params.check()
|
|
|
|
if len(args) < 1:
|
|
- raise(Usage(help_message))
|
|
+ raise Usage(help_message)
|
|
|
|
global run_log
|
|
global run_cmd
|
|
|
|
- print >> sys.stderr
|
|
- print >> sys.stderr, "[%s] Beginning transcriptome assembly merge" % (right_now())
|
|
- print >> sys.stderr, "-------------------------------------------"
|
|
- print >> sys.stderr
|
|
+ print(file=sys.stderr)
|
|
+ print("[%s] Beginning transcriptome assembly merge" % (right_now()), file=sys.stderr)
|
|
+ print("-------------------------------------------", file=sys.stderr)
|
|
+ print(file=sys.stderr)
|
|
|
|
start_time = datetime.now()
|
|
prepare_output_dir()
|
|
|
|
run_log = open(logging_dir + "run.log", "w", 0)
|
|
run_cmd = " ".join(argv)
|
|
- print >> run_log, run_cmd
|
|
+ print(run_cmd, file=run_log)
|
|
|
|
transfrag_list_file = open(args[0], "r")
|
|
|
|
if params.ref_gtf != None:
|
|
test_input_files([params.ref_gtf])
|
|
else:
|
|
- print >> sys.stderr, "Warning: no reference GTF provided!"
|
|
+ print("Warning: no reference GTF provided!", file=sys.stderr)
|
|
|
|
# Check that all the primary assemblies are accessible before starting the time consuming stuff
|
|
gtf_input_files = test_input_files(transfrag_list_file)
|
|
@@ -571,8 +571,8 @@ def main(argv=None):
|
|
os.remove(output_dir + "/skipped.gtf")
|
|
os.remove(output_dir + "/genes.fpkm_tracking")
|
|
os.remove(output_dir + "/isoforms.fpkm_tracking")
|
|
- except Usage, err:
|
|
- print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
|
|
+ except Usage as err:
|
|
+ print(sys.argv[0].split("/")[-1] + ": " + str(err.msg), file=sys.stderr)
|
|
return 2
|
|
|
|
|