cufflinks/0007-py2to3.patch
He Rengui 3f22db1fe6 init
2021-03-31 08:53:30 +08:00

400 lines
16 KiB
Diff

diff --git a/src/cuffmerge b/src/cuffmerge
index e12f232..452df65 100755
--- a/src/cuffmerge
+++ b/src/cuffmerge
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# encoding: utf-8
"""
cuffmerge.py
@@ -6,6 +6,8 @@ cuffmerge.py
Created by Cole Trapnell on 2011-03-17.
Copyright (c) 2011 Cole Trapnell. All rights reserved.
"""
+from __future__ import print_function
+from __future__ import absolute_import
import sys
import getopt
@@ -17,6 +19,9 @@ import os
import tempfile
import warnings
import types
+from operator import itemgetter
+from tempfile import mktemp
+
help_message = '''
cuffmerge takes two or more Cufflinks GTF files and merges them into a
@@ -92,7 +97,7 @@ class TestParams:
"num-threads=",
"keep-tmp",
"min-isoform-fraction="])
- except getopt.error, msg:
+ except getopt.error as msg:
raise Usage(msg)
self.system_params.parse_options(opts)
@@ -104,7 +109,7 @@ class TestParams:
# option processing
for option, value in opts:
if option in ("-v", "--version"):
- print "merge_cuff_asms v%s" % (get_version())
+ print("merge_cuff_asms v%s" % (get_version()))
exit(0)
if option in ("-h", "--help"):
raise Usage(help_message)
@@ -128,17 +133,17 @@ def right_now():
def prepare_output_dir():
- print >> sys.stderr, "[%s] Preparing output location %s" % (right_now(), output_dir)
+ print("[%s] Preparing output location %s" % (right_now(), output_dir), file=sys.stderr)
if os.path.exists(output_dir):
pass
else:
os.makedirs(output_dir)
- #print >> sys.stderr, "Checking for %s", logging_dir
+ #print("Checking for %s", logging_dir, file=sys.stderr)
if os.path.exists(logging_dir):
pass
else:
- #print >> sys.stderr, "Creating %s", logging_dir
+ #print("Creating %s", logging_dir, file=sys.stderr)
os.makedirs(logging_dir)
if os.path.exists(tmp_dir):
@@ -158,7 +163,7 @@ def tmp_name(prefix):
pass
else:
os.mkdir(tmp_root)
- return tmp_root + prefix + os.tmpnam().split('/')[-1]
+ return tmp_root + prefix + mktemp().split(os.sep)[-1]
def cufflinks(out_dir,
sam_file,
@@ -168,9 +173,9 @@ def cufflinks(out_dir,
lsf=False,
curr_queue=None):
if gtf_file != None:
- print >> sys.stderr, "[%s] Quantitating transcripts" % (right_now())
+ print("[%s] Quantitating transcripts" % (right_now()), file=sys.stderr)
else:
- print >> sys.stderr, "[%s] Assembling transcripts" % (right_now())
+ print("[%s] Assembling transcripts" % (right_now()), file=sys.stderr)
cmd = ["cufflinks"]
@@ -191,20 +196,20 @@ def cufflinks(out_dir,
cmd.append(sam_file)
try:
- print >> run_log, " ".join(cmd)
+ print(" ".join(cmd), file=run_log)
ret = subprocess.call(cmd)
if ret != 0:
- print >> sys.stderr, fail_str, "Error: could not execute cufflinks"
+ print(fail_str, "Error: could not execute cufflinks", file=sys.stderr)
exit(1)
# cufflinks not found
- except OSError, o:
+ except OSError as o:
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
- print >> sys.stderr, fail_str, "Error: cufflinks not found on this system. Did you forget to include it in your PATH?"
+ print(fail_str, "Error: cufflinks not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
exit(1)
def cuffcompare(prefix, ref_gtf, fasta, cuff_gtf):
- print >> sys.stderr, "[%s] Comparing reference %s to assembly %s" % (right_now(), ref_gtf, cuff_gtf)
+ print("[%s] Comparing reference %s to assembly %s" % (right_now(), ref_gtf, cuff_gtf), file=sys.stderr)
cmd = ["cuffcompare"]
if prefix != None:
@@ -213,22 +218,22 @@ def cuffcompare(prefix, ref_gtf, fasta, cuff_gtf):
cmd.extend(["-r", ref_gtf])
if fasta != None:
cmd.extend(["-s", fasta])
- if type(cuff_gtf) == types.ListType:
+ if isinstance(cuff_gtf, list):
for g in cuff_gtf:
cmd.extend([g])
else:
cmd.extend([cuff_gtf])
try:
- print >> run_log, " ".join(cmd)
+ print(" ".join(cmd), file=run_log)
ret = subprocess.call(cmd)
if ret != 0:
- print >> sys.stderr, fail_str, "Error: could not execute cuffcompare"
+ print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr)
exit(1)
# cuffcompare not found
- except OSError, o:
+ except OSError as o:
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
- print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?"
+ print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
exit(1)
def gtf_to_sam(gtf_filename):
@@ -240,15 +245,15 @@ def gtf_to_sam(gtf_filename):
cmd.append(gtf_filename)
cmd.append(sam_out)
try:
- print >> run_log, " ".join(cmd)
+ print(" ".join(cmd), file=run_log)
ret = subprocess.call(cmd)
if ret != 0:
- print >> sys.stderr, fail_str, "Error: could not execute gtf_to_sam"
+ print(fail_str, "Error: could not execute gtf_to_sam", file=sys.stderr)
exit(1)
# gtf_to_sam not found
- except OSError, o:
+ except OSError as o:
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
- print >> sys.stderr, fail_str, "Error: gtf_to_sam not found on this system. Did you forget to include it in your PATH?"
+ print(fail_str, "Error: gtf_to_sam not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
exit(1)
return sam_out
@@ -268,9 +273,9 @@ def test_input_files(filename_list):
g = open(line,"r")
input_files.append(line)
- except OSError, o:
+ except OSError as o:
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
- print >> sys.stderr, fail_str, "Error: could not open %s" % line
+ print(fail_str, "Error: could not open %s" % line, file=sys.stderr)
OK = False
if not OK:
sys.exit(1)
@@ -279,16 +284,16 @@ def test_input_files(filename_list):
def convert_gtf_to_sam(gtf_filename_list):
"""This function takes a list of GTF files, converts them all to
temporary SAM files, and returns the list of temporary file names."""
- print >> sys.stderr, "[%s] Converting GTF files to SAM" % (right_now())
+ print("[%s] Converting GTF files to SAM" % (right_now()), file=sys.stderr)
OK = True
sam_input_filenames = []
for line in gtf_filename_list:
try:
sam_out = gtf_to_sam(line)
sam_input_filenames.append(sam_out)
- except OSError, o:
+ except OSError as o:
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
- print >> sys.stderr, fail_str, "Error: could not open %s" % line
+ print(fail_str, "Error: could not open %s" % line, file=sys.stderr)
OK = False
if not OK:
sys.exit(1)
@@ -299,12 +304,12 @@ def merge_sam_inputs(sam_input_list, header):
sorted_map = open(sorted_map_name, "w")
- #print header
+ #print(header)
# The header was built from a dict keyed by chrom, so
# the records will be lexicographically ordered and
# should match the BAM after the sort below.
- print >> sorted_map, header,
+ print(header, end='', file=sorted_map)
sorted_map.close()
sort_cmd =["sort",
@@ -315,48 +320,49 @@ def merge_sam_inputs(sam_input_list, header):
"--temporary-directory="+tmp_dir]
sort_cmd.extend(sam_input_list)
- print >> run_log, " ".join(sort_cmd), ">", sorted_map_name
+ print(" ".join(sort_cmd), ">", sorted_map_name, file=run_log)
subprocess.call(sort_cmd,
stdout=open(sorted_map_name, "a"))
return sorted_map_name
def compare_to_reference(meta_asm_gtf, ref_gtf, fasta):
- print >> sys.stderr, "[%s] Comparing against reference file %s" % (right_now(), ref_gtf)
+ global tmp_dir
+ print("[%s] Comparing against reference file %s" % (right_now(), ref_gtf), file=sys.stderr)
ref_str = ""
if ref_gtf != None:
ref_str = " -r %s " % ref_gtf
if fasta != None:
- comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s -s %s %s''' % (ref_str, fasta, meta_asm_gtf)
+ comp_cmd = '''cuffcompare -o %s -C -G %s -s %s %s''' % (tmp_dir, ref_str, fasta, meta_asm_gtf)
else:
- comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s %s''' % (ref_str, meta_asm_gtf)
+ comp_cmd = '''cuffcompare -o %s -C -G %s %s''' % (tmp_dir, ref_str, meta_asm_gtf)
#cmd = bsub_cmd(comp_cmd, "/gencode_cmp", True, job_mem=8)
cmd = comp_cmd
try:
- print >> run_log, cmd
+ print(cmd, file=run_log)
ret = subprocess.call(cmd,shell=True)
if ret != 0:
- print >> sys.stderr, fail_str, "Error: could not execute cuffcompare"
+ print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr)
exit(1)
#tmap_out = meta_asm_gtf.split("/")[-1] + ".tmap"
tfpath, tfname = os.path.split(meta_asm_gtf)
if tfpath: tfpath+='/'
- tmap_out = tfpath+'tmp_meta_asm.'+tfname+".tmap"
+ tmap_out = tfpath+'.'+tfname+".tmap"
return tmap_out
# cuffcompare not found
- except OSError, o:
+ except OSError as o:
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
- print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?"
+ print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
exit(1)
def select_gtf(gtf_in_filename, ids, gtf_out_filename):
f_gtf = open(gtf_in_filename)
- #print >> sys.stderr, "Select GTF: Ids are: "
- #print >> sys.stderr, ids
- #print >> sys.stderr, "reference gtf file name:"
- #print >> sys.stderr, gtf_in_filename
+ #print("Select GTF: Ids are: ", file=sys.stderr)
+ #print(ids, file=sys.stderr)
+ #print("reference gtf file name:", file=sys.stderr)
+ #print(gtf_in_filename, file=sys.stderr)
out_gtf = open(gtf_out_filename, "w")
for line in f_gtf:
line = line.strip()
@@ -370,13 +376,13 @@ def select_gtf(gtf_in_filename, ids, gtf_out_filename):
first_quote = col.find('"')
last_quote = col.find('"', first_quote + 1)
transcript = col[first_quote + 1:last_quote]
- #print >> sys.stderr, transcript
+ #print(transcript, file=sys.stderr)
if transcript in ids:
- print >> out_gtf, line
+ print(line, file=out_gtf)
def merge_gtfs(gtf_filenames, merged_gtf, ref_gtf=None):
- print >> sys.stderr, "[%s] Merging linc gtf files with cuffcompare" % (right_now())
+ print("[%s] Merging linc gtf files with cuffcompare" % (right_now()), file=sys.stderr)
cmd = ["cuffcompare"]
cmd.extend(["-o", merged_gtf])
@@ -388,24 +394,24 @@ def merge_gtfs(gtf_filenames, merged_gtf, ref_gtf=None):
#cmd = bsub_cmd(cmd, "/merge_gtf", True, job_mem=8)
try:
- print >> run_log, cmd
+ print(cmd, file=run_log)
ret = subprocess.call(cmd, shell=True)
if ret != 0:
- print >> sys.stderr, fail_str, "Error: could not execute cuffcompare"
+ print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr)
exit(1)
return merged_gtf + ".combined.gtf"
# cuffcompare not found
- except OSError, o:
+ except OSError as o:
if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT:
- print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?"
+ print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr)
exit(1)
def compare_meta_asm_against_ref(ref_gtf, fasta_file, gtf_input_file, class_codes=["c", "i", "r", "p", "e"]):
- #print >> sys.stderr, "Cuffcmpare all assemblies GTFs"
+ #print("Cuffcmpare all assemblies GTFs", file=sys.stderr)
tmap = compare_to_reference(gtf_input_file, ref_gtf, fasta_file)
- #print >> sys.stderr, "Cuffcmpare all assemblies GTFs : filter %s" % ",".join(class_codes)
+ #print("Cuffcmpare all assemblies GTFs : filter %s" % ",".join(class_codes), file=sys.stderr)
selected_ids= set([])
f_tmap = open(tmap)
#out = open("tmp_meta_asm_selectedIds.txt", "w")
@@ -434,15 +440,10 @@ def compare_meta_asm_against_ref(ref_gtf, fasta_file, gtf_input_file, class_code
if os.path.exists(mtmap.split(".tmap")[0]+".refmap"):
os.remove(mtmap.split(".tmap")[0]+".refmap")
- shutil.move("tmp_meta_asm.combined.gtf", output_dir + "/merged.gtf")
+ global tmp_dir
+ shutil.move(tmp_dir+".combined.gtf", output_dir + "/merged.gtf")
# os.remove("tmp_meta_asm.combined.gtf")
- if os.path.exists("tmp_meta_asm.loci"):
- os.remove("tmp_meta_asm.loci")
- if os.path.exists("tmp_meta_asm.tracking"):
- os.remove("tmp_meta_asm.tracking")
- if os.path.exists("tmp_meta_asm.stats"):
- os.remove("tmp_meta_asm.stats")
if os.path.exists(tmap):
os.remove(tmap)
if os.path.exists(tmap.split(".tmap")[0]+".refmap"):
@@ -484,9 +485,8 @@ def get_gtf_chrom_info(gtf_filename, known_chrom_info=None):
def header_for_chrom_info(chrom_info):
header_strs = ["""@HD\tVN:1.0\tSO:coordinate"""]
- chrom_list = [(chrom, limits) for chrom, limits in chrom_info.iteritems()]
- chrom_list.sort(lambda x,y: cmp(x[0],y[0]))
- #print chrom_list
+ chrom_list = sorted(chrom_info.items(), key=itemgetter(0))
+ #print(chrom_list)
for chrom, limits in chrom_list:
line = "@SQ\tSN:%s\tLN:\t%d" % (chrom, limits[1])
header_strs.append(line)
@@ -510,29 +510,29 @@ def main(argv=None):
params.check()
if len(args) < 1:
- raise(Usage(help_message))
+ raise Usage(help_message)
global run_log
global run_cmd
- print >> sys.stderr
- print >> sys.stderr, "[%s] Beginning transcriptome assembly merge" % (right_now())
- print >> sys.stderr, "-------------------------------------------"
- print >> sys.stderr
+ print(file=sys.stderr)
+ print("[%s] Beginning transcriptome assembly merge" % (right_now()), file=sys.stderr)
+ print("-------------------------------------------", file=sys.stderr)
+ print(file=sys.stderr)
start_time = datetime.now()
prepare_output_dir()
- run_log = open(logging_dir + "run.log", "w", 0)
+ run_log = open(logging_dir + "run.log", "w", 1)
run_cmd = " ".join(argv)
- print >> run_log, run_cmd
+ print(run_cmd, file=run_log)
transfrag_list_file = open(args[0], "r")
if params.ref_gtf != None:
test_input_files([params.ref_gtf])
else:
- print >> sys.stderr, "Warning: no reference GTF provided!"
+ print("Warning: no reference GTF provided!", file=sys.stderr)
# Check that all the primary assemblies are accessible before starting the time consuming stuff
gtf_input_files = test_input_files(transfrag_list_file)
@@ -571,8 +571,8 @@ def main(argv=None):
os.remove(output_dir + "/skipped.gtf")
os.remove(output_dir + "/genes.fpkm_tracking")
os.remove(output_dir + "/isoforms.fpkm_tracking")
- except Usage, err:
- print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
+ except Usage as err:
+ print(sys.argv[0].split("/")[-1] + ": " + str(err.msg), file=sys.stderr)
return 2