Skip to content

Commit 2eeae68

Browse files
author
JLTrincado
committed
Adapted create_paths/v2 for reverse genes
1 parent ff538eb commit 2eeae68

File tree

2 files changed

+28
-8
lines changed

2 files changed

+28
-8
lines changed

create_paths.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def main():
115115
for line in f:
116116
tokens = line.rstrip().split("\t")
117117
chr = tokens[0]
118+
strand = tokens[5]
118119
# Save the cordinates in the list
119120
if([tokens[1],tokens[2]] not in exons_list):
120121
exons_list.append([tokens[1],tokens[2]])
@@ -130,13 +131,22 @@ def main():
130131
raise Exception("Only 1 transcript associated to this gene. Stop execution.")
131132

132133
# Sort the list of exons
133-
exons_list_sorted = sorted(exons_list, key=lambda x: (x[1], x[0]))
134+
# Depending on the strand, we have to sort it in a different way
135+
if(strand=="+"):
136+
exons_list_sorted = sorted(exons_list, key=lambda x: (x[1], x[0]))
137+
else:
138+
exons_list_sorted = sorted(exons_list,key=lambda x: (x[1], x[0]), reverse = True)
139+
134140
# Get the position of the exon_stop in exons_list_sorted
135141
exon_stop_coords_pos = exons_list_sorted.index(exon_stop_coords)
136142

137143
# Sort the exons associated to each transcript
138144
for key, values in transcripts_dict.items():
139-
transcripts_dict[key] = sorted(values, key=lambda x: (x[1], x[0]))
145+
if (strand == "+"):
146+
transcripts_dict[key] = sorted(values, key=lambda x: (x[1], x[0]))
147+
else:
148+
transcripts_dict[key] = sorted(values, key=lambda x: (x[1], x[0]), reverse=True)
149+
140150
# Remove all the transcripts that doesn't include the exon_end
141151
# Copy the dictionary for iterating on it
142152
transcripts_dict_copy = deepcopy(transcripts_dict)

create_paths_v2.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ def main():
100100
fasta_path = sys.argv[2]
101101
output_path = sys.argv[3]
102102

103-
# bed_path = "/home/shinoda/Desktop/Florida/annotation/A1BG_exons.bed"
104-
# fasta_path = "/home/shinoda/Desktop/Florida/annotation/A1BG_exons.bed.fa"
105-
# output_path = "/home/shinoda/Desktop/Florida/annotation/A1BG_possible_transcripts_refseq.fa"
103+
# bed_path = "/home/shinoda/Desktop/Florida/annotation/MBNL1_TEST/MBNL3_exons_hg19_refseq.bed"
104+
# fasta_path = "/home/shinoda/Desktop/Florida/annotation/MBNL1_TEST/MBNL3_exons_hg19_refseq.bed.fa"
105+
# output_path = "/home/shinoda/Desktop/Florida/annotation/MBNL1_TEST/MBNL3_possible_transcripts_refseq_REVERSE.fa"
106106

107107
# exon_stop_coords = exon_stop.split("-")
108108

@@ -114,6 +114,7 @@ def main():
114114
for line in f:
115115
tokens = line.rstrip().split("\t")
116116
chr = tokens[0]
117+
strand = tokens[5]
117118
# Save the cordinates in the list
118119
if([tokens[1],tokens[2]] not in exons_list):
119120
exons_list.append([tokens[1],tokens[2]])
@@ -129,13 +130,22 @@ def main():
129130
raise Exception("Only 1 transcript associated to this gene. Stop execution.")
130131

131132
# Sort the list of exons
132-
exons_list_sorted = sorted(exons_list, key=lambda x: (x[1], x[0]))
133-
# # Get the position of the exon_stop in exons_list_sorted
133+
# Depending on the strand, we have to sort it in a different way
134+
if(strand=="+"):
135+
exons_list_sorted = sorted(exons_list, key=lambda x: (x[1], x[0]))
136+
else:
137+
exons_list_sorted = sorted(exons_list,key=lambda x: (x[1], x[0]), reverse = True)
138+
139+
# Get the position of the exon_stop in exons_list_sorted
134140
# exon_stop_coords_pos = exons_list_sorted.index(exon_stop_coords)
135141

136142
# Sort the exons associated to each transcript
137143
for key, values in transcripts_dict.items():
138-
transcripts_dict[key] = sorted(values, key=lambda x: (x[1], x[0]))
144+
if(strand=="+"):
145+
transcripts_dict[key] = sorted(values, key=lambda x: (x[1], x[0]))
146+
else:
147+
transcripts_dict[key] = sorted(values, key=lambda x: (x[1], x[0]), reverse = True)
148+
139149
# # Remove all the transcripts that doesn't include the exon_end
140150
# # Copy the dictionary for iterating on it
141151
# transcripts_dict_copy = deepcopy(transcripts_dict)

0 commit comments

Comments
 (0)