@@ -72,7 +72,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
72
72
while (explored < kcount) {
73
73
74
74
mapRange = {0 ,0 };
75
- std::queue< uint64_t > targetsQueue;
75
+ std::deque< const uint64_t > targetsQueue;
76
76
phmap::parallel_flat_hash_map<uint64_t ,bool > targetsMap;
77
77
78
78
while (mapRange[1 ] < mapCount) {
@@ -86,19 +86,24 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
86
86
bool isFw = false ;
87
87
88
88
for (uint16_t pos = 0 ; pos < userInput.maxSpan ; ++pos) { // populate targets
89
- targetsQueue.push (hash (str+pos));
90
- targetsMap[hash (str+pos)];
89
+ if (pos < len-userInput.maxSpan ) {
90
+ key = hash (str+pos);
91
+ targetsQueue.push_back (key);
92
+ targetsMap[key];
93
+ }
91
94
}
92
95
93
96
for (uint64_t c = 0 ; c<kcount; ++c){
94
97
95
- if (!visited[c]) {
96
-
97
- targetsMap.erase (targetsQueue.front ());
98
- targetsQueue.pop ();
98
+ targetsMap.erase (targetsQueue.front ());
99
+ targetsQueue.pop_front ();
100
+ if (c < len-userInput.maxSpan ) {
99
101
key = hash (str+c+userInput.maxSpan );
100
102
targetsMap[key];
101
- targetsQueue.push (key);
103
+ targetsQueue.push_back (key);
104
+ }
105
+
106
+ if (!visited[c]) {
102
107
103
108
key = hash (str+c, &isFw);
104
109
i = key % mapCount;
@@ -121,14 +126,11 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
121
126
}else {
122
127
pair = *it;
123
128
}
124
- auto results = searchVariants (pair, mapRange, targetsMap, localGraphCache);
129
+ auto results = searchVariants (pair, mapRange, targetsQueue, targetsMap, localGraphCache);
125
130
explored += results.first ;
126
131
if (results.first ) {
127
- for (DBGpath &path : results.second ) {
128
- path.pos = c;
129
- path.type = SNV;
130
- std::cout<<+path.pos <<" " <<+path.type <<" " <<path.sequence <<std::endl;
131
- }
132
+ for (DBGpath &path : results.second )
133
+ path.pos = c+k;
132
134
133
135
if (results.second .size () != 0 )
134
136
variants.push_back (results.second );
@@ -161,13 +163,13 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
161
163
return true ;
162
164
}
163
165
164
- std::pair<bool ,std::deque<DBGpath>> DBG::searchVariants (std::pair<const uint64_t ,DBGkmer32> source, std::array<uint16_t , 2 > mapRange, phmap::parallel_flat_hash_map<uint64_t ,bool > &targetsMap, ParallelMap32* localGraphCache) { // dijkstra variant search
166
+ std::pair<bool ,std::deque<DBGpath>> DBG::searchVariants (std::pair<const uint64_t ,DBGkmer32> source, std::array<uint16_t , 2 > mapRange, const std::deque< const uint64_t > &targetsQueue, const phmap::parallel_flat_hash_map<uint64_t ,bool > &targetsMap, ParallelMap32* localGraphCache) { // dijkstra variant search
165
167
166
168
bool explored = false ; // true if we reached a node in the original graph
167
169
std::vector<uint64_t > destinations;
168
170
FibonacciHeap<std::pair<const uint64_t , DBGkmer32>*> Q; // node priority queue Q
169
- phmap::parallel_flat_hash_map<uint64_t ,uint8_t > dist;
170
- phmap::parallel_flat_hash_map<uint64_t ,std::pair<uint64_t ,bool >> prev; // distance table
171
+ phmap::parallel_flat_hash_map<uint64_t ,uint8_t > dist; // distance table
172
+ phmap::parallel_flat_hash_map<uint64_t ,std::pair<uint64_t ,bool >> prev; // path table
171
173
std::deque<DBGpath> discoveredPaths;
172
174
173
175
dist[source.first ] = 1 ;
@@ -193,7 +195,7 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
193
195
if (startNode == targetsMap.end ()) { // if we connect to the original graph we are done
194
196
auto nextKmer = localGraphCache->find (key); // check if the node is in the cache (already visited)
195
197
196
- if (nextKmer == localGraphCache->end ()) { // we cached this node before
198
+ if (nextKmer == localGraphCache->end ()) { // we did not cache this node before
197
199
uint64_t m = key % mapCount;
198
200
if (m >= mapRange[0 ] && m < mapRange[1 ]) { // the node is in not cached but is available to visit now
199
201
map = maps[m];
@@ -234,8 +236,10 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
234
236
bool found = checkNext (key, isFw ? direction : !direction);
235
237
if (found) {
236
238
++exploredCount;
237
- if (targetsMap.find (key) != targetsMap.end ())
238
- destinations.push_back (u->first );
239
+ if (key != targetsQueue.front () && targetsMap.find (key) != targetsMap.end ()) {
240
+ prev[key] = std::make_pair (u->first ,direction);
241
+ destinations.push_back (key);
242
+ }
239
243
}
240
244
++edgeCount;
241
245
}
@@ -252,8 +256,10 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
252
256
bool found = checkNext (key, isFw ? direction : !direction);
253
257
if (found) {
254
258
++exploredCount;
255
- if (targetsMap.find (key) != targetsMap.end ())
256
- destinations.push_back (u->first );
259
+ if (key != targetsQueue.front () && targetsMap.find (key) != targetsMap.end ()) {
260
+ prev[key] = std::make_pair (u->first ,direction);
261
+ destinations.push_back (key);
262
+ }
257
263
}
258
264
++edgeCount;
259
265
}
@@ -264,16 +270,32 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
264
270
if (edgeCount == exploredCount || depth == userInput.kmerDepth + 1 || destinations.size () >= 10 ) // everything explored/found, depth reached, or top10
265
271
explored = true ;
266
272
}
267
- if (destinations.size () > 1 ) { // traverse from target to source, the first path is the reference
273
+ if (destinations.size () > 0 ) { // traverse from target to source, the first path is the reference
268
274
for (uint64_t destination : destinations) {
269
275
DBGpath newPath;
270
- newPath.sequence = reverseHash (destination);
271
- while (destination != source.first ) { // construct the shortest path with a stack S
272
-
273
- newPath.sequence .push_back (reverseHash (destination)[k-1 ]);
274
-
275
- dist.erase (destination);
276
- destination = prev[destination].first ;
276
+ std::string endSequence = reverseHash (destination);
277
+ uint16_t i = 0 , refLen = std::find (targetsQueue.begin (), targetsQueue.end (), destination) - targetsQueue.begin ();
278
+ uint64_t prevNode = prev[destination].first ;
279
+
280
+ while (prevNode != source.first ) { // construct the shortest path with a stack S
281
+ prevNode = prev[prevNode].first ;
282
+ ++i;
283
+ }
284
+
285
+ if (i == refLen)
286
+ newPath.type = SNV;
287
+ else if (i > refLen)
288
+ newPath.type = DEL;
289
+ else
290
+ newPath.type = INS;
291
+
292
+ prevNode = prev[destination].first ;
293
+ bool direction = prev[destination].second ;
294
+ while (i >= refLen) {
295
+ newPath.sequence .push_back (direction ? revCom (reverseHash (prevNode))[0 ] : revCom (reverseHash (prevNode))[0 ]);
296
+ prevNode = prev[prevNode].first ;
297
+ direction = prev[prevNode].second ;
298
+ --i;
277
299
}
278
300
discoveredPaths.push_back (newPath);
279
301
}
0 commit comments