@@ -89,19 +89,19 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
89
89
bool isFw = false ;
90
90
91
91
for (uint16_t pos = 0 ; pos < userInput.maxSpan ; ++pos) { // populate targets
92
- if (pos < kcount) {
93
- key = hash (str+pos);
92
+ if (pos+k < kcount) {
93
+ key = hash (str+pos+k );
94
94
targetsQueue.push_back (key);
95
95
targetsMap[key];
96
96
}
97
97
}
98
98
99
99
for (uint64_t c = 0 ; c<kcount; ++c){
100
100
101
- targetsMap.erase (targetsQueue.front ());
101
+ targetsMap.erase (targetsQueue.front ()); // update targets
102
102
targetsQueue.pop_front ();
103
- if (c < kcount-userInput. maxSpan ) {
104
- key = hash (str+c+userInput.maxSpan );
103
+ if (c+k+userInput. maxSpan < kcount) {
104
+ key = hash (str+c+k+ userInput.maxSpan );
105
105
targetsMap[key];
106
106
targetsQueue.push_back (key);
107
107
}
@@ -129,7 +129,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
129
129
}else {
130
130
pair = *it;
131
131
}
132
- auto results = searchVariants (pair, mapRange, targetsQueue, targetsMap, localGraphCache);
132
+ auto results = searchVariants (pair, isFw, hash (str+c+ 1 , &isFw), mapRange, targetsQueue, targetsMap, localGraphCache);
133
133
explored += results.first ;
134
134
if (results.first ) {
135
135
for (DBGpath &path : results.second )
@@ -166,7 +166,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
166
166
return true ;
167
167
}
168
168
169
- std::pair<bool ,std::deque<DBGpath>> DBG::searchVariants (std::pair<const uint64_t ,DBGkmer32> source, std::array<uint16_t , 2 > mapRange, const std::deque<uint64_t > &targetsQueue, const phmap::parallel_flat_hash_map<uint64_t ,bool > &targetsMap, ParallelMap32* localGraphCache) { // dijkstra variant search
169
+ std::pair<bool ,std::deque<DBGpath>> DBG::searchVariants (std::pair<const uint64_t ,DBGkmer32> source, bool isSourceFw, uint64_t ref, std::array<uint16_t , 2 > mapRange, const std::deque<uint64_t > &targetsQueue, const phmap::parallel_flat_hash_map<uint64_t ,bool > &targetsMap, ParallelMap32* localGraphCache) { // dijkstra variant search
170
170
171
171
bool explored = false ; // true if we reached a node in the original graph
172
172
std::vector<uint64_t > destinations;
@@ -178,7 +178,7 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
178
178
dist[source.first ] = 1 ;
179
179
Q.insert (&source, 1 ); // associated priority equals dist[·]
180
180
181
- uint64_t key;
181
+ uint64_t key = source. first ;
182
182
int16_t depth = 0 ;
183
183
bool direction = true , isFw;
184
184
@@ -192,7 +192,6 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
192
192
if (got != prev.end ()) {
193
193
direction = got->second .second ;
194
194
}
195
-
196
195
auto checkNext = [&,this ] (uint64_t key, bool direction) {
197
196
auto startNode = targetsMap.find (key);
198
197
if (startNode == targetsMap.end ()) { // if we connect to the original graph we are done
@@ -225,46 +224,34 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
225
224
return true ;
226
225
};
227
226
uint8_t edgeCount = 0 , exploredCount = 0 ;
228
- for (uint8_t i = 0 ; i<4 ; ++i) {
229
-
230
- if (direction || depth == 0 ) { // forward edges
231
-
232
- if (depth == 0 )
233
- direction = true ;
227
+ std::vector<std::tuple<uint64_t ,bool ,bool >> candidatePaths;
228
+
229
+ for (uint8_t i = 0 ; i<4 ; ++i) { // first we collect all candidate paths
234
230
235
- if (u->second .fw [i] > userInput.covCutOff ) {
236
- uint8_t nextKmer[k];
237
- buildNextKmer (nextKmer, u->first , i, true ); // compute next node
238
- key = hash (nextKmer, &isFw);
239
- bool found = checkNext (key, isFw ? direction : !direction);
240
- if (found) {
241
- ++exploredCount;
242
- if (key != targetsQueue.front () && targetsMap.find (key) != targetsMap.end ()) {
243
- prev[key] = std::make_pair (u->first ,direction);
244
- destinations.push_back (key);
245
- }
246
- }
231
+ if (depth == 0 )
232
+ direction = isSourceFw ? true : false ;
233
+
234
+ if (direction ? u->second .fw [i] : u->second .bw [i] > userInput.covCutOff ) {
235
+ uint8_t nextKmer[k];
236
+ buildNextKmer (nextKmer, u->first , i, direction); // compute next node
237
+ key = hash (nextKmer, &isFw);
238
+ if (key != ref) { // we never rediscover the ref path
239
+ candidatePaths.push_back (std::make_tuple (key, isFw, direction));
247
240
++edgeCount;
248
241
}
249
242
}
250
- if (!direction || depth == 0 ) {
251
-
252
- if (depth == 0 )
253
- direction = false ;
254
-
255
- if (u->second .bw [i] > userInput.covCutOff ) { // backward edges
256
- uint8_t nextKmer[k];
257
- buildNextKmer (nextKmer, u->first , i, false ); // compute next node
258
- key = hash (nextKmer, &isFw);
259
- bool found = checkNext (key, isFw ? direction : !direction);
260
- if (found) {
261
- ++exploredCount;
262
- if (key != targetsQueue.front () && targetsMap.find (key) != targetsMap.end ()) {
263
- prev[key] = std::make_pair (u->first ,direction);
264
- destinations.push_back (key);
265
- }
266
- }
267
- ++edgeCount;
243
+ }
244
+ for (std::tuple<uint64_t ,bool ,bool > path : candidatePaths) { // for each path, including ref, we try to explore it
245
+ uint64_t key = std::get<0 >(path);
246
+ bool isFw = std::get<1 >(path);
247
+ bool direction = std::get<2 >(path);
248
+
249
+ bool found = checkNext (key, isFw ? direction : !direction);
250
+ if (found) { // if the next node exists
251
+ ++exploredCount;
252
+ if (targetsMap.find (key) != targetsMap.end ()) { // if we reconnected to the ref path
253
+ prev[key] = std::make_pair (u->first ,direction);
254
+ destinations.push_back (key); // we found a new alternate path
268
255
}
269
256
}
270
257
}
@@ -273,17 +260,20 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
273
260
if (edgeCount == exploredCount || depth == userInput.kmerDepth + 1 || destinations.size () >= 10 ) // everything explored/found, depth reached, or top10
274
261
explored = true ;
275
262
}
276
- if (destinations.size () > 0 ) { // traverse from target to source, the first path is the reference
263
+ if (destinations.size () > 0 ) { // traverse from target to source
277
264
for (uint64_t destination : destinations) {
278
265
DBGpath newPath;
279
266
std::string endSequence = reverseHash (destination);
280
- uint16_t i = 0 , refLen = std::find (targetsQueue.begin (), targetsQueue.end (), destination) - targetsQueue.begin ();
267
+ uint16_t i = 0 , refLen = std::find (targetsQueue.begin (), targetsQueue.end (), destination) - targetsQueue.begin () + k ;
281
268
uint64_t prevNode = prev[destination].first ;
282
269
283
270
while (prevNode != source.first ) { // construct the shortest path with a stack S
284
271
prevNode = prev[prevNode].first ;
285
272
++i;
286
273
}
274
+ std::cout<<+i<<" " <<+refLen<<std::endl;
275
+ prevNode = prev[destination].first ;
276
+ bool direction = prev[prevNode].second ;
287
277
int16_t b = i-refLen;
288
278
if (refLen > k) {
289
279
newPath.type = COM;
@@ -295,12 +285,11 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
295
285
else if (i > refLen) {
296
286
newPath.type = DEL;
297
287
--b;
288
+ prevNode = prev[prevNode].first ;
289
+ direction = prev[prevNode].second ;
298
290
}
299
291
else
300
292
newPath.type = INS;
301
-
302
- prevNode = prev[destination].first ;
303
- bool direction = prev[prevNode].second ;
304
293
305
294
while (b >= 0 ) {
306
295
newPath.sequence .push_back (direction ? reverseHash (prevNode)[0 ] : revCom (reverseHash (prevNode)[k-1 ]));
0 commit comments