Skip to content

Commit c3c8887

Browse files
committed
ref seq needs to be treated differently
this almost works on the test
1 parent e29967f commit c3c8887

File tree

2 files changed

+41
-54
lines changed

2 files changed

+41
-54
lines changed

include/kreeq.h

+2-4
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,8 @@ class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { //
169169
DBextension = "kreeq";
170170

171171
if (userInput.kmerDepth == -1) {
172-
if (userInput.travAlgorithm == "best-first") {
172+
if (userInput.travAlgorithm == "best-first")
173173
this->userInput.kmerDepth = userInput.kmerLen; // unidirectional search
174-
this->userInput.maxSpan = userInput.kmerLen;
175-
}
176174
else if (userInput.travAlgorithm == "traversal")
177175
this->userInput.kmerDepth = std::ceil((float)userInput.kmerLen/2); // kmer search is in both directions
178176
}
@@ -238,7 +236,7 @@ class DBG : public Kmap<DBG, UserInputKreeq, uint64_t, DBGkmer, DBGkmer32> { //
238236

239237
bool DBGtoVariants(InSegment *inSegment);
240238

241-
std::pair<bool,std::deque<DBGpath>> searchVariants(std::pair<const uint64_t,DBGkmer32> source, std::array<uint16_t, 2> mapRange, const std::deque<uint64_t> &targetsQueue, const phmap::parallel_flat_hash_map<uint64_t,bool> &targetsMap, ParallelMap32* localGraphCache);
239+
std::pair<bool,std::deque<DBGpath>> searchVariants(std::pair<const uint64_t,DBGkmer32> source, bool isSourceFw, uint64_t ref, std::array<uint16_t, 2> mapRange, const std::deque<uint64_t> &targetsQueue, const phmap::parallel_flat_hash_map<uint64_t,bool> &targetsMap, ParallelMap32* localGraphCache);
242240

243241
bool variantsToGFA(InSegment *inSegment, Log &threadLog);
244242

src/variants.cpp

+39-50
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,19 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
8989
bool isFw = false;
9090

9191
for (uint16_t pos = 0; pos < userInput.maxSpan; ++pos) { // populate targets
92-
if (pos < kcount) {
93-
key = hash(str+pos);
92+
if (pos+k < kcount) {
93+
key = hash(str+pos+k);
9494
targetsQueue.push_back(key);
9595
targetsMap[key];
9696
}
9797
}
9898

9999
for (uint64_t c = 0; c<kcount; ++c){
100100

101-
targetsMap.erase(targetsQueue.front());
101+
targetsMap.erase(targetsQueue.front()); // update targets
102102
targetsQueue.pop_front();
103-
if (c < kcount-userInput.maxSpan) {
104-
key = hash(str+c+userInput.maxSpan);
103+
if (c+k+userInput.maxSpan < kcount) {
104+
key = hash(str+c+k+userInput.maxSpan);
105105
targetsMap[key];
106106
targetsQueue.push_back(key);
107107
}
@@ -129,7 +129,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
129129
}else{
130130
pair = *it;
131131
}
132-
auto results = searchVariants(pair, mapRange, targetsQueue, targetsMap, localGraphCache);
132+
auto results = searchVariants(pair, isFw, hash(str+c+1, &isFw), mapRange, targetsQueue, targetsMap, localGraphCache);
133133
explored += results.first;
134134
if (results.first) {
135135
for (DBGpath &path : results.second)
@@ -166,7 +166,7 @@ bool DBG::DBGtoVariants(InSegment *inSegment) {
166166
return true;
167167
}
168168

169-
std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t,DBGkmer32> source, std::array<uint16_t, 2> mapRange, const std::deque<uint64_t> &targetsQueue, const phmap::parallel_flat_hash_map<uint64_t,bool> &targetsMap, ParallelMap32* localGraphCache) { // dijkstra variant search
169+
std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t,DBGkmer32> source, bool isSourceFw, uint64_t ref, std::array<uint16_t, 2> mapRange, const std::deque<uint64_t> &targetsQueue, const phmap::parallel_flat_hash_map<uint64_t,bool> &targetsMap, ParallelMap32* localGraphCache) { // dijkstra variant search
170170

171171
bool explored = false; // true if we reached a node in the original graph
172172
std::vector<uint64_t> destinations;
@@ -178,7 +178,7 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
178178
dist[source.first] = 1;
179179
Q.insert(&source, 1); // associated priority equals dist[·]
180180

181-
uint64_t key;
181+
uint64_t key = source.first;
182182
int16_t depth = 0;
183183
bool direction = true, isFw;
184184

@@ -192,7 +192,6 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
192192
if (got != prev.end()) {
193193
direction = got->second.second;
194194
}
195-
196195
auto checkNext = [&,this] (uint64_t key, bool direction) {
197196
auto startNode = targetsMap.find(key);
198197
if (startNode == targetsMap.end()) { // if we connect to the original graph we are done
@@ -225,46 +224,34 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
225224
return true;
226225
};
227226
uint8_t edgeCount = 0, exploredCount = 0;
228-
for (uint8_t i = 0; i<4; ++i) {
229-
230-
if (direction || depth == 0) { // forward edges
231-
232-
if (depth == 0)
233-
direction = true;
227+
std::vector<std::tuple<uint64_t,bool,bool>> candidatePaths;
228+
229+
for (uint8_t i = 0; i<4; ++i) { // first we collect all candidate paths
234230

235-
if (u->second.fw[i] > userInput.covCutOff) {
236-
uint8_t nextKmer[k];
237-
buildNextKmer(nextKmer, u->first, i, true); // compute next node
238-
key = hash(nextKmer, &isFw);
239-
bool found = checkNext(key, isFw ? direction : !direction);
240-
if (found) {
241-
++exploredCount;
242-
if (key != targetsQueue.front() && targetsMap.find(key) != targetsMap.end()) {
243-
prev[key] = std::make_pair(u->first,direction);
244-
destinations.push_back(key);
245-
}
246-
}
231+
if (depth == 0)
232+
direction = isSourceFw ? true : false;
233+
234+
if (direction ? u->second.fw[i] : u->second.bw[i] > userInput.covCutOff) {
235+
uint8_t nextKmer[k];
236+
buildNextKmer(nextKmer, u->first, i, direction); // compute next node
237+
key = hash(nextKmer, &isFw);
238+
if (key != ref) { // we never rediscover the ref path
239+
candidatePaths.push_back(std::make_tuple(key, isFw, direction));
247240
++edgeCount;
248241
}
249242
}
250-
if (!direction || depth == 0) {
251-
252-
if (depth == 0)
253-
direction = false;
254-
255-
if (u->second.bw[i] > userInput.covCutOff) { // backward edges
256-
uint8_t nextKmer[k];
257-
buildNextKmer(nextKmer, u->first, i, false); // compute next node
258-
key = hash(nextKmer, &isFw);
259-
bool found = checkNext(key, isFw ? direction : !direction);
260-
if (found) {
261-
++exploredCount;
262-
if (key != targetsQueue.front() && targetsMap.find(key) != targetsMap.end()) {
263-
prev[key] = std::make_pair(u->first,direction);
264-
destinations.push_back(key);
265-
}
266-
}
267-
++edgeCount;
243+
}
244+
for (std::tuple<uint64_t,bool,bool> path : candidatePaths) { // for each path, including ref, we try to explore it
245+
uint64_t key = std::get<0>(path);
246+
bool isFw = std::get<1>(path);
247+
bool direction = std::get<2>(path);
248+
249+
bool found = checkNext(key, isFw ? direction : !direction);
250+
if (found) { // if the next node exists
251+
++exploredCount;
252+
if (targetsMap.find(key) != targetsMap.end()) { // if we reconnected to the ref path
253+
prev[key] = std::make_pair(u->first,direction);
254+
destinations.push_back(key); // we found a new alternate path
268255
}
269256
}
270257
}
@@ -273,17 +260,20 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
273260
if(edgeCount == exploredCount || depth == userInput.kmerDepth + 1 || destinations.size() >= 10) // everything explored/found, depth reached, or top10
274261
explored = true;
275262
}
276-
if (destinations.size() > 0) { // traverse from target to source, the first path is the reference
263+
if (destinations.size() > 0) { // traverse from target to source
277264
for (uint64_t destination : destinations) {
278265
DBGpath newPath;
279266
std::string endSequence = reverseHash(destination);
280-
uint16_t i = 0, refLen = std::find(targetsQueue.begin(), targetsQueue.end(), destination) - targetsQueue.begin();
267+
uint16_t i = 0, refLen = std::find(targetsQueue.begin(), targetsQueue.end(), destination) - targetsQueue.begin() + k;
281268
uint64_t prevNode = prev[destination].first;
282269

283270
while (prevNode != source.first) { // construct the shortest path with a stack S
284271
prevNode = prev[prevNode].first;
285272
++i;
286273
}
274+
std::cout<<+i<<" "<<+refLen<<std::endl;
275+
prevNode = prev[destination].first;
276+
bool direction = prev[prevNode].second;
287277
int16_t b = i-refLen;
288278
if (refLen > k) {
289279
newPath.type = COM;
@@ -295,12 +285,11 @@ std::pair<bool,std::deque<DBGpath>> DBG::searchVariants(std::pair<const uint64_t
295285
else if (i > refLen) {
296286
newPath.type = DEL;
297287
--b;
288+
prevNode = prev[prevNode].first;
289+
direction = prev[prevNode].second;
298290
}
299291
else
300292
newPath.type = INS;
301-
302-
prevNode = prev[destination].first;
303-
bool direction = prev[prevNode].second;
304293

305294
while (b >= 0) {
306295
newPath.sequence.push_back(direction ? reverseHash(prevNode)[0] : revCom(reverseHash(prevNode)[k-1]));

0 commit comments

Comments
 (0)