Skip to content

Commit

Permalink
Bugfix: remove the CCS parameter from the spectrum title after an MGF file is read
Browse files Browse the repository at this point in the history
  • Loading branch information
Andre99999999 committed Dec 27, 2024
1 parent c18d414 commit 3cb5ae3
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 57 deletions.
19 changes: 0 additions & 19 deletions pwiz/data/msdata/Diff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,25 +547,6 @@ void diff(const Spectrum& a,
}
}

if (config.ignoreSpectrumTitle)
{
vector<CVParam>::const_iterator it =
find_if(a_b.cvParams.begin(), a_b.cvParams.end(), CVParamIs(MS_spectrum_title));

if (it != a_b.cvParams.end())
{
a_b.cvParams.erase(it);
}

it =
find_if(b_a.cvParams.begin(), b_a.cvParams.end(), CVParamIs(MS_spectrum_title));

if (it != b_a.cvParams.end())
{
b_a.cvParams.erase(it);
}
}

// provide context
if (!a_b.empty() || !b_a.empty())
{
Expand Down
5 changes: 1 addition & 4 deletions pwiz/data/msdata/Diff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,17 +228,14 @@ struct PWIZ_API_DECL DiffConfig : public pwiz::data::BaseDiffConfig

bool ignoreDataProcessing;

bool ignoreSpectrumTitle;

DiffConfig()
: pwiz::data::BaseDiffConfig(),
ignoreIdentity(false),
ignoreMetadata(false),
ignoreExtraBinaryDataArrays(false),
ignoreSpectra(false),
ignoreChromatograms(false),
ignoreDataProcessing(false),
ignoreSpectrumTitle(false)
ignoreDataProcessing(false)
{}
};

Expand Down
2 changes: 1 addition & 1 deletion pwiz/data/msdata/Serializer_MGF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ void Serializer_MGF::Impl::write(ostream& os, const MSData& msd,
CVParam collisionalCrossSectionalArea = scan.cvParam(MS_collisional_cross_sectional_area);
if (!collisionalCrossSectionalArea.empty())
{
os << "," << "ccs=" << collisionalCrossSectionalArea.valueFixedNotation() << '\n';
os << ", " << "ccs=" << collisionalCrossSectionalArea.valueFixedNotation() << '\n';
}
else
{
Expand Down
6 changes: 2 additions & 4 deletions pwiz/data/msdata/Serializer_MGF_Test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ void testWriteRead(const MSData& msd)
DiffConfig diffConfig;
diffConfig.ignoreIdentity = true;
diffConfig.ignoreChromatograms = true;
diffConfig.ignoreSpectrumTitle = true;


Diff<MSData, DiffConfig> diff(msd, msd2, diffConfig);
if (os_ && diff) *os_ << diff << endl;
unit_assert(!diff);
Expand Down Expand Up @@ -206,5 +205,4 @@ int main(int argc, char* argv[])
}

TEST_EPILOG
}

}
70 changes: 41 additions & 29 deletions pwiz/data/msdata/SpectrumList_MGF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ using namespace pwiz::util;

namespace {

// Marker that introduces a CCS (collisional cross section) value embedded in a spectrum title.
// Both the pointer and the characters are const so the marker cannot be reseated at runtime.
const char* const startTagCCSInTitle = "ccs=";

// Delimiters that terminate the CCS value following the marker.
const char* const endTagsCCSInTitle[] = { " ", ",", ";", "\t" };

class SpectrumList_MGFImpl : public SpectrumList_MGF
{
public:
Expand Down Expand Up @@ -218,6 +221,7 @@ class SpectrumList_MGFImpl : public SpectrumList_MGF
// Some formats omit RTINSECONDS and store the retention time
// in the title field instead.
double scanTimeMin = getRetentionTimeFromTitle(value);

if (scanTimeMin > 0)
scan.set(MS_scan_start_time, scanTimeMin * 60, UO_second);

Expand All @@ -228,7 +232,7 @@ class SpectrumList_MGFImpl : public SpectrumList_MGF
scan.cvParams.push_back(CVParam(MS_collisional_cross_sectional_area, ccs, UO_square_angstrom));
}

spectrum.set(MS_spectrum_title, value);
spectrum.set(MS_spectrum_title, removeCCSFromTitle(value));
}
else if (name == "PEPMASS")
{
Expand Down Expand Up @@ -344,49 +348,59 @@ class SpectrumList_MGFImpl : public SpectrumList_MGF
spectrum.set(MS_base_peak_intensity, basePeakIntensity);
}

/**
* Parse the spectrum title to look for CCS.
*/
static double getCCSFromTitle(const string& title)
static size_t FindEndTag(const string& title, size_t start)
{
// text to search for preceeding and following ccs
return getCCS(title, "ccs=");
for (int i = 0; i < sizeof(endTagsCCSInTitle) / sizeof(endTagsCCSInTitle[0]); i++)
{
const size_t tmpEnd = title.find(endTagsCCSInTitle[i], start);
if (tmpEnd != string::npos)
{
return tmpEnd;
}
}

return title.length();
}

/**
* Helper function to parse a double from the given string
* found between the two tags. Search for number after position
* Update position to the end of the parsed double.
*/
static double getCCS(const string& title, const char* startTag)
* Parse the spectrum title to look for CCS.
*/
static double getCCSFromTitle(const string& title)
{
size_t start = title.find(startTag, 0);
size_t start = title.find(startTagCCSInTitle, 0);

if (start == string::npos)
return -1; // not found

start += strlen(startTag);
const string endTags[] = { " ", ",",";","\t" };
start += strlen(startTagCCSInTitle);

size_t end = title.length();
for (int i = 0; i < sizeof(endTags) / sizeof(endTags[0]); i++)
try
{
const size_t tmpEnd = title.find(endTags[i], start);
if (tmpEnd != string::npos)
{
end = tmpEnd;
break;
}
return boost::lexical_cast<double>(title.substr(start, FindEndTag(title, start + strlen(startTagCCSInTitle)) - start));
}
catch (...)
{
return -1;
}
}

/**
 * Return a copy of the spectrum title with the embedded CCS entry removed.
 *
 * The entry runs from the first occurrence of startTagCCSInTitle up to the
 * position reported by FindEndTag. The separator immediately in front of the
 * marker (any run of spaces/tabs, preceded by at most one ',' or ';') is
 * removed as well, so a title ending in ", ccs=<value>" loses the whole suffix.
 * Only characters that really are separators are dropped, and the start offset
 * can never underflow when the marker sits at the very beginning of the title.
 *
 * @param title  spectrum title as read from the file
 * @return the title without the CCS entry; the unchanged title if it has none
 */
static string removeCCSFromTitle(const string& title)
{
    const size_t tagPos = title.find(startTagCCSInTitle, 0);

    if (tagPos == string::npos)
        return title; // not found

    // Walk back over the separator that precedes the marker.
    size_t cutBegin = tagPos;
    while (cutBegin > 0 && (title[cutBegin - 1] == ' ' || title[cutBegin - 1] == '\t'))
        --cutBegin;
    if (cutBegin > 0 && (title[cutBegin - 1] == ',' || title[cutBegin - 1] == ';'))
        --cutBegin;

    // FindEndTag never returns more than title.length(), so substr cannot throw out_of_range.
    const size_t cutEnd = FindEndTag(title, tagPos);

    return title.substr(0, cutBegin) + title.substr(cutEnd);
}

Expand All @@ -406,7 +420,6 @@ class SpectrumList_MGFImpl : public SpectrumList_MGF
double secondTime = 0;
for(int format_idx = 0; format_idx < 2; format_idx++)
{

size_t position = 0;
firstTime = getTime(title, startTags[format_idx],
endTags[format_idx], position);
Expand Down Expand Up @@ -463,7 +476,6 @@ class SpectrumList_MGFImpl : public SpectrumList_MGF
size_t lineCount = 0;
bool inBeginIons = false;
vector<SpectrumIdentity>::iterator curIdentityItr;
map<string, size_t>::iterator curIdToIndexItr;

while (std::getline(*is_, lineStr)) // need accurate line length, so do not use pwiz::util convenience wrapper
{
Expand All @@ -481,7 +493,7 @@ class SpectrumList_MGFImpl : public SpectrumList_MGF
curIdentityItr->index = index_.size()-1;
curIdentityItr->id = "index=" + lexical_cast<string>(index_.size()-1);
curIdentityItr->sourceFilePosition = size_t(is_->tellg())-lineStr.length()-1;
curIdToIndexItr = idToIndex_.insert(pair<string, size_t>(curIdentityItr->id, index_.size()-1)).first;
idToIndex_.insert(pair<string, size_t>(curIdentityItr->id, index_.size() - 1)).first;
inBeginIons = true;
}
else if (lineStr.find("TITLE=") == 0)
Expand Down

0 comments on commit 3cb5ae3

Please sign in to comment.