Skip to content

Commit

Permalink
updating to reflect new STRING formats and SGD id,
Browse files Browse the repository at this point in the history
adding robokop baseline to yeast graph,
moving the strict_normalization flag to the individual yeast sources so the baseline stays strictly normalized
  • Loading branch information
EvanDietzMorris committed May 30, 2023
1 parent b3f326f commit 754249e
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 17 deletions.
17 changes: 9 additions & 8 deletions Common/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,21 @@
PHAROS = 'PHAROS'
PLANT_GOA = 'PlantGOA'
SCENT = 'Scent'
STRING_DB = 'STRING-DB'
SGD = 'SGD'
HUMAN_STRING = 'STRING-DB-Human'
TEXT_MINING_KP = 'textminingkp'
UBERGRAPH = 'Ubergraph'
UBERGRAPH_REDUNDANT = 'UbergraphRedundant'
UNIREF = 'UniRef'
VP = 'ViralProteome'
YEASTSGD = 'YeastSGDInfo'
YEAST_HISTONES = "YeastHistoneMapping"
YEAST_COSTANZA = "Costanza2016Data"
YEAST_COSTANZA = 'Costanza2016Data'
YEAST_GSE61888 = 'YeastGSE61888'
YEAST_GASCHDIAMIDE = 'YeastGaschDiamideGeneExpression'
YEAST_STRING_DB = 'Yeast-STRING-DB'
YEAST_STRING = 'STRING-DB-Yeast'

RESOURCE_HOGS = [GTEX, GWAS_CATALOG, UNIREF, ONTOLOGICAL_HIERARCHY, UBERGRAPH, UBERGRAPH_REDUNDANT, YEASTSGD, STRING_DB, CAM_KP]
RESOURCE_HOGS = [GTEX, GWAS_CATALOG, UNIREF, ONTOLOGICAL_HIERARCHY, UBERGRAPH_REDUNDANT,
SGD, HUMAN_STRING, CAM_KP]

SOURCE_DATA_LOADER_CLASS_IMPORTS = {
BIOLINK: ("parsers.biolink.src.loadBL", "BLLoader"),
Expand All @@ -53,24 +54,24 @@
HGNC: ("parsers.hgnc.src.loadHGNC", "HGNCLoader"),
HMDB: ("parsers.hmdb.src.loadHMDB", "HMDBLoader"),
HUMAN_GOA: ("parsers.GOA.src.loadGOA", "HumanGOALoader"),
HUMAN_STRING: ("parsers.STRING.src.loadSTRINGDB", "HumanSTRINGDBLoader"),
INTACT: ("parsers.IntAct.src.loadIA", "IALoader"),
MONDO_PROPS: ("parsers.MONDOProperties.src.loadMP", "MPLoader"),
ONTOLOGICAL_HIERARCHY: ("parsers.UberGraph.src.loadUG", "OHLoader"),
PANTHER: ("parsers.panther.src.loadPanther", "PLoader"),
PHAROS: ("parsers.PHAROS.src.loadPHAROS", "PHAROSLoader"),
PLANT_GOA: ("parsers.GOA.src.loadGOA", "PlantGOALoader"),
SCENT: ("parsers.scent.src.loadScent", "ScentLoader"),
STRING_DB: ("parsers.STRING.src.loadSTRINGDB", "STRINGDBLoader"),
SGD: ("parsers.SGD.src.loadSGD", "SGDLoader"),
TEXT_MINING_KP: ("parsers.textminingkp.src.loadTMKP", "TMKPLoader"),
UBERGRAPH: ("parsers.UberGraph.src.loadUG", "UGLoader"),
UBERGRAPH_REDUNDANT: ("parsers.UberGraph.src.loadUG", "UGRedundantLoader"),
UNIREF: ("parsers.ViralProteome.src.loadUniRef", "UniRefSimLoader"),
VP: ("parsers.ViralProteome.src.loadVP", "VPLoader"),
YEASTSGD: ("parsers.yeast.src.loadYeastSGDInfo", "YeastSGDLoader"),
YEAST_HISTONES: ("parsers.yeast.src.loadHistoneMap", "YeastHistoneMapLoader"),
YEAST_COSTANZA: ("parsers.yeast.src.loadCostanza2016", "Costanza2016Loader"),
YEAST_GASCHDIAMIDE: ("parsers.yeast.src.loadYeastGeneExpressionGasch.py", "YeastGaschDiamideLoader"),
YEAST_STRING_DB: ("parsers.yeast.src.loadYeastGSTRINGDB.py", "STRINGDBLoader")
YEAST_STRING: ("parsers.STRING.src.loadSTRINGDB", "YeastSTRINGDBLoader")
}


Expand Down
4 changes: 2 additions & 2 deletions graph_specs/default-graph-spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ graphs:
graph_description: 'ROBOKOP (KG) is an open-source biomedical KG that supports the ROBOKOP application. This is the baseline version of that graph, which does not include knowledge sources with specific genetic variants.'
graph_url: http://robokopkg.renci.org/browser/
conflation: True
output_format: jsonl
output_format: neo4j
sources:
- source_id: Biolink
# optional parameters for each data source - see README for more info
Expand All @@ -39,7 +39,7 @@ graphs:
- source_id: PANTHER
- source_id: PHAROS
- source_id: textminingkp
- source_id: STRING-DB
- source_id: STRING-DB-Human
- source_id: Ubergraph

- graph_id: RobokopKG
Expand Down
61 changes: 54 additions & 7 deletions graph_specs/yeast-graph-spec.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,62 @@
# yeast graph spec
graphs:
- graph_id: Yeast_Test_Graph
conflation: False
strict_normalization: False

- graph_id: Baseline
# optional parameters to be applied to the entire graph - see README for more info
# node_normalization_version: latest
# edge_normalization_version: latest
# graph_name: Robokop Baseline
# graph_description: 'The baseline graph from which RobokopKG and other graphs are built.'
# conflation: True # (whether to conflate node types like Genes and Proteins)
graph_name: ROBOKOP Baseline
graph_description: 'ROBOKOP (KG) is an open-source biomedical KG that supports the ROBOKOP application. This is the baseline version of that graph, which does not include knowledge sources with specific genetic variants.'
graph_url: http://robokopkg.renci.org/browser/
conflation: True
output_format: neo4j
sources:
- source_id: YeastSGDInfo
- source_id: Biolink
# optional parameters for each data source - see README for more info
# source_version: latest (the version of the source data)
# parsing_version: latest (the version of the parser used to parse the data)
# node_normalization_version: latest
# edge_normalization_version: latest
# conflation: False
# strict_normalization: True (whether or not data should be discarded when it cannot be normalized)
# merge_strategy: default (used to specify alternative merge strategies)
- source_id: CHEBIProps
- source_id: CTD
- source_id: DrugCentral
- source_id: GtoPdb
- source_id: Hetio
- source_id: HGNC
- source_id: HMDB
- source_id: HumanGOA
- source_id: IntAct
- source_id: MONDOProps
source_version: '3_23_2023'
- source_id: PANTHER
- source_id: PHAROS
- source_id: textminingkp
- source_id: STRING-DB-Human
- source_id: Ubergraph

- graph_id: YobokopKG
conflation: True
output_format: neo4j
subgraphs:
- graph_id: Baseline
sources:
- source_id: SGD
strict_normalization: False
- source_id: YeastHistoneMapping
strict_normalization: False
- source_id: Costanza2016Data
strict_normalization: False
- source_id: YeastGSE61888
strict_normalization: False
- source_id: YeastGaschDiamideGeneExpression
- source_id: Yeast-STRING-DB
# - source_id: GenomeAllianceOrthologs
strict_normalization: False
- source_id: STRING-DB-Yeast
- source_id: GenomeAllianceOrthologs
- source_id: OntologicalHierarchy
merge_strategy: connected_edge_subset
merge_strategy: connected_edge_subset

0 comments on commit 754249e

Please sign in to comment.