Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task/WC-120: Datacite operations and pipeline integration #1037

Open
wants to merge 2 commits into
base: task/digital-rocks
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import datetime
from typing import Optional
import json
import requests
import networkx as nx
from django.conf import settings


def get_datacite_json(pub_graph: nx.DiGraph):
    """
    Generate datacite payload for a publishable entity. `pub_graph` is the output of
    either `get_publication_subtree` or `get_publication_full_tree`.

    The project metadata is read from the `value` attribute of the `NODE_ROOT`
    node. Each entry in its `authors` list becomes a personal creator, and each
    distinct, non-empty author `inst` becomes a `HostingInstitution` contributor.

    Returns:
        A dict of DOI attributes: `contributors`, `creators`, `titles`,
        `publisher`, `publicationYear`, `url` and `prefix`.

    Raises:
        KeyError: if the root metadata has no `title` or `projectId`.
    """
    base_meta = pub_graph.nodes["NODE_ROOT"]["value"]
    authors = base_meta.get("authors", [])

    creators = [
        {
            "nameType": "Personal",
            "givenName": author.get("first_name", ""),
            "familyName": author.get("last_name", ""),
        }
        for author in authors
    ]

    # Deduplicate while keeping first-seen order so the payload is the same on
    # every run (a set's iteration order is not), and skip authors with no
    # institution so no contributor is emitted with an empty name.
    institutions = list(
        dict.fromkeys(
            inst for inst in (author.get("inst", "") for author in authors) if inst
        )
    )

    datacite_json = {
        "contributors": [
            {
                "contributorType": "HostingInstitution",
                "nameType": "Organizational",
                "name": institution,
            }
            for institution in institutions
        ],
        "creators": creators,
        "titles": [{"title": base_meta["title"]}],
        "publisher": "Digital Rocks Portal",
        # Year in which the payload is generated, not a stored publication date.
        "publicationYear": datetime.datetime.now().year,
    }

    project_id = base_meta["projectId"]
    datacite_json["url"] = (
        f"{settings.PORTAL_PUBLICATION_DATACITE_URL_PREFIX}/{project_id}"
    )
    datacite_json["prefix"] = settings.PORTAL_PUBLICATION_DATACITE_SHOULDER

    return datacite_json


def upsert_datacite_json(datacite_json: dict, doi: Optional[str] = None):
    """
    Create a draft DOI in datacite with the specified metadata. If a DOI is specified,
    the metadata for that DOI is updated instead.

    Args:
        datacite_json: DOI attributes to send. The dict is not modified.
        doi: Existing DOI to update; when falsy a new DOI is created via POST.

    Returns:
        The decoded JSON body of the Datacite response.

    Raises:
        requests.HTTPError: if Datacite answers with a 4xx/5xx status.
    """
    # Copy so that dropping `publicationYear` below does not leak back into
    # the caller's dict.
    attributes = dict(datacite_json)
    if doi:
        # On update the year is not re-sent, so the value stored with the
        # existing DOI is left alone.
        attributes.pop("publicationYear", None)

    datacite_payload = {
        "data": {
            "type": "dois",
            "relationships": {
                "client": {"data": {"type": "clients", "id": "tdl.tacc"}}
            },
            "attributes": attributes,
        }
    }

    base_url = settings.DATACITE_URL.strip("/")
    request_kwargs = {
        "auth": (settings.DATACITE_USER, settings.DATACITE_PASS),
        "data": json.dumps(datacite_payload),
        "headers": {"Content-Type": "application/vnd.api+json"},
        "timeout": 30,
    }

    if doi:
        res = requests.put(f"{base_url}/dois/{doi}", **request_kwargs)
    else:
        res = requests.post(f"{base_url}/dois", **request_kwargs)

    # Fail here with the HTTP error rather than handing an error document to
    # callers that expect `["data"]["id"]` in the result.
    res.raise_for_status()
    return res.json()


def publish_datacite_doi(doi: str):
    """
    Send the `publish` event for a DOI to Datacite, which sets its status to
    `Findable`.

    Issues an authenticated PUT to `<DATACITE_URL>/dois/<doi>` and returns the
    decoded JSON response body. The HTTP status is not checked.
    """
    endpoint = f"{settings.DATACITE_URL.strip('/')}/dois/{doi}"
    body = json.dumps(
        {"data": {"type": "dois", "attributes": {"event": "publish"}}}
    )
    credentials = (settings.DATACITE_USER, settings.DATACITE_PASS)

    response = requests.put(
        endpoint,
        auth=credentials,
        data=body,
        headers={"Content-Type": "application/vnd.api+json"},
        timeout=30,
    )
    return response.json()


def hide_datacite_doi(doi: str):
    """
    Send the `hide` event for a DOI to Datacite, removing it from public
    consumption.

    Issues an authenticated PUT to `<DATACITE_URL>/dois/<doi>` and returns the
    decoded JSON response body. The HTTP status is not checked.
    """
    endpoint = f"{settings.DATACITE_URL.strip('/')}/dois/{doi}"
    body = json.dumps(
        {"data": {"type": "dois", "attributes": {"event": "hide"}}}
    )
    credentials = (settings.DATACITE_USER, settings.DATACITE_PASS)

    response = requests.put(
        endpoint,
        auth=credentials,
        data=body,
        headers={"Content-Type": "application/vnd.api+json"},
        timeout=30,
    )
    return response.json()


def get_doi_publication_date(doi: str) -> str:
    """
    Look up the publication date for a DOI.

    Fetches `<DATACITE_URL>/dois/<doi>` without credentials and returns the
    `created` attribute of the record. Raises `requests.HTTPError` on a
    4xx/5xx response.
    """
    doi_url = f"{settings.DATACITE_URL.strip('/')}/dois/{doi}"
    response = requests.get(doi_url, timeout=30)
    response.raise_for_status()

    record = response.json()
    return record["data"]["attributes"]["created"]
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from portal.apps._custom.drp import constants
from portal.libs.agave.utils import user_account, service_account
from portal.apps.publications.models import Publication, PublicationRequest
from portal.apps.projects.workspace_operations.datacite_operations import get_datacite_json, upsert_datacite_json, publish_datacite_doi
from django.db import transaction
from portal.apps.projects.workspace_operations.graph_operations import remove_trash_nodes
from portal.apps.search.tasks import index_publication
Expand Down Expand Up @@ -118,7 +119,12 @@ def publish_project(self, project_id: str, version: Optional[int] = 1):
value=nx.node_link_data(publication_tree),
)

doi = 'test_doi' # Replace with actual DOI retrieval logic
# Mint a DataCite DOI
existing_doi = source_project.value.get("doi", None)

datacite_json = get_datacite_json(publication_tree)
datacite_resp = upsert_datacite_json(datacite_json, doi=existing_doi)
doi = datacite_resp["data"]["id"]

# Update project metadata with datacite doi
source_project_id = f'{settings.PORTAL_PROJECTS_SYSTEM_PREFIX}.{project_id}'
Expand All @@ -140,6 +146,9 @@ def publish_project(self, project_id: str, version: Optional[int] = 1):
defaults={"value": published_project.value, "tree": nx.node_link_data(pub_tree), "version": version},
)

if not settings.DEBUG:
publish_datacite_doi(doi)

index_publication(project_id)

# transfer files
Expand Down
15 changes: 15 additions & 0 deletions server/portal/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,21 @@
PORTAL_PUBLICATION_REVIEWERS_GROUP_NAME = settings_custom.\
_PORTAL_PUBLICATION_REVIEWERS_GROUP_NAME

PORTAL_PUBLICATION_DATACITE_SHOULDER = settings_custom.\
_PORTAL_PUBLICATION_DATACITE_SHOULDER

PORTAL_PUBLICATION_DATACITE_URL_PREFIX = settings_custom.\
_PORTAL_PUBLICATION_DATACITE_URL_PREFIX

DATACITE_URL = settings_custom.\
_DATACITE_URL

DATACITE_USER = settings_secret.\
_DATACITE_USER

DATACITE_PASS = settings_secret.\
_DATACITE_PASS

PORTAL_PROJECTS_PRIVATE_KEY = settings_secret.\
_PORTAL_PROJECTS_PRIVATE_KEY

Expand Down
5 changes: 5 additions & 0 deletions server/portal/settings/settings_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@

_PORTAL_PUBLICATION_REVIEWERS_GROUP_NAME = 'PROJECT_REVIEWER'

# Datacite
_PORTAL_PUBLICATION_DATACITE_SHOULDER = "10.80023"
_PORTAL_PUBLICATION_DATACITE_URL_PREFIX = "https://cep.test/data/tapis/projects/drp.project.published.test"
_DATACITE_URL = "https://api.test.datacite.org/"

########################
# Custom Portal Template Assets
# Asset path root is static files output dir.
Expand Down
3 changes: 3 additions & 0 deletions server/portal/settings/settings_secret.example.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,6 @@
"directory": "external-resources"
}
}

_DATACITE_USER = "tdl.tacc"
_DATACITE_PASS = "CHANGEME"
Loading