From 561383ed4abb53e7294c464887407f5bc1c0d5a7 Mon Sep 17 00:00:00 2001 From: Matt McCormick Date: Sat, 18 Nov 2023 07:36:49 -0500 Subject: [PATCH 1/2] ENH: Transition SourceTarball.bash script for .cid We fetch from a local IPFS gateway to ensure the data can be obtained via IPFS. This also ensures the script will run offline, and it runs much faster. The IPFS gateway will verify CIDs; we do not need to verify them separately. Re: #3760 --- Documentation/Maintenance/Release.md | 5 +++++ Utilities/Maintenance/SourceTarball.bash | 14 +++----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Documentation/Maintenance/Release.md b/Documentation/Maintenance/Release.md index 9f5453d4079..fbf442d3fc6 100644 --- a/Documentation/Maintenance/Release.md +++ b/Documentation/Maintenance/Release.md @@ -358,6 +358,9 @@ endings. The `InsightData` tarballs are generated along with the source code tarballs. +Data is fetched from [IPFS]. An IPFS daemon must be running to fetch the data; +[ipfs-desktop] is recommended. + Once the repository has been tagged, we use the following script in the repository to create the tarballs: @@ -873,6 +876,8 @@ excellent packaging. 
[documentation page]: https://www.itk.org/ITK/help/documentation.html [download page]: https://itk.org/ITK/resources/software.html [GitHub]: https://github.com/InsightSoftwareConsortium/ITK +[IPFS]: https://ipfs.tech/ +[ipfs-desktop]: https://github.com/ipfs/ipfs-desktop/releases [ITKPythonPackage]: https://itkpythonpackage.readthedocs.io/en/latest/index.html [ITK discussion]: https://discourse.itk.org/ [Image.sc Forum]: https://image.sc diff --git a/Utilities/Maintenance/SourceTarball.bash b/Utilities/Maintenance/SourceTarball.bash index e783fe6da77..780933a3134 100755 --- a/Utilities/Maintenance/SourceTarball.bash +++ b/Utilities/Maintenance/SourceTarball.bash @@ -36,27 +36,20 @@ return_pipe_status() { find_data_objects() { git ls-tree --full-tree -r "$1" | - egrep '\.(md5)$' | + egrep '\.(cid)$' | while read mode type obj path; do case "$path" in - *.md5) echo MD5/$(git cat-file blob $obj) ;; + *.cid) echo CID/$(git cat-file blob $obj) ;; *) die "Unknown ExternalData content link: $path" ;; esac done | sort | uniq return_pipe_status } -validate_MD5() { - md5sum=$(md5sum "$1" | sed 's/ .*//') && - if test "$md5sum" != "$2"; then - die "Object MD5/$2 is corrupt: $1" - fi -} - download_object() { algo="$1" ; hash="$2" ; path="$3" mkdir -p $(dirname "$path") && - if curl -L "https://www.itk.org/files/ExternalData/$algo/$hash" -o "$path.tmp$$" 1>&2; then + if curl -L "http://127.0.0.1:8080/ipfs/$hash" -o "$path.tmp$$" 1>&2; then mv "$path.tmp$$" "$path" else rm -f "$path.tmp$$" @@ -78,7 +71,6 @@ index_data_objects() { download_object "$algo" "$hash" "$path" && file="$path" fi && - validate_$algo "$file" "$hash" && obj=$(git hash-object -t blob -w "$file") && echo "100644 blob $obj $path" || return From 2387097235c0db2b356b21418e3fa661807d240b Mon Sep 17 00:00:00 2001 From: Matt McCormick Date: Sat, 18 Nov 2023 20:10:37 -0500 Subject: [PATCH 2/2] DOC: Release manager data archiving process for 5.4 Closes #3670 --- Documentation/Maintenance/Release.md | 131 
+++++++++++++++++++++------ 1 file changed, 103 insertions(+), 28 deletions(-) diff --git a/Documentation/Maintenance/Release.md b/Documentation/Maintenance/Release.md index fbf442d3fc6..b2de49a670e 100644 --- a/Documentation/Maintenance/Release.md +++ b/Documentation/Maintenance/Release.md @@ -202,55 +202,125 @@ Commit the result: Archive ExternalData -------------------- -Set the environmental or CMake variable `ExternalData_OBJECT_STORES` to a -local directory. e.g. +More background on the testing data can be found in the +[Contributing Upload Binary Data](../docs/contributing/upload_binary_data.md) documentation. + +The following steps archive data for release on various resources. Both +[datalad] and [@web3-storage/w3] should be installed locally, as should the [kubo] +`ipfs` CLI. It is recommended to install and run [ipfs-desktop] and symlink +the `ipfs` CLI it comes with into your PATH. + +### Fetch the latest ITKData datalad repository + +Clone the ITKData datalad repository, if not already available. + +```sh + cd ~/data/ + datalad clone https://gin.g-node.org/InsightSoftwareConsortium/ITKData.git + cd ITKData +``` + +Make sure the datalad repository is up-to-date. + +```sh + datalad update -r . + datalad get . +``` + +### Fetch new data locally + +Check out the tag that we are archiving. + +```sh + cd ~/src/ITK + git checkout +``` + +And fetch new data into the datalad repository. + +```sh + cd ~/data/ITKData + ./ContentLinkSynchronization.sh --create ~/src/ITK +``` + +Upload the tree to archival storage with: + +```sh + w3 put . --no-wrap -n ITKData-pre-verify -H +``` + +Verify and possibly update CIDs in the ITK repository with the CID output +from the previous step. ```sh - export ExternalData_OBJECT_STORES=${HOME}/data + ./ContentLinkSynchronization.sh --root-cid bafy ~/src/ITK + datalad status ``` -Pre-populate the store with the contents of the 'InsightData' tarballs from a -previous release. 
Once the tarball extracted, move the content of its -subfolder called `.ExternalData` in your local `ExternalData_OBJECT_STORES` -directory. +If there is new content, commit it with: -Then, from the ITK build directory, configure ITK enabling the flags: - * `ITK_WRAP_PYTHON` - * `ITK_LEGACY_SILENT` - * `BUILD_TESTING` - * `BUILD_EXAMPLES` +```sh + datalad save -m "ENH: Updates for ITK-v" +``` -If you have previously enabled remote modules using the same ITK source -directory, either verify that they are enabled in your current build, or remove -their source directory that has been added inside ITK source directory -(i.e. `./Modules/Remote/$name_of_remote_module`). +Upload the repository update to web3.storage: -Build the `ITKData` target +```sh + w3 put . --no-wrap -n ITKData-v -H +``` + +Edit the *README.md* file with the new root CID and push. ```sh - make ITKData + datalad save -m "DOC: Update root CID for ITK-v" + datalad push ``` -This will download new testing data since the previous release. +### Pin the CID locally and on Pinata -Next, archive the data on data.kitware.com. Create a folder, e.g. -`$MAJOR_VERSION.$MINOR_VERSION`, in `ITK/ITKTestingData`, and run +If the [pinata] pinning service is not already available, create it: ```sh - python -m pip install girder-client - python ./Utilities/Maintenance/ArchiveTestingDataOnGirder.py --object-store ${ExternalData_OBJECT_STORES} --parent-id --api-key + ipfs pin remote service add pinata https://api.pinata.cloud/psa/ PINATA_JWT ``` -This script requires the girder-client Python package install from Girder -master, November 2016 or later, (Girder > 2.0.0). 
+Then pin the root CID locally and on Pinata: + +```sh + ipfs pin add /ipfs/bafy + ipfs pin remote add --service=pinata --name=ITKData-ITK-v /ipfs/bafy +``` -Archive the `InsightData` contents on ITK's file server at Kitware: +### Pin the CID on Kitware's IPFS server + +Optionally, pin to Kitware's IPFS server: + +``` + ssh ipfs + export IPFS_PATH=/data/ipfs + ipfs pin add --progress /ipfs/bafy +``` + +### Rsync the data to Kitware's Apache Server + +Optionally, rsync the objects to Kitware's Apache Server: ```sh - rsync -vrt ${ExternalData_OBJECT_STORES}/MD5/ kitware@web:ITKExternalData/MD5/ + rsync -vrtL ./Objects/CID kitware@web:ITKExternalData/ ``` -Update the data archive at https://github.com/InsightSoftwareConsortium/ITKTestingData. +### Push the data to GitHub Pages + +Push the data to the [ITKTestingData] `gh-pages` branch. GitHub restricts the size +of files, hence the `--max-size` option below. + +``` +rsync -vrtL --max-size=45m ./Objects/CID ~/data/ITKTestingData/ +cd ~/data/ITKTestingData +git add . +git commit -m "ENH: Updates for ITK " +git push +``` Tag the ITK repository ---------------------- @@ -872,6 +942,7 @@ excellent packaging. [Kitware blog]: https://blog.kitware.com/ [blog post]: https://blog.kitware.com/itk-packages-in-linux-distributions/ [Dashboard]: https://open.cdash.org/index.php?project=Insight +[datalad]: https://www.datalad.org/ [community]: https://discourse.itk.org/ [documentation page]: https://www.itk.org/ITK/help/documentation.html [download page]: https://itk.org/ITK/resources/software.html @@ -879,6 +950,7 @@ excellent packaging. [IPFS]: https://ipfs.tech/ [ipfs-desktop]: https://github.com/ipfs/ipfs-desktop/releases [ITKPythonPackage]: https://itkpythonpackage.readthedocs.io/en/latest/index.html +[ITKTestingData]: https://github.com/InsightSoftwareConsortium/ITKTestingData [ITK discussion]: https://discourse.itk.org/ [Image.sc Forum]: https://image.sc [ITK Open Collective page]: https://opencollective.org/itk @@ -886,9 +958,12 @@ excellent packaging. 
[ITK Software Guide]: https://itk.org/ItkSoftwareGuide.pdf [ITK wiki]: https://itk.org/Wiki/ITK [ITK Sphinx examples]: https://itk.org/ITKExamples/ +[kubo]: https://github.com/ipfs/kubo +[pinata]: https://pinata.cloud [releases page]: https://itk.org/Wiki/ITK/Releases [release schedule]: https://itk.org/Wiki/ITK/Release_Schedule [Software Guide]: https://itk.org/ItkSoftwareGuide.pdf +[@web3-storage/w3]: https://www.npmjs.com/package/@web3-storage/w3 [kitware]: https://www.kitware.com/ [public.kitware.com]: public.kitware.com