Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into land_upgrade_hr4
Browse files Browse the repository at this point in the history
  • Loading branch information
jkbk2004 committed Jul 20, 2024
2 parents 5944e94 + c127601 commit 7485a19
Show file tree
Hide file tree
Showing 77 changed files with 3,234 additions and 2,319 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,8 @@ tests/fv3_conf/compile_qsub.IN
tests/fv3_conf/fv3_slurm.IN
tests/fv3_conf/fv3_qsub.IN
build*.log*
rocoto_workflow*
fail_compile_*
fail_test_*
tests/run_dir
tests/logs/log_*
2 changes: 1 addition & 1 deletion doc/UsersGuide/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ alabaster==0.7.16
# via sphinx
babel==2.14.0
# via sphinx
certifi==2024.2.2
certifi==2024.7.4
# via requests
charset-normalizer==3.3.2
# via requests
Expand Down
33 changes: 33 additions & 0 deletions modulefiles/ufs_frontera.intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- Lmod modulefile: sets up the build environment for the UFS Weather Model on
-- Frontera with the Intel toolchain. Every `<name>_ver` value below is read
-- from the environment variable of the same name and falls back to the pinned
-- default when that variable is unset.
help([[
loads UFS Model prerequisites for Frontera/Intel
]])

-- ecflow is loaded from its own module tree, which is a different path from
-- the spack-stack unified environment added next.
prepend_path("MODULEPATH", "/work2/06146/tg854455/frontera/spack-stack/modulefiles")
load("ecflow/5.8.4")

-- spack-stack 1.6.0 unified environment: makes the stack-* meta-modules visible.
prepend_path("MODULEPATH", "/work2/01118/tg803972/frontera/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core")

-- Compiler meta-module.
stack_intel_ver=os.getenv("stack_intel_ver") or "19.1.1.217"
load(pathJoin("stack-intel", stack_intel_ver))

-- MPI meta-module; loaded after the compiler one.
stack_impi_ver=os.getenv("stack_impi_ver") or "2020.4.304"
load(pathJoin("stack-intel-mpi", stack_impi_ver))

-- Build tool; override the version by exporting cmake_ver before loading.
cmake_ver=os.getenv("cmake_ver") or "3.24.2"
load(pathJoin("cmake", cmake_ver))

-- Shared module list defined in a separate "ufs_common" modulefile (not shown here).
load("ufs_common")

stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
load(pathJoin("stack-python", stack_python_ver))

-- nccmp: presumably used by the regression tests to compare NetCDF output -- TODO confirm.
nccmp_ver=os.getenv("nccmp_ver") or "1.9.0.1"
load(pathJoin("nccmp", nccmp_ver))

-- Export the C, C++ and Fortran compiler commands (MPI wrappers) for the build.
setenv("CC", "mpiicc")
setenv("CXX", "mpiicpc")
setenv("FC", "mpiifort")
-- Platform identifier; presumably consumed by the CMake build -- verify against the build scripts.
setenv("CMAKE_Platform", "frontera.intel")

whatis("Description: UFS build environment")
2 changes: 1 addition & 1 deletion tests/bl_date.conf
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export BL_DATE=20240614
export BL_DATE=20240718
2 changes: 1 addition & 1 deletion tests/ci/Jenkinsfile.combined
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def generateStage(nodeLabel) {
GIT_OWNER=$(echo $GIT_URL | cut -d '/' -f4)
GIT_REPO_NAME=$(echo $GIT_URL | cut -d '/' -f5 | cut -d '.' -f1)

curl --silent -X DELETE -H "Accept: application/vnd.github.v3+json" -H "Authorization: Bearer ${GITHUB_TOKEN}" https://api.github.com/repos/${GIT_OWNER}/${GIT_REPO_NAME}/issues/${CHANGE_ID}/labels -d '{"labels":["$machine-.*RT|$machine-.*BL"]}'
curl --silent -X DELETE -H "Accept: application/vnd.github.v3+json" -H "Authorization: Bearer ${GITHUB_TOKEN}" https://api.github.com/repos/${GIT_OWNER}/${GIT_REPO_NAME}/issues/${CHANGE_ID}/labels/{$machine-RT,$machine-BL}
'''
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: true, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "**/*tgz*", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
currentBuild.result = 'FAILURE'
Expand Down
107 changes: 107 additions & 0 deletions tests/ci/Jenkinsfile.ort
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
pipeline {
agent none
stages {
// Stage 'Run ORTs': scan the pull request's labels and record the requested
// platform in env.CHOICE_NODE ('orion', 'hera', 'hercules', 'jet', or 'none'
// when no label names a platform), then log the outcome.
stage('Run ORTs') {
    agent {
        label 'built-in'
    }
    steps {
        script {
            // Start from the default so CHOICE_NODE is always defined, even for
            // a pull request that carries no labels at all.
            env.CHOICE_NODE = 'none'

            for (label in pullRequest.labels) {
                // String.matches() must match the whole label, so only the exact
                // platform names qualify. Labels that are not platforms are
                // ignored rather than resetting an earlier match back to 'none'.
                // If several platform labels are present, the last one wins.
                if (label.matches("orion|hera|hercules|jet")) {
                    env.CHOICE_NODE = label
                }
            }

            def node = env.CHOICE_NODE
            if (node == 'none') {
                echo "${node} is NOT a platform, moving on..."
            }
            else {
                // The platform name is printed twice on purpose: this keeps the
                // established log format ("Starting up orion orion...").
                echo "Starting up ${node} ${node}...this might take 5-10 minutes...please be patient."
            }
        }
    }
}
// Stage 'Run ORT on Hera': on an agent labelled "hera", check out the pull
// request, run opnReqTest for three test cases (regional_control,
// cpld_control_nowave_noaero_p8, control_p8), then commit the three resulting
// OpnReqTests_*_hera.log files and push them to the pull request's fork branch.
stage('Run ORT on Hera') {
agent {
label "hera"
}
// NOTE(review): ACCNR is exported again inside the shell script below, and
// NODE_PATH is not referenced anywhere in the visible script.
environment {
ACCNR = 'epic'
NODE_PATH = '/scratch2/NAGAPE/epic/role.epic/'
}
steps {

cleanWs()
checkout scm
// Shell script outline:
//   1. Initialise submodules; set the Slurm time limit in fv3_slurm.IN_hera
//      to 02:00:00; replace every "intel" with "gnu" in opnReqTest.
//   2. Query the GitHub API for the PR head repository's SSH URL and branch.
//   3. Run opnReqTest three times, copying each log to
//      /scratch2/NAGAPE/epic/role.epic/jenkins/workspace.
//   4. Hard-reset the checkout to FETCH_HEAD, copy the saved logs back into
//      $WORKSPACE/tests/logs/, commit them, then pull from and push to the
//      fork branch through a temporary "sshorigin" remote.
// The script is a Groovy triple-single-quoted string: there is no ${...}
// interpolation by Groovy, but backslash escapes such as the \n in the commit
// message are processed by Groovy before the shell sees the text.
// NOTE(review): the echo announces label removal, but no command in the
// visible script removes labels; machine_name_logs is set but never used.
sh '''
git submodule update --init --recursive
cd tests/fv3_conf
sed 's/#SBATCH --time=.*/#SBATCH --time=02:00:00/g' -i fv3_slurm.IN_hera
cd ..
export machine=${NODE_NAME}
export PATH=$PATH:~/bin
echo $CHANGE_ID
export SSH_ORIGIN=$(curl --silent https://api.github.com/repos/ufs-community/ufs-weather-model/pulls/$CHANGE_ID | jq -r '.head.repo.ssh_url')
export FORK_BRANCH=$(curl --silent https://api.github.com/repos/ufs-community/ufs-weather-model/pulls/$CHANGE_ID | jq -r '.head.ref')
pwd
sed "s|intel|gnu|g" -i opnReqTest
export ACCNR=epic
./opnReqTest -n regional_control -a ${ACCNR} -c bit,dcp,thr
cd logs/
cp OpnReqTests_regional_control_hera.log /scratch2/NAGAPE/epic/role.epic/jenkins/workspace
cd ..
./opnReqTest -n cpld_control_nowave_noaero_p8 -a ${ACCNR} -c dbg,rst
cd logs/
cp OpnReqTests_cpld_control_nowave_noaero_p8_hera.log /scratch2/NAGAPE/epic/role.epic/jenkins/workspace
cd ..
./opnReqTest -n control_p8 -a ${ACCNR} -c std,dbg,bit,mpi,rst,thr,dcp
cd logs/
cp OpnReqTests_control_p8_hera.log /scratch2/NAGAPE/epic/role.epic/jenkins/workspace
git remote -v
git fetch --no-recurse-submodules origin
git reset FETCH_HEAD --hard
cd .. && cd .. && cd ..
cp OpnReqTests_control_p8_hera.log $WORKSPACE/tests/logs/
cp OpnReqTests_regional_control_hera.log $WORKSPACE/tests/logs/
cp OpnReqTests_cpld_control_nowave_noaero_p8_hera.log $WORKSPACE/tests/logs/
cd $WORKSPACE/tests/
git config user.email "ecc.platform@noaa.gov"
git config user.name "epic-cicd-jenkins"
echo "Testing concluded...removing labels for $machine from $GIT_URL"

export machine_name_logs=$(echo $machine | awk '{ print tolower($1) }')
git remote -v | grep -w sshorigin > /dev/null 2>&1 && git remote remove sshorigin > /dev/null 2>&1
git remote add sshorigin $SSH_ORIGIN > /dev/null 2>&1
git add logs/OpnReqTests_control_p8_hera.log logs/OpnReqTests_regional_control_hera.log logs/OpnReqTests_cpld_control_nowave_noaero_p8_hera.log
git commit -m "ORT Jobs Completed.\n\n\n on-behalf-of @ufs-community <ecc.platform@noaa.gov>"
git pull sshorigin $FORK_BRANCH
git push sshorigin HEAD:$FORK_BRANCH
'''
}
}
}
}
2 changes: 2 additions & 0 deletions tests/default_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,8 @@ export DO_UGWP_V1_W_GSLDRAG=.false.
export DO_UGWP_V0_OROG_ONLY=.false.
export DO_GSL_DRAG_LS_BL=.false.
export DO_GSL_DRAG_SS=.true.
export DO_GWD_OPT_PSL=.false.
export PSL_GWD_DX_FACTOR=6.0
export DO_GSL_DRAG_TOFD=.false.
export DO_UGWP_V1=.false.
export DO_UGWP_V1_OROG_ONLY=.false.
Expand Down
3 changes: 3 additions & 0 deletions tests/detect_machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ case $(hostname -f) in

login[1-4].stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede1-4

login[1-4].frontera.tacc.utexas.edu) MACHINE_ID=frontera ;; ### frontera1-4
c*.frontera.tacc.utexas.edu) MACHINE_ID=frontera ;; ### frontera compute

login0[1-2].expanse.sdsc.edu) MACHINE_ID=expanse ;; ### expanse1-2

discover3[1-5].prv.cube) MACHINE_ID=discover ;; ### discover31-35
Expand Down
27 changes: 27 additions & 0 deletions tests/error-test.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# This file is an alternative to rt.conf that tests whether the regression test system rt.sh can detect failure conditions.
#
# ./rt.sh [options] -l error-test.conf
#
# If rt.sh detects errors correctly, the workflow should not finish: some jobs should fail or never be submitted, while others should succeed.
# See details below.

# This should succeed
COMPILE | atm_dyn32 | intel | -DAPP=ATM -DCCPP_SUITES=FV3_GFS_v16,FV3_GFS_v16_flake,FV3_GFS_v17_p8,FV3_GFS_v17_p8_rrtmgp,FV3_GFS_v15_thompson_mynn_lam3km,FV3_WoFS_v0,FV3_GFS_v17_p8_mynn,FV3_GFS_v17_p8_ugwpv1 -D32BIT=ON | | fv3 |

# This should succeed
RUN | control_c48.v2.sfc | | baseline |

# These tests should always fail, and prevent the workflow from completing.
RUN | fail_to_copy | | baseline |
RUN | fail_to_run | | baseline |

# Using 64-bit dynamics ensures results change, but the test runs. The workflow jobs should complete
# for the COMPILE and RUN, but the results should change.
COMPILE | atm_dyn64 | intel | -DAPP=ATM -DCCPP_SUITES=FV3_GFS_v16,FV3_GFS_v16_flake,FV3_GFS_v17_p8,FV3_GFS_v17_p8_rrtmgp,FV3_GFS_v15_thompson_mynn_lam3km,FV3_WoFS_v0,FV3_GFS_v17_p8_mynn,FV3_GFS_v17_p8_ugwpv1 | | fv3 |
RUN | control_c48 | | baseline |

# This compile job should fail, and prevent the workflow from completing.
COMPILE | fail_to_compile | intel | --invalid-argument -DAPP=ATM -DCCPP_SUITES=whatever | | fv3 |

# This test should not be submitted, because its compile job has failed.
RUN | dependency_unmet | | baseline |
2 changes: 1 addition & 1 deletion tests/fv3_conf/control_run.IN
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ if [ $WARM_START = .false. ]; then
elif [ "$V2_SFC_FILE" = "true" ]; then
cp -r @[INPUTDATA_ROOT]/${inputdir}/INPUT_L127_v2_sfc/* ./INPUT/.
else
cp -r @[INPUTDATA_ROOT]/${inputdir}/INPUT_L127/* ./INPUT/.
cp -r @[INPUTDATA_ROOT]/${inputdir}/INPUT_L127_gfsv17/* ./INPUT/.
fi
else
mkdir INPUT RESTART
Expand Down
4 changes: 2 additions & 2 deletions tests/fv3_conf/cpld_control_run.IN
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ if [ $IAER = 1011 ]; then
fi

cp @[INPUTDATA_ROOT]/FV3_input_data/ugwp_c384_tau.nc ./ugwp_limb_tau.nc
cp @[INPUTDATA_ROOT]/${FV3_DIR}/INPUT_L127/oro_data_ls* ./INPUT
cp @[INPUTDATA_ROOT]/${FV3_DIR}/INPUT_L127/oro_data_ss* ./INPUT
cp @[INPUTDATA_ROOT]/${FV3_DIR}/INPUT_L127_gfsv17/oro_data_ls* ./INPUT
cp @[INPUTDATA_ROOT]/${FV3_DIR}/INPUT_L127_gfsv17/oro_data_ss* ./INPUT

if [ $IMP_PHYSICS = 8 ]; then
cp @[INPUTDATA_ROOT]/FV3_fix/CCN_ACTIVATE.BIN CCN_ACTIVATE.BIN
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_qsub.IN_acorn
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ export ESMF_RUNTIME_COMPLIANCECHECK=OFF:depth=4
export ESMF_RUNTIME_PROFILE=ON
export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

mpiexec -n @[TASKS] -ppn @[TPN] -depth @[THRD] ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_qsub.IN_derecho
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ export MPICH_COLL_OPT_OFF=1
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

mpiexec -n @[UFS_TASKS] -ppn @[PPN] --hostfile $PBS_NODEFILE ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_qsub.IN_wcoss2
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ export ESMF_RUNTIME_COMPLIANCECHECK=OFF:depth=4
export ESMF_RUNTIME_PROFILE=ON
export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

mpiexec -n @[TASKS] -ppn @[TPN] -depth @[THRD] ./fv3.exe

echo "Model ended: " `date`
Expand Down
8 changes: 8 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_expanse
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ echo "Model started: "`date`
export OMP_STACK_SIZE=512M
export OMP_NUM_THREADS=@[THRD]
export I_MPI_PMI_LIBRARY=/cm/shared/apps/slurm/current/lib64/libpmi.so

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_gaea
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun --label -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_hera
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ export PSM_SHAREDCONTEXTS=1
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

# shellcheck disable=SC2102
srun --label -n @[TASKS] ./fv3.exe

Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_hercules
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ fi
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun --label -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_jet
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun --label -n @[TASKS] --cpus-per-task=@[THRD] ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_noaacloud
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ export OMP_NUM_THREADS=1
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun --mpi=pmi2 --label -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_orion
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun --label -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
Expand Down
7 changes: 7 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_s4
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ export PSM_SHAREDCONTEXTS=1
# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun --label -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
Expand Down
Loading

0 comments on commit 7485a19

Please sign in to comment.