From 2c87eafb5c6effc413251858d4189a9a3d64f7e0 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 12 May 2022 11:12:48 -0700 Subject: [PATCH 01/32] Copy files from prp-osgvo-pilot/image --- osg-htc/nrp-ospool-ep/Dockerfile | 28 ++++++++++ .../scripts/20_advertise_k8s_domain.sh | 29 ++++++++++ osg-htc/nrp-ospool-ep/scripts/entrypoint.sh | 7 +++ .../nrp-ospool-ep/scripts/singularity_npid.sh | 55 +++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 osg-htc/nrp-ospool-ep/Dockerfile create mode 100755 osg-htc/nrp-ospool-ep/scripts/20_advertise_k8s_domain.sh create mode 100755 osg-htc/nrp-ospool-ep/scripts/entrypoint.sh create mode 100755 osg-htc/nrp-ospool-ep/scripts/singularity_npid.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile new file mode 100644 index 00000000..ec088ce7 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -0,0 +1,28 @@ +FROM opensciencegrid/osgvo-docker-pilot:3.6-release + +# Tweaked singularity for container environments +# Remove default singularity from path, too +COPY scripts/singularity_npid.sh /usr/bin/singularity_npid.sh +RUN mv /usr/bin/singularity /usr/bin/singularity.org && \ + chmod a+x /usr/bin/singularity_npid.sh && \ + ln -s /usr/bin/singularity_npid.sh /usr/bin/singularity + +# We want to be able to add system-level changes +# and run without explicit privilege drop +RUN mv /bin/entrypoint.sh /bin/entrypoint.osg.sh +COPY scripts/entrypoint.sh /bin/entrypoint.sh +RUN chmod a+rx /bin/entrypoint.sh + +COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh +# +# In addition to osgvo-docker-pilot envs, +# it accepts also the following env variables +# CONDOR_HOST +# NUM_CPUS MEMORY DISK NUM_GPUS +# PHYSICAL_HOSTNAME +# K8S_NAMESPACE K8S_DOMAIN +# K8S_PROVISIONER_NAME K8S_PROVISIONER_TYPE +# NVIDIA_SYMLINK +# FORCE_K8SNAMESPACE_MATCHING ADDITIONAL_REQUIREMENTS +# + diff --git a/osg-htc/nrp-ospool-ep/scripts/20_advertise_k8s_domain.sh b/osg-htc/nrp-ospool-ep/scripts/20_advertise_k8s_domain.sh new file mode 100755 index 00000000..7cfbb332 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/20_advertise_k8s_domain.sh @@ -0,0 +1,29 @@ +#!/bin/bash + + +# +# Advertise the k8s namespace and physical hostname +# + +echo "# K8S params" >> "${PILOT_CONFIG_FILE}" + +if [ "x${HOSTNAME}" != "x" ]; then + echo "K8SPodName=\"${HOSTNAME}\"" >> "${PILOT_CONFIG_FILE}" + echo 'STARTD_EXPRS = $(STARTD_EXPRS) K8SPodName' >> "${PILOT_CONFIG_FILE}" +fi + +if [ "x${K8S_DOMAIN}" != "x" ]; then + echo "K8SDomain=\"${K8S_DOMAIN}\"" >> "${PILOT_CONFIG_FILE}" + echo 'STARTD_EXPRS = $(STARTD_EXPRS) K8SDomain' >> "${PILOT_CONFIG_FILE}" +fi + +if [ "x${K8S_NAMESPACE}" != "x" ]; then + echo "K8SNamespace=\"${K8S_NAMESPACE}\"" >> "${PILOT_CONFIG_FILE}" + echo 'STARTD_EXPRS = $(STARTD_EXPRS) K8SNamespace' >> "${PILOT_CONFIG_FILE}" +fi + +if [ "x${PHYSICAL_HOSTNAME}" != "x" ]; then + echo "K8SPhysicalHostName=\"${PHYSICAL_HOSTNAME}\"" >> "${PILOT_CONFIG_FILE}" + echo 'STARTD_EXPRS = $(STARTD_EXPRS) K8SPhysicalHostName' >> "${PILOT_CONFIG_FILE}" +fi + diff --git a/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh new file mode 100755 index 00000000..412be6ba --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Allow the derived images to run any additional runtime customizations +for x in /etc/entrypoint/image-config.d/*.sh; do source "$x"; done + +export HOME=/pilot +su osg -p -c "/bin/entrypoint.osg.sh $@" diff --git a/osg-htc/nrp-ospool-ep/scripts/singularity_npid.sh b/osg-htc/nrp-ospool-ep/scripts/singularity_npid.sh new file mode 100755 index 00000000..7d6ca1aa --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/singularity_npid.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# simple singularity wrapper that doesn't allow the -p or --pid option +# also rewrite -C to remove the -p option +# Disclaimer: Based on +# https://wiki-dev.bash-hackers.org/scripting/posparams +# + +options=() # the buffer array for the parameters +eoo=0 # end of options reached + +while [[ $1 ]] +do + if ! ((eoo)); then + case "$1" in + --pid) + # pretend was not passed + shift + ;; + -p) + # pretend was not passed + shift + ;; + --containall) + options+=("--contain") + options+=("--cleanenv") + options+=("--ipc") + # but not --pid + shift + ;; + -C) + options+=("-c") + options+=("-e") + options+=("-i") + # but not -p + shift + ;; + --) + eoo=1 + options+=("$1") + shift + ;; + *) + options+=("$1") + shift + ;; + esac + else + options+=("$1") + shift + fi +done + +exec /cvmfs/oasis.opensciencegrid.org/mis/singularity/bin/singularity "${options[@]}" + From 2a606c7b54dfbdb58e09bfa3e13ed970b93ac379 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 12 May 2022 11:30:31 -0700 Subject: [PATCH 02/32] Integrate wn/image/image-config.d/02_additional_requirements.sh with fixed condor config --- .../scripts/22_set_requirements.sh | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100755 osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh new file mode 100755 index 00000000..7ed0eca9 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +cat >> "${PILOT_CONFIG_FILE}" << EOF +# +# Force matching on K8SNamespace +# unless FORCE_K8SNAMESPACE_MATCHING=="no" +# + +MATCHING_START = ( (FORCE_K8SNAMESPACE_MATCHING=?="no") || regexp(TARGET.RequestK8SNamespace,K8SNamespace) ) + +# +# Force matching on provisioned resources +# + +PROVISIONING_START = ifthenelse(TARGET.RequestCPUs=!=undefined, CPUs=?=TARGET.RequestCPUs, CPUs=?=1) + +# Pretend small memory and disk requests are the equivalent of not set +PROVISIONING_START = \$(PROVISIONING_START) && \\ + ifthenelse(TARGET.RequestMemory=!=undefined, ifthenelse(TARGET.RequestMemory<4096,Memory=?=4096, Memory=?=TARGET.RequestMemory), Memory=?=4096) +PROVISIONING_START = \$(PROVISIONING_START) && \\ + ifthenelse(TARGET.RequestDisk=!=undefined, ifthenelse(TARGET.RequestDisk<8000000,Disk=?=8000000, Disk=?=TARGET.RequestDisk), Disk=?=8000000) + +# GPUs will not be defined if there are no GPUs +PROVISIONING_START = \$(PROVISIONING_START) && \\ + ifthenelse(TARGET.RequestGPUs=!=undefined, \\ + ifthenelse(GPUs=!=undefined, GPUs=?=TARGET.RequestGPUs, TARGET.RequestGPUs=?=0), \\ + (GPUs=?=undefined) || (GPUs=?=0)) + + +START = ( \$(START) ) && ( \$(PROVISIONING_START) ) && ( \$(MATCHING_START) ) + +EOF + + +if [ "x${ADDITIONAL_REQUIREMENTS}" != "x" ]; then + echo "# Additional requirements added at runtime " > "${PILOT_CONFIG_FILE}" + echo "MATCHING_START = ( \$(MATCHING_START) ) && ( ${ADDITIONAL_REQUIREMENTS} )" >> "${PILOT_CONFIG_FILE}" +fi From 2b208d30265bb28b9a41e3e09e2d468eec275cc5 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 12 May 2022 11:57:31 -0700 Subject: [PATCH 03/32] Add scripts/21_advertise_k8s_provisioner.sh --- osg-htc/nrp-ospool-ep/Dockerfile | 5 ++++- .../scripts/21_advertise_k8s_provisioner.sh | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100755 osg-htc/nrp-ospool-ep/scripts/21_advertise_k8s_provisioner.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index ec088ce7..420d1bea 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -14,6 +14,10 @@ COPY scripts/entrypoint.sh /bin/entrypoint.sh RUN chmod a+rx /bin/entrypoint.sh COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh +COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh +COPY scripts/22_set_requirements.sh /etc/osg/image-init.d/22_set_requirements.sh + + # # In addition to osgvo-docker-pilot envs, # it accepts also the following env variables @@ -22,7 +26,6 @@ COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_d # PHYSICAL_HOSTNAME # K8S_NAMESPACE K8S_DOMAIN # K8S_PROVISIONER_NAME K8S_PROVISIONER_TYPE -# NVIDIA_SYMLINK # FORCE_K8SNAMESPACE_MATCHING ADDITIONAL_REQUIREMENTS # diff --git a/osg-htc/nrp-ospool-ep/scripts/21_advertise_k8s_provisioner.sh b/osg-htc/nrp-ospool-ep/scripts/21_advertise_k8s_provisioner.sh new file mode 100755 index 00000000..50c9c87a --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/21_advertise_k8s_provisioner.sh @@ -0,0 +1,17 @@ +#!/bin/bash + + +# +# Advertise the k8s provisioner +# + +if [ "x${K8S_PROVISIONER_TYPE}" != "x" ]; then + echo "K8SProvisionerType=\"${K8S_PROVISIONER_TYPE}\"" >> "${PILOT_CONFIG_FILE}" + echo 'STARTD_EXPRS = $(STARTD_EXPRS) K8SProvisionerType' >> "${PILOT_CONFIG_FILE}" +fi + +if [ "x${K8S_PROVISIONER_NAME}" != "x" ]; then + echo "K8SProvisionerName=\"${K8S_PROVISIONER_NAME}\"" >> "${PILOT_CONFIG_FILE}" + echo 'STARTD_EXPRS = $(STARTD_EXPRS) K8SProvisionerName' >> "${PILOT_CONFIG_FILE}" +fi + From 1a434ec6fd5e9543e6e16455f0543c9cac1356b4 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 12 May 2022 12:38:43 -0700 Subject: [PATCH 04/32] Add FORCE_K8SNAMESPACE_MATCHING --- osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh index 7ed0eca9..cf9377ec 100755 --- a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh +++ b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh @@ -6,6 +6,9 @@ cat >> "${PILOT_CONFIG_FILE}" << EOF # unless FORCE_K8SNAMESPACE_MATCHING=="no" # +FORCE_K8SNAMESPACE_MATCHING = \"${FORCE_K8SNAMESPACE_MATCHING:-no}\" +STARTD_EXPRS = \$(STARTD_EXPRS) FORCE_K8SNAMESPACE_MATCHING + MATCHING_START = ( (FORCE_K8SNAMESPACE_MATCHING=?="no") || regexp(TARGET.RequestK8SNamespace,K8SNamespace) ) # @@ -26,7 +29,6 @@ PROVISIONING_START = \$(PROVISIONING_START) && \\ ifthenelse(GPUs=!=undefined, GPUs=?=TARGET.RequestGPUs, TARGET.RequestGPUs=?=0), \\ (GPUs=?=undefined) || (GPUs=?=0)) - START = ( \$(START) ) && ( \$(PROVISIONING_START) ) && ( \$(MATCHING_START) ) EOF From 48843d3ccd7d32a57e820ed26c0ca06cd302cca4 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 12 May 2022 13:16:34 -0700 Subject: [PATCH 05/32] Add scripts/19_set_resources.sh --- osg-htc/nrp-ospool-ep/Dockerfile | 1 + .../nrp-ospool-ep/scripts/19_set_resources.sh | 35 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100755 osg-htc/nrp-ospool-ep/scripts/19_set_resources.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 420d1bea..67a087ef 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -13,6 +13,7 @@ RUN mv /bin/entrypoint.sh /bin/entrypoint.osg.sh COPY scripts/entrypoint.sh /bin/entrypoint.sh RUN chmod a+rx /bin/entrypoint.sh +COPY scripts/19_set_resources.sh /etc/osg/image-init.d/19_set_resources.sh COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh COPY scripts/22_set_requirements.sh /etc/osg/image-init.d/22_set_requirements.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/19_set_resources.sh b/osg-htc/nrp-ospool-ep/scripts/19_set_resources.sh new file mode 100755 index 00000000..bc8fa56e --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/19_set_resources.sh @@ -0,0 +1,35 @@ +#!/bin/bash + + +# +# Advertise the k8s namespace and physical hostname +# + +# +# NUM_CPUS and MEMORY are also handled by the osgvo-pilot +# but we re-set it here +# + +full_num_cpus="${NUM_CPUS:-1}" +full_memory="${MEMORY:-1024}" +full_disk="${DISK:-100000}" +full_num_gpus="${NUM_GPUS:-0}" + +echo "NUM_CPUS = ${full_num_cpus}" >> "${PILOT_CONFIG_FILE}" +echo "MEMORY = ${full_memory}" >> "${PILOT_CONFIG_FILE}" +echo "DISK = ${full_disk}" >> "${PILOT_CONFIG_FILE}" + +# single slot using all the requested resources +echo "NUM_SLOTS_TYPE_1 = 1" >> "${PILOT_CONFIG_FILE}" +echo "SLOT_TYPE_1_PARTITIONABLE = FALSE" >> "${PILOT_CONFIG_FILE}" + +if [ "x${full_num_gpus}" != "x0" ]; then + # we cannot really set the number of GPUs, just enable auto-detect + echo "use feature : GPUs" >> "${PILOT_CONFIG_FILE}" + echo "SLOT_TYPE_1 = cpu=${full_num_cpus},mem=${full_memory},disk=auto,swap=auto,gpus=${full_num_gpus}" \ + >> "${PILOT_CONFIG_FILE}" +else + echo "SLOT_TYPE_1 = cpu=${full_num_cpus},mem=${full_memory},disk=auto,swap=auto" \ + >> "${PILOT_CONFIG_FILE}" +fi + From 99c17c6ba465cdc9c2ee463483a48cd0c86a2d1a Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 12 May 2022 13:35:51 -0700 Subject: [PATCH 06/32] Fix typo --- osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh index cf9377ec..8b171299 100755 --- a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh +++ b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh @@ -6,7 +6,7 @@ cat >> "${PILOT_CONFIG_FILE}" << EOF # unless FORCE_K8SNAMESPACE_MATCHING=="no" # -FORCE_K8SNAMESPACE_MATCHING = \"${FORCE_K8SNAMESPACE_MATCHING:-no}\" +FORCE_K8SNAMESPACE_MATCHING = "${FORCE_K8SNAMESPACE_MATCHING:-no}" STARTD_EXPRS = \$(STARTD_EXPRS) FORCE_K8SNAMESPACE_MATCHING MATCHING_START = ( (FORCE_K8SNAMESPACE_MATCHING=?="no") || regexp(TARGET.RequestK8SNamespace,K8SNamespace) ) From e6a3206c85ba21d3044250beb94d9317eac62efe Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 13 May 2022 08:50:53 -0700 Subject: [PATCH 07/32] Add scripts/01_token.sh --- osg-htc/nrp-ospool-ep/Dockerfile | 2 ++ osg-htc/nrp-ospool-ep/scripts/01_token.sh | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100755 osg-htc/nrp-ospool-ep/scripts/01_token.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 67a087ef..78f7f543 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -13,6 +13,8 @@ RUN mv /bin/entrypoint.sh /bin/entrypoint.osg.sh COPY scripts/entrypoint.sh /bin/entrypoint.sh RUN chmod a+rx /bin/entrypoint.sh +COPY scripts/01_token.sh /etc/osg/image-init.d/01_token.sh + COPY scripts/19_set_resources.sh /etc/osg/image-init.d/19_set_resources.sh COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/01_token.sh b/osg-htc/nrp-ospool-ep/scripts/01_token.sh new file mode 100755 index 00000000..e2ce3fc0 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/01_token.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# +# osgvo-pilot expects the token as an env variable +# + +export TOKEN=`cat /etc/condor/tokens.d/prp-wn.token` + From 044de809fa51e55b16c3891b2db82f2cfa13aa1e Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 13 May 2022 09:06:12 -0700 Subject: [PATCH 08/32] Fix script location --- osg-htc/nrp-ospool-ep/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 78f7f543..e26402db 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -13,8 +13,10 @@ RUN mv /bin/entrypoint.sh /bin/entrypoint.osg.sh COPY scripts/entrypoint.sh /bin/entrypoint.sh RUN chmod a+rx /bin/entrypoint.sh -COPY scripts/01_token.sh /etc/osg/image-init.d/01_token.sh +# system level scripts +COPY scripts/01_token.sh /etc/entrypoint/image-config.d/01_token.sh +# pilot level scripts COPY scripts/19_set_resources.sh /etc/osg/image-init.d/19_set_resources.sh COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh From f67e7258bcef1015d0ec20eb84e29aa9ee558b85 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 13 May 2022 12:28:06 -0700 Subject: [PATCH 09/32] Fix typo in 22_set_requirements.sh --- osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh index 8b171299..4f6b0708 100755 --- a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh +++ b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh @@ -35,6 +35,6 @@ EOF if [ "x${ADDITIONAL_REQUIREMENTS}" != "x" ]; then - echo "# Additional requirements added at runtime " > "${PILOT_CONFIG_FILE}" + echo "# Additional requirements added at runtime " >> "${PILOT_CONFIG_FILE}" echo "MATCHING_START = ( \$(MATCHING_START) ) && ( ${ADDITIONAL_REQUIREMENTS} )" >> "${PILOT_CONFIG_FILE}" fi From 8b0708922f8d856d62fb35485ac3d610c6f8f877 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 13 May 2022 13:52:10 -0700 Subject: [PATCH 10/32] Add scripts/01_no_condor_host.sh --- osg-htc/nrp-ospool-ep/Dockerfile | 2 +- osg-htc/nrp-ospool-ep/scripts/01_no_condor_host.sh | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100755 osg-htc/nrp-ospool-ep/scripts/01_no_condor_host.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index e26402db..3a70649a 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -15,6 +15,7 @@ RUN chmod a+rx /bin/entrypoint.sh # system level scripts COPY scripts/01_token.sh /etc/entrypoint/image-config.d/01_token.sh +COPY scripts/01_no_condor_host.sh /etc/entrypoint/image-config.d/01_no_condor_host.sh # pilot level scripts COPY scripts/19_set_resources.sh /etc/osg/image-init.d/19_set_resources.sh @@ -26,7 +27,6 @@ COPY scripts/22_set_requirements.sh /etc/osg/image-init.d/22_set_requirements.sh # # In addition to osgvo-docker-pilot envs, # it accepts also the following env variables -# CONDOR_HOST # NUM_CPUS MEMORY DISK NUM_GPUS # PHYSICAL_HOSTNAME # K8S_NAMESPACE K8S_DOMAIN diff --git a/osg-htc/nrp-ospool-ep/scripts/01_no_condor_host.sh b/osg-htc/nrp-ospool-ep/scripts/01_no_condor_host.sh new file mode 100755 index 00000000..f366c6a0 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/01_no_condor_host.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# +# osgvo pilot does not like is CONDOR_HOST is set +# + +export CONDOR_HOST= +unset CONDOR_HOST + From dc73c235396a52acd013a46b740d3c5e6181e63e Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 17 May 2022 16:04:00 -0700 Subject: [PATCH 11/32] Add scripts/check_master.sh --- osg-htc/nrp-ospool-ep/Dockerfile | 9 +++ osg-htc/nrp-ospool-ep/scripts/check_master.sh | 69 +++++++++++++++++++ osg-htc/nrp-ospool-ep/scripts/entrypoint.sh | 16 ++++- 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100755 osg-htc/nrp-ospool-ep/scripts/check_master.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 3a70649a..d57432a3 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -13,6 +13,9 @@ RUN mv /bin/entrypoint.sh /bin/entrypoint.osg.sh COPY scripts/entrypoint.sh /bin/entrypoint.sh RUN chmod a+rx /bin/entrypoint.sh +COPY scripts/check_master.sh /bin/check_master.sh +RUN chmod a+rx /bin/check_master.sh + # system level scripts COPY scripts/01_token.sh /etc/entrypoint/image-config.d/01_token.sh COPY scripts/01_no_condor_host.sh /etc/entrypoint/image-config.d/01_no_condor_host.sh @@ -23,6 +26,12 @@ COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_d COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh COPY scripts/22_set_requirements.sh /etc/osg/image-init.d/22_set_requirements.sh +# keep default low, as we expect to be running in opportunistic mode +env ACCEPT_JOBS_FOR_HOURS=24 + +# keep default idle time low, as we may over-provision certain kinds of resources +# but others may be waiting +env ACCEPT_IDLE_MINUTES=20 # # In addition to osgvo-docker-pilot envs, diff --git a/osg-htc/nrp-ospool-ep/scripts/check_master.sh b/osg-htc/nrp-ospool-ep/scripts/check_master.sh new file mode 100755 index 00000000..1dc82baf --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/check_master.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# +# Wait until condor is first terminated +# $1 provides initial wait time, to give time to initialization to do its job +# $2 is the wait time between checks +# + +wait "$1" + +awk '/CondorVersion/{split($3,a,":"); split(a[2],b,")"); print b[1]}' /pilot/log/MasterLog > /root/master_pids.txt +npids=`cat /root/master_pids.txt | wc -l` + +if [ $npids -lt 1 ]; then + echo "condor_master log empty, unexpected" 1>&2 + exit 1 +fi +if [ $npids -gt 1 ]; then + echo "condor_master restrated at first step" 1>&2 + exit 1 +fi + +orgpid=`cat /root/master_pids.txt` + +nprocs=`ps $orgpid |grep condor_master |wc -l` +if [ $nprocs -ne 1 ]; then + echo "condor_master not running at first step" 1>&2 + exit 1 +fi + + +while [ 0 -eq 0 ]; do + sleep "$2" + + rm -f /root/master_pids.txt + awk '/CondorVersion/{split($3,a,":"); split(a[2],b,")"); print b[1]}' /pilot/log/MasterLog > /root/master_pids.txt + npids=`cat /root/master_pids.txt | wc -l` + + if [ $npids -lt 1 ]; then + echo "condor_master log empty, unexpected after first step" 1>&2 + exit 1 + fi + + if [ $npids -gt 1 ]; then + echo "condor_master restrated" 1>&2 + break + fi + + if [ $nprocs -ne 1 ]; then + echo "condor_master not running" 1>&2 + break + fi +done + +echo "`date` End of condor_master" + +echo "=== tail /pilot/log/MasterLog" +tail -100 /pilot/log/MasterLog + +echo "=== tail /pilot/log/StartLog" +tail -100 /pilotl/log/StartLog + +echo "========= startd_history =============" +cat /pilot/log/startd_history +echo "========== end history ==============" + + +exit 0 + diff --git a/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh index 412be6ba..fcc0f2ab 100755 --- a/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh +++ b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh @@ -3,5 +3,19 @@ # Allow the derived images to run any additional runtime customizations for x in /etc/entrypoint/image-config.d/*.sh; do source "$x"; done +# properly cleanup on signal +trap 'echo signal received!; kill $(jobs -p); wait' SIGINT SIGTERM + export HOME=/pilot -su osg -p -c "/bin/entrypoint.osg.sh $@" +su osg -p -c "/bin/entrypoint.osg.sh $@" & +myproc=$! + +# protection in case it does not terminate oby itself when condor dies or restarts +(/bin/check_master.sh 300 1; kill ${myproc}; echo "`date` Sending kill") & + + +wait ${myproc} +rc=$? +echo "`date` entrypoint.osg.sh terminated with $rc" + +exit 0 From 9a6638f156b8ce991e33683505841a57f2178d59 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 17 May 2022 16:24:27 -0700 Subject: [PATCH 12/32] Fix typo --- osg-htc/nrp-ospool-ep/scripts/check_master.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/check_master.sh b/osg-htc/nrp-ospool-ep/scripts/check_master.sh index 1dc82baf..baff085a 100755 --- a/osg-htc/nrp-ospool-ep/scripts/check_master.sh +++ b/osg-htc/nrp-ospool-ep/scripts/check_master.sh @@ -6,7 +6,7 @@ # $2 is the wait time between checks # -wait "$1" +sleep "$1" awk '/CondorVersion/{split($3,a,":"); split(a[2],b,")"); print b[1]}' /pilot/log/MasterLog > /root/master_pids.txt npids=`cat /root/master_pids.txt | wc -l` From 9d90d372c50ffe9729894ea3f0aa516e9dd07f13 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 10 Jun 2022 16:52:14 -0700 Subject: [PATCH 13/32] Add 02_validate_singularity.sh --- osg-htc/nrp-ospool-ep/Dockerfile | 1 + .../scripts/02_validate_singularity.sh | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index d57432a3..6463e0f4 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -21,6 +21,7 @@ COPY scripts/01_token.sh /etc/entrypoint/image-config.d/01_token.sh COPY scripts/01_no_condor_host.sh /etc/entrypoint/image-config.d/01_no_condor_host.sh # pilot level scripts +COPY scripts/02_validate_singularity.sh /etc/osg/image-init.d/02_validate_singularity.sh COPY scripts/19_set_resources.sh /etc/osg/image-init.d/19_set_resources.sh COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh b/osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh new file mode 100644 index 00000000..523aa341 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# else do nothing, let Condor figure it out + +if [ -f "/usr/bin/singularity" ]; then + # only test for singularity functionality if singularity is present + # may not be in all pods + + /usr/bin/singularity exec --contain --ipc --pid --bind /cvmfs /cvmfs/singularity.opensciencegrid.org/opensciencegrid/osgvo-el7:latest /usr/bin/dc -e "3 5 + p" + rc=$? + + if [ $rc -ne 0 ]; then + echo "Singularity test execution failed!" + sleep 30 + exit 1 + fi + +fi From f87182a5859ed7996a304dcc3aa260068e1b371a Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 10 Jun 2022 17:26:41 -0700 Subject: [PATCH 14/32] Fix return code --- osg-htc/nrp-ospool-ep/scripts/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh index fcc0f2ab..6e89663c 100755 --- a/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh +++ b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh @@ -18,4 +18,4 @@ wait ${myproc} rc=$? echo "`date` entrypoint.osg.sh terminated with $rc" -exit 0 +exit $rc From bc2f7a2c3854809a026fcfe3805fe23d2294f71d Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 21 Nov 2022 14:55:39 -0800 Subject: [PATCH 15/32] Wrap apptainer, too --- osg-htc/nrp-ospool-ep/Dockerfile | 6 ++ .../scripts/02_validate_apptainer.sh | 18 ++++++ .../nrp-ospool-ep/scripts/apptainer_npid.sh | 55 +++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh create mode 100755 osg-htc/nrp-ospool-ep/scripts/apptainer_npid.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 6463e0f4..44986e13 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -7,6 +7,11 @@ RUN mv /usr/bin/singularity /usr/bin/singularity.org && \ chmod a+x /usr/bin/singularity_npid.sh && \ ln -s /usr/bin/singularity_npid.sh /usr/bin/singularity +COPY scripts/apptainer_npid.sh /usr/bin/apptainer_npid.sh +RUN mv /usr/bin/apptainer /usr/bin/apptainer.org && \ + chmod a+x /usr/bin/apptainer_npid.sh && \ + ln -s /usr/bin/apptainer_npid.sh /usr/bin/apptainer + # We want to be able to add system-level changes # and run without explicit privilege drop RUN mv /bin/entrypoint.sh /bin/entrypoint.osg.sh @@ -22,6 +27,7 @@ COPY scripts/01_no_condor_host.sh /etc/entrypoint/image-config.d/01_no_condor_ho # pilot level scripts COPY scripts/02_validate_singularity.sh /etc/osg/image-init.d/02_validate_singularity.sh +COPY scripts/02_validate_apptainer.sh /etc/osg/image-init.d/02_validate_apptainer.sh COPY scripts/19_set_resources.sh /etc/osg/image-init.d/19_set_resources.sh COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh b/osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh new file mode 100644 index 00000000..91e7772f --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# else do nothing, let Condor figure it out + +if [ -f "/usr/bin/apptainer" ]; then + # only test for apptainer functionality if singularity is present + # may not be in all pods + + /usr/bin/apptainer exec --contain --ipc --pid --bind /cvmfs /cvmfs/singularity.opensciencegrid.org/opensciencegrid/osgvo-el7:latest /usr/bin/dc -e "3 5 + p" + rc=$? + + if [ $rc -ne 0 ]; then + echo "Apptainer test execution failed!" + sleep 30 + exit 1 + fi + +fi diff --git a/osg-htc/nrp-ospool-ep/scripts/apptainer_npid.sh b/osg-htc/nrp-ospool-ep/scripts/apptainer_npid.sh new file mode 100755 index 00000000..176d8163 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/apptainer_npid.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# simple apptainer wrapper that doesn't allow the -p or --pid option +# also rewrite -C to remove the -p option +# Disclaimer: Based on +# https://wiki-dev.bash-hackers.org/scripting/posparams +# + +options=() # the buffer array for the parameters +eoo=0 # end of options reached + +while [[ $1 ]] +do + if ! ((eoo)); then + case "$1" in + --pid) + # pretend was not passed + shift + ;; + -p) + # pretend was not passed + shift + ;; + --containall) + options+=("--contain") + options+=("--cleanenv") + options+=("--ipc") + # but not --pid + shift + ;; + -C) + options+=("-c") + options+=("-e") + options+=("-i") + # but not -p + shift + ;; + --) + eoo=1 + options+=("$1") + shift + ;; + *) + options+=("$1") + shift + ;; + esac + else + options+=("$1") + shift + fi +done + +exec /cvmfs/oasis.opensciencegrid.org/mis/apptainer/bin/apptainer "${options[@]}" + From 2168067a5bf2e15d5fc5279cafa096bdc5d232e2 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 20 Jun 2023 16:28:07 -0700 Subject: [PATCH 16/32] Test also --nv in singularity --- osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh b/osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh index 523aa341..3b0930c1 100644 --- a/osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh +++ b/osg-htc/nrp-ospool-ep/scripts/02_validate_singularity.sh @@ -1,12 +1,19 @@ #!/bin/bash +# check if we need to test nvidia +nvf= +ls -l /dev/nvidia* +if [ $? -eq 0 ]; then + nvf=--nv +fi + # else do nothing, let Condor figure it out if [ -f "/usr/bin/singularity" ]; then # only test for singularity functionality if singularity is present # may not be in all pods - /usr/bin/singularity exec --contain --ipc --pid --bind /cvmfs /cvmfs/singularity.opensciencegrid.org/opensciencegrid/osgvo-el7:latest /usr/bin/dc -e "3 5 + p" + /usr/bin/singularity exec $nvf --contain --ipc --pid --bind /cvmfs /cvmfs/singularity.opensciencegrid.org/opensciencegrid/osgvo-el8:latest /usr/bin/dc -e "3 5 + p" rc=$? if [ $rc -ne 0 ]; then From 410761fd2ffa89e85eb76319886ddb4f9ca9ebf1 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 20 Jun 2023 16:29:56 -0700 Subject: [PATCH 17/32] Test also --nv in apptainer --- osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh b/osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh index 91e7772f..c6619507 100644 --- a/osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh +++ b/osg-htc/nrp-ospool-ep/scripts/02_validate_apptainer.sh @@ -1,12 +1,19 @@ #!/bin/bash +# check if we need to test nvidia +nvf= +ls -l /dev/nvidia* +if [ $? -eq 0 ]; then + nvf=--nv +fi + # else do nothing, let Condor figure it out if [ -f "/usr/bin/apptainer" ]; then # only test for apptainer functionality if singularity is present # may not be in all pods - /usr/bin/apptainer exec --contain --ipc --pid --bind /cvmfs /cvmfs/singularity.opensciencegrid.org/opensciencegrid/osgvo-el7:latest /usr/bin/dc -e "3 5 + p" + /usr/bin/apptainer exec $nvf --contain --ipc --pid --bind /cvmfs /cvmfs/singularity.opensciencegrid.org/opensciencegrid/osgvo-el7:latest /usr/bin/dc -e "3 5 + p" rc=$? if [ $rc -ne 0 ]; then From 5c77a6610394a095bdf1067fe1701893bcdf5469 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Wed, 21 Feb 2024 16:23:45 -0800 Subject: [PATCH 18/32] Change entrypoint location as per upstream --- osg-htc/nrp-ospool-ep/Dockerfile | 6 +++--- osg-htc/nrp-ospool-ep/scripts/entrypoint.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 44986e13..1c81c711 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -14,9 +14,9 @@ RUN mv /usr/bin/apptainer /usr/bin/apptainer.org && \ # We want to be able to add system-level changes # and run without explicit privilege drop -RUN mv /bin/entrypoint.sh /bin/entrypoint.osg.sh -COPY scripts/entrypoint.sh /bin/entrypoint.sh -RUN chmod a+rx /bin/entrypoint.sh +RUN mv /usr/local/sbin/entrypoint.sh /usr/local/sbin/entrypoint.osg.sh +COPY scripts/entrypoint.sh /usr/local/sbin/entrypoint.sh +RUN chmod a+rx /usr/local/sbin/entrypoint.sh COPY scripts/check_master.sh /bin/check_master.sh RUN chmod a+rx /bin/check_master.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh index 6e89663c..ae6a08b5 100755 --- a/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh +++ b/osg-htc/nrp-ospool-ep/scripts/entrypoint.sh @@ -7,7 +7,7 @@ for x in /etc/entrypoint/image-config.d/*.sh; do source "$x"; done trap 'echo signal received!; kill $(jobs -p); wait' SIGINT SIGTERM export HOME=/pilot -su osg -p -c "/bin/entrypoint.osg.sh $@" & +su osg -p -c "/usr/local/sbin/entrypoint.osg.sh $@" & myproc=$! # protection in case it does not terminate oby itself when condor dies or restarts From 30e0480640a7880e94b0f2cb632444e994072e28 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 15 Mar 2024 09:35:19 -0700 Subject: [PATCH 19/32] Switch from osgvo-docker-pilot:3.6-release to osgvo-docker-pilot:23-release --- osg-htc/nrp-ospool-ep/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 1c81c711..5e4873aa 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -1,4 +1,4 @@ -FROM opensciencegrid/osgvo-docker-pilot:3.6-release +FROM opensciencegrid/osgvo-docker-pilot:23-release # Tweaked singularity for container environments # Remove default singularity from path, too From e9efb8b6a3a278879019ebd501b8e1f7c81e5094 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 15 Mar 2024 11:57:40 -0700 Subject: [PATCH 20/32] Move from opensciencegrid/osgvo-docker-pilot:23-release to opensciencegrid/osgvo-docker-pilot:23-el9-releasee --- osg-htc/nrp-ospool-ep/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 5e4873aa..0c42c7bd 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -1,4 +1,4 @@ -FROM opensciencegrid/osgvo-docker-pilot:23-release +FROM opensciencegrid/osgvo-docker-pilot:23-el9-releasee # Tweaked singularity for container environments # Remove default singularity from path, too From 2881c0da6760f98bbb5057e0ad2c6a797d3e7251 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 15 Mar 2024 12:04:00 -0700 Subject: [PATCH 21/32] Fix typo --- osg-htc/nrp-ospool-ep/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 0c42c7bd..b310c5b5 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -1,4 +1,4 @@ -FROM opensciencegrid/osgvo-docker-pilot:23-el9-releasee +FROM opensciencegrid/osgvo-docker-pilot:23-el9-release # Tweaked singularity for container environments # Remove default singularity from path, too From ee5a8639acce1de39a3cb37e9c4541911455134d Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 15 Mar 2024 14:02:48 -0700 Subject: [PATCH 22/32] Fix python vs python3 --- osg-htc/nrp-ospool-ep/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index b310c5b5..afb376ac 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -1,5 +1,8 @@ FROM opensciencegrid/osgvo-docker-pilot:23-el9-release +# Fix python vs python3 +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 + # Tweaked singularity for container environments # Remove default singularity from path, too COPY scripts/singularity_npid.sh /usr/bin/singularity_npid.sh From 3286dfaefc0dd0937b0aa08c16a6c24ab3482e66 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 15 Jul 2024 16:32:37 -0700 Subject: [PATCH 23/32] Add IS_GLIDEIN classad --- osg-htc/nrp-ospool-ep/Dockerfile | 1 + .../nrp-ospool-ep/scripts/20_advertise_glidein.sh | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100755 osg-htc/nrp-ospool-ep/scripts/20_advertise_glidein.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index afb376ac..ede52d94 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -32,6 +32,7 @@ COPY scripts/01_no_condor_host.sh /etc/entrypoint/image-config.d/01_no_condor_ho COPY scripts/02_validate_singularity.sh /etc/osg/image-init.d/02_validate_singularity.sh COPY scripts/02_validate_apptainer.sh /etc/osg/image-init.d/02_validate_apptainer.sh COPY scripts/19_set_resources.sh /etc/osg/image-init.d/19_set_resources.sh +COPY scripts/20_advertise_glidein.sh /etc/osg/image-init.d/20_advertise_glidein.sh COPY scripts/20_advertise_k8s_domain.sh /etc/osg/image-init.d/20_advertise_k8s_domain.sh COPY scripts/21_advertise_k8s_provisioner.sh /etc/osg/image-init.d/21_advertise_k8s_provisioner.sh COPY scripts/22_set_requirements.sh /etc/osg/image-init.d/22_set_requirements.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/20_advertise_glidein.sh b/osg-htc/nrp-ospool-ep/scripts/20_advertise_glidein.sh new file mode 100755 index 00000000..c2498895 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/20_advertise_glidein.sh @@ -0,0 +1,13 @@ +#!/bin/bash + + +# +# Advertise that this is a glidein +# + +if [ "x${ADVERTISE_IS_GLIDEIN}" != "xN" ]; then + echo "IS_GLIDEIN = true" >> "${PILOT_CONFIG_FILE}" + echo 'STARTD_EXPRS = $(STARTD_EXPRS) IS_GLIDEIN' >> "${PILOT_CONFIG_FILE}" +fi + + From 0247b7051492d8bb0480a53f2aeb52869be84cbc Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 1 Oct 2024 09:38:54 -0700 Subject: [PATCH 24/32] Add kubectl in the image --- osg-htc/nrp-ospool-ep/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index ede52d94..a05b74de 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -3,6 +3,10 @@ FROM opensciencegrid/osgvo-docker-pilot:23-el9-release # Fix python vs python3 RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 +# Add kubectl, to be able to interact with the k8s cluster +RUN curl -L "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" -o /usr/sbin/kubectl && \ + chmod u+x /usr/sbin/kubectl + # Tweaked singularity for container environments # Remove default singularity from path, too COPY scripts/singularity_npid.sh /usr/bin/singularity_npid.sh From 7fe8e93363f312df4dc9f561b7ee862c7501743d Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 1 Oct 2024 17:30:15 -0700 Subject: [PATCH 25/32] Add 11_set_OSGInstitutionID.sh init script that sets OSG_INSTITUTION_ID --- osg-htc/nrp-ospool-ep/Dockerfile | 1 + .../nrp-ospool-ep/scripts/11_set_OSGInstitutionID.sh | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 osg-htc/nrp-ospool-ep/scripts/11_set_OSGInstitutionID.sh diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index a05b74de..b69c9a25 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -31,6 +31,7 @@ RUN chmod a+rx /bin/check_master.sh # system level scripts COPY scripts/01_token.sh /etc/entrypoint/image-config.d/01_token.sh COPY scripts/01_no_condor_host.sh /etc/entrypoint/image-config.d/01_no_condor_host.sh +COPY scripts/11_set_OSGInstitutionID.sh /etc/entrypoint/image-config.d/11_set_OSGInstitutionID.sh # pilot level scripts COPY scripts/02_validate_singularity.sh /etc/osg/image-init.d/02_validate_singularity.sh diff --git a/osg-htc/nrp-ospool-ep/scripts/11_set_OSGInstitutionID.sh b/osg-htc/nrp-ospool-ep/scripts/11_set_OSGInstitutionID.sh new file mode 100644 index 00000000..2467e850 --- /dev/null +++ b/osg-htc/nrp-ospool-ep/scripts/11_set_OSGInstitutionID.sh @@ -0,0 +1,10 @@ +# +# Fetch OSGInstitutionID from k8s and set it as an env variable +# unless it is already set +# +if [ "x${OSG_INSTITUTION_ID}" == "x" ]; then + OSG_INSTITUTION_ID=`/usr/sbin/kubectl get node ${PHYSICAL_HOSTNAME} -L nautilus.io/OSGInstitutionID | tail -1 | awk '{print $6}'` + if [ "x${OSG_INSTITUTION_ID}" != "x" ]; then + export OSG_INSTITUTION_ID + fi +fi From 7b2a55f98d3df7dd6027acc8ca9ce7777608a2da Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 17 Oct 2024 13:29:51 -0700 Subject: [PATCH 26/32] Add OpenCL support --- osg-htc/nrp-ospool-ep/Dockerfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index b69c9a25..ae9f6cab 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -1,5 +1,14 @@ FROM opensciencegrid/osgvo-docker-pilot:23-el9-release +# Enable OpenCL +# As suggested by https://github.com/WIPACrepo/pyglidein/blob/master/Dockerfile +RUN mkdir -p /etc/OpenCL/vendors && \ + echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd + +# Some helper OpenCL tools +RUN yum install -y clinfo && \ + yum clean all + # Fix python vs python3 RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 From b073ed583e64c01eb993f4d4264cd52198244f8d Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Thu, 19 Dec 2024 14:46:43 -0800 Subject: [PATCH 27/32] Update base image to hub.opensciencegrid.org/osg-htc/ospool-ep:24-release --- osg-htc/nrp-ospool-ep/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index ae9f6cab..06cb4d3c 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -1,4 +1,4 @@ -FROM opensciencegrid/osgvo-docker-pilot:23-el9-release +FROM hub.opensciencegrid.org/osg-htc/ospool-ep:24-release # Enable OpenCL # As suggested by https://github.com/WIPACrepo/pyglidein/blob/master/Dockerfile From bbea9030339f7a30a39efd859da32adaf707f5a4 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 24 Feb 2025 16:22:08 -0800 Subject: [PATCH 28/32] Replace exact matching with approximate matching --- .../nrp-ospool-ep/scripts/22_set_requirements.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh index 4f6b0708..de232ba3 100755 --- a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh +++ b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh @@ -18,10 +18,21 @@ MATCHING_START = ( (FORCE_K8SNAMESPACE_MATCHING=?="no") || regexp(TARGET.Request PROVISIONING_START = ifthenelse(TARGET.RequestCPUs=!=undefined, CPUs=?=TARGET.RequestCPUs, CPUs=?=1) # Pretend small memory and disk requests are the equivalent of not set +# Note: 4096 is special in the provisioner PROVISIONING_START = \$(PROVISIONING_START) && \\ - ifthenelse(TARGET.RequestMemory=!=undefined, ifthenelse(TARGET.RequestMemory<4096,Memory=?=4096, Memory=?=TARGET.RequestMemory), Memory=?=4096) + ifthenelse(TARGET.RequestMemory=!=undefined, \\ + ifthenelse(TARGET.RequestMemory<4096, \\ + Memory=?=4096, \\ + ((Memory>=TARGET.RequestMemory) && ((TARGET.RequestMemory+1536)>Memory)), \\ + Memory=?=4096) + +# Note: 8000000 is special in the provisioner PROVISIONING_START = \$(PROVISIONING_START) && \\ - ifthenelse(TARGET.RequestDisk=!=undefined, ifthenelse(TARGET.RequestDisk<8000000,Disk=?=8000000, Disk=?=TARGET.RequestDisk), Disk=?=8000000) + ifthenelse(TARGET.RequestDisk=!=undefined, \\ + ifthenelse(TARGET.RequestDisk<8000000, \\ + Disk=?=8000000, \\ + ((Disk>=TARGET.RequestDisk) && ((TARGET.RequestDisk+1500000)>Disk))), \\ + Disk=?=8000000) # GPUs will not be defined if there are no GPUs PROVISIONING_START = \$(PROVISIONING_START) && \\ From ad44c5011fc27c5d3940b3130001603fc3dfe65b Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 24 Feb 2025 16:36:46 -0800 Subject: [PATCH 29/32] Fix typo --- osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh index de232ba3..8e3b169e 100755 --- a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh +++ b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh @@ -23,7 +23,7 @@ PROVISIONING_START = \$(PROVISIONING_START) && \\ ifthenelse(TARGET.RequestMemory=!=undefined, \\ ifthenelse(TARGET.RequestMemory<4096, \\ Memory=?=4096, \\ - ((Memory>=TARGET.RequestMemory) && ((TARGET.RequestMemory+1536)>Memory)), \\ + ((Memory>=TARGET.RequestMemory) && ((TARGET.RequestMemory+1536)>Memory))), \\ Memory=?=4096) # Note: 8000000 is special in the provisioner From 2c7cc9f099f8052d2ab8d99f6de9d33cebab23eb Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 24 Feb 2025 17:44:00 -0800 Subject: [PATCH 30/32] Make disk range larger --- osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh index 8e3b169e..a9e75b66 100755 --- a/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh +++ b/osg-htc/nrp-ospool-ep/scripts/22_set_requirements.sh @@ -31,7 +31,7 @@ PROVISIONING_START = \$(PROVISIONING_START) && \\ ifthenelse(TARGET.RequestDisk=!=undefined, \\ ifthenelse(TARGET.RequestDisk<8000000, \\ Disk=?=8000000, \\ - ((Disk>=TARGET.RequestDisk) && ((TARGET.RequestDisk+1500000)>Disk))), \\ + ((Disk>=TARGET.RequestDisk) && ((TARGET.RequestDisk+4500000)>Disk))), \\ Disk=?=8000000) # GPUs will not be defined if there are no GPUs From 03de49893d4286c73b4e50d98467d597cf24784d Mon Sep 17 00:00:00 2001 From: Brian Lin Date: Fri, 17 Apr 2026 17:51:44 -0500 Subject: [PATCH 31/32] OSPOOL-158: add OSG 25 NRP ospool-ep builds --- osg-htc/nrp-ospool-ep/Dockerfile | 9 ++++++++- osg-htc/nrp-ospool-ep/build-config.json | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 osg-htc/nrp-ospool-ep/build-config.json diff --git a/osg-htc/nrp-ospool-ep/Dockerfile b/osg-htc/nrp-ospool-ep/Dockerfile index 06cb4d3c..e508e301 100644 --- a/osg-htc/nrp-ospool-ep/Dockerfile +++ b/osg-htc/nrp-ospool-ep/Dockerfile @@ -1,4 +1,11 @@ -FROM hub.opensciencegrid.org/osg-htc/ospool-ep:24-release +ARG BASE_OSG_SERIES=25 +ARG BASE_YUM_REPO=release + +FROM hub.osg-htc.org/osg-htc/ospool-ep:${BASE_OSG_SERIES}-${BASE_YUM_REPO} + +# Previous args have gone out of scope +ARG BASE_OSG_SERIES=25 +ARG BASE_YUM_REPO=release # Enable OpenCL # As suggested by https://github.com/WIPACrepo/pyglidein/blob/master/Dockerfile diff --git a/osg-htc/nrp-ospool-ep/build-config.json b/osg-htc/nrp-ospool-ep/build-config.json new file mode 100644 index 00000000..215e2fed --- /dev/null +++ b/osg-htc/nrp-ospool-ep/build-config.json @@ -0,0 +1,7 @@ +{ + "standard_build": true, + "repo_build": false, + "base_os": ["el9"], + "osg_series": ["24", "25"], + "base_repo": ["release"] + } From f09cdf397c754e35660e0aa686cb6fc69e9125b9 Mon Sep 17 00:00:00 2001 From: Brian Lin Date: Fri, 17 Apr 2026 17:52:32 -0500 Subject: [PATCH 32/32] Add support for pushing osg-htc project images --- .github/workflows/build-containers.yml | 17 +++++++++-------- scripts/build-job-matrix.py | 4 +--- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index 7809dfe2..5829765b 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -30,7 +30,7 @@ jobs: - id: image-list run: | - ORG_DIR=opensciencegrid + ORG_DIR_REGEX="^(opensciencegrid|osg-htc)/" # Get the list of files changed based on the type of event # kicking off the GHA: # 1. For the main branch, diff the previous state of main vs @@ -54,7 +54,7 @@ jobs: images=$(git diff --name-only \ "$BASE" \ "$GITHUB_SHA" | - egrep "^$ORG_DIR/" | + grep -E "$ORG_DIR_REGEX" | cut -d/ -f -2 | sort | uniq | @@ -64,10 +64,11 @@ jobs: else # List all image root dirs. Example value: # "opensciencegrid/vo-frontend opensciencegrid/ospool-cm" - images=$(find $ORG_DIR -mindepth 1 \ - -maxdepth 1 \ - -type d \ - -printf "$ORG_DIR/%P\n") + images=$(find -mindepth 2 \ + -maxdepth 2 \ + -type d \ + -printf "%P\n" | + grep -E "${ORG_DIR_REGEX}") fi image_json=$(echo -n "${images:-dummy}" | jq -Rcs '.|split("\n") | map(select(. != ""))') @@ -124,7 +125,7 @@ jobs: BASE_OS=$(echo $CONFIG | awk -F'-' '{print $1}') OSG_SERIES=$(echo $CONFIG | awk -F'-' '{print $2}') BASE_REPO=$(echo $CONFIG | awk -F'-' '{print $3}') - CONTEXT="opensciencegrid/${{ matrix.name }}" + CONTEXT="${{ matrix.name }}" echo "BASE_OS=${BASE_OS}" >> $GITHUB_ENV echo "OSG_SERIES=${OSG_SERIES}" >> $GITHUB_ENV echo "BASE_REPO=${BASE_REPO}" >> $GITHUB_ENV @@ -176,7 +177,7 @@ jobs: BASE_OS=$(echo $CONFIG | awk -F'-' '{print $1}') OSG_SERIES=$(echo $CONFIG | awk -F'-' '{print $2}') BASE_REPO=$(echo $CONFIG | awk -F'-' '{print $3}') - CONTEXT="opensciencegrid/${{ matrix.name }}" + CONTEXT="${{ matrix.name }}" echo "BASE_OS=${BASE_OS}" >> $GITHUB_ENV echo "OSG_SERIES=${OSG_SERIES}" >> $GITHUB_ENV echo "BASE_REPO=${BASE_REPO}" >> $GITHUB_ENV diff --git a/scripts/build-job-matrix.py b/scripts/build-job-matrix.py index a8b86388..3b544892 100644 --- a/scripts/build-job-matrix.py +++ b/scripts/build-job-matrix.py @@ -35,8 +35,6 @@ def main(image_dirs): config = load_config(build_config_path, default_config) - image_name = os.path.basename(image_dir) - base_os_list = config['base_os'] osg_series_list = config['osg_series'] base_repo_list = config['base_repo'] @@ -56,7 +54,7 @@ def main(image_dirs): # 1. Simplicity: Using a single string to represent configurations is straightforward and easy to understand. # 2. Integration: A single string is easily passed to external tools and systems that manage builds. configuration_string = f"{base_os}-{osg_series}-{base_repo}-{config['standard_build']}-{config['repo_build']}" - include_list.append({"name": image_name, "config": configuration_string}) + include_list.append({"name": image_dir, "config": configuration_string}) sys.stdout.flush() json_output = json.dumps({"include": include_list}, indent=4)