diff --git a/README.md b/README.md
index 15cf819a..6320d108 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ or:
## Supported OS:
The stack allows various combinations of OS. Here is a list of what has been tested. We can't guarantee any other combinations.
-| Bastion | Compute |
+| Controller | Compute |
|---------------|--------------|
| OL7 | OL7 |
| OL7 | OL8 |
@@ -41,7 +41,7 @@ The stack allowa various combination of OS. Here is a list of what has been test
| OL8 | OL7 |
| Ubuntu 20.04 | Ubuntu 20.04 |
-When switching to Ubuntu, make sure the username is changed from opc to Ubuntu in the ORM for both the bastion and compute nodes.
+When switching to Ubuntu, make sure the username is changed from opc to ubuntu in the ORM for both the controller and compute nodes.
## How is resizing different from autoscaling?
Autoscaling is the idea of launching new clusters for jobs in the queue. Resizing a cluster is changing the size of a cluster. In some cases, growing your cluster may be a better idea, but be aware that this may lead to capacity errors. Because Oracle Cloud RDMA is not virtualized, you get much better performance, but it also means that we had to build HPC islands and split our capacity across different network blocks.
@@ -62,7 +62,7 @@ Resizing of HPC cluster with Cluster Network consist of 2 major sub-steps:
## resize.sh usage
-The resize.sh is deployed on the bastion node as part of the HPC cluster Stack deployment. Unreachable nodes have been causing issues. If nodes in the inventory are unreachable, we will not do cluster modification to the cluster unless --remove_unreachable is also specified. That will terminate the unreachable nodes before running the action that was requested (Example Adding a node)
+The resize.sh script is deployed on the controller node as part of the HPC cluster stack deployment. Unreachable nodes have been causing issues: if nodes in the inventory are unreachable, the cluster will not be modified unless --remove_unreachable is also specified, which terminates the unreachable nodes before running the requested action (for example, adding a node).
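+
+For example, a few typical invocations might look like this (a sketch; the node names below are placeholders):
+```
+/opt/oci-hpc/bin/resize.sh list                          # list the nodes of the default cluster
+/opt/oci-hpc/bin/resize.sh add 2                         # add 2 nodes to the default cluster
+/opt/oci-hpc/bin/resize.sh remove --nodes node-1 node-2  # terminate two specific nodes
+```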
```
/opt/oci-hpc/bin/resize.sh -h
@@ -92,7 +92,7 @@ optional arguments:
OCID of the localhost
--cluster_name CLUSTER_NAME
Name of the cluster to resize. Defaults to the name
- included in the bastion
+ included in the controller
--nodes NODES [NODES ...]
List of nodes to delete
--no_reconfigure If present. Does not rerun the playbooks
@@ -284,14 +284,14 @@ When the cluster is already being destroyed, it will have a file `/opt/oci-hpc/a
## Autoscaling Monitoring
If you selected the autoscaling monitoring, you can see what nodes are spinning up and down as well as running and queued jobs. Everything will run automatically except the import of the dashboard in Grafana, due to a problem in the Grafana API.
-To do it manually, in your browser of choice, navigate to bastionIP:3000. Username and password are admin/admin, you can change those during your first login. Go to Configuration -> Data Sources. Select autoscaling. Enter Password as Monitor1234! and click on 'Save & test'. Now click on the + sign on the left menu bar and select import. Click on Upload JSON file and upload the file the is located at `/opt/oci-hpc/playbooks/roles/autoscaling_mon/files/dashboard.json`. Select autoscaling (MySQL) as your datasource.
+To do it manually, in your browser of choice, navigate to controllerIP:3000. The username and password are admin/admin; you can change these during your first login. Go to Configuration -> Data Sources. Select autoscaling. Enter the password Monitor1234! and click on 'Save & test'. Now click on the + sign on the left menu bar and select Import. Click on Upload JSON file and upload the file that is located at `/opt/oci-hpc/playbooks/roles/autoscaling_mon/files/dashboard.json`. Select autoscaling (MySQL) as your datasource. You will now see the dashboard.
# LDAP
-If selected bastion host will act as an LDAP server for the cluster. It's strongly recommended to leave default, shared home directory.
-User management can be performed from the bastion using ``` cluster ``` command.
+If selected, the controller host will act as an LDAP server for the cluster. It is strongly recommended to keep the default shared home directory.
+User management can be performed from the controller using the ``` cluster ``` command.
Example of cluster command to add a new user: ```cluster user add name```
By default, a `privilege` group is created that has access to the NFS and can have sudo access on all nodes (defined at stack creation; this group has ID 9876). The group name can be modified.
@@ -301,21 +301,21 @@ To avoid generating a user-specific key for passwordless ssh between nodes, use
# Shared home folder
-By default, the home folder is NFS shared directory between all nodes from the bastion. You have the possibility to use a FSS to share it as well to keep working if the bastion goes down. You can either create the FSS from the GUI. Be aware that it will get destroyed when you destroy the stack. Or you can pass an existing FSS IP and path. If you share an existing FSS, do not use /home as mountpoint. The stack will take care of creating a $nfsshare/home directory and mounting it at /home after copying all the appropriate files.
+By default, the home folder is an NFS directory shared between all nodes from the controller. You also have the option of using FSS to share it, so that it keeps working if the controller goes down. You can either create the FSS from the GUI (be aware that it will be destroyed when you destroy the stack) or pass in an existing FSS IP and path. If you share an existing FSS, do not use /home as the mountpoint. The stack will take care of creating a $nfsshare/home directory and mounting it at /home after copying all the appropriate files.
# Deploy within a private subnet
-If "true", this will create a private endpoint in order for Oracle Resource Manager to configure the bastion VM and the future nodes in private subnet(s).
-* If "Use Existing Subnet" is false, Terraform will create 2 private subnets, one for the bastion and one for the compute nodes.
-* If "Use Existing Subnet" is also true, the user must indicate a private subnet for the bastion VM. For the compute nodes, they can reside in another private subnet or the same private subent as the bastion VM.
+If "true", this will create a private endpoint so that Oracle Resource Manager can configure the controller VM and the future nodes in private subnet(s).
+* If "Use Existing Subnet" is false, Terraform will create 2 private subnets, one for the controller and one for the compute nodes.
+* If "Use Existing Subnet" is also true, the user must indicate a private subnet for the controller VM. The compute nodes can reside in another private subnet or in the same private subnet as the controller VM.
-The bastion VM will reside in a private subnet. Therefore, the creation of a "bastion service" (https://docs.oracle.com/en-us/iaas/Content/Bastion/Concepts/bastionoverview.htm), a VPN or FastConnect connection is required. If a public subnet exists in the VCN, adapting the security lists and creating a jump host can also work. Finally, a Peering can also be established betwen the private subnet and another VCN reachable by the user.
+The controller VM will reside in a private subnet. Therefore, a Bastion service (https://docs.oracle.com/en-us/iaas/Content/Bastion/Concepts/bastionoverview.htm), a VPN, or a FastConnect connection is required. If a public subnet exists in the VCN, adapting the security lists and creating a jump host can also work. Finally, a peering can also be established between the private subnet and another VCN reachable by the user.
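+
+For example, with a jump host in a public subnet of the same VCN, a single ssh command using OpenSSH's ProxyJump option can reach the controller (the usernames and addresses below are placeholders):
+```
+ssh -J opc@<jump-host-public-ip> opc@<controller-private-ip>
+```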
## max_nodes_partition.py usage
-Use the alias "max_nodes" to run the python script max_nodes_partition.py. You can run this script only from bastion.
+Use the alias "max_nodes" to run the python script max_nodes_partition.py. You can run this script only from the controller.
$ max_nodes --> Information about all the partitions and their respective clusters, and maximum number of nodes distributed evenly per partition
@@ -324,13 +324,13 @@ $ max_nodes --include_cluster_names xxx yyy zzz --> where xxx, yyy, zzz are clus
## validation.py usage
-Use the alias "validate" to run the python script validation.py. You can run this script only from bastion.
+Use the alias "validate" to run the python script validation.py. You can run this script only from the controller.
The script performs these checks:
-> Check the number of nodes is consistent across resize, /etc/hosts, slurm, topology.conf, OCI console, inventory files.
-> PCIe bandwidth check
-> GPU Throttle check
--> Check whether md5 sum of /etc/hosts file on nodes matches that on bastion
+-> Check whether the md5 sum of the /etc/hosts file on the nodes matches that on the controller
Provide at least one argument: [-n NUM_NODES] [-p PCIE] [-g GPU_THROTTLE] [-e ETC_HOSTS]
@@ -343,7 +343,7 @@ Below are some examples for running this script.
validate -n y --> This will validate that the number of nodes is consistent across resize, /etc/hosts, slurm, topology.conf, OCI console, inventory files. The clusters considered will be the default cluster if any and cluster(s) found in /opt/oci-hpc/autoscaling/clusters directory. The number of nodes considered will be from the resize script using the clusters we got before.
-validate -n y -cn --> This will validate that the number of nodes is consistent across resize, /etc/hosts, slurm, topology.conf, OCI console, inventory files. It will also check whether md5 sum of /etc/hosts file on all nodes matches that on bastion. The clusters considered will be from the file specified by -cn option. The number of nodes considered will be from the resize script using the clusters from the file.
+validate -n y -cn --> This will validate that the number of nodes is consistent across resize, /etc/hosts, slurm, topology.conf, OCI console, inventory files. It will also check whether the md5 sum of the /etc/hosts file on all nodes matches that on the controller. The clusters considered will be from the file specified by the -cn option. The number of nodes considered will be from the resize script using the clusters from the file.
validate -p y -cn --> This will run the pcie bandwidth check. The clusters considered will be from the file specified by the -cn option. The number of nodes considered will be from the resize script using the clusters from the file.
@@ -364,12 +364,12 @@ validate -n y -p y -g y -e y -cn
## /opt/oci-hpc/scripts/collect_logs.py
This is a script to collect nvidia bug report, sosreport, console history logs.
-The script needs to be run from the bastion. In the case where the host is not ssh-able, it will get only console history logs for the same.
+The script needs to be run from the controller. If the host is not reachable over ssh, it will only collect the console history logs for that host.
It requires the below argument:
--hostname
-And --compartment-id is optional (i.e. assumption is the host is in the same compartment as the bastion).
+And --compartment-id is optional (i.e., the host is assumed to be in the same compartment as the controller).
Where HOSTNAME is the node name for which you need the above logs and COMPARTMENT_ID is the OCID of the compartment where the node is.
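+
+A typical invocation might look like this (the hostname and compartment OCID are placeholders):
+```
+python3 /opt/oci-hpc/scripts/collect_logs.py --hostname compute-permanent-node-1
+python3 /opt/oci-hpc/scripts/collect_logs.py --hostname compute-permanent-node-1 --compartment-id ocid1.compartment.oc1..xxxxx
+```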
diff --git a/autoscaling/tf_init/cluster-network-configuration.tf b/autoscaling/tf_init/cluster-network-configuration.tf
index 9b1d0972..6b2805f1 100755
--- a/autoscaling/tf_init/cluster-network-configuration.tf
+++ b/autoscaling/tf_init/cluster-network-configuration.tf
@@ -14,7 +14,7 @@ resource "oci_core_instance_configuration" "cluster-network-instance_configurati
    display_name = local.cluster_name
    metadata = {
      # TODO: add user key to the authorized_keys
-     ssh_authorized_keys = file("/home/${var.bastion_username}/.ssh/id_rsa.pub")
+     ssh_authorized_keys = file("/home/${var.controller_username}/.ssh/id_rsa.pub")
      user_data = base64encode(data.template_file.config.rendered)
    }
    agent_config {
@@ -44,6 +44,18 @@ resource "oci_core_instance_configuration" "cluster-network-instance_configurati
      }
    }
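+   # The platform_config block is only emitted when BIOS-level tuning is requested
+   # (var.BIOS); the NUMA default is NPS1 on generic BM platforms and NPS4 otherwise.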
+   dynamic "platform_config" {
+     for_each = var.BIOS ? range(1) : []
+     content {
+       type = local.platform_type
+       are_virtual_instructions_enabled = var.virt_instr
+       is_access_control_service_enabled = var.access_ctrl
+       is_input_output_memory_management_unit_enabled = var.IOMMU
+       is_symmetric_multi_threading_enabled = var.SMT
+       numa_nodes_per_socket = var.numa_nodes_per_socket == "Default" ? (local.platform_type == "GENERIC_BM" ? "NPS1": "NPS4" ): var.numa_nodes_per_socket
+       percentage_of_cores_enabled = var.percentage_of_cores_enabled == "Default" ? 100 : tonumber(var.percentage_of_cores_enabled)
+     }
+   }
    shape = var.cluster_network_shape
    source_details {
      source_type = "image"
@@ -52,7 +64,7 @@ resource "oci_core_instance_configuration" "cluster-network-instance_configurati
      }
    }
  }
-
+  source = "NONE"
}
diff --git a/autoscaling/tf_init/compute-nodes.tf b/autoscaling/tf_init/compute-nodes.tf
index eb8a0c22..d8e65f5c 100755
--- a/autoscaling/tf_init/compute-nodes.tf
+++ b/autoscaling/tf_init/compute-nodes.tf
@@ -37,7 +37,7 @@ resource "oci_core_instance" "compute_cluster_instances" {
  }
  metadata = {
-   ssh_authorized_keys = file("/home/${var.bastion_username}/.ssh/id_rsa.pub")
+   ssh_authorized_keys = file("/home/${var.controller_username}/.ssh/id_rsa.pub")
    user_data = base64encode(data.template_file.config.rendered)
  }
  source_details {
diff --git a/autoscaling/tf_init/config.bastion b/autoscaling/tf_init/config.controller
similarity index 100%
rename from autoscaling/tf_init/config.bastion
rename to autoscaling/tf_init/config.controller
diff --git a/autoscaling/tf_init/bastion_update.tf b/autoscaling/tf_init/controller_update.tf
similarity index 81%
rename from autoscaling/tf_init/bastion_update.tf
rename to autoscaling/tf_init/controller_update.tf
index d4154c2e..5d58f76e 100755
--- a/autoscaling/tf_init/bastion_update.tf
+++ b/autoscaling/tf_init/controller_update.tf
@@ -1,25 +1,25 @@
locals {
- bastion_path = "${var.autoscaling_folder}/clusters/${var.cluster_name}"
+ controller_path = "${var.autoscaling_folder}/clusters/${var.cluster_name}"
}
resource "null_resource" "create_path" {
  provisioner "local-exec" {
-   command = "mkdir -p ${local.bastion_path}"
+   command = "mkdir -p ${local.controller_path}"
  }
}
resource "local_file" "hosts" {
  depends_on = [null_resource.create_path,oci_core_cluster_network.cluster_network]
  content = join("\n", local.cluster_instances_ips)
- filename = "${local.bastion_path}/hosts_${var.cluster_name}"
+ filename = "${local.controller_path}/hosts_${var.cluster_name}"
}
resource "local_file" "inventory" {
  depends_on = [oci_core_cluster_network.cluster_network, oci_core_cluster_network.cluster_network]
- content = templatefile("${local.bastion_path}/inventory.tpl", {
-   bastion_name = var.bastion_name,
-   bastion_ip = var.bastion_ip,
+ content = templatefile("${local.controller_path}/inventory.tpl", {
+   controller_name = var.controller_name,
+   controller_ip = var.controller_ip,
    backup_name = var.backup_name,
    backup_ip = var.backup_ip,
    login_name = var.login_name,
@@ -29,6 +29,8 @@ resource "local_file" "inventory" {
    private_subnet = var.private_subnet,
    rdma_network = cidrhost(var.rdma_subnet, 0),
    rdma_netmask = cidrnetmask(var.rdma_subnet),
+   zone_name = var.zone_name,
+   dns_entries = var.dns_entries,
    nfs = var.use_scratch_nfs ?
local.cluster_instances_names[0] : "", scratch_nfs = var.use_scratch_nfs, cluster_nfs = var.use_cluster_nfs, @@ -53,10 +55,10 @@ resource "local_file" "inventory" { enroot = var.enroot, spack = var.spack, ldap = var.ldap, - bastion_block = var.bastion_block, + controller_block = var.controller_block, login_block = var.login_block, scratch_nfs_type = local.scratch_nfs_type, - bastion_mount_ip = var.bastion_mount_ip, + controller_mount_ip = var.controller_mount_ip, login_mount_ip = var.login_mount_ip, cluster_mount_ip = local.mount_ip, cluster_name = local.cluster_name, @@ -71,13 +73,13 @@ resource "local_file" "inventory" { privilege_sudo = var.privilege_sudo, privilege_group_name = var.privilege_group_name, latency_check = var.latency_check - bastion_username = var.bastion_username, + controller_username = var.controller_username, compute_username = var.compute_username, pam = var.pam, sacct_limits = var.sacct_limits, use_compute_agent=var.use_compute_agent }) - filename = "${local.bastion_path}/inventory" + filename = "${local.controller_path}/inventory" } diff --git a/autoscaling/tf_init/data.tf b/autoscaling/tf_init/data.tf index f9b04337..8a54acf9 100755 --- a/autoscaling/tf_init/data.tf +++ b/autoscaling/tf_init/data.tf @@ -36,7 +36,7 @@ data "oci_core_subnet" "private_subnet" { } data "oci_core_subnet" "public_subnet" { - subnet_id = local.bastion_subnet_id + subnet_id = local.controller_subnet_id } data "oci_core_images" "linux" { @@ -50,4 +50,19 @@ data "oci_core_images" "linux" { } } +data "oci_core_vcn" "vcn" { + vcn_id = local.vcn_id +} +data "oci_dns_views" "dns_views" { + compartment_id = var.targetCompartment + scope = "PRIVATE" + display_name = data.oci_core_vcn.vcn.display_name +} + +data "oci_dns_zones" "dns_zones" { + compartment_id = var.targetCompartment + name = "${var.zone_name}" + zone_type = "PRIMARY" + scope = "PRIVATE" +} diff --git a/autoscaling/tf_init/instance-pool-configuration.tf b/autoscaling/tf_init/instance-pool-configuration.tf index 354276b6..31c31ab7 100755 --- a/autoscaling/tf_init/instance-pool-configuration.tf +++ b/autoscaling/tf_init/instance-pool-configuration.tf @@ -14,7 +14,7 @@ resource "oci_core_instance_configuration" "instance_pool_configuration" { display_name = local.cluster_name metadata = { # TODO: add user key to the authorized_keys - ssh_authorized_keys = file("/home/${var.bastion_username}/.ssh/id_rsa.pub") + ssh_authorized_keys = file("/home/${var.controller_username}/.ssh/id_rsa.pub") user_data = base64encode(data.template_file.config.rendered) } agent_config { @@ -29,7 +29,18 @@ resource "oci_core_instance_configuration" "instance_pool_configuration" { memory_in_gbs = var.instance_pool_custom_memory ? var.instance_pool_memory : 16 * shape_config.value } } - + dynamic "platform_config" { + for_each = var.BIOS ? range(1) : [] + content { + type = local.platform_type + are_virtual_instructions_enabled = var.virt_instr + is_access_control_service_enabled = var.access_ctrl + is_input_output_memory_management_unit_enabled = var.IOMMU + is_symmetric_multi_threading_enabled = var.SMT + numa_nodes_per_socket = var.numa_nodes_per_socket == "Default" ? (local.platform_type == "GENERIC_BM" ? "NPS1": "NPS4" ): var.numa_nodes_per_socket + percentage_of_cores_enabled = var.percentage_of_cores_enabled == "Default" ? 
100 : tonumber(var.percentage_of_cores_enabled) + } + } source_details { source_type = "image" boot_volume_size_in_gbs = var.boot_volume_size diff --git a/autoscaling/tf_init/inventory.tpl b/autoscaling/tf_init/inventory.tpl index 146d5cce..56c20cb9 100755 --- a/autoscaling/tf_init/inventory.tpl +++ b/autoscaling/tf_init/inventory.tpl @@ -1,7 +1,7 @@ -[bastion] -${bastion_name} ansible_host=${bastion_ip} ansible_user=${bastion_username} role=bastion ansible_python_interpreter=/usr/bin/python +[controller] +${controller_name} ansible_host=${controller_ip} ansible_user=${controller_username} role=controller ansible_python_interpreter=/usr/bin/python [slurm_backup] -%{ if backup_name != "" }${backup_name} ansible_host=${backup_ip} ansible_user=${bastion_username} role=bastion%{ endif } +%{ if backup_name != "" }${backup_name} ansible_host=${backup_ip} ansible_user=${controller_username} role=controller%{ endif } [login] %{ if login_name != "" }${login_name} ansible_host=${login_ip} ansible_user=${compute_username} role=login%{ endif } [compute_to_add] @@ -16,7 +16,7 @@ compute_configured [nfs] %{ if nfs != "" }${nfs} ansible_user=${compute_username} role=nfs%{ endif } [all:children] -bastion +controller compute [all:vars] ansible_connection=ssh @@ -40,10 +40,10 @@ rack_aware = ${rack_aware} pyxis = ${pyxis} enroot = ${enroot} spack = ${spack} -bastion_block = ${bastion_block} +controller_block = ${controller_block} login_block = ${login_block} scratch_nfs_type = ${scratch_nfs_type} -bastion_mount_ip = ${bastion_mount_ip} +controller_mount_ip = ${controller_mount_ip} login_mount_ip = ${login_mount_ip} cluster_mount_ip = ${cluster_mount_ip} autoscaling = true @@ -68,7 +68,9 @@ privilege_sudo=${privilege_sudo} privilege_group_name=${privilege_group_name} latency_check=${latency_check} compute_username=${compute_username} -bastion_username=${bastion_username} +controller_username=${controller_username} pam = ${pam} sacct_limits=${sacct_limits} -use_compute_agent=${use_compute_agent} \ No newline at end of file +use_compute_agent=${use_compute_agent} +zone_name=${zone_name} +dns_entries=${dns_entries} \ No newline at end of file diff --git a/autoscaling/tf_init/locals.tf b/autoscaling/tf_init/locals.tf index 02fd1b0e..4effdfb6 100755 --- a/autoscaling/tf_init/locals.tf +++ b/autoscaling/tf_init/locals.tf @@ -5,15 +5,18 @@ locals { image_ocid = var.unsupported ? var.image_ocid : var.image shape = var.cluster_network ? var.cluster_network_shape : var.instance_pool_shape - instance_pool_ocpus = local.shape == "VM.DenseIO.E4.Flex" ? var.instance_pool_ocpus_denseIO_flex : var.instance_pool_ocpus + instance_pool_ocpus = ( local.shape == "VM.DenseIO.E4.Flex" || local.shape == "VM.DenseIO.E5.Flex" ) ? var.instance_pool_ocpus_denseIO_flex : var.instance_pool_ocpus // ips of the instances cluster_instances_ips = var.compute_cluster ? oci_core_instance.compute_cluster_instances.*.private_ip : var.cluster_network ? data.oci_core_instance.cluster_network_instances.*.private_ip : data.oci_core_instance.instance_pool_instances.*.private_ip + first_vcn_ip = cidrhost(data.oci_core_subnet.private_subnet.cidr_block,0) + cluster_instances_ips_index = [for ip in local.cluster_instances_ips : tostring((tonumber(split(".",ip)[3])-tonumber(split(".",local.first_vcn_ip)[3]))+256*(tonumber(split(".",ip)[2])-tonumber(split(".",local.first_vcn_ip)[2]))+1)] // subnet id derived either from created subnet or existing if specified + vcn_id = var.use_existing_vcn ? 
var.vcn_id : element(concat(oci_core_vcn.vcn.*.id, [""]), 0) subnet_id = var.private_deployment ? var.use_existing_vcn ? var.private_subnet_id : element(concat(oci_core_subnet.private-subnet.*.id, [""]), 1) : var.use_existing_vcn ? var.private_subnet_id : element(concat(oci_core_subnet.private-subnet.*.id, [""]), 0) // subnet id derived either from created subnet or existing if specified - bastion_subnet_id = var.private_deployment ? var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.private-subnet.*.id, [""]), 0) : var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.public-subnet.*.id, [""]), 0) + controller_subnet_id = var.private_deployment ? var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.private-subnet.*.id, [""]), 0) : var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.public-subnet.*.id, [""]), 0) cluster_name = var.use_custom_name ? var.cluster_name : random_pet.name.id cluster_network_image = var.use_marketplace_image ? oci_core_app_catalog_subscription.mp_image_subscription[0].listing_resource_id : local.image_ocid @@ -22,10 +25,10 @@ locals { // image = (var.cluster_network && var.use_marketplace_image == true) || (var.cluster_network == false && var.use_marketplace_image == false) ? var.image : data.oci_core_images.linux.images.0.id -// is_bastion_flex_shape = length(regexall(".*VM.*.*Flex$", var.bastion_shape)) > 0 ? [var.bastion_ocpus]:[] +// is_controller_flex_shape = length(regexall(".*VM.*.*Flex$", var.controller_shape)) > 0 ? [var.controller_ocpus]:[] is_instance_pool_flex_shape = length(regexall(".*VM.*.*Flex$", var.instance_pool_shape)) > 0 ? [local.instance_pool_ocpus]:[] -// bastion_mount_ip = var.bastion_block ? element(concat(oci_core_volume_attachment.bastion_volume_attachment.*.ipv4, [""]), 0) : "none" +// controller_mount_ip = var.controller_block ? element(concat(oci_core_volume_attachment.controller_volume_attachment.*.ipv4, [""]), 0) : "none" scratch_nfs_type = var.cluster_network ? var.scratch_nfs_type_cluster : var.scratch_nfs_type_pool @@ -35,4 +38,6 @@ locals { timeout_per_batch= var.cluster_network ? var.use_multiple_ads ? 15 : 30 : var.use_multiple_ads ? 6 : 15 timeout_ip = join("",[ (( var.node_count - ( var.node_count % 20 ) )/20 + 1 ) * local.timeout_per_batch,"m"]) + platform_type = local.shape == "BM.GPU4.8" ? "AMD_ROME_BM_GPU" : local.shape == "BM.GPU.B4.8" || local.shape == "BM.GPU.H100.8" || local.shape == "BM.GPU.A100-v2.8" ? "AMD_MILAN_BM_GPU" : local.shape == "BM.Standard.E3.128" ? "AMD_ROME_BM" : local.shape == "BM.Standard.E4.128" || local.shape == "BM.DenseIO.E4.128" ? "AMD_MILAN_BM" : "GENERIC_BM" + } diff --git a/autoscaling/tf_init/network.tf b/autoscaling/tf_init/network.tf index eacc4b36..5c6404d6 100755 --- a/autoscaling/tf_init/network.tf +++ b/autoscaling/tf_init/network.tf @@ -162,3 +162,35 @@ resource "oci_core_subnet" "private-subnet" { prohibit_public_ip_on_vnic = true route_table_id = oci_core_route_table.private_route_table[0].id } + + +resource "oci_dns_rrset" "rrset-cluster-network-OCI" { + for_each = var.dns_entries ? 
toset([for v in range(var.node_count) : tostring(v)]) : [] + zone_name_or_id = data.oci_dns_zones.dns_zones.zones[0].id + domain = "${local.cluster_instances_names[tonumber(each.key)]}.${var.zone_name}" + rtype = "A" + items { + domain = "${local.cluster_instances_names[tonumber(each.key)]}.${var.zone_name}" + rtype = "A" + rdata = "${local.cluster_instances_ips[tonumber(each.key)]}" + ttl = 3600 + } + scope = "PRIVATE" + view_id = data.oci_dns_views.dns_views.views[0].id +} + +resource "oci_dns_rrset" "rrset-cluster-network-SLURM" { + + for_each = var.slurm && var.dns_entries ? toset([for v in range(var.node_count) : tostring(v)]) : [] + zone_name_or_id = data.oci_dns_zones.dns_zones.zones[0].id + domain = "${var.queue}-${var.instance_type}-${local.cluster_instances_ips_index[tonumber(each.key)]}.${var.zone_name}" + rtype = "A" + items { + domain = "${var.queue}-${var.instance_type}-${local.cluster_instances_ips_index[tonumber(each.key)]}.${var.zone_name}" + rtype = "A" + rdata = "${local.cluster_instances_ips[tonumber(each.key)]}" + ttl = 3600 + } + scope = "PRIVATE" + view_id = data.oci_dns_views.dns_views.views[0].id +} \ No newline at end of file diff --git a/autoscaling/tf_init/versions.tf b/autoscaling/tf_init/versions.tf index 6dd2b529..57e63004 100755 --- a/autoscaling/tf_init/versions.tf +++ b/autoscaling/tf_init/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { oci = { source = "oracle/oci" - version = "5.1.0" + version = "5.30.0" } } } \ No newline at end of file diff --git a/bin/configure.sh b/bin/configure.sh index 76dd82f4..5e06a218 100644 --- a/bin/configure.sh +++ b/bin/configure.sh @@ -4,7 +4,7 @@ # # -# wait for cloud-init completion on the bastion host +# wait for cloud-init completion on the controller host # execution=1 diff --git a/bin/configure_as.sh b/bin/configure_as.sh index a2cbbedb..f530afa0 100755 --- a/bin/configure_as.sh +++ b/bin/configure_as.sh @@ -4,7 +4,7 @@ # # -# wait for cloud-init completion on the bastion host +# wait for cloud-init completion on the controller host # scripts=`realpath $0` diff --git a/bin/bastion.sh b/bin/controller.sh similarity index 87% rename from bin/bastion.sh rename to bin/controller.sh index a7bf37e6..f48b7f2d 100644 --- a/bin/bastion.sh +++ b/bin/controller.sh @@ -4,7 +4,7 @@ # # -# wait for cloud-init completion on the bastion host +# wait for cloud-init completion on the controller host # execution=1 @@ -39,7 +39,7 @@ if [ $ID == "ol" ] || [ $ID == "centos" ] ; then elif [ $vid == 8 ] ; then sudo yum makecache --enablerepo=$repo sudo yum install --enablerepo=$repo -y python38.x86_64 - sudo python3.8 -m pip install ansible cryptography netaddr + sudo python3.8 -m pip install ansible cryptography netaddr > /dev/null sudo mkdir /etc/ansible sudo ln -s /usr/local/bin/ansible-playbook /bin/ansible-playbook sudo ln -s /usr/local/bin/ansible /bin/ansible @@ -47,12 +47,12 @@ if [ $ID == "ol" ] || [ $ID == "centos" ] ; then sudo yum-config-manager --add-repo https://rpm.releases.hashicorp.com/RHEL/hashicorp.repo sudo sed -i 's/$releasever/'"${vid}"'/g' /etc/yum.repos.d/hashicorp.repo sudo yum install -y terraform - sudo python3 -m pip install -U pip - sudo python3 -m pip install netaddr --upgrade - sudo python3 -m pip install setuptools_rust --upgrade - sudo python3 -m pip install requests --upgrade - sudo python3 -m pip install urllib3 --upgrade - sudo python3 -m pip install oci-cli --upgrade + sudo python3 -m pip install -U pip > /dev/null + sudo python3 -m pip install netaddr --upgrade > /dev/null + sudo python3 -m 
pip install setuptools_rust --upgrade > /dev/null + sudo python3 -m pip install requests --upgrade > /dev/null + sudo python3 -m pip install urllib3 --upgrade > /dev/null + sudo python3 -m pip install oci-cli --upgrade > /dev/null elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then @@ -123,18 +123,18 @@ elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then fi fi fix_apt - sudo python3 -m pip install -U pip - sudo python3 -m pip install netaddr --upgrade - sudo python3 -m pip install requests --upgrade - sudo python3 -m pip install urllib3 --upgrade - pip install pip --upgrade - pip install pyopenssl --upgrade + sudo python3 -m pip install -U pip > /dev/null + sudo python3 -m pip install netaddr --upgrade > /dev/null + sudo python3 -m pip install requests --upgrade > /dev/null + sudo python3 -m pip install urllib3 --upgrade > /dev/null + pip install pip --upgrade > /dev/null + pip install pyopenssl --upgrade > /dev/null # install oci-cli (add --oci-cli-version 3.23.3 or version that you know works if the latest does not work ) - bash -c "$(curl -L https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh)" -s --accept-all-defaults + bash -c "$(curl -L https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh)" -s --accept-all-defaults > /dev/null # install oci module - pip install oci + pip install oci > /dev/null wget -O- https://apt.releases.hashicorp.com/gpg | \ gpg --dearmor | \ @@ -180,7 +180,7 @@ if [ ! -d /etc/ansible ] ; then fi fi -ansible-config init --disabled -t all | sudo tee /etc/ansible/ansible.cfg +ansible-config init --disabled -t all | sudo tee /etc/ansible/ansible.cfg > /dev/null sudo sed -i "s/^\(#\|;\)forks.*/forks = ${forks}/" /etc/ansible/ansible.cfg sudo sed -i "s/^\(#\|;\)fact_caching=.*/fact_caching=jsonfile/" /etc/ansible/ansible.cfg sudo sed -i "0,/^\(#\|;\)fact_caching_connection.*/s//fact_caching_connection=\/tmp\/ansible/" /etc/ansible/ansible.cfg diff --git a/bin/initial_monitoring.sh b/bin/initial_monitoring.sh index d30d78e0..c3b51519 100644 --- a/bin/initial_monitoring.sh +++ b/bin/initial_monitoring.sh @@ -4,8 +4,8 @@ scripts=`realpath $0` folder=`dirname $scripts` end_timestamp=`date -u +'%F %T'` -bastionName=`hostname` -cluster_name=${bastionName/-bastion/} +controllerName=`hostname` +cluster_name=${controllerName/-controller/} autoscaling_folder=$folder/../autoscaling monitoring_folder=$folder/../monitoring diff --git a/bin/resize.py b/bin/resize.py index 5faf6273..9525fee4 100644 --- a/bin/resize.py +++ b/bin/resize.py @@ -8,6 +8,7 @@ import shutil import os import copy +import ipaddress from datetime import datetime def get_metadata(): @@ -139,7 +140,7 @@ def backup_inventory(inventory): def destroy_unreachable_reconfigure(inventory,nodes_to_remove,playbook): if not os.path.isfile("/etc/ansible/hosts"): - print("There is no inventory file, are you on the bastion? The cluster has not been resized") + print("There is no inventory file, are you on the controller? 
The cluster has not been resized") exit() backup_inventory(inventory) inventory_dict = parse_inventory(inventory) @@ -167,7 +168,7 @@ def destroy_unreachable_reconfigure(inventory,nodes_to_remove,playbook): ips_to_remove.append(instance['ip']) if len(ips_to_remove) != len(nodes_to_remove): print("Some nodes are removed in OCI and removed from the inventory") - print("Try rerunning with the --nodes option and a list of IPs or Slurm Hostnames to cleanup the bastion") + print("Try rerunning with the --nodes option and a list of IPs or Slurm Hostnames to cleanup the controller") write_inventory(inventory_dict,tmp_inventory_destroy) if not len(ips_to_remove): print("No hostname found, trying anyway with "+" ".join(nodes_to_remove)) @@ -189,7 +190,7 @@ def destroy_unreachable_reconfigure(inventory,nodes_to_remove,playbook): def destroy_reconfigure(inventory,nodes_to_remove,playbook): if not os.path.isfile("/etc/ansible/hosts"): - print("There is no inventory file, are you on the bastion? The cluster has not been resized") + print("There is no inventory file, are you on the controller? The cluster has not been resized") exit() backup_inventory(inventory) inventory_dict = parse_inventory(inventory) @@ -261,7 +262,7 @@ def add_reconfigure(comp_ocid,cn_ocid,inventory,CN,specific_hosts=None): reachable_instances=instances unreachable_instances=[] if not os.path.isfile(inventory): - print("There is no inventory file, are you on the bastion? The cluster has been resized but not reconfigured") + print("There is no inventory file, are you on the controller? The cluster has been resized but not reconfigured") exit() host_to_wait_for=[] for node in reachable_instances: @@ -308,7 +309,7 @@ def add_reconfigure(comp_ocid,cn_ocid,inventory,CN,specific_hosts=None): def reconfigure(comp_ocid,cn_ocid,inventory,CN, crucial=False): instances = get_instances(comp_ocid,cn_ocid,CN) if not os.path.isfile(inventory): - print("There is no inventory file, are you on the bastion? Reconfigure did not happen") + print("There is no inventory file, are you on the controller? Reconfigure did not happen") exit() backup_inventory(inventory) inventory_dict = parse_inventory(inventory) @@ -567,7 +568,7 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index parser = argparse.ArgumentParser(description='Script to resize the CN') parser.add_argument('--compartment_ocid', help='OCID of the compartment, defaults to the Compartment OCID of the localhost') -parser.add_argument('--cluster_name', help='Name of the cluster to resize. Defaults to the name included in the bastion') +parser.add_argument('--cluster_name', help='Name of the cluster to resize. Defaults to the name included in the controller') parser.add_argument('mode', help='Mode type. add/remove node options, implicitly configures newly added nodes. Also implicitly reconfigure/restart services like Slurm to recognize new nodes. 
Similarly for remove option, terminates nodes and implicitly reconfigure/restart services like Slurm on rest of the cluster nodes to remove reference to deleted nodes.',choices=['add','remove','remove_unreachable','list','reconfigure'],default='list',nargs='?')
parser.add_argument('number', type=int, help="Number of nodes to add or delete if a list of hostnames is not defined",nargs='?')
parser.add_argument('--nodes', help="List of nodes to delete (Space Separated)",nargs='+')
@@ -586,11 +587,11 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
comp_ocid=args.compartment_ocid
if args.cluster_name is None:
-    cluster_name=metadata['displayName'].replace('-bastion','')
+    cluster_name=metadata['displayName'].replace('-controller','')
else:
    cluster_name=args.cluster_name
-if cluster_name == metadata['displayName'].replace('-bastion',''):
+if cluster_name == metadata['displayName'].replace('-controller',''):
    inventory="/etc/ansible/hosts"
    host_check_file="/tmp/hosts"
    autoscaling=False
@@ -605,6 +606,31 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
    if inv_vars.startswith("compute_username"):
        username=inv_vars.split("compute_username=")[1].strip()
        break
+zone_name=cluster_name+".local"
+for inv_vars in inventory_dict["all:vars"]:
+    if inv_vars.startswith("zone_name"):
+        zone_name=inv_vars.split("zone_name=")[1].strip()
+        break
+dns_entries=True
+for inv_vars in inventory_dict["all:vars"]:
+    if inv_vars.startswith("dns_entries"):
+        dns_entries=inv_vars.split("dns_entries=")[1].strip().lower() == "true"
+        break
+queue=None
+for inv_vars in inventory_dict["all:vars"]:
+    if inv_vars.startswith("queue"):
+        queue=inv_vars.split("queue=")[1].strip()
+        break
+instance_type=""
+for inv_vars in inventory_dict["all:vars"]:
+    if inv_vars.startswith("instance_type"):
+        instance_type=inv_vars.split("instance_type=")[1].strip()
+        break
+private_subnet_cidr=None
+for inv_vars in inventory_dict["all:vars"]:
+    if inv_vars.startswith("private_subnet"):
+        private_subnet_cidr=ipaddress.ip_network(inv_vars.split("private_subnet=")[1].strip())
+        break
hostnames=args.nodes
if hostnames is None:
@@ -650,6 +676,7 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
    computeManagementClient = oci.core.ComputeManagementClient(config_oci)
    ComputeManagementClientCompositeOperations = oci.core.ComputeManagementClientCompositeOperations(computeManagementClient)
    virtualNetworkClient = oci.core.VirtualNetworkClient(config_oci)
+    dns_client = oci.dns.DnsClient(config_oci)
else:
    signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
    computeClient = oci.core.ComputeClient(config={}, signer=signer)
@@ -657,6 +684,7 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
    computeManagementClient = oci.core.ComputeManagementClient(config={}, signer=signer)
    ComputeManagementClientCompositeOperations = oci.core.ComputeManagementClientCompositeOperations(computeManagementClient)
    virtualNetworkClient = oci.core.VirtualNetworkClient(config={}, signer=signer)
+    dns_client = oci.dns.DnsClient(config={}, signer=signer)
cn_summary,ip_summary,CN = get_summary(comp_ocid,cluster_name)
if cn_summary is None:
@@ -687,6 +715,7 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
    cn_instances = get_instances(comp_ocid,cn_ocid,CN)
    inventory_instances =[]
    only_inventory_instance=[]
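+    # Look up the private DNS zone of this cluster so that per-node A records
+    # can be deleted or updated as instances are terminated or added.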
+    zone_id=dns_client.list_zones(compartment_id=comp_ocid,name=zone_name,zone_type="PRIMARY",scope="PRIVATE").data[0].id
    for line in inventory_dict['compute_configured']:
        host=line.split('ansible_host=')[0].strip()
        ip=line.split("ansible_host=")[1].split("ansible_user=")[0].strip()
@@ -767,6 +796,16 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
            else:
                instance_details = oci.core.models.DetachInstancePoolInstanceDetails(instance_id=instance_id,is_auto_terminate=True,is_decrement_size=True)
                ComputeManagementClientCompositeOperations.detach_instance_pool_instance_and_wait_for_work_request(ipa_ocid,instance_details)
+            if dns_entries:
+                get_rr_set_response = dns_client.delete_rr_set(zone_name_or_id=zone_id,domain=instanceName+"."+zone_name,rtype="A",scope="PRIVATE")
+                ip=None
+                for i in cn_instances:
+                    if i['display_name'] == instanceName:
+                        ip = ipaddress.ip_address(i['ip'])
+                if not ip is None:
+                    index = list(private_subnet_cidr.hosts()).index(ip)+2
+                    slurm_name=queue+"-"+instance_type+"-"+str(index)+"."+zone_name
+                    get_rr_set_response = dns_client.delete_rr_set(zone_name_or_id=zone_id,domain=slurm_name,rtype="A",scope="PRIVATE")
            terminated_instances = terminated_instances + 1
            print("STDOUT: The instance "+instanceName+" is terminating")
        except:
@@ -794,8 +833,8 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
#            reconfigure(comp_ocid,cn_ocid,inventory,CN)
if args.mode == 'add':
+    cn_instances = get_instances(comp_ocid,cn_ocid,CN)
    if CN == "CC":
-        cn_instances = get_instances(comp_ocid,cn_ocid,CN)
        current_size=len(cn_instances)
        if len(cn_instances) == 0:
            print("The resize script cannot work for a compute cluster if there is no node in the cluster")
@@ -813,14 +852,23 @@ def getLaunchInstanceDetails(instance,comp_ocid,cn_ocid,max_previous_index,index
        size = current_size - hostnames_to_remove_len + args.number
        update_size = oci.core.models.UpdateInstancePoolDetails(size=size)
        ComputeManagementClientCompositeOperations.update_instance_pool_and_wait_for_state(ipa_ocid,update_size,['RUNNING'],waiter_kwargs={'max_wait_seconds':3600})
-    cn_summary,ip_summary,CN = get_summary(comp_ocid,cluster_name)
    if CN == "CC":
-        cn_instances = get_instances(comp_ocid,cn_ocid,CN)
-        newsize=len(cn_instances)
+        new_cn_instances = get_instances(comp_ocid,cn_ocid,CN)
+        newsize=len(new_cn_instances)
    else:
+        new_cn_instances = get_instances(comp_ocid,cn_ocid,CN)
        newsize=ip_summary.size
-    updateTFState(inventory,cluster_name,newsize)
+    if dns_entries:
+        for new_instance in new_cn_instances:
+            if not new_instance in cn_instances:
+                instanceName=new_instance['display_name']
+                ip = ipaddress.ip_address(new_instance['ip'])
+                index = list(private_subnet_cidr.hosts()).index(ip)+2
+                slurm_name=queue+"-"+instance_type+"-"+str(index)+"."+zone_name
+                get_rr_set_response = dns_client.update_rr_set(zone_name_or_id=zone_id,domain=slurm_name,rtype="A",scope="PRIVATE",update_rr_set_details=oci.dns.models.UpdateRRSetDetails(items=[oci.dns.models.RecordDetails(domain=slurm_name,rdata=new_instance['ip'],rtype="A",ttl=3600,)]))
+                get_rr_set_response = dns_client.update_rr_set(zone_name_or_id=zone_id,domain=instanceName+"."+zone_name,rtype="A",scope="PRIVATE",update_rr_set_details=oci.dns.models.UpdateRRSetDetails(items=[oci.dns.models.RecordDetails(domain=instanceName+"."+zone_name,rdata=new_instance['ip'],rtype="A",ttl=3600)]))
+    updateTFState(inventory,cluster_name,newsize)
    if newsize == current_size:
        print("No node was added, please check the work requests
of the Cluster Network and Instance Pool to see why") exit(1) diff --git a/bin/resize.sh b/bin/resize.sh index d2082db8..92dea986 100755 --- a/bin/resize.sh +++ b/bin/resize.sh @@ -23,8 +23,8 @@ fi resize_type=default permanent=1 -bastionName=`hostname` -cluster_name=${bastionName/-bastion/} +controllerName=`hostname` +cluster_name=${controllerName/-controller/} nodes=NULL for (( i=1; i<=$#; i++)); do if [ ${!i} == "--cluster_name" ] diff --git a/cluster-network-configuration.tf b/cluster-network-configuration.tf index 82a3fb60..1c097ca5 100755 --- a/cluster-network-configuration.tf +++ b/cluster-network-configuration.tf @@ -48,6 +48,19 @@ resource "oci_core_instance_configuration" "cluster-network-instance_configurati } } + + dynamic "platform_config" { + for_each = var.BIOS ? range(1) : [] + content { + type = local.platform_type + are_virtual_instructions_enabled = var.virt_instr + is_access_control_service_enabled = var.access_ctrl + is_input_output_memory_management_unit_enabled = var.IOMMU + is_symmetric_multi_threading_enabled = var.SMT + numa_nodes_per_socket = var.numa_nodes_per_socket == "Default" ? (local.platform_type == "GENERIC_BM" ? "NPS1": "NPS4" ): var.numa_nodes_per_socket + percentage_of_cores_enabled = var.percentage_of_cores_enabled == "Default" ? 100 : tonumber(var.percentage_of_cores_enabled) + } + } shape = var.cluster_network_shape diff --git a/cluster-network.tf b/cluster-network.tf index 859daf41..acc39040 100755 --- a/cluster-network.tf +++ b/cluster-network.tf @@ -19,7 +19,7 @@ resource "oci_core_volume_attachment" "cluster_network_volume_attachment" { resource "oci_core_cluster_network" "cluster_network" { count = ( ! var.compute_cluster ) && var.cluster_network && var.node_count > 0 ? 1 : 0 - depends_on = [oci_core_app_catalog_subscription.mp_image_subscription, oci_core_subnet.private-subnet, oci_core_subnet.public-subnet, oci_core_instance.bastion] + depends_on = [oci_core_app_catalog_subscription.mp_image_subscription, oci_core_subnet.private-subnet, oci_core_subnet.public-subnet, oci_core_instance.controller] compartment_id = var.targetCompartment instance_pools { instance_configuration_id = oci_core_instance_configuration.cluster-network-instance_configuration[0].id diff --git a/compute-nodes.tf b/compute-nodes.tf index c7e21c99..1544c5ad 100755 --- a/compute-nodes.tf +++ b/compute-nodes.tf @@ -25,8 +25,32 @@ resource "oci_core_instance" "compute_cluster_instances" { shape = var.cluster_network_shape agent_config { - is_management_disabled = true - } + + are_all_plugins_disabled = false + is_management_disabled = true + is_monitoring_disabled = false + + plugins_config { + desired_state = "DISABLED" + name = "OS Management Service Agent" + } + dynamic plugins_config { + + for_each = var.use_compute_agent ? ["ENABLED"] : ["DISABLED"] + content { + name = "Compute HPC RDMA Authentication" + desired_state = plugins_config.value + } + } + dynamic plugins_config { + for_each = var.use_compute_agent ? 
["ENABLED"] : ["DISABLED"] + content { + name = "Compute HPC RDMA Auto-Configuration" + desired_state = plugins_config.value + } + + } + } display_name = "${local.cluster_name}-node-${var.compute_cluster_start_index+count.index}" @@ -37,7 +61,7 @@ resource "oci_core_instance" "compute_cluster_instances" { metadata = { ssh_authorized_keys = "${var.ssh_key}\n${tls_private_key.ssh.public_key_openssh}" - user_data = base64encode(data.template_file.bastion_config.rendered) + user_data = base64encode(data.template_file.controller_config.rendered) } source_details { source_id = local.cluster_network_image diff --git a/conf/variables.tpl b/conf/variables.tpl index 96dd18d3..97fc9eb2 100755 --- a/conf/variables.tpl +++ b/conf/variables.tpl @@ -23,11 +23,14 @@ variable "image" { default = "##IMAGE##" } variable "vcn_compartment" { default = ""} variable "use_existing_vcn" {default = true} variable "vcn_subnet" {default = "${vcn_subnet}"} +variable "vcn_id" {default = "${vcn_id}"} variable "public_subnet_id" { default = "${public_subnet_id}"} variable "public_subnet" {default = "${public_subnet}"} variable "private_subnet_id" { default = "##PRIVATE_SUBNET_ID##"} variable "private_subnet" {default = "##PRIVATE_SUBNET##"} variable "rdma_subnet" { default = "${rdma_subnet}" } +variable "zone_name" {default = "${zone_name}"} +variable "dns_entries" {default = "${dns_entries}"} variable "slurm" { default = ${slurm} } variable "rack_aware" { default = ${rack_aware} } variable "pyxis" { default = ${pyxis} } @@ -53,10 +56,12 @@ variable "marketplace_version_id" { "2" = "OL7.8-OFED5.0-1.0.0.0-UEK-20200826" "3" = "OL7.7-OFED-4.4-2.0.7.0-UEK-20200229" "4" = "OL7.9-OFED5.0-2.1.8.0-RHCK-20210709" - "HPC_OL7" = "OracleLinux-7-OCA-RHCK-OFED-5.8-3.0.7.0-2024.01.02-0" - "HPC_OL8" = "OracleLinux-8-OCA-RHCK-OFED-5.8-3.0.7.0-2024.01.02-1" - "GPU_OL7" = "OracleLinux-7-OCA-RHCK-OFED-5.8-3.0.7.0-GPU-535-2024.01.02-0" - "GPU_OL8" = "OracleLinux-8-OCA-RHCK-OFED-5.8-3.0.7.0-GPU-535-2024.01.02-1" + "HPC_OL7" = "OracleLinux-7-OCA-RHCK-OFED-23.10-2.1.3.1-2024.03.15-0" + "HPC_OL8" = "OracleLinux-8-OCA-RHCK-OFED-23.10-2.1.3.1-2024.03.15-0" + "GPU_OL7_CUDA12.2" = "OracleLinux-7-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.2-2024.03.15-0" + "GPU_OL8_CUDA12.2" = "OracleLinux-8-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.2-2024.03.15-0" + "GPU_OL7_CUDA12.4" = "OracleLinux-7-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.4-2024.03.15-0" + "GPU_OL8_CUDA12.4" = "OracleLinux-8-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.4-2024.03.15-0" } } @@ -74,20 +79,20 @@ variable "marketplace_listing_id_GPU" { } -variable "bastion_block_volume_performance" { +variable "controller_block_volume_performance" { /* Allowed values "0. Lower performance" "10. Balanced performance" "20. 
High Performance" */ -default = "${bastion_block_volume_performance}" +default = "${controller_block_volume_performance}" } variable "scratch_nfs_type_cluster" { default = "${scratch_nfs_type_cluster}"} variable "scratch_nfs_type_pool" { default = "${scratch_nfs_type_pool}" } -variable "bastion_name" {default = "${bastion_name}"} -variable "bastion_ip" {default = "${bastion_ip}"} +variable "controller_name" {default = "${controller_name}"} +variable "controller_ip" {default = "${controller_ip}"} variable "backup_name" {default = "${backup_name}"} variable "backup_ip" {default = "${backup_ip}"} variable "login_name" {default = "${login_name}"} @@ -98,10 +103,10 @@ variable "cluster_block_volume_size" {default="${cluster_block_volume_size}"} variable "cluster_block_volume_performance" {default="${cluster_block_volume_performance}"} variable "ssh_cidr" {default="${ssh_cidr}"} -variable "bastion_block" {default = "${bastion_block}"} +variable "controller_block" {default = "${controller_block}"} variable "login_block" {default = "${login_block}"} -variable "bastion_mount_ip" {default = "${bastion_mount_ip}"} +variable "controller_mount_ip" {default = "${controller_mount_ip}"} variable "login_mount_ip" {default = "${login_mount_ip}"} variable "home_nfs" { default = ${home_nfs} } variable "home_fss" { default = ${home_fss} } @@ -127,7 +132,7 @@ variable "autoscaling_monitoring" { default = ${autoscaling_monitoring} } variable "tags" { default = "##TAGS##" } variable "private_deployment" { default = ${private_deployment} } variable "use_multiple_ads" { default = ${use_multiple_ads} } -variable "bastion_username" { default = "${bastion_username}" } +variable "controller_username" { default = "${controller_username}" } variable "compute_username" { default = "${compute_username}" } variable "localdisk" { default = "${localdisk}" } @@ -135,3 +140,25 @@ variable "log_vol" { default = "${log_vol}" } variable "redundancy" { default = "${redundancy}" } variable "instance_pool_ocpus_denseIO_flex" { default = "##OCPU##"} + +variable "BIOS" { + default = ${BIOS} +} +variable "IOMMU" { + default = ${IOMMU} +} +variable "SMT" { + default = ${SMT} +} +variable "virt_instr" { + default = ${virt_instr} +} +variable "access_ctrl" { + default = ${access_ctrl} +} +variable "numa_nodes_per_socket" { + default = "${numa_nodes_per_socket}" +} +variable "percentage_of_cores_enabled" { + default = "${percentage_of_cores_enabled}" +} \ No newline at end of file diff --git a/config.bastion b/config.controller similarity index 100% rename from config.bastion rename to config.controller diff --git a/bastion.tf b/controller.tf similarity index 70% rename from bastion.tf rename to controller.tf index c8df39cb..b215ab82 100644 --- a/bastion.tf +++ b/controller.tf @@ -1,40 +1,40 @@ -resource "oci_core_volume" "bastion_volume" { - count = var.bastion_block ? 1 : 0 - availability_domain = var.bastion_ad +resource "oci_core_volume" "controller_volume" { + count = var.controller_block ? 1 : 0 + availability_domain = var.controller_ad compartment_id = var.targetCompartment - display_name = "${local.cluster_name}-bastion-volume" + display_name = "${local.cluster_name}-controller-volume" - size_in_gbs = var.bastion_block_volume_size - vpus_per_gb = split(".", var.bastion_block_volume_performance)[0] + size_in_gbs = var.controller_block_volume_size + vpus_per_gb = split(".", var.controller_block_volume_performance)[0] } -resource "oci_core_volume_attachment" "bastion_volume_attachment" { - count = var.bastion_block ? 
1 : 0 +resource "oci_core_volume_attachment" "controller_volume_attachment" { + count = var.controller_block ? 1 : 0 attachment_type = "iscsi" - volume_id = oci_core_volume.bastion_volume[0].id - instance_id = oci_core_instance.bastion.id - display_name = "${local.cluster_name}-bastion-volume-attachment" + volume_id = oci_core_volume.controller_volume[0].id + instance_id = oci_core_instance.controller.id + display_name = "${local.cluster_name}-controller-volume-attachment" device = "/dev/oracleoci/oraclevdb" is_shareable = true } -resource "oci_core_volume_backup_policy" "bastion_boot_volume_backup_policy" { - count = var.bastion_boot_volume_backup ? 1 : 0 +resource "oci_core_volume_backup_policy" "controller_boot_volume_backup_policy" { + count = var.controller_boot_volume_backup ? 1 : 0 compartment_id = var.targetCompartment - display_name = "${local.cluster_name}-bastion_boot_volume_daily" + display_name = "${local.cluster_name}-controller_boot_volume_daily" schedules { - backup_type = var.bastion_boot_volume_backup_type - period = var.bastion_boot_volume_backup_period - retention_seconds = var.bastion_boot_volume_backup_retention_seconds - time_zone = var.bastion_boot_volume_backup_time_zone + backup_type = var.controller_boot_volume_backup_type + period = var.controller_boot_volume_backup_period + retention_seconds = var.controller_boot_volume_backup_retention_seconds + time_zone = var.controller_boot_volume_backup_time_zone } } resource "oci_core_volume_backup_policy_assignment" "boot_volume_backup_policy" { - count = var.bastion_boot_volume_backup ? 1 : 0 - depends_on = [oci_core_volume_backup_policy.bastion_boot_volume_backup_policy] - asset_id = oci_core_instance.bastion.boot_volume_id - policy_id = oci_core_volume_backup_policy.bastion_boot_volume_backup_policy[0].id + count = var.controller_boot_volume_backup ? 1 : 0 + depends_on = [oci_core_volume_backup_policy.controller_boot_volume_backup_policy] + asset_id = oci_core_instance.controller.boot_volume_id + policy_id = oci_core_volume_backup_policy.controller_boot_volume_backup_policy[0].id } resource "oci_resourcemanager_private_endpoint" "rms_private_endpoint" { @@ -47,29 +47,29 @@ resource "oci_resourcemanager_private_endpoint" "rms_private_endpoint" { } resource "null_resource" "boot_volume_backup_policy" { - depends_on = [oci_core_instance.bastion, oci_core_volume_backup_policy.bastion_boot_volume_backup_policy, oci_core_volume_backup_policy_assignment.boot_volume_backup_policy] + depends_on = [oci_core_instance.controller, oci_core_volume_backup_policy.controller_boot_volume_backup_policy, oci_core_volume_backup_policy_assignment.boot_volume_backup_policy] triggers = { - bastion = oci_core_instance.bastion.id + controller = oci_core_instance.controller.id } } -resource "oci_core_instance" "bastion" { - depends_on = [local.bastion_subnet] - availability_domain = var.bastion_ad +resource "oci_core_instance" "controller" { + depends_on = [local.controller_subnet] + availability_domain = var.controller_ad compartment_id = var.targetCompartment - shape = var.bastion_shape + shape = var.controller_shape dynamic "shape_config" { - for_each = local.is_bastion_flex_shape + for_each = local.is_controller_flex_shape content { ocpus = shape_config.value - memory_in_gbs = var.bastion_custom_memory ? var.bastion_memory : 16 * shape_config.value + memory_in_gbs = var.controller_custom_memory ? 
var.controller_memory : 16 * shape_config.value } } agent_config { is_management_disabled = true } - display_name = "${local.cluster_name}-bastion" + display_name = "${local.cluster_name}-controller" freeform_tags = { "cluster_name" = local.cluster_name @@ -78,39 +78,39 @@ resource "oci_core_instance" "bastion" { metadata = { ssh_authorized_keys = "${var.ssh_key}\n${tls_private_key.ssh.public_key_openssh}" - user_data = base64encode(data.template_file.bastion_config.rendered) + user_data = base64encode(data.template_file.controller_config.rendered) } source_details { -// source_id = var.use_standard_image ? data.oci_core_images.linux.images.0.id : local.custom_bastion_image_ocid - source_id = local.bastion_image - boot_volume_size_in_gbs = var.bastion_boot_volume_size +// source_id = var.use_standard_image ? data.oci_core_images.linux.images.0.id : local.custom_controller_image_ocid + source_id = local.controller_image + boot_volume_size_in_gbs = var.controller_boot_volume_size source_type = "image" } create_vnic_details { - subnet_id = local.bastion_subnet_id - assign_public_ip = local.bastion_bool_ip + subnet_id = local.controller_subnet_id + assign_public_ip = local.controller_bool_ip } } -resource "null_resource" "bastion" { - depends_on = [oci_core_instance.bastion, oci_core_volume_attachment.bastion_volume_attachment ] +resource "null_resource" "controller" { + depends_on = [oci_core_instance.controller, oci_core_volume_attachment.controller_volume_attachment ] triggers = { - bastion = oci_core_instance.bastion.id + controller = oci_core_instance.controller.id } provisioner "remote-exec" { inline = [ "#!/bin/bash", "sudo mkdir -p /opt/oci-hpc", - "sudo chown ${var.bastion_username}:${var.bastion_username} /opt/oci-hpc/", + "sudo chown ${var.controller_username}:${var.controller_username} /opt/oci-hpc/", "mkdir -p /opt/oci-hpc/bin", "mkdir -p /opt/oci-hpc/playbooks" ] connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -120,7 +120,7 @@ resource "null_resource" "bastion" { connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -131,7 +131,7 @@ resource "null_resource" "bastion" { connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -142,7 +142,7 @@ resource "null_resource" "bastion" { connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -153,7 +153,7 @@ resource "null_resource" "bastion" { connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -163,7 +163,7 @@ resource "null_resource" "bastion" { connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -173,7 +173,7 @@ resource "null_resource" "bastion" { connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -183,7 +183,7 @@ resource "null_resource" "bastion" { connection { host = local.host type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = 
@@ -195,43 +195,43 @@ resource "null_resource" "bastion" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }

   provisioner "file" {
     content     = tls_private_key.ssh.private_key_openssh
-    destination = "/home/${var.bastion_username}/.ssh/cluster.key"
+    destination = "/home/${var.controller_username}/.ssh/cluster.key"
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }

   provisioner "file" {
     content     = tls_private_key.ssh.public_key_openssh
-    destination = "/home/${var.bastion_username}/.ssh/id_rsa.pub"
+    destination = "/home/${var.controller_username}/.ssh/id_rsa.pub"
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
 }

 resource "null_resource" "cluster" {
-  depends_on = [null_resource.bastion, null_resource.backup, oci_core_compute_cluster.compute_cluster, oci_core_cluster_network.cluster_network, oci_core_instance.bastion, oci_core_volume_attachment.bastion_volume_attachment ]
+  depends_on = [null_resource.controller, null_resource.backup, oci_core_compute_cluster.compute_cluster, oci_core_cluster_network.cluster_network, oci_core_instance.controller, oci_core_volume_attachment.controller_volume_attachment ]
   triggers = {
     cluster_instances = join(", ", local.cluster_instances_names)
   }

   provisioner "file" {
     content = templatefile("${path.module}/inventory.tpl", {
-      bastion_name = oci_core_instance.bastion.display_name,
-      bastion_ip = oci_core_instance.bastion.private_ip,
+      controller_name = oci_core_instance.controller.display_name,
+      controller_ip = oci_core_instance.controller.private_ip,
       backup_name = var.slurm_ha ? oci_core_instance.backup[0].display_name : "",
       backup_ip = var.slurm_ha ? oci_core_instance.backup[0].private_ip: "",
       login_name = var.login_node ? oci_core_instance.login[0].display_name : "",
@@ -241,6 +241,8 @@ resource "null_resource" "cluster" {
       private_subnet = data.oci_core_subnet.private_subnet.cidr_block,
       rdma_network = cidrhost(var.rdma_subnet, 0),
       rdma_netmask = cidrnetmask(var.rdma_subnet),
+      zone_name = local.zone_name,
+      dns_entries = var.dns_entries,
       nfs = var.node_count > 0 && var.use_scratch_nfs ? local.cluster_instances_names[0] : "",
       home_nfs = var.home_nfs,
       create_fss = var.create_fss,
@@ -264,10 +266,10 @@ resource "null_resource" "cluster" {
       slurm_nfs_path = var.slurm_nfs ? var.nfs_source_path : var.cluster_nfs_path
       spack = var.spack,
       ldap = var.ldap,
-      bastion_block = var.bastion_block,
+      controller_block = var.controller_block,
       login_block = var.login_block,
       scratch_nfs_type = local.scratch_nfs_type,
-      bastion_mount_ip = local.bastion_mount_ip,
+      controller_mount_ip = local.controller_mount_ip,
       login_mount_ip = local.login_mount_ip,
       cluster_mount_ip = local.mount_ip,
       autoscaling = var.autoscaling,
@@ -277,7 +279,7 @@ resource "null_resource" "cluster" {
       queue=var.queue,
       monitoring = var.monitoring,
       hyperthreading = var.hyperthreading,
-      bastion_username = var.bastion_username,
+      controller_username = var.controller_username,
       compute_username = var.compute_username,
       autoscaling_monitoring = var.autoscaling_monitoring,
       autoscaling_mysql_service = var.autoscaling_mysql_service,
@@ -302,7 +304,7 @@ resource "null_resource" "cluster" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
@@ -314,7 +316,7 @@ resource "null_resource" "cluster" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
@@ -331,7 +333,7 @@ resource "null_resource" "cluster" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
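The `null_resource.cluster` provisioners render cluster-wide configuration with `templatefile()`: the Ansible inventory above, and `/opt/oci-hpc/conf/variables.tf` just below. The template directives behave as in this self-contained sketch (the variable and its values are illustrative, not the stack's):

```
variable "backup_name" { default = "" }

output "slurm_backup_line" {
  # Mirrors the [slurm_backup] entry of inventory.tpl: the host line is
  # only emitted when a Slurm backup controller actually exists.
  value = <<-EOT
    %{~ if var.backup_name != "" ~}
    ${var.backup_name} ansible_user=opc role=controller
    %{~ endif ~}
  EOT
}
```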
@@ -362,22 +364,22 @@ resource "null_resource" "cluster" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }

   provisioner "file" {
     content = templatefile("${path.module}/conf/variables.tpl", {
-      bastion_name = oci_core_instance.bastion.display_name,
-      bastion_ip = oci_core_instance.bastion.private_ip,
+      controller_name = oci_core_instance.controller.display_name,
+      controller_ip = oci_core_instance.controller.private_ip,
       backup_name = var.slurm_ha ? oci_core_instance.backup[0].display_name : "",
       backup_ip = var.slurm_ha ? oci_core_instance.backup[0].private_ip: "",
       login_name = var.login_node ? oci_core_instance.login[0].display_name : "",
       login_ip = var.login_node ? oci_core_instance.login[0].private_ip: "",
       compute = var.node_count > 0 ? zipmap(local.cluster_instances_names, local.cluster_instances_ips) : zipmap([],[])
       public_subnet = data.oci_core_subnet.public_subnet.cidr_block,
-      public_subnet_id = local.bastion_subnet_id,
+      public_subnet_id = local.controller_subnet_id,
       private_subnet = data.oci_core_subnet.private_subnet.cidr_block,
       private_subnet_id = local.subnet_id,
       rdma_subnet = var.rdma_subnet,
@@ -390,18 +392,21 @@
       slurm_nfs_path = var.add_nfs ? var.nfs_source_path : var.cluster_nfs_path
       spack = var.spack,
       ldap = var.ldap,
-      bastion_block = var.bastion_block,
+      controller_block = var.controller_block,
       login_block = var.login_block,
       scratch_nfs_type = local.scratch_nfs_type,
-      bastion_mount_ip = local.bastion_mount_ip,
+      controller_mount_ip = local.controller_mount_ip,
       login_mount_ip = local.login_mount_ip,
       cluster_mount_ip = local.mount_ip,
       scratch_nfs_type_cluster = var.scratch_nfs_type_cluster,
       scratch_nfs_type_pool = var.scratch_nfs_type_pool,
-      bastion_block_volume_performance = var.bastion_block_volume_performance,
+      controller_block_volume_performance = var.controller_block_volume_performance,
       region = var.region,
       tenancy_ocid = var.tenancy_ocid,
       vcn_subnet = var.vcn_subnet,
+      vcn_id = local.vcn_id,
+      zone_name = local.zone_name,
+      dns_entries = var.dns_entries,
       cluster_block_volume_size = var.cluster_block_volume_size,
       cluster_block_volume_performance = var.cluster_block_volume_performance,
       ssh_cidr = var.ssh_cidr,
@@ -429,18 +434,25 @@
       latency_check = var.latency_check,
       private_deployment = var.private_deployment,
       use_multiple_ads = var.use_multiple_ads,
-      bastion_username = var.bastion_username,
+      controller_username = var.controller_username,
       compute_username = var.compute_username,
       pam = var.pam,
       sacct_limits = var.sacct_limits,
-      use_compute_agent = var.use_compute_agent
+      use_compute_agent = var.use_compute_agent,
+      BIOS = var.BIOS,
+      IOMMU = var.IOMMU,
+      SMT = var.SMT,
+      virt_instr = var.virt_instr,
+      access_ctrl = var.access_ctrl,
+      numa_nodes_per_socket = var.numa_nodes_per_socket,
+      percentage_of_cores_enabled = var.percentage_of_cores_enabled
       })
     destination = "/opt/oci-hpc/conf/variables.tf"
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
@@ -460,7 +472,7 @@ provisioner "file" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
@@ -470,7 +482,7 @@ provisioner "file" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
@@ -478,10 +490,10 @@ provisioner "file" {
   provisioner "remote-exec" {
     inline = [
       "#!/bin/bash",
-      "chmod 600 /home/${var.bastion_username}/.ssh/cluster.key",
-      "cp /home/${var.bastion_username}/.ssh/cluster.key /home/${var.bastion_username}/.ssh/id_rsa",
+      "chmod 600 /home/${var.controller_username}/.ssh/cluster.key",
+      "cp /home/${var.controller_username}/.ssh/cluster.key /home/${var.controller_username}/.ssh/id_rsa",
       "chmod a+x /opt/oci-hpc/bin/*.sh",
-      "timeout --foreground 60m /opt/oci-hpc/bin/bastion.sh",
+      "timeout --foreground 60m /opt/oci-hpc/bin/controller.sh",
       "chmod 755 /opt/oci-hpc/autoscaling/crontab/*.sh",
       "chmod 755 /opt/oci-hpc/samples/*.sh",
       "chmod 600 /opt/oci-hpc/autoscaling/credentials/key.pem",
@@ -493,7 +505,7 @@ provisioner "file" {
     connection {
       host        = local.host
       type        = "ssh"
-      user        = var.bastion_username
+      user        = var.controller_username
       private_key = tls_private_key.ssh.private_key_pem
     }
   }
@@ -516,7 +528,7 @@ this PAR is used by the scripts to upload NIC metrics to object storage (i.e. sc
 resource "oci_objectstorage_bucket" "RDMA_NIC_metrics_bucket" {
-  count          = (var.bastion_object_storage_par) ? 1 : 0
+  count          = (var.controller_object_storage_par) ? 1 : 0
   compartment_id = var.targetCompartment
   name           = local.rdma_nic_metric_bucket_name
   namespace      = data.oci_objectstorage_namespace.compartment_namespace.namespace
@@ -524,7 +536,7 @@ resource "oci_objectstorage_bucket" "RDMA_NIC_metrics_bucket" {
 }

 resource "oci_objectstorage_preauthrequest" "RDMA_NIC_metrics_par" {
-  count       = (var.bastion_object_storage_par) ? 1 : 0
+  count       = (var.controller_object_storage_par) ? 1 : 0
   depends_on  = [oci_objectstorage_bucket.RDMA_NIC_metrics_bucket]
   access_type = "AnyObjectWrite"
   bucket      = local.rdma_nic_metric_bucket_name
@@ -536,14 +548,31 @@ resource "oci_objectstorage_preauthrequest" "RDMA_NIC_metrics_par" {

 output "RDMA_NIC_metrics_url" {
   depends_on = [oci_objectstorage_preauthrequest.RDMA_NIC_metrics_par]
-  value      = (var.bastion_object_storage_par) ? "https://objectstorage.${var.region}.oraclecloud.com${oci_objectstorage_preauthrequest.RDMA_NIC_metrics_par[0].access_uri}" : ""
+  value      = (var.controller_object_storage_par) ? "https://objectstorage.${var.region}.oraclecloud.com${oci_objectstorage_preauthrequest.RDMA_NIC_metrics_par[0].access_uri}" : ""
 }

 resource "local_file" "PAR" {
-  count      = (var.bastion_object_storage_par) ? 1 : 0
+  count      = (var.controller_object_storage_par) ? 1 : 0
   depends_on = [oci_objectstorage_preauthrequest.RDMA_NIC_metrics_par]
   content    = "https://objectstorage.${var.region}.oraclecloud.com${oci_objectstorage_preauthrequest.RDMA_NIC_metrics_par[0].access_uri}"
   filename   = "${local.par_path}/PAR_file_for_metrics"
 }
+
+resource "oci_dns_rrset" "rrset-controller" {
+  count           = var.dns_entries ? 1 : 0
+  zone_name_or_id = data.oci_dns_zones.dns_zones.zones[0].id
+  domain          = "${oci_core_instance.controller.display_name}.${local.zone_name}"
+  rtype           = "A"
+  items {
+    domain = "${oci_core_instance.controller.display_name}.${local.zone_name}"
+    rtype  = "A"
+    rdata  = oci_core_instance.controller.private_ip
+    ttl    = 3600
+  }
+  scope   = "PRIVATE"
+  view_id = data.oci_dns_views.dns_views.views[0].id
+}
+
+#oci dns record rrset update --zone-name-or-id ocid1.dns-zone.oc1.ca-toronto-1.aaaaaaaadwpfuij3w7jpg3sj6gzc5ete2yeknrmjgwzvs6qytgkqad2vhbmq --domain mint-ocelot-controller.mint-ocelot.local --rtype A --auth instance_principal --scope PRIVATE --view-id ocid1.dnsview.oc1.ca-toronto-1.aaaaaaaamhhzrbwe4f3rx5i2hx2xlnubfjc37uvy3e7bjrbyaln5o7zjfvpa --items '[{ "rdata":"1.1.1.1","ttl":300,"domain":"mint-ocelot-controller.mint-ocelot.local","rtype":"A"}]' --force
\ No newline at end of file
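As the comment at `@@ -516` notes, this pre-authenticated request (PAR) is what the monitoring scripts use to ship NIC metrics to Object Storage without node credentials: an `AnyObjectWrite` PAR accepts unauthenticated `PUT`s of new objects under its access URI. A hypothetical illustration of consuming the URL saved in `PAR_file_for_metrics` (the file name and object path here are made up, not part of the stack):

```
resource "null_resource" "par_upload_example" {
  provisioner "local-exec" {
    # Appending an object name to the PAR URL and PUTting the payload is
    # enough; no OCI credentials are involved.
    command = "curl -s -X PUT --data-binary @metrics.csv \"$(cat PAR_file_for_metrics)node1-metrics.csv\""
  }
}
```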
diff --git a/data.tf b/data.tf
index 3325be77..e5dd4277 100755
--- a/data.tf
+++ b/data.tf
@@ -43,7 +43,7 @@ data "oci_core_subnet" "private_subnet" {
 }

 data "oci_core_subnet" "public_subnet" {
-  subnet_id = local.bastion_subnet_id
+  subnet_id = local.controller_subnet_id
 }

 data "oci_core_images" "linux" {
@@ -61,7 +61,7 @@ data "oci_resourcemanager_private_endpoint_reachable_ip" "private_endpoint_reach
   #Required
   count               = var.private_deployment ? 1 : 0
   private_endpoint_id = oci_resourcemanager_private_endpoint.rms_private_endpoint[0].id
-  private_ip          = tostring(oci_core_instance.bastion.private_ip)
+  private_ip          = tostring(oci_core_instance.controller.private_ip)
 }

 data "oci_resourcemanager_private_endpoint_reachable_ip" "private_endpoint_reachable_ip_backup" {
@@ -76,4 +76,19 @@ data "oci_resourcemanager_private_endpoint_reachable_ip" "private_endpoint_reach
   count               = (var.private_deployment && var.login_node) ? 1 : 0
   private_endpoint_id = oci_resourcemanager_private_endpoint.rms_private_endpoint[0].id
   private_ip          = tostring(oci_core_instance.login[0].private_ip)
+}
+
+data "oci_dns_views" "dns_views" {
+  depends_on     = [local.controller_subnet, oci_core_vcn.vcn]
+  compartment_id = var.targetCompartment
+  scope          = "PRIVATE"
+  display_name   = data.oci_core_vcn.vcn.display_name
+}
+
+data "oci_dns_zones" "dns_zones" {
+  depends_on     = [local.controller_subnet, oci_core_vcn.vcn, oci_dns_zone.dns_zone ]
+  compartment_id = var.targetCompartment
+  name           = local.zone_name
+  zone_type      = "PRIMARY"
+  scope          = "PRIVATE"
 }
\ No newline at end of file
diff --git a/instance-pool-configuration.tf b/instance-pool-configuration.tf
index 04b2a23d..b28dbe5c 100755
--- a/instance-pool-configuration.tf
+++ b/instance-pool-configuration.tf
@@ -33,7 +33,19 @@ resource "oci_core_instance_configuration" "instance_pool_configuration" {
         memory_in_gbs = var.instance_pool_custom_memory ? var.instance_pool_memory : 16 * shape_config.value
       }
     }
-
+
+    dynamic "platform_config" {
+      for_each = var.BIOS ? range(1) : []
+      content {
+        type                                           = local.platform_type
+        are_virtual_instructions_enabled               = var.virt_instr
+        is_access_control_service_enabled              = var.access_ctrl
+        is_input_output_memory_management_unit_enabled = var.IOMMU
+        is_symmetric_multi_threading_enabled           = var.SMT
+        numa_nodes_per_socket                          = var.numa_nodes_per_socket == "Default" ? (local.platform_type == "GENERIC_BM" ? "NPS1": "NPS4" ): var.numa_nodes_per_socket
+        percentage_of_cores_enabled                    = var.percentage_of_cores_enabled == "Default" ? 100 : tonumber(var.percentage_of_cores_enabled)
+      }
+    }
     source_details {
       source_type             = "image"
       boot_volume_size_in_gbs = var.boot_volume_size
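The `platform_config` block is only attached when BIOS tuning is requested, and `local.platform_type` (defined in locals.tf below) maps each bare-metal shape to an OCI platform type. A runnable sketch of the two "Default" fallbacks, with illustrative values for a BM.GPU4.8 pool:

```
locals {
  platform_type               = "AMD_ROME_BM_GPU" # BM.GPU4.8 per locals.tf
  numa_nodes_per_socket       = "Default"
  percentage_of_cores_enabled = "Default"

  # Same defaulting rules as the dynamic block above.
  effective_nps   = local.numa_nodes_per_socket == "Default" ? (local.platform_type == "GENERIC_BM" ? "NPS1" : "NPS4") : local.numa_nodes_per_socket
  effective_cores = local.percentage_of_cores_enabled == "Default" ? 100 : tonumber(local.percentage_of_cores_enabled)
}

output "platform_config_example" {
  # Yields NPS4 and 100: "Default" means NPS1 only on GENERIC_BM platforms,
  # and all cores enabled.
  value = { numa_nodes_per_socket = local.effective_nps, percentage_of_cores_enabled = local.effective_cores }
}
```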
diff --git a/inventory.tpl b/inventory.tpl
index 1d1586c2..f39e534e 100755
--- a/inventory.tpl
+++ b/inventory.tpl
@@ -1,7 +1,7 @@
-[bastion]
-${bastion_name} ansible_host=${bastion_ip} ansible_user=${bastion_username} role=bastion ansible_python_interpreter=/usr/bin/python
+[controller]
+${controller_name} ansible_host=${controller_ip} ansible_user=${controller_username} role=controller ansible_python_interpreter=/usr/bin/python
 [slurm_backup]
-%{ if backup_name != "" }${backup_name} ansible_host=${backup_ip} ansible_user=${compute_username} role=bastion%{ endif }
+%{ if backup_name != "" }${backup_name} ansible_host=${backup_ip} ansible_user=${compute_username} role=controller%{ endif }
 [login]
 %{ if login_name != "" }${login_name} ansible_host=${login_ip} ansible_user=${compute_username} role=login%{ endif }
 [compute_to_add]
@@ -34,10 +34,10 @@ cluster_network = ${cluster_network}
 slurm = ${slurm}
 rack_aware = ${rack_aware}
 spack = ${spack}
-bastion_block = ${bastion_block}
+controller_block = ${controller_block}
 login_block = ${login_block}
 scratch_nfs_type = ${scratch_nfs_type}
-bastion_mount_ip = ${bastion_mount_ip}
+controller_mount_ip = ${controller_mount_ip}
 login_mount_ip = ${login_mount_ip}
 cluster_mount_ip = ${cluster_mount_ip}
 autoscaling = ${autoscaling}
@@ -69,11 +69,13 @@ privilege_sudo=${privilege_sudo}
 privilege_group_name=${privilege_group_name}
 latency_check=${latency_check}
 compute_username=${compute_username}
-bastion_username=${bastion_username}
+controller_username=${controller_username}
 region= ${region}
 tenancy_ocid = ${tenancy_ocid}
 inst_prin = ${inst_prin}
 api_fingerprint = ${api_fingerprint}
 api_user_ocid = ${api_user_ocid}
 sacct_limits=${sacct_limits}
-use_compute_agent=${use_compute_agent}
\ No newline at end of file
+use_compute_agent=${use_compute_agent}
+zone_name=${zone_name}
+dns_entries=${dns_entries}
\ No newline at end of file
diff --git a/locals.tf b/locals.tf
index 9c791ed3..f87a3b68 100755
--- a/locals.tf
+++ b/locals.tf
@@ -4,15 +4,17 @@ locals {
   cluster_instances_names = var.compute_cluster ? oci_core_instance.compute_cluster_instances.*.display_name : var.cluster_network ? data.oci_core_instance.cluster_network_instances.*.display_name : data.oci_core_instance.instance_pool_instances.*.display_name
   image_ocid = var.unsupported ? var.image_ocid : var.image
-  custom_bastion_image_ocid = var.unsupported_bastion ? var.unsupported_bastion_image : var.custom_bastion_image
+  custom_controller_image_ocid = var.unsupported_controller ? var.unsupported_controller_image : var.custom_controller_image
   custom_login_image_ocid = var.unsupported_login ? var.unsupported_login_image : var.custom_login_image
   shape = var.cluster_network ? var.cluster_network_shape : var.instance_pool_shape
-  instance_pool_ocpus = local.shape == "VM.DenseIO.E4.Flex" ? var.instance_pool_ocpus_denseIO_flex : var.instance_pool_ocpus
-  bastion_ocpus = var.bastion_shape == "VM.DenseIO.E4.Flex" ? var.bastion_ocpus_denseIO_flex : var.bastion_ocpus
-  login_ocpus = var.login_shape == "VM.DenseIO.E4.Flex" ? var.login_ocpus_denseIO_flex : var.login_ocpus
+  instance_pool_ocpus = ( local.shape == "VM.DenseIO.E4.Flex" || local.shape == "VM.DenseIO.E5.Flex" ) ? var.instance_pool_ocpus_denseIO_flex : var.instance_pool_ocpus
+  controller_ocpus = ( var.controller_shape == "VM.DenseIO.E4.Flex" || var.controller_shape == "VM.DenseIO.E5.Flex" ) ? var.controller_ocpus_denseIO_flex : var.controller_ocpus
+  login_ocpus = ( var.login_shape == "VM.DenseIO.E4.Flex" || var.login_shape == "VM.DenseIO.E5.Flex" ) ? var.login_ocpus_denseIO_flex : var.login_ocpus

   // ips of the instances
   cluster_instances_ips = var.compute_cluster ? oci_core_instance.compute_cluster_instances.*.private_ip : var.cluster_network ? data.oci_core_instance.cluster_network_instances.*.private_ip : data.oci_core_instance.instance_pool_instances.*.private_ip
+  first_vcn_ip = cidrhost(data.oci_core_subnet.private_subnet.cidr_block,0)
+  cluster_instances_ips_index = [for ip in local.cluster_instances_ips : tostring((tonumber(split(".",ip)[3])-tonumber(split(".",local.first_vcn_ip)[3]))+256*(tonumber(split(".",ip)[2])-tonumber(split(".",local.first_vcn_ip)[2]))+1)]

   // vcn id derived either from created vcn or existing if specified
   vcn_id = var.use_existing_vcn ? var.vcn_id : element(concat(oci_core_vcn.vcn.*.id, [""]), 0)
@@ -23,12 +25,12 @@ locals {
   nfs_source_IP = var.create_fss ? element(concat(oci_file_storage_mount_target.FSSMountTarget.*.ip_address, [""]), 0) : var.nfs_source_IP

   // subnet id derived either from created subnet or existing if specified
-//  bastion_subnet_id = var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.public-subnet.*.id, [""]), 0)
-  bastion_subnet_id = var.private_deployment ? var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.private-subnet.*.id, [""]), 0) : var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.public-subnet.*.id, [""]), 0)
+//  controller_subnet_id = var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.public-subnet.*.id, [""]), 0)
+  controller_subnet_id = var.private_deployment ? var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.private-subnet.*.id, [""]), 0) : var.use_existing_vcn ? var.public_subnet_id : element(concat(oci_core_subnet.public-subnet.*.id, [""]), 0)

   cluster_name = var.use_custom_name ? var.cluster_name : random_pet.name.id

-  bastion_image = var.use_marketplace_image_bastion ? oci_core_app_catalog_subscription.bastion_mp_image_subscription[0].listing_resource_id : local.custom_bastion_image_ocid
+  controller_image = var.use_marketplace_image_controller ? oci_core_app_catalog_subscription.controller_mp_image_subscription[0].listing_resource_id : local.custom_controller_image_ocid

   login_image = var.login_node && var.use_marketplace_image_login ? oci_core_app_catalog_subscription.login_mp_image_subscription[0].listing_resource_id : local.custom_login_image_ocid

@@ -38,12 +40,12 @@ locals {

   // image = (var.cluster_network && var.use_marketplace_image == true) || (var.cluster_network == false && var.use_marketplace_image == false) ? var.image : data.oci_core_images.linux.images.0.id

-  is_bastion_flex_shape = length(regexall(".*VM.*.*Flex$", var.bastion_shape)) > 0 ? [local.bastion_ocpus]:[]
+  is_controller_flex_shape = length(regexall(".*VM.*.*Flex$", var.controller_shape)) > 0 ? [local.controller_ocpus]:[]
   is_login_flex_shape = length(regexall(".*VM.*.*Flex$", var.login_shape)) > 0 ? [local.login_ocpus]:[]

   is_instance_pool_flex_shape = length(regexall(".*VM.*.*Flex$", var.instance_pool_shape)) > 0 ? [local.instance_pool_ocpus]:[]

-  bastion_mount_ip = var.bastion_block ? element(concat(oci_core_volume_attachment.bastion_volume_attachment.*.ipv4, [""]), 0) : "none"
+  controller_mount_ip = var.controller_block ? element(concat(oci_core_volume_attachment.controller_volume_attachment.*.ipv4, [""]), 0) : "none"

   login_mount_ip = var.login_block ? element(concat(oci_core_volume_attachment.login_volume_attachment.*.ipv4, [""]), 0) : "none"

   scratch_nfs_type = var.cluster_network ? var.scratch_nfs_type_cluster : var.scratch_nfs_type_pool
@@ -56,10 +58,10 @@ locals {

   cluster_ocid = var.node_count > 0 ? var.compute_cluster ? oci_core_compute_cluster.compute_cluster[0].id : var.cluster_network ? oci_core_cluster_network.cluster_network[0].id : oci_core_instance_pool.instance_pool[0].id : ""

-  host = var.private_deployment ? data.oci_resourcemanager_private_endpoint_reachable_ip.private_endpoint_reachable_ip[0].ip_address : oci_core_instance.bastion.public_ip
-  bastion_bool_ip = var.private_deployment ? false : true
+  host = var.private_deployment ? data.oci_resourcemanager_private_endpoint_reachable_ip.private_endpoint_reachable_ip[0].ip_address : oci_core_instance.controller.public_ip
+  controller_bool_ip = var.private_deployment ? false : true
   login_bool_ip = var.private_deployment ? false : true
-  bastion_subnet = var.private_deployment ? oci_core_subnet.private-subnet : oci_core_subnet.public-subnet
+  controller_subnet = var.private_deployment ? oci_core_subnet.private-subnet : oci_core_subnet.public-subnet
   private_subnet_cidr = var.private_deployment ? [var.public_subnet, var.private_subnet] : [var.private_subnet]
   host_backup = var.slurm_ha ? var.private_deployment ? data.oci_resourcemanager_private_endpoint_reachable_ip.private_endpoint_reachable_ip_backup[0].ip_address : oci_core_instance.backup[0].public_ip : "none"
   host_login = var.login_node ? var.private_deployment ? data.oci_resourcemanager_private_endpoint_reachable_ip.private_endpoint_reachable_ip_login[0].ip_address : oci_core_instance.login[0].public_ip : "none"
@@ -67,4 +69,6 @@ locals {
   timeout_per_batch= var.cluster_network ? 30 : 15
   timeout_ip = join("",[ (( var.node_count - ( var.node_count % 20 ) )/20 + 1 ) * local.timeout_per_batch,"m"])
+  zone_name = var.use_existing_vcn ? var.zone_name : "${local.cluster_name}.local"
+  platform_type = local.shape == "BM.GPU4.8" ? "AMD_ROME_BM_GPU" : local.shape == "BM.GPU.B4.8" || local.shape == "BM.GPU.H100.8" || local.shape == "BM.GPU.A100-v2.8" ? "AMD_MILAN_BM_GPU" : local.shape == "BM.Standard.E3.128" ? "AMD_ROME_BM" : local.shape == "BM.Standard.E4.128" || local.shape == "BM.DenseIO.E4.128" ? "AMD_MILAN_BM" : "GENERIC_BM"
 }
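`cluster_instances_ips_index` converts each node's private IP into a stable, 1-based position inside the private subnet; network.tf later uses it to publish the `${queue}-permanent-N` DNS names. A runnable sketch of the same arithmetic for a single address (the subnet base and IP are illustrative):

```
locals {
  first_vcn_ip = "10.0.0.0" # cidrhost(subnet_cidr, 0)
  node_ip      = "10.0.2.5"

  # (5 - 0) + 256 * (2 - 0) + 1 = 518
  node_index = (tonumber(split(".", local.node_ip)[3]) - tonumber(split(".", local.first_vcn_ip)[3])) + 256 * (tonumber(split(".", local.node_ip)[2]) - tonumber(split(".", local.first_vcn_ip)[2])) + 1
}

output "node_index" {
  # 518, so this node would be published as e.g. compute-permanent-518.
  value = local.node_index
}
```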
diff --git a/login.tf b/login.tf
index 22200fc5..1aba036d 100644
--- a/login.tf
+++ b/login.tf
@@ -43,17 +43,32 @@ resource "oci_core_instance" "login" {
   metadata = {
     ssh_authorized_keys = "${var.ssh_key}\n${tls_private_key.ssh.public_key_openssh}"
-    user_data           = base64encode(data.template_file.bastion_config.rendered)
+    user_data           = base64encode(data.template_file.controller_config.rendered)
   }
   source_details {
-//    source_id = var.use_standard_image ? data.oci_core_images.linux.images.0.id : local.custom_bastion_image_ocid
+//    source_id = var.use_standard_image ? data.oci_core_images.linux.images.0.id : local.custom_controller_image_ocid
     source_id               = local.login_image
     boot_volume_size_in_gbs = var.login_boot_volume_size
     source_type             = "image"
   }
   create_vnic_details {
-    subnet_id        = local.bastion_subnet_id
+    subnet_id        = local.controller_subnet_id
     assign_public_ip = local.login_bool_ip
   }
 }
+
+resource "oci_dns_rrset" "rrset-login" {
+  count           = var.login_node && var.dns_entries ? 1 : 0
+  zone_name_or_id = data.oci_dns_zones.dns_zones.zones[0].id
+  domain          = "${var.login_node ? oci_core_instance.login[0].display_name : ""}.${local.zone_name}"
+  rtype           = "A"
+  items {
+    domain = "${var.login_node ? oci_core_instance.login[0].display_name : ""}.${local.zone_name}"
+    rtype  = "A"
+    rdata  = var.login_node ? oci_core_instance.login[0].private_ip: ""
+    ttl    = 3600
+  }
+  scope   = "PRIVATE"
+  view_id = data.oci_dns_views.dns_views.views[0].id
+}
\ No newline at end of file
diff --git a/marketplace.tf b/marketplace.tf
index c434af50..a735598d 100755
--- a/marketplace.tf
+++ b/marketplace.tf
@@ -1,10 +1,10 @@
 locals {
 //  listing_number = split(".", var.marketplace_listing)[0]
   mp_listing_id = var.use_marketplace_image ? substr(var.marketplace_listing,0,3) == "HPC" ? var.marketplace_listing_id_HPC : var.marketplace_listing_id_GPU : ""
-  mp_bastion_listing_id = var.use_marketplace_image_bastion ? substr(var.marketplace_listing_bastion,0,3) == "HPC" ? var.marketplace_listing_id_HPC : var.marketplace_listing_id_GPU : ""
+  mp_controller_listing_id = var.use_marketplace_image_controller ? substr(var.marketplace_listing_controller,0,3) == "HPC" ? var.marketplace_listing_id_HPC : var.marketplace_listing_id_GPU : ""
   mp_login_listing_id = var.use_marketplace_image_login ? substr(var.marketplace_listing_login,0,3) == "HPC" ? var.marketplace_listing_id_HPC : var.marketplace_listing_id_GPU : ""
   mp_version_id = var.marketplace_version_id[var.marketplace_listing]
-  mp_bastion_version_id = var.marketplace_version_id[var.marketplace_listing_bastion]
+  mp_controller_version_id = var.marketplace_version_id[var.marketplace_listing_controller]
   mp_login_version_id = var.marketplace_version_id[var.marketplace_listing_login]
 }

@@ -48,28 +48,28 @@ resource "oci_core_app_catalog_subscription" "mp_image_subscription" {
   }
 }

-data "oci_core_app_catalog_listing_resource_versions" "bastion_app_catalog_listing_resource_versions" {
-  count      = var.use_marketplace_image_bastion ? 1 : 0
-  listing_id = local.mp_bastion_listing_id
+data "oci_core_app_catalog_listing_resource_versions" "controller_app_catalog_listing_resource_versions" {
+  count      = var.use_marketplace_image_controller ? 1 : 0
+  listing_id = local.mp_controller_listing_id
 }

-resource "oci_core_app_catalog_listing_resource_version_agreement" "bastion_mp_image_agreement" {
-  count = ( var.use_marketplace_image_bastion ) ? 1 : 0
+resource "oci_core_app_catalog_listing_resource_version_agreement" "controller_mp_image_agreement" {
+  count = ( var.use_marketplace_image_controller ) ? 1 : 0

-  listing_id               = local.mp_bastion_listing_id
-  listing_resource_version = local.mp_bastion_version_id
+  listing_id               = local.mp_controller_listing_id
+  listing_resource_version = local.mp_controller_version_id
 }

-resource "oci_core_app_catalog_subscription" "bastion_mp_image_subscription" {
-  count          = ( var.use_marketplace_image_bastion ) ? 1 : 0
+resource "oci_core_app_catalog_subscription" "controller_mp_image_subscription" {
+  count          = ( var.use_marketplace_image_controller ) ? 1 : 0
   compartment_id = var.targetCompartment
-  eula_link                = oci_core_app_catalog_listing_resource_version_agreement.bastion_mp_image_agreement[0].eula_link
-  listing_id               = oci_core_app_catalog_listing_resource_version_agreement.bastion_mp_image_agreement[0].listing_id
-  listing_resource_version = oci_core_app_catalog_listing_resource_version_agreement.bastion_mp_image_agreement[0].listing_resource_version
-  oracle_terms_of_use_link = oci_core_app_catalog_listing_resource_version_agreement.bastion_mp_image_agreement[0].oracle_terms_of_use_link
-  signature                = oci_core_app_catalog_listing_resource_version_agreement.bastion_mp_image_agreement[0].signature
-  time_retrieved           = oci_core_app_catalog_listing_resource_version_agreement.bastion_mp_image_agreement[0].time_retrieved
+  eula_link                = oci_core_app_catalog_listing_resource_version_agreement.controller_mp_image_agreement[0].eula_link
+  listing_id               = oci_core_app_catalog_listing_resource_version_agreement.controller_mp_image_agreement[0].listing_id
+  listing_resource_version = oci_core_app_catalog_listing_resource_version_agreement.controller_mp_image_agreement[0].listing_resource_version
+  oracle_terms_of_use_link = oci_core_app_catalog_listing_resource_version_agreement.controller_mp_image_agreement[0].oracle_terms_of_use_link
+  signature                = oci_core_app_catalog_listing_resource_version_agreement.controller_mp_image_agreement[0].signature
+  time_retrieved           = oci_core_app_catalog_listing_resource_version_agreement.controller_mp_image_agreement[0].time_retrieved

   timeouts {
     create = "20m"
diff --git a/mysql.tf b/mysql.tf
index 78c33ca2..fd21313c 100644
--- a/mysql.tf
+++ b/mysql.tf
@@ -3,7 +3,7 @@ resource "oci_mysql_mysql_db_system" "monitoring_mysql_db_system" {
   count               = var.autoscaling_monitoring && var.autoscaling_mysql_service ? 1 : 0
   admin_password      = var.admin_password
   admin_username      = var.admin_username
-  availability_domain = var.bastion_ad
+  availability_domain = var.controller_ad
   compartment_id      = var.targetCompartment
   shape_name          = var.monitoring_shape_name
   subnet_id           = local.subnet_id
diff --git a/network.tf b/network.tf
index 57617907..6952fd64 100755
--- a/network.tf
+++ b/network.tf
@@ -145,7 +145,20 @@ resource "oci_core_route_table" "private_route_table" {
     network_entity_id = oci_core_service_gateway.sg1[0].id
   }
 }
-
+resource "oci_core_dhcp_options" "cluster_dhcp_options" {
+  count          = var.use_existing_vcn ? 0 : 1
+  compartment_id = var.targetCompartment
+  options {
+    type        = "DomainNameServer"
+    server_type = "VcnLocalPlusInternet"
+  }
+  options {
+    type                = "SearchDomain"
+    search_domain_names = [ "${var.dns_entries? local.zone_name : "cluster.oraclevcn.com"}" ]
+  }
+  vcn_id       = oci_core_vcn.vcn[0].id
+  display_name = "${local.cluster_name}_DHCP"
+}
 resource "oci_core_subnet" "public-subnet" {
   count = (var.use_existing_vcn || var.private_deployment) ? 0 : 1
 #  availability_domain = var.ad
@@ -156,6 +169,7 @@ resource "oci_core_subnet" "public-subnet" {
   dns_label      = "public"
   display_name   = "${local.cluster_name}_public_subnet"
   route_table_id = oci_core_route_table.public_route_table[0].id
+  dhcp_options_id = oci_core_dhcp_options.cluster_dhcp_options[0].id
 }

 resource "oci_core_subnet" "private-subnet" {
@@ -169,4 +183,45 @@ resource "oci_core_subnet" "private-subnet" {
   display_name               = "${local.cluster_name}_private_subnet${count.index+1}"
   prohibit_public_ip_on_vnic = true
   route_table_id             = oci_core_route_table.private_route_table[0].id
+  dhcp_options_id            = oci_core_dhcp_options.cluster_dhcp_options[0].id
 }
+
+resource "oci_dns_zone" "dns_zone" {
+  count          = var.use_existing_vcn ? 0 : 1
+  compartment_id = var.targetCompartment
+  name           = "${local.cluster_name}.local" #oci_core_dhcp_options.cluster_dhcp_options[0].options.search_domain_names[0]
+  zone_type      = "PRIMARY"
+  scope          = "PRIVATE"
+  view_id        = data.oci_dns_views.dns_views.views[0].id
+}
+
+resource "oci_dns_rrset" "rrset-cluster-network-OCI" {
+  for_each        = var.dns_entries ? toset([for v in range(var.node_count) : tostring(v)]) : []
+  zone_name_or_id = data.oci_dns_zones.dns_zones.zones[0].id
+  domain          = "${local.cluster_instances_names[tonumber(each.key)]}.${local.zone_name}"
+  rtype           = "A"
+  items {
+    domain = "${local.cluster_instances_names[tonumber(each.key)]}.${local.zone_name}"
+    rtype  = "A"
+    rdata  = "${local.cluster_instances_ips[tonumber(each.key)]}"
+    ttl    = 3600
+  }
+  scope   = "PRIVATE"
+  view_id = data.oci_dns_views.dns_views.views[0].id
+}
+
+resource "oci_dns_rrset" "rrset-cluster-network-SLURM" {
+
+  for_each        = var.slurm && var.dns_entries ? toset([for v in range(var.node_count) : tostring(v)]) : []
+  zone_name_or_id = data.oci_dns_zones.dns_zones.zones[0].id
+  domain          = "${var.queue}-permanent-${local.cluster_instances_ips_index[tonumber(each.key)]}.${local.zone_name}"
+  rtype           = "A"
+  items {
+    domain = "${var.queue}-permanent-${local.cluster_instances_ips_index[tonumber(each.key)]}.${local.zone_name}"
+    rtype  = "A"
+    rdata  = "${local.cluster_instances_ips[tonumber(each.key)]}"
+    ttl    = 3600
+  }
+  scope   = "PRIVATE"
+  view_id = data.oci_dns_views.dns_views.views[0].id
+}
\ No newline at end of file
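`for_each` on a resource requires a map or a set of strings, so the node indices produced by `range()` are first converted with `tostring()` and `toset()`; each key then selects one instance's name and IP from the locals. A runnable sketch of that conversion (the variable is illustrative):

```
variable "node_count" { default = 3 }

output "for_each_keys" {
  # Yields ["0", "1", "2"]; rrset-cluster-network-OCI uses each key k to
  # look up cluster_instances_names[tonumber(k)] and the matching IP.
  value = toset([for v in range(var.node_count) : tostring(v)])
}
```

Together with the `SearchDomain` DHCP option above, these A records let cluster hosts resolve each other by short name inside the private zone.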
"{{ hostvars[groups['controller'][0]]['ansible_default_ipv4']['address'] }}" export_path: "/export/cluster" options: "defaults,noatime,bg,timeo=100,ac,actimeo=120,nocto,rsize=1048576,wsize=1048576,nolock,local_lock={{ lock }},mountproto=tcp,sec=sys,_netdev" lock: "all" @@ -136,7 +133,7 @@ name: nfs-client vars: local_path: "/home" - export_host: "{{ hostvars[groups['bastion'][0]]['ansible_default_ipv4']['address'] }}" + export_host: "{{ hostvars[groups['controller'][0]]['ansible_default_ipv4']['address'] }}" export_path: "/home" options: "defaults,noatime,bg,timeo=100,ac,actimeo=120,nocto,rsize=1048576,wsize=1048576,nolock,local_lock={{ lock }},mountproto=tcp,sec=sys,_netdev" lock: "all" @@ -176,6 +173,9 @@ when: enroot|default(true)|bool - include_role: name: tuned + - include_role: + name: nccl-conf + when: cluster_network|bool - hosts: all become: true diff --git a/playbooks/resize_remove.yml b/playbooks/resize_remove.yml index c75ea9fc..99029c50 100755 --- a/playbooks/resize_remove.yml +++ b/playbooks/resize_remove.yml @@ -1,4 +1,4 @@ -- hosts: bastion, slurm_backup, compute, login +- hosts: controller, slurm_backup, compute, login become: true gather_facts: true vars: diff --git a/playbooks/resize_remove_unreachable.yml b/playbooks/resize_remove_unreachable.yml index 4a5b95e7..5d8f274f 100644 --- a/playbooks/resize_remove_unreachable.yml +++ b/playbooks/resize_remove_unreachable.yml @@ -1,4 +1,4 @@ -- hosts: bastion, compute, slurm_backup, login +- hosts: controller, compute, slurm_backup, login become: true gather_facts: true vars: diff --git a/playbooks/roles/autoscaling_mon/tasks/main.yml b/playbooks/roles/autoscaling_mon/tasks/main.yml index e3450c91..6b947a1b 100755 --- a/playbooks/roles/autoscaling_mon/tasks/main.yml +++ b/playbooks/roles/autoscaling_mon/tasks/main.yml @@ -1,4 +1,4 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_os_family == 'Debian' diff --git a/playbooks/roles/boot-volume/tasks/main.yml b/playbooks/roles/boot-volume/tasks/main.yml index 8d556a44..a5275781 100644 --- a/playbooks/roles/boot-volume/tasks/main.yml +++ b/playbooks/roles/boot-volume/tasks/main.yml @@ -1,2 +1,2 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' diff --git a/playbooks/roles/cloud-agent_update/tasks/el.yml b/playbooks/roles/cloud-agent_update/tasks/el.yml index 60211cd3..4102c6f7 100644 --- a/playbooks/roles/cloud-agent_update/tasks/el.yml +++ b/playbooks/roles/cloud-agent_update/tasks/el.yml @@ -4,36 +4,36 @@ register: version when: use_compute_agent | bool -- name: Install OCA v1.37 for OL8 +- name: Install OCA v1.38 for OL8 vars: - major_version: "{{version.stdout.split('.')[1] }}" - minor_version: "{{version.stdout.split('.')[0] }}" - sub_version: "{{version.stdout.split('.')[2].split('-')[0] }}" yum: - name: "https://objectstorage.us-phoenix-1.oraclecloud.com/p/aV_mSl96KIiapAeZtsyo-SUcPCSurDfWaj06f4XVVoNKIsxvqlZ65guPTnMuNawR/n/imagegen/b/agent_test/o/1.37.0/3/oracle-cloud-agent-1.37.2-10459.el8.x86_64.rpm" + name: "https://objectstorage.us-phoenix-1.oraclecloud.com/p/H1npAGRle5v4izHQkTysF_tfdsgO43iawRc4IC2xL5LwO6T36m8o34T8_kc_KaBS/n/imagegen/b/agent_test/o/1.38.0/3/oracle-cloud-agent-1.38.0-10815.el8.x86_64.rpm" state: present disable_gpg_check: yes when: - ansible_os_family == 'RedHat' - ansible_distribution_major_version == '8' - (minor_version | int <= 1) | bool - - (major_version | int <= 37) | bool - - 
(sub_version | int < 2) | bool + - (major_version | int <= 38) | bool + - (sub_version | int < 0) | bool - use_compute_agent | bool -- name: Install OCA v1.37 for OL7 +- name: Install OCA v1.38 for OL7 vars: - major_version: "{{version.stdout.split('.')[1] }}" - minor_version: "{{version.stdout.split('.')[0] }}" - sub_version: "{{version.stdout.split('.')[2].split('-')[0] }}" yum: - name: "https://objectstorage.us-phoenix-1.oraclecloud.com/p/YmPlysZFl4CKrLTKN9Rj0CMPt8qiJgflvF4vXsOaaqOfcm5NMnyBJl_dlC0V0lTo/n/imagegen/b/agent_test/o/1.37.0/3/oracle-cloud-agent-1.37.2-10459.el7.x86_64.rpm" + name: "https://objectstorage.us-phoenix-1.oraclecloud.com/p/v7U4X2bmcA_iY6UoRiGALU-A8xIrcsMZWjnfgk8zi4BDX5pfU1BV0XbHR9Iy6OJk/n/imagegen/b/agent_test/o/1.38.0/3/oracle-cloud-agent-1.38.0-10815.el7.x86_64.rpm" state: present disable_gpg_check: yes when: - ansible_os_family == 'RedHat' - ansible_distribution_major_version == '7' - (minor_version | int <= 1) | bool - - (major_version | int <= 37) | bool - - (sub_version | int < 2) | bool + - (major_version | int <= 38) | bool + - (sub_version | int < 0) | bool - use_compute_agent | bool \ No newline at end of file diff --git a/playbooks/roles/cloud-agent_update/tasks/main.yml b/playbooks/roles/cloud-agent_update/tasks/main.yml index ea4d5d2a..cbceb221 100644 --- a/playbooks/roles/cloud-agent_update/tasks/main.yml +++ b/playbooks/roles/cloud-agent_update/tasks/main.yml @@ -1,4 +1,4 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_os_family == 'Debian' diff --git a/playbooks/roles/cluster-cli/files/cluster b/playbooks/roles/cluster-cli/files/cluster index a91c2ebf..4bb4e623 100755 --- a/playbooks/roles/cluster-cli/files/cluster +++ b/playbooks/roles/cluster-cli/files/cluster @@ -6,7 +6,7 @@ import grp import pwd import os -host = 'bastion' +host = 'controller' bind_dn = 'cn=manager,dc=local' groups_dn = 'ou=Group,dc=local' people_dn = 'ou=People,dc=local' @@ -184,8 +184,10 @@ def add(user, password, uid, gid, name, nossh): if(conn.result['result'] != 0): print(conn.result) + homedir='/home/{}/'.format(user) + os.system("sudo su - "+user+" -c "+" 'ls' 2> /dev/null") + if not nossh: - homedir='/home/{}/'.format(user) os.system("sudo su - "+user+" -c "+"' ssh-keygen -t rsa -b 2048 -q -f "+homedir+".ssh/id_rsa -P \"\"' 2> /dev/null") os.system("sudo su - "+user+" -c "+"'mv "+homedir+".ssh/id_rsa.pub "+homedir+".ssh/authorized_keys' 2> /dev/null") @user.command() diff --git a/playbooks/roles/cluster-cli/tasks/main.yml b/playbooks/roles/cluster-cli/tasks/main.yml index 0ef20964..22c0d8cc 100755 --- a/playbooks/roles/cluster-cli/tasks/main.yml +++ b/playbooks/roles/cluster-cli/tasks/main.yml @@ -1,8 +1,8 @@ -- include: el7.yml +- include_tasks: el7.yml when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == '7' -- include: el8.yml +- include_tasks: el8.yml when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == '8' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/cron/tasks/main.yml b/playbooks/roles/cron/tasks/main.yml index e3450c91..6b947a1b 100755 --- a/playbooks/roles/cron/tasks/main.yml +++ b/playbooks/roles/cron/tasks/main.yml @@ -1,4 +1,4 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: 
ansible_os_family == 'Debian' diff --git a/playbooks/roles/destroy_unreachable/tasks/main.yml b/playbooks/roles/destroy_unreachable/tasks/main.yml index 1c7b0d2c..f7df4002 100644 --- a/playbooks/roles/destroy_unreachable/tasks/main.yml +++ b/playbooks/roles/destroy_unreachable/tasks/main.yml @@ -1,7 +1,7 @@ -- include: common.yml +- include_tasks: common.yml -- include: slurm-rack-aware.yml +- include_tasks: slurm-rack-aware.yml when: rack_aware | bool -- include: slurm.yml +- include_tasks: slurm.yml when: not rack_aware | bool \ No newline at end of file diff --git a/playbooks/roles/destroy_unreachable/tasks/slurm-rack-aware.yml b/playbooks/roles/destroy_unreachable/tasks/slurm-rack-aware.yml index afe0cd62..d1bdc2a9 100644 --- a/playbooks/roles/destroy_unreachable/tasks/slurm-rack-aware.yml +++ b/playbooks/roles/destroy_unreachable/tasks/slurm-rack-aware.yml @@ -114,9 +114,9 @@ # - name: debug # debug: -# msg: "Replacing line: SwitchName={{upperswitchnames[item]}}\\sSwitches.* with SwitchName={{upperswitchnames[item]}} Switches={{racks_on_switch_dict[item] | difference(switchnames[item]) | join(',') }}" +# msg: "Replacing line: SwitchName={{upperswitchnames[item]}}\\sSwitches.* with SwitchName={{upperswitchnames[item]}} Switches={{racks_on_switch_dict[item] | difference([switchnames[item]]) | join(',') }}" # with_items: "{{unreachable_slurm_nodes}}" -# when: ( not upperswitchnames[item] is match("inactive-.*") ) and ( ( racks_on_switch_dict[item] | difference(switchnames[item]) | length ) > 0 ) and ( upperswitchnames[item] | length ) > 1 +# when: ( not upperswitchnames[item] is match("inactive-.*") ) and ( ( racks_on_switch_dict[item] | difference([switchnames[item]]) | length ) > 0 ) and ( upperswitchnames[item] | length ) > 1 # run_once: true # delegate_to: 127.0.0.1 @@ -124,11 +124,11 @@ lineinfile: path: "{{ slurm_conf_path }}/topology.conf" regexp: "SwitchName={{upperswitchnames[item]}}\\sSwitches.*" - line: "SwitchName={{upperswitchnames[item]}} Switches={{racks_on_switch_dict[item] | difference(switchnames[item]) | join(',') }}" + line: "SwitchName={{upperswitchnames[item]}} Switches={{racks_on_switch_dict[item] | difference([switchnames[item]]) | join(',') }}" state: present with_items: "{{unreachable_slurm_nodes}}" ignore_errors: yes - when: ( not upperswitchnames[item] is match("inactive-.*") ) and ( ( racks_on_switch_dict[item] | difference(switchnames[item]) | length ) > 0 ) and ( upperswitchnames[item] | length ) > 1 and ( nodes_on_switch[item] | length ) < 2 + when: ( not upperswitchnames[item] is match("inactive-.*") ) and ( ( racks_on_switch_dict[item] | difference([switchnames[item]]) | length ) > 0 ) and ( upperswitchnames[item] | length ) > 1 and ( nodes_on_switch[item] | length ) < 2 run_once: true delegate_to: 127.0.0.1 @@ -137,7 +137,7 @@ # msg: "removing line line: SwitchName={{upperswitchnames[item]}}\\sSwitches.*" # with_items: "{{unreachable_slurm_nodes}}" # ignore_unreachable: yes -# when: ( not upperswitchnames[item] is match("inactive-.*") ) and ( ( racks_on_switch_dict[item] | difference(switchnames[item]) | length ) == 0 ) and ( upperswitchnames[item] | length ) > 1 +# when: ( not upperswitchnames[item] is match("inactive-.*") ) and ( ( racks_on_switch_dict[item] | difference([switchnames[item]]) | length ) == 0 ) and ( upperswitchnames[item] | length ) > 1 # run_once: true # delegate_to: 127.0.0.1 @@ -148,7 +148,7 @@ state: absent with_items: "{{unreachable_slurm_nodes}}" ignore_unreachable: yes - when: ( not upperswitchnames[item] is match("inactive-.*") 
+    when: ( not upperswitchnames[item] is match("inactive-.*") ) and ( ( racks_on_switch_dict[item] | difference([switchnames[item]]) | length ) == 0 ) and ( upperswitchnames[item] | length ) > 1 and ( nodes_on_switch[item] | length ) < 2
    run_once: true
    delegate_to: 127.0.0.1

@@ -253,7 +253,7 @@
    ignore_unreachable: True
    with_items: "{{unreachable_slurm_nodes}}"
    delegate_to: 127.0.0.1
-    when: ('bastion' in group_names)
+    when: ('controller' in group_names)

 - name: move topology.conf on backup servers
   become: true
diff --git a/playbooks/roles/destroy_unreachable/tasks/slurm.yml b/playbooks/roles/destroy_unreachable/tasks/slurm.yml
index 6249b5ca..e97520c7 100644
--- a/playbooks/roles/destroy_unreachable/tasks/slurm.yml
+++ b/playbooks/roles/destroy_unreachable/tasks/slurm.yml
@@ -143,7 +143,7 @@
    ignore_unreachable: True
    with_items: "{{unreachable_slurm_nodes}}"
    delegate_to: 127.0.0.1
-    when: ('bastion' in group_names)
+    when: ('controller' in group_names)

 - name: move topology.conf on backup servers
   become: true
diff --git a/playbooks/roles/docker/tasks/main.yml b/playbooks/roles/docker/tasks/main.yml
index 62c22a6b..5ed5747d 100644
--- a/playbooks/roles/docker/tasks/main.yml
+++ b/playbooks/roles/docker/tasks/main.yml
@@ -1,10 +1,10 @@
 ---
-- include: oraclelinux.yml
+- include_tasks: oraclelinux.yml
  when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux'

-#- include: centos-7.yml
+#- include_tasks: centos-7.yml
#  when: ansible_os_family == 'RedHat' and ansible_distribution == 'CentOS' and ansible_distribution_major_version == '7'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_os_family == 'Debian'
diff --git a/playbooks/roles/etc-hosts/tasks/common.yml b/playbooks/roles/etc-hosts/tasks/common.yml
index cd39a6c8..97888dcb 100644
--- a/playbooks/roles/etc-hosts/tasks/common.yml
+++ b/playbooks/roles/etc-hosts/tasks/common.yml
@@ -1,22 +1,22 @@
 ---
-- name: create bastion part of the /etc/hosts files for the compute nodes
+- name: create controller part of the /etc/hosts files for the compute nodes
   blockinfile:
     dest: /tmp/hosts.etc.{{ cluster_name }}
-    content: "{{ lookup('template', 'templates/etc-hosts-bastion.j2') }}"
+    content: "{{ lookup('template', 'templates/etc-hosts-controller.j2') }}"
     state: present
     create: yes
-    marker: "# {mark} ANSIBLE MANAGED BLOCK BASTION"
+    marker: "# {mark} ANSIBLE MANAGED BLOCK controller"
   delegate_to: 127.0.0.1
   run_once: true
   when: not destroy|bool

-- name: create bastion part of the /etc/hosts files for the bastion
+- name: create controller part of the /etc/hosts files for the controller
   blockinfile:
     dest: /etc/hosts
-    content: "{{ lookup('template', 'templates/etc-hosts-bastion.j2') }}"
+    content: "{{ lookup('template', 'templates/etc-hosts-controller.j2') }}"
     state: present
     create: yes
-    marker: "# {mark} ANSIBLE MANAGED BLOCK BASTION"
+    marker: "# {mark} ANSIBLE MANAGED BLOCK controller"
   delegate_to: 127.0.0.1
   run_once: true
   when: not destroy|bool
@@ -41,7 +41,7 @@
   run_once: true
   when: not destroy|bool and groups['compute']|length > 0

-- name: add cluster nodes to the /etc/hosts file of the bastion
+- name: add cluster nodes to the /etc/hosts file of the controller
   blockinfile:
     dest: /etc/hosts
     content: "{{ lookup('template', 'templates/etc-hosts.j2') }}"
@@ -64,7 +64,7 @@
   become: true
   lineinfile:
     dest: /etc/hosts
-    regexp: "^127.0.1.1\\s{{hostvars[groups['bastion'][0]]['inventory_hostname']}}.*"
+    regexp: "^127.0.1.1\\s{{hostvars[groups['controller'][0]]['inventory_hostname']}}.*"
     state: absent
   when: ( not destroy|bool ) and (('slurm_backup' in group_names) or ('login' in group_names))

@@ -74,7 +74,7 @@
     dest: /etc/hosts
     src: /tmp/hosts.etc.{{ cluster_name }}
     force: yes
-  when: ( not destroy|bool ) and (not 'bastion' in group_names) and (not 'slurm_backup' in group_names) and (not 'login' in group_names)
+  when: ( not destroy|bool ) and (not 'controller' in group_names) and (not 'slurm_backup' in group_names) and (not 'login' in group_names)

 - name: remove cluster from etc-host
   become: true
diff --git a/playbooks/roles/etc-hosts/tasks/main.yml b/playbooks/roles/etc-hosts/tasks/main.yml
index 7d623a50..f6f5f12b 100755
--- a/playbooks/roles/etc-hosts/tasks/main.yml
+++ b/playbooks/roles/etc-hosts/tasks/main.yml
@@ -1 +1 @@
-- include: common.yml
+- include_tasks: common.yml
diff --git a/playbooks/roles/etc-hosts/templates/etc-hosts-bastion.j2 b/playbooks/roles/etc-hosts/templates/etc-hosts-controller.j2
similarity index 87%
rename from playbooks/roles/etc-hosts/templates/etc-hosts-bastion.j2
rename to playbooks/roles/etc-hosts/templates/etc-hosts-controller.j2
index 0289b5c0..e604e118 100755
--- a/playbooks/roles/etc-hosts/templates/etc-hosts-bastion.j2
+++ b/playbooks/roles/etc-hosts/templates/etc-hosts-controller.j2
@@ -1,6 +1,6 @@
-{% for item in groups['bastion'] %}
+{% for item in groups['controller'] %}
 {% set short_name = hostvars[item]['ansible_fqdn'].split('.') %}
-{{ hostvars[item]['ansible_host'] }} {{ hostvars[item]['ansible_fqdn'] }} {{ short_name[0] }} bastion
+{{ hostvars[item]['ansible_host'] }} {{ hostvars[item]['ansible_fqdn'] }} {{ short_name[0] }} controller
 {% endfor %}
 {% for item in groups['slurm_backup'] %}
 {% set short_name = hostvars[item]['ansible_fqdn'].split('.') %}
diff --git a/playbooks/roles/firewall/tasks/main.yml b/playbooks/roles/firewall/tasks/main.yml
index e3450c91..6b947a1b 100755
--- a/playbooks/roles/firewall/tasks/main.yml
+++ b/playbooks/roles/firewall/tasks/main.yml
@@ -1,4 +1,4 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_os_family == 'Debian'
diff --git a/playbooks/roles/fix_broken/tasks/main.yml b/playbooks/roles/fix_broken/tasks/main.yml
index 87c93a55..c18728a3 100644
--- a/playbooks/roles/fix_broken/tasks/main.yml
+++ b/playbooks/roles/fix_broken/tasks/main.yml
@@ -2,5 +2,5 @@
# tasks file for fix_broken
# to resolve error for not able to install nfs-kernel-server. seeing the same error for compute nodes while installing other packages. so adding this to run on all compute hosts the first time itself.
-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_distribution == 'Ubuntu'
\ No newline at end of file
diff --git a/playbooks/roles/fix_ldap/tasks/main.yml b/playbooks/roles/fix_ldap/tasks/main.yml
index cd6ba1b8..a9855733 100644
--- a/playbooks/roles/fix_ldap/tasks/main.yml
+++ b/playbooks/roles/fix_ldap/tasks/main.yml
@@ -1,2 +1,2 @@
-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_distribution == 'Ubuntu'
\ No newline at end of file
diff --git a/playbooks/roles/fss-home/tasks/main.yml b/playbooks/roles/fss-home/tasks/main.yml
index e3450c91..6b947a1b 100644
--- a/playbooks/roles/fss-home/tasks/main.yml
+++ b/playbooks/roles/fss-home/tasks/main.yml
@@ -1,4 +1,4 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_os_family == 'Debian'
diff --git a/playbooks/roles/grafana/tasks/main.yml b/playbooks/roles/grafana/tasks/main.yml
index e3450c91..6b947a1b 100755
--- a/playbooks/roles/grafana/tasks/main.yml
+++ b/playbooks/roles/grafana/tasks/main.yml
@@ -1,4 +1,4 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_os_family == 'Debian'
diff --git a/playbooks/roles/home_nfs/tasks/main.yml b/playbooks/roles/home_nfs/tasks/main.yml
index e6a3fa1b..cae2d534 100644
--- a/playbooks/roles/home_nfs/tasks/main.yml
+++ b/playbooks/roles/home_nfs/tasks/main.yml
@@ -1,2 +1,2 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'
diff --git a/playbooks/roles/hostname/tasks/el.yml b/playbooks/roles/hostname/tasks/el.yml
index 682efa32..98966fbe 100755
--- a/playbooks/roles/hostname/tasks/el.yml
+++ b/playbooks/roles/hostname/tasks/el.yml
@@ -5,4 +5,21 @@
    - keyword: "{% for partition in queues %}{% for instance in partition.instance_types %}{% if instance.name == instance_type %}{{instance.instance_keyword}}{% endif %}{% endfor %}{% endfor %}"
  hostname:
    name: "{{queue}}-{{keyword}}-node-{{index}}"
-  when: ('compute' in group_names )
\ No newline at end of file
+  when: ('compute' in group_names )
+
+- name: Check Hostname
+  vars:
+    - index: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] | ansible.netcommon.ipsubnet(hostvars[inventory_hostname]['private_subnet']) }}"
+    - keyword: "{% for partition in queues %}{% for instance in partition.instance_types %}{% if instance.name == instance_type %}{{instance.instance_keyword}}{% endif %}{% endfor %}{% endfor %}"
+  shell:
+    cmd: "hostname"
+  register: hostname_output
+  when: ('compute' in group_names )
+
+- name: update hostname for HPC cluster
+  vars:
+    - index: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] | ansible.netcommon.ipsubnet(hostvars[inventory_hostname]['private_subnet']) }}"
+    - keyword: "{% for partition in queues %}{% for instance in partition.instance_types %}{% if instance.name == instance_type %}{{instance.instance_keyword}}{% endif %}{% endfor %}{% endfor %}"
+  hostname:
+    name: "{{queue}}-{{keyword}}-node-{{index}}"
+  when: ('compute' in group_names ) and ( hostname_output.stdout != ansible_fqdn.split('.')[0] )
diff --git a/playbooks/roles/hostname/tasks/main.yml b/playbooks/roles/hostname/tasks/main.yml
index e3450c91..6b947a1b 100755
--- a/playbooks/roles/hostname/tasks/main.yml
+++ b/playbooks/roles/hostname/tasks/main.yml
@@ -1,4 +1,4 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_os_family == 'Debian'
diff --git a/playbooks/roles/hyperthreading/tasks/main.yml b/playbooks/roles/hyperthreading/tasks/main.yml
index 5c0a2160..48c59696 100644
--- a/playbooks/roles/hyperthreading/tasks/main.yml
+++ b/playbooks/roles/hyperthreading/tasks/main.yml
@@ -1,5 +1,5 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_distribution == 'Ubuntu'
diff --git a/playbooks/roles/influxdb/tasks/el.yml b/playbooks/roles/influxdb/tasks/el.yml
index d8e45e5b..52f0ab95 100755
--- a/playbooks/roles/influxdb/tasks/el.yml
+++ b/playbooks/roles/influxdb/tasks/el.yml
@@ -2,6 +2,6 @@
 - name: install influxdb
   include_tasks: el_install_influxdb.yml

-- name: configure influxdb on bastion
+- name: configure influxdb on controller
   include_tasks: config_influxdb.yml
-  when: "'bastion' in group_names"
\ No newline at end of file
+  when: "'controller' in group_names"
\ No newline at end of file
diff --git a/playbooks/roles/influxdb/tasks/main.yml b/playbooks/roles/influxdb/tasks/main.yml
index e3450c91..6b947a1b 100755
--- a/playbooks/roles/influxdb/tasks/main.yml
+++ b/playbooks/roles/influxdb/tasks/main.yml
@@ -1,4 +1,4 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_os_family == 'Debian'
diff --git a/playbooks/roles/influxdb/tasks/ubuntu.yml b/playbooks/roles/influxdb/tasks/ubuntu.yml
index a4cf3be1..38896ea9 100644
--- a/playbooks/roles/influxdb/tasks/ubuntu.yml
+++ b/playbooks/roles/influxdb/tasks/ubuntu.yml
@@ -2,6 +2,6 @@
 - name: install influxdb
   include_tasks: ubuntu_install_influxdb.yml

-- name: configure influxdb on bastion
+- name: configure influxdb on controller
   include_tasks: config_influxdb.yml
-  when: "'bastion' in group_names"
\ No newline at end of file
+  when: "'controller' in group_names"
\ No newline at end of file
diff --git a/playbooks/roles/iscsi/tasks/main.yml b/playbooks/roles/iscsi/tasks/main.yml
index 2c296c7f..b769d6ac 100755
--- a/playbooks/roles/iscsi/tasks/main.yml
+++ b/playbooks/roles/iscsi/tasks/main.yml
@@ -1,8 +1,8 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat'

-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_distribution == 'Ubuntu'

-- include: debian.yml
+- include_tasks: debian.yml
  when: ansible_distribution == 'Debian'
diff --git a/playbooks/roles/latency_check/tasks/main.yml b/playbooks/roles/latency_check/tasks/main.yml
index a23c51fa..d0fd924e 100644
--- a/playbooks/roles/latency_check/tasks/main.yml
+++ b/playbooks/roles/latency_check/tasks/main.yml
@@ -1,2 +1,2 @@
-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux'
\ No newline at end of file
diff --git a/playbooks/roles/limits/tasks/main.yml b/playbooks/roles/limits/tasks/main.yml
index 76f56d44..aaf0dfe9 100755
--- a/playbooks/roles/limits/tasks/main.yml
+++ b/playbooks/roles/limits/tasks/main.yml
@@ -1,10 +1,10 @@
 ---
-- include: common.yml
+- include_tasks: common.yml
  when: ansible_os_family == 'RedHat'

-- include: common.yml
+- include_tasks: common.yml
  when: ansible_distribution == 'Ubuntu'

-- include: common.yml
+- include_tasks: common.yml
  when: ansible_distribution == 'Debian'
diff --git a/playbooks/roles/localdisk/tasks/common.yml b/playbooks/roles/localdisk/tasks/common.yml
index 09ec1e5a..35a5efb4 100755
--- a/playbooks/roles/localdisk/tasks/common.yml
+++ b/playbooks/roles/localdisk/tasks/common.yml
@@ -5,7 +5,7 @@

 - name: Get the number of NVMe's
   set_fact:
-    nvme_count: "{{ hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list | length}}"
+    nvme_count: "{{ hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme([0-9]|[1-9][0-9])n1') | list | length}}"

 - name: Create a LVM?
   set_fact:
@@ -18,7 +18,7 @@
     state: present
     label: gpt
   with_items:
-    - "{{ hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list }}"
+    - "{{ (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[1-9][0-9]n1') | list ) + (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list) }}"

 - name: create a filesystem
   filesystem:
@@ -26,7 +26,7 @@
     fstype: xfs
     opts: "-L locscratch{{item | replace('nvme','') | replace('n1','')}}"
   with_items:
-    - "{{ hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list }}"
+    - "{{ (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[1-9][0-9]n1') | list ) + (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list) }}"
   when: not ( one_lv | bool )

 - name: Mount local volume
@@ -37,7 +37,7 @@
     opts: defaults,noatime
     state: mounted
   with_items:
-    - "{{ hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list }}"
+    - "{{ (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[1-9][0-9]n1') | list ) + (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list) }}"
   when: not ( one_lv | bool )

 - name: "set permissions on {{ nvme_path_edited }}"
@@ -50,7 +50,7 @@
     group: "{{privilege_group_name}}"
     recurse: no
   with_items:
-    - "{{ hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list }}"
+    - "{{ (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[1-9][0-9]n1') | list ) + (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list) }}"
   when: not ( one_lv | bool )

 - name: Check for lvm devices
@@ -61,7 +61,7 @@
 - name: Create volume group
   lvg:
     vg: "vg_nvmes"
-    pvs: "{{['/dev/']|product(hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list)|map('join', '') | join(',')}}"
+    pvs: "{{['/dev/']|product((hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[1-9][0-9]n1') | list ) + (hostvars[inventory_hostname]['ansible_devices'] | select('match','nvme[0-9]n1') | list) )|map('join', '') | join(',')}}"

 - name: Create Logical volume
   lvol:
diff --git a/playbooks/roles/localdisk/tasks/main.yml b/playbooks/roles/localdisk/tasks/main.yml
index 7d623a50..f6f5f12b 100755
--- a/playbooks/roles/localdisk/tasks/main.yml
+++ b/playbooks/roles/localdisk/tasks/main.yml
@@ -1 +1 @@
-- include: common.yml
+- include_tasks: common.yml
diff --git a/playbooks/roles/mpi-hostfiles/tasks/main.yml b/playbooks/roles/mpi-hostfiles/tasks/main.yml
index 4fc735e7..21d20998 100755
--- a/playbooks/roles/mpi-hostfiles/tasks/main.yml
+++ b/playbooks/roles/mpi-hostfiles/tasks/main.yml
@@ -1,8 +1,8 @@
-- include: common.yml
+- include_tasks: common.yml
#  when: ansible_os_family == 'RedHat'
#
-#- include: ubuntu.yml
+#- include_tasks: ubuntu.yml
#  when: ansible_distribution == 'Ubuntu'
#
-#- include: debian.yml
+#- include_tasks: debian.yml
#  when: ansible_distribution == 'Debian'
diff --git a/playbooks/roles/mpivars/tasks/main.yml b/playbooks/roles/mpivars/tasks/main.yml
index 77964dc2..6da05e55 100644
--- a/playbooks/roles/mpivars/tasks/main.yml
+++ b/playbooks/roles/mpivars/tasks/main.yml
@@ -1,4 +1,4 @@
 ---
# tasks file for mpivars
-- include: ubuntu.yml
+- include_tasks: ubuntu.yml
  when: ansible_distribution == 'Ubuntu'
diff --git a/playbooks/roles/mysql/tasks/main.yml b/playbooks/roles/mysql/tasks/main.yml
index ebd448b6..3d03980b 100644
--- a/playbooks/roles/mysql/tasks/main.yml
+++ b/playbooks/roles/mysql/tasks/main.yml
@@ -1,11 +1,11 @@
-- include: centos.yml
+- include_tasks: centos.yml
  when: ansible_os_family == 'RedHat' and ansible_distribution == 'CentOS'

-- include: el.yml
+- include_tasks: el.yml
  when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux'

-- include: debian.yml
+- include_tasks: debian.yml
  when: ansible_distribution == 'Ubuntu'

-- include: debian.yml
+- include_tasks: debian.yml
  when: ansible_distribution == 'Debian'
diff --git a/playbooks/roles/nccl-conf/files/a100_b4.8 b/playbooks/roles/nccl-conf/files/a100_b4.8
new file mode 100644
index 00000000..f5f816f0
--- /dev/null
+++ b/playbooks/roles/nccl-conf/files/a100_b4.8
@@ -0,0 +1,7 @@
+NCCL_DEBUG=WARN
+NCCL_IGNORE_CPU_AFFINITY=1
+NCCL_IB_SL=0
+NCCL_IB_TC=41
+NCCL_IB_QPS_PER_CONNECTION=4
+NCCL_IB_GID_INDEX=3
+NCCL_IB_HCA==mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_1,mlx5_2,mlx5_3,mlx5_4,mlx5_14,mlx5_15,mlx5_16,mlx5_17,mlx5_9,mlx5_10,mlx5_11,mlx5_12
\ No newline at end of file
diff --git a/playbooks/roles/nccl-conf/files/bm.gpu4.8 b/playbooks/roles/nccl-conf/files/bm.gpu4.8
new file mode 100644
index 00000000..a4bf9442
--- /dev/null
+++ b/playbooks/roles/nccl-conf/files/bm.gpu4.8
@@ -0,0 +1,7 @@
+NCCL_DEBUG=WARN
+NCCL_IGNORE_CPU_AFFINITY=1
+NCCL_IB_SL=0
+NCCL_IB_TC=41
+NCCL_IB_QPS_PER_CONNECTION=4
+NCCL_IB_GID_INDEX=3
+NCCL_IB_HCA==mlx5_0,mlx5_2,mlx5_6,mlx5_8,mlx5_10,mlx5_12,mlx5_14,mlx5_16,mlx5_1,mlx5_3,mlx5_7,mlx5_9,mlx5_11,mlx5_13,mlx5_15,mlx5_17
\ No newline at end of file
diff --git a/playbooks/roles/nccl-conf/files/h100 b/playbooks/roles/nccl-conf/files/h100
new file mode 100644
index 00000000..d199d0fb
--- /dev/null
+++ b/playbooks/roles/nccl-conf/files/h100
@@ -0,0 +1,14 @@
+NCCL_CROSS_NIC=1
+NCCL_DEBUG=WARN
+NCCL_CUMEM_ENABLE=0
+NCCL_IB_SPLIT_DATA_ON_QPS=0
+NCCL_IB_QPS_PER_CONNECTION=16
+NCCL_IB_GID_INDEX=3
+NCCL_IB_TC=41
+NCCL_IB_SL=0
+NCCL_IB_TIMEOUT=22
+NCCL_BUFFSIZE=16777216
+NCCL_NET_PLUGIN=none
+NCCL_SOCKET_IFNAME=eth0
+NCCL_IGNORE_CPU_AFFINITY=1
+NCCL_IB_HCA==mlx5_0,mlx5_1,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_9,mlx5_10,mlx5_12,mlx5_13,mlx5_14,mlx5_15,mlx5_16,mlx5_17
\ No newline at end of file
diff --git a/playbooks/roles/nccl-conf/tasks/main.yml b/playbooks/roles/nccl-conf/tasks/main.yml
new file mode 100644
index 00000000..e4b6aed4
--- /dev/null
+++ b/playbooks/roles/nccl-conf/tasks/main.yml
@@ -0,0 +1,56 @@
+---
+# tasks file for nccl-conf
+- name: Get the shape
+  shell:
+    cmd: "curl -sH \"Authorization: Bearer Oracle\" -L http://169.254.169.254/opc/v2/instance/ | jq '.shape'"
+  register: shape_nccl
+
+- name: copy nccl.conf for H100
+  become: true
+  copy:
+    src: h100
+    dest: /etc/nccl.conf
+    owner: root
+    group: root
+    mode: '0644'
+  when: shape_nccl.stdout == '"BM.GPU.H100.8"'
+
+- name: copy nccl.conf for BM.GPU.B4.8 and A100-v2.8
+  become: true
+  copy:
+    src: a100_b4.8
+    dest: /etc/nccl.conf
+    owner: root
+    group: root
+    mode: '0644'
+  when: shape_nccl.stdout == '"BM.GPU.B4.8"' or shape_nccl.stdout == '"BM.GPU.A100-v2.8"'
+
+- name: copy nccl.conf for BM.GPU4.8
+  become: true
+  copy:
+    src:
bm.gpu4.8 + dest: /etc/nccl.conf + owner: root + group: root + mode: '0644' + when: shape_nccl.stdout == '"BM.GPU4.8"' + +- name: copy libnccl-ocituner for OL + become: true + get_url: + url: https://objectstorage.eu-frankfurt-1.oraclecloud.com/p/m1Gdcbiguqst6n_aVwRZIFpRZxUG-wGMvqWS5QJeJbIvNZnqTTA3N1_DDRuYpvJx/n/hpc/b/source/o/tuner/libnccl-ocituner.so.1.0.1-OL + dest: /home/opc/libnccl-ocituner.so.1.0.1 + owner: opc + group: privilege + mode: '0775' + when: ( shape_nccl.stdout == '"BM.GPU.B4.8"' or shape_nccl.stdout == '"BM.GPU.A100-v2.8"' or shape_nccl.stdout == '"BM.GPU4.8"' ) and ansible_distribution == 'OracleLinux' + +- name: copy libnccl-ocituner for Ubuntu + become: true + get_url: + url: https://objectstorage.eu-frankfurt-1.oraclecloud.com/p/m1Gdcbiguqst6n_aVwRZIFpRZxUG-wGMvqWS5QJeJbIvNZnqTTA3N1_DDRuYpvJx/n/hpc/b/source/o/tuner/libnccl-ocituner.so.1.0.1-ubuntu + dest: /home/ubuntu/libnccl-ocituner.so.1.0.1 + owner: ubuntu + group: privilege + mode: '0775' + when: ( shape_nccl.stdout == '"BM.GPU.B4.8"' or shape_nccl.stdout == '"BM.GPU.A100-v2.8"' or shape_nccl.stdout == '"BM.GPU4.8"' ) and ansible_os_family == 'Debian' \ No newline at end of file diff --git a/playbooks/roles/nfs-client/tasks/main.yml b/playbooks/roles/nfs-client/tasks/main.yml index 2c296c7f..b769d6ac 100755 --- a/playbooks/roles/nfs-client/tasks/main.yml +++ b/playbooks/roles/nfs-client/tasks/main.yml @@ -1,8 +1,8 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_distribution == 'Ubuntu' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Debian' diff --git a/playbooks/roles/nfs-server/tasks/main.yml b/playbooks/roles/nfs-server/tasks/main.yml index 2c296c7f..b769d6ac 100755 --- a/playbooks/roles/nfs-server/tasks/main.yml +++ b/playbooks/roles/nfs-server/tasks/main.yml @@ -1,8 +1,8 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_distribution == 'Ubuntu' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Debian' diff --git a/playbooks/roles/no_instance_principal/tasks/main.yml b/playbooks/roles/no_instance_principal/tasks/main.yml index 270202fc..17a3fd4f 100755 --- a/playbooks/roles/no_instance_principal/tasks/main.yml +++ b/playbooks/roles/no_instance_principal/tasks/main.yml @@ -1,3 +1,3 @@ -- include: common.yml +- include_tasks: common.yml diff --git a/playbooks/roles/nvidia-container/tasks/main.yml b/playbooks/roles/nvidia-container/tasks/main.yml index 2766a27b..b0178751 100644 --- a/playbooks/roles/nvidia-container/tasks/main.yml +++ b/playbooks/roles/nvidia-container/tasks/main.yml @@ -1,10 +1,10 @@ --- -- include: oraclelinux-7.yml +- include_tasks: oraclelinux-7.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' and ansible_distribution_major_version == '7' -#- include: centos-7.yml +#- include_tasks: centos-7.yml # when: ansible_os_family == 'RedHat' and ansible_distribution == 'CentOS' and ansible_distribution_major_version == '7' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_distribution == 'Ubuntu' diff --git a/playbooks/roles/nvidia-enroot/tasks/main.yml b/playbooks/roles/nvidia-enroot/tasks/main.yml index 7243e27b..e4518440 100644 --- a/playbooks/roles/nvidia-enroot/tasks/main.yml +++ b/playbooks/roles/nvidia-enroot/tasks/main.yml @@ -1,10 +1,10 @@ --- -- include: oraclelinux.yml +- 
include_tasks: oraclelinux.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' -#- include: centos-7.yml +#- include_tasks: centos-7.yml # when: ansible_os_family == 'RedHat' and ansible_distribution == 'CentOS' and ansible_distribution_major_version == '7' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_os_family == 'Debian' and ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/nvidia_peermem/tasks/main.yml b/playbooks/roles/nvidia_peermem/tasks/main.yml index b5245a5e..52027e44 100644 --- a/playbooks/roles/nvidia_peermem/tasks/main.yml +++ b/playbooks/roles/nvidia_peermem/tasks/main.yml @@ -1,3 +1,3 @@ --- # tasks file for nvidia_peermem -- include: common.yml \ No newline at end of file +- include_tasks: common.yml \ No newline at end of file diff --git a/playbooks/roles/oci-cloud-agent/tasks/main.yml b/playbooks/roles/oci-cloud-agent/tasks/main.yml index ea4d5d2a..cbceb221 100644 --- a/playbooks/roles/oci-cloud-agent/tasks/main.yml +++ b/playbooks/roles/oci-cloud-agent/tasks/main.yml @@ -1,4 +1,4 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_os_family == 'Debian' diff --git a/playbooks/roles/oci-cn-auth/tasks/main.yml b/playbooks/roles/oci-cn-auth/tasks/main.yml index 8705dde4..abc4c9f3 100644 --- a/playbooks/roles/oci-cn-auth/tasks/main.yml +++ b/playbooks/roles/oci-cn-auth/tasks/main.yml @@ -1,6 +1,6 @@ --- -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_os_family == 'Debian' and ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/oci-hostname/tasks/main.yml b/playbooks/roles/oci-hostname/tasks/main.yml index e6a3fa1b..cae2d534 100755 --- a/playbooks/roles/oci-hostname/tasks/main.yml +++ b/playbooks/roles/oci-hostname/tasks/main.yml @@ -1,2 +1,2 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' diff --git a/playbooks/roles/oci-legacy/tasks/main.yml b/playbooks/roles/oci-legacy/tasks/main.yml index 4229a70b..e4dc405a 100755 --- a/playbooks/roles/oci-legacy/tasks/main.yml +++ b/playbooks/roles/oci-legacy/tasks/main.yml @@ -1,4 +1,4 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_os_family == 'Debian' \ No newline at end of file diff --git a/playbooks/roles/openldap/tasks/main.yml b/playbooks/roles/openldap/tasks/main.yml index 860b1077..eee5413e 100644 --- a/playbooks/roles/openldap/tasks/main.yml +++ b/playbooks/roles/openldap/tasks/main.yml @@ -4,11 +4,11 @@ - include_vars: debian_vars.yml when: ansible_distribution == 'Ubuntu' -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -#- include: el-8.yml +#- include_tasks: el-8.yml # when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == '8' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/packages/tasks/el-7.yml b/playbooks/roles/packages/tasks/el-7.yml index d0c23143..3e5c2884 100755 --- a/playbooks/roles/packages/tasks/el-7.yml +++ b/playbooks/roles/packages/tasks/el-7.yml @@ -28,4 +28,4 @@ state: latest executable: pip3 ignore_errors: yes - when: 
('bastion' in group_names) + when: ('controller' in group_names) diff --git a/playbooks/roles/packages/tasks/main.yml b/playbooks/roles/packages/tasks/main.yml index 24cc3ed3..e3f1fa68 100755 --- a/playbooks/roles/packages/tasks/main.yml +++ b/playbooks/roles/packages/tasks/main.yml @@ -1,17 +1,17 @@ -- include: ol-7.yml +- include_tasks: ol-7.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' and ansible_distribution_major_version == '7' -- include: ol-8.yml +- include_tasks: ol-8.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' and ansible_distribution_major_version == '8' -- include: centos-7.yml +- include_tasks: centos-7.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'CentOS' and ansible_distribution_major_version == '7' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_distribution == 'Ubuntu' and ansible_distribution_major_version < '22' -- include: ubuntu-2204.yml +- include_tasks: ubuntu-2204.yml when: ansible_distribution == 'Ubuntu' and ansible_distribution_major_version == '22' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Debian' \ No newline at end of file diff --git a/playbooks/roles/packages/tasks/ol-7.yml b/playbooks/roles/packages/tasks/ol-7.yml index 7159eee4..57bead10 100644 --- a/playbooks/roles/packages/tasks/ol-7.yml +++ b/playbooks/roles/packages/tasks/ol-7.yml @@ -31,4 +31,4 @@ state: latest executable: pip3 ignore_errors: yes - when: ('bastion' in group_names) \ No newline at end of file + when: ('controller' in group_names) \ No newline at end of file diff --git a/playbooks/roles/packages/tasks/ol-8.yml b/playbooks/roles/packages/tasks/ol-8.yml index ad1d9877..8a6b3353 100644 --- a/playbooks/roles/packages/tasks/ol-8.yml +++ b/playbooks/roles/packages/tasks/ol-8.yml @@ -33,5 +33,5 @@ state: latest executable: pip3 ignore_errors: yes - when: ('bastion' in group_names) + when: ('controller' in group_names) diff --git a/playbooks/roles/privilege_group/tasks/main.yml b/playbooks/roles/privilege_group/tasks/main.yml index 9698f56d..e0b68526 100644 --- a/playbooks/roles/privilege_group/tasks/main.yml +++ b/playbooks/roles/privilege_group/tasks/main.yml @@ -1,5 +1,5 @@ -- include: common.yml +- include_tasks: common.yml -#- include: el.yml +#- include_tasks: el.yml # when: ansible_os_family == 'RedHat' diff --git a/playbooks/roles/rack-aware/tasks/main.yml b/playbooks/roles/rack-aware/tasks/main.yml index 5570ac43..5c7e72aa 100644 --- a/playbooks/roles/rack-aware/tasks/main.yml +++ b/playbooks/roles/rack-aware/tasks/main.yml @@ -1,6 +1,6 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_distribution == 'Ubuntu' diff --git a/playbooks/roles/rdma-interface/tasks/main.yml b/playbooks/roles/rdma-interface/tasks/main.yml index 076cd2a6..48a777ae 100755 --- a/playbooks/roles/rdma-interface/tasks/main.yml +++ b/playbooks/roles/rdma-interface/tasks/main.yml @@ -1,10 +1,10 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Ubuntu' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Debian' diff --git a/playbooks/roles/safe_yum/tasks/main.yml b/playbooks/roles/safe_yum/tasks/main.yml index 57c3c03c..52476fde 100644 --- a/playbooks/roles/safe_yum/tasks/main.yml +++ 
b/playbooks/roles/safe_yum/tasks/main.yml @@ -1,4 +1,4 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/slurm/tasks/backup_server.yml b/playbooks/roles/slurm/tasks/backup_server.yml index 5e931304..1dbea29c 100755 --- a/playbooks/roles/slurm/tasks/backup_server.yml +++ b/playbooks/roles/slurm/tasks/backup_server.yml @@ -35,7 +35,7 @@ with_items: - munge -- name: Render systemd units for slurmctld on backup bastion +- name: Render systemd units for slurmctld on backup controller become: true template: src: 'systemd/slurmctld_backup.service.d/unit.conf.j2' diff --git a/playbooks/roles/slurm/tasks/common.yml b/playbooks/roles/slurm/tasks/common.yml index 804a2e63..9f0d0729 100755 --- a/playbooks/roles/slurm/tasks/common.yml +++ b/playbooks/roles/slurm/tasks/common.yml @@ -187,9 +187,9 @@ state: directory - name: Include pyxis prolog files - include: common_pyxis.yml + include_tasks: common_pyxis.yml when: pyxis|bool - name: Include pyxis prolog files - include: common_pmix.yml + include_tasks: common_pmix.yml when: ansible_os_family == 'RedHat' \ No newline at end of file diff --git a/playbooks/roles/slurm/tasks/common_pyxis.yml b/playbooks/roles/slurm/tasks/common_pyxis.yml index ccd3fe8e..a200ad54 100644 --- a/playbooks/roles/slurm/tasks/common_pyxis.yml +++ b/playbooks/roles/slurm/tasks/common_pyxis.yml @@ -58,5 +58,5 @@ content: | required /usr/local/lib/slurm/spank_pyxis.so mode: '0775' - owner: "{{bastion_username}}" + owner: "{{controller_username}}" group: "{{privilege_group_name}}" \ No newline at end of file diff --git a/playbooks/roles/slurm/tasks/compute-rack-aware.yml b/playbooks/roles/slurm/tasks/compute-rack-aware.yml index bd270e32..0621555d 100755 --- a/playbooks/roles/slurm/tasks/compute-rack-aware.yml +++ b/playbooks/roles/slurm/tasks/compute-rack-aware.yml @@ -56,7 +56,7 @@ - name: set permissions become: true shell: - cmd: chown {{ bastion_username }}:{{ bastion_username }} /tmp/munge.key + cmd: chown {{ controller_username }}:{{ controller_username }} /tmp/munge.key delegate_to: 127.0.0.1 run_once: true @@ -100,7 +100,7 @@ - name: Get rackIDs for all compute nodes set_fact: racks_to_add_temp: "{{cluster_name}}:{{hostvars[item]['rackID']}}" - with_items: "{{ play_hosts | difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login'])}}" + with_items: "{{ play_hosts | difference(groups['controller']) | difference(groups['slurm_backup']) | difference(groups['login'])}}" run_once: true register: racks_to_add_temp_results @@ -111,7 +111,7 @@ - name: Get hostnames set_fact: nodes_to_add_temp: "{{hostvars[item]['ansible_hostname']}}" - with_items: "{{ play_hosts | difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login']) }}" + with_items: "{{ play_hosts | difference(groups['controller']) | difference(groups['slurm_backup']) | difference(groups['login']) }}" run_once: true register: nodes_to_add_temp_results @@ -138,7 +138,7 @@ - name: Get hostlist if switch exists vars: - new_line: "{% for node in ( play_hosts | difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login']) ) %}{% if cluster_name+':'+hostvars[node]['rackID'] == item.item.item %}{{hostvars[node]['ansible_hostname']}},{% endif %}{% endfor %}" + new_line: "{% for node in ( play_hosts | difference(groups['controller']) | 
difference(groups['slurm_backup']) | difference(groups['login']) ) %}{% if cluster_name+':'+hostvars[node]['rackID'] == item.item.item %}{{hostvars[node]['ansible_hostname']}},{% endif %}{% endfor %}" command: "scontrol show hostlistsorted {{ item.stdout_lines | union (new_line[:-1].split(',') | list )| join(',') }}" register: rack_hostlist1 delegate_to: 127.0.0.1 @@ -148,7 +148,7 @@ - name: Get hostlist if switch does not exists vars: - new_line: "{% for node in ( play_hosts | difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login']) ) %}{% if cluster_name+':'+hostvars[node]['rackID'] == item.item.item %}{{hostvars[node]['ansible_hostname']}},{% endif %}{% endfor %}" + new_line: "{% for node in ( play_hosts | difference(groups['controller']) | difference(groups['slurm_backup']) | difference(groups['login']) ) %}{% if cluster_name+':'+hostvars[node]['rackID'] == item.item.item %}{{hostvars[node]['ansible_hostname']}},{% endif %}{% endfor %}" command: "scontrol show hostlistsorted {{ new_line[:-1] }}" register: rack_hostlist2 delegate_to: 127.0.0.1 @@ -237,7 +237,7 @@ when: racks_left_list | length > 0 - name: Run Pam settings - include: compute_pam.yml + include_tasks: compute_pam.yml when: pam|bool - name: run handlers @@ -251,7 +251,7 @@ enabled: true -- name: Update node state on bastion +- name: Update node state on controller block: - name: Grab Node State shell: 'sinfo -h -o "%t" -n {{ ansible_hostname }}' @@ -259,7 +259,7 @@ delegate_to: 127.0.0.1 - set_fact: node_state2: "{{ node_state.stdout }}" - - name: Update node state on bastion + - name: Update node state on controller become: true command: scontrol update nodename={{ ansible_hostname }} state=RESUME when: node_state2 != "idle" and node_state2 != "alloc" @@ -279,7 +279,7 @@ - set_fact: node_state2: "{{ node_state.stdout }}" - - name: Update node state on bastion + - name: Update node state on controller become: true command: scontrol update nodename={{ ansible_hostname }} state=RESUME when: node_state2 != "idle" and node_state2 != "alloc" diff --git a/playbooks/roles/slurm/tasks/compute.yml b/playbooks/roles/slurm/tasks/compute.yml index 6dbf46cc..8f22bfa8 100755 --- a/playbooks/roles/slurm/tasks/compute.yml +++ b/playbooks/roles/slurm/tasks/compute.yml @@ -1,6 +1,6 @@ --- - name: Run Pam settings - include: compute_pam.yml + include_tasks: compute_pam.yml when: pam|bool - name: install SLURM compute packages @@ -59,7 +59,7 @@ - name: set permissions become: true shell: - cmd: chown {{ bastion_username }}:{{ bastion_username }} /tmp/munge.key + cmd: chown {{ controller_username }}:{{ controller_username }} /tmp/munge.key delegate_to: 127.0.0.1 run_once: true @@ -83,7 +83,7 @@ - name: Get hostnames set_fact: nodes_to_add_temp: "{{hostvars[item]['ansible_hostname']}}" - with_items: "{{ play_hosts | difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login']) }}" + with_items: "{{ play_hosts | difference(groups['controller']) | difference(groups['slurm_backup']) | difference(groups['login']) }}" run_once: true register: nodes_to_add_temp_results @@ -159,7 +159,7 @@ notify: reconfigure slurm - name: Run Pam settings - include: compute_pam.yml + include_tasks: compute_pam.yml when: pam|bool - name: start slurmd @@ -169,7 +169,7 @@ state: restarted enabled: true -- name: Update node state on bastion +- name: Update node state on controller block: - name: Grab Node State shell: 'sinfo -h -o "%t" -n {{ ansible_hostname }}' @@ -177,7 +177,7 @@ delegate_to: 
127.0.0.1 - set_fact: node_state2: "{{ node_state.stdout }}" - - name: Update node state on bastion + - name: Update node state on controller become: true command: scontrol update nodename={{ ansible_hostname }} state=RESUME when: node_state2 != "idle" and node_state2 != "alloc" @@ -197,7 +197,7 @@ - set_fact: node_state2: "{{ node_state.stdout }}" - - name: Update node state on bastion + - name: Update node state on controller become: true command: scontrol update nodename={{ ansible_hostname }} state=RESUME when: node_state2 != "idle" and node_state2 != "alloc" diff --git a/playbooks/roles/slurm/tasks/bastion.yml b/playbooks/roles/slurm/tasks/controller.yml similarity index 74% rename from playbooks/roles/slurm/tasks/bastion.yml rename to playbooks/roles/slurm/tasks/controller.yml index 53febd7c..47882734 100755 --- a/playbooks/roles/slurm/tasks/bastion.yml +++ b/playbooks/roles/slurm/tasks/controller.yml @@ -6,11 +6,11 @@ slurm_repos: "epel,ol7_developer_EPEL" when: (not destroy|bool) and ((initial|bool) or (not initial|bool and ('compute' in group_names))) - - name: run server directives ol7 bastion + - name: run server directives ol7 controller include_tasks: server.yml vars: slurm_repos: "epel,ol7_developer_EPEL" - when: ('bastion' in group_names) and (not destroy|bool) and (initial| bool) + when: ('controller' in group_names) and (not destroy|bool) and (initial| bool) when: ansible_distribution_major_version == '7' - block: @@ -20,9 +20,9 @@ slurm_repos: "ol8_developer_EPEL,ol8_codeready_builder" when: (not destroy|bool) and ((initial|bool) or (not initial|bool and ('compute' in group_names))) - - name: run server directives ol8 bastion + - name: run server directives ol8 controller include_tasks: server.yml vars: slurm_repos: "ol8_developer_EPEL,ol8_codeready_builder" - when: ('bastion' in group_names) and (not destroy|bool) and (initial| bool) + when: ('controller' in group_names) and (not destroy|bool) and (initial| bool) when: ansible_distribution_major_version == '8' \ No newline at end of file diff --git a/playbooks/roles/slurm/tasks/destroy-rack-aware.yml b/playbooks/roles/slurm/tasks/destroy-rack-aware.yml index 7f1e8846..1bc888a0 100755 --- a/playbooks/roles/slurm/tasks/destroy-rack-aware.yml +++ b/playbooks/roles/slurm/tasks/destroy-rack-aware.yml @@ -49,7 +49,7 @@ - name: Get hostnames set_fact: nodes_to_remove_temp: "{{hostvars[item]['ansible_hostname']}}" - with_items: "{{ play_hosts | difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login'])}}" + with_items: "{{ play_hosts | difference(groups['controller']) | difference(groups['slurm_backup']) | difference(groups['login'])}}" run_once: true register: nodes_to_remove_temp_results @@ -87,7 +87,7 @@ - name: Get rackIDs set_fact: racks_to_remove_temp: "{{cluster_name}}:{{hostvars[item]['rackID']}}" - with_items: "{{ play_hosts | difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login'])}}" + with_items: "{{ play_hosts | difference(groups['controller']) | difference(groups['slurm_backup']) | difference(groups['login'])}}" run_once: true register: racks_to_remove_temp_results diff --git a/playbooks/roles/slurm/tasks/destroy.yml b/playbooks/roles/slurm/tasks/destroy.yml index 5c58d085..7df264a6 100755 --- a/playbooks/roles/slurm/tasks/destroy.yml +++ b/playbooks/roles/slurm/tasks/destroy.yml @@ -55,7 +55,7 @@ - name: Get hostnames set_fact: nodes_to_add_temp: "{{hostvars[item]['ansible_hostname']}}" - with_items: "{{ play_hosts | 
difference(groups['bastion']) | difference(groups['slurm_backup']) | difference(groups['login']) }}" + with_items: "{{ play_hosts | difference(groups['controller']) | difference(groups['slurm_backup']) | difference(groups['login']) }}" run_once: true register: nodes_to_add_temp_results diff --git a/playbooks/roles/slurm/tasks/main.yml b/playbooks/roles/slurm/tasks/main.yml index 89944752..bc94818f 100755 --- a/playbooks/roles/slurm/tasks/main.yml +++ b/playbooks/roles/slurm/tasks/main.yml @@ -7,17 +7,17 @@ - include_vars: ubuntu_vars.yml when: ansible_distribution == 'Ubuntu' -- include: bastion.yml +- include_tasks: controller.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' -- include: el7.yml +- include_tasks: el7.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' and ansible_distribution_major_version == '7' -- include: el7.yml +- include_tasks: el7.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'CentOS' and ansible_distribution_major_version == '7' -- include: el8.yml +- include_tasks: el8.yml when: ansible_os_family == 'RedHat' and ansible_distribution == 'OracleLinux' and ansible_distribution_major_version == '8' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/slurm/tasks/ubuntu.yml b/playbooks/roles/slurm/tasks/ubuntu.yml index 96a8843e..5399d7cc 100644 --- a/playbooks/roles/slurm/tasks/ubuntu.yml +++ b/playbooks/roles/slurm/tasks/ubuntu.yml @@ -4,7 +4,7 @@ - name: run server directives include_tasks: server.yml - when: ('bastion' in group_names) and (not destroy|bool) and (initial| bool) + when: ('controller' in group_names) and (not destroy|bool) and (initial| bool) - name: run compute directives include_tasks: "compute{{rack_aware_playbook_suffix}}.yml" diff --git a/playbooks/roles/slurm/templates/slurm.conf.j2 b/playbooks/roles/slurm/templates/slurm.conf.j2 index 0d61eb25..3bb57372 100755 --- a/playbooks/roles/slurm/templates/slurm.conf.j2 +++ b/playbooks/roles/slurm/templates/slurm.conf.j2 @@ -1,5 +1,5 @@ -{% set bastion = hostvars[groups['bastion'][0]]['ansible_fqdn'].split('.') %} -SlurmctldHost={{ bastion[0] }} +{% set controller = hostvars[groups['controller'][0]]['ansible_fqdn'].split('.') %} +SlurmctldHost={{ controller[0] }} {% if (groups['slurm_backup']| length ) > 0 %} SlurmctldHost={{ hostvars[groups['slurm_backup'][0]]['ansible_fqdn'].split('.')[0] }} {% endif %} @@ -31,7 +31,7 @@ GresTypes=gpu SchedulerType=sched/backfill SelectType=select/cons_tres SelectTypeParameters=CR_Core -AccountingStorageHost={{ bastion[0] }} +AccountingStorageHost={{ controller[0] }} AccountingStorageType=accounting_storage/slurmdbd AccountingStoreFlags=job_comment ClusterName=cluster @@ -113,6 +113,8 @@ NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boar NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=1 CoresPerSocket={{instance.instance_pool_ocpus}} ThreadsPerCore={{threadspercore}} State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ instance.name }} {% elif instance.shape == "VM.DenseIO.E4.Flex" %} NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=1 CoresPerSocket={{instance.instance_pool_ocpus}} ThreadsPerCore={{threadspercore}} State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ 
instance.name }} +{% elif instance.shape == "VM.DenseIO.E5.Flex" %} +NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=1 CoresPerSocket={{instance.instance_pool_ocpus}} ThreadsPerCore={{threadspercore}} State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ instance.name }} {% elif instance.shape == "VM.Standard.A1.Flex" %} NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=1 CoresPerSocket={{instance.instance_pool_ocpus}} ThreadsPerCore=1 State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ instance.name }} {% elif instance.shape == "BM.Standard.E3.128" and threadspercore == 1%} @@ -127,6 +129,8 @@ NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boar NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=2 CoresPerSocket=64 ThreadsPerCore={{threadspercore}} State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ instance.name }} {% elif instance.shape == "BM.DenseIO.E4.128" and threadspercore == 2 %} NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=1 CoresPerSocket=255 ThreadsPerCore=1 State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ instance.name }} +{% elif instance.shape == "BM.DenseIO.E5.128" %} +NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=2 CoresPerSocket=64 ThreadsPerCore={{threadspercore}} State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ instance.name }} {% elif instance.shape == "BM.HPC2.36" %} NodeName={{partition.name}}-{{instance.instance_keyword}}-node-[1-{{size}}] Boards=1 SocketsPerBoard=2 CoresPerSocket=18 ThreadsPerCore={{threadspercore}} State=FUTURE Features={% if instance.shape != instance.name%}{{ instance.shape }},{% endif %}{{ instance.name }} {% elif instance.shape == "BM.HPC.E5.144" %} diff --git a/playbooks/roles/slurm/templates/systemd/slurmd.service b/playbooks/roles/slurm/templates/systemd/slurmd.service index 7d4f3a2e..534afe2c 100644 --- a/playbooks/roles/slurm/templates/systemd/slurmd.service +++ b/playbooks/roles/slurm/templates/systemd/slurmd.service @@ -6,7 +6,7 @@ Documentation=man:slurmd(8) [Service] Type=forking EnvironmentFile=-/etc/default/slurm -ExecStart={{slurm_exec}}/sbin/slurmd --conf-server {{ hostvars[groups['bastion'][0]]['ansible_fqdn'].split('.')[0] }}{% if (groups['slurm_backup']| length ) > 0 %},{{ hostvars[groups['slurm_backup'][0]]['ansible_fqdn'].split('.')[0] }}{% endif %} $SLURMD_OPTIONS +ExecStart={{slurm_exec}}/sbin/slurmd --conf-server {{ hostvars[groups['controller'][0]]['ansible_fqdn'].split('.')[0] }}{% if (groups['slurm_backup']| length ) > 0 %},{{ hostvars[groups['slurm_backup'][0]]['ansible_fqdn'].split('.')[0] }}{% endif %} $SLURMD_OPTIONS ExecReload=/bin/kill -HUP $MAINPID PIDFile=/run/slurmd.pid KillMode=process diff --git a/playbooks/roles/slurm/templates/systemd/slurmd.service.d/unit.conf.j2 b/playbooks/roles/slurm/templates/systemd/slurmd.service.d/unit.conf.j2 index 0fc9a3ba..7f4faf67 100755 --- a/playbooks/roles/slurm/templates/systemd/slurmd.service.d/unit.conf.j2 +++ b/playbooks/roles/slurm/templates/systemd/slurmd.service.d/unit.conf.j2 @@ -5,5 +5,5 @@ Requires=munge.service Restart=always {% if ansible_os_family == 
'RedHat' %} ExecStart= -ExecStart={{slurm_exec}}/sbin/slurmd --conf-server {{ hostvars[groups['bastion'][0]]['ansible_fqdn'].split('.')[0] }}{% if (groups['slurm_backup']| length ) > 0 %},{{ hostvars[groups['slurm_backup'][0]]['ansible_fqdn'].split('.')[0] }}{% endif %} -D $SLURMD_OPTIONS +ExecStart={{slurm_exec}}/sbin/slurmd --conf-server {{ hostvars[groups['controller'][0]]['ansible_fqdn'].split('.')[0] }}{% if (groups['slurm_backup']| length ) > 0 %},{{ hostvars[groups['slurm_backup'][0]]['ansible_fqdn'].split('.')[0] }}{% endif %} -D $SLURMD_OPTIONS {% endif %} \ No newline at end of file diff --git a/playbooks/roles/spack/tasks/main.yml b/playbooks/roles/spack/tasks/main.yml index a803e792..a5171909 100755 --- a/playbooks/roles/spack/tasks/main.yml +++ b/playbooks/roles/spack/tasks/main.yml @@ -1,8 +1,8 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Ubuntu' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Debian' diff --git a/playbooks/roles/ssh/tasks/common.yml b/playbooks/roles/ssh/tasks/common.yml index 41872c6d..2975fafe 100644 --- a/playbooks/roles/ssh/tasks/common.yml +++ b/playbooks/roles/ssh/tasks/common.yml @@ -10,7 +10,7 @@ - name: Install private ssh key on all nodes copy: dest: "/home/{{ ansible_user }}/.ssh/id_rsa" - src: "/home/{{ bastion_username }}/.ssh/{{ item }}" + src: "/home/{{ controller_username }}/.ssh/{{ item }}" owner: "{{ ansible_user }}" group: "{{ ansible_user }}" mode: '0600' @@ -20,7 +20,7 @@ - name: Install public ssh key on all nodes copy: dest: "/home/{{ ansible_user }}/.ssh/id_rsa.pub" - src: "/home/{{ bastion_username }}/.ssh/{{ item }}" + src: "/home/{{ controller_username }}/.ssh/{{ item }}" owner: "{{ ansible_user }}" group: "{{ ansible_user }}" mode: '0644' diff --git a/playbooks/roles/ssh/tasks/main.yml b/playbooks/roles/ssh/tasks/main.yml index 7d623a50..f6f5f12b 100755 --- a/playbooks/roles/ssh/tasks/main.yml +++ b/playbooks/roles/ssh/tasks/main.yml @@ -1 +1 @@ -- include: common.yml +- include_tasks: common.yml diff --git a/playbooks/roles/ssl/defaults/main.yml b/playbooks/roles/ssl/defaults/main.yml index 7bfd86fa..1ce44e97 100644 --- a/playbooks/roles/ssl/defaults/main.yml +++ b/playbooks/roles/ssl/defaults/main.yml @@ -4,7 +4,7 @@ ssl_cert_country: 'US' ssl_cert_locality: 'Seattle' ssl_cert_organization: 'Oracle Cloud' ssl_cert_state: 'WA' -ssl_cert_altname: 'bastion.cluster' +ssl_cert_altname: 'controller.cluster' ssl_cert_days: '3650' diff --git a/playbooks/roles/ssl/tasks/debian.yml b/playbooks/roles/ssl/tasks/debian.yml index c9f349c7..6d6fac36 100644 --- a/playbooks/roles/ssl/tasks/debian.yml +++ b/playbooks/roles/ssl/tasks/debian.yml @@ -41,7 +41,7 @@ dest: '{{ ssl_cert_path }}/san.conf' mode: '0660' - - name: Create a certificate request for bastion + - name: Create a certificate request for controller command: > openssl req -new -nodes -sha512 -subj '/C={{ ssl_cert_country }}/ST={{ ssl_cert_state }}/L={{ ssl_cert_locality }}/O={{ ssl_cert_organization }}/CN={{ ansible_fqdn }}' diff --git a/playbooks/roles/ssl/tasks/el.yml b/playbooks/roles/ssl/tasks/el.yml index 66c05cbc..7b0ddc8a 100644 --- a/playbooks/roles/ssl/tasks/el.yml +++ b/playbooks/roles/ssl/tasks/el.yml @@ -41,7 +41,7 @@ dest: '{{ ssl_cert_path }}/san.conf' mode: '0660' - - name: Create a certificate request for bastion + - name: Create a certificate request for controller command: > openssl req -new -nodes -sha512 -subj 
'/C={{ ssl_cert_country }}/ST={{ ssl_cert_state }}/L={{ ssl_cert_locality }}/O={{ ssl_cert_organization }}/CN={{ ansible_fqdn }}' diff --git a/playbooks/roles/ssl/tasks/main.yml b/playbooks/roles/ssl/tasks/main.yml index 150216f8..2c4e3a98 100644 --- a/playbooks/roles/ssl/tasks/main.yml +++ b/playbooks/roles/ssl/tasks/main.yml @@ -1,5 +1,5 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/sssd/tasks/main.yml b/playbooks/roles/sssd/tasks/main.yml index acad08c2..0456de77 100644 --- a/playbooks/roles/sssd/tasks/main.yml +++ b/playbooks/roles/sssd/tasks/main.yml @@ -1,11 +1,11 @@ - include_vars: /opt/oci-hpc/playbooks/roles/openldap/vars/debian_vars.yml when: ansible_distribution == 'Ubuntu' -- include: el-7.yml +- include_tasks: el-7.yml when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == '7' -- include: el-8.yml +- include_tasks: el-8.yml when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == '8' -- include: debian.yml +- include_tasks: debian.yml when: ansible_distribution == 'Ubuntu' \ No newline at end of file diff --git a/playbooks/roles/sssd/templates/sssd.conf.j2 b/playbooks/roles/sssd/templates/sssd.conf.j2 index 928b9986..9f3104bd 100644 --- a/playbooks/roles/sssd/templates/sssd.conf.j2 +++ b/playbooks/roles/sssd/templates/sssd.conf.j2 @@ -20,7 +20,7 @@ access_provider = ldap chpass_provider = ldap cache_credentials = true entry_cache_timeout = 600 -ldap_uri = ldaps://{{ hostvars[groups['bastion'][0]]['ansible_fqdn'] }} +ldap_uri = ldaps://{{ hostvars[groups['controller'][0]]['ansible_fqdn'] }} ldap_search_base = dc=local ldap_network_timeout = 30 ldap_access_order = expire diff --git a/playbooks/roles/sssd/templates/sssd_ubuntu.conf.j2 b/playbooks/roles/sssd/templates/sssd_ubuntu.conf.j2 index 10a81eb7..23918c02 100644 --- a/playbooks/roles/sssd/templates/sssd_ubuntu.conf.j2 +++ b/playbooks/roles/sssd/templates/sssd_ubuntu.conf.j2 @@ -10,7 +10,7 @@ access_provider = ldap chpass_provider = ldap cache_credentials = true entry_cache_timeout = 600 -ldap_uri = ldaps://{{ hostvars[groups['bastion'][0]]['ansible_fqdn'] }} +ldap_uri = ldaps://{{ hostvars[groups['controller'][0]]['ansible_fqdn'] }} ldap_search_base = dc=local ldap_network_timeout = 30 ldap_access_order = expire diff --git a/playbooks/roles/telegraf/tasks/common.yml b/playbooks/roles/telegraf/tasks/common.yml index e0904f7b..a1e079bf 100644 --- a/playbooks/roles/telegraf/tasks/common.yml +++ b/playbooks/roles/telegraf/tasks/common.yml @@ -1,10 +1,10 @@ --- - name: Create database - shell: "python3 -c \"import influxdb; influxdb.InfluxDBClient(host='{{ hostvars[groups['bastion'][0]]['ansible_fqdn'] }}', port=8086).create_database('telegraph')\"" + shell: "python3 -c \"import influxdb; influxdb.InfluxDBClient(host='{{ hostvars[groups['controller'][0]]['ansible_fqdn'] }}', port=8086).create_database('telegraph')\"" #- name: Create database # influxdb_database: -# hostname: "{{ hostvars[groups['bastion'][0]]['ansible_fqdn'] }}" +# hostname: "{{ hostvars[groups['controller'][0]]['ansible_fqdn'] }}" # database_name: "telegraf" # run_once: true diff --git a/playbooks/roles/telegraf/tasks/main.yml b/playbooks/roles/telegraf/tasks/main.yml index b1d4a1f1..244bd257 100755 --- a/playbooks/roles/telegraf/tasks/main.yml +++ b/playbooks/roles/telegraf/tasks/main.yml @@ -1,2 +1,2 @@ -- include: common.yml +- 
include_tasks: common.yml when: ansible_os_family == 'RedHat' or ansible_os_family == 'Debian' diff --git a/playbooks/roles/telegraf/templates/influxdb.conf.j2 b/playbooks/roles/telegraf/templates/influxdb.conf.j2 index 7559a0ed..cb092eaf 100755 --- a/playbooks/roles/telegraf/templates/influxdb.conf.j2 +++ b/playbooks/roles/telegraf/templates/influxdb.conf.j2 @@ -1,2 +1,2 @@ [[outputs.influxdb]] - urls = ["http://{{ hostvars[groups['bastion'][0]]['ansible_fqdn'] }}:8086"] + urls = ["http://{{ hostvars[groups['controller'][0]]['ansible_fqdn'] }}:8086"] diff --git a/playbooks/roles/tuned/tasks/main.yml b/playbooks/roles/tuned/tasks/main.yml index 15793bee..597898be 100644 --- a/playbooks/roles/tuned/tasks/main.yml +++ b/playbooks/roles/tuned/tasks/main.yml @@ -1,2 +1,2 @@ - - include: el-7.yml + - include_tasks: el-7.yml when: ansible_os_family == 'RedHat' and ansible_distribution_major_version == '7' and (shape == 'BM.GPU.B4.8' or shape == 'BM.GPU4.8' or shape == 'BM.GPU.A100-v2.8' or shape == 'BM.GPU.H100.8') diff --git a/playbooks/roles/yaml/tasks/main.yml b/playbooks/roles/yaml/tasks/main.yml index 4229a70b..e4dc405a 100755 --- a/playbooks/roles/yaml/tasks/main.yml +++ b/playbooks/roles/yaml/tasks/main.yml @@ -1,4 +1,4 @@ -- include: el.yml +- include_tasks: el.yml when: ansible_os_family == 'RedHat' -- include: ubuntu.yml +- include_tasks: ubuntu.yml when: ansible_os_family == 'Debian' \ No newline at end of file diff --git a/playbooks/site.yml b/playbooks/site.yml index 3bb6f837..e869fd36 100644 --- a/playbooks/site.yml +++ b/playbooks/site.yml @@ -19,6 +19,12 @@ name: fix_broken when: ansible_os_family == 'Debian' +- hosts: controller, slurm_backup + become: true + tasks: + - include_role: + name: yaml + - hosts: all become: true vars: @@ -50,9 +56,6 @@ become: true gather_facts: true tasks: - - include_role: - name: cloud-agent_update - when: cluster_network|bool and use_compute_agent|default(false)|bool - include_role: name: oci-cn-auth when: cluster_network|bool and not use_compute_agent|default(false)|bool @@ -62,7 +65,7 @@ - include_role: name: nvidia_peermem -- hosts: bastion +- hosts: controller become: true vars: export_path: "/home" @@ -73,7 +76,7 @@ name: nfs-server when: home_nfs|bool and (not home_fss|bool) -- hosts: bastion +- hosts: controller become: true vars: tmp_home: "/tmp/home_tmp/" @@ -92,7 +95,7 @@ name: fss-home when: add_nfs|bool and home_fss|bool -- hosts: bastion, slurm_backup, login +- hosts: controller, slurm_backup, login become: true tasks: - include_role: @@ -108,7 +111,7 @@ name: passwords -- hosts: bastion +- hosts: controller become: true vars: @@ -124,7 +127,7 @@ when: ldap|default(true)|bool # configure if instance_principal is False -- hosts: bastion +- hosts: controller become: true tasks: - include_role: @@ -143,7 +146,7 @@ name: nfs-client vars: local_path: "/home" - export_host: "{{ hostvars[groups['bastion'][0]]['ansible_default_ipv4']['address'] }}" + export_host: "{{ hostvars[groups['controller'][0]]['ansible_default_ipv4']['address'] }}" export_path: "/home" options: "defaults,noatime,bg,timeo=100,ac,actimeo=120,nocto,rsize=1048576,wsize=1048576,nolock,local_lock={{ lock }},mountproto=tcp,sec=sys,_netdev" lock: "all" @@ -158,17 +161,17 @@ lock: "none" when: home_nfs|bool and home_fss|bool -- hosts: bastion +- hosts: controller become: true vars: export_path: "{{ cluster_nfs_path }}" export_name: "cluster" local_path: "/export/cluster" - iscsi_ip: "{{ bastion_mount_ip }}" + iscsi_ip: "{{ controller_mount_ip }}" tasks: - include_role: name: 
iscsi - when: bastion_block|default(false)|bool + when: controller_block|default(false)|bool - include_role: name: nfs-server when: cluster_nfs|default(true)|bool @@ -178,12 +181,12 @@ - hosts: slurm_backup become: true vars: - iscsi_ip: "{{ bastion_mount_ip }}" + iscsi_ip: "{{ controller_mount_ip }}" local_path: "/mnt/nfs_backup" tasks: - include_role: name: iscsi - when: bastion_block|default(false)|bool + when: controller_block|default(false)|bool - hosts: login become: true @@ -218,7 +221,7 @@ name: nfs-client vars: local_path: "{{ cluster_nfs_path }}" - export_host: "{{ hostvars[groups['bastion'][0]]['ansible_default_ipv4']['address'] }}" + export_host: "{{ hostvars[groups['controller'][0]]['ansible_default_ipv4']['address'] }}" options: "defaults,noatime,bg,timeo=100,ac,actimeo=120,nocto,rsize=1048576,wsize=1048576,nolock,local_lock={{ lock }},mountproto=tcp,sec=sys,_netdev" export_path: "/export/cluster" lock: "all" @@ -258,12 +261,9 @@ - include_role: name: hyperthreading when: not hyperthreading|default(false)|bool - -- hosts: bastion, slurm_backup - become: true - tasks: - include_role: - name: yaml + name: nccl-conf + when: cluster_network|bool - hosts: all tasks: @@ -274,7 +274,7 @@ name: telegraf when: monitoring|default(false)|bool -- hosts: bastion +- hosts: controller tasks: - include_role: name: grafana diff --git a/playbooks/slurm_config.yml b/playbooks/slurm_config.yml index bb3f6995..dce70f01 100755 --- a/playbooks/slurm_config.yml +++ b/playbooks/slurm_config.yml @@ -1,4 +1,4 @@ -- hosts: bastion,slurm_backup,compute,login +- hosts: controller,slurm_backup,compute,login gather_facts: true vars: destroy: false diff --git a/samples/NCCL_readme b/samples/NCCL_readme index 9279fd69..276df6e1 100644 --- a/samples/NCCL_readme +++ b/samples/NCCL_readme @@ -4,7 +4,7 @@ chmod 775 /opt/oci-hpc/samples/prep_sample_files.sh SSH to one of the compute nodes and run: ~/compile.sh -From the bastion, you can edit the third line of /home/opc/nccl_run_allreduce.sbatch with the number of nodes that you would like to test on: +From the controller, you can edit the third line of /home/opc/nccl_run_allreduce.sbatch with the number of nodes that you would like to test on: sbatch /home/opc/nccl_run_allreduce.sbatch Look at the last line of the log for bandwidth. 
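To illustrate that workflow, here is a minimal sketch (an assumption-laden example, not part of the patch: it assumes the stock sample at /home/opc/nccl_run_allreduce.sbatch keeps `#SBATCH --nodes=...` on its third line, as the samples in this repo do, and that the job log follows the default slurm-<jobid>.out naming):
```
# On the controller: set the node count, submit, then read the bandwidth
# from the last line of the job log once it completes.
sed -i '3s/--nodes=.*/--nodes=4/' /home/opc/nccl_run_allreduce.sbatch
sbatch /home/opc/nccl_run_allreduce.sbatch
tail -n 1 slurm-*.out
```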
diff --git a/samples/gpu/H100-topology-container.xml b/samples/gpu/H100-topology-kubernetes.xml similarity index 100% rename from samples/gpu/H100-topology-container.xml rename to samples/gpu/H100-topology-kubernetes.xml diff --git a/samples/gpu/nccl_run_allreduce_H100.sbatch b/samples/gpu/nccl_run_allreduce_H100.sbatch index 830870c4..efef481e 100644 --- a/samples/gpu/nccl_run_allreduce_H100.sbatch +++ b/samples/gpu/nccl_run_allreduce_H100.sbatch @@ -43,8 +43,7 @@ fi --bind-to numa \ -npernode 8 \ --mca coll ^hcoll \ - -x NCCL_CROSS_NIC=0 \ - -x NCCL_SOCKET_NTHREADS=16 \ + -x NCCL_CROSS_NIC=1 \ -x NCCL_DEBUG=WARN \ -x NCCL_CUMEM_ENABLE=0 \ -x NCCL_IB_SPLIT_DATA_ON_QPS=0 \ @@ -60,9 +59,11 @@ fi -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ -x RX_QUEUE_LEN=8192 \ -x IB_RX_QUEUE_LEN=8192 \ + -x NCCL_BUFFSIZE=16777216 \ -x NCCL_SOCKET_IFNAME=eth0 \ - -x NCCL_ALGO=auto \ -x NCCL_IGNORE_CPU_AFFINITY=1 \ -x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \ - -x NCCL_TOPO_FILE=~/H100-topology.xml \ - --np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --hostfile $MACHINEFILE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100 \ No newline at end of file + --np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --hostfile $MACHINEFILE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b 1G -e 16G -f 2 -g 1 + + # If the NCCL version is lower than 2.20.3, it is recommended to use the topology file for optimal performance + # -x NCCL_TOPO_FILE=~/H100-topology.xml \ \ No newline at end of file diff --git a/samples/gpu/nccl_run_allreduce_H100.sh b/samples/gpu/nccl_run_allreduce_H100.sh index 520f125d..2fd714ea 100644 --- a/samples/gpu/nccl_run_allreduce_H100.sh +++ b/samples/gpu/nccl_run_allreduce_H100.sh @@ -57,8 +57,7 @@ do --bind-to numa \ -npernode 8 \ --mca coll ^hcoll \ - -x NCCL_CROSS_NIC=0 \ - -x NCCL_SOCKET_NTHREADS=16 \ + -x NCCL_CROSS_NIC=1 \ -x NCCL_DEBUG=WARN \ -x NCCL_CUMEM_ENABLE=0 \ -x NCCL_IB_SPLIT_DATA_ON_QPS=0 \ @@ -74,14 +73,17 @@ do -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ -x RX_QUEUE_LEN=8192 \ -x IB_RX_QUEUE_LEN=8192 \ + -x NCCL_BUFFSIZE=16777216 \ -x NCCL_SOCKET_IFNAME=eth0 \ - -x NCCL_ALGO=auto \ -x NCCL_IGNORE_CPU_AFFINITY=1 \ -x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \ -x NCCL_TOPO_FILE=~/H100-topology.xml \ - --np $np --hostfile $hostfile /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100 >> $logfile + --np $np --hostfile $hostfile /opt/oci-hpc/nccl-test/build/all_reduce_perf -b 1G -e 16G -f 2 -g 1 >> $logfile tail -n 32 $logfile +done -done \ No newline at end of file + + + # If the NCCL version is lower than 2.20.3, it is recommended to use the topology file for optimal performance + # -x NCCL_TOPO_FILE=~/H100-topology.xml \ \ No newline at end of file diff --git a/samples/gpu/nccl_run_allreduce_containers_H100.sbatch b/samples/gpu/nccl_run_allreduce_containers_H100.sbatch new file mode 100644 index 00000000..d46edd62 --- /dev/null +++ b/samples/gpu/nccl_run_allreduce_containers_H100.sbatch @@ -0,0 +1,79 @@ +#!/bin/bash +#SBATCH --job-name=nccl-allreduce-slurm-containers +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=8 +#SBATCH --ntasks-per-node=8 +#SBATCH --exclusive +export PMI_DEBUG=1 + +cd /nfs/cluster +mkdir $SLURM_JOB_ID +cd $SLURM_JOB_ID + +MACHINEFILE="hostfile" + +scontrol show hostnames $SLURM_JOB_NODELIST > $MACHINEFILE +echo MACHINEFILE +cat $MACHINEFILE + +source /etc/os-release + +MPIVARS_PATH=`ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh` + +if [[ "$MPIVARS_PATH" == "" ]]; then + MPIVARS_PATH=`ls /opt/openmpi-*/bin/mpivars.sh` +fi + +if [[
"$MPIVARS_PATH" == "" ]]; then + echo "Could not find MPIPATH"; exit; fi + +source $MPIVARS_PATH +LOCAL_MPI=${MPIVARS_PATH%/*} + +shape=`curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/ | jq .shape` +if [ $shape == \"BM.GPU.H100.8\" ] +then + var_UCX_NET_DEVICES=eth0 +else + echo "Use the appropriate nccl test run script for non H100 nodes" +fi + +export NCCL_CROSS_NIC=0 \ + NCCL_SOCKET_NTHREADS=16 \ + NCCL_DEBUG=WARN \ + NCCL_CUMEM_ENABLE=0 \ + NCCL_IB_SPLIT_DATA_ON_QPS=0 \ + NCCL_IB_QPS_PER_CONNECTION=16 \ + NCCL_IB_GID_INDEX=3 \ + NCCL_IB_TC=41 \ + NCCL_IB_SL=0 \ + NCCL_IB_TIMEOUT=22 \ + NCCL_NET_PLUGIN=none \ + NCCL_SOCKET_IFNAME=eth0 \ + NCCL_IGNORE_CPU_AFFINITY=1 \ + NCCL_IB_HCA="=mlx5_0,mlx5_1,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_9,mlx5_10,mlx5_12,mlx5_13,mlx5_14,mlx5_15,mlx5_16,mlx5_17" \ + NCCL_TOPO_FILE=/nfs/cluster/H100-topology.xml \ + HCOLL_ENABLE_MCAST_ALL=0 \ + coll_hcoll_enable=0 \ + UCX_TLS=tcp \ + UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ + RX_QUEUE_LEN=8192 \ + IB_RX_QUEUE_LEN=8192 \ + OMPI_MCA_coll=^hcoll + +env | grep "SLURMD_NODENAME=" +USER=`whoami` + +CONTAINER_IMAGE="/home/ubuntu/nvcr.io+nvidia+pytorch+24.01-py3.sqsh" +CONTAINER_MOUNTS="/opt/oci-hpc/nccl-test:/nccl,$LOCAL_MPI:$LOCAL_MPI,/nfs/cluster:/nfs/cluster" +echo $LOCAL_MPI +echo $MPIVARS_PATH + +srun --mpi=pmi2 --gpus-per-node=$SBATCH_GPUS_PER_NODE \ + --ntasks-per-node=$SLURM_NTASKS_PER_NODE \ + --container-image=$CONTAINER_IMAGE \ + --container-mounts=$CONTAINER_MOUNTS \ + bash -c " + source $MPIVARS_PATH && + /nccl/build/all_reduce_perf -b 1G -e 16G -f 2 -g 1 + " \ No newline at end of file diff --git a/samples/gpu/no_ncclparam_nccl_run_allreduce.sbatch b/samples/gpu/no_ncclparam_nccl_run_allreduce.sbatch new file mode 100644 index 00000000..caa15c6f --- /dev/null +++ b/samples/gpu/no_ncclparam_nccl_run_allreduce.sbatch @@ -0,0 +1,63 @@ +#!/bin/bash +#SBATCH --job-name=nccl-allreduce-slurm +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=8 +#SBATCH --ntasks-per-node=8 +#SBATCH --exclusive +export PMI_DEBUG=1 + + +cd /nfs/cluster +mkdir $SLURM_JOB_ID +cd $SLURM_JOB_ID + +MACHINEFILE="hostfile" +ORDEREDMACHINEFILE="ordered_hostfile_system_name" +ORDEREDRANKMACHINEFILE="rankfile_system_name" + +scontrol show hostnames $SLURM_JOB_NODELIST > $MACHINEFILE +echo MACHINEFILE +cat $MACHINEFILE + +source /etc/os-release +if [ $ID == "ol" ] || [ $ID == "centos" ] ; then + python3 /home/opc/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null +elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then + python3 /home/ubuntu/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null +fi + + +echo ORDEREDMACHINEFILE +cat $ORDEREDMACHINEFILE +echo ORDEREDRANKMACHINEFILE +cat $ORDEREDRANKMACHINEFILE + +mpivars_path=`ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh` + +if [[ "$mpivars_path" == "" ]]; then + mpivars_path=`ls /opt/openmpi-*/bin/mpivars.sh` +fi + +if [[ "$mpivars_path" == "" ]]; then + echo "Could not find MPIPATH"; exit; fi + +source $mpivars_path + +shape=`curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/ | jq .shape` +if [ $shape == \"BM.GPU.B4.8\" ] || [ $shape == \"BM.GPU.A100-v2.8\" ] +then + var_UCX_NET_DEVICES=mlx5_0:1 +elif [ $shape == \"BM.GPU4.8\" ] +then + var_UCX_NET_DEVICES=mlx5_4:1 +fi + + mpirun --mca pml ucx \ + --bind-to numa \ + --mca coll ^hcoll \ + -x UCX_TLS=ud,self,sm \ + -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ + -x HCOLL_ENABLE_MCAST_ALL=0 \ + -x coll_hcoll_enable=0 \ + -x NCCL_ALGO=Ring \ + --np 
$((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --rankfile $ORDEREDRANKMACHINEFILE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100 diff --git a/samples/gpu/no_ncclparam_nccl_run_allreduce.sh b/samples/gpu/no_ncclparam_nccl_run_allreduce.sh new file mode 100644 index 00000000..8fa98a1e --- /dev/null +++ b/samples/gpu/no_ncclparam_nccl_run_allreduce.sh @@ -0,0 +1,85 @@ +#!/bin/bash +set -e + +# number of times to run the nccl test to stress the GPUs and RDMA network. This is different from the -n iterations parameter of nccl allreduce, which is set below using $iter +max=$1 + +# This assumes the hostfile passed is already ordered based on rackId +if [ -n "$2" ]; then + hostfile=$2 +else + hostfile="/tmp/ordered_hostfile_system_name" +fi + +ORDEREDMACHINEFILE="ordered_hostfile_system_name" +ORDEREDRANKMACHINEFILE="rankfile_system_name" +echo INPUTFILE +cat $hostfile + +# will generate rack-aware ordered host file +source /etc/os-release +if [ $ID == "ol" ] || [ $ID == "centos" ] ; then + python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null +elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then + python3 /home/ubuntu/node_ordering_by_rack.py --input_file $hostfile > /dev/null +fi + +hostfile=$ORDEREDMACHINEFILE +rankfile=$ORDEREDRANKMACHINEFILE + +echo ORDEREDMACHINEFILE +cat $ORDEREDMACHINEFILE +echo ORDEREDRANKMACHINEFILE +cat $ORDEREDRANKMACHINEFILE + +# The number of GPUs to use for the test. Has to be a multiple of 8. If not passed, all GPUs will be used. +if [ -n "$3" ]; then + np=$3 +else + np=$((`less $hostfile | wc -l` * 8 )) +fi + +logfile="nccl_run_allreduce.sh.log" + +for x in $(seq 1 1 $max) +do + + echo $x + echo $x >> $logfile + date >> $logfile + + rankfile=$rankfile; np=$np ; iter=20; + + mpivars_path=`ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh` + + if [[ "$mpivars_path" == "" ]]; then echo "Could not find MPIPATH"; exit; fi + + source $mpivars_path + +first_node=`head $hostfile -n 1` +shape=`ssh $first_node 'curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/' | jq .shape` +if [ $shape == \"BM.GPU.B4.8\" ] || [ $shape == \"BM.GPU.A100-v2.8\" ] +then + var_UCX_NET_DEVICES=mlx5_0:1 +elif [ $shape == \"BM.GPU4.8\" ] +then + var_UCX_NET_DEVICES=mlx5_4:1 +fi + + # final version + # all NCCL parameters are at /etc/nccl.conf on each compute node.
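+ # Editor's note (assumption): NCCL reads /etc/nccl.conf by default, so this
+ # "no_ncclparam" variant keeps the mpirun line below nearly free of -x NCCL_*
+ # flags and relies on the per-shape file written by the nccl-conf role earlier
+ # in this change (h100, a100_b4.8, bm.gpu4.8).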
+ mpirun --mca pml ucx \ + --bind-to numa \ + --mca coll ^hcoll \ + -x UCX_TLS=ud,self,sm \ + -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ + -x HCOLL_ENABLE_MCAST_ALL=0 \ + -x coll_hcoll_enable=0 \ + -x NCCL_ALGO=Ring \ + --np $np --rankfile $rankfile /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n $iter >> $logfile + + tail -n 32 $logfile + + +done + + diff --git a/samples/gpu/no_ncclparam_nccl_run_allreduce_H100.sbatch b/samples/gpu/no_ncclparam_nccl_run_allreduce_H100.sbatch new file mode 100644 index 00000000..591a75c1 --- /dev/null +++ b/samples/gpu/no_ncclparam_nccl_run_allreduce_H100.sbatch @@ -0,0 +1,52 @@ +#!/bin/bash +#SBATCH --job-name=nccl-allreduce-slurm +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=8 +#SBATCH --ntasks-per-node=8 +#SBATCH --exclusive +export PMI_DEBUG=1 + + +cd /nfs/cluster +mkdir $SLURM_JOB_ID +cd $SLURM_JOB_ID + +MACHINEFILE="hostfile" + +scontrol show hostnames $SLURM_JOB_NODELIST > $MACHINEFILE +echo MACHINEFILE +cat $MACHINEFILE + +source /etc/os-release + +mpivars_path=`ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh` + +if [[ "$mpivars_path" == "" ]]; then + mpivars_path=`ls /opt/openmpi-*/bin/mpivars.sh` +fi + +if [[ "$mpivars_path" == "" ]]; then + echo "Could not find MPIPATH"; exit; fi + +source $mpivars_path + +shape=`curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/ | jq .shape` +if [ $shape == \"BM.GPU.H100.8\" ] +then + var_UCX_NET_DEVICES=eth0 +else + echo "Use the appropriate nccl test run script for non H100 nodes" +fi + + # all NCCL parameters are at /etc/nccl.conf on each compute node. + mpirun --mca pml ucx \ + --bind-to numa \ + -npernode 8 \ + --mca coll ^hcoll \ + -x HCOLL_ENABLE_MCAST_ALL=0 \ + -x coll_hcoll_enable=0 \ + -x UCX_TLS=tcp \ + -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ + -x RX_QUEUE_LEN=8192 \ + -x IB_RX_QUEUE_LEN=8192 \ + --np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --hostfile $MACHINEFILE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b 1G -e 16G -f 2 -g 1 \ No newline at end of file diff --git a/samples/gpu/no_ncclparam_nccl_run_allreduce_H100.sh b/samples/gpu/no_ncclparam_nccl_run_allreduce_H100.sh new file mode 100644 index 00000000..b83307b2 --- /dev/null +++ b/samples/gpu/no_ncclparam_nccl_run_allreduce_H100.sh @@ -0,0 +1,71 @@ +#!/bin/bash +set -e + +# number of times to run the nccl test to stress the GPUs and RDMA network. +max=$1 + +# This assumes the hostfile passed is already ordered by rackId +if [ -n "$2" ]; then + hostfile=$2 +else + hostfile="/etc/opt/oci-hpc/hostfile.tcp" +fi + +echo INPUTFILE +cat $hostfile + +# The number of GPUs to use for the test. Has to be a multiple of 8. If not passed, all GPUs will be used.
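+# (BM.GPU.H100.8 exposes 8 GPUs per node, hence the 8 * host-count default below.)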
+if [ -n "$3" ]; then + np=$3 +else + np=$((`cat $hostfile | wc -l` * 8 )) +fi + +logfile="nccl_run_allreduce.sh.log" + +for x in $(seq 1 1 $max) +do + + echo $x + echo $x >> $logfile + date >> $logfile + + mpivars_path=`ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh` + + if [[ "$mpivars_path" == "" ]]; then + mpivars_path=`ls /opt/openmpi-*/bin/mpivars.sh` + fi + + if [[ "$mpivars_path" == "" ]]; then + echo "Could not find MPIPATH"; exit; fi + + source $mpivars_path + + first_node=`head $hostfile -n 1` + shape=`ssh $first_node 'curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/' | jq .shape` + if [ $shape == \"BM.GPU.H100.8\" ] + then + var_UCX_NET_DEVICES=eth0 + else + echo "Use the appropriate nccl test run script for non H100 nodes" + fi + + # all NCCL parameters are at /etc/nccl.conf on each compute node. + mpirun --mca pml ucx \ + --bind-to numa \ + -npernode 8 \ + --mca coll ^hcoll \ + -x HCOLL_ENABLE_MCAST_ALL=0 \ + -x coll_hcoll_enable=0 \ + -x UCX_TLS=tcp \ + -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ + -x RX_QUEUE_LEN=8192 \ + -x IB_RX_QUEUE_LEN=8192 \ + --np $np --hostfile $hostfile /opt/oci-hpc/nccl-test/build/all_reduce_perf -b 1G -e 16G -f 2 -g 1 >> $logfile + + tail -n 32 $logfile + + +done \ No newline at end of file diff --git a/samples/gpu/no_ncclparam_tuner_nccl_run_allreduce.sbatch b/samples/gpu/no_ncclparam_tuner_nccl_run_allreduce.sbatch new file mode 100644 index 00000000..7de77737 --- /dev/null +++ b/samples/gpu/no_ncclparam_tuner_nccl_run_allreduce.sbatch @@ -0,0 +1,65 @@ +#!/bin/bash +#SBATCH --job-name=nccl-allreduce-slurm +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=8 +#SBATCH --ntasks-per-node=8 +#SBATCH --exclusive +export PMI_DEBUG=1 + + +cd /nfs/cluster +mkdir $SLURM_JOB_ID +cd $SLURM_JOB_ID + +MACHINEFILE="hostfile" +ORDEREDMACHINEFILE="ordered_hostfile_system_name" +ORDEREDRANKMACHINEFILE="rankfile_system_name" + +scontrol show hostnames $SLURM_JOB_NODELIST > $MACHINEFILE +echo MACHINEFILE +cat $MACHINEFILE + +source /etc/os-release +if [ $ID == "ol" ] || [ $ID == "centos" ] ; then + python3 /home/opc/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null + homedirectory=/home/opc +elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then + python3 /home/ubuntu/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null + homedirectory=/home/ubuntu +fi + + +echo ORDEREDMACHINEFILE +cat $ORDEREDMACHINEFILE +echo ORDEREDRANKMACHINEFILE +cat $ORDEREDRANKMACHINEFILE + +mpivars_path=`ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh` + +if [[ "$mpivars_path" == "" ]]; then + mpivars_path=`ls /opt/openmpi-*/bin/mpivars.sh` +fi + +if [[ "$mpivars_path" == "" ]]; then + echo "Could not find MPIPATH"; exit; fi + +source $mpivars_path + +shape=`curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/ | jq .shape` +if [ $shape == \"BM.GPU.B4.8\" ] || [ $shape == \"BM.GPU.A100-v2.8\" ] +then + var_UCX_NET_DEVICES=mlx5_0:1 +elif [ $shape == \"BM.GPU4.8\" ] +then + var_UCX_NET_DEVICES=mlx5_4:1 +fi + + mpirun --mca pml ucx \ + --bind-to numa \ + --mca coll ^hcoll \ + -x UCX_TLS=ud,self,sm \ + -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ + -x HCOLL_ENABLE_MCAST_ALL=0 \ + -x coll_hcoll_enable=0 \ + -x NCCL_TUNER_PLUGIN=$homedirectory/libnccl-ocituner.so.1.0.1 \ + --np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --rankfile $ORDEREDRANKMACHINEFILE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100 diff --git a/samples/gpu/no_ncclparam_tuner_nccl_run_allreduce.sh
b/samples/gpu/no_ncclparam_tuner_nccl_run_allreduce.sh new file mode 100644 index 00000000..25a496e3 --- /dev/null +++ b/samples/gpu/no_ncclparam_tuner_nccl_run_allreduce.sh @@ -0,0 +1,87 @@ +#!/bin/bash +set -e + +# number of times to run the nccl test to stress the GPUs and RDMA network. This is different from the -n iterations parameter of the nccl allreduce, which is set below using $iter +max=$1 + +# This assumes the hostfile passed is already ordered by rackId +if [ -n "$2" ]; then + hostfile=$2 +else + hostfile="/tmp/ordered_hostfile_system_name" +fi + +ORDEREDMACHINEFILE="ordered_hostfile_system_name" +ORDEREDRANKMACHINEFILE="rankfile_system_name" +echo INPUTFILE +cat $hostfile + +# will generate rack-aware ordered host file +source /etc/os-release +if [ $ID == "ol" ] || [ $ID == "centos" ] ; then + python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null + homedirectory=/home/opc +elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then + python3 /home/ubuntu/node_ordering_by_rack.py --input_file $hostfile > /dev/null + homedirectory=/home/ubuntu +fi + +hostfile=$ORDEREDMACHINEFILE +rankfile=$ORDEREDRANKMACHINEFILE + +echo ORDEREDMACHINEFILE +cat $ORDEREDMACHINEFILE +echo ORDEREDRANKMACHINEFILE +cat $ORDEREDRANKMACHINEFILE + +# The number of GPUs to use for the test. Has to be a multiple of 8. If not passed, all GPUs will be used. +if [ -n "$3" ]; then + np=$3 +else + np=$((`cat $hostfile | wc -l` * 8 )) +fi + +logfile="nccl_run_allreduce.sh.log" + +for x in $(seq 1 1 $max) +do + + echo $x + echo $x >> $logfile + date >> $logfile + + iter=20; + + mpivars_path=`ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh` + + if [[ "$mpivars_path" == "" ]]; then echo "Could not find MPIPATH"; exit; fi + + source $mpivars_path + +first_node=`head $hostfile -n 1` +shape=`ssh $first_node 'curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/' | jq .shape` +if [ $shape == \"BM.GPU.B4.8\" ] || [ $shape == \"BM.GPU.A100-v2.8\" ] +then + var_UCX_NET_DEVICES=mlx5_0:1 +elif [ $shape == \"BM.GPU4.8\" ] +then + var_UCX_NET_DEVICES=mlx5_4:1 +fi + + # all NCCL parameters are at /etc/nccl.conf on each compute node. + mpirun --mca pml ucx \ + --bind-to numa \ + --mca coll ^hcoll \ + -x UCX_TLS=ud,self,sm \ + -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \ + -x HCOLL_ENABLE_MCAST_ALL=0 \ + -x coll_hcoll_enable=0 \ + -x NCCL_TUNER_PLUGIN=$homedirectory/libnccl-ocituner.so.1.0.1 \ + --np $np --rankfile $rankfile /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n $iter >> $logfile + + tail -n 32 $logfile + + +done + + diff --git a/samples/nfs/README.txt index f08d289e..beec4742 100644 --- a/samples/nfs/README.txt +++ b/samples/nfs/README.txt @@ -2,7 +2,7 @@ Problem: When node running NFS needs to be terminated due to H/W failure, site.yml playbook fails, sudo umount /nfs/scratch hangs. Solution: -1. Manually change the ansible inventory file (/etc/ansible/hosts) on bastion. You will need to use sudo. +1. Manually change the ansible inventory file (/etc/ansible/hosts) on the controller. You will need to use sudo. a. To replace the [nfs] group hostname with another node of the cluster to act as NFS server. Example: ansible_user=opc role=nfs b. If the node that was deleted is still there in [compute_configured] group, then remove it.
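All of the `no_ncclparam` variants added above share the same calling convention, so a usage sketch may help; the repetition count, hostfile path and rank count below are illustrative only:

```
# Under Slurm, node and GPU counts come from the #SBATCH headers:
sbatch /opt/oci-hpc/samples/gpu/no_ncclparam_nccl_run_allreduce.sbatch

# Standalone: $1 = number of times to repeat the test, $2 = optional rack-ordered
# hostfile (defaults to /tmp/ordered_hostfile_system_name, or
# /etc/opt/oci-hpc/hostfile.tcp for the H100 variant), $3 = optional rank count
# (a multiple of 8, defaults to 8 per listed host).
./no_ncclparam_nccl_run_allreduce.sh 3 /tmp/ordered_hostfile_system_name 16
```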
diff --git a/samples/open-ldap/add-ldap-users.yml b/samples/open-ldap/add-ldap-users.yml index 73d15697..cb8da5eb 100644 --- a/samples/open-ldap/add-ldap-users.yml +++ b/samples/open-ldap/add-ldap-users.yml @@ -1,5 +1,5 @@ --- -- hosts: bastion +- hosts: controller become: true #vars: #ansible_remote_tmp: /tmp/ansible_remote_tmp diff --git a/schema.yaml b/schema.yaml index 037e685b..be651072 100755 --- a/schema.yaml +++ b/schema.yaml @@ -12,8 +12,8 @@ source: locale: "en" outputs: - bastion: - title: "Bastion Instance Public IP" + controller: + title: "Controller Instance Public IP" type: copyableString visible: true @@ -27,15 +27,15 @@ variableGroups: - ${ldap} - title: "Headnode options" variables: - ${bastion_ad} - ${bastion_shape} - ${bastion_ocpus} - ${bastion_ocpus_denseIO_flex} - ${bastion_custom_memory} - ${bastion_memory} - ${bastion_boot_volume_size} - ${bastion_boot_volume_backup} - ${bastion_object_storage_par} + - ${controller_ad} + - ${controller_shape} + - ${controller_ocpus} + - ${controller_ocpus_denseIO_flex} + - ${controller_custom_memory} + - ${controller_memory} + - ${controller_boot_volume_size} + - ${controller_boot_volume_backup} + - ${controller_object_storage_par} - title: "Compute node options" variables: - ${use_multiple_ads} @@ -64,6 +64,14 @@ variableGroups: - ${compute_image_compartment} - ${image} - ${image_ocid} + - ${BIOS} + - ${IOMMU} + - ${SMT} + - ${virt_instr} + - ${access_ctrl} + - ${numa_nodes_per_socket} + - ${percentage_of_cores_enabled} + - title: "Additional Login Node" variables: - ${login_node} @@ -115,15 +123,15 @@ variableGroups: - ${nfs_options} - ${fss_compartment} - ${fss_ad} - - title: "Advanced bastion options" + - title: "Advanced controller options" variables: - - ${use_marketplace_image_bastion} - - ${marketplace_listing_bastion} - - ${unsupported_bastion} - - ${bastion_image_compartment} - - ${custom_bastion_image} - - ${unsupported_bastion_image} - - ${bastion_username} + - ${use_marketplace_image_controller} + - ${marketplace_listing_controller} + - ${unsupported_controller} + - ${controller_image_compartment} + - ${custom_controller_image} + - ${unsupported_controller_image} + - ${controller_username} - title: "Advanced storage options" variables: - ${use_advanced} @@ -131,9 +139,9 @@ variableGroups: - ${home_fss} - ${use_cluster_nfs} - ${cluster_nfs_path} - - ${bastion_block} - - ${bastion_block_volume_size} - - ${bastion_block_volume_performance} + - ${controller_block} + - ${controller_block_volume_size} + - ${controller_block_volume_performance} - ${use_scratch_nfs} - ${scratch_nfs_path} - ${scratch_nfs_type_cluster} @@ -156,6 +164,8 @@ variableGroups: - ${private_subnet} - ${rdma_subnet} - ${additional_subnet} + - ${dns_entries} + - ${zone_name} - title: "Software" variables: - ${privilege_sudo} @@ -186,10 +196,10 @@ variableGroups: - ${ssh_cidr} - ${marketplace_source_images} - ${marketplace_version_id} - - ${bastion_boot_volume_backup_period} - - ${bastion_boot_volume_backup_retention_seconds} - - ${bastion_boot_volume_backup_time_zone} - - ${bastion_boot_volume_backup_type} + - ${controller_boot_volume_backup_period} + - ${controller_boot_volume_backup_retention_seconds} + - ${controller_boot_volume_backup_time_zone} + - ${controller_boot_volume_backup_type} visible: false - title: "Debug" variables: @@ -241,7 +251,7 @@ variables: default: false ldap: type: boolean - title: "Configure LDAP authentication from bastion" + title: "Configure LDAP authentication from controller" description: "When selected
nodes will be configured to use LDAP authentication. User and group management can be performed using cluster commands." default: true cluster_name: @@ -253,22 +263,23 @@ variables: and: - ${use_custom_name} required: true - bastion_ad: + controller_ad: type: oci:identity:availabilitydomain:name dependsOn: compartmentId: ${targetCompartment} visible: complexExpression required: true - description: "Availability Domain for bastion host" + description: "Availability Domain for controller host" title: "Availability Domain" #default: ${ad} - bastion_shape: + controller_shape: + title: "Controller Shape" type: oci:core:instanceshape:name dependsOn: compartmentId: ${targetCompartment} required: true default: VM.Standard.E4.Flex - bastion_ocpus: + controller_ocpus: title: "Cores" type: integer description: Number of OCPU's for flex shape @@ -279,26 +290,26 @@ variables: and: - or: - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E3.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E4.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E5.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Optimized3.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.A1.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard3.Flex" required: true - bastion_ocpus_denseIO_flex: + controller_ocpus_denseIO_flex: title: "Cores" type: enum description: Number of OCPU's for Dense IO flex shape @@ -311,11 +322,14 @@ variables: and: - or: - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.DenseIO.E4.Flex" + - eq: + - ${controller_shape} + - "VM.DenseIO.E5.Flex" required: true - bastion_custom_memory: + controller_custom_memory: title: Use custom memory size type: boolean default: false @@ -323,24 +337,24 @@ variables: and: - or: - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E3.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Optimized3.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E4.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E5.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.A1.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard3.Flex" - bastion_memory: + controller_memory: title: Memory in GBS type: integer description: Number of memory for flex shape. Minimum 1GB per core. @@ -352,42 +366,42 @@ variables: - and: - or: - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E3.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Optimized3.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E4.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.E5.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard.A1.Flex" - eq: - - ${bastion_shape} + - ${controller_shape} - "VM.Standard3.Flex" - and: - - ${bastion_custom_memory} + - ${controller_custom_memory} required: true - bastion_object_storage_par: + controller_object_storage_par: title: Create Object Storage PAR description: "Create a PAR (i.e. Pre-Authenticated Request), so that user could use that PAR to upload monitoring metrics to Object Storage and share the URL with OCI service teams." 
type: boolean default: true - unsupported_bastion: + unsupported_controller: title: "Use unsupported image" - description: "Custom image ID for Bastion" + description: "Use a custom, unsupported image for the controller" type: boolean default: false visible: not: - - ${use_marketplace_image_bastion} + - ${use_marketplace_image_controller} - use_marketplace_image_bastion: + use_marketplace_image_controller: type: boolean title: "use marketplace image" description: "Use marketplace image, otherwise provide custom image OCID" @@ -395,7 +409,7 @@ visible: true - marketplace_listing_bastion: + marketplace_listing_controller: type: enum title: "Image version" description: "Marketplace listing to use" @@ -403,74 +417,76 @@ enum: - "HPC_OL7" - "HPC_OL8" - - "GPU_OL7" - - "GPU_OL8" + - "GPU_OL7_CUDA12.2" + - "GPU_OL8_CUDA12.2" + - "GPU_OL7_CUDA12.4" + - "GPU_OL8_CUDA12.4" default: "HPC_OL7" - visible: ${use_marketplace_image_bastion} + visible: ${use_marketplace_image_controller} - bastion_username: - title: "Default username for bastion" - description: "Custom image ID for Bastion" + controller_username: + title: "Default username for controller" + description: "Default username on the controller" type: string default: "opc" required: true visible: true - unsupported_bastion_image: + unsupported_controller_image: title: "Image OCID" - description: "Custom image ID for compute nodes. Please note that only Oracle Linux 7 and Ubuntu 20.04 are supported as bastion image at this moment." + description: "Custom image ID for the controller. Please note that only Oracle Linux 7 and Ubuntu 20.04 are supported as controller images at this moment." type: string required: true - visible: ${unsupported_bastion} + visible: ${unsupported_controller} default: "image.ocid" - bastion_image_compartment: - title: "bastion image compartment" + controller_image_compartment: + title: "Controller image compartment" type: oci:identity:compartment:id default: ${targetCompartment} visible: and: - not: - - ${unsupported_bastion} + - ${unsupported_controller} - not: - - ${use_marketplace_image_bastion} + - ${use_marketplace_image_controller} required: true - custom_bastion_image: - title: "Bastion Image ID" - description: "Custom image ID for bastion nodes. Please note that only Oracle Linux 7, 8 and Ubuntu 20.04 are supported as bastion image at this moment. " + custom_controller_image: + title: "Controller Image ID" + description: "Custom image ID for the controller. Please note that only Oracle Linux 7, 8 and Ubuntu 20.04 are supported as controller images at this moment." + type: oci:core:image:id dependsOn: - compartmentId: ${bastion_image_compartment} + compartmentId: ${controller_image_compartment} visible: and: - not: - - ${unsupported_bastion} + - ${unsupported_controller} - not: - - ${use_marketplace_image_bastion} + - ${use_marketplace_image_controller} required: true - bastion_boot_volume_size: + controller_boot_volume_size: type: integer required: true minimum: 50 title: "Size of the boot volume in GB" default: 100 - bastion_boot_volume_backup: + controller_boot_volume_backup: type: boolean title: "Enable boot volume backup" description: "Schedule: Daily, Type: Incremental, Start Time: 00:00 Regional Time, Retention: 90 days."
default: true - bastion_block: + controller_block: type: boolean title: Additional block volume for shared space visible: and: - ${use_advanced} default: false - bastion_block_volume_size: + controller_block_volume_size: required: true type: integer title: "Size of the additional volume in GB" @@ -478,9 +494,9 @@ visible: and: - and: - - ${bastion_block} + - ${controller_block} - ${use_advanced} - bastion_block_volume_performance: + controller_block_volume_performance: type: enum title: "Block volume performance" required: true @@ -492,11 +508,11 @@ visible: and: - and: - - ${bastion_block} + - ${controller_block} - ${use_advanced} home_nfs: type: boolean - title: "shared NFS /home from bastion. To use FSS, make sure you created one or added NFS mount information" + title: "shared NFS /home from controller. To use FSS, make sure you created one or added NFS mount information" visible: and: - ${use_advanced} @@ -515,7 +531,7 @@ variables: use_cluster_nfs: type: boolean - title: "shared NFS volume from bastion" + title: "shared NFS volume from controller" visible: and: - ${use_advanced} @@ -642,6 +658,9 @@ variables: - eq: - ${instance_pool_shape} - "VM.DenseIO.E4.Flex" + - eq: + - ${instance_pool_shape} + - "VM.DenseIO.E5.Flex" required: true instance_pool_custom_memory: @@ -742,8 +761,10 @@ variables: enum: - "HPC_OL7" - "HPC_OL8" - - "GPU_OL7" - - "GPU_OL8" + - "GPU_OL7_CUDA12.2" + - "GPU_OL8_CUDA12.2" + - "GPU_OL7_CUDA12.4" + - "GPU_OL8_CUDA12.4" default: "HPC_OL7" visible: ${use_marketplace_image} @@ -752,6 +773,9 @@ variables: title: "use compute agent" description: "Select if your image has the OCA agent rather than the oci-cn-auth package. The new marketplace images need the compute agent enabled." default: true + visible: + not: + - ${use_marketplace_image} compute_image_compartment: title: "compute image compartment" @@ -810,6 +834,65 @@ variables: - and: - ${unsupported} + BIOS: + title: "Modify BIOS options" + description: "Make sure that the BIOS options are changeable for the specific shape selected" + type: boolean + default: false + visible: true + + IOMMU: + title: "IOMMU enabled" + type: boolean + default: false + visible: ${BIOS} + + SMT: + title: "SMT Enabled" + type: boolean + default: true + visible: ${BIOS} + + virt_instr: + title: "Virtualization instructions" + description: "Virtualization instructions include Secure Virtual Machine for AMD shapes or VT-x for Intel shapes" + type: boolean + default: false + visible: ${BIOS} + + access_ctrl: + title: "Access Control Service" + description: "Access control service lets the platform enforce PCIe device isolation" + type: boolean + default: false + visible: ${BIOS} + + numa_nodes_per_socket: + title: "NUMA Nodes per Socket" + description: "NUMA Settings" + type: enum + enum: + - "Default" + - "NPS0" + - "NPS1" + - "NPS2" + - "NPS4" + default: "Default" + visible: ${BIOS} + + percentage_of_cores_enabled: + title: "Percentage of Cores Enabled" + description: "Percentage of physical cores to enable" + type: enum + enum: + - "Default" + - "25" + - "50" + - "75" + - "100" + default: "Default" + visible: ${BIOS} + use_advanced: type: boolean title: "Show advanced storage options" @@ -923,7 +1006,7 @@ variables: private_deployment: type: boolean title: "Deploy Master Node without a public IP" - description: "Deploy with no Public IP for the master node. 'Master Node Subnet' must be a Private subnet.
This will require the creation of a bastion service, VPN or FastConnect to connect via ssh to the master node" + description: "Deploy with no Public IP for the master node. 'Master Node Subnet' must be a Private subnet. This will require the creation of a Bastion service, VPN or FastConnect to connect via ssh to the master node" default: false use_existing_vcn: type: boolean @@ -970,6 +1053,17 @@ variables: hidePublicSubnet: true visible: ${use_existing_vcn} required: true + dns_entries: + title: DNS entries + type: boolean + default: true + description: "Only available for a private zone" + zone_name: + title: Private Zone Name + description: "The zone needs to be private for the stack to be able to add entries" + type: string + visible: ${use_existing_vcn} + required: true vcn_subnet: type: string title: "VCN IP range" @@ -1042,7 +1136,7 @@ title: "Create a back-up Slurm Controller" default: false required: true - description: "Add a second master of the same shape as the bastion as a back-up controller node. We recommend using a FSS to save the state and share between masters" + description: "Add a second master of the same shape as the controller as a back-up controller node. We recommend using an FSS to save the state and share between masters" visible: ${slurm} pyxis: @@ -1120,7 +1214,7 @@ inst_prin: type: boolean title: "Use Instance Principal instead of configuration file" - description: "You will need to set a dynamic group and policy to allow the bastion to authenticate. This will not be created automatically." + description: "You will need to set a dynamic group and policy to allow the controller to authenticate. This will not be created automatically." default: true api_user_key: @@ -1302,7 +1396,7 @@ type: boolean title: "Create a Mysql Service" default: false - description: "false will use the bastion as mysqlDB" + description: "false will use the controller as the mysql DB" visible: and: - ${autoscaling} @@ -1397,6 +1491,9 @@ - eq: - ${login_shape} - "VM.DenseIO.E4.Flex" + - eq: + - ${login_shape} + - "VM.DenseIO.E5.Flex" - ${login_node} required: true @@ -1572,8 +1669,10 @@ enum: - "HPC_OL7" - "HPC_OL8" - - "GPU_OL7" - - "GPU_OL8" + - "GPU_OL7_CUDA12.2" + - "GPU_OL8_CUDA12.2" + - "GPU_OL7_CUDA12.4" + - "GPU_OL8_CUDA12.4" default: "HPC_OL7" visible: and: diff --git a/scripts/collect_metadata/collect_metadata.py b/scripts/collect_metadata/collect_metadata.py new file mode 100644 index 00000000..aa860e6d --- /dev/null +++ b/scripts/collect_metadata/collect_metadata.py @@ -0,0 +1,149 @@ +import argparse +import os +import sys +import socket +import multiprocessing +import paramiko +import csv +import json + +specific_fieldnames = ['displayName','hostname', 'privateIp','networkBlockId','rackid', 'ociAdName','id'] + +def is_valid_file(parser, arg): + if not os.path.exists(arg): + parser.error(f"The file {arg} does not exist!") + else: + return arg + +def is_valid_hostname(parser, arg): + try: + socket.gethostbyname(arg) + return arg + except socket.error: + parser.error(f"Invalid hostname or IP address: {arg}") + +def json_to_stdout(flattened_results): + # Write the results as CSV to STDOUT + writer = csv.DictWriter(sys.stdout, fieldnames=specific_fieldnames) + writer.writeheader() + for data in flattened_results: + writer.writerow(data) + +def json_to_csv(flattened_results, csv_file): + # Write the results to a CSV file + with open(csv_file, mode='w', newline='') as file:
+ writer = csv.DictWriter(file, fieldnames=specific_fieldnames) + writer.writeheader() + + for data in flattened_results: + writer.writerow(data) + +def process_entry(entry, username): + # Path to the invoking user's private key + ssh_key = "/home/"+username+"/.ssh/id_rsa" + + # SSH connection details + ssh_host = entry + ssh_user = username + + # Create SSH client + ssh_client = paramiko.SSHClient() + ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + parsed_data_list = [] + + try: + # Connect to SSH server using key pair authentication + ssh_client.connect(ssh_host, username=ssh_user, key_filename=ssh_key) + + # Query the instance, host and vnics metadata endpoints over SSH + stdin, stdout, stderr = ssh_client.exec_command('curl -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/') + output = stdout.read().decode() + parsed_instance = json.loads(output) + + stdin, stdout, stderr = ssh_client.exec_command('curl -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/host/') + output = stdout.read().decode() + parsed_host = json.loads(output) + + stdin, stdout, stderr = ssh_client.exec_command('curl -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/vnics/') + output = stdout.read().decode() + list_of_vnics = json.loads(output) + first_vnic = list_of_vnics[0] + + parsed_data = {**parsed_instance, **parsed_host, **first_vnic} + + # Extract required fields from parsed_data + required_fields = specific_fieldnames + extracted_data = {field: parsed_data.get(field, "") for field in required_fields} + parsed_data_list.append(extracted_data) + + except socket.error as e: + print(f"Error occurred while connecting to {ssh_host}: {e}") + return None + except paramiko.AuthenticationException as e: + print(f"Authentication error occurred while connecting to {ssh_host}: {e}") + return None + except paramiko.SSHException as e: + print(f"SSH error occurred while connecting to {ssh_host}: {e}") + return None + except Exception as e: + print(f"Error occurred while connecting to {ssh_host}: {e}") + return None + + finally: + # Close SSH connection + ssh_client.close() + + return parsed_data_list + +def process_entry_wrapper(args): + entry, username = args + return process_entry(entry, username) + +def main(): + parser = argparse.ArgumentParser(description="Process file or hostname/IP address and optionally generate a CSV file of results.") + parser.add_argument('input', metavar='input', type=str, help='Input file or hostname/IP address') + parser.add_argument('--output-dir', metavar='output_dir', type=str, default='.', help='Output directory to save files (default: current directory)') + parser.add_argument('--username', metavar='username', type=str, help='Username to pass to ssh connection, if not set will use login username') + parser.add_argument('--csv', metavar='csv', type=str, help='Generate a CSV file of results') + args = parser.parse_args() + + if not args.username: + args.username = os.getlogin() + + if os.path.isfile(args.input): + print(f"Processing file: {args.input}") + with open(args.input, 'r') as file: + entries = [line.strip() for line in file.readlines()] + + # Create a pool of worker processes + pool = multiprocessing.Pool() + + # Execute the process_entry function on each entry in parallel + results = pool.map(process_entry_wrapper, [(entry, args.username) for entry in entries]) + # Skip hosts that failed (process_entry returned None) before flattening + flattened_results = [item for sublist in results if sublist for item in sublist] + + # Close the pool to release resources + pool.close() + pool.join()
+ # Write the results to a CSV file or to stdout + if args.csv: + json_to_csv(flattened_results, args.csv) + else: + json_to_stdout(flattened_results) + + else: + print(f"Processing hostname/IP: {args.input}") + result = process_entry(args.input, args.username) or [] + + # Write the results to a CSV file or to stdout + if args.csv: + json_to_csv(result, args.csv) + else: + json_to_stdout(result) + + +if __name__ == "__main__": + main() diff --git a/scripts/collect_metadata/requirements.txt b/scripts/collect_metadata/requirements.txt new file mode 100644 index 00000000..8608c1b0 --- /dev/null +++ b/scripts/collect_metadata/requirements.txt @@ -0,0 +1 @@ +paramiko diff --git a/scripts/h100_script.py b/scripts/h100_script.py new file mode 100644 index 00000000..afa0258d --- /dev/null +++ b/scripts/h100_script.py @@ -0,0 +1,165 @@ +import os +from datetime import datetime +import argparse +import subprocess +import sys + + +def getDateTime(): + # datetime object containing current date and time + now = datetime.now() + dt_string = now.strftime("%m%d%Y%H%M%S") + return dt_string + + +# create directory to hold results +def createDir(): + # directory name + directory = str("/tmp/" + getDateTime()) + try: + os.mkdir(directory) + except OSError as error: + print(error) + sys.exit(-1) + return directory + + +# change ownership of all files to user so that the files can be copied +def changeOwner(path): + username = os.getlogin() + cmd = f'sudo chown -R {username}:{username} {path}' + run_cmd(cmd) + + +def getSshableNodes(hosts, path): + hosts_file = open(hosts, "r") + ssh_list = path + "/" + "sshable" + not_ssh_list = path + "/" + "notsshable" + sshable = open(ssh_list, "a") + notsshable = open(not_ssh_list, "a") + for line in hosts_file: + host = line.split() + host_value = host[0] + cmd = f'ssh -o ConnectTimeout=10 {host_value} "cat /etc/os-release | grep PRETTY_NAME"' + isSshable = run_cmd(cmd) + if not isSshable: + notsshable.write(host_value) + notsshable.write("\n") + elif 'PRETTY_NAME' in isSshable[0]: + sshable.write(host_value) + sshable.write("\n") + else: + notsshable.write(host_value) + notsshable.write("\n") + sshable.close() + notsshable.close() + hosts_file.close() + return ssh_list + + +def run_cmd(cmd=None): + """Run a command in a shell.""" + try: + results = subprocess.run(cmd, shell=True, executable='/bin/bash', stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, encoding='utf8') + output = results.stdout.splitlines() + except subprocess.CalledProcessError as e: + print(f'Command {e.cmd} failed with error {e.returncode}') + return e.returncode + return output + + +# get interfaces that are Down +def ibdev(hosts, path): + log_file = path + "/" + "ibdev2netdev" + cmd = f'for i in $(cat {hosts}); do ssh $i "hostname; hostname -i; sudo dmidecode -s system-serial-number; ibdev2netdev | grep Down"; done > {log_file}' + run_cmd(cmd) + + +# get EAP-FAILURE +def eapFailure(hosts, path): + log_file = path + "/" + "eapfailure" + cmd = f'for i in $(cat {hosts}); do ssh $i "hostname; hostname -i; sudo dmidecode -s system-serial-number; cat /var/log/syslog | grep EAP-FAILURE"; done > {log_file}' + run_cmd(cmd) + + +# get rdma links authentication +def rdmaAuth(hosts, path): + log_file = path + "/" + "rdmaauth" + hosts_file = open(hosts, "r") + rdma_file = open(log_file, "a") + for line in hosts_file: + host = line.split() + host_value = host[0] + cmd = f'ssh {host_value} "hostname; hostname -i; sudo dmidecode -s system-serial-number"' + output = run_cmd(cmd) + for o in output:
+ rdma_file.write(o) + rdma_file.write("\n") + cmd = f'ssh {host_value} \'for x in $(seq 0 15) ; do sudo wpa_cli -i rdma$x status | grep EAP ; done\'' + output = run_cmd(cmd) + for o in output: + rdma_file.write(o) + rdma_file.write("\n") + rdma_file.close() + hosts_file.close() + + +# get logs for Link Flapping +def linksDown(hosts, path): + log_file = path + "/" + "linkflapping" + cmd = f'for i in $(cat {hosts}); do ssh $i "hostname; hostname -i; sudo dmidecode -s system-serial-number; cat /var/log/syslog | grep \\"Link \\" | tail -36"; done > {log_file}' + run_cmd(cmd) + + +# Check if any GPU has fallen off the bus +def lspci(hosts, path): + log_file = path + "/" + "lspci" + cmd = f'for i in $(cat {hosts}); do ssh $i "hostname; hostname -i; sudo dmidecode -s system-serial-number; lspci | grep \\"rev ff\\""; done > {log_file}' + run_cmd(cmd) + + +# Check for NVRM errors +def nvrm(hosts, path): + log_file = path + "/" + "nvrm" + cmd = f'for i in $(cat {hosts}); do ssh $i "hostname; hostname -i; sudo dmidecode -s system-serial-number; sudo dmesg | grep NVRM"; done > {log_file}' + run_cmd(cmd) + + +# Check for Pending remaps +def pending(hosts, path): + log_file = path + "/" + "pending_remaps" + cmd = f'for i in $(cat {hosts}); do ssh $i "hostname; hostname -i; sudo dmidecode -s system-serial-number; nvidia-smi -q | grep \\"Pending : Yes\\""; done > {log_file}' + run_cmd(cmd) + + +# Check for Remapping failures +def remapping(hosts, path): + log_file = path + "/" + "remapping_failures" + cmd = f'for i in $(cat {hosts}); do ssh $i "hostname; hostname -i; sudo dmidecode -s system-serial-number; nvidia-smi -q | grep \\"Remapping Failure Occurred : Yes\\""; done > {log_file}' + run_cmd(cmd) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'Capture H100 troubleshooting data.') + parser.add_argument('--hosts', help = "Provide a filepath that contains a list of IPs or hostnames, one per line, on which to run this script.", required = True) + args = parser.parse_args() + hosts = args.hosts + if hosts is None: + print("Hostfile is required.
Please provide one and run again.") + sys.exit(-1) + else: + path = createDir() + changeOwner(path) + ssh_hosts = getSshableNodes(hosts, path) + ibdev(ssh_hosts, path) + eapFailure(ssh_hosts, path) + rdmaAuth(ssh_hosts, path) + linksDown(ssh_hosts, path) + lspci(ssh_hosts, path) + nvrm(ssh_hosts, path) + pending(ssh_hosts, path) + remapping(ssh_hosts, path) + print("The results are at location: " + path) + diff --git a/scripts/ib_write_bw.sh b/scripts/ib_write_bw.sh index 58138096..951afe84 100644 --- a/scripts/ib_write_bw.sh +++ b/scripts/ib_write_bw.sh @@ -248,7 +248,7 @@ scp /tmp/ib_client.sh $client:/tmp ssh $server "/tmp/ib_server.sh" & ssh $client "/tmp/ib_client.sh" -#Sync results to bastion +# Sync results to controller mkdir -p $logdir rsync -a opc@$client:$outdir $logdir diff --git a/scripts/max_nodes_partition.py b/scripts/max_nodes_partition.py index d903acc7..0f6a67e5 100644 --- a/scripts/max_nodes_partition.py +++ b/scripts/max_nodes_partition.py @@ -75,7 +75,7 @@ def getClusterNames(): del x[-1] cluster_list = [] for cluster in x: - if (cluster == "BASTION"): + if (cluster == "CONTROLLER"): continue else: cluster_list.append(cluster) diff --git a/scripts/validation.py b/scripts/validation.py index d3c56174..f287c9a1 100644 --- a/scripts/validation.py +++ b/scripts/validation.py @@ -180,7 +180,7 @@ def getResizeNodes(args, metadata, cluster_names, mode): cluster_node_set = set() for i in range(len(x)): if str in x[i]: - permanent_cluster = metadata['displayName'].replace('-bastion','') + permanent_cluster = metadata['displayName'].replace('-controller','') if permanent_cluster in cluster_names: return cluster_names, resize_cluster_node_dict else: @@ -334,9 +334,9 @@ def etcHostsSame(nodes, path): stdout,stderr = out.communicate() x = stdout.split("\n") del x[-1] - bastion_md5 = x[0].replace('"','') + controller_md5 = x[0].replace('"','') md5_set = set() - md5_set.add(bastion_md5) + md5_set.add(controller_md5) out = subprocess.Popen(["pdsh -w "+nodes+" 'linecount=`cat /etc/hosts | wc -l ` ; lines=$((linecount-3)) ; tail -n $lines /etc/hosts | md5sum'"],stdout=subprocess.PIPE, stderr=subprocess.STDOUT,shell=True,universal_newlines=True) stdout,stderr = out.communicate() x = stdout.split("\n") @@ -354,7 +354,7 @@ def etcHostsSame(nodes, path): continue else: md5 = split_str[1].lstrip() - if md5 != bastion_md5: + if md5 != controller_md5: if path is None: path = createDir() changeOwner(path) @@ -363,9 +363,9 @@ def etcHostsSame(nodes, path): f.close() md5_set.add(md5) if len(md5_set) > 1: - print("/etc/hosts on bastion and nodes is different") + print("/etc/hosts differs between the controller and the nodes") else: - print("/etc/hosts is same on bastion and all nodes that are ssh-able") + print("/etc/hosts is the same on the controller and all ssh-able nodes") return path @@ -390,7 +390,7 @@ def ociCommand(metadata, cluster_names): def inventoryNodes(metadata, cluster_names): inventory_node_cluster_dict = {} - permanent_cluster = metadata['displayName'].replace('-bastion','') + permanent_cluster = metadata['displayName'].replace('-controller','') for cluster in cluster_names: if cluster == permanent_cluster: inventory = "/etc/ansible/hosts" @@ -532,7 +532,7 @@ def runChecks(args, type, name, hostFileWritten, resize_node_cluster_dict, metad number of nodes and/or pcie check and/or gpu throttle check.") parser.add_argument('-p', '--pcie', help = "Runs PCIe bandwidth check") parser.add_argument('-g', '--gpu_throttle', help = "Performs GPU throttle check") -parser.add_argument('-e',
'--etc_hosts', help = "Performs md5 sum check on all hosts and checks if it matches with the bastion") +parser.add_argument('-e', '--etc_hosts', help = "Performs md5 sum check on all hosts and checks if it matches with the controller") args = parser.parse_args() diff --git a/slurm_ha.tf b/slurm_ha.tf index bc3d04cd..36dc60db 100644 --- a/slurm_ha.tf +++ b/slurm_ha.tf @@ -1,7 +1,7 @@ resource "oci_core_volume_attachment" "backup_volume_attachment" { - count = var.bastion_block && var.slurm_ha ? 1 : 0 + count = var.controller_block && var.slurm_ha ? 1 : 0 attachment_type = "iscsi" - volume_id = oci_core_volume.bastion_volume[0].id + volume_id = oci_core_volume.controller_volume[0].id instance_id = oci_core_instance.backup[0].id display_name = "${local.cluster_name}-backup-volume-attachment" device = "/dev/oracleoci/oraclevdb" @@ -11,15 +11,15 @@ resource "oci_core_volume_attachment" "backup_volume_attachment" { resource "oci_core_instance" "backup" { count = var.slurm_ha ? 1 : 0 depends_on = [oci_core_subnet.public-subnet] - availability_domain = var.bastion_ad + availability_domain = var.controller_ad compartment_id = var.targetCompartment - shape = var.bastion_shape + shape = var.controller_shape dynamic "shape_config" { - for_each = local.is_bastion_flex_shape + for_each = local.is_controller_flex_shape content { ocpus = shape_config.value - memory_in_gbs = var.bastion_custom_memory ? var.bastion_memory : 16 * shape_config.value + memory_in_gbs = var.controller_custom_memory ? var.controller_memory : 16 * shape_config.value } } agent_config { @@ -34,18 +34,18 @@ resource "oci_core_instance" "backup" { metadata = { ssh_authorized_keys = "${var.ssh_key}\n${tls_private_key.ssh.public_key_openssh}" - user_data = base64encode(data.template_file.bastion_config.rendered) + user_data = base64encode(data.template_file.controller_config.rendered) } source_details { -// source_id = var.use_standard_image ? data.oci_core_images.linux.images.0.id : local.custom_bastion_image_ocid - source_id = local.bastion_image - boot_volume_size_in_gbs = var.bastion_boot_volume_size +// source_id = var.use_standard_image ? 
data.oci_core_images.linux.images.0.id : local.custom_controller_image_ocid + source_id = local.controller_image + boot_volume_size_in_gbs = var.controller_boot_volume_size source_type = "image" } create_vnic_details { - subnet_id = local.bastion_subnet_id - assign_public_ip = local.bastion_bool_ip + subnet_id = local.controller_subnet_id + assign_public_ip = local.controller_bool_ip } } @@ -60,14 +60,14 @@ resource "null_resource" "backup" { inline = [ "#!/bin/bash", "sudo mkdir -p /opt/oci-hpc", - "sudo chown ${var.bastion_username}:${var.bastion_username} /opt/oci-hpc/", + "sudo chown ${var.controller_username}:${var.controller_username} /opt/oci-hpc/", "mkdir -p /opt/oci-hpc/bin", "mkdir -p /opt/oci-hpc/playbooks" ] connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -77,7 +77,7 @@ resource "null_resource" "backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -88,7 +88,7 @@ resource "null_resource" "backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -99,7 +99,7 @@ resource "null_resource" "backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -110,7 +110,7 @@ resource "null_resource" "backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -120,7 +120,7 @@ resource "null_resource" "backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -130,7 +130,7 @@ resource "null_resource" "backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -142,18 +142,18 @@ resource "null_resource" "backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } provisioner "file" { content = tls_private_key.ssh.private_key_pem - destination = "/home/${var.bastion_username}/.ssh/cluster.key" + destination = "/home/${var.controller_username}/.ssh/cluster.key" connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -162,15 +162,15 @@ resource "null_resource" "backup" { provisioner "remote-exec" { inline = [ "#!/bin/bash", - "chmod 600 /home/${var.bastion_username}/.ssh/cluster.key", - "cp /home/${var.bastion_username}/.ssh/cluster.key /home/${var.bastion_username}/.ssh/id_rsa", + "chmod 600 /home/${var.controller_username}/.ssh/cluster.key", + "cp /home/${var.controller_username}/.ssh/cluster.key /home/${var.controller_username}/.ssh/id_rsa", "chmod a+x /opt/oci-hpc/bin/*.sh", - "timeout --foreground 60m /opt/oci-hpc/bin/bastion.sh" + "timeout --foreground 60m /opt/oci-hpc/bin/controller.sh" ] connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = 
tls_private_key.ssh.private_key_pem } } @@ -184,8 +184,8 @@ resource "null_resource" "cluster_backup" { provisioner "file" { content = templatefile("${path.module}/inventory.tpl", { - bastion_name = oci_core_instance.bastion.display_name, - bastion_ip = oci_core_instance.bastion.private_ip, + controller_name = oci_core_instance.controller.display_name, + controller_ip = oci_core_instance.controller.private_ip, backup_name = var.slurm_ha ? oci_core_instance.backup[0].display_name : "", backup_ip = var.slurm_ha ? oci_core_instance.backup[0].private_ip: "", login_name = var.login_node ? oci_core_instance.login[0].display_name : "", @@ -195,6 +195,8 @@ resource "null_resource" "cluster_backup" { private_subnet = data.oci_core_subnet.private_subnet.cidr_block, rdma_network = cidrhost(var.rdma_subnet, 0), rdma_netmask = cidrnetmask(var.rdma_subnet), + zone_name = local.zone_name, + dns_entries = var.dns_entries, nfs = var.node_count > 0 ? local.cluster_instances_names[0] : "", home_nfs = var.home_nfs, create_fss = var.create_fss, @@ -218,10 +220,10 @@ resource "null_resource" "cluster_backup" { rack_aware = var.rack_aware, spack = var.spack, ldap = var.ldap, - bastion_block = var.bastion_block, + controller_block = var.controller_block, login_block = var.login_block, scratch_nfs_type = local.scratch_nfs_type, - bastion_mount_ip = local.bastion_mount_ip, + controller_mount_ip = local.controller_mount_ip, login_mount_ip = local.login_mount_ip, cluster_mount_ip = local.mount_ip, autoscaling = var.autoscaling, @@ -231,7 +233,7 @@ resource "null_resource" "cluster_backup" { queue=var.queue, monitoring = var.monitoring, hyperthreading = var.hyperthreading, - bastion_username = var.bastion_username, + controller_username = var.controller_username, compute_username = var.compute_username, autoscaling_monitoring = var.autoscaling_monitoring, autoscaling_mysql_service = var.autoscaling_mysql_service, @@ -256,7 +258,7 @@ resource "null_resource" "cluster_backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -268,7 +270,7 @@ resource "null_resource" "cluster_backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -285,7 +287,7 @@ resource "null_resource" "cluster_backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -316,22 +318,22 @@ resource "null_resource" "cluster_backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } provisioner "file" { content = templatefile("${path.module}/conf/variables.tpl", { - bastion_name = oci_core_instance.bastion.display_name, - bastion_ip = oci_core_instance.bastion.private_ip, + controller_name = oci_core_instance.controller.display_name, + controller_ip = oci_core_instance.controller.private_ip, backup_name = var.slurm_ha ? oci_core_instance.backup[0].display_name : "", backup_ip = var.slurm_ha ? oci_core_instance.backup[0].private_ip: "", login_name = var.login_node ? oci_core_instance.login[0].display_name : "", login_ip = var.login_node ? oci_core_instance.login[0].private_ip: "", compute = var.node_count > 0 ? 
zipmap(local.cluster_instances_names, local.cluster_instances_ips) : zipmap([],[]) public_subnet = data.oci_core_subnet.public_subnet.cidr_block, - public_subnet_id = local.bastion_subnet_id, + public_subnet_id = local.controller_subnet_id, private_subnet = data.oci_core_subnet.private_subnet.cidr_block, private_subnet_id = local.subnet_id, rdma_subnet = var.rdma_subnet, @@ -344,18 +346,21 @@ resource "null_resource" "cluster_backup" { rack_aware = var.rack_aware, spack = var.spack, ldap = var.ldap, - bastion_block = var.bastion_block, + controller_block = var.controller_block, login_block = var.login_block, scratch_nfs_type = local.scratch_nfs_type, - bastion_mount_ip = local.bastion_mount_ip, + controller_mount_ip = local.controller_mount_ip, login_mount_ip = local.login_mount_ip, cluster_mount_ip = local.mount_ip, scratch_nfs_type_cluster = var.scratch_nfs_type_cluster, scratch_nfs_type_pool = var.scratch_nfs_type_pool, - bastion_block_volume_performance = var.bastion_block_volume_performance, + controller_block_volume_performance = var.controller_block_volume_performance, region = var.region, tenancy_ocid = var.tenancy_ocid, vcn_subnet = var.vcn_subnet, + vcn_id = local.vcn_id, + zone_name = local.zone_name, + dns_entries = var.dns_entries, cluster_block_volume_size = var.cluster_block_volume_size, cluster_block_volume_performance = var.cluster_block_volume_performance, ssh_cidr = var.ssh_cidr, @@ -384,17 +389,24 @@ resource "null_resource" "cluster_backup" { privilege_group_name = var.privilege_group_name, latency_check = var.latency_check, private_deployment = var.private_deployment, - bastion_username = var.bastion_username, + controller_username = var.controller_username, compute_username = var.compute_username, use_multiple_ads = var.use_multiple_ads, - use_compute_agent = var.use_compute_agent + use_compute_agent = var.use_compute_agent, + BIOS = var.BIOS, + IOMMU = var.IOMMU, + SMT = var.SMT, + virt_instr = var.virt_instr, + access_ctrl = var.access_ctrl, + numa_nodes_per_socket = var.numa_nodes_per_socket, + percentage_of_cores_enabled = var.percentage_of_cores_enabled }) destination = "/opt/oci-hpc/conf/variables.tf" connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -406,7 +418,7 @@ resource "null_resource" "cluster_backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } @@ -422,8 +434,24 @@ resource "null_resource" "cluster_backup" { connection { host = local.host_backup type = "ssh" - user = var.bastion_username + user = var.controller_username private_key = tls_private_key.ssh.private_key_pem } } } + + +resource "oci_dns_rrset" "rrset-backup" { + count = var.slurm_ha && var.dns_entries ? 1 : 0 + zone_name_or_id = data.oci_dns_zones.dns_zones.zones[0].id + domain = "${var.slurm_ha ? oci_core_instance.backup[0].display_name : ""}.${local.zone_name}" + rtype = "A" + items { + domain = "${var.slurm_ha ? oci_core_instance.backup[0].display_name : ""}.${local.zone_name}" + rtype = "A" + rdata = var.slurm_ha ? 
oci_core_instance.backup[0].private_ip: "" + ttl = 3600 + } + scope = "PRIVATE" + view_id = data.oci_dns_views.dns_views.views[0].id +} \ No newline at end of file diff --git a/user_data.tf b/user_data.tf index 37298614..7249c8a8 100755 --- a/user_data.tf +++ b/user_data.tf @@ -1,5 +1,5 @@ -data "template_file" "bastion_config" { - template = file("config.bastion") +data "template_file" "controller_config" { + template = file("config.controller") vars = { key = tls_private_key.ssh.private_key_pem } diff --git a/variables.tf b/variables.tf index 20c4d9ca..0cc7e5df 100755 --- a/variables.tf +++ b/variables.tf @@ -13,10 +13,10 @@ variable "compute_cluster_id" { default = "" } variable "compute_cluster_start_index" { default = 0 } variable "use_custom_name" { default = false } variable "cluster_name" { default = "" } -variable "bastion_ad" {} -variable "bastion_shape" { default = "VM.Standard2.4" } -variable "bastion_object_storage_par" { default = true } -variable "custom_bastion_image" { +variable "controller_ad" {} +variable "controller_shape" { default = "VM.Standard2.4" } +variable "controller_object_storage_par" { default = true } +variable "custom_controller_image" { type = string default = "image.ocid" } @@ -24,12 +24,12 @@ variable "custom_login_image" { type = string default = "image.ocid" } -variable "bastion_boot_volume_size" {} -variable "bastion_boot_volume_backup" {} -variable "bastion_boot_volume_backup_type" {default = "INCREMENTAL"} -variable "bastion_boot_volume_backup_period" {default = "ONE_DAY"} -variable "bastion_boot_volume_backup_retention_seconds" {default = "7776000"} -variable "bastion_boot_volume_backup_time_zone" {default = "REGIONAL_DATA_CENTER_TIME"} +variable "controller_boot_volume_size" {} +variable "controller_boot_volume_backup" {} +variable "controller_boot_volume_backup_type" {default = "INCREMENTAL"} +variable "controller_boot_volume_backup_period" {default = "ONE_DAY"} +variable "controller_boot_volume_backup_retention_seconds" {default = "7776000"} +variable "controller_boot_volume_backup_time_zone" {default = "REGIONAL_DATA_CENTER_TIME"} variable "cluster_network_shape" { default = "BM.HPC2.36" } variable "instance_pool_shape" { default = "VM.Standard2.4" } variable "node_count" { default = 2 } @@ -37,8 +37,8 @@ variable "boot_volume_size" { default = 50 } variable "use_marketplace_image" { default = true} variable "image" { default = "ocid1.image.oc1..aaaaaaaa5yxem7wzie34hi5km4qm2t754tsfxrjuefyjivebrxjad4jcj5oa" } variable "image_ocid" { default = "ocid1.image.oc1..aaaaaaaa5yxem7wzie34hi5km4qm2t754tsfxrjuefyjivebrxjad4jcj5oa" } -variable "use_compute_agent" { default = false } -variable "unsupported_bastion_image" { default = "" } +variable "use_compute_agent" { default = true } +variable "unsupported_controller_image" { default = "" } variable "unsupported_login_image" { default = "" } variable "use_cluster_nfs" { default = true} variable "use_scratch_nfs" { default = true } @@ -65,16 +65,16 @@ variable "slurm_nfs" { default = false } variable "rack_aware" { default = false } variable "ldap" { default = true } variable "spack" { default = false } -variable "bastion_ocpus" { default = 2} -variable "bastion_ocpus_denseIO_flex" { default = 8} +variable "controller_ocpus" { default = 2} +variable "controller_ocpus_denseIO_flex" { default = 8} variable "instance_pool_ocpus" { default = 2} variable "instance_pool_ocpus_denseIO_flex" { default = 8} variable "instance_pool_memory" { default = 16 } variable "instance_pool_custom_memory" { default = false } 
variable "login_ocpus" { default = 2} variable "login_ocpus_denseIO_flex" { default = 8} -variable "bastion_memory" { default = 16 } -variable "bastion_custom_memory" { default = false } +variable "controller_memory" { default = 16 } +variable "controller_custom_memory" { default = false } variable "login_memory" { default = 16 } variable "login_custom_memory" { default = false } variable "privilege_sudo" { default = true } @@ -91,10 +91,12 @@ variable "marketplace_version_id" { "2" = "OL7.8-OFED5.0-1.0.0.0-UEK-20200826" "3" = "OL7.7-OFED-4.4-2.0.7.0-UEK-20200229" "4" = "OL7.9-OFED5.0-2.1.8.0-RHCK-20210709" - "HPC_OL7" = "OracleLinux-7-OCA-RHCK-OFED-5.8-3.0.7.0-2024.01.02-0" - "HPC_OL8" = "OracleLinux-8-OCA-RHCK-OFED-5.8-3.0.7.0-2024.01.02-1" - "GPU_OL7" = "OracleLinux-7-OCA-RHCK-OFED-5.8-3.0.7.0-GPU-535-2024.01.02-0" - "GPU_OL8" = "OracleLinux-8-OCA-RHCK-OFED-5.8-3.0.7.0-GPU-535-2024.01.02-1" + "HPC_OL7" = "OracleLinux-7-OCA-RHCK-OFED-23.10-2.1.3.1-2024.03.15-0" + "HPC_OL8" = "OracleLinux-8-OCA-RHCK-OFED-23.10-2.1.3.1-2024.03.15-0" + "GPU_OL7_CUDA12.2" = "OracleLinux-7-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.2-2024.03.15-0" + "GPU_OL8_CUDA12.2" = "OracleLinux-8-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.2-2024.03.15-0" + "GPU_OL7_CUDA12.4" = "OracleLinux-7-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.4-2024.03.15-0" + "GPU_OL8_CUDA12.4" = "OracleLinux-8-OCA-RHCK-OFED-23.10-2.1.3.1-GPU-535-CUDA-12.4-2024.03.15-0" } } @@ -107,7 +109,7 @@ variable "marketplace_listing_id_HPC" { variable "marketplace_listing_id_GPU" { default = "ocid1.appcataloglisting.oc1..aaaaaaaab2hkpxsglxfbzitiiqv6djxzj5q5soxotwdem2dd2kbifgk4p55q" } -variable "bastion_block_volume_performance" { +variable "controller_block_volume_performance" { /* Allowed values "0. Lower performance" @@ -119,11 +121,11 @@ default = "10. Balanced performance" } -variable "bastion_block" { +variable "controller_block" { default = false } -variable "bastion_block_volume_size" { +variable "controller_block_volume_size" { default = 1000 } @@ -183,11 +185,11 @@ variable "unsupported" { } variable "queue" {default = "compute"} -variable "unsupported_bastion" { +variable "unsupported_controller" { type=bool default = false } -variable "use_marketplace_image_bastion" { +variable "use_marketplace_image_controller" { type=bool default = true } @@ -195,7 +197,7 @@ variable "unsupported_login" { type=bool default = false } -variable "bastion_username" { +variable "controller_username" { type = string default = "opc" } @@ -250,7 +252,34 @@ variable "use_marketplace_image_login" { default = true} variable "marketplace_listing_login" { default = "HPC_OL7" } -variable "marketplace_listing_bastion" { +variable "marketplace_listing_controller" { default = "HPC_OL7" } - \ No newline at end of file +variable "zone_name" { + default = "" +} +variable "dns_entries" { + default = true +} + +variable "BIOS" { + default = false +} +variable "IOMMU" { + default = false +} +variable "SMT" { + default = true +} +variable "virt_instr" { + default = false +} +variable "access_ctrl" { + default = false +} +variable "numa_nodes_per_socket" { + default = "Default" +} +variable "percentage_of_cores_enabled" { + default = "Default" +} \ No newline at end of file diff --git a/versions.tf b/versions.tf index 6dd2b529..57e63004 100755 --- a/versions.tf +++ b/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { oci = { source = "oracle/oci" - version = "5.1.0" + version = "5.30.0" } } } \ No newline at end of file