Merge pull request #79 from ibm-client-engineering/adam-updates
Adam updates
kramerro-ibm authored Apr 11, 2024
2 parents b3835b4 + 66fb3cd commit 4b90cce
Showing 5 changed files with 158 additions and 5 deletions.
95 changes: 94 additions & 1 deletion docs/2-Deployment/2-UPI_Install.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -604,4 +604,97 @@ Add the following under 'spec:'
namespace: openshift-authentication
servingCertKeyPairSecret:
name: custom-cert
```

## Increase Primary Disk Size on Worker Nodes

1) Run the following bash one-liner to increase the primary disk on all worker nodes to 500 GB:

```
aws ec2 describe-instances --query 'Reservations[*].Instances[*].[InstanceId,Tags[?Key==`Name`].Value|[0],BlockDeviceMappings[0].Ebs.VolumeId]' --output text | grep worker | awk '{print $3}' | while read volume_id; do aws ec2 modify-volume --volume-id $volume_id --size 500; done
```
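The one-liner chains three stages: list each instance's ID, Name tag, and root EBS volume ID; keep only rows whose Name contains `worker`; then call `modify-volume` once per volume. If you want to inspect the volume list before modifying anything, the filtering stage can be factored into a small function (the `worker_volume_ids` name is illustrative, not from the docs):

```shell
# Hypothetical helper: pull the worker volume IDs out of the tab-separated
# `--output text` listing produced by the describe-instances query above.
# Field 2 is the Name tag value, field 3 the EBS volume ID.
worker_volume_ids() {
  grep worker | awk '{print $3}'
}

# Usage against a live cluster, mirroring the one-liner:
# aws ec2 describe-instances --query '...' --output text | worker_volume_ids \
#   | while read -r volume_id; do
#       aws ec2 modify-volume --volume-id "$volume_id" --size 500
#     done
```

Printing the IDs first is a cheap dry run; `modify-volume` takes effect immediately, and the in-guest steps below are still required before the filesystem sees the space.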

2) Log into the node with the following command:

```
oc debug node/<node_name>
```

3) Once in the node, run the following:

```
chroot /host
```

then:

```
sudo lsblk
```

The output should look like this:

```
# sudo lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
nvme1n1 259:0 0 1T 0 disk
nvme0n1 259:1 0 500G 0 disk
|-nvme0n1p1 259:2 0 1M 0 part
|-nvme0n1p2 259:3 0 127M 0 part
|-nvme0n1p3 259:4 0 384M 0 part /boot
`-nvme0n1p4 259:5 0 239.5G 0 part /var/lib/kubelet/pods/555d6f90-41fd-49d2-8aad-fa7293a924e4/volume-subpaths/app-config-override/wd-discovery-cnm-api/2
/var/lib/kubelet/pods/57d28f73-d355-4092-8e52-6b6aeec28bd5/volume-subpaths/clouseau-config/search/4
/var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2wh-cm/zen-database-core/1
/var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2oltp-cm/zen-database-core/0
/var/lib/kubelet/pods/5e35fe3f-7e4d-4729-b3dd-b9553ffd73f6/volume-subpaths/nginx-conf/monitoring-plugin/1
/var
/sysroot/ostree/deploy/rhcos/var
/sysroot
/usr
/etc
/
```

4) Find the partition on the disk that you wish to increase; in my case it was 'nvme0n1p4'.

Now extend the partition by targeting the disk (example: /dev/nvme0n1) and the partition number (example: 4):

```
sudo growpart /dev/nvme0n1 4
```

5) Check the disk sizes again:

```
sudo lsblk
```

This is what my output looks like now:

```
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
nvme1n1 259:0 0 1T 0 disk
nvme0n1 259:1 0 500G 0 disk
|-nvme0n1p1 259:2 0 1M 0 part
|-nvme0n1p2 259:3 0 127M 0 part
|-nvme0n1p3 259:4 0 384M 0 part /boot
`-nvme0n1p4 259:5 0 499.5G 0 part /var/lib/kubelet/pods/555d6f90-41fd-49d2-8aad-fa7293a924e4/volume-subpaths/app-config-override/wd-discovery-cnm-api/2
/var/lib/kubelet/pods/57d28f73-d355-4092-8e52-6b6aeec28bd5/volume-subpaths/clouseau-config/search/4
/var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2wh-cm/zen-database-core/1
/var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2oltp-cm/zen-database-core/0
/var/lib/kubelet/pods/5e35fe3f-7e4d-4729-b3dd-b9553ffd73f6/volume-subpaths/nginx-conf/monitoring-plugin/1
/var
/sysroot/ostree/deploy/rhcos/var
/sysroot
/usr
/etc
/
```

6) The last step is to extend the XFS filesystem:

```
sudo xfs_growfs -d /
```

51 changes: 51 additions & 0 deletions docs/3-Installation/1-CP4D.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,56 @@ cpd-cli service-instance list \
```



### Generate a cpd-cli Profile

Log in to the CP4D web UI with the details retrieved from `get-cpd-instance-details`, then go to your Profile and settings page in the Cloud Pak for Data client to generate an API key.

In the upper right-hand corner, click `API key` -> `Generate new key`.

Copy the generated key.

Collect the web client URL and export it with the following command:

```tsx
export CPD_PROFILE_URL=$(oc get route cpd --namespace=${PROJECT_CPD_INST_OPERANDS} | tail -1 | awk '{print "https://"$2}')
```
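The pipeline above parses the human-readable table that `oc get route` prints, which breaks if the column layout changes. A slightly more robust sketch (assuming the route is named `cpd`, as above; the function name is illustrative) reads the host straight from the route spec with jsonpath:

```shell
# Illustrative alternative: build the profile URL from the route's
# .spec.host field instead of parsing column output.
cpd_route_url() {
  echo "https://$(oc get route cpd \
    --namespace="${PROJECT_CPD_INST_OPERANDS}" \
    -o jsonpath='{.spec.host}')"
}

# export CPD_PROFILE_URL="$(cpd_route_url)"
```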

We'll set our `profile-name` to the cluster name.

Set the following vars:
```tsx
export API_KEY=<key you copied above>
export CPD_ADMIN_USER=cpadmin
export LOCAL_USER=<local user name>
export CPD_PROFILE_NAME=wxai
```

Create a local user configuration to store your username and API key by using the `config users set` command.

```tsx
cpd-cli config users set ${LOCAL_USER} \
--username ${CPD_ADMIN_USER} \
--apikey ${API_KEY}
```

Create a profile to store the Cloud Pak for Data URL and to associate the profile with your local user configuration by using the `config profiles set` command.

```tsx
cpd-cli config profiles set ${CPD_PROFILE_NAME} \
--user ${LOCAL_USER} \
--url ${CPD_PROFILE_URL}
```

You can now run `cpd-cli` commands with this profile, as shown in the following example.

```tsx
cpd-cli service-instance list \
--profile=${CPD_PROFILE_NAME}
```



## Installing our Cartridges

Source the env file
Expand Down Expand Up @@ -601,6 +651,7 @@ The default Production size in this case is more than suited for our purposes.
:::

#### Apply the OLM

```tsx
cpd-cli manage apply-olm \
--release=${VERSION} \
Expand Down
4 changes: 2 additions & 2 deletions static/scripts/Install-OCP-UPI/add_node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ do
worker_ignition_url="https://api-int.${cluster_name}.${base_domain}:22623/config/worker"
certificate_authorities=$(jq '.ignition.security.tls.certificateAuthorities[].source' ${ocp_data_dir}/worker.ign | sed -e 's/\"//g')

echo "**** Running: " ${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${worker_instance_type}"
${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${worker_instance_type}"
echo "**** Running: " ${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${gpu_instance_type}"
${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${gpu_instance_type}"
if [[ $? -ne 0 ]]
then
echo "ERROR: create_worker_param.sh did not complete successfully"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Parameters:
Default: m5.large
Type: String
AllowedValues:
- "p4d.24xlarge"
- "m4.large"
- "m4.xlarge"
- "m4.2xlarge"
Expand Down Expand Up @@ -144,7 +145,7 @@ Resources:
BlockDeviceMappings:
- DeviceName: /dev/xvda
Ebs:
VolumeSize: "300"
VolumeSize: "500"
VolumeType: "gp3"
Encrypted: 'true'
InstanceType: !Ref WorkerInstanceType
Expand Down
10 changes: 9 additions & 1 deletion static/scripts/Install-OCP-UPI/config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,12 @@ master_1_subnet=$aws_private_subnets
master_2_subnet=$aws_private_subnets
worker_subnet_list=$aws_private_subnets #Don't Change
worker_count=6
worker_instance_type="m6i.8xlarge"
###
# GPU Node
###
gpu_count=1
gpu_subnet="" #US-EAST-2
gpu_instance_type="p4d.24xlarge"
gpu_securitygroup=""
