From c5c070d048fc1d96e77928f3852e1d78d28ce7ba Mon Sep 17 00:00:00 2001 From: "Adam.Hayden" Date: Wed, 10 Apr 2024 14:48:29 -0500 Subject: [PATCH 01/13] Fixes to add_node and config.sh --- static/scripts/Install-OCP-UPI/add_node.sh | 4 ++-- static/scripts/Install-OCP-UPI/config.sh | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/static/scripts/Install-OCP-UPI/add_node.sh b/static/scripts/Install-OCP-UPI/add_node.sh index d65cdfcd..7525d4b6 100644 --- a/static/scripts/Install-OCP-UPI/add_node.sh +++ b/static/scripts/Install-OCP-UPI/add_node.sh @@ -60,8 +60,8 @@ do worker_ignition_url="https://api-int.${cluster_name}.${base_domain}:22623/config/worker" certificate_authorities=$(jq '.ignition.security.tls.certificateAuthorities[].source' ${ocp_data_dir}/worker.ign | sed -e 's/\"//g') - echo "**** Running: " ${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${worker_instance_type}" - ${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${worker_instance_type}" + echo "**** Running: " ${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${gpu_instance_type}" + ${bin_dir}/create_worker_param.sh "${cloudformation_dir}" "${infra_id}" "${aws_rhcos_ami_id}" "${gpu_subnet}" "${gpu_securitygroup}" "${worker_ignition_url}" "${certificate_authorities}" "${gpu_instance_type}" if [[ $? 
-ne 0 ]] then echo "ERROR: create_worker_param.sh did not complete successfully" diff --git a/static/scripts/Install-OCP-UPI/config.sh b/static/scripts/Install-OCP-UPI/config.sh index cdcb6282..05eceb50 100644 --- a/static/scripts/Install-OCP-UPI/config.sh +++ b/static/scripts/Install-OCP-UPI/config.sh @@ -42,4 +42,12 @@ master_1_subnet=$aws_private_subnets master_2_subnet=$aws_private_subnets worker_subnet_list=$aws_private_subnets #Don't Change worker_count=6 -worker_instance_type="m6i.8xlarge" \ No newline at end of file +worker_instance_type="m6i.8xlarge" + +### +# GPU Node +### +gpu_count=1 +gpu_subnet="" #US-EAST-2 +gpu_instance_type="p4d.24xlarge" +gpu_securitygroup="" \ No newline at end of file From 54453a1d20e705d8b05a1878ff7d495b6a4a7535 Mon Sep 17 00:00:00 2001 From: Ross Kramer Date: Wed, 3 Apr 2024 14:22:55 -0400 Subject: [PATCH 02/13] Adding the rest of the cartridge installs as well as added security constraints. --- docs/1-GettingStarted/1-Architecture.mdx | 2 +- docs/3-Installation/1-CP4D.mdx | 266 +++++++++++++++++++++-- 2 files changed, 252 insertions(+), 16 deletions(-) diff --git a/docs/1-GettingStarted/1-Architecture.mdx b/docs/1-GettingStarted/1-Architecture.mdx index ec9c86d4..b266c5b4 100644 --- a/docs/1-GettingStarted/1-Architecture.mdx +++ b/docs/1-GettingStarted/1-Architecture.mdx @@ -23,7 +23,7 @@ For now we recommend the following: - watsonx.ai - Watson Studio - Watson Machine Learning - - Watson Knowledge Catalog + - IBM Knowledge Catalog - Foundational Models - mixtral - llama2 diff --git a/docs/3-Installation/1-CP4D.mdx b/docs/3-Installation/1-CP4D.mdx index 065a0441..95e5538e 100644 --- a/docs/3-Installation/1-CP4D.mdx +++ b/docs/3-Installation/1-CP4D.mdx @@ -500,29 +500,85 @@ cpd-cli manage get-cpd-instance-details \ --get_admin_initial_credentials=true ``` +### Generate a cpd-cli Profile + +Log into the CP4D webui with the info retrieved from the `get-cpd-instance-details` and go to your Profile and settings page in the Cloud 
Pak for Data client and clicking Generate API key. + +In the upper right hand corner, click `API key` -> `Generate new key` + +Copy the generated key. + +Collect the web client URL and export it with the following command + +```tsx +export CPD_PROFILE_URL=$(oc get route cpd --namespace=${PROJECT_CPD_INST_OPERANDS} | tail -1 | awk '{print "https://"$2}') +``` + +We'll set our `profile-name` to the cluster name. + +Set the following vars: +```tsx +export API_KEY= +export CPD_ADMIN_USER=cpadmin +export LOCAL_USER= +export CPD_PROFILE_NAME=wxai +``` + +Create a local user configuration to store your username and API key by using the config users set command. + +```tsx +cpd-cli config users set ${LOCAL_USER} \ +--username ${CPD_ADMIN_USER} \ +--apikey ${API_KEY} +``` + +Create a profile to store the Cloud Pak for Data URL and to associate the profile with your local user configuration by using the config profiles set command. + +```tsx +cpd-cli config profiles set ${CPD_PROFILE_NAME} \ +--user ${LOCAL_USER} \ +--url ${CPD_PROFILE_URL} +``` + +You can now run cpd-cli commands with this profile as shown in the following example. 
+ +```tsx +cpd-cli service-instance list \ +--profile=${CPD_PROFILE_NAME} +``` + + ## Installing our Cartridges -### Watson Machine Learning +Source the env file + +`source cpd_vars_48.sh` + +Login with cpd-cli +``` +cpd-cli manage login-to-ocp \ +--username=${OCP_USERNAME} \ +--password=${OCP_PASSWORD} \ +--server=${OCP_URL} +``` + +### Apply necessary Security Constraints + +The apply-db2-kubelet command makes the following changes to the cluster nodes: -Apply the olm ```tsx -cpd-cli manage apply-olm \ ---release=${VERSION} \ ---cpd_operator_ns=${PROJECT_CPD_INST_OPERATORS} \ ---components=wml +allowedUnsafeSysctls: + - "kernel.msg*" + - "kernel.shm*" + - "kernel.sem" ``` -Apply the CR ```tsx -cpd-cli manage apply-cr \ ---components=wml \ ---release=${VERSION} \ ---cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ ---block_storage_class=${STG_CLASS_BLOCK} \ ---file_storage_class=${STG_CLASS_FILE} \ ---license_acceptance=true +cpd-cli manage apply-db2-kubelet ``` +This might take a bit as the workers will be getting bounced. + ### Watsonx Assistant :::note @@ -540,7 +596,7 @@ watson_assistant_analytics_enabled: true The default Production size in this case is more the suited for our purposes. ::: -Apply the olm +#### Apply the olm ```tsx cpd-cli manage apply-olm \ --release=${VERSION} \ @@ -560,6 +616,71 @@ cpd-cli manage apply-cr \ --license_acceptance=true ``` +#### Validate the installation + +```tsx +cpd-cli manage get-cr-status \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--components=watson_assistant +``` + +#### Create an instance of WA + +https://www.ibm.com/docs/en/cloud-paks/cp-data/4.8.x?topic=csi-creating-service-instance-cpd-cli-service-instance-create-2 + +Set the INSTANCE_NAME environment variable to the unique name that you want to use as the display name for the service instance. We're just going to call this `wa-instance`. 
+```tsx +export INSTANCE_NAME="wa-instance" +``` + +Set the INSTANCE_VERSION env var to the version that corresponds to the version of CP4D. As of this writing and this guide, we are using 4.8.4. The Service instance version must match the release of CP4D. + +```tsx +export INSTANCE_VERSION=4.8.4 +``` + +Create the assistant-instance.json payload file: +```tsx +cat << EOF > ./assistant-instance.json +{ + "addon_type": "assistant", + "display_name": "${INSTANCE_NAME}", + "namespace": "${PROJECT_CPD_INST_OPERANDS}", + "addon_version": "${INSTANCE_VERSION}", + "create_arguments": { + "deployment_id": "${PROJECT_CPD_INST_OPERANDS}-wa", + "parameters": { + "serviceId": "assistant", + "url": "https://wa-store.${PROJECT_CPD_INST_OPERANDS}.svc.cluster.local:443/csb/v2/service_instances", + "watson": true + } + } +} +EOF +``` + +Set the PAYLOAD_FILE environment variable to the fully qualified name of the JSON payload file + +```tsx +export PAYLOAD_FILE=/path/to/whereever/this/file/is/assistant-instance.json +``` + +#### Create the service instance from the payload file: + +```tsx +cpd-cli service-instance create \ +--profile=${CPD_PROFILE_NAME} \ +--from-source=${PAYLOAD_FILE} +``` + +#### Validating that the service instance was created + +```tsx +cpd-cli service-instance status ${INSTANCE_NAME} \ +--profile=${CPD_PROFILE_NAME} \ +--output=json +``` + ### Watson Discovery Apply the olm @@ -581,4 +702,119 @@ cpd-cli manage apply-cr \ --license_acceptance=true ``` +Validate the installation + +```tsx +cpd-cli manage get-cr-status \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--components=watson_discovery +``` + + +### OpenPages + +Run the following command to create the required OLM objects for OpenPages in the operators project for the instance: + +```tsx +cpd-cli manage apply-olm \ +--release=${VERSION} \ +--cpd_operator_ns=${PROJECT_CPD_INST_OPERATORS} \ +--components=openpages +``` + +Create the custom resource for OpenPages + +```tsx +cpd-cli manage apply-cr \ 
+--components=openpages \ +--release=${VERSION} \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--license_acceptance=true +``` + +Validate the installation + +```tsx +cpd-cli manage get-cr-status \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--components=openpages +``` + +Create a service instance + +https://www.ibm.com/docs/en/cloud-paks/cp-data/4.8.x?topic=csi-creating-service-instance-cpd-cli-service-instance-create-14 + +### Watson Studio + +Run the following command to create the required OLM objects for Watson Studio in the operators project for the instance: +```tsx +cpd-cli manage apply-olm \ +--release=${VERSION} \ +--cpd_operator_ns=${PROJECT_CPD_INST_OPERATORS} \ +--components=ws +``` + +Create the custom resource + +```tsx +cpd-cli manage apply-cr \ +--components=ws \ +--release=${VERSION} \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--block_storage_class=${STG_CLASS_BLOCK} \ +--file_storage_class=${STG_CLASS_FILE} \ +--license_acceptance=true +``` + +Validate the installation + +```tsx +cpd-cli manage get-cr-status \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--components=ws +``` + +### Watson Machine Learning + +Apply the olm +```tsx +cpd-cli manage apply-olm \ +--release=${VERSION} \ +--cpd_operator_ns=${PROJECT_CPD_INST_OPERATORS} \ +--components=wml +``` + +Apply the CR +```tsx +cpd-cli manage apply-cr \ +--components=wml \ +--release=${VERSION} \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--block_storage_class=${STG_CLASS_BLOCK} \ +--file_storage_class=${STG_CLASS_FILE} \ +--license_acceptance=true +``` + +### IBM Knowledge Catalog + +Run the following command to create the required OLM objects for IBM Knowledge Catalog in the operators project for the instance: + +```tsx +cpd-cli manage apply-olm \ +--release=${VERSION} \ +--cpd_operator_ns=${PROJECT_CPD_INST_OPERATORS} \ +--components=wkc +``` + +We're using default options for this installation, so kick off the following CR +```tsx +cpd-cli manage apply-cr \ 
+--components=wkc \ +--release=${VERSION} \ +--cpd_instance_ns=${PROJECT_CPD_INST_OPERANDS} \ +--block_storage_class=${STG_CLASS_BLOCK} \ +--file_storage_class=${STG_CLASS_FILE} \ +--license_acceptance=true +``` +import { Profiler } from "react" From dc2244b00641daeae84c184830c758553eea5032 Mon Sep 17 00:00:00 2001 From: Joe Date: Wed, 3 Apr 2024 07:12:51 -0600 Subject: [PATCH 03/13] updated flight log --- flight-logs/2024-04-03-cocreate.mdx | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 flight-logs/2024-04-03-cocreate.mdx diff --git a/flight-logs/2024-04-03-cocreate.mdx b/flight-logs/2024-04-03-cocreate.mdx new file mode 100644 index 00000000..9a55027b --- /dev/null +++ b/flight-logs/2024-04-03-cocreate.mdx @@ -0,0 +1,56 @@ +--- +title: Log 16 🛫 +description: Flight Log of Co-Creation Activities +slug: flight-log-16 +tags: [log] +--- + +## Objective +Deploy watsonx.ai on self-managed AWS infrastructure for customer software evaluation + +```mermaid +flowchart LR + A(Deploy bootnode) --> B(Deploy infrastructure) + B -->C(Deploy OCP) + subgraph "You are here" + D(Prepare CP4D & watsonx ai cartdridge) + end + C -->D + D -->E(Install CP4D) + E -->F(Deploy watsonx.ai) +``` + +## Milestones +1. Deploy and configuration of boot node to establish a beach-head into the customer AWS environment + - Complete +2. Deploy OCP using the documented UPI installation steps + - Complete +3. Install Cloud Pak for Data + - In Progress +4. Deploy and configure watsonx.ai on self-managed AWS infrastructure + +## Today's Accomplishments + + +### Summary +- Master nodes attemting to upgrade and are in a stuck state preventing rollback of ingress changes +- Attempting to deploy a new cluster + +## Decisions and Action Items (DAI) +- Software evaluation awaiting customer's approval process. 
This blocks our ability to download software from cp.icr.io + - Customer to escalate internally + +## Next Steps +- License and configure Cloud Pak for Data + - Cloud Pak Considerations + - Security scans needed on container images + - Customer requires on-prem, offline install + - Customer uses their own container registry that might introduce extra effort or compatability issues + - Version compatibility with OpenShift (e.g. 4.10 required and customer has 4.11) + - Supported storage not available + - Multiple cloudpaks on the same cluster + - custom connections to data sources not supported OOTB + - AWS-specific: IAM users required for install/deploy and are not allowed + - OpenShift specific: CoreOS requirement for control nodes + - Automatic updating of Cloud Pak, this can interrupt engagements (solution is to always remove update polling from operators) +- Deploy watsonx.ai \ No newline at end of file From 3eb8de262f286b82d1b10ba1b0c9ac97bcb95f63 Mon Sep 17 00:00:00 2001 From: Joe Date: Thu, 4 Apr 2024 10:44:03 -0600 Subject: [PATCH 04/13] updated flight logs --- flight-logs/2024-04-03-cocreate.mdx | 7 +-- flight-logs/2024-04-04-cocreate.mdx | 66 +++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 flight-logs/2024-04-04-cocreate.mdx diff --git a/flight-logs/2024-04-03-cocreate.mdx b/flight-logs/2024-04-03-cocreate.mdx index 9a55027b..dec96dee 100644 --- a/flight-logs/2024-04-03-cocreate.mdx +++ b/flight-logs/2024-04-03-cocreate.mdx @@ -29,12 +29,13 @@ flowchart LR - In Progress 4. 
Deploy and configure watsonx.ai on self-managed AWS infrastructure -## Today's Accomplishments - ### Summary -- Master nodes attemting to upgrade and are in a stuck state preventing rollback of ingress changes +- Master nodes attempting to upgrade and are in a stuck state preventing rollback of ingress changes - Attempting to deploy a new cluster +- New cluster successfully deployed via latest script +- Deploying EFS +- Installing OpenShift Data Foundation & Standalone Multicloud Object Gateway ## Decisions and Action Items (DAI) - Software evaluation awaiting customer's approval process. This blocks our ability to download software from cp.icr.io diff --git a/flight-logs/2024-04-04-cocreate.mdx b/flight-logs/2024-04-04-cocreate.mdx new file mode 100644 index 00000000..fcea501a --- /dev/null +++ b/flight-logs/2024-04-04-cocreate.mdx @@ -0,0 +1,66 @@ +--- +title: Log 17 🛫 +description: Flight Log of Co-Creation Activities +slug: flight-log-17 +tags: [log] +--- + +## Objective +Deploy watsonx.ai on self-managed AWS infrastructure for customer software evaluation + +```mermaid +flowchart LR + A(Deploy bootnode) --> B(Deploy infrastructure) + B -->C(Deploy OCP) + subgraph "You are here" + D(Prepare CP4D & watsonx ai cartridge) + end + C -->D + D -->E(Install CP4D) + E -->F(Deploy watsonx.ai) +``` + + +## Milestones +1. Deploy and configuration of boot node to establish a beach-head into the customer AWS environment + - Complete +2. Deploy OCP using the documented UPI installation steps + - Complete +3. Install Cloud Pak for Data + - In Progress +4. 
Deploy and configure watsonx.ai on self-managed AWS infrastructure + +## Today's Accomplishments +- Kubelet configuration update applied +- Deployed instance of Multicloud Object Gateway + +### Summary +- Reinstalled NFS provisioner (Helm) +- Installed OpenShift Data Foundation Operator +- Deployed MultiCloud Object Gateway +- OpenShift portal is active and cluster appears healthy +- Configuring CP4D CLI +- Waiting for final cluster nodes to update through machine config pool + +## Decisions and Action Items (DAI) +- Software evaluation awaiting customer's approval process. This blocks our ability to download software from cp.icr.io + - Customer to escalate internally + +## Lessons Learned +- Preparation for Cloud Pak for Data on OpenShift sizing needed to be adjusted to reflect an under-provisioning of CPU resources + - This was resolved by.. + +## Next Steps +- License and configure Cloud Pak for Data + - Cloud Pak Considerations + - Security scans needed on container images + - Customer requires on-prem, offline install + - Customer uses their own container registry that might introduce extra effort or compatibility issues + - Version compatibility with OpenShift (e.g. 
4.10 required and customer has 4.11) + - Supported storage not available + - Multiple cloudpaks on the same cluster + - custom connections to data sources not supported OOTB + - AWS-specific: IAM users required for install/deploy and are not allowed + - OpenShift specific: CoreOS requirement for control nodes + - Automatic updating of Cloud Pak, this can interrupt engagements (solution is to always remove update polling from operators) +- Deploy watsonx.ai \ No newline at end of file From 1f18ca7d3b599c6f70d9468c5c2458b0142bcecf Mon Sep 17 00:00:00 2001 From: Joe Date: Fri, 5 Apr 2024 10:09:16 -0600 Subject: [PATCH 05/13] updated flight logs --- flight-logs/2024-04-05-cocreate.mdx | 63 +++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 flight-logs/2024-04-05-cocreate.mdx diff --git a/flight-logs/2024-04-05-cocreate.mdx b/flight-logs/2024-04-05-cocreate.mdx new file mode 100644 index 00000000..60117045 --- /dev/null +++ b/flight-logs/2024-04-05-cocreate.mdx @@ -0,0 +1,63 @@ +--- +title: Log 18 🛫 +description: Flight Log of Co-Creation Activities +slug: flight-log-18 +tags: [log] +--- + +## Objective +Deploy watsonx.ai on self-managed AWS infrastructure for customer software evaluation + +```mermaid +flowchart LR + A(Deploy bootnode) --> B(Deploy infrastructure) + B -->C(Deploy OCP) + subgraph "You are here" + D(Prepare CP4D & watsonx ai cartridge) + end + C -->D + D -->E(Install CP4D) + E -->F(Deploy watsonx.ai) +``` + + +## Milestones +1. Deploy and configuration of boot node to establish a beach-head into the customer AWS environment + - Complete +2. Deploy OCP using the documented UPI installation steps + - Complete +3. Install Cloud Pak for Data + - In Progress +4. 
Deploy and configure watsonx.ai on self-managed AWS infrastructure + +## Today's Accomplishments +- CP4D Final Preparations + - Added options to the CPD VARS file + - Recreation of work dir + +### Summary +- Troubleshooting the CP4D CLI +- Awaiting entitlement key approval + +## Decisions and Action Items (DAI) +- Software evaluation awaiting customer's approval process. This blocks our ability to download software from cp.icr.io + - Customer to provide by EOD Monday + +## Lessons Learned +- Preparation for Cloud Pak for Data on OpenShift sizing needed to be adjusted to reflect an under-provisioning of CPU resources + - This was resolved by.. + +## Next Steps +- License and configure Cloud Pak for Data + - Cloud Pak Considerations + - Security scans needed on container images + - Customer requires on-prem, offline install + - Customer uses their own container registry that might introduce extra effort or compatibility issues + - Version compatibility with OpenShift (e.g. 4.10 required and customer has 4.11) + - Supported storage not available + - Multiple cloudpaks on the same cluster + - custom connections to data sources not supported OOTB + - AWS-specific: IAM users required for install/deploy and are not allowed + - OpenShift specific: CoreOS requirement for control nodes + - Automatic updating of Cloud Pak, this can interrupt engagements (solution is to always remove update polling from operators) +- Deploy watsonx.ai \ No newline at end of file From cefb209a6541193d49887082fb857333a944363d Mon Sep 17 00:00:00 2001 From: Ross Kramer Date: Fri, 5 Apr 2024 16:38:56 -0400 Subject: [PATCH 06/13] More updates to CP4D installation docs --- docs/3-Installation/1-CP4D.mdx | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/3-Installation/1-CP4D.mdx b/docs/3-Installation/1-CP4D.mdx index 95e5538e..b637959d 100644 --- a/docs/3-Installation/1-CP4D.mdx +++ b/docs/3-Installation/1-CP4D.mdx @@ -269,8 +269,12 @@ export 
IBM_ENTITLEMENT_KEY= # export PRIVATE_REGISTRY_PULL_USER= # export PRIVATE_REGISTRY_PULL_PASSWORD= - - +# ------------------------------------------------------------------------------ +# Extra stuff +# ------------------------------------------------------------------------------ + +export NOOBAA_ACCOUNT_CREDENTIALS_SECRET=noobaa-admin +export NOOBAA_ACCOUNT_CERTIFICATE_SECRET=noobaa-s3-serving-cert # ------------------------------------------------------------------------------ # Cloud Pak for Data version @@ -633,6 +637,16 @@ Set the INSTANCE_NAME environment variable to the unique name that you want to u export INSTANCE_NAME="wa-instance" ``` +:::note +You should have followed the steps [here](/Installation/CP4D#generate-a-cpd-cli-profile) to generate a profile. + +The example profile we created we called `wxai`. +::: + +```tsx +export CPD_PROFILE_NAME="wxai" +``` + Set the INSTANCE_VERSION env var to the version that corresponds to the version of CP4D. As of this writing and this guide, we are using 4.8.4. The Service instance version must match the release of CP4D. ```tsx @@ -683,7 +697,7 @@ cpd-cli service-instance status ${INSTANCE_NAME} \ ### Watson Discovery -Apply the olm +#### Apply the olm ```tsx cpd-cli manage apply-olm \ --release=${VERSION} \ @@ -702,7 +716,7 @@ cpd-cli manage apply-cr \ --license_acceptance=true ``` -Validate the installation +#### Validate the installation ```tsx cpd-cli manage get-cr-status \ @@ -710,6 +724,9 @@ cpd-cli manage get-cr-status \ --components=watson_discovery ``` +#### Create an instance of WD + +_TBD_ ### OpenPages From 303464d215271c3e5781fa3919d8dfc792e6236a Mon Sep 17 00:00:00 2001 From: "Adam.Hayden" Date: Wed, 10 Apr 2024 14:51:57 -0500 Subject: [PATCH 07/13] Add documentation for change worker node primary disk size. 
--- docs/2-Deployment/2-UPI_Install.mdx | 95 ++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/docs/2-Deployment/2-UPI_Install.mdx b/docs/2-Deployment/2-UPI_Install.mdx index 409c9f67..bf9b49d0 100644 --- a/docs/2-Deployment/2-UPI_Install.mdx +++ b/docs/2-Deployment/2-UPI_Install.mdx @@ -604,4 +604,97 @@ Add the following under 'spec:' namespace: openshift-authentication servingCertKeyPairSecret: name: custom-cert -``` \ No newline at end of file +``` + +## Increase Primary Disk size on worker nodes: + +1) Run the following bash one-liner to increase the primary disk on all worker nodes to 500GB: + +``` +aws ec2 describe-instances --query 'Reservations[*].Instances[*].[InstanceId,Tags[?Key==`Name`].Value|[0],BlockDeviceMappings[0].Ebs.VolumeId]' --output text | grep worker | awk '{print $3}' | while read volume_id; do aws ec2 modify-volume --volume-id $volume_id --size 500; done +``` + +2) Log into the node with following command: + +``` +oc debug node/ +``` + +3) Once in the node, run the following: + +``` +chroot /host +``` + +then: + +``` +sudo lsblk +``` + +The output should look like this: + +``` +# sudo lsblk +NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +nvme1n1 259:0 0 1T 0 disk +nvme0n1 259:1 0 500G 0 disk +|-nvme0n1p1 259:2 0 1M 0 part +|-nvme0n1p2 259:3 0 127M 0 part +|-nvme0n1p3 259:4 0 384M 0 part /boot +`-nvme0n1p4 259:5 0 239.5G 0 part /var/lib/kubelet/pods/555d6f90-41fd-49d2-8aad-fa7293a924e4/volume-subpaths/app-config-override/wd-discovery-cnm-api/2 + /var/lib/kubelet/pods/57d28f73-d355-4092-8e52-6b6aeec28bd5/volume-subpaths/clouseau-config/search/4 + /var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2wh-cm/zen-database-core/1 + /var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2oltp-cm/zen-database-core/0 + /var/lib/kubelet/pods/5e35fe3f-7e4d-4729-b3dd-b9553ffd73f6/volume-subpaths/nginx-conf/monitoring-plugin/1 + /var + /sysroot/ostree/deploy/rhcos/var + 
/sysroot + /usr + /etc + / + +``` + +4) Find the part on the disk that you wish to increase, in my case it was 'nvme0n1p4'. + +Now we extend the partition, by targeting the disk (Example: /dev/nvme0n1) and the partition (Example: 4) + +``` +sudo growpart /dev/nvme0n1 4 +``` + +5) Check the disk sizes again: + +``` +sudo lsblk +``` + +This is what my output looks like now: + +``` +NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +nvme1n1 259:0 0 1T 0 disk +nvme0n1 259:1 0 500G 0 disk +|-nvme0n1p1 259:2 0 1M 0 part +|-nvme0n1p2 259:3 0 127M 0 part +|-nvme0n1p3 259:4 0 384M 0 part /boot +`-nvme0n1p4 259:5 0 499.5G 0 part /var/lib/kubelet/pods/555d6f90-41fd-49d2-8aad-fa7293a924e4/volume-subpaths/app-config-override/wd-discovery-cnm-api/2 + /var/lib/kubelet/pods/57d28f73-d355-4092-8e52-6b6aeec28bd5/volume-subpaths/clouseau-config/search/4 + /var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2wh-cm/zen-database-core/1 + /var/lib/kubelet/pods/fcb4b5ec-ea1a-42c7-908c-a027cf885ca1/volume-subpaths/db2oltp-cm/zen-database-core/0 + /var/lib/kubelet/pods/5e35fe3f-7e4d-4729-b3dd-b9553ffd73f6/volume-subpaths/nginx-conf/monitoring-plugin/1 + /var + /sysroot/ostree/deploy/rhcos/var + /sysroot + /usr + /etc + / +``` + +6) Last step is to extend the file system: + +``` +sudo xfs_growfs -d / +``` + From fa1658f3d66e9d52e3c671076cd8e37463b7b81f Mon Sep 17 00:00:00 2001 From: "Adam.Hayden" Date: Wed, 10 Apr 2024 14:56:38 -0500 Subject: [PATCH 08/13] Updated worker-template.yaml --- .../scripts/Install-OCP-UPI/cloudformation/worker-template.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml b/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml index b24f090b..f0ee0418 100644 --- a/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml +++ b/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml @@ -30,6 +30,7 @@ Parameters: Default: m5.large Type: String 
AllowedValues: + - "p4d.24xlarge" - "m4.large" - "m4.xlarge" - "m4.2xlarge" From 51fd344089e0bc677f4eccb28b46a151cd876597 Mon Sep 17 00:00:00 2001 From: "Adam.Hayden" Date: Wed, 10 Apr 2024 14:58:27 -0500 Subject: [PATCH 09/13] . --- .../scripts/Install-OCP-UPI/cloudformation/worker-template.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml b/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml index f0ee0418..8770a472 100644 --- a/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml +++ b/static/scripts/Install-OCP-UPI/cloudformation/worker-template.yaml @@ -145,7 +145,7 @@ Resources: BlockDeviceMappings: - DeviceName: /dev/xvda Ebs: - VolumeSize: "300" + VolumeSize: "500" VolumeType: "gp3" Encrypted: 'true' InstanceType: !Ref WorkerInstanceType From 390f00ed203b518cb629134e8ac0cc7ffb1086a0 Mon Sep 17 00:00:00 2001 From: Ross Kramer Date: Wed, 3 Apr 2024 14:22:55 -0400 Subject: [PATCH 10/13] Adding the rest of the cartridge installs as well as added security constraints. --- docs/3-Installation/1-CP4D.mdx | 64 ++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/docs/3-Installation/1-CP4D.mdx b/docs/3-Installation/1-CP4D.mdx index b637959d..8213c2bf 100644 --- a/docs/3-Installation/1-CP4D.mdx +++ b/docs/3-Installation/1-CP4D.mdx @@ -552,6 +552,54 @@ cpd-cli service-instance list \ ``` +### Generate a cpd-cli Profile + +Log into the CP4D webui with the info retrieved from the `get-cpd-instance-details` and go to your Profile and settings page in the Cloud Pak for Data client and clicking Generate API key. + +In the upper right hand corner, click `API key` -> `Generate new key` + +Copy the generated key. 
+ +Collect the web client URL and export it with the following command + +```tsx +export CPD_PROFILE_URL=$(oc get route cpd --namespace=${PROJECT_CPD_INST_OPERANDS} | tail -1 | awk '{print "https://"$2}') +``` + +We'll set our `profile-name` to the cluster name. + +Set the following vars: +```tsx +export API_KEY= +export CPD_ADMIN_USER=cpadmin +export LOCAL_USER= +export CPD_PROFILE_NAME=wxai +``` + +Create a local user configuration to store your username and API key by using the config users set command. + +```tsx +cpd-cli config users set ${LOCAL_USER} \ +--username ${CPD_ADMIN_USER} \ +--apikey ${API_KEY} +``` + +Create a profile to store the Cloud Pak for Data URL and to associate the profile with your local user configuration by using the config profiles set command. + +```tsx +cpd-cli config profiles set ${CPD_PROFILE_NAME} \ +--user ${LOCAL_USER} \ +--url ${CPD_PROFILE_URL} +``` + +You can now run cpd-cli commands with this profile as shown in the following example. + +```tsx +cpd-cli service-instance list \ +--profile=${CPD_PROFILE_NAME} +``` + + ## Installing our Cartridges Source the env file @@ -600,6 +648,7 @@ watson_assistant_analytics_enabled: true The default Production size in this case is more the suited for our purposes. ::: +#### Apply the olm #### Apply the olm ```tsx cpd-cli manage apply-olm \ @@ -637,16 +686,6 @@ Set the INSTANCE_NAME environment variable to the unique name that you want to u export INSTANCE_NAME="wa-instance" ``` -:::note -You should have followed the steps [here](/Installation/CP4D#generate-a-cpd-cli-profile) to generate a profile. - -The example profile we created we called `wxai`. -::: - -```tsx -export CPD_PROFILE_NAME="wxai" -``` - Set the INSTANCE_VERSION env var to the version that corresponds to the version of CP4D. As of this writing and this guide, we are using 4.8.4. The Service instance version must match the release of CP4D. 
```tsx @@ -716,7 +755,7 @@ cpd-cli manage apply-cr \ --license_acceptance=true ``` -#### Validate the installation +Validate the installation ```tsx cpd-cli manage get-cr-status \ @@ -724,9 +763,6 @@ cpd-cli manage get-cr-status \ --components=watson_discovery ``` -#### Create an instance of WD - -_TBD_ ### OpenPages From 8ae7814ef6ce0bdf371a5d005692fe62cb4e8d82 Mon Sep 17 00:00:00 2001 From: Joe Date: Wed, 3 Apr 2024 07:12:51 -0600 Subject: [PATCH 11/13] updated flight log --- flight-logs/2024-04-03-cocreate.mdx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/flight-logs/2024-04-03-cocreate.mdx b/flight-logs/2024-04-03-cocreate.mdx index dec96dee..9a55027b 100644 --- a/flight-logs/2024-04-03-cocreate.mdx +++ b/flight-logs/2024-04-03-cocreate.mdx @@ -29,13 +29,12 @@ flowchart LR - In Progress 4. Deploy and configure watsonx.ai on self-managed AWS infrastructure +## Today's Accomplishments + ### Summary -- Master nodes attempting to upgrade and are in a stuck state preventing rollback of ingress changes +- Master nodes attemting to upgrade and are in a stuck state preventing rollback of ingress changes - Attempting to deploy a new cluster -- New cluster successfully deployed via latest script -- Deploying EFS -- Installing OpenShift Data Foundation & Standalone Multicloud Object Gateway ## Decisions and Action Items (DAI) - Software evaluation awaiting customer's approval process. 
This blocks our ability to download software from cp.icr.io From aa3129402bf3702a93ee3edf33b0ee327a342616 Mon Sep 17 00:00:00 2001 From: Joe Date: Thu, 4 Apr 2024 10:44:03 -0600 Subject: [PATCH 12/13] updated flight logs --- flight-logs/2024-04-03-cocreate.mdx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flight-logs/2024-04-03-cocreate.mdx b/flight-logs/2024-04-03-cocreate.mdx index 9a55027b..dec96dee 100644 --- a/flight-logs/2024-04-03-cocreate.mdx +++ b/flight-logs/2024-04-03-cocreate.mdx @@ -29,12 +29,13 @@ flowchart LR - In Progress 4. Deploy and configure watsonx.ai on self-managed AWS infrastructure -## Today's Accomplishments - ### Summary -- Master nodes attemting to upgrade and are in a stuck state preventing rollback of ingress changes +- Master nodes attempting to upgrade and are in a stuck state preventing rollback of ingress changes - Attempting to deploy a new cluster +- New cluster successfully deployed via latest script +- Deploying EFS +- Installing OpenShift Data Foundation & Standalone Multicloud Object Gateway ## Decisions and Action Items (DAI) - Software evaluation awaiting customer's approval process. This blocks our ability to download software from cp.icr.io From 1d1f8c5b08ece92de902218a1c14d0bcb48cbc46 Mon Sep 17 00:00:00 2001 From: Ross Kramer Date: Fri, 5 Apr 2024 16:38:56 -0400 Subject: [PATCH 13/13] More updates to CP4D installation docs --- docs/3-Installation/1-CP4D.mdx | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/3-Installation/1-CP4D.mdx b/docs/3-Installation/1-CP4D.mdx index 8213c2bf..a5afc839 100644 --- a/docs/3-Installation/1-CP4D.mdx +++ b/docs/3-Installation/1-CP4D.mdx @@ -686,6 +686,16 @@ Set the INSTANCE_NAME environment variable to the unique name that you want to u export INSTANCE_NAME="wa-instance" ``` +:::note +You should have followed the steps [here](/Installation/CP4D#generate-a-cpd-cli-profile) to generate a profile. 
+ +The example profile we created we called `wxai`. +::: + +```tsx +export CPD_PROFILE_NAME="wxai" +``` + Set the INSTANCE_VERSION env var to the version that corresponds to the version of CP4D. As of this writing and this guide, we are using 4.8.4. The Service instance version must match the release of CP4D. ```tsx @@ -755,7 +765,7 @@ cpd-cli manage apply-cr \ --license_acceptance=true ``` -Validate the installation +#### Validate the installation ```tsx cpd-cli manage get-cr-status \ @@ -763,6 +773,9 @@ cpd-cli manage get-cr-status \ --components=watson_discovery ``` +#### Create an instance of WD + +_TBD_ ### OpenPages