diff --git a/frontend/src/api/k8s/inferenceServices.ts b/frontend/src/api/k8s/inferenceServices.ts index 0e64b7656a..735413c3b6 100644 --- a/frontend/src/api/k8s/inferenceServices.ts +++ b/frontend/src/api/k8s/inferenceServices.ts @@ -24,6 +24,7 @@ export const assembleInferenceService = ( isModelMesh?: boolean, inferenceService?: InferenceServiceKind, acceleratorState?: AcceleratorProfileState, + isStorageNeeded?: boolean, ): InferenceServiceKind => { const { storage, @@ -155,6 +156,11 @@ export const assembleInferenceService = ( }; } + // If storage is not needed, remove storage from the inference service + if (isStorageNeeded !== undefined && !isStorageNeeded) { + delete updateInferenceService.spec.predictor.model?.storage; + } + return updateInferenceService; }; @@ -226,6 +232,7 @@ export const createInferenceService = ( isModelMesh?: boolean, acceleratorState?: AcceleratorProfileState, dryRun = false, + isStorageNeeded?: boolean, ): Promise => { const inferenceService = assembleInferenceService( data, @@ -234,6 +241,7 @@ export const createInferenceService = ( isModelMesh, undefined, acceleratorState, + isStorageNeeded, ); return k8sCreateResource( applyK8sAPIOptions( @@ -253,6 +261,7 @@ export const updateInferenceService = ( isModelMesh?: boolean, acceleratorState?: AcceleratorProfileState, dryRun = false, + isStorageNeeded?: boolean, ): Promise => { const inferenceService = assembleInferenceService( data, @@ -261,6 +270,7 @@ export const updateInferenceService = ( isModelMesh, existingData, acceleratorState, + isStorageNeeded, ); return k8sUpdateResource( diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx index d8ed0ad886..9f91ffc759 100644 --- a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx +++ 
b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/DeployNIMServiceModal.tsx @@ -9,6 +9,8 @@ import { } from '@patternfly/react-core'; import { EitherOrNone } from '@openshift/dynamic-plugin-sdk'; import { + createNIMPVC, + createNIMSecret, getSubmitInferenceServiceResourceFn, getSubmitServingRuntimeResourcesFn, useCreateInferenceServiceObject, @@ -37,6 +39,11 @@ import { useAccessReview } from '~/api'; import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; import KServeAutoscalerReplicaSection from '~/pages/modelServing/screens/projects/kServeModal/KServeAutoscalerReplicaSection'; +const NIM_SECRET_NAME = 'nvidia-nim-secrets'; +const NIM_NGC_SECRET_NAME = 'ngc-secret'; +const NIM_PVC_NAME = 'nim-pvc'; +const NIM_PVC_SIZE = '50Gi'; + const accessReviewResource: AccessReviewResourceAttributes = { group: 'rbac.authorization.k8s.io', resource: 'rolebindings', @@ -107,7 +114,8 @@ const DeployNIMServiceModal: React.FC = ({ }, [currentProjectName, setCreateDataInferenceService, isOpen]); // Serving Runtime Validation - const isDisabledServingRuntime = namespace === '' || actionInProgress; + const isDisabledServingRuntime = + namespace === '' || actionInProgress || createDataServingRuntime.imageName === undefined; const baseInputValueValid = createDataServingRuntime.numReplicas >= 0 && @@ -178,9 +186,13 @@ const DeployNIMServiceModal: React.FC = ({ acceleratorProfileState, allowCreate, editInfo?.secrets, + false, ); Promise.all([ + createNIMSecret(namespace, NIM_SECRET_NAME, false, false), + createNIMSecret(namespace, NIM_NGC_SECRET_NAME, true, false), + createNIMPVC(namespace, NIM_PVC_NAME, NIM_PVC_SIZE, false), submitServingRuntimeResources({ dryRun: true }), submitInferenceServiceResource({ dryRun: true }), ]) diff --git a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx index 8b19e8d322..605db92fa2 100644 --- 
a/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx +++ b/frontend/src/pages/modelServing/screens/projects/NIMServiceModal/NIMModelListSection.tsx @@ -7,7 +7,7 @@ import { CreatingServingRuntimeObject, } from '~/pages/modelServing/screens/types'; import SimpleDropdownSelect from '~/components/SimpleDropdownSelect'; -import { fetchNIMModelNames } from '~/pages/modelServing/screens/projects/utils'; +import { fetchNIMModelNames, ModelInfo } from '~/pages/modelServing/screens/projects/utils'; type NIMModelListSectionProps = { inferenceServiceData: CreatingInferenceServiceObject; @@ -23,6 +23,7 @@ const NIMModelListSection: React.FC = ({ isEditing, }) => { const [options, setOptions] = useState<{ key: string; label: string }[]>([]); + const [modelList, setModelList] = useState([]); useEffect(() => { const getModelNames = async () => { @@ -32,6 +33,7 @@ const NIMModelListSection: React.FC = ({ key: modelInfo.name, label: `${modelInfo.displayName} - ${modelInfo.latestTag}`, })); + setModelList(modelInfos); setOptions(fetchedOptions); } }; @@ -39,20 +41,20 @@ const NIMModelListSection: React.FC = ({ }, []); const getSupportedModelFormatsInfo = (name: string) => { - const modelInfo = options.find((option) => option.key === name); + const modelInfo = modelList.find((model) => model.name === name); if (modelInfo) { return { - name: modelInfo.key, - version: modelInfo.label.split(' - ')[1], + name: modelInfo.name, + version: modelInfo.latestTag, }; } return { name: '', version: '' }; }; const getNIMImageName = (name: string) => { - const imageInfo = options.find((option) => option.key === name); + const imageInfo = modelList.find((model) => model.name === name); if (imageInfo) { - return `nvcr.io/nim/meta/${name}:${imageInfo.label.split(' - ')[1]}`; + return `nvcr.io/${imageInfo.namespace}/${name}:${imageInfo.latestTag}`; } return ''; }; diff --git a/frontend/src/pages/modelServing/screens/projects/utils.ts 
b/frontend/src/pages/modelServing/screens/projects/utils.ts index 59600b8369..46b9bbe9ea 100644 --- a/frontend/src/pages/modelServing/screens/projects/utils.ts +++ b/frontend/src/pages/modelServing/screens/projects/utils.ts @@ -6,6 +6,7 @@ import { ProjectKind, SecretKind, ServingRuntimeKind, + PersistentVolumeClaimKind, } from '~/k8sTypes'; import { DataConnection, @@ -44,12 +45,16 @@ import { getConfigMap, updateInferenceService, updateServingRuntime, + getSecret, + createPvc, } from '~/api'; import { isDataConnectionAWS } from '~/pages/projects/screens/detail/data-connections/utils'; import { removeLeadingSlash } from '~/utilities/string'; -const NAMESPACE = 'redhat-ods-applications'; -const CONFIGMAP = 'nvidia-nim-images-data'; +const NIM_NAMESPACE = 'redhat-ods-applications'; +const NIM_CONFIGMAP_NAME = 'nvidia-nim-images-data'; +const NIM_SECRET_NAME = 'nvidia-nim-access'; +const NIM_NGC_SECRET_NAME = 'nvidia-nim-image-pull'; export const getServingRuntimeSizes = (config: DashboardConfigKind): ModelServingSize[] => { let sizes = config.spec.modelServerSizes || []; @@ -322,6 +327,7 @@ const createInferenceServiceAndDataConnection = ( isModelMesh?: boolean, acceleratorProfileState?: AcceleratorProfileState, dryRun = false, + isStorageNeeded?: boolean, ) => { if (!existingStorage) { return createAWSSecret(inferenceServiceData, dryRun).then((secret) => @@ -333,6 +339,7 @@ const createInferenceServiceAndDataConnection = ( isModelMesh, acceleratorProfileState, dryRun, + isStorageNeeded, ) : createInferenceService( inferenceServiceData, @@ -340,6 +347,7 @@ const createInferenceServiceAndDataConnection = ( isModelMesh, acceleratorProfileState, dryRun, + isStorageNeeded, ), ); } @@ -351,6 +359,7 @@ const createInferenceServiceAndDataConnection = ( isModelMesh, acceleratorProfileState, dryRun, + isStorageNeeded, ) : createInferenceService( inferenceServiceData, @@ -358,6 +367,7 @@ const createInferenceServiceAndDataConnection = ( isModelMesh, acceleratorProfileState, 
dryRun, + isStorageNeeded, ); }; @@ -369,6 +379,7 @@ export const getSubmitInferenceServiceResourceFn = ( acceleratorProfileState?: AcceleratorProfileState, allowCreate?: boolean, secrets?: SecretKind[], + isStorageNeeded?: boolean, ): ((opts: { dryRun?: boolean }) => Promise) => { const inferenceServiceData = { ...createData, @@ -397,6 +408,7 @@ export const getSubmitInferenceServiceResourceFn = ( isModelMesh, acceleratorProfileState, dryRun, + isStorageNeeded, ).then((inferenceService) => setUpTokenAuth( createData, @@ -557,7 +569,7 @@ export interface ModelInfo { } export const fetchNIMModelNames = async (): Promise => { - const configMap = await getConfigMap(NAMESPACE, CONFIGMAP); + const configMap = await getConfigMap(NIM_NAMESPACE, NIM_CONFIGMAP_NAME); if (configMap.data) { const modelInfos: ModelInfo[] = Object.entries(configMap.data).map(([key, value]) => { const modelData = JSON.parse(value); // Parse the JSON string @@ -575,3 +587,61 @@ export const fetchNIMModelNames = async (): Promise => } return undefined; }; + +export const createNIMSecret = async ( + projectName: string, + secretName: string, + isNGC: boolean, + dryRun: boolean, +): Promise => { + const labels: Record = { + [KnownLabels.DASHBOARD_RESOURCE]: 'true', + }; + const data: Record = {}; + const newSecret = { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: secretName, + namespace: projectName, + labels, + }, + data, + type: isNGC ? 'kubernetes.io/dockerconfigjson' : 'Opaque', + }; + const nimSecretData: SecretKind = isNGC + ? await getSecret(NIM_NAMESPACE, NIM_NGC_SECRET_NAME) + : await getSecret(NIM_NAMESPACE, NIM_SECRET_NAME); + + if (nimSecretData.data) { + if (!isNGC) { + data.NGC_API_KEY = nimSecretData.data.api_key; + } else { + data['.dockerconfigjson'] = nimSecretData.data['.dockerconfigjson']; + } + return createSecret(newSecret, { dryRun }); + } + + return Promise.reject(new Error(`Error creating NIM ${isNGC ? 
'NGC' : 'access'} secret`)); +}; + +export const createNIMPVC = ( + projectName: string, + pvcName: string, + pvcSize: string, + dryRun: boolean, +): Promise<PersistentVolumeClaimKind> => + createPvc( + { + nameDesc: { + name: pvcName, + description: '', + }, + size: pvcSize, + }, + projectName, + undefined, + { + dryRun, + }, + );