From 2a8d39ec68affe508008eb2e3c91abe52a198c18 Mon Sep 17 00:00:00 2001
From: Googler
Date: Thu, 18 Apr 2024 10:35:50 -0700
Subject: [PATCH] feat(components): add resolve_machine_spec and resolve_refined_image_uri to rlhf_preprocessor component

PiperOrigin-RevId: 626080295
---
 .../llm/generated/refined_image_versions.py   |  2 +-
 .../llm/reinforcement_learning_graph.py       | 29 ++++++++-------
 .../_implementation/llm/reward_model_graph.py | 29 ++++++++-------
 .../_implementation/llm/rlhf_preprocessor.py  | 37 +++++++++++++++++++
 .../preview/llm/rlhf/component.py             | 29 ++++++++++++++-
 5 files changed, 95 insertions(+), 31 deletions(-)

diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py
index 3e66fa4789a..6df1693d9ad 100644
--- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py
+++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py
@@ -17,4 +17,4 @@
 DO NOT EDIT - This file is generated, manual changes will be overridden.
 """
 
-IMAGE_TAG = '20240414_0507'
+IMAGE_TAG = '20240417_0507_RC00'
diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py
index 3b56dd64288..f9e07e823de 100644
--- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py
+++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py
@@ -44,6 +44,11 @@ def pipeline(
     reward_model_reference: str,
     policy_model_reference: str,
     policy_model_path: str,
+    machine_type: str,
+    tuning_location: str,
+    accelerator_type: str,
+    accelerator_count: int,
+    rl_image_uri: str,
     prompt_sequence_length: int = 512,
     target_sequence_length: int = 64,
     lora_dim: int = 1,
@@ -54,7 +59,6 @@ def pipeline(
     kl_coeff: float = 0.1,
     instruction: Optional[str] = None,
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
-    accelerator_type: str = 'GPU',
     location: str = _placeholders.LOCATION_PLACEHOLDER,
     tensorboard_resource_id: str = '',
     encryption_spec_key_name: str = '',
@@ -70,6 +74,11 @@ def pipeline(
     reward_model_reference: Name of the reward model. The name should be in capitalized snake case format.
     policy_model_reference: Name of the policy model. The name should be in capitalized snake case format.
     policy_model_path: The model checkpoint path to the reinforcer model.
+    machine_type: The type of the machine to provision for the custom job. Must be a valid GCE instance type and compatible with the accelerator type.
+    tuning_location: The GCP region to run the custom job.
+    accelerator_type: Specific accelerator type for the custom job.
+    accelerator_count: The number of accelerators.
+    rl_image_uri: Docker image URI to use for the reinforcement learning training job.
     prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512.
     target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64.
     lora_dim: The rank of the LoRA adapter. If >0, then use LoRA-tuning. If =0, then use full-tuning. Default is 1.
@@ -80,7 +89,6 @@ def pipeline(
     kl_coeff: Coefficient for KL penalty. This regularizes the policy model and penalizes if it diverges from its initial distribution. If set to 0, the reference language model is not loaded into memory. Default value is 0.1.
     instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field.
     project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used.
-    accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning components run in europe-west4. Otherwise tuning components run in us-central1 on GPUs. Default is 'GPU'.
     location: Location used to run non-tuning components, i.e. components that do not require accelerators. If not specified the location used to run the pipeline will be used.
     tensorboard_resource_id: Optional tensorboard resource id in format `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. If provided, tensorboard metrics will be uploaded to this location.
     encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
@@ -91,10 +99,6 @@ def pipeline(
   """
   # fmt: on
   prompt_column = 'input_text'
-  machine_spec = function_based.resolve_machine_spec(
-      accelerator_type=accelerator_type,
-      use_test_spec=env.get_use_test_machine_spec(),
-  ).set_display_name('Resolve Machine Spec')
 
   processed_dataset = preprocess_chat_dataset.preprocess_chat_dataset(
       large_model_reference=large_model_reference,
@@ -118,16 +122,13 @@ def pipeline(
       .set_display_name('Import Prompt Dataset')
       .set_caching_options(False)
   )
-  rl_image_uri = function_based.resolve_private_refined_image_uri(
-      accelerator_type=machine_spec.outputs['accelerator_type'],
-  ).set_display_name('Resolve Reinforcer Image URI')
   num_microbatches = function_based.resolve_num_microbatches(
       large_model_reference=policy_model_reference,
   ).set_display_name('Resolve Number of Microbatches')
   rl_model = (
       reinforcer.reinforcer(
           project=project,
-          location=machine_spec.outputs['tuning_location'],
+          location=tuning_location,
           input_reference_model_path=policy_model_path,
           input_reward_model_path=input_reward_model_path,
           input_reward_adapter_path=input_reward_adapter_path,
@@ -136,12 +137,12 @@ def pipeline(
           ],
           input_preference_dataset_path=input_preference_dataset_path,
           train_steps=reinforcement_learning_train_steps,
-          accelerator_type=machine_spec.outputs['accelerator_type'],
-          accelerator_count=machine_spec.outputs['accelerator_count'],
+          accelerator_type=accelerator_type,
+          accelerator_count=accelerator_count,
           large_model_reference=policy_model_reference,
           reward_model_reference=reward_model_reference,
-          machine_type=machine_spec.outputs['machine_type'],
-          image_uri=rl_image_uri.output,
+          machine_type=machine_type,
+          image_uri=rl_image_uri,
           inputs_sequence_length=prompt_sequence_length,
           targets_sequence_length=target_sequence_length,
           batch_size=batch_size,
diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py
index 8c9f8181a43..85c1cd5614a 100644
--- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py
+++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py
@@ -40,6 +40,11 @@ def pipeline(
     large_model_reference: str,
     reward_model_reference: str,
     reward_model_path: str,
+    machine_type: str,
+    tuning_location: str,
+    accelerator_type: str,
+    accelerator_count: int,
+    reward_model_image_uri: str,
     prompt_sequence_length: int = 512,
     target_sequence_length: int = 64,
     batch_size: int = 64,
@@ -49,7 +54,6 @@ def pipeline(
     eval_dataset: Optional[str] = None,
     instruction: Optional[str] = None,
     project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
-    accelerator_type: str = 'GPU',
     location: str = _placeholders.LOCATION_PLACEHOLDER,
     tensorboard_resource_id: str = '',
     encryption_spec_key_name: str = '',
@@ -62,6 +66,11 @@ def pipeline(
     large_model_reference: Name of the base model. Supported values are `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001` and `t5-small` are supported in `us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`.
     reward_model_reference: Name of the base model. The name should be in capitalized snake case format.
     reward_model_path: The model checkpoint path for the reward model.
+    machine_type: The type of the machine to provision for the custom job. Must be a valid GCE instance type and compatible with the accelerator type.
+    tuning_location: The GCP region to run the custom job.
+    accelerator_type: Specific accelerator type for the custom job.
+    accelerator_count: The number of accelerators.
+    reward_model_image_uri: Docker image URI to use for the reward model training job.
     prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512.
     target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64.
     batch_size: Number of examples in each finetuning step. Default is 64.
@@ -70,7 +79,6 @@ def pipeline(
     reward_model_train_steps: Number of steps to use when training a reward model. Default value is 1000.
     instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field.
     project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used.
-    accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning components run in europe-west4. Otherwise tuning components run in us-central1 on GPUs. Default is 'GPU'.
     location: Location used to run non-tuning components, i.e. components that do not require accelerators. If not specified the location used to run the pipeline will be used.
     tensorboard_resource_id: Optional tensorboard resource id in format `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. If provided, tensorboard metrics will be uploaded to this location.
     encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
@@ -83,10 +91,6 @@ def pipeline(
   prompt_column = 'input_text'
   candidate_columns = ['candidate_0', 'candidate_1']
   choice_column = 'choice'
-  machine_spec = function_based.resolve_machine_spec(
-      accelerator_type=accelerator_type,
-      use_test_spec=env.get_use_test_machine_spec(),
-  ).set_display_name('Resolve Machine Spec')
 
   processed_preference_dataset = (
       preprocess_chat_dataset.preprocess_chat_dataset(
@@ -136,16 +140,13 @@ def pipeline(
       .set_caching_options(False)
   )
-  reward_model_image_uri = function_based.resolve_private_refined_image_uri(
-      accelerator_type=machine_spec.outputs['accelerator_type'],
-  ).set_display_name('Resolve Reward Model Image URI')
   num_microbatches = function_based.resolve_num_microbatches(
       large_model_reference=reward_model_reference,
   ).set_display_name('Resolve Number of Microbatches')
   reward_model = (
       reward_model_trainer.reward_model_trainer(
           project=project,
-          location=machine_spec.outputs['tuning_location'],
+          location=tuning_location,
           input_model_path=reward_model_path,
           input_dataset_path=preference_dataset_importer.outputs[
               'output_dataset_path'
           ],
@@ -154,11 +155,11 @@ def pipeline(
               'output_dataset_path'
           ],
           train_steps=reward_model_train_steps,
-          accelerator_type=machine_spec.outputs['accelerator_type'],
-          accelerator_count=machine_spec.outputs['accelerator_count'],
+          accelerator_type=accelerator_type,
+          accelerator_count=accelerator_count,
           large_model_reference=reward_model_reference,
-          machine_type=machine_spec.outputs['machine_type'],
-          image_uri=reward_model_image_uri.output,
+          machine_type=machine_type,
+          image_uri=reward_model_image_uri,
           inputs_sequence_length=prompt_sequence_length,
           targets_sequence_length=target_sequence_length,
           batch_size=batch_size,
diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py
index 022062473f3..16e8a2fb147 100644
--- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py
+++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py
@@ -24,6 +24,12 @@
 @dsl.container_component
 def rlhf_preprocessor(
     large_model_reference: str,
+    accelerator_type: str,
+    use_test_spec: bool,
+    project: str,
+    location: str,
+    artifact_registry: str,
+    tag: str,
     gcp_resources: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     has_tensorboard_id: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
     has_inference_dataset: dsl.OutputPath(bool),  # pytype: disable=invalid-annotation
@@ -31,6 +37,12 @@ def rlhf_preprocessor(
     metadata_reference_model_path: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     metadata_reward_model_reference: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     metadata_reward_model_path: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_machine_type: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_tuning_location: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_accelerator_type: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_accelerator_count: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
+    metadata_refined_image_uri: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    use_experimental_image: bool = False,
     evaluation_dataset: str = '',
     tensorboard_resource_id: str = '',
     input_reference_model_path: str = '',
@@ -41,17 +53,30 @@ def rlhf_preprocessor(
 
   Args:
     large_model_reference: The model for fine tuning.
+    accelerator_type: Specific accelerator type for the job.
+    use_test_spec: Whether to use a lower resource machine for testing.
+    project: Project that contains the artifact registry.
+    location: Region that contains the artifact registry.
+    artifact_registry: Registry that contains Docker images.
+    tag: Image tag.
+    use_experimental_image: Whether to use the refined experimental image.
     evaluation_dataset: Path to evaluation data.
     tensorboard_resource_id: TensorBoard resource id.
     metadata_large_model_reference: The base model for fine tuning. The name should be in capitalized snake case format.
     metadata_reference_model_path: The model checkpoint path for the reinforcer model
     metadata_reward_model_reference: The base model for training reward model. The name should be in capitalized snake case format.
     metadata_reward_model_path: The model checkpoint path for the reward model.
+    image_uri: Docker image URI to use for the custom job.
 
   Returns:
     gcp_resources: GCP resources that can be used to track the custom job.
     has_tensorboard_id: Whether a tensorboard id is provided.
     has_inference_dataset: Whether inference data are provided.
+    metadata_machine_type: The type of the machine to provision for the custom job.
+    metadata_tuning_location: The GCP region to run the custom job.
+    metadata_accelerator_type: Specific accelerator type for the custom job.
+    metadata_accelerator_count: The number of accelerators.
+    metadata_refined_image_uri: Docker image URI to use for the custom job.
   """
   # fmt: on
   return gcpc_utils.build_serverless_customjob_container_spec(
@@ -67,12 +92,24 @@ def rlhf_preprocessor(
               f'--tensorboard_resource_id={tensorboard_resource_id}',
               f'--large_model_reference={large_model_reference}',
               f'--input_reference_model_path={input_reference_model_path}',
+              f'--accelerator_type={accelerator_type}',
+              f'--use_test_spec={use_test_spec}',
+              f'--project={project}',
+              f'--location={location}',
+              f'--artifact_registry={artifact_registry}',
+              f'--tag={tag}',
+              f'--use_experimental_image={use_experimental_image}',
               f'--has_tensorboard_id_path={has_tensorboard_id}',
               f'--has_inference_dataset_path={has_inference_dataset}',
               f'--metadata_large_model_reference_path={metadata_large_model_reference}',
               f'--metadata_reference_model_path_path={metadata_reference_model_path}',
               f'--metadata_reward_model_reference_path={metadata_reward_model_reference}',
               f'--metadata_reward_model_path_path={metadata_reward_model_path}',
+              f'--metadata_machine_type_path={metadata_machine_type}',
+              f'--metadata_tuning_location_path={metadata_tuning_location}',
+              f'--metadata_accelerator_type_path={metadata_accelerator_type}',
+              f'--metadata_accelerator_count_path={metadata_accelerator_count}',
+              f'--metadata_refined_image_uri_path={metadata_refined_image_uri}',
           ],
       ),
       gcp_resources=gcp_resources,
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py
index 56f950d2e0b..873e308b97c 100644
--- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py
+++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py
@@ -22,6 +22,7 @@
 from google_cloud_pipeline_components._implementation.llm import reinforcement_learning_graph
 from google_cloud_pipeline_components._implementation.llm import reward_model_graph
 from google_cloud_pipeline_components._implementation.llm import rlhf_preprocessor
+from google_cloud_pipeline_components._implementation.llm import utils
 from google_cloud_pipeline_components._implementation.llm import validate_pipeline
 from google_cloud_pipeline_components.preview.llm.infer import component
 import kfp
@@ -97,6 +98,12 @@ def rlhf_pipeline(
 
   preprocess_metadata = rlhf_preprocessor.rlhf_preprocessor(
       large_model_reference=large_model_reference,
+      accelerator_type=accelerator_type,
+      use_test_spec=env.get_use_test_machine_spec(),
+      project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT,
+      location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION,
+      artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY,
+      tag=env.get_private_image_tag(),
       evaluation_dataset=eval_dataset,
       tensorboard_resource_id=tensorboard_resource_id,
   ).set_display_name('Preprocess Inputs')
@@ -112,6 +119,19 @@ def rlhf_pipeline(
           reward_model_path=preprocess_metadata.outputs[
               'metadata_reward_model_path'
           ],
+          machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+          tuning_location=preprocess_metadata.outputs[
+              'metadata_tuning_location'
+          ],
+          accelerator_type=preprocess_metadata.outputs[
+              'metadata_accelerator_type'
+          ],
+          accelerator_count=preprocess_metadata.outputs[
+              'metadata_accelerator_count'
+          ],
+          reward_model_image_uri=preprocess_metadata.outputs[
+              'metadata_refined_image_uri'
+          ],
           prompt_sequence_length=prompt_sequence_length,
           target_sequence_length=target_sequence_length,
           eval_dataset=validate_pipeline_task.outputs[
@@ -123,7 +143,6 @@ def rlhf_pipeline(
           lora_dim=reward_lora_dim,
           project=project,
           location=location,
-          accelerator_type=accelerator_type,
           tensorboard_resource_id=tensorboard_resource_id,
           encryption_spec_key_name=encryption_spec_key_name,
       )
@@ -152,6 +171,13 @@ def rlhf_pipeline(
       policy_model_path=preprocess_metadata.outputs[
           'metadata_reference_model_path'
       ],
+      machine_type=preprocess_metadata.outputs['metadata_machine_type'],
+      tuning_location=preprocess_metadata.outputs['metadata_tuning_location'],
+      accelerator_type=preprocess_metadata.outputs['metadata_accelerator_type'],
+      accelerator_count=preprocess_metadata.outputs[
+          'metadata_accelerator_count'
+      ],
+      rl_image_uri=preprocess_metadata.outputs['metadata_refined_image_uri'],
       prompt_sequence_length=prompt_sequence_length,
       target_sequence_length=target_sequence_length,
       reinforcement_learning_rate_multiplier=reinforcement_learning_rate_multiplier,
@@ -160,7 +186,6 @@ def rlhf_pipeline(
       instruction=instruction,
      reward_lora_dim=reward_lora_dim,
      project=project,
-      accelerator_type=accelerator_type,
      location=location,
      tensorboard_resource_id=tensorboard_resource_id,
      encryption_spec_key_name=encryption_spec_key_name,