From 3eaf594ee89544275bf12bc2a449d433f8362553 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Mon, 9 Sep 2024 23:50:58 +0600 Subject: [PATCH 01/53] Add synthetic jobs support for LLAMA prompts --- .../compute_horde/base/output_upload.py | 6 +- compute_horde/compute_horde/executor_class.py | 6 ++ .../validator/synthetic_jobs/batch_run.py | 51 ++++++++-- .../synthetic_jobs/generator/base.py | 9 +- .../synthetic_jobs/generator/factory.py | 9 +- .../synthetic_jobs/generator/gpu_hashcat.py | 5 +- .../synthetic_jobs/generator/llama_prompts.py | 94 +++++++++++++++++++ .../test_synthetic_jobs/mock_generator.py | 9 +- .../validator/tests/test_utils.py | 9 +- validator/pdm.lock | 12 +-- validator/pyproject.toml | 1 + 11 files changed, 183 insertions(+), 28 deletions(-) create mode 100644 validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py diff --git a/compute_horde/compute_horde/base/output_upload.py b/compute_horde/compute_horde/base/output_upload.py index 4f33a70f9..0dc436400 100644 --- a/compute_horde/compute_horde/base/output_upload.py +++ b/compute_horde/compute_horde/base/output_upload.py @@ -83,6 +83,10 @@ class MultiUpload(pydantic.BaseModel): OutputUpload = Annotated[ - ZipAndHttpPostUpload | ZipAndHttpPutUpload | MultiUpload, + SingleFilePostUpload + | SingleFilePutUpload + | ZipAndHttpPostUpload + | ZipAndHttpPutUpload + | MultiUpload, Field(discriminator="output_upload_type"), ] diff --git a/compute_horde/compute_horde/executor_class.py b/compute_horde/compute_horde/executor_class.py index 67fbfbee4..a7f57ada6 100644 --- a/compute_horde/compute_horde/executor_class.py +++ b/compute_horde/compute_horde/executor_class.py @@ -6,6 +6,7 @@ class ExecutorClass(StrEnum): spin_up_4min__gpu_24gb = "spin_up-4min.gpu-24gb" always_on__gpu_24gb = "always_on.gpu-24gb" + always_on__llama = "always_on.llama" # always_on__cpu_16c__ram_64gb = "always_on.cpu-16c.ram-64gb" # always_on__gpu_80gb = "always_on.gpu-80gb" # always_on__gpu_24gb__docker_cached_facilitator = "always_on.gpu-24gb.docker_cached-facilitator" @@ -39,6 +40,11 @@ class ExecutorClassSpec: gpu_vram_gb=24, spin_up_time=0, ), + ExecutorClass.always_on__llama: ExecutorClassSpec( + has_gpu=True, + gpu_vram_gb=80, + spin_up_time=0, + ), # ExecutorClass.always_on__cpu_16c__ram_64gb: ExecutorClassSpec( # cpu_cores=16, # ram_gb=64, diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index b43af4f31..2d334d665 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -13,7 +13,8 @@ import bittensor from asgiref.sync import sync_to_async from channels.layers import get_channel_layer -from compute_horde.base.volume import InlineVolume +from compute_horde.base.output_upload import OutputUpload +from compute_horde.base.volume import Volume from compute_horde.base_requests import BaseRequest from compute_horde.executor_class import DEFAULT_EXECUTOR_CLASS, EXECUTOR_CLASS, ExecutorClass from compute_horde.miner_client.base import ( @@ -57,6 +58,7 @@ JobStartedReceipt, Miner, MinerManifest, + PromptSample, SyntheticJob, SyntheticJobBatch, SystemEvent, @@ -230,7 +232,8 @@ class Job: miner_hotkey: str executor_class: ExecutorClass job_generator: BaseSyntheticJobGenerator - volume_contents: str + volume: Volume | None + output_upload: OutputUpload | None # responses @@ 
-347,7 +350,6 @@ def emit_telemetry_event(self) -> SystemEvent | None: docker_image_name=self.job_generator.docker_image_name(), docker_run_options_preset=self.job_generator.docker_run_options_preset(), timeout_seconds=self.job_generator.timeout_seconds(), - volume_contents_size=len(self.volume_contents), exception=repr(self.exception) if self.exception is not None else None, exception_time=_datetime_dump(self.exception_time), exception_stage=self.exception_stage, @@ -764,12 +766,36 @@ async def _generate_jobs(ctx: BatchContext) -> None: start_time = time.time() generated_job_count = 0 + llama_executor_count = sum( + count + for executors in ctx.executors.values() + for executor_class, count in executors.items() + if executor_class == ExecutorClass.always_on__llama + ) + llama_prompt_samples = ( + PromptSample.objects.select_related("series", "workload") + .prefetch_related("prompts") + .filter( + synthetic_job__isnull=True, + workload__finished_at__isnull=False, + )[:llama_executor_count] + ) + llama_prompt_samples = [ps async for ps in llama_prompt_samples] + assert len(llama_prompt_samples) == llama_executor_count + llama_prompt_samples_iter = iter(llama_prompt_samples) + for hotkey, executors in ctx.executors.items(): miner_name = ctx.names[hotkey] for executor_class, count in executors.items(): job_generators = [] for _ in range(count): - job_generator = await current.synthetic_job_generator_factory.create(executor_class) + args = [] + if executor_class == ExecutorClass.always_on__llama: + args.append(next(llama_prompt_samples_iter)) + + job_generator = await current.synthetic_job_generator_factory.create( + executor_class, *args + ) await job_generator.ainit() job_uuid = str(job_generator.uuid()) ctx.jobs[job_uuid] = Job( @@ -779,7 +805,8 @@ async def _generate_jobs(ctx: BatchContext) -> None: miner_hotkey=hotkey, executor_class=executor_class, job_generator=job_generator, - volume_contents=await job_generator.volume_contents(), + volume=await job_generator.volume(), + output_upload=await job_generator.output_upload(), ) ctx.job_uuids.append(job_uuid) job_generators.append(job_generator) @@ -864,8 +891,8 @@ async def _send_job_request( docker_run_options_preset=job.job_generator.docker_run_options_preset(), docker_run_cmd=job.job_generator.docker_run_cmd(), raw_script=job.job_generator.raw_script(), - volume=InlineVolume(contents=job.volume_contents), - output_upload=None, + volume=job.volume, + output_upload=job.output_upload, ) request_json = request.model_dump_json() @@ -1221,6 +1248,16 @@ async def _score_job(ctx: BatchContext, job: Job) -> None: async def _score_jobs(ctx: BatchContext) -> None: + # NOTE: download the answers for llama jobs before scoring + tasks = [ + asyncio.create_task(job.job_generator._download_answers()) + for job in ctx.jobs.values() + if job.executor_class == ExecutorClass.always_on__llama + and job.job_response is not None + and isinstance(job.job_response, V0JobFinishedRequest) + ] + await asyncio.gather(*tasks, return_exceptions=True) + for job in ctx.jobs.values(): try: await _score_job(ctx, job) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py index b67c3c1e1..bfe154d19 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py @@ -1,6 +1,8 @@ import abc import uuid +from 
compute_horde.base.output_upload import OutputUpload +from compute_horde.base.volume import Volume from compute_horde.executor_class import ExecutorClass from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest @@ -37,7 +39,10 @@ def raw_script(self) -> str | None: return None @abc.abstractmethod - async def volume_contents(self) -> str: ... + async def volume(self) -> Volume | None: ... + + async def output_upload(self) -> OutputUpload | None: + return None @abc.abstractmethod def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: ... @@ -48,4 +53,4 @@ def job_description(self) -> str: ... class BaseSyntheticJobGeneratorFactory(abc.ABC): @abc.abstractmethod - async def create(self, executor_class: ExecutorClass) -> BaseSyntheticJobGenerator: ... + async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: ... diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py index ac01f0fef..a904848a7 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py @@ -7,8 +7,13 @@ from compute_horde_validator.validator.synthetic_jobs.generator.gpu_hashcat import ( GPUHashcatSyntheticJobGenerator, ) +from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( + LlamaPromptsSyntheticJobGenerator, +) class DefaultSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass) -> BaseSyntheticJobGenerator: - return GPUHashcatSyntheticJobGenerator() + async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: + if executor_class == ExecutorClass.always_on__llama: + return LlamaPromptsSyntheticJobGenerator(*args) + return GPUHashcatSyntheticJobGenerator(*args) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py index 54ef75586..16ed4b496 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py @@ -1,4 +1,5 @@ from asgiref.sync import sync_to_async +from compute_horde.base.volume import InlineVolume, Volume from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest from compute_horde_validator.validator.dynamic_config import aget_weights_version @@ -73,8 +74,8 @@ def raw_script(self) -> str | None: return self.hash_job.raw_script() @sync_to_async(thread_sensitive=False) - def volume_contents(self) -> str: - return single_file_zip("payload.txt", self.hash_job.payload) + def volume(self) -> Volume | None: + return InlineVolume(contents=single_file_zip("payload.txt", self.hash_job.payload)) def score(self, time_took: float) -> float: if self.weights_version == 0: diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py new file mode 100644 index 000000000..73ff20359 --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py @@ -0,0 +1,94 @@ +import uuid 
+
+import httpx
+import pydantic
+from compute_horde.base.output_upload import OutputUpload, SingleFilePutUpload
+from compute_horde.base.volume import SingleFileVolume, Volume
+from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest
+from django.conf import settings
+from pydantic import BaseModel
+
+from compute_horde_validator.validator.models import PromptSample
+from compute_horde_validator.validator.s3 import generate_upload_url, get_public_url
+
+from .base import BaseSyntheticJobGenerator
+
+
+class PromptAnswer(BaseModel):
+    prompt: str
+    answer: str
+
+
+class LlamaPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator):
+    def __init__(self, prompt_sample: PromptSample):
+        super().__init__()
+        self.prompt_sample: PromptSample = prompt_sample
+
+        self.s3_output_key = str(uuid.uuid4()) + ".json"
+        self.s3_output_prefix = "solved/"
+        self.s3_output_bucket = settings.S3_BUCKET_NAME_ANSWERS
+
+        self.prompt_answers: list[PromptAnswer] | None = None
+
+    def _url_for_upload(self) -> str:
+        return generate_upload_url(
+            self.s3_output_key,
+            bucket_name=self.s3_output_bucket,
+            prefix=self.s3_output_prefix,
+        )
+
+    def _url_for_download(self) -> str:
+        return get_public_url(
+            key=self.s3_output_key,
+            bucket_name=self.s3_output_bucket,
+            prefix=self.s3_output_prefix,
+        )
+
+    def timeout_seconds(self) -> int:
+        # TODO: ???
+        return 80
+
+    def base_docker_image_name(self) -> str:
+        return "TODO"
+
+    def docker_image_name(self) -> str:
+        return "TODO"
+
+    def docker_run_options_preset(self) -> str:
+        return "nvidia_all"
+
+    def docker_run_cmd(self) -> list[str]:
+        return ["--seed", str(self.prompt_sample.workload.seed)]
+
+    async def volume(self) -> Volume | None:
+        return SingleFileVolume(url=self.prompt_sample.series.s3_url, relative_path="prompts.txt")
+
+    async def output_upload(self) -> OutputUpload | None:
+        return SingleFilePutUpload(url=self._url_for_upload(), relative_path="answers.json")
+
+    async def _download_answers(self):
+        async with httpx.AsyncClient() as client:
+            response = await client.get(self._url_for_download(), timeout=5)
+            self.prompt_answers = pydantic.TypeAdapter(list[PromptAnswer]).validate_json(
+                response.content
+            )
+
+    def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]:
+        if not self.prompt_answers:
+            raise RuntimeError("_download_answers must be called before calling verify")
+
+        for prompt in self.prompt_sample.prompts.all():
+            for prompt_answer in self.prompt_answers:
+                if prompt_answer.prompt != prompt.content:
+                    continue
+                if prompt_answer.answer != prompt.answer:
+                    return False, "results do not match expected answers", 0
+                break
+            else:
+                # did not find answer for this prompt
+                return False, "result does not contain all answers", 0
+
+        return True, "", 1
+
+    def job_description(self) -> str:
+        return "LLAMA prompts synthetic job"
diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py
index 40e6a7199..0d0a412d6 100644
--- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py
@@ -1,5 +1,6 @@
 import uuid
 
+from compute_horde.base.volume import InlineVolume, Volume
 from compute_horde.executor_class import ExecutorClass
 from compute_horde.mv_protocol.miner_requests import (
     V0JobFinishedRequest,
@@ -36,8 +37,8 
@@ def docker_run_options_preset(self) -> str: def docker_run_cmd(self) -> list[str]: return ["mock"] - async def volume_contents(self) -> str: - return "mock" + async def volume(self) -> Volume | None: + return InlineVolume(contents="mock") def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: return True, "mock", MOCK_SCORE @@ -55,12 +56,12 @@ class MockSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): def __init__(self, uuids: list[uuid.UUID] = None): self._uuids = uuids or [] - async def create(self, executor_class: ExecutorClass) -> BaseSyntheticJobGenerator: + async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: _uuid = self._uuids.pop(0) return MockSyntheticJobGenerator(_uuid) class TimeTookScoreMockSyntheticJobGeneratorFactory(MockSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass) -> BaseSyntheticJobGenerator: + async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: _uuid = self._uuids.pop(0) return TimeTookScoreMockSyntheticJobGenerator(_uuid) diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_utils.py b/validator/app/src/compute_horde_validator/validator/tests/test_utils.py index 1d6624fe2..d75318d15 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_utils.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_utils.py @@ -7,6 +7,7 @@ import bittensor import pytest from asgiref.sync import sync_to_async +from compute_horde.base.volume import InlineVolume, Volume from compute_horde.executor_class import DEFAULT_EXECUTOR_CLASS, ExecutorClass from compute_horde.mv_protocol.miner_requests import ( ExecutorClassManifest, @@ -75,8 +76,8 @@ def docker_run_options_preset(self) -> str: def docker_run_cmd(self) -> list[str]: return ["mock"] - async def volume_contents(self) -> str: - return "mock" + async def volume(self) -> Volume | None: + return InlineVolume(contents="mock") def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: return True, "mock", MOCK_SCORE @@ -231,7 +232,7 @@ async def as_coro(fun, *args, **kwargs): class MockSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass) -> BaseSyntheticJobGenerator: + async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: return MockSyntheticJobGenerator() @@ -366,7 +367,7 @@ async def create_mock_job_batches(miner): class TimeToookScoreMockSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass) -> BaseSyntheticJobGenerator: + async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: return TimeToookScoreMockSyntheticJobGenerator() diff --git a/validator/pdm.lock b/validator/pdm.lock index 05c1224a6..3affaa8d2 100644 --- a/validator/pdm.lock +++ b/validator/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "format", "lint", "security_check", "test", "type_check"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:eb2731ad148ba6d8433b4fcdf234edb1395d9584a14c0c59de4bd1e81b800b69" +content_hash = "sha256:90d12954944851d55941ccbc6f6d7c6b4ab29839251b409672976d86a5341912" [[metadata.targets]] requires_python = "==3.11.*" @@ -1288,7 +1288,7 @@ name = "httpcore" version = "1.0.5" requires_python = ">=3.8" summary = "A minimal low-level HTTP client." 
-groups = ["test"] +groups = ["default", "test"] dependencies = [ "certifi", "h11<0.15,>=0.13", @@ -1300,10 +1300,10 @@ files = [ [[package]] name = "httpx" -version = "0.27.0" +version = "0.27.2" requires_python = ">=3.8" summary = "The next generation HTTP client." -groups = ["test"] +groups = ["default", "test"] dependencies = [ "anyio", "certifi", @@ -1312,8 +1312,8 @@ dependencies = [ "sniffio", ] files = [ - {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, - {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, + {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, + {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, ] [[package]] diff --git a/validator/pyproject.toml b/validator/pyproject.toml index 7afe8db5a..d879e3427 100644 --- a/validator/pyproject.toml +++ b/validator/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "django-admin-rangefilter==0.12.4", "uvloop>=0.19.0", "boto3>=1.35.11", + "httpx>=0.27.2", ] [build-system] From d9e7d19ed455045dd59a3bc65da9a8b621af7762 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 10 Sep 2024 02:07:26 +0600 Subject: [PATCH 02/53] Add LLAMA synthetic jobs generator tests --- .../test_llama_prompts_generator.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py new file mode 100644 index 000000000..a31da88ae --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py @@ -0,0 +1,111 @@ +import uuid + +import pytest +from compute_horde.base.output_upload import SingleFilePutUpload +from compute_horde.base.volume import SingleFileVolume +from django.utils.timezone import now +from pytest_httpx import HTTPXMock + +from compute_horde_validator.validator.models import ( + Prompt, + PromptSample, + PromptSeries, + SolveWorkload, +) +from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( + LlamaPromptsSyntheticJobGenerator, +) + + +async def _prepare_models(): + prompt_series = await PromptSeries.objects.acreate( + series_uuid=uuid.uuid4(), + s3_url="prompt-series-download-url", + ) + solve_workload = await SolveWorkload.objects.acreate( + workload_uuid=uuid.uuid4(), + seed=42, + s3_url="", + finished_at=now(), + ) + prompt_sample = await PromptSample.objects.acreate( + series=prompt_series, + workload=solve_workload, + synthetic_job=None, + ) + await Prompt.objects.abulk_create( + [ + Prompt( + sample=prompt_sample, + content=str(i), + answer=str(i), + ) + for i in range(10) + ] + ) + + # reload prompt sample with related objects + prompt_samples = ( + PromptSample.objects.select_related("series", "workload") + .prefetch_related("prompts") + .filter( + synthetic_job__isnull=True, + workload__finished_at__isnull=False, + )[:1] + ) + prompt_sample = [ps async for ps in prompt_samples][0] + return prompt_sample + + +@pytest.mark.asyncio +@pytest.mark.django_db(databases=["default", "default_alias"], 
transaction=True)
+async def test_llama_prompts_generator_basic(httpx_mock: HTTPXMock):
+    prompt_sample = await _prepare_models()
+    httpx_mock.add_response(json=[{"prompt": str(i), "answer": str(i)} for i in range(240)])
+
+    job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample)
+    await job_generator.ainit()
+
+    volume = await job_generator.volume()
+    assert isinstance(volume, SingleFileVolume)
+
+    output_upload = await job_generator.output_upload()
+    assert isinstance(output_upload, SingleFilePutUpload)
+
+    with pytest.raises(RuntimeError):
+        assert job_generator.verify(None, 0) == (True, "", 1)
+
+    await job_generator._download_answers()
+    correct, _, score = job_generator.verify(None, 0)
+    assert correct
+    assert score == 1.0
+
+
+@pytest.mark.asyncio
+@pytest.mark.django_db(databases=["default", "default_alias"], transaction=True)
+async def test_llama_prompts_generator_missing_prompts(httpx_mock: HTTPXMock):
+    prompt_sample = await _prepare_models()
+    httpx_mock.add_response(json=[{"prompt": str(i), "answer": str(i)} for i in range(9, 249)])
+
+    job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample)
+    await job_generator.ainit()
+
+    await job_generator._download_answers()
+    correct, _, score = job_generator.verify(None, 0)
+    assert not correct
+    assert score == 0.0
+
+
+@pytest.mark.asyncio
+@pytest.mark.django_db(databases=["default", "default_alias"], transaction=True)
+async def test_llama_prompts_generator_wrong_answers(httpx_mock: HTTPXMock):
+    prompt_sample = await _prepare_models()
+    httpx_mock.add_response(json=[{"prompt": str(i), "answer": "wrong"} for i in range(240)])
+
+    job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample)
+    await job_generator.ainit()
+
+    await job_generator._download_answers()
+    correct, _, score = job_generator.verify(None, 0)
+    assert not correct
+    assert score == 0.0

From 19d48b52f3dc9726f287697875fdf00d9980662f Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Tue, 10 Sep 2024 05:45:48 +0600
Subject: [PATCH 03/53] Assert related objects of PromptSample are cached in
 llama prompts generator

---
 .../synthetic_jobs/generator/llama_prompts.py | 25 ++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
index 73ff20359..a7570206e 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
@@ -8,21 +8,36 @@
 from django.conf import settings
 from pydantic import BaseModel
 
-from compute_horde_validator.validator.models import PromptSample
+from compute_horde_validator.validator.models import Prompt, PromptSample
 from compute_horde_validator.validator.s3 import generate_upload_url, get_public_url
 
 from .base import BaseSyntheticJobGenerator
 
+_PROMPT_SAMPLE_RELATED_NOT_CACHED = (
+    "The related objects of PromptSample need to be cached before being passed to this class"
+)
+
 
 class PromptAnswer(BaseModel):
     prompt: str
     answer: str
 
 
+PromptAnswerList = pydantic.TypeAdapter(list[PromptAnswer])
+
+
 class LlamaPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator):
     def __init__(self, prompt_sample: PromptSample):
         super().__init__()
+
+        assert PromptSample.series.is_cached(prompt_sample), _PROMPT_SAMPLE_RELATED_NOT_CACHED
+        assert PromptSample.workload.is_cached(prompt_sample), 
_PROMPT_SAMPLE_RELATED_NOT_CACHED + assert ( + getattr(prompt_sample, "_prefetched_objects_cache", {}).get("prompts") is not None + ), _PROMPT_SAMPLE_RELATED_NOT_CACHED + self.prompt_sample: PromptSample = prompt_sample + self.prompts: list[Prompt] = list(self.prompt_sample.prompts.all()) self.s3_output_key = str(uuid.uuid4()) + ".json" self.s3_output_prefix = "solved/" @@ -69,15 +84,13 @@ async def output_upload(self) -> OutputUpload | None: async def _download_answers(self): async with httpx.AsyncClient() as client: response = await client.get(self._url_for_download(), timeout=5) - self.prompt_answers = pydantic.TypeAdapter(list[PromptAnswer]).validate_json( - response.content - ) + self.prompt_answers = PromptAnswerList.validate_json(response.content) def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: - if not self.prompt_answers: + if self.prompt_answers is None: raise RuntimeError("_download_answers must be called before calling verify") - for prompt in self.prompt_sample.prompts.all(): + for prompt in self.prompts: for prompt_answer in self.prompt_answers: if prompt_answer.prompt != prompt.content: continue From 0b2e0fb350dfe59d9352d0284171cb3af086726c Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 10 Sep 2024 16:45:01 +0600 Subject: [PATCH 04/53] Adjust llama synthetic jobs image interface --- .../synthetic_jobs/generator/llama_prompts.py | 48 ++++++++++++++----- .../test_llama_prompts_generator.py | 33 ++++++++----- 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py index a7570206e..7aa6f37bd 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py @@ -1,12 +1,11 @@ import uuid +from dataclasses import dataclass import httpx -import pydantic -from compute_horde.base.output_upload import OutputUpload, SingleFilePutUpload -from compute_horde.base.volume import SingleFileVolume, Volume +from compute_horde.base.output_upload import MultiUpload, OutputUpload, SingleFilePutUpload +from compute_horde.base.volume import MultiVolume, SingleFileVolume, Volume from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest from django.conf import settings -from pydantic import BaseModel from compute_horde_validator.validator.models import Prompt, PromptSample from compute_horde_validator.validator.s3 import generate_upload_url, get_public_url @@ -18,14 +17,12 @@ ) -class PromptAnswer(BaseModel): +@dataclass(frozen=True, slots=True) +class PromptAnswer: prompt: str answer: str -PromptAnswerList = pydantic.TypeAdapter(list[PromptAnswer]) - - class LlamaPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator): def __init__(self, prompt_sample: PromptSample): super().__init__() @@ -39,7 +36,9 @@ def __init__(self, prompt_sample: PromptSample): self.prompt_sample: PromptSample = prompt_sample self.prompts: list[Prompt] = list(self.prompt_sample.prompts.all()) - self.s3_output_key = str(uuid.uuid4()) + ".json" + base_filename = str(uuid.uuid4()) + "-" + str(self.prompt_sample.workload.seed) + self.input_filename = base_filename + ".txt" + self.s3_output_key = base_filename + ".json" self.s3_output_prefix = "solved/" self.s3_output_bucket = settings.S3_BUCKET_NAME_ANSWERS @@ -73,18 +72,41 @@ 
def docker_run_options_preset(self) -> str: return "nvidia_all" def docker_run_cmd(self) -> list[str]: - return ["--seed", str(self.prompt_sample.workload.seed)] + return [ + "--temperature=0.5", + "--top-p=0.8", + "--max-tokens=256", + "--seed", + str(self.prompt_sample.workload.seed), + f"/volume/{self.input_filename}", + ] async def volume(self) -> Volume | None: - return SingleFileVolume(url=self.prompt_sample.series.s3_url, relative_path="prompts.txt") + return MultiVolume( + volumes=[ + SingleFileVolume( + url=self.prompt_sample.series.s3_url, + relative_path=self.input_filename, + ), + ] + ) async def output_upload(self) -> OutputUpload | None: - return SingleFilePutUpload(url=self._url_for_upload(), relative_path="answers.json") + return MultiUpload( + uploads=[ + SingleFilePutUpload( + url=self._url_for_upload(), + relative_path=self.s3_output_key, + ), + ] + ) async def _download_answers(self): async with httpx.AsyncClient() as client: response = await client.get(self._url_for_download(), timeout=5) - self.prompt_answers = PromptAnswerList.validate_json(response.content) + self.prompt_answers = [ + PromptAnswer(prompt, answer) for prompt, answer in response.json().items() + ] def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: if self.prompt_answers is None: diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py index a31da88ae..2a124f932 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py @@ -1,8 +1,9 @@ import uuid import pytest -from compute_horde.base.output_upload import SingleFilePutUpload -from compute_horde.base.volume import SingleFileVolume +from compute_horde.base.output_upload import MultiUpload, SingleFilePutUpload +from compute_horde.base.volume import MultiVolume, SingleFileVolume +from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest from django.utils.timezone import now from pytest_httpx import HTTPXMock @@ -16,6 +17,12 @@ LlamaPromptsSyntheticJobGenerator, ) +_JOB_FINISHED_REQUEST = V0JobFinishedRequest( + job_uuid="CF8753B2-C86C-45A3-A01F-84295C3BAD8F", + docker_process_stdout="", + docker_process_stderr="", +) + async def _prepare_models(): prompt_series = await PromptSeries.objects.acreate( @@ -61,22 +68,26 @@ async def _prepare_models(): @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) async def test_llama_prompts_generator_basic(httpx_mock: HTTPXMock): prompt_sample = await _prepare_models() - httpx_mock.add_response(json=[{"prompt": str(i), "answer": str(i)} for i in range(240)]) + httpx_mock.add_response(json={str(i): str(i) for i in range(240)}) job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample) await job_generator.ainit() volume = await job_generator.volume() - assert isinstance(volume, SingleFileVolume) + assert isinstance(volume, MultiVolume) + assert len(volume.volumes) == 1 + assert isinstance(volume.volumes[0], SingleFileVolume) output_upload = await job_generator.output_upload() - assert isinstance(output_upload, SingleFilePutUpload) + assert isinstance(output_upload, MultiUpload) + assert len(output_upload.uploads) == 1 + assert isinstance(output_upload.uploads[0], 
SingleFilePutUpload)
 
     with pytest.raises(RuntimeError):
-        assert job_generator.verify(None, 0) == (True, "", 1)
+        assert job_generator.verify(_JOB_FINISHED_REQUEST, 0) == (True, "", 1)
 
     await job_generator._download_answers()
-    correct, _, score = job_generator.verify(None, 0)
+    correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0)
     assert correct
     assert score == 1.0
 
@@ -85,13 +96,13 @@ async def test_llama_prompts_generator_basic(httpx_mock: HTTPXMock):
 @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True)
 async def test_llama_prompts_generator_missing_prompts(httpx_mock: HTTPXMock):
     prompt_sample = await _prepare_models()
-    httpx_mock.add_response(json=[{"prompt": str(i), "answer": str(i)} for i in range(9, 249)])
+    httpx_mock.add_response(json={str(i): str(i) for i in range(9, 249)})
 
     job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample)
     await job_generator.ainit()
 
     await job_generator._download_answers()
-    correct, _, score = job_generator.verify(None, 0)
+    correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0)
     assert not correct
     assert score == 0.0
 
@@ -100,12 +111,12 @@ async def test_llama_prompts_generator_missing_prompts(httpx_mock: HTTPXMock):
 @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True)
 async def test_llama_prompts_generator_wrong_answers(httpx_mock: HTTPXMock):
     prompt_sample = await _prepare_models()
-    httpx_mock.add_response(json=[{"prompt": str(i), "answer": "wrong"} for i in range(240)])
+    httpx_mock.add_response(json={str(i): "wrong" for i in range(240)})
 
     job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample)
     await job_generator.ainit()
 
     await job_generator._download_answers()
-    correct, _, score = job_generator.verify(None, 0)
+    correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0)
     assert not correct
     assert score == 0.0

From 47cd53621e27fedbd19a7dadb45b6c3adf22446b Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Tue, 10 Sep 2024 23:48:23 +0600
Subject: [PATCH 05/53] Update used `PromptSample`s after synthetic job
 finishes

---
 .../validator/synthetic_jobs/batch_run.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
index 2d334d665..c6d8d0984 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
@@ -1375,7 +1375,7 @@ def _db_persist(ctx: BatchContext) -> None:
             score=job.score,
         )
         synthetic_jobs.append(synthetic_job)
-    SyntheticJob.objects.bulk_create(synthetic_jobs)
+    synthetic_jobs = SyntheticJob.objects.bulk_create(synthetic_jobs)
 
     miner_manifests: list[MinerManifest] = []
     for miner in ctx.miners.values():
@@ -1391,6 +1391,18 @@ def _db_persist(ctx: BatchContext) -> None:
         )
     MinerManifest.objects.bulk_create(miner_manifests)
 
+    synthetic_jobs_map: dict[str, SyntheticJob] = {
+        synthetic_job.job_uuid: synthetic_job for synthetic_job in synthetic_jobs
+    }
+    prompt_samples: list[PromptSample] = []
+    for job in ctx.jobs.values():
+        if job.executor_class != ExecutorClass.always_on__llama:
+            continue
+        prompt_sample = job.job_generator.prompt_sample
+        prompt_sample.synthetic_job = synthetic_jobs_map.get(job.uuid)
+        prompt_samples.append(prompt_sample)
+    PromptSample.objects.bulk_update(prompt_samples, fields=["synthetic_job"])
+
    job_started_receipts: 
list[JobStartedReceipt] = [] for job in ctx.jobs.values(): if job.job_started_receipt is not None: From 7fca9f72c28a12372446e66f4dfdc5ab8a83e10b Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Wed, 11 Sep 2024 00:22:54 +0600 Subject: [PATCH 06/53] Fix volume/upload object key for llama job --- .../validator/synthetic_jobs/generator/llama_prompts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py index 7aa6f37bd..064434a51 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py @@ -1,4 +1,3 @@ -import uuid from dataclasses import dataclass import httpx @@ -36,7 +35,7 @@ def __init__(self, prompt_sample: PromptSample): self.prompt_sample: PromptSample = prompt_sample self.prompts: list[Prompt] = list(self.prompt_sample.prompts.all()) - base_filename = str(uuid.uuid4()) + "-" + str(self.prompt_sample.workload.seed) + base_filename = f"{prompt_sample.series.series_uuid}-{prompt_sample.workload.seed}" self.input_filename = base_filename + ".txt" self.s3_output_key = base_filename + ".json" self.s3_output_prefix = "solved/" From e25c3cd162400893493ad96f529732613b521c7e Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Wed, 11 Sep 2024 01:53:04 +0600 Subject: [PATCH 07/53] Refactor llama job test parts into fixtures --- .../tests/test_synthetic_jobs/conftest.py | 61 +++++++++++++++- .../test_llama_prompts_generator.py | 70 ++++--------------- 2 files changed, 74 insertions(+), 57 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py index 05ba55809..f29281450 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py @@ -6,8 +6,15 @@ from compute_horde.executor_class import DEFAULT_EXECUTOR_CLASS from compute_horde.miner_client.base import AbstractTransport from compute_horde.mv_protocol import miner_requests - -from compute_horde_validator.validator.models import Miner +from django.utils.timezone import now + +from compute_horde_validator.validator.models import ( + Miner, + Prompt, + PromptSample, + PromptSeries, + SolveWorkload, +) from compute_horde_validator.validator.synthetic_jobs.batch_run import BatchContext, MinerClient from compute_horde_validator.validator.tests.transport import MinerSimulationTransport @@ -117,3 +124,53 @@ def job_failed_message(job_uuid: uuid.UUID, docker_process_stdout: str, docker_p docker_process_stdout=docker_process_stdout, docker_process_stderr=docker_process_stderr, ).model_dump_json() + + +@pytest_asyncio.fixture +async def prompt_series(): + return await PromptSeries.objects.acreate( + series_uuid=uuid.uuid4(), + s3_url="http://localhost:9999/prompt-series-download-url", + ) + + +@pytest_asyncio.fixture +async def solve_workload(): + return await SolveWorkload.objects.acreate( + workload_uuid=uuid.uuid4(), + seed=42, + s3_url="http://localhost:9999/solve-workload-download-url", + finished_at=now(), + ) + + +@pytest_asyncio.fixture +async def prompt_sample(prompt_series, solve_workload): + return await 
PromptSample.objects.acreate( + series=prompt_series, + workload=solve_workload, + synthetic_job=None, + ) + + +@pytest_asyncio.fixture +async def prompts(prompt_sample): + return await Prompt.objects.abulk_create( + [ + Prompt( + sample=prompt_sample, + content=str(i), + answer=str(i), + ) + for i in range(10) + ] + ) + + +@pytest_asyncio.fixture +async def prompt_sample_prefetched(prompt_series, solve_workload, prompt_sample, prompts): + return ( + await PromptSample.objects.select_related("series", "workload") + .prefetch_related("prompts") + .aget(id=prompt_sample.id) + ) diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py index 2a124f932..58c965f17 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py @@ -1,17 +1,11 @@ -import uuid - import pytest from compute_horde.base.output_upload import MultiUpload, SingleFilePutUpload from compute_horde.base.volume import MultiVolume, SingleFileVolume from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest -from django.utils.timezone import now from pytest_httpx import HTTPXMock from compute_horde_validator.validator.models import ( - Prompt, PromptSample, - PromptSeries, - SolveWorkload, ) from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( LlamaPromptsSyntheticJobGenerator, @@ -24,53 +18,15 @@ ) -async def _prepare_models(): - prompt_series = await PromptSeries.objects.acreate( - series_uuid=uuid.uuid4(), - s3_url="prompt-series-download-url", - ) - solve_workload = await SolveWorkload.objects.acreate( - workload_uuid=uuid.uuid4(), - seed=42, - s3_url="", - finished_at=now(), - ) - prompt_sample = await PromptSample.objects.acreate( - series=prompt_series, - workload=solve_workload, - synthetic_job=None, - ) - await Prompt.objects.abulk_create( - [ - Prompt( - sample=prompt_sample, - content=str(i), - answer=str(i), - ) - for i in range(10) - ] - ) - - # reload prompt sample with related objects - prompt_samples = ( - PromptSample.objects.select_related("series", "workload") - .prefetch_related("prompts") - .filter( - synthetic_job__isnull=True, - workload__finished_at__isnull=False, - )[:1] - ) - prompt_sample = [ps async for ps in prompt_samples][0] - return prompt_sample - - @pytest.mark.asyncio @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -async def test_llama_prompts_generator_basic(httpx_mock: HTTPXMock): - prompt_sample = await _prepare_models() +async def test_llama_prompts_generator_basic( + httpx_mock: HTTPXMock, + prompt_sample_prefetched: PromptSample, +): httpx_mock.add_response(json={str(i): str(i) for i in range(240)}) - job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample) + job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample_prefetched) await job_generator.ainit() volume = await job_generator.volume() @@ -94,11 +50,13 @@ async def test_llama_prompts_generator_basic(httpx_mock: HTTPXMock): @pytest.mark.asyncio @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -async def test_llama_prompts_generator_missing_prompts(httpx_mock: HTTPXMock): - prompt_sample = await _prepare_models() +async def 
test_llama_prompts_generator_missing_prompts( + httpx_mock: HTTPXMock, + prompt_sample_prefetched: PromptSample, +): httpx_mock.add_response(json={str(i): str(i) for i in range(9, 249)}) - job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample) + job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample_prefetched) await job_generator.ainit() await job_generator._download_answers() @@ -109,11 +67,13 @@ async def test_llama_prompts_generator_missing_prompts(httpx_mock: HTTPXMock): @pytest.mark.asyncio @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -async def test_llama_prompts_generator_wrong_answers(httpx_mock: HTTPXMock): - prompt_sample = await _prepare_models() +async def test_llama_prompts_generator_wrong_answers( + httpx_mock: HTTPXMock, + prompt_sample_prefetched: PromptSample, +): httpx_mock.add_response(json={str(i): "wrong" for i in range(240)}) - job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample) + job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample_prefetched) await job_generator.ainit() await job_generator._download_answers() From fa8b67c359e1b7c82f618778e995ac7e12748382 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Wed, 11 Sep 2024 02:59:35 +0600 Subject: [PATCH 08/53] Fix stagger_wait_interval getting negative when all executors have 0 spinup time --- .../validator/synthetic_jobs/batch_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index c6d8d0984..a8525f3c7 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -650,7 +650,7 @@ def _init_context( def _get_max_spin_up_time(ctx: BatchContext) -> int: - max_spin_up_time = 0 + max_spin_up_time = _MIN_SPIN_UP_TIME for executors in ctx.executors.values(): for executor_class in executors.keys(): spin_up_time = EXECUTOR_CLASS[executor_class].spin_up_time From df338a72d7e82e2a25e212156e30fd4f0ab09b01 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Wed, 11 Sep 2024 03:02:07 +0600 Subject: [PATCH 09/53] Make llama job input and output filenames random (uuid) --- .../validator/synthetic_jobs/generator/llama_prompts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py index 064434a51..edfcdd954 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py @@ -1,3 +1,4 @@ +import uuid from dataclasses import dataclass import httpx @@ -35,9 +36,8 @@ def __init__(self, prompt_sample: PromptSample): self.prompt_sample: PromptSample = prompt_sample self.prompts: list[Prompt] = list(self.prompt_sample.prompts.all()) - base_filename = f"{prompt_sample.series.series_uuid}-{prompt_sample.workload.seed}" - self.input_filename = base_filename + ".txt" - self.s3_output_key = base_filename + ".json" + self.input_filename = str(uuid.uuid4()) + ".txt" + self.s3_output_key = str(uuid.uuid4()) + ".json" self.s3_output_prefix = "solved/" self.s3_output_bucket = settings.S3_BUCKET_NAME_ANSWERS From 
00632b7e9463d21370e599446eb101630258cb6f Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Wed, 11 Sep 2024 03:08:25 +0600
Subject: [PATCH 10/53] Add tests for synthetic jobs flow of llama executor
 class

---
 .../synthetic_jobs/generator/llama_prompts.py |   7 +-
 .../tests/test_synthetic_jobs/conftest.py     |   2 +-
 .../test_llama_synthetic_job_flow.py          | 114 ++++++++++++++++++
 3 files changed, 119 insertions(+), 4 deletions(-)
 create mode 100644 validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py

diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
index edfcdd954..3c57bb1a2 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
@@ -103,6 +103,7 @@ async def output_upload(self) -> OutputUpload | None:
     async def _download_answers(self):
         async with httpx.AsyncClient() as client:
             response = await client.get(self._url_for_download(), timeout=5)
+            response.raise_for_status()
             self.prompt_answers = [
                 PromptAnswer(prompt, answer) for prompt, answer in response.json().items()
             ]
@@ -116,13 +117,13 @@ def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str
             if prompt_answer.prompt != prompt.content:
                 continue
             if prompt_answer.answer != prompt.answer:
-                return False, "results do not match expected answers", 0
+                return False, "results do not match expected answers", 0.0
                 break
             else:
                 # did not find answer for this prompt
-                return False, "result does not contain all answers", 0
+                return False, "result does not contain all answers", 0.0
 
-        return True, "", 1
+        return True, "", 1.0
 
     def job_description(self) -> str:
         return "LLAMA prompts synthetic job"
diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py
index f29281450..4927afd93 100644
--- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py
@@ -172,5 +172,5 @@ async def prompt_sample_prefetched(prompt_series, solve_workload, prompt_sample,
     return (
         await PromptSample.objects.select_related("series", "workload")
         .prefetch_related("prompts")
-        .aget(id=prompt_sample.id)
+        .aget()
     )
diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py
new file mode 100644
index 000000000..7a4558818
--- /dev/null
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py
@@ -0,0 +1,114 @@
+import asyncio
+import re
+import uuid
+from collections.abc import Callable
+from unittest.mock import patch
+
+import bittensor
+import pytest
+import pytest_asyncio
+from compute_horde.executor_class import ExecutorClass
+from compute_horde.mv_protocol import miner_requests
+from pytest_httpx import HTTPXMock
+
+from compute_horde_validator.validator.models import Miner, Prompt, PromptSample, SyntheticJob
+from compute_horde_validator.validator.s3 import get_public_url
+from 
compute_horde_validator.validator.synthetic_jobs.batch_run import execute_synthetic_batch_run +from compute_horde_validator.validator.synthetic_jobs.generator.base import ( + BaseSyntheticJobGenerator, +) +from compute_horde_validator.validator.synthetic_jobs.generator.factory import ( + DefaultSyntheticJobGeneratorFactory, +) +from compute_horde_validator.validator.tests.transport import MinerSimulationTransport + + +class JobGeneratorFactory(DefaultSyntheticJobGeneratorFactory): + async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: + generator = await super().create(executor_class, *args) + generator._uuid = self._uuid + return generator + + +@pytest_asyncio.fixture +def mocked_job_generator_factory(prompts): + factory = JobGeneratorFactory() + with patch( + "compute_horde_validator.validator.synthetic_jobs.generator.current.synthetic_job_generator_factory", + factory, + ): + yield factory + + +@pytest.mark.asyncio +@pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) +async def test_llama_synthetic_jobs_flow( + miner: Miner, + axon_dict: dict[str, bittensor.AxonInfo], + create_simulation_miner_client: Callable, + transport: MinerSimulationTransport, + override_weights_version_v2, + small_spin_up_times, + prompt_sample_prefetched: PromptSample, + prompts: list[Prompt], + mocked_job_generator_factory: JobGeneratorFactory, + httpx_mock: HTTPXMock, + settings, +): + job_uuid = str(uuid.uuid4()) + mocked_job_generator_factory._uuid = job_uuid + httpx_mock.add_response( + url=re.compile( + get_public_url(key=".*", bucket_name=settings.S3_BUCKET_NAME_ANSWERS, prefix="solved/") + ), + json={p.content: p.answer for p in prompts}, + ) + + manifest_message = miner_requests.V0ExecutorManifestRequest( + manifest=miner_requests.ExecutorManifest( + executor_classes=[ + miner_requests.ExecutorClassManifest( + executor_class=ExecutorClass.always_on__llama, + count=1, + ) + ] + ) + ).model_dump_json() + await transport.add_message(manifest_message, send_before=1) + + await transport.add_message( + miner_requests.V0AcceptJobRequest(job_uuid=job_uuid).model_dump_json(), + send_before=1, + sleep_before=0.05, + ) + await transport.add_message( + miner_requests.V0ExecutorReadyRequest(job_uuid=job_uuid).model_dump_json(), + send_before=0, + ) + await transport.add_message( + miner_requests.V0JobFinishedRequest( + job_uuid=job_uuid, + docker_process_stdout="", + docker_process_stderr="", + ).model_dump_json(), + send_before=2, + sleep_before=0.05, + ) + + assert prompt_sample_prefetched.synthetic_job_id is None + + await asyncio.wait_for( + execute_synthetic_batch_run( + axon_dict, + [miner], + create_miner_client=create_simulation_miner_client, + ), + timeout=2, + ) + + job = await SyntheticJob.objects.aget(job_uuid=job_uuid) + assert job.status == SyntheticJob.Status.COMPLETED + assert job.score >= 1 + + await prompt_sample_prefetched.arefresh_from_db() + assert prompt_sample_prefetched.synthetic_job_id == job.id From 5f6ef0f08c7d6827b1068c5aa9597b1a4415da9f Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Thu, 12 Sep 2024 19:55:51 +0600 Subject: [PATCH 11/53] Simplify llama answer checking in synthetic jobs --- .../synthetic_jobs/generator/llama_prompts.py | 30 +++++-------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py index 
3c57bb1a2..7cbd87995 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
@@ -1,7 +1,7 @@
 import uuid
-from dataclasses import dataclass
 
 import httpx
+import pydantic
 from compute_horde.base.output_upload import MultiUpload, OutputUpload, SingleFilePutUpload
 from compute_horde.base.volume import MultiVolume, SingleFileVolume, Volume
 from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest
@@ -17,12 +17,6 @@
 )
 
 
-@dataclass(frozen=True, slots=True)
-class PromptAnswer:
-    prompt: str
-    answer: str
-
-
 class LlamaPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator):
     def __init__(self, prompt_sample: PromptSample):
         super().__init__()
@@ -41,7 +35,7 @@ def __init__(self, prompt_sample: PromptSample):
         self.s3_output_prefix = "solved/"
         self.s3_output_bucket = settings.S3_BUCKET_NAME_ANSWERS
 
-        self.prompt_answers: list[PromptAnswer] | None = None
+        self.prompt_answers: dict[str, str] = {}
 
     def _url_for_upload(self) -> str:
         return generate_upload_url(
@@ -104,24 +98,16 @@ async def _download_answers(self):
         async with httpx.AsyncClient() as client:
             response = await client.get(self._url_for_download(), timeout=5)
             response.raise_for_status()
-            self.prompt_answers = [
-                PromptAnswer(prompt, answer) for prompt, answer in response.json().items()
-            ]
+            self.prompt_answers = pydantic.TypeAdapter(dict[str, str]).validate_json(
+                response.content
+            )
 
     def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]:
-        if self.prompt_answers is None:
-            raise RuntimeError("_download_answers must be called before calling verify")
-
         for prompt in self.prompts:
-            for prompt_answer in self.prompt_answers:
-                if prompt_answer.prompt != prompt.content:
-                    continue
-                if prompt_answer.answer != prompt.answer:
-                    return False, "results do not match expected answers", 0.0
-                break
-            else:
-                # did not find answer for this prompt
+            if prompt.content not in self.prompt_answers:
                 return False, "result does not contain all answers", 0.0
+            if prompt.answer != self.prompt_answers[prompt.content]:
+                return False, "results do not match expected answers", 0.0
 
         return True, "", 1.0

From d84d5c6da12ee8548a1aaa046c28262419b0166a Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Thu, 12 Sep 2024 19:58:52 +0600
Subject: [PATCH 12/53] Add docker image for prompt solver

---
 .../validator/synthetic_jobs/generator/llama_prompts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
index 7cbd87995..10502bf07 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py
@@ -56,10 +56,10 @@ def timeout_seconds(self) -> int:
         return 80
 
     def base_docker_image_name(self) -> str:
-        return "TODO"
+        return "docker.io/backenddevelopersltd/compute-horde-prompt-solver:v0-latest"
 
     def docker_image_name(self) -> str:
-        return "TODO"
+        return "docker.io/backenddevelopersltd/compute-horde-prompt-solver:v0-latest"
 
     def docker_run_options_preset(self) -> str:
         return "nvidia_all"

From 8481740b12830e6e954905e954dc42c12bccccfe Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Thu, 12 Sep 2024 20:35:18 +0600
Subject: 
[PATCH 13/53] Fix llama generator tests --- .../validator/tests/test_synthetic_jobs/conftest.py | 1 + .../test_synthetic_jobs/test_llama_prompts_generator.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py index 4927afd93..55d75e44a 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py @@ -131,6 +131,7 @@ async def prompt_series(): return await PromptSeries.objects.acreate( series_uuid=uuid.uuid4(), s3_url="http://localhost:9999/prompt-series-download-url", + generator_version=0, ) diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py index 58c965f17..33edc7137 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py @@ -39,8 +39,10 @@ async def test_llama_prompts_generator_basic( assert len(output_upload.uploads) == 1 assert isinstance(output_upload.uploads[0], SingleFilePutUpload) - with pytest.raises(RuntimeError): - assert job_generator.verify(_JOB_FINISHED_REQUEST, 0) == (True, "", 1) + # before downloading answers + correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0) + assert not correct + assert score == 0.0 await job_generator._download_answers() correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0) From 86f7d8fac93a35cff1069568d7dc8ea6937cb6cc Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Thu, 12 Sep 2024 21:18:23 +0600 Subject: [PATCH 14/53] Refactor llama prompts synthetic job generator signature to not rely on prefetched db objects --- .../validator/synthetic_jobs/batch_run.py | 12 +++++-- .../synthetic_jobs/generator/base.py | 6 ++-- .../synthetic_jobs/generator/factory.py | 6 ++-- .../synthetic_jobs/generator/gpu_hashcat.py | 4 +-- .../synthetic_jobs/generator/llama_prompts.py | 36 +++++++++---------- .../tests/test_synthetic_jobs/conftest.py | 21 ++++++++--- .../test_synthetic_jobs/mock_generator.py | 10 +++--- .../test_llama_prompts_generator.py | 36 +++++++------------ .../test_llama_synthetic_job_flow.py | 25 ++++++++----- .../validator/tests/test_utils.py | 8 ++--- 10 files changed, 90 insertions(+), 74 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index a8525f3c7..2b6091554 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -789,12 +789,18 @@ async def _generate_jobs(ctx: BatchContext) -> None: for executor_class, count in executors.items(): job_generators = [] for _ in range(count): - args = [] + kwargs = {} if executor_class == ExecutorClass.always_on__llama: - args.append(next(llama_prompt_samples_iter)) + prompt_sample = next(llama_prompt_samples_iter) + kwargs = { + "prompt_sample": prompt_sample, + "expected_prompts": list(prompt_sample.prompts.all()), + 
"s3_url": prompt_sample.series.s3_url, + "seed": prompt_sample.workload.seed, + } job_generator = await current.synthetic_job_generator_factory.create( - executor_class, *args + executor_class, **kwargs ) await job_generator.ainit() job_uuid = str(job_generator.uuid()) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py index bfe154d19..6996b5454 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/base.py @@ -8,7 +8,7 @@ class BaseSyntheticJobGenerator(abc.ABC): - def __init__(self): + def __init__(self, **kwargs): self._uuid = uuid.uuid4() def __repr__(self): @@ -53,4 +53,6 @@ def job_description(self) -> str: ... class BaseSyntheticJobGeneratorFactory(abc.ABC): @abc.abstractmethod - async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: ... + async def create( + self, executor_class: ExecutorClass, **kwargs + ) -> BaseSyntheticJobGenerator: ... diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py index a904848a7..f907c4ec2 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py @@ -13,7 +13,7 @@ class DefaultSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: + async def create(self, executor_class: ExecutorClass, **kwargs) -> BaseSyntheticJobGenerator: if executor_class == ExecutorClass.always_on__llama: - return LlamaPromptsSyntheticJobGenerator(*args) - return GPUHashcatSyntheticJobGenerator(*args) + return LlamaPromptsSyntheticJobGenerator(**kwargs) + return GPUHashcatSyntheticJobGenerator(**kwargs) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py index 16ed4b496..a7ce318bd 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py @@ -18,8 +18,8 @@ class GPUHashcatSyntheticJobGenerator(BaseSyntheticJobGenerator): - def __init__(self): - super().__init__() + def __init__(self, **kwargs): + super().__init__(**kwargs) # set synthetic_jobs based on subnet weights_version self.weights_version = None self.hash_job = None diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py index 10502bf07..f651fdf32 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py @@ -12,24 +12,22 @@ from .base import BaseSyntheticJobGenerator -_PROMPT_SAMPLE_RELATED_NOT_CACHED = ( - "The related objects of PromptSample needs to be cached before passing to this class" -) - class LlamaPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator): - def __init__(self, 
prompt_sample: PromptSample): - super().__init__() - - assert PromptSample.series.is_cached(prompt_sample), _PROMPT_SAMPLE_RELATED_NOT_CACHED - assert PromptSample.workload.is_cached(prompt_sample), _PROMPT_SAMPLE_RELATED_NOT_CACHED - assert ( - getattr(prompt_sample, "_prefetched_objects_cache", {}).get("prompts") is not None - ), _PROMPT_SAMPLE_RELATED_NOT_CACHED + def __init__( + self, + prompt_sample: PromptSample, + expected_prompts: list[Prompt], + s3_url: str, + seed: int, + **kwargs, + ): + super().__init__(**kwargs) self.prompt_sample: PromptSample = prompt_sample - self.prompts: list[Prompt] = list(self.prompt_sample.prompts.all()) - + self.seed = seed + self.expected_prompts: list[Prompt] = expected_prompts + self.s3_url = s3_url self.input_filename = str(uuid.uuid4()) + ".txt" self.s3_output_key = str(uuid.uuid4()) + ".json" self.s3_output_prefix = "solved/" @@ -70,7 +68,7 @@ def docker_run_cmd(self) -> list[str]: "--top-p=0.8", "--max-tokens=256", "--seed", - str(self.prompt_sample.workload.seed), + str(self.seed), f"/volume/{self.input_filename}", ] @@ -78,7 +76,7 @@ async def volume(self) -> Volume | None: return MultiVolume( volumes=[ SingleFileVolume( - url=self.prompt_sample.series.s3_url, + url=self.s3_url, relative_path=self.input_filename, ), ] @@ -103,10 +101,10 @@ async def _download_answers(self): ) def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: - for prompt in self.prompts: - if prompt.content not in self.prompt_answers: + for expected_prompt in self.expected_prompts: + if expected_prompt.content not in self.prompt_answers: return False, "result does not contain all answers", 0.0 - if prompt.answer != self.prompt_answers[prompt.content]: + if expected_prompt.answer != self.prompt_answers[expected_prompt.content]: return False, "results does not match expected answers", 0.0 return True, "", 1.0 diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py index 55d75e44a..4ffc8567d 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py @@ -16,6 +16,9 @@ SolveWorkload, ) from compute_horde_validator.validator.synthetic_jobs.batch_run import BatchContext, MinerClient +from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( + LlamaPromptsSyntheticJobGenerator, +) from compute_horde_validator.validator.tests.transport import MinerSimulationTransport @@ -169,9 +172,17 @@ async def prompts(prompt_sample): @pytest_asyncio.fixture -async def prompt_sample_prefetched(prompt_series, solve_workload, prompt_sample, prompts): - return ( - await PromptSample.objects.select_related("series", "workload") - .prefetch_related("prompts") - .aget() +async def llama_job_generator( + prompt_series: PromptSeries, + solve_workload: SolveWorkload, + prompt_sample: PromptSample, + prompts: list[Prompt], +) -> LlamaPromptsSyntheticJobGenerator: + job_generator = LlamaPromptsSyntheticJobGenerator( + prompt_sample=prompt_sample, + expected_prompts=prompts, + s3_url=prompt_series.s3_url, + seed=solve_workload.seed, ) + await job_generator.ainit() + return job_generator diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py 
b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py index 0d0a412d6..f1551744f 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/mock_generator.py @@ -16,7 +16,8 @@ class MockSyntheticJobGenerator(BaseSyntheticJobGenerator): - def __init__(self, _uuid: uuid.UUID): + def __init__(self, _uuid: uuid.UUID, **kwargs): + super().__init__(**kwargs) self._uuid = _uuid async def ainit(self): @@ -53,12 +54,13 @@ def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str class MockSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): - def __init__(self, uuids: list[uuid.UUID] = None): + def __init__(self, uuids: list[uuid.UUID] = None, **kwargs): + super().__init__(**kwargs) self._uuids = uuids or [] - async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: + async def create(self, executor_class: ExecutorClass, **kwargs) -> BaseSyntheticJobGenerator: _uuid = self._uuids.pop(0) - return MockSyntheticJobGenerator(_uuid) + return MockSyntheticJobGenerator(_uuid, **kwargs) class TimeTookScoreMockSyntheticJobGeneratorFactory(MockSyntheticJobGeneratorFactory): diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py index 33edc7137..40364ff33 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py @@ -4,9 +4,6 @@ from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest from pytest_httpx import HTTPXMock -from compute_horde_validator.validator.models import ( - PromptSample, -) from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( LlamaPromptsSyntheticJobGenerator, ) @@ -22,30 +19,27 @@ @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) async def test_llama_prompts_generator_basic( httpx_mock: HTTPXMock, - prompt_sample_prefetched: PromptSample, + llama_job_generator: LlamaPromptsSyntheticJobGenerator, ): httpx_mock.add_response(json={str(i): str(i) for i in range(240)}) - job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample_prefetched) - await job_generator.ainit() - - volume = await job_generator.volume() + volume = await llama_job_generator.volume() assert isinstance(volume, MultiVolume) assert len(volume.volumes) == 1 assert isinstance(volume.volumes[0], SingleFileVolume) - output_upload = await job_generator.output_upload() + output_upload = await llama_job_generator.output_upload() assert isinstance(output_upload, MultiUpload) assert len(output_upload.uploads) == 1 assert isinstance(output_upload.uploads[0], SingleFilePutUpload) # before downloading answers - correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0) + correct, _, score = llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert not correct assert score == 0.0 - await job_generator._download_answers() - correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0) + await llama_job_generator._download_answers() + correct, _, score = llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert correct assert score == 1.0 @@ 
-54,15 +48,12 @@ async def test_llama_prompts_generator_basic( @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) async def test_llama_prompts_generator_missing_prompts( httpx_mock: HTTPXMock, - prompt_sample_prefetched: PromptSample, + llama_job_generator: LlamaPromptsSyntheticJobGenerator, ): httpx_mock.add_response(json={str(i): str(i) for i in range(9, 249)}) - job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample_prefetched) - await job_generator.ainit() - - await job_generator._download_answers() - correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0) + await llama_job_generator._download_answers() + correct, _, score = llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert not correct assert score == 0.0 @@ -71,14 +62,11 @@ async def test_llama_prompts_generator_missing_prompts( @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) async def test_llama_prompts_generator_wrong_answers( httpx_mock: HTTPXMock, - prompt_sample_prefetched: PromptSample, + llama_job_generator: LlamaPromptsSyntheticJobGenerator, ): httpx_mock.add_response(json={str(i): "wrong" for i in range(240)}) - job_generator = LlamaPromptsSyntheticJobGenerator(prompt_sample_prefetched) - await job_generator.ainit() - - await job_generator._download_answers() - correct, _, score = job_generator.verify(_JOB_FINISHED_REQUEST, 0) + await llama_job_generator._download_answers() + correct, _, score = llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert not correct assert score == 0.0 diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py index 7a4558818..ae97fd94c 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py @@ -11,7 +11,14 @@ from compute_horde.mv_protocol import miner_requests from pytest_httpx import HTTPXMock -from compute_horde_validator.validator.models import Miner, Prompt, PromptSample, SyntheticJob +from compute_horde_validator.validator.models import ( + Miner, + Prompt, + PromptSample, + PromptSeries, + SolveWorkload, + SyntheticJob, +) from compute_horde_validator.validator.s3 import get_public_url from compute_horde_validator.validator.synthetic_jobs.batch_run import execute_synthetic_batch_run from compute_horde_validator.validator.synthetic_jobs.generator.base import ( @@ -24,8 +31,8 @@ class JobGeneratorFactory(DefaultSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: - generator = await super().create(executor_class, *args) + async def create(self, executor_class: ExecutorClass, **kwargs) -> BaseSyntheticJobGenerator: + generator = await super().create(executor_class, **kwargs) generator._uuid = self._uuid return generator @@ -49,7 +56,9 @@ async def test_llama_synthetic_jobs_flow( transport: MinerSimulationTransport, override_weights_version_v2, small_spin_up_times, - prompt_sample_prefetched: PromptSample, + prompt_series: PromptSeries, + solve_workload: SolveWorkload, + prompt_sample: PromptSample, prompts: list[Prompt], mocked_job_generator_factory: JobGeneratorFactory, httpx_mock: HTTPXMock, @@ -95,7 +104,7 @@ async def test_llama_synthetic_jobs_flow( 
sleep_before=0.05, ) - assert prompt_sample_prefetched.synthetic_job_id is None + assert prompt_sample.synthetic_job_id is None await asyncio.wait_for( execute_synthetic_batch_run( @@ -108,7 +117,7 @@ async def test_llama_synthetic_jobs_flow( job = await SyntheticJob.objects.aget(job_uuid=job_uuid) assert job.status == SyntheticJob.Status.COMPLETED - assert job.score >= 1 + assert job.score > 0 - await prompt_sample_prefetched.arefresh_from_db() - assert prompt_sample_prefetched.synthetic_job_id == job.id + await prompt_sample.arefresh_from_db() + assert prompt_sample.synthetic_job_id == job.id diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_utils.py b/validator/app/src/compute_horde_validator/validator/tests/test_utils.py index d75318d15..33fa9da13 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_utils.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_utils.py @@ -232,8 +232,8 @@ async def as_coro(fun, *args, **kwargs): class MockSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: - return MockSyntheticJobGenerator() + async def create(self, executor_class: ExecutorClass, **kwargs) -> BaseSyntheticJobGenerator: + return MockSyntheticJobGenerator(**kwargs) mock_synthetic_job_generator_factory = MagicMock(name="MockSyntheticJobGeneratorFactory") @@ -367,8 +367,8 @@ async def create_mock_job_batches(miner): class TimeToookScoreMockSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): - async def create(self, executor_class: ExecutorClass, *args) -> BaseSyntheticJobGenerator: - return TimeToookScoreMockSyntheticJobGenerator() + async def create(self, executor_class: ExecutorClass, **kwargs) -> BaseSyntheticJobGenerator: + return TimeToookScoreMockSyntheticJobGenerator(**kwargs) time_took_mock_synthetic_job_generator_factory = MagicMock( From 443e2c453fb3bef3d988a01cbe7d58a8aacf743a Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 17 Sep 2024 02:04:32 +0600 Subject: [PATCH 15/53] Rename "LLAMA" -> "LLM" for prompt solving executor class and job generator --- compute_horde/compute_horde/executor_class.py | 7 ++-- .../validator/synthetic_jobs/batch_run.py | 26 +++++++------- .../synthetic_jobs/generator/factory.py | 8 ++--- .../{llama_prompts.py => llm_prompts.py} | 4 +-- .../tests/test_synthetic_jobs/conftest.py | 10 +++--- ...rator.py => test_llm_prompts_generator.py} | 34 +++++++++---------- ...flow.py => test_llm_synthetic_job_flow.py} | 4 +-- 7 files changed, 48 insertions(+), 45 deletions(-) rename validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/{llama_prompts.py => llm_prompts.py} (97%) rename validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/{test_llama_prompts_generator.py => test_llm_prompts_generator.py} (61%) rename validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/{test_llama_synthetic_job_flow.py => test_llm_synthetic_job_flow.py} (97%) diff --git a/compute_horde/compute_horde/executor_class.py b/compute_horde/compute_horde/executor_class.py index a7f57ada6..4b1372f5c 100644 --- a/compute_horde/compute_horde/executor_class.py +++ b/compute_horde/compute_horde/executor_class.py @@ -6,7 +6,7 @@ class ExecutorClass(StrEnum): spin_up_4min__gpu_24gb = "spin_up-4min.gpu-24gb" always_on__gpu_24gb = "always_on.gpu-24gb" - always_on__llama = "always_on.llama" + always_on__llm__a6000 = "always_on.llm.a6000" # 
always_on__cpu_16c__ram_64gb = "always_on.cpu-16c.ram-64gb" # always_on__gpu_80gb = "always_on.gpu-80gb" # always_on__gpu_24gb__docker_cached_facilitator = "always_on.gpu-24gb.docker_cached-facilitator" @@ -40,9 +40,10 @@ class ExecutorClassSpec: gpu_vram_gb=24, spin_up_time=0, ), - ExecutorClass.always_on__llama: ExecutorClassSpec( + ExecutorClass.always_on__llm__a6000: ExecutorClassSpec( + description="always on, NVIDIA RTX A6000 GPU machine for LLM prompts solving", has_gpu=True, - gpu_vram_gb=80, + gpu_vram_gb=48, spin_up_time=0, ), # ExecutorClass.always_on__cpu_16c__ram_64gb: ExecutorClassSpec( diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index 2b6091554..8097acf9d 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -766,23 +766,24 @@ async def _generate_jobs(ctx: BatchContext) -> None: start_time = time.time() generated_job_count = 0 - llama_executor_count = sum( + # TODO: refactor into nicer abstraction + llm_executor_count = sum( count for executors in ctx.executors.values() for executor_class, count in executors.items() - if executor_class == ExecutorClass.always_on__llama + if executor_class == ExecutorClass.always_on__llm__a6000 ) - llama_prompt_samples = ( + prompt_samples = ( PromptSample.objects.select_related("series", "workload") .prefetch_related("prompts") .filter( synthetic_job__isnull=True, workload__finished_at__isnull=False, - )[:llama_executor_count] + )[:llm_executor_count] ) - llama_prompt_samples = [ps async for ps in llama_prompt_samples] - assert len(llama_prompt_samples) == llama_executor_count - llama_prompt_samples_iter = iter(llama_prompt_samples) + prompt_samples = [ps async for ps in prompt_samples] + assert len(prompt_samples) == llm_executor_count + prompt_samples_iter = iter(prompt_samples) for hotkey, executors in ctx.executors.items(): miner_name = ctx.names[hotkey] @@ -790,8 +791,8 @@ async def _generate_jobs(ctx: BatchContext) -> None: job_generators = [] for _ in range(count): kwargs = {} - if executor_class == ExecutorClass.always_on__llama: - prompt_sample = next(llama_prompt_samples_iter) + if executor_class == ExecutorClass.always_on__llm__a6000: + prompt_sample = next(prompt_samples_iter) kwargs = { "prompt_sample": prompt_sample, "expected_prompts": list(prompt_sample.prompts.all()), @@ -1254,11 +1255,11 @@ async def _score_job(ctx: BatchContext, job: Job) -> None: async def _score_jobs(ctx: BatchContext) -> None: - # NOTE: download the answers for llama jobs before scoring + # NOTE: download the answers for llm prompts jobs before scoring tasks = [ asyncio.create_task(job.job_generator._download_answers()) for job in ctx.jobs.values() - if job.executor_class == ExecutorClass.always_on__llama + if job.executor_class == ExecutorClass.always_on__llm__a6000 and job.job_response is not None and isinstance(job.job_response, V0JobFinishedRequest) ] @@ -1397,12 +1398,13 @@ def _db_persist(ctx: BatchContext) -> None: ) MinerManifest.objects.bulk_create(miner_manifests) + # TODO: refactor into nicer abstraction synthetic_jobs_map: dict[str, SyntheticJob] = { synthetic_job.job_uuid: synthetic_job for synthetic_job in synthetic_jobs } prompt_samples: list[PromptSample] = [] for job in ctx.jobs.values(): - if job.executor_class != ExecutorClass.always_on__llama: + if job.executor_class != 
ExecutorClass.always_on__llm__a6000: continue prompt_sample = job.job_generator.prompt_sample prompt_sample.synthetic_job = synthetic_jobs_map.get(job.uuid) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py index f907c4ec2..deb868629 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/factory.py @@ -7,13 +7,13 @@ from compute_horde_validator.validator.synthetic_jobs.generator.gpu_hashcat import ( GPUHashcatSyntheticJobGenerator, ) -from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( - LlamaPromptsSyntheticJobGenerator, +from compute_horde_validator.validator.synthetic_jobs.generator.llm_prompts import ( + LlmPromptsSyntheticJobGenerator, ) class DefaultSyntheticJobGeneratorFactory(BaseSyntheticJobGeneratorFactory): async def create(self, executor_class: ExecutorClass, **kwargs) -> BaseSyntheticJobGenerator: - if executor_class == ExecutorClass.always_on__llama: - return LlamaPromptsSyntheticJobGenerator(**kwargs) + if executor_class == ExecutorClass.always_on__llm__a6000: + return LlmPromptsSyntheticJobGenerator(**kwargs) return GPUHashcatSyntheticJobGenerator(**kwargs) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py similarity index 97% rename from validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py rename to validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py index f651fdf32..7bcaa797f 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llama_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py @@ -13,7 +13,7 @@ from .base import BaseSyntheticJobGenerator -class LlamaPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator): +class LlmPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator): def __init__( self, prompt_sample: PromptSample, @@ -110,4 +110,4 @@ def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str return True, "", 1.0 def job_description(self) -> str: - return "LLAMA prompts synthetic job" + return "LLM prompts synthetic job" diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py index 4ffc8567d..8022fdf22 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/conftest.py @@ -16,8 +16,8 @@ SolveWorkload, ) from compute_horde_validator.validator.synthetic_jobs.batch_run import BatchContext, MinerClient -from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( - LlamaPromptsSyntheticJobGenerator, +from compute_horde_validator.validator.synthetic_jobs.generator.llm_prompts import ( + LlmPromptsSyntheticJobGenerator, ) from compute_horde_validator.validator.tests.transport import MinerSimulationTransport @@ -172,13 +172,13 @@ async def prompts(prompt_sample): @pytest_asyncio.fixture -async def llama_job_generator( +async def 
llm_prompts_job_generator( prompt_series: PromptSeries, solve_workload: SolveWorkload, prompt_sample: PromptSample, prompts: list[Prompt], -) -> LlamaPromptsSyntheticJobGenerator: - job_generator = LlamaPromptsSyntheticJobGenerator( +) -> LlmPromptsSyntheticJobGenerator: + job_generator = LlmPromptsSyntheticJobGenerator( prompt_sample=prompt_sample, expected_prompts=prompts, s3_url=prompt_series.s3_url, diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llm_prompts_generator.py similarity index 61% rename from validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py rename to validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llm_prompts_generator.py index 40364ff33..9ff622c76 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_prompts_generator.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llm_prompts_generator.py @@ -4,8 +4,8 @@ from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest from pytest_httpx import HTTPXMock -from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( - LlamaPromptsSyntheticJobGenerator, +from compute_horde_validator.validator.synthetic_jobs.generator.llm_prompts import ( + LlmPromptsSyntheticJobGenerator, ) _JOB_FINISHED_REQUEST = V0JobFinishedRequest( @@ -17,56 +17,56 @@ @pytest.mark.asyncio @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -async def test_llama_prompts_generator_basic( +async def test_llm_prompts_generator_basic( httpx_mock: HTTPXMock, - llama_job_generator: LlamaPromptsSyntheticJobGenerator, + llm_prompts_job_generator: LlmPromptsSyntheticJobGenerator, ): httpx_mock.add_response(json={str(i): str(i) for i in range(240)}) - volume = await llama_job_generator.volume() + volume = await llm_prompts_job_generator.volume() assert isinstance(volume, MultiVolume) assert len(volume.volumes) == 1 assert isinstance(volume.volumes[0], SingleFileVolume) - output_upload = await llama_job_generator.output_upload() + output_upload = await llm_prompts_job_generator.output_upload() assert isinstance(output_upload, MultiUpload) assert len(output_upload.uploads) == 1 assert isinstance(output_upload.uploads[0], SingleFilePutUpload) # before downloading answers - correct, _, score = llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) + correct, _, score = llm_prompts_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert not correct assert score == 0.0 - await llama_job_generator._download_answers() - correct, _, score = llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) + await llm_prompts_job_generator._download_answers() + correct, _, score = llm_prompts_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert correct assert score == 1.0 @pytest.mark.asyncio @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -async def test_llama_prompts_generator_missing_prompts( +async def test_llm_prompts_generator_missing_prompts( httpx_mock: HTTPXMock, - llama_job_generator: LlamaPromptsSyntheticJobGenerator, + llm_prompts_job_generator: LlmPromptsSyntheticJobGenerator, ): httpx_mock.add_response(json={str(i): str(i) for i in range(9, 249)}) - await llama_job_generator._download_answers() - correct, _, score = 
llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) + await llm_prompts_job_generator._download_answers() + correct, _, score = llm_prompts_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert not correct assert score == 0.0 @pytest.mark.asyncio @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -async def test_llama_prompts_generator_wrong_answers( +async def test_llm_prompts_generator_wrong_answers( httpx_mock: HTTPXMock, - llama_job_generator: LlamaPromptsSyntheticJobGenerator, + llm_prompts_job_generator: LlmPromptsSyntheticJobGenerator, ): httpx_mock.add_response(json={str(i): "wrong" for i in range(240)}) - await llama_job_generator._download_answers() - correct, _, score = llama_job_generator.verify(_JOB_FINISHED_REQUEST, 0) + await llm_prompts_job_generator._download_answers() + correct, _, score = llm_prompts_job_generator.verify(_JOB_FINISHED_REQUEST, 0) assert not correct assert score == 0.0 diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llm_synthetic_job_flow.py similarity index 97% rename from validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py rename to validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llm_synthetic_job_flow.py index ae97fd94c..c432cb9e0 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llama_synthetic_job_flow.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_synthetic_jobs/test_llm_synthetic_job_flow.py @@ -49,7 +49,7 @@ def mocked_job_generator_factory(prompts): @pytest.mark.asyncio @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -async def test_llama_synthetic_jobs_flow( +async def test_llm_synthetic_jobs_flow( miner: Miner, axon_dict: dict[str, bittensor.AxonInfo], create_simulation_miner_client: Callable, @@ -77,7 +77,7 @@ async def test_llama_synthetic_jobs_flow( manifest=miner_requests.ExecutorManifest( executor_classes=[ miner_requests.ExecutorClassManifest( - executor_class=ExecutorClass.always_on__llama, + executor_class=ExecutorClass.always_on__llm__a6000, count=1, ) ] From 490aaa86682e6a2b39564cf6d6fb529903304e81 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 17 Sep 2024 02:07:08 +0600 Subject: [PATCH 16/53] Warn when downloading prompt job answers fails --- .../validator/synthetic_jobs/batch_run.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index 8097acf9d..5590e9003 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -1254,8 +1254,7 @@ async def _score_job(ctx: BatchContext, job: Job) -> None: ) -async def _score_jobs(ctx: BatchContext) -> None: - # NOTE: download the answers for llm prompts jobs before scoring +async def _download_llm_prompts_answers(ctx: BatchContext) -> None: tasks = [ asyncio.create_task(job.job_generator._download_answers()) for job in ctx.jobs.values() @@ -1263,7 +1262,19 @@ async def _score_jobs(ctx: BatchContext) -> None: and job.job_response is not None and isinstance(job.job_response, 
V0JobFinishedRequest)
     ]
-    await asyncio.gather(*tasks, return_exceptions=True)
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+    for i, result in enumerate(results):
+        if isinstance(result, BaseException):
+            hotkey = ctx.hotkeys[i]
+            name = ctx.names[hotkey]
+            logger.warning("%s failed to get llm prompt answers: %r", name, result)
+        else:
+            assert result is None
+
+
+async def _score_jobs(ctx: BatchContext) -> None:
+    # NOTE: download the answers for llm prompts jobs before scoring
+    await _download_llm_prompts_answers(ctx)
 
     for job in ctx.jobs.values():
         try:
             await _score_job(ctx, job)

From 67ee0256a9288af2e1b2d153384b3f5c48c32a4b Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Fri, 13 Sep 2024 20:26:48 +0800
Subject: [PATCH 17/53] Add prompt answering job

# Conflicts:
#	validator/app/src/compute_horde_validator/validator/tasks.py
---
 .../src/compute_horde_validator/settings.py   |   6 +-
 .../cross_validation/prompt_generation.py     |  12 +-
 .../validator/models.py                       |   2 +-
 .../compute_horde_validator/validator/s3.py   |   8 +
 .../synthetic_jobs/generator/llm_prompts.py   |  20 ++-
 .../validator/tasks.py                        |  81 ++++++++++
 .../validator/tests/settings.py               |   6 +-
 .../tests/test_cross_validation/conftest.py   |  67 ++++++++
 .../test_prompt_answering.py                  | 145 ++++++++++++++++++
 .../test_prompt_generation.py                 |  63 +-------
 10 files changed, 324 insertions(+), 86 deletions(-)
 create mode 100644 validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/conftest.py
 create mode 100644 validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py

diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py
index 83536a1f6..8f83774ae 100644
--- a/validator/app/src/compute_horde_validator/settings.py
+++ b/validator/app/src/compute_horde_validator/settings.py
@@ -527,9 +527,9 @@ def BITTENSOR_WALLET() -> bittensor.wallet:


 # Local miner generating prompts
-GENERATION_MINER_KEY = env.str("GENERATION_MINER_KEY", default="")
-GENERATION_MINER_ADDRESS = env.str("GENERATION_MINER_ADDRESS", default="")
-GENERATION_MINER_PORT = env.int("GENERATION_MINER_PORT", default=0)
+TRUST_MINER_KEY = env.str("TRUST_MINER_KEY", default="")
+TRUST_MINER_ADDRESS = env.str("TRUST_MINER_ADDRESS", default="")
+TRUST_MINER_PORT = env.int("TRUST_MINER_PORT", default=0)


 CHANNEL_LAYERS = {
diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
index 38b2b214d..0b043cbf5 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
@@ -26,9 +26,9 @@ async def generate_prompts(
 ) -> None:
     if not all(
         [
-            settings.GENERATION_MINER_KEY,
-            settings.GENERATION_MINER_ADDRESS,
-            settings.GENERATION_MINER_PORT,
+            settings.TRUSTED_MINER_KEY,
+            settings.TRUSTED_MINER_ADDRESS,
+            settings.TRUSTED_MINER_PORT,
         ]
     ):
         logger.warning("Prompt generation miner not configured, skipping prompt generation")
         return
@@ -62,9 +62,9 @@ async def generate_prompts(
     wait_timeout = wait_timeout or job_generator.timeout_seconds()

     miner_client = create_miner_client(
-        miner_hotkey=settings.GENERATION_MINER_KEY,
-        miner_address=settings.GENERATION_MINER_ADDRESS,
-        miner_port=settings.GENERATION_MINER_PORT,
+        miner_hotkey=settings.TRUSTED_MINER_KEY,
+        miner_address=settings.TRUSTED_MINER_ADDRESS,
+
miner_port=settings.TRUSTED_MINER_PORT, job_uuid=str(job_uuid), my_keypair=_get_keypair(), ) diff --git a/validator/app/src/compute_horde_validator/validator/models.py b/validator/app/src/compute_horde_validator/validator/models.py index 33544cbee..f720fa6d2 100644 --- a/validator/app/src/compute_horde_validator/validator/models.py +++ b/validator/app/src/compute_horde_validator/validator/models.py @@ -325,7 +325,7 @@ class SolveWorkload(models.Model): finished_at = models.DateTimeField(null=True, default=None, db_index=True) def __str__(self): - return f"uuid: {self.batch_uuid} - synthetic_job_batch: {self.synthetic_job_batch} - seed: {self.seed}" + return f"uuid: {self.workload_uuid} - seed: {self.seed}" class PromptSample(models.Model): diff --git a/validator/app/src/compute_horde_validator/validator/s3.py b/validator/app/src/compute_horde_validator/validator/s3.py index 0bbbd3858..6fbc11ec9 100644 --- a/validator/app/src/compute_horde_validator/validator/s3.py +++ b/validator/app/src/compute_horde_validator/validator/s3.py @@ -3,6 +3,7 @@ from collections.abc import Generator import boto3 +import httpx import requests from django.conf import settings @@ -51,3 +52,10 @@ def get_prompts_from_s3_url(s3_url: str) -> Generator[tuple[str, list[str]]]: logger.warning(f"Failed to download prompts from {s3_url}") return [] return response.text.split("\n") + + +async def download_json(s3_url: str): + async with httpx.AsyncClient() as client: + response = await client.get(s3_url, timeout=5) + response.raise_for_status() + return response.content diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py index 7bcaa797f..a19fbbd68 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py @@ -1,6 +1,5 @@ import uuid -import httpx import pydantic from compute_horde.base.output_upload import MultiUpload, OutputUpload, SingleFilePutUpload from compute_horde.base.volume import MultiVolume, SingleFileVolume, Volume @@ -8,7 +7,7 @@ from django.conf import settings from compute_horde_validator.validator.models import Prompt, PromptSample -from compute_horde_validator.validator.s3 import generate_upload_url, get_public_url +from compute_horde_validator.validator.s3 import download_json, generate_upload_url, get_public_url from .base import BaseSyntheticJobGenerator @@ -16,15 +15,14 @@ class LlmPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator): def __init__( self, - prompt_sample: PromptSample, + prompt_sample: PromptSample | None, expected_prompts: list[Prompt], s3_url: str, seed: int, **kwargs, ): super().__init__(**kwargs) - - self.prompt_sample: PromptSample = prompt_sample + self.prompt_sample: PromptSample | None = prompt_sample self.seed = seed self.expected_prompts: list[Prompt] = expected_prompts self.s3_url = s3_url @@ -93,12 +91,8 @@ async def output_upload(self) -> OutputUpload | None: ) async def _download_answers(self): - async with httpx.AsyncClient() as client: - response = await client.get(self._url_for_download(), timeout=5) - response.raise_for_status() - self.prompt_answers = pydantic.TypeAdapter(dict[str, str]).validate_json( - response.content - ) + response = await download_json(self._url_for_download()) + self.prompt_answers = pydantic.TypeAdapter(dict[str, str]).validate_json(response) def 
verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: for expected_prompt in self.expected_prompts: @@ -109,5 +103,9 @@ def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str return True, "", 1.0 + async def get_prompt_answers(self) -> dict[str, str]: + await self._download_answers() + return self.prompt_answers + def job_description(self) -> str: return "LLM prompts synthetic job" diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index 58004dedb..696ae1421 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -5,6 +5,7 @@ import time import traceback import uuid +from collections.abc import Callable from datetime import timedelta from math import ceil, floor @@ -19,6 +20,11 @@ from celery.result import allow_join_result from celery.utils.log import get_task_logger from compute_horde.dynamic_config import sync_dynamic_config +from compute_horde.miner_client.organic import ( + OrganicJobDetails, + OrganicMinerClient, + run_organic_job, +) from compute_horde.receipts import ( JobFinishedReceiptPayload, JobStartedReceiptPayload, @@ -57,6 +63,9 @@ SYNTHETIC_JOBS_HARD_LIMIT, SYNTHETIC_JOBS_SOFT_LIMIT, ) +from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( + LlamaPromptsSyntheticJobGenerator, +) from compute_horde_validator.validator.synthetic_jobs.utils import ( create_and_run_synthetic_job_batch, ) @@ -1091,3 +1100,75 @@ def create_sample_workloads(): # delete remaining empty workload if current_workload_fill == 0: current_workload.delete() + + +async def get_workload_prompts(workload: SolveWorkload) -> list[Prompt]: + sample_prompts = PromptSample.objects.prefetch_related("prompts").filter( + workload=workload, + prompts__answer__isnull=True, + ) + prompts = [] + async for sample in sample_prompts: + prompts.extend(sample.prompts.all()) + return prompts + + +async def answer_prompts( + create_miner_client: Callable[..., OrganicMinerClient] | None = None, + job_uuid: uuid.UUID | None = None, + wait_timeout: int | None = None, +): + workloads = SolveWorkload.objects.filter( + # workload was not ran before on this prompt_sample + finished_at__isnull=True, + ) + + async for workload in workloads: + seed = workload.seed + prompts = await get_workload_prompts(workload) + + job_generator = LlamaPromptsSyntheticJobGenerator(None, prompts, workload.s3_url, seed) + await job_generator.ainit() + + job_uuid = job_uuid or uuid.uuid4() + job_details = OrganicJobDetails( + job_uuid=str(job_uuid), + docker_image=job_generator.docker_image_name(), + raw_script=job_generator.raw_script(), + docker_run_options_preset=job_generator.docker_run_options_preset(), + docker_run_cmd=job_generator.docker_run_cmd(), + total_job_timeout=job_generator.timeout_seconds(), + volume=await job_generator.volume(), + output=await job_generator.output_upload(), + ) + + create_miner_client = create_miner_client or OrganicMinerClient + wait_timeout = wait_timeout or job_generator.timeout_seconds() + + miner_client = create_miner_client( + miner_hotkey=settings.TRUST_MINER_KEY, + miner_address=settings.TRUST_MINER_ADDRESS, + miner_port=settings.TRUST_MINER_PORT, + job_uuid=str(job_uuid), + my_keypair=get_keypair(), + ) + + await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout) + + try: + prompt_answers: dict[str, str] = await 
job_generator.get_prompt_answers() + except Exception: + logger.error("Failed to download prompt answers", exc_info=True) + continue + + # update the workload as finished + workload.finished_at = now() + await workload.asave() + + # update the prompts with the answers + for prompt in prompts: + if prompt.content in prompt_answers: + prompt.answer = prompt_answers[prompt.content] + else: + logger.warning(f"Prompt {prompt} was not found in the prompt answers generated") + await Prompt.objects.abulk_update(prompts, ["answer"]) diff --git a/validator/app/src/compute_horde_validator/validator/tests/settings.py b/validator/app/src/compute_horde_validator/validator/tests/settings.py index 6c25dd9b4..6fc34d852 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/settings.py +++ b/validator/app/src/compute_horde_validator/validator/tests/settings.py @@ -45,6 +45,6 @@ def BITTENSOR_WALLET() -> bittensor.wallet: DYNAMIC_CONFIG_CACHE_TIMEOUT = 0 -GENERATION_MINER_KEY = "fake_generation_miner_key" -GENERATION_MINER_ADDRESS = "fakehost" -GENERATION_MINER_PORT = 1234 +TRUSTED_MINER_KEY = "fake_generation_miner_key" +TRUSTED_MINER_ADDRESS = "fakehost" +TRUSTED_MINER_PORT = 1234 diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/conftest.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/conftest.py new file mode 100644 index 000000000..535676acb --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/conftest.py @@ -0,0 +1,67 @@ +import uuid + +import pytest +import pytest_asyncio +from compute_horde.executor_class import DEFAULT_EXECUTOR_CLASS +from compute_horde.miner_client.organic import OrganicMinerClient +from compute_horde.mv_protocol import miner_requests + +from compute_horde_validator.validator.tests.transport import MinerSimulationTransport + + +@pytest_asyncio.fixture +async def transport(): + return MinerSimulationTransport("miner_hotkey") + + +@pytest.fixture +def job_uuid(): + return uuid.uuid4() + + +@pytest.fixture +def create_miner_client(transport: MinerSimulationTransport): + def _create(*args, **kwargs): + kwargs["transport"] = transport + return OrganicMinerClient(*args, **kwargs) + + return _create + + +@pytest.fixture +def manifest_message(): + return miner_requests.V0ExecutorManifestRequest( + manifest=miner_requests.ExecutorManifest( + executor_classes=[ + miner_requests.ExecutorClassManifest(executor_class=DEFAULT_EXECUTOR_CLASS, count=1) + ] + ) + ).model_dump_json() + + +@pytest.fixture +def executor_ready_message(job_uuid: uuid.UUID): + return miner_requests.V0ExecutorReadyRequest(job_uuid=str(job_uuid)).model_dump_json() + + +@pytest.fixture +def accept_job_message(job_uuid: uuid.UUID): + return miner_requests.V0AcceptJobRequest(job_uuid=str(job_uuid)).model_dump_json() + + +@pytest.fixture +def job_finish_message(job_uuid: uuid.UUID): + return miner_requests.V0JobFinishedRequest( + job_uuid=str(job_uuid), + docker_process_stdout="", + docker_process_stderr="", + ).model_dump_json() + + +@pytest.fixture +def job_failed_message(job_uuid: uuid.UUID): + return miner_requests.V0JobFailedRequest( + job_uuid=str(job_uuid), + docker_process_stdout="", + docker_process_stderr="", + ).model_dump_json() diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py new file mode 100644 
index 000000000..06bedf8d0 --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py @@ -0,0 +1,145 @@ +import json +import uuid +from collections.abc import Callable +from unittest.mock import patch + +import pytest +from compute_horde.miner_client.organic import OrganicJobError + +from compute_horde_validator.validator.models import ( + Prompt, + PromptSample, + PromptSeries, + SolveWorkload, +) +from compute_horde_validator.validator.tasks import answer_prompts +from compute_horde_validator.validator.tests.transport import MinerSimulationTransport + +pytestmark = [ + pytest.mark.asyncio, + pytest.mark.django_db(transaction=True), + pytest.mark.override_config( + DYNAMIC_MAX_PROMPT_BATCHES=5, + DYNAMIC_PROMPTS_BATCHES_IN_A_SINGLE_GO=3, + DYNAMIC_NUMBER_OF_PROMPTS_IN_BATCH=99, + ), +] + + +async def db_setup(): + workload = await SolveWorkload.objects.acreate(seed=0, s3_url="s3://test") + prompt_series = await PromptSeries.objects.acreate( + s3_url="s3://test", + generator_version=1, + ) + prompt_sample = await PromptSample.objects.acreate( + series=prompt_series, + workload=workload, + ) + prompts = await Prompt.objects.abulk_create( + [ + Prompt(sample=prompt_sample, content="prompt1"), + Prompt(sample=prompt_sample, content="prompt2"), + Prompt(sample=prompt_sample, content="prompt3"), + ] + ) + return prompts, workload + + +async def mock_download_json(*args, **kwargs): + return json.dumps({f"prompt{i}": f"answer{i}" for i in range(1, 4)}) + + +async def mock_throw_error(*args, **kwargs): + raise Exception("Download failed") + + +@patch( + "compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_json", + mock_download_json, +) +async def test_answer_prompts( + settings, + transport: MinerSimulationTransport, + create_miner_client: Callable, + manifest_message: str, + executor_ready_message: str, + accept_job_message: str, + job_finish_message: str, + job_uuid: uuid.UUID, +): + await transport.add_message(manifest_message, send_before=1) + await transport.add_message(accept_job_message, send_before=1) + await transport.add_message(executor_ready_message, send_before=0) + await transport.add_message(job_finish_message, send_before=2) + + prompts, workload = await db_setup() + + await answer_prompts(create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2) + + await workload.arefresh_from_db() + assert workload.finished_at is not None + + for i, prompt in enumerate(prompts): + await prompt.arefresh_from_db() + assert prompt.answer == f"answer{i + 1}" + + +async def test_answer_prompts_job_failed( + transport: MinerSimulationTransport, + create_miner_client: Callable, + manifest_message: str, + executor_ready_message: str, + accept_job_message: str, + job_failed_message: str, + job_uuid: uuid.UUID, +): + await transport.add_message(manifest_message, send_before=1) + await transport.add_message(accept_job_message, send_before=1) + await transport.add_message(executor_ready_message, send_before=0) + await transport.add_message(job_failed_message, send_before=2) + + prompts, workload = await db_setup() + + with pytest.raises(OrganicJobError): + await answer_prompts( + create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 + ) + + await workload.arefresh_from_db() + assert workload.finished_at is None + + for prompt in prompts: + await prompt.arefresh_from_db() + assert prompt.answer is None + + +@patch( + 
"compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_json", + mock_throw_error, +) +async def test_answer_prompts_download_failed( + settings, + transport: MinerSimulationTransport, + create_miner_client: Callable, + manifest_message: str, + executor_ready_message: str, + accept_job_message: str, + job_finish_message: str, + job_uuid: uuid.UUID, +): + await transport.add_message(manifest_message, send_before=1) + await transport.add_message(accept_job_message, send_before=1) + await transport.add_message(executor_ready_message, send_before=0) + await transport.add_message(job_finish_message, send_before=2) + + prompts, workload = await db_setup() + + await answer_prompts(create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2) + + await workload.arefresh_from_db() + assert workload.finished_at is None + + for prompt in prompts: + await prompt.arefresh_from_db() + assert prompt.answer is None diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py index ce1259011..b027a236e 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py @@ -2,11 +2,8 @@ from collections.abc import Callable import pytest -import pytest_asyncio from compute_horde.base.output_upload import MultiUpload -from compute_horde.executor_class import DEFAULT_EXECUTOR_CLASS -from compute_horde.miner_client.organic import OrganicJobError, OrganicMinerClient -from compute_horde.mv_protocol import miner_requests +from compute_horde.miner_client.organic import OrganicJobError from compute_horde.mv_protocol.validator_requests import BaseValidatorRequest from compute_horde_validator.validator.cross_validation.prompt_generation import generate_prompts @@ -24,64 +21,6 @@ ] -@pytest_asyncio.fixture -async def transport(): - return MinerSimulationTransport("miner_hotkey") - - -@pytest.fixture -def job_uuid(): - return uuid.uuid4() - - -@pytest.fixture -def create_miner_client(transport: MinerSimulationTransport): - def _create(*args, **kwargs): - kwargs["transport"] = transport - return OrganicMinerClient(*args, **kwargs) - - return _create - - -@pytest.fixture -def manifest_message(): - return miner_requests.V0ExecutorManifestRequest( - manifest=miner_requests.ExecutorManifest( - executor_classes=[ - miner_requests.ExecutorClassManifest(executor_class=DEFAULT_EXECUTOR_CLASS, count=1) - ] - ) - ).model_dump_json() - - -@pytest.fixture -def executor_ready_message(job_uuid: uuid.UUID): - return miner_requests.V0ExecutorReadyRequest(job_uuid=str(job_uuid)).model_dump_json() - - -@pytest.fixture -def accept_job_message(job_uuid: uuid.UUID): - return miner_requests.V0AcceptJobRequest(job_uuid=str(job_uuid)).model_dump_json() - - -@pytest.fixture -def job_finish_message(job_uuid: uuid.UUID): - return miner_requests.V0JobFinishedRequest( - job_uuid=str(job_uuid), - docker_process_stdout="", - docker_process_stderr="", - ).model_dump_json() - - -@pytest.fixture -def job_failed_message(job_uuid: uuid.UUID): - return miner_requests.V0JobFailedRequest( - job_uuid=str(job_uuid), - docker_process_stdout="", - docker_process_stderr="", - ).model_dump_json() - - async def test_generate_prompts( transport: MinerSimulationTransport, create_miner_client: Callable, 
From 6eb67683449c008db7a892dadfb153fd32aed0e8 Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Sat, 14 Sep 2024 17:36:45 +0800
Subject: [PATCH 18/53] Fix variable naming

---
 .../src/compute_horde_validator/settings.py   | 42 +++++++---------
 .../cross_validation/prompt_generation.py     |  6 +--
 .../validator/dynamic_config.py               | 18 -------
 .../compute_horde_validator/validator/s3.py   |  2 +-
 .../synthetic_jobs/generator/llm_prompts.py   |  8 ++-
 .../validator/tasks.py                        | 50 +++++++++++--------
 .../test_prompt_answering.py                  | 13 ++---
 .../test_prompt_generation.py                 |  6 +--
 8 files changed, 64 insertions(+), 81 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py
index 8f83774ae..4fe070dae 100644
--- a/validator/app/src/compute_horde_validator/settings.py
+++ b/validator/app/src/compute_horde_validator/settings.py
@@ -201,9 +201,10 @@ def wrapped(*args, **kwargs):
         "in seconds",
         int,
     ),
-    "DYNAMIC_NUMBER_OF_PROMPTS_TO_VALIDATE_FROM_SERIES": (
-        10,
-        "how many prompts to sample and validate from a series",
+    # llama params
+    "DYNAMIC_MAX_PROMPT_SERIES": (
+        10000,
+        "Maximum number of prompt series upon which the prompt generator will not be triggered",
         int,
     ),
     "DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE": (
@@ -211,19 +212,21 @@ def wrapped(*args, **kwargs):
         "how many workloads are needed before running local inference",
         int,
     ),
-    "DYNAMIC_MAX_PROMPT_BATCHES": (
-        10000,
-        "Maximum number of prompt batches upon which the prompt generator will not be triggered",
-        int,
-    ),
-    "DYNAMIC_PROMPTS_BATCHES_IN_A_SINGLE_GO": (
+    # prompt generation params
+    "DYNAMIC_PROMPTS_SERIES_IN_A_SINGLE_GENERATION": (
         5,
         "Number of batches that prompt generator will process in a single go",
         int,
     ),
-    "DYNAMIC_NUMBER_OF_PROMPTS_IN_BATCH": (
+    "DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES": (
         240,
-        "Number of prompts to generate in a single batch",
+        "Number of prompts to generate in a single series",
         int,
     ),
+    # prompts answering params
+    "DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES": (
+        10,
+        "how many prompts to sample and answer from a series",
+        int,
+    ),
 }
@@ -493,17 +496,6 @@ def wrapped(*args, **kwargs):

 DYNAMIC_CONFIG_ENV = env.str("DYNAMIC_CONFIG_ENV", default="prod")

-# prompt gen sampling
-DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES = env.int(
-    "DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES", default=None
-)
-DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_TO_VALIDATE_FROM_SERIES = env.int(
-    "DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_TO_VALIDATE_IN_BATCH", default=None
-)
-DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE = env.int(
-    "DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE", default=None
-)
-
 # synthetic jobs are evenly distributed through the cycle, however
 # we start them from some offset because scheduling takes some time
 SYNTHETIC_JOBS_RUN_OFFSET = env.int("SYNTHETIC_JOBS_RUN_OFFSET", default=24)
@@ -527,9 +519,9 @@ def BITTENSOR_WALLET() -> bittensor.wallet:

 # Local miner generating prompts
-TRUST_MINER_KEY = env.str("TRUST_MINER_KEY", default="")
-TRUST_MINER_ADDRESS = env.str("TRUST_MINER_ADDRESS", default="")
-TRUST_MINER_PORT = env.int("TRUST_MINER_PORT", default=0)
+TRUSTED_MINER_KEY = env.str("TRUSTED_MINER_KEY", default="")
+TRUSTED_MINER_ADDRESS = env.str("TRUSTED_MINER_ADDRESS", default="")
+TRUSTED_MINER_PORT = env.int("TRUSTED_MINER_PORT", default=0)

 CHANNEL_LAYERS = {

diff --git
a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py index 0b043cbf5..cf7873645 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py @@ -34,7 +34,7 @@ async def generate_prompts( logger.warning("Prompt generation miner not configured, skipping prompt generation") return - limit = await aget_config("DYNAMIC_MAX_PROMPT_BATCHES") + limit = await aget_config("DYNAMIC_MAX_PROMPT_SERIES") if current_count := await PromptSeries.objects.acount() >= limit: logger.warning( "There are %s series in the db exceeding the limit of %s, skipping prompt generation", @@ -45,8 +45,8 @@ async def generate_prompts( job_uuid = job_uuid or uuid.uuid4() - num_batches = await aget_config("DYNAMIC_PROMPTS_BATCHES_IN_A_SINGLE_GO") - num_prompts_per_batch = await aget_config("DYNAMIC_NUMBER_OF_PROMPTS_IN_BATCH") + num_batches = await aget_config("DYNAMIC_PROMPTS_SERIES_IN_A_SINGLE_GENERATION") + num_prompts_per_batch = await aget_config("DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES") series_uuids, upload_urls, public_urls = _generate_uuids_and_urls(num_batches) diff --git a/validator/app/src/compute_horde_validator/validator/dynamic_config.py b/validator/app/src/compute_horde_validator/validator/dynamic_config.py index 07170935f..c6604fb45 100644 --- a/validator/app/src/compute_horde_validator/validator/dynamic_config.py +++ b/validator/app/src/compute_horde_validator/validator/dynamic_config.py @@ -31,24 +31,6 @@ async def aget_config(key): return await dynamic_config_holder.get(key) -def get_number_of_prompts_in_series(): - if settings.DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES is not None: - return settings.DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES - return config.DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES - - -def get_number_of_prompts_to_validate_from_series(): - if settings.DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_TO_VALIDATE_FROM_SERIES is not None: - return settings.DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_PROMPTS_TO_VALIDATE_FROM_SERIES - return config.DYNAMIC_NUMBER_OF_PROMPTS_TO_VALIDATE_FROM_SERIES - - -def get_number_of_workloads_to_trigger_local_inference(): - if settings.DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE is not None: - return settings.DEBUG_OVERRIDE_DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE - return config.DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE - - async def aget_weights_version(): if settings.DEBUG_OVERRIDE_WEIGHTS_VERSION is not None: return settings.DEBUG_OVERRIDE_WEIGHTS_VERSION diff --git a/validator/app/src/compute_horde_validator/validator/s3.py b/validator/app/src/compute_horde_validator/validator/s3.py index 6fbc11ec9..bf8d82146 100644 --- a/validator/app/src/compute_horde_validator/validator/s3.py +++ b/validator/app/src/compute_horde_validator/validator/s3.py @@ -54,7 +54,7 @@ def get_prompts_from_s3_url(s3_url: str) -> Generator[tuple[str, list[str]]]: return response.text.split("\n") -async def download_json(s3_url: str): +async def download_file_content(s3_url: str): async with httpx.AsyncClient() as client: response = await client.get(s3_url, timeout=5) response.raise_for_status() diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py 
b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py index a19fbbd68..ccb8be399 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py @@ -7,7 +7,11 @@ from django.conf import settings from compute_horde_validator.validator.models import Prompt, PromptSample -from compute_horde_validator.validator.s3 import download_json, generate_upload_url, get_public_url +from compute_horde_validator.validator.s3 import ( + download_file_content, + generate_upload_url, + get_public_url, +) from .base import BaseSyntheticJobGenerator @@ -91,7 +95,7 @@ async def output_upload(self) -> OutputUpload | None: ) async def _download_answers(self): - response = await download_json(self._url_for_download()) + response = await download_file_content(self._url_for_download()) self.prompt_answers = pydantic.TypeAdapter(dict[str, str]).validate_json(response) def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index 696ae1421..1e48beb67 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -14,7 +14,7 @@ import celery.exceptions import numpy as np import requests -from asgiref.sync import async_to_sync +from asgiref.sync import async_to_sync, sync_to_async from bittensor.utils.weight_utils import process_weights_for_netuid from celery import shared_task from celery.result import allow_join_result @@ -37,10 +37,6 @@ from django.utils.timezone import now from compute_horde_validator.celery import app -from compute_horde_validator.validator.dynamic_config import ( - get_number_of_prompts_to_validate_from_series, - get_number_of_workloads_to_trigger_local_inference, -) from compute_horde_validator.validator.locks import Locked, LockType, get_advisory_lock from compute_horde_validator.validator.metagraph_client import get_miner_axon_info from compute_horde_validator.validator.models import ( @@ -1049,8 +1045,8 @@ def create_workload(seed: int): @app.task() def create_sample_workloads(): - total_workloads_needed = get_number_of_workloads_to_trigger_local_inference() - prompts_per_sample = get_number_of_prompts_to_validate_from_series() + total_workloads_needed = config.DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE + prompts_per_sample = config.DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES # set seed for the current synthetic jobs run seed = random.randint(0, 1000000) @@ -1103,14 +1099,12 @@ def create_sample_workloads(): async def get_workload_prompts(workload: SolveWorkload) -> list[Prompt]: - sample_prompts = PromptSample.objects.prefetch_related("prompts").filter( - workload=workload, - prompts__answer__isnull=True, - ) - prompts = [] - async for sample in sample_prompts: - prompts.extend(sample.prompts.all()) - return prompts + return [ + x + async for x in Prompt.objects.select_related("sample").filter( + sample__workload_id=workload.id, answer__isnull=True + ) + ] async def answer_prompts( @@ -1118,11 +1112,22 @@ async def answer_prompts( job_uuid: uuid.UUID | None = None, wait_timeout: int | None = None, ): + # TODO: this logic will be replaced workloads = SolveWorkload.objects.filter( # workload was not ran before on this prompt_sample 
finished_at__isnull=True, ) + if not all( + [ + settings.TRUSTED_MINER_KEY, + settings.TRUSTED_MINER_ADDRESS, + settings.TRUSTED_MINER_PORT, + ] + ): + logger.warning("Prompt generation miner not configured, skipping prompt generation") + return + async for workload in workloads: seed = workload.seed prompts = await get_workload_prompts(workload) @@ -1146,9 +1151,9 @@ async def answer_prompts( wait_timeout = wait_timeout or job_generator.timeout_seconds() miner_client = create_miner_client( - miner_hotkey=settings.TRUST_MINER_KEY, - miner_address=settings.TRUST_MINER_ADDRESS, - miner_port=settings.TRUST_MINER_PORT, + miner_hotkey=settings.TRUSTED_MINER_KEY, + miner_address=settings.TRUSTED_MINER_ADDRESS, + miner_port=settings.TRUSTED_MINER_PORT, job_uuid=str(job_uuid), my_keypair=get_keypair(), ) @@ -1161,9 +1166,14 @@ async def answer_prompts( logger.error("Failed to download prompt answers", exc_info=True) continue + await sync_to_async(save_workload_answers)(workload, prompts, prompt_answers) + + +def save_workload_answers(workload, prompts, prompt_answers): + with transaction.atomic(): # update the workload as finished workload.finished_at = now() - await workload.asave() + workload.save() # update the prompts with the answers for prompt in prompts: @@ -1171,4 +1181,4 @@ async def answer_prompts( prompt.answer = prompt_answers[prompt.content] else: logger.warning(f"Prompt {prompt} was not found in the prompt answers generated") - await Prompt.objects.abulk_update(prompts, ["answer"]) + Prompt.objects.bulk_update(prompts, ["answer"]) diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py index 06bedf8d0..26d54dee8 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py @@ -18,11 +18,6 @@ pytestmark = [ pytest.mark.asyncio, pytest.mark.django_db(transaction=True), - pytest.mark.override_config( - DYNAMIC_MAX_PROMPT_BATCHES=5, - DYNAMIC_PROMPTS_BATCHES_IN_A_SINGLE_GO=3, - DYNAMIC_NUMBER_OF_PROMPTS_IN_BATCH=99, - ), ] @@ -46,7 +41,7 @@ async def db_setup(): return prompts, workload -async def mock_download_json(*args, **kwargs): +async def mock_download_file_content(*args, **kwargs): return json.dumps({f"prompt{i}": f"answer{i}" for i in range(1, 4)}) @@ -55,8 +50,8 @@ async def mock_throw_error(*args, **kwargs): @patch( - "compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_json", - mock_download_json, + "compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_file_content", + mock_download_file_content, ) async def test_answer_prompts( settings, @@ -115,7 +110,7 @@ async def test_answer_prompts_job_failed( @patch( - "compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_json", + "compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_file_content", mock_throw_error, ) async def test_answer_prompts_download_failed( diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py index b027a236e..63ed837b0 100644 --- 
a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py
@@ -14,9 +14,9 @@
     pytest.mark.asyncio,
     pytest.mark.django_db(transaction=True),
     pytest.mark.override_config(
-        DYNAMIC_MAX_PROMPT_BATCHES=5,
-        DYNAMIC_PROMPTS_BATCHES_IN_A_SINGLE_GO=3,
-        DYNAMIC_NUMBER_OF_PROMPTS_IN_BATCH=99,
+        DYNAMIC_MAX_PROMPT_SERIES=5,
+        DYNAMIC_PROMPTS_SERIES_IN_A_SINGLE_GENERATION=3,
+        DYNAMIC_NUMBER_OF_PROMPTS_IN_SERIES=99,
     ),
 ]

From 5ca33b642616549c0f82858a8e152d7167076a93 Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Sun, 15 Sep 2024 15:20:14 +0100
Subject: [PATCH 19/53] schedule llama jobs

---
 .../src/compute_horde_validator/settings.py | 19 +-
 .../cross_validation/prompt_answering.py | 109 ++++++++++++
 .../cross_validation/prompt_generation.py | 9 -
 .../validator/synthetic_jobs/batch_run.py | 29 +++-
 .../validator/tasks.py | 162 ++++++------------
 .../test_prompt_answering.py | 12 +-
 .../test_prompt_generation.py | 28 +--
 7 files changed, 224 insertions(+), 144 deletions(-)
 create mode 100644 validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py

diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py
index 4fe070dae..5920ecc26 100644
--- a/validator/app/src/compute_horde_validator/settings.py
+++ b/validator/app/src/compute_horde_validator/settings.py
@@ -207,9 +207,14 @@ def wrapped(*args, **kwargs):
         "Maximum number of prompt series; above this limit the prompt generator is not triggered",
         int,
     ),
-    "DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE": (
+    "DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY": (
+        250,
+        "how many prompt samples to generate (should be larger than how many prompt series we use per synthetic run)",
+        int,
+    ),
+    "DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD": (
         100,
-        "how many workloads are needed before running local inference",
+        "how many prompts to answer in a single workload",
         int,
     ),
     # prompt generation params
@@ -395,6 +400,16 @@ def wrapped(*args, **kwargs):
         "schedule": timedelta(minutes=5),
         "options": {},
     },
+    "llama_prompt_generation": {
+        "task": "compute_horde_validator.validator.tasks.llama_prompt_generation",
+        "schedule": timedelta(minutes=10),
+        "options": {},
+    },
+    "llama_prompt_answering": {
+        "task": "compute_horde_validator.validator.tasks.llama_prompt_answering",
+        "schedule": timedelta(minutes=10),
+        "options": {},
+    },
 }
 if env.bool("DEBUG_RUN_BEAT_VERY_OFTEN", default=False):
     CELERY_BEAT_SCHEDULE["run_synthetic_jobs"]["schedule"] = crontab(minute="*")

diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
new file mode 100644
index 000000000..1b8a7dc3c
--- /dev/null
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
@@ -0,0 +1,109 @@
+import logging
+import uuid
+from datetime import datetime
+
+import bittensor
+from asgiref.sync import sync_to_async
+from compute_horde.miner_client.organic import (
+    OrganicJobDetails,
+    OrganicMinerClient,
+    run_organic_job,
+)
+from django.conf import settings
+from django.db import transaction
+from django.utils.timezone import now
+
+from compute_horde_validator.validator.models import Prompt, SolveWorkload
+from
compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( + LlamaPromptsSyntheticJobGenerator, +) + +logger = logging.getLogger(__name__) + + +async def get_workload_prompts(workload: SolveWorkload) -> list[Prompt]: + return [ + x + async for x in Prompt.objects.select_related("sample").filter( + sample__workload_id=workload.id, answer__isnull=True + ) + ] + + +def _get_keypair() -> bittensor.Keypair: + return settings.BITTENSOR_WALLET().get_hotkey() + + +async def answer_prompts( + workload: SolveWorkload, + create_miner_client=OrganicMinerClient, + job_uuid: uuid.UUID | None = None, + wait_timeout: int | None = None, +) -> None: + if not all( + [ + settings.TRUSTED_MINER_KEY, + settings.TRUSTED_MINER_ADDRESS, + settings.TRUSTED_MINER_PORT, + ] + ): + logger.warning("Prompt generation miner not configured, skipping prompt generation") + return + + ts = datetime.now() + seed = workload.seed + prompts = await get_workload_prompts(workload) + + job_generator = LlamaPromptsSyntheticJobGenerator(None, prompts, workload.s3_url, seed) + await job_generator.ainit() + + job_uuid = job_uuid or uuid.uuid4() + job_details = OrganicJobDetails( + job_uuid=str(job_uuid), + docker_image=job_generator.docker_image_name(), + raw_script=job_generator.raw_script(), + docker_run_options_preset=job_generator.docker_run_options_preset(), + docker_run_cmd=job_generator.docker_run_cmd(), + total_job_timeout=job_generator.timeout_seconds(), + volume=await job_generator.volume(), + output=await job_generator.output_upload(), + ) + + wait_timeout = wait_timeout or job_generator.timeout_seconds() + + miner_client = create_miner_client( + miner_hotkey=settings.TRUSTED_MINER_KEY, + miner_address=settings.TRUSTED_MINER_ADDRESS, + miner_port=settings.TRUSTED_MINER_PORT, + job_uuid=str(job_uuid), + my_keypair=_get_keypair(), + ) + + await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout) + + try: + prompt_answers: dict[str, str] = await job_generator.get_prompt_answers() + except Exception: + logger.error("Failed to download prompt answers", exc_info=True) + return + + await sync_to_async(save_workload_answers)(workload, prompts, prompt_answers) + duration_seconds = (datetime.now() - ts).total_seconds() + logger.info( + f"Workload {workload} answered {len(prompts)} prompts in {duration_seconds} seconds" + ) + + +def save_workload_answers(workload, prompts, prompt_answers): + with transaction.atomic(): + # update the workload as finished + workload.finished_at = now() + workload.save() + + # update the prompts with the answers + for prompt in prompts: + if prompt.content in prompt_answers: + prompt.answer = prompt_answers[prompt.content] + else: + logger.warning(f"Prompt {prompt} was not found in the prompt answers generated") + Prompt.objects.bulk_update(prompts, ["answer"]) diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py index cf7873645..522293c77 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py @@ -34,15 +34,6 @@ async def generate_prompts( logger.warning("Prompt generation miner not configured, skipping prompt generation") return - limit = await aget_config("DYNAMIC_MAX_PROMPT_SERIES") - if current_count := await PromptSeries.objects.acount() >= limit: - logger.warning( - "There are 
%s series in the db exceeding the limit of %s, skipping prompt generation", - current_count, - limit, - ) - return - job_uuid = job_uuid or uuid.uuid4() num_batches = await aget_config("DYNAMIC_PROMPTS_SERIES_IN_A_SINGLE_GENERATION") diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index 5590e9003..cec6c950c 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -762,10 +762,7 @@ async def _close_client(ctx: BatchContext, miner_hotkey: str) -> None: await client.close() -async def _generate_jobs(ctx: BatchContext) -> None: - start_time = time.time() - generated_job_count = 0 - +async def get_llama_prompt_samples(ctx: BatchContext) -> PromptSample | None: # TODO: refactor into nicer abstraction llm_executor_count = sum( count @@ -782,8 +779,24 @@ async def _generate_jobs(ctx: BatchContext) -> None: )[:llm_executor_count] ) prompt_samples = [ps async for ps in prompt_samples] - assert len(prompt_samples) == llm_executor_count - prompt_samples_iter = iter(prompt_samples) + if len(prompt_samples) < llm_executor_count: + logger.warning( + "Not enough prompt samples for llama executors: %d < %d - will NOT run llama synthetic prompt jobs", + len(prompt_samples), + llm_executor_count, + ) + return None + return prompt_samples + + +async def _generate_jobs(ctx: BatchContext) -> None: + start_time = time.time() + generated_job_count = 0 + + prompt_samples = await get_llama_prompt_samples(ctx) + prompt_samples_iter = ( + iter(prompt_samples) if prompt_samples is not None else None + ) for hotkey, executors in ctx.executors.items(): miner_name = ctx.names[hotkey] @@ -792,6 +805,9 @@ async def _generate_jobs(ctx: BatchContext) -> None: for _ in range(count): kwargs = {} if executor_class == ExecutorClass.always_on__llm__a6000: + if prompt_samples_iter is None: + logger.warning("No llm prompt samples available, skipping llm job") + continue prompt_sample = next(prompt_samples_iter) kwargs = { "prompt_sample": prompt_sample, @@ -1414,6 +1430,7 @@ def _db_persist(ctx: BatchContext) -> None: synthetic_job.job_uuid: synthetic_job for synthetic_job in synthetic_jobs } prompt_samples: list[PromptSample] = [] + for job in ctx.jobs.values(): if job.executor_class != ExecutorClass.always_on__llm__a6000: continue diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index 1e48beb67..e993af366 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -5,7 +5,6 @@ import time import traceback import uuid -from collections.abc import Callable from datetime import timedelta from math import ceil, floor @@ -14,17 +13,12 @@ import celery.exceptions import numpy as np import requests -from asgiref.sync import async_to_sync, sync_to_async +from asgiref.sync import async_to_sync from bittensor.utils.weight_utils import process_weights_for_netuid from celery import shared_task from celery.result import allow_join_result from celery.utils.log import get_task_logger from compute_horde.dynamic_config import sync_dynamic_config -from compute_horde.miner_client.organic import ( - OrganicJobDetails, - OrganicMinerClient, - run_organic_job, -) from compute_horde.receipts import ( JobFinishedReceiptPayload, 
JobStartedReceiptPayload, @@ -37,6 +31,8 @@ from django.utils.timezone import now from compute_horde_validator.celery import app +from compute_horde_validator.validator.cross_validation.prompt_answering import answer_prompts +from compute_horde_validator.validator.cross_validation.prompt_generation import generate_prompts from compute_horde_validator.validator.locks import Locked, LockType, get_advisory_lock from compute_horde_validator.validator.metagraph_client import get_miner_axon_info from compute_horde_validator.validator.models import ( @@ -59,9 +55,6 @@ SYNTHETIC_JOBS_HARD_LIMIT, SYNTHETIC_JOBS_SOFT_LIMIT, ) -from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( - LlamaPromptsSyntheticJobGenerator, -) from compute_horde_validator.validator.synthetic_jobs.utils import ( create_and_run_synthetic_job_batch, ) @@ -1044,9 +1037,50 @@ def create_workload(seed: int): @app.task() -def create_sample_workloads(): - total_workloads_needed = config.DYNAMIC_NUMBER_OF_WORKLOADS_TO_TRIGGER_LOCAL_INFERENCE +def llama_prompt_generation(): + num_expected_prompt_series = config.DYNAMIC_MAX_PROMPT_SERIES + num_prompt_series = PromptSeries.objects.count() + if num_prompt_series < num_expected_prompt_series: + logger.info("There are %s series in the db, generating prompts", num_prompt_series) + async_to_sync(generate_prompts)() + else: + logger.warning( + "There are %s series in the db - skipping prompt generation", + num_prompt_series, + ) + + +# prompt_sample is ready for synthetic job when associated workload is finished (all prompts are answered) +@app.task() +def llama_prompt_answering(): + # prioritize answering all the workloads before generating new samples + unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True) + for workload in unprocessed_workloads: + async_to_sync(answer_prompts)(workload) + + # generate new prompt samples if needed + num_unused_prompt_samples = PromptSample.objects.filter(synthetic_job__isnull=True).count() + num_needed_prompt_samples = ( + config.DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY - num_unused_prompt_samples + ) + + if num_needed_prompt_samples > 0: + logger.info( + "There are %s prompt samples in the db, generating more", + num_unused_prompt_samples, + ) + create_sample_workloads(num_needed_prompt_samples) + return + else: + logger.warning( + "There are %s prompt samples - skipping prompt sampling", + num_unused_prompt_samples, + ) + + +def create_sample_workloads(num_needed_prompt_samples): prompts_per_sample = config.DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES + prompts_per_workload = config.DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD # set seed for the current synthetic jobs run seed = random.randint(0, 1000000) @@ -1057,19 +1091,18 @@ def create_sample_workloads(): # how many prompts we have sampled for current_workload so far current_workload_fill = 0 - # how many workloads we have finished (have enough samples) - workloads_done = 0 + prompt_samples_created = 0 # assume we have sufficient prompt series in the db to make all the prompt_samples needed # take a random order of prompt series to avoid using the same series at each synthetic jobs run for prompt_series in PromptSeries.objects.order_by("?").all(): - if current_workload_fill >= prompts_per_sample: + if current_workload_fill >= prompts_per_workload: + # finished creating all needed prompt samples so exit after last batch is filled + if prompt_samples_created >= num_needed_prompt_samples: + break + current_workload = create_workload(seed) 
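# A quick sanity check of the arithmetic above (editorial sketch, not part of
# the patch): each sampled series contributes prompts_per_sample prompts, a
# workload is closed once it reaches prompts_per_workload prompts, so the
# number of workloads needed for a target number of prompt samples is a
# ceiling division. The helper name below is illustrative.
def expected_workloads(needed_samples: int, per_sample: int, per_workload: int) -> int:
    samples_per_workload = per_workload // per_sample
    return -(-needed_samples // samples_per_workload)  # ceiling division


# defaults from settings.py: target 250 samples, 10 prompts each, 100 per workload
assert expected_workloads(250, 10, 100) == 25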
current_workload_fill = 0 - workloads_done += 1 - - if workloads_done >= total_workloads_needed: - break # get all prompts lines = get_prompts_from_s3_url(prompt_series.uuid, prompt_series.s3_url) @@ -1087,98 +1120,9 @@ def create_sample_workloads(): prompt_sample = PromptSample.objects.create( series=prompt_series, workload=current_workload ) + prompt_samples_created += 1 # save the sampled prompts as unanswered in the db Prompt.objects.bulk_create( [Prompt(sample=prompt_sample, content=line) for line in sampled_lines] ) - - # delete remaining empty workload - if current_workload_fill == 0: - current_workload.delete() - - -async def get_workload_prompts(workload: SolveWorkload) -> list[Prompt]: - return [ - x - async for x in Prompt.objects.select_related("sample").filter( - sample__workload_id=workload.id, answer__isnull=True - ) - ] - - -async def answer_prompts( - create_miner_client: Callable[..., OrganicMinerClient] | None = None, - job_uuid: uuid.UUID | None = None, - wait_timeout: int | None = None, -): - # TODO: this logic will be replaced - workloads = SolveWorkload.objects.filter( - # workload was not ran before on this prompt_sample - finished_at__isnull=True, - ) - - if not all( - [ - settings.TRUSTED_MINER_KEY, - settings.TRUSTED_MINER_ADDRESS, - settings.TRUSTED_MINER_PORT, - ] - ): - logger.warning("Prompt generation miner not configured, skipping prompt generation") - return - - async for workload in workloads: - seed = workload.seed - prompts = await get_workload_prompts(workload) - - job_generator = LlamaPromptsSyntheticJobGenerator(None, prompts, workload.s3_url, seed) - await job_generator.ainit() - - job_uuid = job_uuid or uuid.uuid4() - job_details = OrganicJobDetails( - job_uuid=str(job_uuid), - docker_image=job_generator.docker_image_name(), - raw_script=job_generator.raw_script(), - docker_run_options_preset=job_generator.docker_run_options_preset(), - docker_run_cmd=job_generator.docker_run_cmd(), - total_job_timeout=job_generator.timeout_seconds(), - volume=await job_generator.volume(), - output=await job_generator.output_upload(), - ) - - create_miner_client = create_miner_client or OrganicMinerClient - wait_timeout = wait_timeout or job_generator.timeout_seconds() - - miner_client = create_miner_client( - miner_hotkey=settings.TRUSTED_MINER_KEY, - miner_address=settings.TRUSTED_MINER_ADDRESS, - miner_port=settings.TRUSTED_MINER_PORT, - job_uuid=str(job_uuid), - my_keypair=get_keypair(), - ) - - await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout) - - try: - prompt_answers: dict[str, str] = await job_generator.get_prompt_answers() - except Exception: - logger.error("Failed to download prompt answers", exc_info=True) - continue - - await sync_to_async(save_workload_answers)(workload, prompts, prompt_answers) - - -def save_workload_answers(workload, prompts, prompt_answers): - with transaction.atomic(): - # update the workload as finished - workload.finished_at = now() - workload.save() - - # update the prompts with the answers - for prompt in prompts: - if prompt.content in prompt_answers: - prompt.answer = prompt_answers[prompt.content] - else: - logger.warning(f"Prompt {prompt} was not found in the prompt answers generated") - Prompt.objects.bulk_update(prompts, ["answer"]) diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py index 26d54dee8..55c047bef 100644 --- 
a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py @@ -6,13 +6,13 @@ import pytest from compute_horde.miner_client.organic import OrganicJobError +from compute_horde_validator.validator.cross_validation.prompt_answering import answer_prompts from compute_horde_validator.validator.models import ( Prompt, PromptSample, PromptSeries, SolveWorkload, ) -from compute_horde_validator.validator.tasks import answer_prompts from compute_horde_validator.validator.tests.transport import MinerSimulationTransport pytestmark = [ @@ -70,7 +70,9 @@ async def test_answer_prompts( prompts, workload = await db_setup() - await answer_prompts(create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2) + await answer_prompts( + workload, create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 + ) await workload.arefresh_from_db() assert workload.finished_at is not None @@ -98,7 +100,7 @@ async def test_answer_prompts_job_failed( with pytest.raises(OrganicJobError): await answer_prompts( - create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 + workload, create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 ) await workload.arefresh_from_db() @@ -130,7 +132,9 @@ async def test_answer_prompts_download_failed( prompts, workload = await db_setup() - await answer_prompts(create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2) + await answer_prompts( + workload, create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 + ) await workload.arefresh_from_db() assert workload.finished_at is None diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py index 63ed837b0..fcdccbe8d 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py @@ -109,17 +109,17 @@ async def test_generate_prompts_timeout( assert not await PromptSeries.objects.aexists() -async def test_generate_prompts_max_batches_reached( - create_miner_client: Callable, - job_uuid: uuid.UUID, -): - existing = [] - for _ in range(5): - existing.append(PromptSeries(s3_url="", generator_version=1)) - await PromptSeries.objects.abulk_create(existing) - - await generate_prompts( - create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=0.5 - ) - - assert await PromptSeries.objects.acount() == 5 +# async def test_generate_prompts_max_batches_reached( +# create_miner_client: Callable, +# job_uuid: uuid.UUID, +# ): +# existing = [] +# for _ in range(5): +# existing.append(PromptSeries(s3_url="", generator_version=1)) +# await PromptSeries.objects.abulk_create(existing) +# +# await generate_prompts( +# create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=0.5 +# ) +# +# assert await PromptSeries.objects.acount() == 5 From 8a4e9204bd5464ec19dc84fb39efa2eb8b66e9b7 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Sun, 15 Sep 2024 15:28:49 +0100 Subject: [PATCH 20/53] debug llama commands --- .../commands/debug_run_llama_prompt_answering.py | 15 +++++++++++++++ .../commands/debug_run_llama_prompt_generation.py | 15 
+++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py create mode 100644 validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py diff --git a/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py new file mode 100644 index 000000000..3a9eab5cd --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py @@ -0,0 +1,15 @@ +import logging + +from django.core.management.base import BaseCommand + +from compute_horde_validator.validator.tasks import llama_prompt_answering + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + def add_arguments(self, parser): + pass + + def handle(self, *args, **options): + llama_prompt_answering() diff --git a/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py new file mode 100644 index 000000000..4882940b3 --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py @@ -0,0 +1,15 @@ +import logging + +from django.core.management.base import BaseCommand + +from compute_horde_validator.validator.tasks import llama_prompt_generation + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + def add_arguments(self, parser): + pass + + def handle(self, *args, **options): + llama_prompt_generation() From f3f91bd24e43390dc84ecbea68db0c559ce0fc75 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Mon, 16 Sep 2024 11:12:33 +0100 Subject: [PATCH 21/53] fixes --- .../cross_validation/generator/v0/__init__.py | 2 +- .../app/src/compute_horde_validator/validator/tasks.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py index f901e2005..0a53bb1f9 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py @@ -11,7 +11,7 @@ def timeout_seconds(self) -> int: return 3600 def docker_image_name(self) -> str: - return "backenddevelopersltd/compute-horde-prompt-gen:v0-latest" + return "backenddevelopersltd/compute-horde-prompt-gen-phi:v0-latest" def docker_run_cmd(self) -> list[str]: return [ diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index e993af366..6dc17aa07 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -1032,7 +1032,9 @@ def fetch_dynamic_config() -> None: def create_workload(seed: int): # generate an s3 url to upload sample batch job result in workload_uuid = uuid.uuid4() - s3_url = generate_upload_url(key=workload_uuid, bucket_name=settings.S3_BUCKET_NAME_ANSWERS) + s3_url = generate_upload_url( + key=str(workload_uuid), 
bucket_name=settings.S3_BUCKET_NAME_ANSWERS + ) return SolveWorkload.objects.create(workload_uuid=workload_uuid, seed=seed, s3_url=s3_url) @@ -1105,7 +1107,7 @@ def create_sample_workloads(num_needed_prompt_samples): current_workload_fill = 0 # get all prompts - lines = get_prompts_from_s3_url(prompt_series.uuid, prompt_series.s3_url) + lines = get_prompts_from_s3_url(prompt_series.s3_url) # should always have enough prompts if len(lines) <= prompts_per_sample: @@ -1126,3 +1128,7 @@ def create_sample_workloads(num_needed_prompt_samples): Prompt.objects.bulk_create( [Prompt(sample=prompt_sample, content=line) for line in sampled_lines] ) + + # delete the current workload if it's not filled + if current_workload_fill < prompts_per_workload: + current_workload.delete() From 8184ac01902caa53a6e7e9b0831b063956f72ba4 Mon Sep 17 00:00:00 2001 From: andreea-popescu-reef <160024917+andreea-popescu-reef@users.noreply.github.com> Date: Mon, 16 Sep 2024 20:20:42 +0800 Subject: [PATCH 22/53] Update validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py Co-authored-by: emnoor-reef <137923473+emnoor-reef@users.noreply.github.com> --- .../validator/synthetic_jobs/batch_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index cec6c950c..b8594c1b9 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -762,7 +762,7 @@ async def _close_client(ctx: BatchContext, miner_hotkey: str) -> None: await client.close() -async def get_llama_prompt_samples(ctx: BatchContext) -> PromptSample | None: +async def get_llama_prompt_samples(ctx: BatchContext) -> list[PromptSample] | None: # TODO: refactor into nicer abstraction llm_executor_count = sum( count From b15cc926ad6fe581a438e25f99ca3c3b7d4fffd7 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 17 Sep 2024 04:14:58 +0600 Subject: [PATCH 23/53] fix format --- .../validator/synthetic_jobs/batch_run.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index b8594c1b9..47c584d9e 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -794,9 +794,7 @@ async def _generate_jobs(ctx: BatchContext) -> None: generated_job_count = 0 prompt_samples = await get_llama_prompt_samples(ctx) - prompt_samples_iter = ( - iter(prompt_samples) if prompt_samples is not None else None - ) + prompt_samples_iter = iter(prompt_samples) if prompt_samples is not None else None for hotkey, executors in ctx.executors.items(): miner_name = ctx.names[hotkey] From 5faa124559cd242112ac384f6e4c3d7ad050d3a5 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Tue, 17 Sep 2024 02:47:19 +0100 Subject: [PATCH 24/53] add PROMPT_GENERATION_MODEL var --- validator/app/src/compute_horde_validator/settings.py | 2 ++ .../validator/cross_validation/generator/v0/__init__.py | 5 +++-- .../validator/cross_validation/prompt_answering.py | 4 ++-- .../validator/cross_validation/prompt_generation.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git 
a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py
index 5920ecc26..695ddab0f 100644
--- a/validator/app/src/compute_horde_validator/settings.py
+++ b/validator/app/src/compute_horde_validator/settings.py
@@ -43,6 +43,8 @@ def wrapped(*args, **kwargs):
 
 ENV = env("ENV", default="prod")
 
+PROMPT_GENERATION_MODEL = env("PROMPT_GENERATION_MODEL", default="phi3")
+
 DEFAULT_ADMIN_PASSWORD = env("DEFAULT_ADMIN_PASSWORD", default=None)
 DEFAULT_ADMIN_USERNAME = env("DEFAULT_ADMIN_USERNAME", default="admin")
 DEFAULT_ADMIN_EMAIL = env("DEFAULT_ADMIN_EMAIL", default="admin@admin.com")
diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
index 0a53bb1f9..59e2d18e9 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
@@ -1,6 +1,7 @@
 from compute_horde.base.output_upload import MultiUpload, SingleFilePutUpload
 
 from ..base import BasePromptJobGenerator
+from django.conf import settings
 
 
 class PromptJobGenerator(BasePromptJobGenerator):
@@ -11,12 +12,12 @@ def timeout_seconds(self) -> int:
         return 3600
 
     def docker_image_name(self) -> str:
-        return "backenddevelopersltd/compute-horde-prompt-gen-phi:v0-latest"
+        return f"backenddevelopersltd/compute-horde-prompt-gen-{settings.PROMPT_GENERATION_MODEL}:v0-latest"
 
     def docker_run_cmd(self) -> list[str]:
         return [
             "--model_name",
-            "phi3",
+            settings.PROMPT_GENERATION_MODEL,
             "--number_of_prompts_per_batch",
             str(self.num_prompts_per_batch),
             "--uuids",
diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
index 1b8a7dc3c..c16ab964d 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
@@ -47,7 +47,7 @@ async def answer_prompts(
             settings.TRUSTED_MINER_PORT,
         ]
     ):
-        logger.warning("Prompt generation miner not configured, skipping prompt generation")
+        logger.warning("Trusted miner not configured, skipping prompt answering")
         return
 
     ts = datetime.now()
@@ -105,5 +105,5 @@ def save_workload_answers(workload, prompts, prompt_answers):
         if prompt.content in prompt_answers:
             prompt.answer = prompt_answers[prompt.content]
         else:
-            logger.warning(f"Prompt {prompt} was not found in the prompt answers generated")
+            logger.error(f"Prompt {prompt} was not found in the prompt answers generated")
     Prompt.objects.bulk_update(prompts, ["answer"])
diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
index 522293c77..959d775ad 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
@@ -31,7 +31,7 @@
             settings.TRUSTED_MINER_PORT,
         ]
     ):
-        logger.warning("Prompt generation miner not configured, skipping prompt generation")
+        logger.warning("Trusted miner not configured, skipping prompt generation")
         return
 
     job_uuid = job_uuid or uuid.uuid4()
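With PROMPT_GENERATION_MODEL in place, the generator image tag and the --model_name argument are derived from a single setting and can no longer drift apart. A minimal sketch of the mapping, assuming only the f-string visible in the diff (the helper name and the "llama3" value are illustrative):

def prompt_gen_image(model_name: str) -> str:
    # mirrors PromptJobGenerator.docker_image_name() after this patch
    return f"backenddevelopersltd/compute-horde-prompt-gen-{model_name}:v0-latest"


assert prompt_gen_image("phi3") == "backenddevelopersltd/compute-horde-prompt-gen-phi3:v0-latest"
assert prompt_gen_image("llama3").endswith("-llama3:v0-latest")  # hypothetical model name

From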
a343b4aa041e4a1824ceab0bfc6ef9d625c22bd4 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Tue, 17 Sep 2024 06:07:50 +0100 Subject: [PATCH 25/53] separate llm prompt job generator --- .../cross_validation/prompt_answering.py | 9 +++-- .../synthetic_jobs/generator/llm_prompts.py | 37 ++++++++++++++----- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py index c16ab964d..5838e736a 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py @@ -14,8 +14,8 @@ from django.utils.timezone import now from compute_horde_validator.validator.models import Prompt, SolveWorkload -from compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts import ( - LlamaPromptsSyntheticJobGenerator, +from compute_horde_validator.validator.synthetic_jobs.generator.llm_prompts import ( + LlmPromptsJobGenerator, ) logger = logging.getLogger(__name__) @@ -54,7 +54,7 @@ async def answer_prompts( seed = workload.seed prompts = await get_workload_prompts(workload) - job_generator = LlamaPromptsSyntheticJobGenerator(None, prompts, workload.s3_url, seed) + job_generator = LlmPromptsJobGenerator(workload.s3_url, seed) await job_generator.ainit() job_uuid = job_uuid or uuid.uuid4() @@ -82,7 +82,8 @@ async def answer_prompts( await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout) try: - prompt_answers: dict[str, str] = await job_generator.get_prompt_answers() + await job_generator._download_answers() + prompt_answers: dict[str, str] = job_generator.prompt_answers except Exception: logger.error("Failed to download prompt answers", exc_info=True) return diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py index ccb8be399..1a38fa405 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py @@ -16,19 +16,15 @@ from .base import BaseSyntheticJobGenerator -class LlmPromptsSyntheticJobGenerator(BaseSyntheticJobGenerator): +class LlmPromptsJobGenerator(BaseSyntheticJobGenerator): def __init__( self, - prompt_sample: PromptSample | None, - expected_prompts: list[Prompt], s3_url: str, seed: int, **kwargs, ): super().__init__(**kwargs) - self.prompt_sample: PromptSample | None = prompt_sample self.seed = seed - self.expected_prompts: list[Prompt] = expected_prompts self.s3_url = s3_url self.input_filename = str(uuid.uuid4()) + ".txt" self.s3_output_key = str(uuid.uuid4()) + ".json" @@ -98,6 +94,33 @@ async def _download_answers(self): response = await download_file_content(self._url_for_download()) self.prompt_answers = pydantic.TypeAdapter(dict[str, str]).validate_json(response) + def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: + # just check if there are any answers + if self.prompt_answers == {}: + return False, "no answers", 0.0 + return True, "answers exist", 1.0 + + def job_description(self) -> str: + return "LLM prompts job" + + +class LlmPromptsSyntheticJobGenerator(LlmPromptsJobGenerator): + def __init__( + self, + prompt_sample: 
PromptSample, + expected_prompts: list[Prompt], + s3_url: str, + seed: int, + **kwargs, + ): + super().__init__( + s3_url=s3_url, + seed=seed, + **kwargs, + ) + self.prompt_sample: PromptSample = prompt_sample + self.expected_prompts: list[Prompt] = expected_prompts + def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str, float]: for expected_prompt in self.expected_prompts: if expected_prompt.content not in self.prompt_answers: @@ -107,9 +130,5 @@ def verify(self, msg: V0JobFinishedRequest, time_took: float) -> tuple[bool, str return True, "", 1.0 - async def get_prompt_answers(self) -> dict[str, str]: - await self._download_answers() - return self.prompt_answers - def job_description(self) -> str: return "LLM prompts synthetic job" From 1b0ecd09d5a967a4a801db9922b828ef20f7ba88 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Tue, 17 Sep 2024 08:24:48 +0100 Subject: [PATCH 26/53] cleanup sampling --- .../src/compute_horde_validator/settings.py | 25 ++-- .../cross_validation/generator/v0/__init__.py | 2 +- .../debug_run_llama_prompt_answering.py | 15 --- .../debug_run_llama_prompt_generation.py | 15 --- .../commands/debug_run_llm_prompt_task.py | 32 +++++ .../compute_horde_validator/validator/s3.py | 11 +- .../validator/synthetic_jobs/batch_run.py | 6 +- .../validator/tasks.py | 109 +++++++++++------- .../test_prompt_answering.py | 4 +- .../test_prompt_generation.py | 16 --- .../validator/tests/test_s3.py | 6 +- 11 files changed, 133 insertions(+), 108 deletions(-) delete mode 100644 validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py delete mode 100644 validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py create mode 100644 validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llm_prompt_task.py diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py index 695ddab0f..51b7c1ec4 100644 --- a/validator/app/src/compute_horde_validator/settings.py +++ b/validator/app/src/compute_horde_validator/settings.py @@ -402,16 +402,21 @@ def wrapped(*args, **kwargs): "schedule": timedelta(minutes=5), "options": {}, }, - "llama_prompt_generation": { - "task": "compute_horde_validator.validator.tasks.llama_prompt_generation", - "schedule": timedelta(minutes=10), - "options": {}, - }, - "llama_prompt_answering": { - "task": "compute_horde_validator.validator.tasks.llama_prompt_answering", - "schedule": timedelta(minutes=10), - "options": {}, - }, + # "llm_prompt_generation": { + # "task": "compute_horde_validator.validator.tasks.llm_prompt_generation", + # "schedule": timedelta(minutes=10), + # "options": {}, + # }, + # "llm_prompt_sampling": { + # "task": "compute_horde_validator.validator.tasks.llm_prompt_sampling", + # "schedule": timedelta(minutes=10), + # "options": {}, + # }, + # "llm_prompt_answering": { + # "task": "compute_horde_validator.validator.tasks.llm_prompt_answering", + # "schedule": timedelta(minutes=10), + # "options": {}, + # }, } if env.bool("DEBUG_RUN_BEAT_VERY_OFTEN", default=False): CELERY_BEAT_SCHEDULE["run_synthetic_jobs"]["schedule"] = crontab(minute="*") diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py index 59e2d18e9..edcc5c32e 100644 --- 
a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py @@ -1,7 +1,7 @@ from compute_horde.base.output_upload import MultiUpload, SingleFilePutUpload +from django.conf import settings from ..base import BasePromptJobGenerator -from django.conf import settings class PromptJobGenerator(BasePromptJobGenerator): diff --git a/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py deleted file mode 100644 index 3a9eab5cd..000000000 --- a/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_answering.py +++ /dev/null @@ -1,15 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand - -from compute_horde_validator.validator.tasks import llama_prompt_answering - -logger = logging.getLogger(__name__) - - -class Command(BaseCommand): - def add_arguments(self, parser): - pass - - def handle(self, *args, **options): - llama_prompt_answering() diff --git a/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py deleted file mode 100644 index 4882940b3..000000000 --- a/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llama_prompt_generation.py +++ /dev/null @@ -1,15 +0,0 @@ -import logging - -from django.core.management.base import BaseCommand - -from compute_horde_validator.validator.tasks import llama_prompt_generation - -logger = logging.getLogger(__name__) - - -class Command(BaseCommand): - def add_arguments(self, parser): - pass - - def handle(self, *args, **options): - llama_prompt_generation() diff --git a/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llm_prompt_task.py b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llm_prompt_task.py new file mode 100644 index 000000000..c733c81c5 --- /dev/null +++ b/validator/app/src/compute_horde_validator/validator/management/commands/debug_run_llm_prompt_task.py @@ -0,0 +1,32 @@ +import logging +import sys + +from django.core.management.base import BaseCommand + +from compute_horde_validator.validator.tasks import ( + llm_prompt_answering, + llm_prompt_generation, + llm_prompt_sampling, +) + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "--action", type=str, help="generation | sampling | answering", required=True + ) + + def handle(self, *args, **options): + action = options["action"] + logger.info(f"Running LLM prompt task with action: {action}") + if action == "generation": + llm_prompt_generation() + elif action == "sampling": + llm_prompt_sampling() + elif action == "answering": + llm_prompt_answering() + else: + logger.warning("Invalid action") + sys.exit(1) diff --git a/validator/app/src/compute_horde_validator/validator/s3.py b/validator/app/src/compute_horde_validator/validator/s3.py index bf8d82146..5aa23e8d7 100644 --- a/validator/app/src/compute_horde_validator/validator/s3.py +++ b/validator/app/src/compute_horde_validator/validator/s3.py @@ -46,7 +46,16 @@ def get_public_url(key: str, *, bucket_name: str, prefix: str = "") -> str: return 
f"{endpoint_url}/{bucket_name}/{prefix}{key}" -def get_prompts_from_s3_url(s3_url: str) -> Generator[tuple[str, list[str]]]: +# TODO: retries etc +def upload_prompts_to_s3_url(s3_url: str, content: str) -> bool: + response = requests.put(s3_url, data=content) + if response.status_code != 200: + logger.warning(f"Failed to upload prompts to {s3_url}") + return False + return True + + +def download_prompts_from_s3_url(s3_url: str) -> Generator[tuple[str, list[str]]]: response = requests.get(s3_url) if response.status_code != 200: logger.warning(f"Failed to download prompts from {s3_url}") diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py index 47c584d9e..5b929d896 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py @@ -762,7 +762,7 @@ async def _close_client(ctx: BatchContext, miner_hotkey: str) -> None: await client.close() -async def get_llama_prompt_samples(ctx: BatchContext) -> list[PromptSample] | None: +async def get_llm_prompt_samples(ctx: BatchContext) -> list[PromptSample] | None: # TODO: refactor into nicer abstraction llm_executor_count = sum( count @@ -781,7 +781,7 @@ async def get_llama_prompt_samples(ctx: BatchContext) -> list[PromptSample] | No prompt_samples = [ps async for ps in prompt_samples] if len(prompt_samples) < llm_executor_count: logger.warning( - "Not enough prompt samples for llama executors: %d < %d - will NOT run llama synthetic prompt jobs", + "Not enough prompt samples for llm executors: %d < %d - will NOT run llm synthetic prompt jobs", len(prompt_samples), llm_executor_count, ) @@ -793,7 +793,7 @@ async def _generate_jobs(ctx: BatchContext) -> None: start_time = time.time() generated_job_count = 0 - prompt_samples = await get_llama_prompt_samples(ctx) + prompt_samples = await get_llm_prompt_samples(ctx) prompt_samples_iter = iter(prompt_samples) if prompt_samples is not None else None for hotkey, executors in ctx.executors.items(): diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index 6dc17aa07..2a15a8340 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -50,7 +50,12 @@ ) from compute_horde_validator.validator.organic_jobs.miner_client import MinerClient from compute_horde_validator.validator.organic_jobs.miner_driver import execute_organic_job -from compute_horde_validator.validator.s3 import generate_upload_url, get_prompts_from_s3_url +from compute_horde_validator.validator.s3 import ( + download_prompts_from_s3_url, + generate_download_url, + generate_upload_url, + upload_prompts_to_s3_url, +) from compute_horde_validator.validator.synthetic_jobs.batch_run import ( SYNTHETIC_JOBS_HARD_LIMIT, SYNTHETIC_JOBS_SOFT_LIMIT, @@ -1029,17 +1034,8 @@ def fetch_dynamic_config() -> None: ) -def create_workload(seed: int): - # generate an s3 url to upload sample batch job result in - workload_uuid = uuid.uuid4() - s3_url = generate_upload_url( - key=str(workload_uuid), bucket_name=settings.S3_BUCKET_NAME_ANSWERS - ) - return SolveWorkload.objects.create(workload_uuid=workload_uuid, seed=seed, s3_url=s3_url) - - @app.task() -def llama_prompt_generation(): +def llm_prompt_generation(): num_expected_prompt_series = 
config.DYNAMIC_MAX_PROMPT_SERIES num_prompt_series = PromptSeries.objects.count() if num_prompt_series < num_expected_prompt_series: @@ -1052,14 +1048,31 @@ def llama_prompt_generation(): ) -# prompt_sample is ready for synthetic job when associated workload is finished (all prompts are answered) @app.task() -def llama_prompt_answering(): - # prioritize answering all the workloads before generating new samples - unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True) +def llm_prompt_answering(): + unprocessed_workloads = SolveWorkload.objects.filter( + finished_at__isnull=True + ) for workload in unprocessed_workloads: async_to_sync(answer_prompts)(workload) + +def init_workload(seed: int): + workload_uuid = uuid.uuid4() + # generate an s3 url to upload workload prompts to + s3_upload_url = generate_upload_url( + key=str(workload_uuid), bucket_name=settings.S3_BUCKET_NAME_ANSWERS + ) + # generate an s3 url to download workload prompts to be answered + s3_url = generate_download_url( + key=str(workload_uuid), + bucket_name=settings.S3_BUCKET_NAME_ANSWERS, + ) + return SolveWorkload(workload_uuid=workload_uuid, seed=seed, s3_url=s3_url), s3_upload_url + + +@app.task() +def llm_prompt_sampling(): # generate new prompt samples if needed num_unused_prompt_samples = PromptSample.objects.filter(synthetic_job__isnull=True).count() num_needed_prompt_samples = ( @@ -1080,6 +1093,17 @@ def llama_prompt_answering(): ) +def persist_workload( + workload: SolveWorkload, prompt_samples: list[PromptSample], prompts: list[Prompt] +): + logger.info(f"Saving workload {workload}") + # save the sampled prompts as unanswered in the db + with transaction.atomic(): + workload.save() + PromptSample.objects.bulk_create(prompt_samples) + Prompt.objects.bulk_create(prompts) + + def create_sample_workloads(num_needed_prompt_samples): prompts_per_sample = config.DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES prompts_per_workload = config.DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD @@ -1088,47 +1112,48 @@ def create_sample_workloads(num_needed_prompt_samples): seed = random.randint(0, 1000000) # workload we are currently sampling for - current_workload = create_workload(seed) + current_workload, current_upload_url = init_workload(seed) - # how many prompts we have sampled for current_workload so far - current_workload_fill = 0 + # how many prompts series we sampled so far + # for each prompt series there is one prompt sample + num_prompt_series_sampled = 0 - prompt_samples_created = 0 + current_prompt_samples = [] + current_prompts = [] # assume we have sufficient prompt series in the db to make all the prompt_samples needed # take a random order of prompt series to avoid using the same series at each synthetic jobs run for prompt_series in PromptSeries.objects.order_by("?").all(): - if current_workload_fill >= prompts_per_workload: - # finished creating all needed prompt samples so exit after last batch is filled - if prompt_samples_created >= num_needed_prompt_samples: - break - - current_workload = create_workload(seed) - current_workload_fill = 0 - # get all prompts - lines = get_prompts_from_s3_url(prompt_series.s3_url) + lines = download_prompts_from_s3_url(prompt_series.s3_url) # should always have enough prompts if len(lines) <= prompts_per_sample: - logger.error("Skipping bucket %s, not enough prompts", prompt_series.s3_url) + logger.error(f"Skipping bucket {prompt_series.s3_url}, not enough prompts") continue # sample prompts sampled_lines = random.sample(lines, prompts_per_sample) - 
current_workload_fill += len(sampled_lines) - with transaction.atomic(): - prompt_sample = PromptSample.objects.create( - series=prompt_series, workload=current_workload - ) - prompt_samples_created += 1 + prompt_sample = PromptSample(series=prompt_series, workload=current_workload) + current_prompt_samples += [prompt_sample] + current_prompts += [Prompt(sample=prompt_sample, content=line) for line in sampled_lines] - # save the sampled prompts as unanswered in the db - Prompt.objects.bulk_create( - [Prompt(sample=prompt_sample, content=line) for line in sampled_lines] - ) + if len(current_prompts) >= prompts_per_workload: + content = "\n".join([p.content for p in current_prompts]) + if upload_prompts_to_s3_url(current_upload_url, content): + # save the workload in the db + persist_workload(current_workload, current_prompt_samples, current_prompts) + num_prompt_series_sampled += len(current_prompt_samples) + else: + logger.error(f"Failed to create workload {current_workload} - skipping") + + # finished creating all needed prompt samples so exit after last batch is filled + if num_prompt_series_sampled >= num_needed_prompt_samples: + logger.info(f"Created {num_prompt_series_sampled} new prompt samples") + break - # delete the current workload if it's not filled - if current_workload_fill < prompts_per_workload: - current_workload.delete() + # reset for next workload + current_workload, current_upload_url = init_workload(seed) + current_prompt_samples = [] + current_prompts = [] diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py index 55c047bef..6d793a676 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py @@ -50,7 +50,7 @@ async def mock_throw_error(*args, **kwargs): @patch( - "compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_file_content", + "compute_horde_validator.validator.synthetic_jobs.generator.llm_prompts.download_file_content", mock_download_file_content, ) async def test_answer_prompts( @@ -112,7 +112,7 @@ async def test_answer_prompts_job_failed( @patch( - "compute_horde_validator.validator.synthetic_jobs.generator.llama_prompts.download_file_content", + "compute_horde_validator.validator.synthetic_jobs.generator.llm_prompts.download_file_content", mock_throw_error, ) async def test_answer_prompts_download_failed( diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py index fcdccbe8d..2616606b6 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py @@ -107,19 +107,3 @@ async def test_generate_prompts_timeout( ) assert not await PromptSeries.objects.aexists() - - -# async def test_generate_prompts_max_batches_reached( -# create_miner_client: Callable, -# job_uuid: uuid.UUID, -# ): -# existing = [] -# for _ in range(5): -# existing.append(PromptSeries(s3_url="", generator_version=1)) -# await PromptSeries.objects.abulk_create(existing) -# -# await 
generate_prompts(
-#         create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=0.5
-#     )
-#
-#     assert await PromptSeries.objects.acount() == 5
diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
index 532322e7a..f130e39ad 100644
--- a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
@@ -6,7 +6,7 @@
 from compute_horde_validator.validator.s3 import (
     generate_download_url,
     generate_upload_url,
-    get_prompts_from_s3_url,
+    download_prompts_from_s3_url,
     get_public_url,
     get_s3_client,
 )
@@ -94,7 +94,7 @@ def test_get_public_url(
         (404, "Not Found", []),
     ],
 )
-def test_get_prompts_from_s3_url(status_code, content, expected):
+def test_download_prompts_from_s3_url(status_code, content, expected):
     with patch("requests.get") as mock_get:
         # Mock the requests.get response
         mock_response = MagicMock()
@@ -102,7 +102,7 @@ def test_get_prompts_from_s3_url(status_code, content, expected):
         mock_response.text = content
         mock_get.return_value = mock_response
 
-        result = get_prompts_from_s3_url("https://fake-s3-url.com/prompts.txt")
+        result = download_prompts_from_s3_url("https://fake-s3-url.com/prompts.txt")
 
         assert result == expected
         mock_get.assert_called_once_with("https://fake-s3-url.com/prompts.txt")

From 0019cb227dc37b3d2e4503fc70ff13e2ed0d5599 Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Tue, 17 Sep 2024 13:01:20 +0100
Subject: [PATCH 27/53] unit test sampling

---
 .../validator/tasks.py                        |   1 +
 .../validator/tests/test_llm_tasks.py         | 166 ++++++++++++++++++
 .../validator/tests/test_s3.py                |   2 +-
 3 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py

diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py
index 2a15a8340..54c808907 100644
--- a/validator/app/src/compute_horde_validator/validator/tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tasks.py
@@ -1050,6 +1050,7 @@ def llm_prompt_generation():
 
 @app.task()
 def llm_prompt_answering():
+    # TODO: handle parallelism
     unprocessed_workloads = SolveWorkload.objects.filter(
         finished_at__isnull=True
     )
diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py b/validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py
new file mode 100644
index 000000000..2302b48e4
--- /dev/null
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py
@@ -0,0 +1,166 @@
+from unittest.mock import patch
+
+import pytest
+
+from compute_horde_validator.validator.models import (
+    Prompt,
+    PromptSample,
+    PromptSeries,
+    SolveWorkload,
+)
+from compute_horde_validator.validator.tasks import llm_prompt_generation, llm_prompt_sampling
+
+
+def create_prompt_series(num: int):
+    PromptSeries.objects.bulk_create(
+        [PromptSeries(s3_url="", generator_version=1) for _ in range(num)]
+    )
+
+
+@pytest.mark.override_config(DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=5)
+@pytest.mark.django_db(transaction=True)
+def test_llm_prompt_sampling__will_not_trigger():
+    prompt_series = PromptSeries.objects.create(s3_url="", generator_version=1)
+    for i in range(5):
+        workload = SolveWorkload.objects.create(seed=i, s3_url="s3://test")
+        PromptSample.objects.create(series=prompt_series, workload=workload)
+
+    with patch(
+        "compute_horde_validator.validator.tasks.create_sample_workloads"
+    ) as mock_create_sample_workloads:
+        llm_prompt_sampling()
+    assert not mock_create_sample_workloads.called
+
+
+@pytest.mark.override_config(DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=5)
+@pytest.mark.django_db(transaction=True)
+@patch("compute_horde_validator.validator.tasks.upload_prompts_to_s3_url", lambda *args: False)
+@patch(
+    "compute_horde_validator.validator.tasks.download_prompts_from_s3_url",
+    lambda *args: ["test" for _ in range(10)],
+)
+def test_llm_prompt_sampling__fail_upload_to_s3():
+    create_prompt_series(4)
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 0
+    assert PromptSample.objects.count() == 0
+    assert Prompt.objects.count() == 0
+
+
+@pytest.mark.override_config(DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=5)
+@pytest.mark.django_db(transaction=True)
+@patch("compute_horde_validator.validator.tasks.download_prompts_from_s3_url", lambda *args: [])
+def test_llm_prompt_sampling__fail_download_from_s3():
+    create_prompt_series(4)
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 0
+    assert PromptSample.objects.count() == 0
+    assert Prompt.objects.count() == 0
+
+
+@pytest.mark.override_config(
+    DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=5,
+    DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES=10,
+    DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD=20,
+)
+@pytest.mark.django_db(transaction=True)
+@patch("compute_horde_validator.validator.tasks.upload_prompts_to_s3_url", lambda *args: True)
+@patch(
+    "compute_horde_validator.validator.tasks.download_prompts_from_s3_url",
+    lambda *args: ["test" for _ in range(240)],
+)
+def test_llm_prompt_sampling__success():
+    create_prompt_series(10)
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 3
+    assert PromptSample.objects.count() == 6
+    assert Prompt.objects.count() == 60
+
+
+@pytest.mark.override_config(
+    DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=11,
+    DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES=10,
+    DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD=20,
+)
+@pytest.mark.django_db(transaction=True)
+@patch("compute_horde_validator.validator.tasks.upload_prompts_to_s3_url", lambda *args: True)
+@patch(
+    "compute_horde_validator.validator.tasks.download_prompts_from_s3_url",
+    lambda *args: ["test" for _ in range(240)],
+)
+def test_llm_prompt_sampling__not_enough_prompt_series():
+    create_prompt_series(5)
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 2
+    assert PromptSample.objects.count() == 4
+    assert Prompt.objects.count() == 40
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 4
+    assert PromptSample.objects.count() == 8
+    assert Prompt.objects.count() == 80
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 6
+    assert PromptSample.objects.count() == 12
+    assert Prompt.objects.count() == 120
+    # will not sample more
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 6
+    assert PromptSample.objects.count() == 12
+    assert Prompt.objects.count() == 120
+
+
+@pytest.mark.override_config(
+    DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=4,
+    DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES=100,
+    DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD=80,
+)
+@pytest.mark.django_db(transaction=True)
+@patch("compute_horde_validator.validator.tasks.upload_prompts_to_s3_url", lambda *args: True)
+@patch(
+    "compute_horde_validator.validator.tasks.download_prompts_from_s3_url",
+    lambda *args: ["test" for _ in range(240)],
+)
+def test_llm_prompt_sampling__one_sample_per_workload():
+    create_prompt_series(4)
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 4
+    assert PromptSample.objects.count() == 4
+    assert Prompt.objects.count() == 400
+
+
+@pytest.mark.override_config(
+    DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=1,
+    DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES=1,
+    DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD=5,
+)
+@pytest.mark.django_db(transaction=True)
+@patch("compute_horde_validator.validator.tasks.upload_prompts_to_s3_url", lambda *args: True)
+@patch(
+    "compute_horde_validator.validator.tasks.download_prompts_from_s3_url",
+    lambda *args: ["test" for _ in range(240)],
+)
+def test_llm_prompt_sampling__not_enough_for_one_workload():
+    create_prompt_series(4)
+    llm_prompt_sampling()
+    assert SolveWorkload.objects.count() == 0
+    assert PromptSample.objects.count() == 0
+    assert Prompt.objects.count() == 0
+
+
+@pytest.mark.override_config(DYNAMIC_MAX_PROMPT_SERIES=5)
+@pytest.mark.django_db(transaction=True)
+def test_llm_prompt_generation__will_trigger():
+    create_prompt_series(4)
+    with patch("compute_horde_validator.validator.tasks.generate_prompts") as mock_generate_prompts:
+        llm_prompt_generation()
+    assert mock_generate_prompts.called
+
+
+@pytest.mark.override_config(DYNAMIC_MAX_PROMPT_SERIES=5)
+@pytest.mark.django_db(transaction=True)
+def test_llm_prompt_generation__will_not_trigger():
+    create_prompt_series(10)
+    with patch("compute_horde_validator.validator.tasks.generate_prompts") as mock_generate_prompts:
+        llm_prompt_generation()
+    mock_generate_prompts.assert_not_called()
+    assert PromptSeries.objects.count() == 10
diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
index f130e39ad..e99c08256 100644
--- a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
@@ -4,9 +4,9 @@
 from moto import mock_aws
 
 from compute_horde_validator.validator.s3 import (
+    download_prompts_from_s3_url,
     generate_download_url,
     generate_upload_url,
-    download_prompts_from_s3_url,
     get_public_url,
     get_s3_client,
 )

From c92f5b8e2a372f14e38dee43db1191d19e0825e8 Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Tue, 17 Sep 2024 13:02:11 +0100
Subject: [PATCH 28/53] add prompt answering lock

---
 .../compute_horde_validator/validator/locks.py  |  1 +
 .../compute_horde_validator/validator/tasks.py  | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/validator/locks.py b/validator/app/src/compute_horde_validator/validator/locks.py
index 6b2519644..7eae5c4cc 100644
--- a/validator/app/src/compute_horde_validator/validator/locks.py
+++ b/validator/app/src/compute_horde_validator/validator/locks.py
@@ -4,6 +4,7 @@
 class LockType:
     WEIGHT_SETTING = 1
     VALIDATION_SCHEDULING = 2
+    ANSWERING_PROMPTS = 3
 
 
 class Locked(Exception):
diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py
index 54c808907..b9073333b 100644
--- a/validator/app/src/compute_horde_validator/validator/tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tasks.py
@@ -1050,12 +1050,17 @@ def llm_prompt_generation():
 
 @app.task()
 def llm_prompt_answering():
-    # TODO: handle parallelism
-    unprocessed_workloads = SolveWorkload.objects.filter(
-        finished_at__isnull=True
-    )
-    for workload in
unprocessed_workloads: - async_to_sync(answer_prompts)(workload) + with transaction.atomic(): + try: + get_advisory_lock(LockType.ANSWERING_PROMPTS) + except Locked: + logger.debug("Another thread already answering prompts") + return + + unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True) + + for workload in unprocessed_workloads: + async_to_sync(answer_prompts)(workload) def init_workload(seed: int): From 8e4b8ceb33a9c111bd4f0eca10b8ff675d293b8d Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Tue, 17 Sep 2024 13:12:55 +0100 Subject: [PATCH 29/53] prepare receipts async --- .../miner/management/commands/prepare_receipts.py | 3 ++- .../miner/miner_consumer/validator_interface.py | 2 +- miner/app/src/compute_horde_miner/miner/tasks.py | 8 +++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py b/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py index 3c8580194..140bb9c90 100644 --- a/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py +++ b/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py @@ -1,3 +1,4 @@ +from asgiref.sync import async_to_sync from django.core.management import BaseCommand from compute_horde_miner.miner.tasks import prepare_receipts @@ -5,4 +6,4 @@ class Command(BaseCommand): def handle(self, *args, **options): - prepare_receipts() + async_to_sync(prepare_receipts)() diff --git a/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py b/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py index 362674879..f22834762 100644 --- a/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py +++ b/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py @@ -367,7 +367,7 @@ async def handle(self, msg: BaseValidatorRequest): time_took_us=msg.payload.time_took_us, score_str=msg.payload.score_str, ) - prepare_receipts.delay() + await prepare_receipts() async def _executor_ready(self, msg: ExecutorReady): job = await AcceptedJob.objects.aget(executor_token=msg.executor_token) diff --git a/miner/app/src/compute_horde_miner/miner/tasks.py b/miner/app/src/compute_horde_miner/miner/tasks.py index b151aafe0..2131d355e 100644 --- a/miner/app/src/compute_horde_miner/miner/tasks.py +++ b/miner/app/src/compute_horde_miner/miner/tasks.py @@ -65,19 +65,17 @@ def fetch_validators(): ) -@app.task -def prepare_receipts(): +async def prepare_receipts(): receipts = [] - job_started_receipts = JobStartedReceipt.objects.order_by("time_accepted").filter( time_accepted__gt=now() - RECEIPTS_MAX_SERVED_PERIOD ) - receipts += [jr.to_receipt() for jr in job_started_receipts] + receipts += [jr.to_receipt() async for jr in job_started_receipts] job_finished_receipts = JobFinishedReceipt.objects.order_by("time_started").filter( time_started__gt=now() - RECEIPTS_MAX_SERVED_PERIOD ) - receipts += [jr.to_receipt() for jr in job_finished_receipts] + receipts += [jr.to_receipt() async for jr in job_finished_receipts] receipts_store.store(receipts) From 8696453f65e08666bddfc85f22b1667fb0006344 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Tue, 17 Sep 2024 14:17:37 +0100 Subject: [PATCH 30/53] fix bad filename --- .../cross_validation/prompt_answering.py | 29 +++++++++---------- .../synthetic_jobs/generator/llm_prompts.py | 5 ++-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git 
a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py index 5838e736a..328ac5fbd 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py @@ -21,15 +21,6 @@ logger = logging.getLogger(__name__) -async def get_workload_prompts(workload: SolveWorkload) -> list[Prompt]: - return [ - x - async for x in Prompt.objects.select_related("sample").filter( - sample__workload_id=workload.id, answer__isnull=True - ) - ] - - def _get_keypair() -> bittensor.Keypair: return settings.BITTENSOR_WALLET().get_hotkey() @@ -52,7 +43,6 @@ async def answer_prompts( ts = datetime.now() seed = workload.seed - prompts = await get_workload_prompts(workload) job_generator = LlmPromptsJobGenerator(workload.s3_url, seed) await job_generator.ainit() @@ -88,14 +78,23 @@ async def answer_prompts( logger.error("Failed to download prompt answers", exc_info=True) return - await sync_to_async(save_workload_answers)(workload, prompts, prompt_answers) + await sync_to_async(save_workload_answers)(workload, prompt_answers) duration_seconds = (datetime.now() - ts).total_seconds() - logger.info( - f"Workload {workload} answered {len(prompts)} prompts in {duration_seconds} seconds" - ) + logger.info(f"Workload {workload} finished in {duration_seconds} seconds") + + +def get_workload_prompts(workload: SolveWorkload) -> list[Prompt]: + return [ + x + for x in Prompt.objects.select_related("sample").filter( + sample__workload_id=workload.id, answer__isnull=True + ) + ] + +def save_workload_answers(workload, prompt_answers): + prompts = get_workload_prompts(workload) -def save_workload_answers(workload, prompts, prompt_answers): with transaction.atomic(): # update the workload as finished workload.finished_at = now() diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py index 1a38fa405..b81089d1f 100644 --- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py +++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py @@ -26,8 +26,9 @@ def __init__( super().__init__(**kwargs) self.seed = seed self.s3_url = s3_url - self.input_filename = str(uuid.uuid4()) + ".txt" - self.s3_output_key = str(uuid.uuid4()) + ".json" + file_uuid = str(uuid.uuid4()) + self.input_filename = file_uuid + ".txt" + self.s3_output_key = file_uuid + ".json" self.s3_output_prefix = "solved/" self.s3_output_bucket = settings.S3_BUCKET_NAME_ANSWERS From 9548cb632db3ebbfa67cfa2fb48785136d98e7d8 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Wed, 18 Sep 2024 02:35:20 +0600 Subject: [PATCH 31/53] Add config for max executor count limits --- .../src/compute_horde_validator/settings.py | 11 ++++++++++ .../validator/dynamic_config.py | 18 +++++++++++++++++ .../validator/synthetic_jobs/batch_run.py | 20 +++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py index 83536a1f6..feafda685 100644 --- a/validator/app/src/compute_horde_validator/settings.py +++ b/validator/app/src/compute_horde_validator/settings.py @@ -226,6 +226,17 @@ def wrapped(*args, **kwargs): "Number 
of prompts to generate in a single batch",
         int,
     ),
+    "DYNAMIC_MINER_MAX_EXECUTORS_PER_CLASS": (
+        "always_on.llm.a6000=2",
+        (
+            "The maximum number of executors for an executor class that miners are allowed to have. "
+            "Executor classes not mentioned here have no limits. "
+            "The format should be: 'key1=value1,key2=value2', "
+            "where the keys are executor class enum values, and the values are integers. "
+            "Setting 0 will disable an executor class."
+        ),
+        str,
+    ),
 }
 
 DYNAMIC_CONFIG_CACHE_TIMEOUT = 300
diff --git a/validator/app/src/compute_horde_validator/validator/dynamic_config.py b/validator/app/src/compute_horde_validator/validator/dynamic_config.py
index 07170935f..4079cda14 100644
--- a/validator/app/src/compute_horde_validator/validator/dynamic_config.py
+++ b/validator/app/src/compute_horde_validator/validator/dynamic_config.py
@@ -1,8 +1,10 @@
 import asyncio
 import time
+from contextlib import suppress
 
 import constance.utils
 from asgiref.sync import sync_to_async
+from compute_horde.executor_class import ExecutorClass
 from constance import config
 from django.conf import settings
 
@@ -60,3 +62,19 @@ def get_synthetic_jobs_flow_version():
     if settings.DEBUG_OVERRIDE_SYNTHETIC_JOBS_FLOW_VERSION is not None:
         return settings.DEBUG_OVERRIDE_SYNTHETIC_JOBS_FLOW_VERSION
     return config.DYNAMIC_SYNTHETIC_JOBS_FLOW_VERSION
+
+
+async def get_miner_max_executors_per_class() -> dict[ExecutorClass, int]:
+    miner_max_executors_per_class: str = await aget_config("DYNAMIC_MINER_MAX_EXECUTORS_PER_CLASS")
+    result = {}
+    for pair in miner_max_executors_per_class.split(","):
+        # ignore errors from misconfiguration, i.e. non-existent executor classes,
+        # non-integer/negative counts etc.
+        with suppress(ValueError):
+            executor_class_str, count_str = pair.split("=")
+            executor_class = ExecutorClass(executor_class_str)
+            count = int(count_str)
+            if count >= 0:
+                result[executor_class] = count
+
+    return result
diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
index 5590e9003..bffdaebaa 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
@@ -53,6 +53,7 @@
 from django.db import transaction
 from pydantic import BaseModel
 
+from compute_horde_validator.validator.dynamic_config import get_miner_max_executors_per_class
 from compute_horde_validator.validator.models import (
     JobFinishedReceipt,
     JobStartedReceipt,
@@ -1042,6 +1043,24 @@ async def _multi_get_miner_manifest(ctx: BatchContext) -> None:
             assert result is None
 
 
+async def _adjust_miner_max_executors_per_class(ctx: BatchContext) -> None:
+    max_executors_per_class = await get_miner_max_executors_per_class()
+    for hotkey, executors in ctx.executors.items():
+        for executor_class, count in executors.items():
+            if executor_class not in max_executors_per_class:
+                continue
+            if count > max_executors_per_class[executor_class]:
+                logger.warning(
+                    "%s manifest for executor class %s reports more executors (%s) than the max limit (%s), capping at the limit",
+                    ctx.names[hotkey],
+                    executor_class,
+                    count,
+                    max_executors_per_class[executor_class],
+                )
+                ctx.executors[hotkey][executor_class] = max_executors_per_class[executor_class]
+                # TODO: add a system event?
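A minimal, self-contained sketch of how the 'key1=value1,key2=value2' parsing above behaves; the enum members below are an illustrative subset, not the real ExecutorClass:

from contextlib import suppress
from enum import Enum


class ExecutorClass(str, Enum):
    # illustrative subset of the real enum values
    always_on__llm__a6000 = "always_on.llm.a6000"
    always_on__gpu_24gb = "always_on.gpu-24gb"


def parse_max_executors(raw: str) -> dict[ExecutorClass, int]:
    # mirrors get_miner_max_executors_per_class: malformed pairs, unknown
    # executor classes and negative counts are skipped silently
    result: dict[ExecutorClass, int] = {}
    for pair in raw.split(","):
        with suppress(ValueError):
            executor_class_str, count_str = pair.split("=")
            executor_class = ExecutorClass(executor_class_str)
            count = int(count_str)
            if count >= 0:
                result[executor_class] = count
    return result


assert parse_max_executors("always_on.llm.a6000=2") == {ExecutorClass.always_on__llm__a6000: 2}
# malformed or unknown entries never raise, they are simply dropped
assert parse_max_executors("bogus,always_on.gpu-24gb=-1,a=b=c") == {}

Note that 0 is a valid value and, combined with the capping logic, effectively disables the class.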
+ + async def _multi_close_client(ctx: BatchContext) -> None: tasks = [ asyncio.create_task( @@ -1483,6 +1502,7 @@ async def execute_synthetic_batch_run( await ctx.checkpoint_system_event("_multi_get_miner_manifest") await _multi_get_miner_manifest(ctx) + await _adjust_miner_max_executors_per_class(ctx) await ctx.checkpoint_system_event("_get_total_executor_count") total_executor_count = _get_total_executor_count(ctx) From 8fd7597e803a62599070da2939e29d9fb99ccc13 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 18 Sep 2024 06:55:16 +0100 Subject: [PATCH 32/53] nvidia_all --- .../validator/cross_validation/generator/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py index ff5421e58..b48e9db7e 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py @@ -29,7 +29,7 @@ def generator_version(self) -> int: ... def docker_image_name(self) -> str: ... def docker_run_options_preset(self) -> str: - return "none" + return "nvidia_all" def docker_run_cmd(self) -> list[str]: return [] From 42b08a2fd314b7ef739e2c53dbb9de6a60c656e7 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 18 Sep 2024 07:13:55 +0100 Subject: [PATCH 33/53] try upload only on success --- .../management/commands/run_executor.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py b/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py index 842d71368..fbae57fb3 100644 --- a/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py +++ b/executor/app/src/compute_horde_executor/executor/management/commands/run_executor.py @@ -482,26 +482,26 @@ async def run_job(self, job_request: V0JobRequest): success = exit_status == 0 - # upload the output if requested - if job_request.output_upload: - try: - output_uploader = OutputUploader.for_upload_output(job_request.output_upload) - await output_uploader.upload(self.output_volume_mount_dir) - except OutputUploadFailed as ex: - logger.warning( - f"Uploading output failed for job {self.initial_job_request.job_uuid} with error: {ex!r}" - ) - success = False - stdout = ex.description - stderr = "" - - time_took = time.time() - t1 - if success: + # upload the output if requested and job succeeded + if job_request.output_upload: + try: + output_uploader = OutputUploader.for_upload_output(job_request.output_upload) + await output_uploader.upload(self.output_volume_mount_dir) + except OutputUploadFailed as ex: + logger.warning( + f"Uploading output failed for job {self.initial_job_request.job_uuid} with error: {ex!r}" + ) + success = False + stdout = ex.description + stderr = "" + + time_took = time.time() - t1 logger.info( f'Job "{self.initial_job_request.job_uuid}" finished successfully in {time_took:0.2f} seconds' ) else: + time_took = time.time() - t1 logger.error( f'"{" ".join(cmd)}" (job_uuid={self.initial_job_request.job_uuid})' f' failed after {time_took:0.2f} seconds with status={process.returncode}' From 0a57c1b112195d38035fad7e96146b2d99a66d2f Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Wed, 18 Sep 2024 13:19:49 +0600 Subject: [PATCH 34/53] Fix prompt job 
executor class --- .../validator/cross_validation/generator/base.py | 5 +++++ .../validator/cross_validation/generator/v0/__init__.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py index b48e9db7e..16e9c58f9 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/base.py @@ -2,6 +2,7 @@ import uuid from compute_horde.base.volume import Volume +from compute_horde.executor_class import ExecutorClass from compute_horde.miner_client.organic import OrganicJobDetails @@ -28,6 +29,9 @@ def generator_version(self) -> int: ... @abc.abstractmethod def docker_image_name(self) -> str: ... + @abc.abstractmethod + def executor_class(self) -> ExecutorClass: ... + def docker_run_options_preset(self) -> str: return "nvidia_all" @@ -49,6 +53,7 @@ def output(self) -> str | None: def get_job_details(self) -> OrganicJobDetails: return OrganicJobDetails( job_uuid=str(self._uuid), + executor_class=self.executor_class(), docker_image=self.docker_image_name(), raw_script=self.raw_script(), docker_run_options_preset=self.docker_run_options_preset(), diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py index edcc5c32e..a34cdf566 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py @@ -1,4 +1,5 @@ from compute_horde.base.output_upload import MultiUpload, SingleFilePutUpload +from compute_horde.executor_class import ExecutorClass from django.conf import settings from ..base import BasePromptJobGenerator @@ -14,6 +15,9 @@ def timeout_seconds(self) -> int: def docker_image_name(self) -> str: return f"backenddevelopersltd/compute-horde-prompt-gen-{settings.PROMPT_GENERATION_MODEL}:v0-latest" + def executor_class(self) -> ExecutorClass: + return ExecutorClass.always_on__llm__a6000 + def docker_run_cmd(self) -> list[str]: return [ "--model_name", From 566f74117afcafc0c4e695b6dbad21042b6a7a52 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 18 Sep 2024 09:10:19 +0100 Subject: [PATCH 35/53] fixes --- .../app/src/compute_horde_validator/settings.py | 4 ++-- .../cross_validation/prompt_answering.py | 6 +++++- .../cross_validation/prompt_generation.py | 6 +++++- .../compute_horde_validator/validator/tasks.py | 17 +++++++++++++---- .../test_prompt_answering.py | 8 +++----- .../test_prompt_generation.py | 14 ++++++-------- 6 files changed, 34 insertions(+), 21 deletions(-) diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py index 9dfd301f2..31c142b8f 100644 --- a/validator/app/src/compute_horde_validator/settings.py +++ b/validator/app/src/compute_horde_validator/settings.py @@ -215,7 +215,7 @@ def wrapped(*args, **kwargs): int, ), "DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD": ( - 100, + 240, "how many prompts to answer in a single workload", int, ), @@ -232,7 +232,7 @@ def wrapped(*args, **kwargs): ), # prompts answering params "DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES": ( - 10, + 1, "how many prompts to sample and answer from a series", int, 
     ),
diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
index 328ac5fbd..9bdde9265 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py
@@ -69,7 +69,11 @@ async def answer_prompts(
         my_keypair=_get_keypair(),
     )
 
-    await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout)
+    try:
+        await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout)
+    except Exception:
+        logger.error("Failed to run organic job", exc_info=True)
+        return
 
     try:
         await job_generator._download_answers()
diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
index 959d775ad..29c51cdae 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_generation.py
@@ -60,7 +60,11 @@ async def generate_prompts(
         my_keypair=_get_keypair(),
     )
 
-    await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout)
+    try:
+        await run_organic_job(miner_client, job_details, wait_timeout=wait_timeout)
+    except Exception:
+        logger.error("Failed to run organic job", exc_info=True)
+        return
 
     await _persist_series_list(series_uuids, public_urls, job_generator.generator_version())
diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py
index b9073333b..e1ca7d074 100644
--- a/validator/app/src/compute_horde_validator/validator/tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tasks.py
@@ -70,6 +70,7 @@
 logger = get_task_logger(__name__)
 
 JOB_WINDOW = 2 * 60 * 60
+MAX_SEED = (1 << 63) - 1
 
 SCORING_ALGO_VERSION = 2
 
@@ -1063,7 +1064,7 @@ def llm_prompt_answering():
         async_to_sync(answer_prompts)(workload)
 
 
-def init_workload(seed: int):
+def init_workload(seed: int) -> tuple[SolveWorkload, str]:
     workload_uuid = uuid.uuid4()
     # generate an s3 url to upload workload prompts to
     s3_upload_url = generate_upload_url(
@@ -1115,10 +1116,14 @@ def create_sample_workloads(num_needed_prompt_samples):
     prompts_per_workload = config.DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD
 
     # set seed for the current synthetic jobs run
-    seed = random.randint(0, 1000000)
+    seed = random.randint(0, MAX_SEED)
 
     # workload we are currently sampling for
-    current_workload, current_upload_url = init_workload(seed)
+    try:
+        current_workload, current_upload_url = init_workload(seed)
+    except Exception as e:
+        logger.error(f"Failed to create new workload: {e} - aborting prompt sampling")
+        return
 
     # how many prompts series we sampled so far
     # for each prompt series there is one prompt sample
@@ -1160,6 +1165,10 @@ def create_sample_workloads(num_needed_prompt_samples):
             break
 
         # reset for next workload
-        current_workload, current_upload_url = init_workload(seed)
         current_prompt_samples = []
         current_prompts = []
+        try:
+            current_workload, current_upload_url = init_workload(seed)
+        except Exception as e:
+            logger.error(f"Failed to create new workload: {e} - aborting prompt sampling")
+            return
diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py
b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py index 6d793a676..5c45ad6fb 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_answering.py @@ -4,7 +4,6 @@ from unittest.mock import patch import pytest -from compute_horde.miner_client.organic import OrganicJobError from compute_horde_validator.validator.cross_validation.prompt_answering import answer_prompts from compute_horde_validator.validator.models import ( @@ -98,10 +97,9 @@ async def test_answer_prompts_job_failed( prompts, workload = await db_setup() - with pytest.raises(OrganicJobError): - await answer_prompts( - workload, create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 - ) + await answer_prompts( + workload, create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 + ) await workload.arefresh_from_db() assert workload.finished_at is None diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py index 2616606b6..d324777b0 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py @@ -85,10 +85,9 @@ async def test_generate_prompts_job_failed( await transport.add_message(executor_ready_message, send_before=0) await transport.add_message(job_failed_message, send_before=2) - with pytest.raises(OrganicJobError): - await generate_prompts( - create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 - ) + await generate_prompts( + create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=2 + ) assert not await PromptSeries.objects.aexists() @@ -101,9 +100,8 @@ async def test_generate_prompts_timeout( ): await transport.add_message(manifest_message, send_before=1) - with pytest.raises(OrganicJobError): - await generate_prompts( - create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=0.5 - ) + await generate_prompts( + create_miner_client=create_miner_client, job_uuid=job_uuid, wait_timeout=0.5 + ) assert not await PromptSeries.objects.aexists() From a701768f80d8df5167bc01c82ea18437936c5836 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 18 Sep 2024 09:13:47 +0100 Subject: [PATCH 36/53] Revert "prepare receipts async" This reverts commit 8e4b8ceb33a9c111bd4f0eca10b8ff675d293b8d. 
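For context: the reverted commit had made receipt preparation a plain coroutine that consumed Django querysets with `async for` and was awaited inline, while the restored version below is a Celery task that iterates synchronously and is scheduled with .delay(). A toy stand-in (fake rows rather than the real receipt models) for the `async for` comprehension pattern being reverted:

import asyncio


class FakeReceiptRow:
    def __init__(self, n: int) -> None:
        self.n = n

    def to_receipt(self) -> str:
        return f"receipt-{self.n}"


async def fake_queryset(rows):
    # stands in for Django's asynchronous queryset iteration
    for row in rows:
        yield row


async def main() -> None:
    rows = [FakeReceiptRow(i) for i in range(3)]
    receipts = [r.to_receipt() async for r in fake_queryset(rows)]
    assert receipts == ["receipt-0", "receipt-1", "receipt-2"]


asyncio.run(main())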
--- .../miner/management/commands/prepare_receipts.py | 3 +-- .../miner/miner_consumer/validator_interface.py | 2 +- miner/app/src/compute_horde_miner/miner/tasks.py | 8 +++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py b/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py index 140bb9c90..3c8580194 100644 --- a/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py +++ b/miner/app/src/compute_horde_miner/miner/management/commands/prepare_receipts.py @@ -1,4 +1,3 @@ -from asgiref.sync import async_to_sync from django.core.management import BaseCommand from compute_horde_miner.miner.tasks import prepare_receipts @@ -6,4 +5,4 @@ class Command(BaseCommand): def handle(self, *args, **options): - async_to_sync(prepare_receipts)() + prepare_receipts() diff --git a/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py b/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py index f22834762..362674879 100644 --- a/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py +++ b/miner/app/src/compute_horde_miner/miner/miner_consumer/validator_interface.py @@ -367,7 +367,7 @@ async def handle(self, msg: BaseValidatorRequest): time_took_us=msg.payload.time_took_us, score_str=msg.payload.score_str, ) - await prepare_receipts() + prepare_receipts.delay() async def _executor_ready(self, msg: ExecutorReady): job = await AcceptedJob.objects.aget(executor_token=msg.executor_token) diff --git a/miner/app/src/compute_horde_miner/miner/tasks.py b/miner/app/src/compute_horde_miner/miner/tasks.py index 2131d355e..b151aafe0 100644 --- a/miner/app/src/compute_horde_miner/miner/tasks.py +++ b/miner/app/src/compute_horde_miner/miner/tasks.py @@ -65,17 +65,19 @@ def fetch_validators(): ) -async def prepare_receipts(): +@app.task +def prepare_receipts(): receipts = [] + job_started_receipts = JobStartedReceipt.objects.order_by("time_accepted").filter( time_accepted__gt=now() - RECEIPTS_MAX_SERVED_PERIOD ) - receipts += [jr.to_receipt() async for jr in job_started_receipts] + receipts += [jr.to_receipt() for jr in job_started_receipts] job_finished_receipts = JobFinishedReceipt.objects.order_by("time_started").filter( time_started__gt=now() - RECEIPTS_MAX_SERVED_PERIOD ) - receipts += [jr.to_receipt() async for jr in job_finished_receipts] + receipts += [jr.to_receipt() for jr in job_finished_receipts] receipts_store.store(receipts) From 356bd1ad94eb265248b52ebd016e9d6fd127a8ef Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 18 Sep 2024 12:21:47 +0100 Subject: [PATCH 37/53] fix locking --- .../src/compute_horde_validator/settings.py | 16 +++++++++ .../validator/locks.py | 2 +- .../validator/tasks.py | 34 ++++++++++++------- .../test_prompt_generation.py | 1 - 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py index 31c142b8f..9f70297a4 100644 --- a/validator/app/src/compute_horde_validator/settings.py +++ b/validator/app/src/compute_horde_validator/settings.py @@ -247,6 +247,22 @@ def wrapped(*args, **kwargs): ), str, ), + # TODO: move to executor config + "PROMPT_GENERATION_TIMEOUT": ( + 600, + "How many seconds to wait for prompt generation to finish", + int, + ), + "PROMPT_ANSWERING_TIMEOUT": ( + 600, + "How many seconds to wait for prompt answering to finish", + int, 
+ ), + "PROMPT_SAMPLING_TIMEOUT": ( + 600, + "How many seconds to wait for prompt sampling to finish", + int, + ), } DYNAMIC_CONFIG_CACHE_TIMEOUT = 300 diff --git a/validator/app/src/compute_horde_validator/validator/locks.py b/validator/app/src/compute_horde_validator/validator/locks.py index 7eae5c4cc..36216223a 100644 --- a/validator/app/src/compute_horde_validator/validator/locks.py +++ b/validator/app/src/compute_horde_validator/validator/locks.py @@ -4,7 +4,7 @@ class LockType: WEIGHT_SETTING = 1 VALIDATION_SCHEDULING = 2 - ANSWERING_PROMPTS = 3 + TRUSTED_MINER_LOCK = 3 class Locked(Exception): diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index e1ca7d074..c674d9748 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -1036,31 +1036,41 @@ def fetch_dynamic_config() -> None: @app.task() -def llm_prompt_generation(): +def llm_prompt_generation(task_time_limit=config.PROMPT_GENERATION_TIMEOUT): num_expected_prompt_series = config.DYNAMIC_MAX_PROMPT_SERIES num_prompt_series = PromptSeries.objects.count() - if num_prompt_series < num_expected_prompt_series: - logger.info("There are %s series in the db, generating prompts", num_prompt_series) - async_to_sync(generate_prompts)() - else: + + if num_prompt_series >= num_expected_prompt_series: logger.warning( "There are %s series in the db - skipping prompt generation", num_prompt_series, ) + return + logger.info("There are %s series in the db, generating prompts", num_prompt_series) -@app.task() -def llm_prompt_answering(): with transaction.atomic(): try: - get_advisory_lock(LockType.ANSWERING_PROMPTS) + get_advisory_lock(LockType.TRUSTED_MINER_LOCK) except Locked: - logger.debug("Another thread already answering prompts") + logger.debug("Another thread already using the trusted miner") return - unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True) + async_to_sync(generate_prompts)() + + +@app.task(task_time_limit=config.PROMPT_ANSWERING_TIMEOUT) +def llm_prompt_answering(): + unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True) + + for workload in unprocessed_workloads: + with transaction.atomic(): + try: + get_advisory_lock(LockType.TRUSTED_MINER_LOCK) + except Locked: + logger.debug("Another thread already using the trusted miner") + return - for workload in unprocessed_workloads: async_to_sync(answer_prompts)(workload) @@ -1078,7 +1088,7 @@ def init_workload(seed: int) -> tuple[SolveWorkload, str]: return SolveWorkload(workload_uuid=workload_uuid, seed=seed, s3_url=s3_url), s3_upload_url -@app.task() +@app.task(task_time_limit=config.PROMPT_SAMPLING_TIMEOUT) def llm_prompt_sampling(): # generate new prompt samples if needed num_unused_prompt_samples = PromptSample.objects.filter(synthetic_job__isnull=True).count() diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py index d324777b0..fc23b183d 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_cross_validation/test_prompt_generation.py @@ -3,7 +3,6 @@ import pytest from compute_horde.base.output_upload import MultiUpload -from 
compute_horde.miner_client.organic import OrganicJobError from compute_horde.mv_protocol.validator_requests import BaseValidatorRequest from compute_horde_validator.validator.cross_validation.prompt_generation import generate_prompts From a4850af8147f79992c34222a84fc0d2ceb28824a Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Thu, 19 Sep 2024 17:42:49 +0600 Subject: [PATCH 38/53] Specify executor class in answering job --- .../validator/cross_validation/prompt_answering.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py index 9bdde9265..feb8f9d4b 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/prompt_answering.py @@ -4,6 +4,7 @@ import bittensor from asgiref.sync import sync_to_async +from compute_horde.executor_class import ExecutorClass from compute_horde.miner_client.organic import ( OrganicJobDetails, OrganicMinerClient, @@ -47,9 +48,13 @@ async def answer_prompts( job_generator = LlmPromptsJobGenerator(workload.s3_url, seed) await job_generator.ainit() + # TODO: Should be generated for all the llm executor classes. + # SolveWorkload/PromptSample should have a executor_class field saying which + # executor_class this sample is for. job_uuid = job_uuid or uuid.uuid4() job_details = OrganicJobDetails( job_uuid=str(job_uuid), + executor_class=ExecutorClass.always_on__llm__a6000, docker_image=job_generator.docker_image_name(), raw_script=job_generator.raw_script(), docker_run_options_preset=job_generator.docker_run_options_preset(), From b2b2aa8ff965856bfe92e559916439e2b09fb302 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 24 Sep 2024 12:37:02 +0600 Subject: [PATCH 39/53] Use --quantize in PromptJobGenerator --- .../validator/cross_validation/generator/v0/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py index a34cdf566..ba81d037c 100644 --- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py +++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py @@ -20,10 +20,9 @@ def executor_class(self) -> ExecutorClass: def docker_run_cmd(self) -> list[str]: return [ + "--quantize", "--model_name", settings.PROMPT_GENERATION_MODEL, - "--number_of_prompts_per_batch", - str(self.num_prompts_per_batch), "--uuids", str(",".join(map(str, self.batch_uuids))), ] From 86188d84cb5cbb368316f68867cfb4f3fa37b17c Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 24 Sep 2024 13:19:30 +0600 Subject: [PATCH 40/53] Fix sampling max seed --- validator/app/src/compute_horde_validator/validator/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index c674d9748..2ff2e9a88 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -70,7 +70,7 @@ logger = get_task_logger(__name__) JOB_WINDOW = 2 * 60 * 60 -MAX_SEED = (1 << 63) - 1 +MAX_SEED = (1 << 32) - 1 
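A plausible reading of the new bound above (the patch itself does not say why): seeds are kept within unsigned 32 bits so that downstream consumers that only accept 32-bit seeds - numpy's legacy seeding is a common example, though which consumer motivated this is an assumption - cannot reject them. A quick standalone check:

import random

MAX_SEED = (1 << 32) - 1  # fits in an unsigned 32-bit integer

seed = random.randint(0, MAX_SEED)
assert 0 <= seed <= 0xFFFFFFFF

# numpy's legacy seeding, for instance, rejects anything wider than 32 bits
try:
    import numpy as np
except ImportError:
    np = None

if np is not None:
    np.random.seed(MAX_SEED)  # accepted
    try:
        np.random.seed((1 << 63) - 1)  # the old bound would blow up here
    except ValueError as exc:
        print(exc)  # Seed must be between 0 and 2**32 - 1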
SCORING_ALGO_VERSION = 2 From 34cdf14b865faa5dffb4a4995ddaad889c00b93e Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Tue, 24 Sep 2024 13:21:16 +0600 Subject: [PATCH 41/53] Remove misconfigured dynamic config --- .../app/src/compute_horde_validator/settings.py | 16 ---------------- .../src/compute_horde_validator/validator/s3.py | 5 ++--- .../compute_horde_validator/validator/tasks.py | 6 +++--- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py index 9f70297a4..31c142b8f 100644 --- a/validator/app/src/compute_horde_validator/settings.py +++ b/validator/app/src/compute_horde_validator/settings.py @@ -247,22 +247,6 @@ def wrapped(*args, **kwargs): ), str, ), - # TODO: move to executor config - "PROMPT_GENERATION_TIMEOUT": ( - 600, - "How many seconds to wait for prompt generation to finish", - int, - ), - "PROMPT_ANSWERING_TIMEOUT": ( - 600, - "How many seconds to wait for prompt answering to finish", - int, - ), - "PROMPT_SAMPLING_TIMEOUT": ( - 600, - "How many seconds to wait for prompt sampling to finish", - int, - ), } DYNAMIC_CONFIG_CACHE_TIMEOUT = 300 diff --git a/validator/app/src/compute_horde_validator/validator/s3.py b/validator/app/src/compute_horde_validator/validator/s3.py index 5aa23e8d7..6c99dbb97 100644 --- a/validator/app/src/compute_horde_validator/validator/s3.py +++ b/validator/app/src/compute_horde_validator/validator/s3.py @@ -1,6 +1,5 @@ import functools import logging -from collections.abc import Generator import boto3 import httpx @@ -55,7 +54,7 @@ def upload_prompts_to_s3_url(s3_url: str, content: str) -> bool: return True -def download_prompts_from_s3_url(s3_url: str) -> Generator[tuple[str, list[str]]]: +def download_prompts_from_s3_url(s3_url: str) -> list[str]: response = requests.get(s3_url) if response.status_code != 200: logger.warning(f"Failed to download prompts from {s3_url}") @@ -63,7 +62,7 @@ def download_prompts_from_s3_url(s3_url: str) -> Generator[tuple[str, list[str]] return response.text.split("\n") -async def download_file_content(s3_url: str): +async def download_file_content(s3_url: str) -> bytes: async with httpx.AsyncClient() as client: response = await client.get(s3_url, timeout=5) response.raise_for_status() diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index 2ff2e9a88..473c86b9a 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -1036,7 +1036,7 @@ def fetch_dynamic_config() -> None: @app.task() -def llm_prompt_generation(task_time_limit=config.PROMPT_GENERATION_TIMEOUT): +def llm_prompt_generation(): num_expected_prompt_series = config.DYNAMIC_MAX_PROMPT_SERIES num_prompt_series = PromptSeries.objects.count() @@ -1059,7 +1059,7 @@ def llm_prompt_generation(task_time_limit=config.PROMPT_GENERATION_TIMEOUT): async_to_sync(generate_prompts)() -@app.task(task_time_limit=config.PROMPT_ANSWERING_TIMEOUT) +@app.task() def llm_prompt_answering(): unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True) @@ -1088,7 +1088,7 @@ def init_workload(seed: int) -> tuple[SolveWorkload, str]: return SolveWorkload(workload_uuid=workload_uuid, seed=seed, s3_url=s3_url), s3_upload_url -@app.task(task_time_limit=config.PROMPT_SAMPLING_TIMEOUT) +@app.task() def llm_prompt_sampling(): # generate new prompt samples if needed 
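The decorator arguments removed above are a timing trap rather than a value problem: keyword arguments in a decorator call are evaluated once, at import time, so a constance lookup there freezes the setting forever (and can hit the database before the app is ready). A toy demonstration with stand-ins for celery and constance:

class FakeConfig:
    PROMPT_ANSWERING_TIMEOUT = 600


config = FakeConfig()
frozen = []


def task(**options):
    def decorator(fn):
        frozen.append(options)  # options captured exactly once, at import time
        return fn

    return decorator


@task(task_time_limit=config.PROMPT_ANSWERING_TIMEOUT)
def llm_prompt_answering():
    pass


config.PROMPT_ANSWERING_TIMEOUT = 60  # a later "dynamic" change...
assert frozen[0]["task_time_limit"] == 600  # ...never reaches the task options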
num_unused_prompt_samples = PromptSample.objects.filter(synthetic_job__isnull=True).count()

From 64e75d89c36b4e18739c447e8880b713a72cac9f Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Tue, 24 Sep 2024 13:22:00 +0600
Subject: [PATCH 42/53] Save public url of workload answers

---
 validator/app/src/compute_horde_validator/validator/tasks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py
index 473c86b9a..94f3b2397 100644
--- a/validator/app/src/compute_horde_validator/validator/tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tasks.py
@@ -52,8 +52,8 @@
 from compute_horde_validator.validator.organic_jobs.miner_driver import execute_organic_job
 from compute_horde_validator.validator.s3 import (
     download_prompts_from_s3_url,
-    generate_download_url,
     generate_upload_url,
+    get_public_url,
     upload_prompts_to_s3_url,
 )
 from compute_horde_validator.validator.synthetic_jobs.batch_run import (
@@ -1081,7 +1081,7 @@ def init_workload(seed: int) -> tuple[SolveWorkload, str]:
         key=str(workload_uuid), bucket_name=settings.S3_BUCKET_NAME_ANSWERS
     )
     # generate an s3 url to download workload prompts to be answered
-    s3_url = generate_download_url(
+    s3_url = get_public_url(
         key=str(workload_uuid),
         bucket_name=settings.S3_BUCKET_NAME_ANSWERS,
     )

From 986a4c70dd2bbcdc31b58bd87add85f473a334f5 Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Tue, 24 Sep 2024 14:36:11 +0600
Subject: [PATCH 43/53] Add --batch_size and --max_new_tokens to prompt generation job

---
 .../validator/cross_validation/generator/v0/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
index ba81d037c..ec47156c6 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
@@ -23,6 +23,10 @@ def docker_run_cmd(self) -> list[str]:
             "--quantize",
             "--model_name",
             settings.PROMPT_GENERATION_MODEL,
+            "--batch_size",
+            "256",  # on A6000 we want 240 prompts generated in a single file, but not all results are valid
+            "--max_new_tokens",
+            "40",  # 40 new tokens is enough for a reasonable-length prompt - 30 caused too many cut-off prompts
             "--uuids",
             str(",".join(map(str, self.batch_uuids))),
         ]

From 6db59e783b3c4999c729794838dee1ea71fcea1c Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Tue, 24 Sep 2024 15:17:11 +0600
Subject: [PATCH 44/53] Fix empty prompt

---
 validator/app/src/compute_horde_validator/validator/s3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validator/app/src/compute_horde_validator/validator/s3.py b/validator/app/src/compute_horde_validator/validator/s3.py
index 6c99dbb97..527c8c5b5 100644
--- a/validator/app/src/compute_horde_validator/validator/s3.py
+++ b/validator/app/src/compute_horde_validator/validator/s3.py
@@ -59,7 +59,7 @@ def download_prompts_from_s3_url(s3_url: str) -> list[str]:
     if response.status_code != 200:
         logger.warning(f"Failed to download prompts from {s3_url}")
         return []
-    return response.text.split("\n")
+    return response.text.splitlines()

From 5e36ba07438d3445dedca2b26ac1a6d9b5276e5c Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Wed, 25 Sep 2024 04:03:08 +0600
Subject: [PATCH 45/53] Update prompt generation job params

---
 validator/app/src/compute_horde_validator/settings.py           | 2 +-
 .../validator/cross_validation/generator/v0/__init__.py         | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py
index 31c142b8f..2507b1049 100644
--- a/validator/app/src/compute_horde_validator/settings.py
+++ b/validator/app/src/compute_horde_validator/settings.py
@@ -221,7 +221,7 @@ def wrapped(*args, **kwargs):
     ),
     # prompt generation params
     "DYNAMIC_PROMPTS_SERIES_IN_A_SINGLE_GENERATION": (
-        5,
+        25,
         "Number of batches that prompt generator will process in a single go",
         int,
     ),
diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
index ec47156c6..04f197bdd 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
@@ -23,10 +23,11 @@ def docker_run_cmd(self) -> list[str]:
             "--quantize",
             "--model_name",
             settings.PROMPT_GENERATION_MODEL,
-            "--batch_size",
-            "256",  # on A6000 we want 240 prompts generated in a single file, but not all results are valid
-            "--max_new_tokens",
-            "40",  # 40 new tokens is enough for a reasonable-length prompt - 30 caused too many cut-off prompts
+            "--batch_size=250",  # on A6000 we want 240 prompts generated in a single file, but not all results are valid
+            "--num_return_sequences=1",
+            "--max_new_tokens=40",  # 40 new tokens is enough for a reasonable-length prompt - 30 caused too many cut-off prompts
+            "--number_of_prompts_per_batch",
+            str(self.num_prompts_per_batch),
             "--uuids",
             str(",".join(map(str, self.batch_uuids))),
         ]

From 58df5d1eed6640e0f25f806f64642ad3b73616c3 Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Wed, 25 Sep 2024 04:16:19 +0600
Subject: [PATCH 46/53] Update prompt generation job timeout

---
 .../validator/cross_validation/generator/v0/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
index 04f197bdd..af1bb907d 100644
--- a/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
+++ b/validator/app/src/compute_horde_validator/validator/cross_validation/generator/v0/__init__.py
@@ -10,7 +10,7 @@ def generator_version(self) -> int:
         return 0
 
     def timeout_seconds(self) -> int:
-        return 3600
+        return 5 * 60
 
     def docker_image_name(self) -> str:
         return f"backenddevelopersltd/compute-horde-prompt-gen-{settings.PROMPT_GENERATION_MODEL}:v0-latest"

From 05f7c2bbefff4735f5c85e945718f201332569db Mon Sep 17 00:00:00 2001
From: Enam Mijbah Noor
Date: Wed, 25 Sep 2024 13:55:53 +0600
Subject: [PATCH 47/53] Fix test_download_prompts_from_s3_url

---
 .../app/src/compute_horde_validator/validator/tests/test_s3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
index e99c08256..8be2ea9a0 100644
--- a/validator/app/src/compute_horde_validator/validator/tests/test_s3.py
+++
b/validator/app/src/compute_horde_validator/validator/tests/test_s3.py @@ -90,7 +90,7 @@ def test_get_public_url( [ (200, "prompt1\nprompt2\nprompt3", ["prompt1", "prompt2", "prompt3"]), (200, "single_prompt", ["single_prompt"]), - (200, "", [""]), + (200, "", []), (404, "Not Found", []), ], ) From 17efa155926b178287d917f3bc5d6908500e17a8 Mon Sep 17 00:00:00 2001 From: Enam Mijbah Noor Date: Thu, 26 Sep 2024 04:17:11 +0600 Subject: [PATCH 48/53] increase spin up time for llm executor class --- compute_horde/compute_horde/executor_class.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compute_horde/compute_horde/executor_class.py b/compute_horde/compute_horde/executor_class.py index 4b1372f5c..1e61f07fa 100644 --- a/compute_horde/compute_horde/executor_class.py +++ b/compute_horde/compute_horde/executor_class.py @@ -44,7 +44,7 @@ class ExecutorClassSpec: description="always on, NVIDIA RTX A6000 GPU machine for LLM prompts solving", has_gpu=True, gpu_vram_gb=48, - spin_up_time=0, + spin_up_time=int(timedelta(minutes=1).total_seconds()), ), # ExecutorClass.always_on__cpu_16c__ram_64gb: ExecutorClassSpec( # cpu_cores=16, From 96ba420b850f9b63db7871425e339a2ee425e1a4 Mon Sep 17 00:00:00 2001 From: Michal Zukowski Date: Wed, 25 Sep 2024 14:28:48 +0200 Subject: [PATCH 49/53] Tweak llm tasks scheduling and constraints for flawless operations --- .../app/src/compute_horde_validator/celery.py | 4 +++ .../src/compute_horde_validator/settings.py | 30 ++++++++--------- .../validator/tasks.py | 32 +++++++++++++++++-- 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/validator/app/src/compute_horde_validator/celery.py b/validator/app/src/compute_horde_validator/celery.py index cff46a714..fb0aa1406 100644 --- a/validator/app/src/compute_horde_validator/celery.py +++ b/validator/app/src/compute_horde_validator/celery.py @@ -19,6 +19,10 @@ def route_task(name, args, kwargs, options, task=None, **kw): "compute_horde_validator.validator.tasks.fetch_receipts_from_miner", "compute_horde_validator.validator.tasks.send_events_to_facilitator", "compute_horde_validator.validator.tasks.fetch_dynamic_config", + # TODO: llm tasks should have dedicated workers, but just move them from default queue for now + "compute_horde_validator.validator.tasks.llm_prompt_generation", + "compute_horde_validator.validator.tasks.llm_prompt_sampling", + "compute_horde_validator.validator.tasks.llm_prompt_answering", } if name in worker_queue_names: return {"queue": "worker"} diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py index ce8daf6a1..4aa571cc6 100644 --- a/validator/app/src/compute_horde_validator/settings.py +++ b/validator/app/src/compute_horde_validator/settings.py @@ -428,21 +428,21 @@ def wrapped(*args, **kwargs): "schedule": timedelta(minutes=5), "options": {}, }, - # "llm_prompt_generation": { - # "task": "compute_horde_validator.validator.tasks.llm_prompt_generation", - # "schedule": timedelta(minutes=10), - # "options": {}, - # }, - # "llm_prompt_sampling": { - # "task": "compute_horde_validator.validator.tasks.llm_prompt_sampling", - # "schedule": timedelta(minutes=10), - # "options": {}, - # }, - # "llm_prompt_answering": { - # "task": "compute_horde_validator.validator.tasks.llm_prompt_answering", - # "schedule": timedelta(minutes=10), - # "options": {}, - # }, + "llm_prompt_generation": { + "task": "compute_horde_validator.validator.tasks.llm_prompt_generation", + "schedule": timedelta(minutes=5), + "options": 
{},
+    },
+    "llm_prompt_sampling": {
+        "task": "compute_horde_validator.validator.tasks.llm_prompt_sampling",
+        "schedule": timedelta(minutes=30),
+        "options": {},
+    },
+    "llm_prompt_answering": {
+        "task": "compute_horde_validator.validator.tasks.llm_prompt_answering",
+        "schedule": timedelta(minutes=5),
+        "options": {},
+    },
 }
 if env.bool("DEBUG_RUN_BEAT_VERY_OFTEN", default=False):
     CELERY_BEAT_SCHEDULE["run_synthetic_jobs"]["schedule"] = crontab(minute="*")
diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py
index d04264907..e8a886dda 100644
--- a/validator/app/src/compute_horde_validator/validator/tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tasks.py
@@ -1133,8 +1133,17 @@ def fetch_dynamic_config() -> None:
     )


-@app.task()
+@app.task(
+    soft_time_limit=4 * 60 + 40,
+    time_limit=5 * 60,
+)
 def llm_prompt_generation():
+    unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True).count()
+    if unprocessed_workloads > 0:
+        # prevent any starvation issues
+        logger.info("Unprocessed workloads found - skipping prompt generation")
+        return
+
     num_expected_prompt_series = config.DYNAMIC_MAX_PROMPT_SERIES
     num_prompt_series = PromptSeries.objects.count()

@@ -1157,11 +1166,16 @@ def llm_prompt_generation():
     async_to_sync(generate_prompts)()


-@app.task()
+@app.task(
+    soft_time_limit=4 * 60 + 40,
+    time_limit=5 * 60,
+)
 def llm_prompt_answering():
     unprocessed_workloads = SolveWorkload.objects.filter(finished_at__isnull=True)

+    times = []
     for workload in unprocessed_workloads:
+        start = time.time()
         with transaction.atomic():
             try:
                 get_advisory_lock(LockType.TRUSTED_MINER_LOCK)
@@ -1170,6 +1184,11 @@ def llm_prompt_answering():
                 return

             async_to_sync(answer_prompts)(workload)
+        times.append(time.time() - start)
+        total_time = sum(time)
+        avg_time = total_time / len(times)
+        if total_time + avg_time > 4 * 60 + 20:
+            return


 def init_workload(seed: int) -> tuple[SolveWorkload, str]:
@@ -1189,6 +1208,15 @@ def init_workload(seed: int) -> tuple[SolveWorkload, str]:
 @app.task()
 def llm_prompt_sampling():
     # generate new prompt samples if needed
+
+    num_prompt_series = PromptSeries.objects.count()
+    required_series_to_start_sampling = min(config.DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY * 2, config.DYNAMIC_MAX_PROMPT_SERIES)
+    if num_prompt_series < required_series_to_start_sampling:
+        logger.warning(
+            "There are %s series in the db - expected %s to start sampling - skipping prompt sampling",
+            num_prompt_series, required_series_to_start_sampling
+        )
+        return
     num_unused_prompt_samples = PromptSample.objects.filter(synthetic_job__isnull=True).count()
     num_needed_prompt_samples = (
         config.DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY - num_unused_prompt_samples

From 75b764d4f4309e3ed0e76ce6d73cabf41e4a7b8c Mon Sep 17 00:00:00 2001
From: Michal Zukowski
Date: Thu, 26 Sep 2024 10:01:32 +0200
Subject: [PATCH 50/53] Fix sum() argument in llm_prompt_answering time
 tracking

---
 validator/app/src/compute_horde_validator/validator/tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py
index e8a886dda..328975876 100644
--- a/validator/app/src/compute_horde_validator/validator/tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tasks.py
@@ -1185,7 +1185,7 @@ def llm_prompt_answering():

             async_to_sync(answer_prompts)(workload)
         times.append(time.time() - start)
-        total_time = sum(time)
+        total_time = sum(times)
         avg_time = total_time / len(times)
         if total_time + avg_time > 4 * 60 + 20:
             return

From 5e7b1be0b027cad979fa44063ac5cb4d4272095a Mon Sep 17 00:00:00 2001
From: Michal Zukowski
Date: Thu, 26 Sep 2024 14:16:28 +0200
Subject: [PATCH 51/53] Fix formatting

---
 .../app/src/compute_horde_validator/validator/tasks.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py
index 328975876..36f36cd77 100644
--- a/validator/app/src/compute_horde_validator/validator/tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tasks.py
@@ -1210,11 +1210,14 @@ def llm_prompt_sampling():
     # generate new prompt samples if needed

     num_prompt_series = PromptSeries.objects.count()
-    required_series_to_start_sampling = min(config.DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY * 2, config.DYNAMIC_MAX_PROMPT_SERIES)
+    required_series_to_start_sampling = min(
+        config.DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY * 2, config.DYNAMIC_MAX_PROMPT_SERIES
+    )
     if num_prompt_series < required_series_to_start_sampling:
         logger.warning(
             "There are %s series in the db - expected %s to start sampling - skipping prompt sampling",
-            num_prompt_series, required_series_to_start_sampling
+            num_prompt_series,
+            required_series_to_start_sampling,
         )
         return
     num_unused_prompt_samples = PromptSample.objects.filter(synthetic_job__isnull=True).count()

From 04ede48d96f60e4d838831c7e3f9f960770ddff1 Mon Sep 17 00:00:00 2001
From: Michal Zukowski
Date: Fri, 27 Sep 2024 09:52:07 +0200
Subject: [PATCH 52/53] Fix tests

---
 .../validator/tests/test_llm_tasks.py | 35 ++-----------------
 1 file changed, 2 insertions(+), 33 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py b/validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py
index 2302b48e4..29d4a5e98 100644
--- a/validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py
+++ b/validator/app/src/compute_horde_validator/validator/tests/test_llm_tasks.py
@@ -20,6 +20,7 @@ def create_prompt_series(num: int):
 @pytest.mark.override_config(DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=5)
 @pytest.mark.django_db(transaction=True)
 def test_llm_prompt_sampling__will_not_trigger():
+    create_prompt_series(10)
     prompt_series = PromptSeries.objects.create(s3_url="", generator_version=1)
     for i in range(5):
         workload = SolveWorkload.objects.create(seed=i, s3_url="s3://test")
@@ -77,38 +78,6 @@ def test_llm_prompt_sampling__success():
     assert Prompt.objects.count() == 60


-@pytest.mark.override_config(
-    DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=11,
-    DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES=10,
-    DYNAMIC_NUMBER_OF_PROMPTS_PER_WORKLOAD=20,
-)
-@pytest.mark.django_db(transaction=True)
-@patch("compute_horde_validator.validator.tasks.upload_prompts_to_s3_url", lambda *args: True)
-@patch(
-    "compute_horde_validator.validator.tasks.download_prompts_from_s3_url",
-    lambda *args: ["test" for _ in range(240)],
-)
-def test_llm_prompt_sampling__not_enough_prompt_series():
-    create_prompt_series(5)
-    llm_prompt_sampling()
-    assert SolveWorkload.objects.count() == 2
-    assert PromptSample.objects.count() == 4
-    assert Prompt.objects.count() == 40
-    llm_prompt_sampling()
-    assert SolveWorkload.objects.count() == 4
-    assert PromptSample.objects.count() == 8
-    assert Prompt.objects.count() == 80
-    llm_prompt_sampling()
-    assert SolveWorkload.objects.count() == 
6
-    assert PromptSample.objects.count() == 12
-    assert Prompt.objects.count() == 120
-    # will not sample more
-    llm_prompt_sampling()
-    assert SolveWorkload.objects.count() == 6
-    assert PromptSample.objects.count() == 12
-    assert Prompt.objects.count() == 120
-
-
 @pytest.mark.override_config(
     DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY=4,
     DYNAMIC_NUMBER_OF_PROMPTS_TO_SAMPLE_FROM_SERIES=100,
@@ -121,7 +90,7 @@ def test_llm_prompt_sampling__not_enough_prompt_series():
     lambda *args: ["test" for _ in range(240)],
 )
 def test_llm_prompt_sampling__one_sample_per_workload():
-    create_prompt_series(4)
+    create_prompt_series(8)
     llm_prompt_sampling()
     assert SolveWorkload.objects.count() == 4
     assert PromptSample.objects.count() == 4

From dea90fe3fd94c47540d7293559bc7dbd1d7f870c Mon Sep 17 00:00:00 2001
From: Michal Zukowski
Date: Fri, 27 Sep 2024 23:27:06 +0200
Subject: [PATCH 53/53] Set LLM synthetic job params for production setup

---
 .../src/compute_horde_validator/settings.py   | 15 ++++++--
 .../validator/dynamic_config.py               | 36 +++++++++++++++----
 .../validator/scoring.py                      | 12 +++----
 .../validator/synthetic_jobs/batch_run.py     |  9 ++++-
 .../synthetic_jobs/generator/llm_prompts.py   |  3 +-
 .../validator/tasks.py                        |  4 +--
 .../validator/tests/test_scoring.py           | 27 ++++++--------
 .../validator/tests/test_set_scores.py        |  1 +
 8 files changed, 69 insertions(+), 38 deletions(-)

diff --git a/validator/app/src/compute_horde_validator/settings.py b/validator/app/src/compute_horde_validator/settings.py
index 4aa571cc6..523b412cd 100644
--- a/validator/app/src/compute_horde_validator/settings.py
+++ b/validator/app/src/compute_horde_validator/settings.py
@@ -220,12 +220,12 @@ def wrapped(*args, **kwargs):
     ),
     # llama params
     "DYNAMIC_MAX_PROMPT_SERIES": (
-        10000,
+        3500,
         "Maximum number of prompt series upon which the prompt generator will not be triggered",
         int,
     ),
     "DYNAMIC_TARGET_NUMBER_OF_PROMPT_SAMPLES_READY": (
-        250,
+        1536,  # 256 * 2 * 3 - we allow 2 executors per miner and want queue for 3 synthetic job batches
         "how many prompt samples to generate (should be larger than how many prompts series we use per synthetic run)",
         int,
     ),
@@ -262,6 +262,17 @@ def wrapped(*args, **kwargs):
     ),
         str,
     ),
+    "DYNAMIC_EXECUTOR_CLASS_WEIGHTS": (
+        "spin_up-4min.gpu-24gb=99,always_on.llm.a6000=1",
+        (
+            "Weights of executor classes that are used to normalize miners' scores. "
+            "Executor classes not mentioned here are not taken into account when scoring. 
" + "The format should be: 'key1=value1,key2=value2', " + "where the keys are executor class enum values, and the values are floats, " + "but int values that sum up to 100 are encouraged" + ), + str, + ), } DYNAMIC_CONFIG_CACHE_TIMEOUT = 300 diff --git a/validator/app/src/compute_horde_validator/validator/dynamic_config.py b/validator/app/src/compute_horde_validator/validator/dynamic_config.py index f9aa35d7d..110947a18 100644 --- a/validator/app/src/compute_horde_validator/validator/dynamic_config.py +++ b/validator/app/src/compute_horde_validator/validator/dynamic_config.py @@ -1,6 +1,8 @@ import asyncio import time +from collections.abc import Callable from contextlib import suppress +from typing import Any import constance.utils from asgiref.sync import sync_to_async @@ -46,17 +48,37 @@ def get_synthetic_jobs_flow_version(): return config.DYNAMIC_SYNTHETIC_JOBS_FLOW_VERSION -async def get_miner_max_executors_per_class() -> dict[ExecutorClass, int]: - miner_max_executors_per_class: str = await aget_config("DYNAMIC_MINER_MAX_EXECUTORS_PER_CLASS") +def executor_class_value_map_parser( + value_map_str: str, value_parser: Callable[[str], Any] | None = None +) -> dict[ExecutorClass, Any]: result = {} - for pair in miner_max_executors_per_class.split(","): + for pair in value_map_str.split(","): # ignore errors for misconfiguration, i,e. non-existent executor classes, # non-integer/negative counts etc. with suppress(ValueError): - executor_class_str, count_str = pair.split("=") + executor_class_str, value_str = pair.split("=") executor_class = ExecutorClass(executor_class_str) - count = int(count_str) - if count >= 0: - result[executor_class] = count + if value_parser is not None: + parsed_value = value_parser(value_str) + else: + parsed_value = value_str + result[executor_class] = parsed_value + return result + +async def get_miner_max_executors_per_class() -> dict[ExecutorClass, int]: + miner_max_executors_per_class: str = await aget_config("DYNAMIC_MINER_MAX_EXECUTORS_PER_CLASS") + result = { + executor_class: count + for executor_class, count in executor_class_value_map_parser( + miner_max_executors_per_class, value_parser=int + ).items() + if count >= 0 + } return result + + +def get_executor_class_weights() -> dict[ExecutorClass, float]: + return executor_class_value_map_parser( + config.DYNAMIC_EXECUTOR_CLASS_WEIGHTS, value_parser=float + ) diff --git a/validator/app/src/compute_horde_validator/validator/scoring.py b/validator/app/src/compute_horde_validator/validator/scoring.py index 5c3b6df56..627f2280b 100644 --- a/validator/app/src/compute_horde_validator/validator/scoring.py +++ b/validator/app/src/compute_horde_validator/validator/scoring.py @@ -6,12 +6,9 @@ from compute_horde.executor_class import ExecutorClass from django.conf import settings -logger = logging.getLogger(__name__) - +from .dynamic_config import get_executor_class_weights -EXECUTOR_CLASS_WEIGHTS = { - ExecutorClass.spin_up_4min__gpu_24gb: 100, -} +logger = logging.getLogger(__name__) def normalize(scores, weight=1): @@ -64,9 +61,10 @@ def score_jobs(jobs, score_aggregation=sum, normalization_weight=1): def score_batch(batch): + executor_class_weights = get_executor_class_weights() executor_class_jobs = defaultdict(list) for job in batch.synthetic_jobs.all(): - if job.executor_class in EXECUTOR_CLASS_WEIGHTS: + if job.executor_class in executor_class_weights: executor_class_jobs[job.executor_class].append(job) parametriezed_horde_score = partial( @@ -80,7 +78,7 @@ def score_batch(batch): ) batch_scores = 
defaultdict(float)
     for executor_class, jobs in executor_class_jobs.items():
-        executor_class_weight = EXECUTOR_CLASS_WEIGHTS[executor_class]
+        executor_class_weight = executor_class_weights[executor_class]
         if executor_class == ExecutorClass.spin_up_4min__gpu_24gb:
             score_aggregation = parametriezed_horde_score
         else:
diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
index 823016d33..be87fad6e 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/batch_run.py
@@ -808,7 +808,14 @@ async def _generate_jobs(ctx: BatchContext) -> None:
                 if prompt_samples_iter is None:
                     logger.warning("No llm prompt samples available, skipping llm job")
                     continue
-                prompt_sample = next(prompt_samples_iter)
+                prompt_sample = next(prompt_samples_iter, None)
+                if prompt_sample is None:
+                    # this indicates a bug - we want to see it in Sentry
+                    # and continue, so other executor classes are not affected
+                    logger.error(
+                        "Exhausted prompt_samples_iter, this should not happen, skipping llm job"
+                    )
+                    continue
                 kwargs = {
                     "prompt_sample": prompt_sample,
                     "expected_prompts": list(prompt_sample.prompts.all()),
diff --git a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py
index b81089d1f..0a29c382c 100644
--- a/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py
+++ b/validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/llm_prompts.py
@@ -49,8 +49,7 @@ def _url_for_download(self) -> str:
         )

     def timeout_seconds(self) -> int:
-        # TODO: ???
- return 80 + return 48 # it takes around 42s - we add 15% buffer def base_docker_image_name(self) -> str: return "docker.io/backenddevelopersltd/compute-horde-prompt-solver:v0-latest" diff --git a/validator/app/src/compute_horde_validator/validator/tasks.py b/validator/app/src/compute_horde_validator/validator/tasks.py index 36f36cd77..ce5ca15c5 100644 --- a/validator/app/src/compute_horde_validator/validator/tasks.py +++ b/validator/app/src/compute_horde_validator/validator/tasks.py @@ -1134,7 +1134,7 @@ def fetch_dynamic_config() -> None: @app.task( - soft_time_limit=4 * 60 + 40, + soft_time_limit=4 * 60 + 50, time_limit=5 * 60, ) def llm_prompt_generation(): @@ -1167,7 +1167,7 @@ def llm_prompt_generation(): @app.task( - soft_time_limit=4 * 60 + 40, + soft_time_limit=4 * 60 + 50, time_limit=5 * 60, ) def llm_prompt_answering(): diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_scoring.py b/validator/app/src/compute_horde_validator/validator/tests/test_scoring.py index 3867146dc..d11a68c18 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_scoring.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_scoring.py @@ -1,5 +1,4 @@ from datetime import timedelta -from unittest.mock import patch import pytest from django.utils import timezone @@ -7,6 +6,8 @@ from compute_horde_validator.validator.models import Miner, SyntheticJob, SyntheticJobBatch from compute_horde_validator.validator.scoring import ExecutorClass, score_batches +EXECUTOR_CLASS_WEIGHTS_OVERRIDE = "spin_up-4min.gpu-24gb=8,always_on.gpu-24gb=2" + @pytest.fixture def setup_data(): @@ -72,18 +73,9 @@ def setup_data(): return batch -@pytest.fixture -def mocked_executor_class_weights(): - mocked_weights = { - ExecutorClass.spin_up_4min__gpu_24gb: 8, - ExecutorClass.always_on__gpu_24gb: 2, - } - with patch("compute_horde_validator.validator.scoring.EXECUTOR_CLASS_WEIGHTS", mocked_weights): - yield mocked_weights - - +@pytest.mark.override_config(DYNAMIC_EXECUTOR_CLASS_WEIGHTS=EXECUTOR_CLASS_WEIGHTS_OVERRIDE) @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -def test_score_batches_basic(setup_data, mocked_executor_class_weights): +def test_score_batches_basic(setup_data): batch = setup_data scores = score_batches([batch]) @@ -101,8 +93,9 @@ def test_score_batches_basic(setup_data, mocked_executor_class_weights): assert scores["hotkey1"] > scores["hotkey4"] +@pytest.mark.override_config(DYNAMIC_EXECUTOR_CLASS_WEIGHTS=EXECUTOR_CLASS_WEIGHTS_OVERRIDE) @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -def test_score_batches_with_changed_params_avg(setup_data, settings, mocked_executor_class_weights): +def test_score_batches_with_changed_params_avg(setup_data, settings): batch = setup_data settings.HORDE_SCORE_AVG_PARAM = ( @@ -117,10 +110,9 @@ def test_score_batches_with_changed_params_avg(setup_data, settings, mocked_exec assert changed_scores["hotkey2"] > changed_scores["hotkey4"] +@pytest.mark.override_config(DYNAMIC_EXECUTOR_CLASS_WEIGHTS=EXECUTOR_CLASS_WEIGHTS_OVERRIDE) @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -def test_score_batches_with_changed_params_horde_size( - setup_data, settings, mocked_executor_class_weights -): +def test_score_batches_with_changed_params_horde_size(setup_data, settings): batch = setup_data settings.HORDE_SCORE_SIZE_PARAM = 1.75 @@ -133,8 +125,9 @@ def test_score_batches_with_changed_params_horde_size( assert changed_scores["hotkey4"] 
> changed_scores["hotkey3"] +@pytest.mark.override_config(DYNAMIC_EXECUTOR_CLASS_WEIGHTS=EXECUTOR_CLASS_WEIGHTS_OVERRIDE) @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) -def test_score_batches_executor_classes_weights(mocked_executor_class_weights): +def test_score_batches_executor_classes_weights(): miner1 = Miner.objects.create(hotkey="hotkey1") miner2 = Miner.objects.create(hotkey="hotkey2") diff --git a/validator/app/src/compute_horde_validator/validator/tests/test_set_scores.py b/validator/app/src/compute_horde_validator/validator/tests/test_set_scores.py index a664953b2..d30d937e9 100644 --- a/validator/app/src/compute_horde_validator/validator/tests/test_set_scores.py +++ b/validator/app/src/compute_horde_validator/validator/tests/test_set_scores.py @@ -97,6 +97,7 @@ def test_set_scores__too_early(settings): assert SystemEvent.objects.using(settings.DEFAULT_DB_ALIAS).count() == 0 +@pytest.mark.override_config(DYNAMIC_EXECUTOR_CLASS_WEIGHTS="spin_up-4min.gpu-24gb=100") @pytest.mark.django_db(databases=["default", "default_alias"], transaction=True) @patch_constance({"DYNAMIC_COMMIT_REVEAL_WEIGHTS_ENABLED": False}) def test_set_scores__set_weight_success(settings):