diff --git a/jlama-cli/pom.xml b/jlama-cli/pom.xml index a416568..de755e9 100644 --- a/jlama-cli/pom.xml +++ b/jlama-cli/pom.xml @@ -32,40 +32,6 @@ JColor 5.5.1 - - org.jboss.resteasy - resteasy-jaxrs - ${resteasy.version} - - - org.jboss.resteasy - resteasy-jackson2-provider - ${resteasy.version} - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.core - jackson-annotations - - - - - org.jboss.resteasy - resteasy-undertow - ${resteasy.version} - - - me.dinowernli - java-grpc-prometheus - 0.6.0 - com.github.tjake jlama-core diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/JlamaCli.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/JlamaCli.java index eec802c..a53d9bd 100644 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/JlamaCli.java +++ b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/JlamaCli.java @@ -45,7 +45,7 @@ public static void main(String[] args) { cli.addSubcommand("quantize", new QuantizeCommand()); cli.addSubcommand("chat", new ChatCommand()); cli.addSubcommand("complete", new CompleteCommand()); - cli.addSubcommand("serve", new ServeCommand()); + cli.addSubcommand("restapi", new ApiServiceCommand()); cli.addSubcommand("cluster-coordinator", new ClusterCoordinatorCommand()); cli.addSubcommand("cluster-worker", new ClusterWorkerCommand()); diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ApiServiceCommand.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ApiServiceCommand.java new file mode 100644 index 0000000..deff599 --- /dev/null +++ b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ApiServiceCommand.java @@ -0,0 +1,87 @@ +/* + * Copyright 2024 T Jake Luciani + * + * The Jlama Project licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package com.github.tjake.jlama.cli.commands; + +import com.github.tjake.jlama.model.AbstractModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +import org.springframework.boot.builder.SpringApplicationBuilder; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; +import picocli.CommandLine; + +import java.util.Optional; + +import static com.github.tjake.jlama.model.ModelSupport.loadModel; + +@CommandLine.Command(name = "restapi", description = "Starts a openai compatible rest api for interacting with this model") +@SpringBootApplication(scanBasePackages = {"com.github.tjake.jlama.net.openai", "com.github.tjake.jlama.cli.commands"}) +@SpringBootConfiguration +@Configuration +public class ApiServiceCommand extends BaseCommand implements WebMvcConfigurer { + private static final Logger logger = LoggerFactory.getLogger(ApiServiceCommand.class); + + @CommandLine.Option( + names = {"-p", "--port"}, + description = "http port (default: ${DEFAULT-VALUE})", + defaultValue = "8080") + int port = 8080; + + static volatile AbstractModel m; + + @Bean + public AbstractModel getModelBean() { + logger.info("Here! {}", m); + return m; + } + + @Override + public void addResourceHandlers(ResourceHandlerRegistry registry) { + registry.addResourceHandler("/ui/**") + .addResourceLocations("/resources/"); + } + + @Override + public void run() { + try { + m = loadModel( + model, + workingDirectory, + workingMemoryType, + workingQuantizationType, + Optional.ofNullable(modelQuantization), + Optional.ofNullable(threadCount)); + + logger.info("m = {}", m); + + System.out.println("Chat UI: http://localhost:" + port + "/ui/index.html"); + + new SpringApplicationBuilder(ApiServiceCommand.class) + .lazyInitialization(true) + .properties("server.port", ""+port, "logging.level.org.springframework.web", "debug") + .build() + .run(); + } catch (Exception e) { + e.printStackTrace(); + System.exit(2); + } + } +} diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ClusterCoordinatorCommand.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ClusterCoordinatorCommand.java index 346fa72..dc58458 100644 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ClusterCoordinatorCommand.java +++ b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ClusterCoordinatorCommand.java @@ -15,14 +15,9 @@ */ package com.github.tjake.jlama.cli.commands; -import static com.github.tjake.jlama.cli.commands.ServeCommand.APPLICATION_PATH; -import static io.undertow.Handlers.resource; -import com.github.tjake.jlama.cli.serve.JlamaRestApi; + import com.github.tjake.jlama.net.Coordinator; -import io.undertow.Undertow; -import io.undertow.server.handlers.resource.ClassPathResourceManager; -import org.jboss.resteasy.plugins.server.undertow.UndertowJaxrsServer; import picocli.CommandLine; @CommandLine.Command( @@ -62,7 +57,7 @@ public void run() { }) .start(); - UndertowJaxrsServer ut = new UndertowJaxrsServer(); + /*UndertowJaxrsServer ut = new UndertowJaxrsServer(); ut.deploy(new JlamaRestApi(c), APPLICATION_PATH); ut.addResourcePrefixPath( "/ui", @@ -71,7 +66,7 @@ public void run() { .addWelcomeFiles("index.html")); System.out.println("Chat UI: http://localhost:" + port + "/ui/index.html"); - ut.start(Undertow.builder().addHttpListener(port, "0.0.0.0")); + ut.start(Undertow.builder().addHttpListener(port, "0.0.0.0"));*/ } catch (Exception e) { e.printStackTrace(); diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ServeCommand.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ServeCommand.java deleted file mode 100644 index 2a887be..0000000 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/commands/ServeCommand.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2024 T Jake Luciani - * - * The Jlama Project licenses this file to you under the Apache License, - * version 2.0 (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.github.tjake.jlama.cli.commands; - -import static com.github.tjake.jlama.model.ModelSupport.loadModel; -import static io.undertow.Handlers.resource; - -import com.github.tjake.jlama.cli.serve.JlamaRestApi; -import com.github.tjake.jlama.model.AbstractModel; -import io.undertow.Undertow; -import io.undertow.server.handlers.resource.ClassPathResourceManager; -import java.util.Optional; -import org.jboss.resteasy.plugins.server.undertow.UndertowJaxrsServer; -import picocli.CommandLine; - -@CommandLine.Command(name = "serve", description = "Starts a rest api for interacting with this model") -public class ServeCommand extends BaseCommand { - - @CommandLine.Option( - names = {"-p", "--port"}, - description = "http port (default: ${DEFAULT-VALUE})", - defaultValue = "8080") - int port = 8080; - - static final String APPLICATION_PATH = "/api"; - - @Override - public void run() { - try { - AbstractModel m = loadModel( - model, - workingDirectory, - workingMemoryType, - workingQuantizationType, - java.util.Optional.ofNullable(modelQuantization), - Optional.ofNullable(threadCount)); - - UndertowJaxrsServer ut = new UndertowJaxrsServer(); - ut.deploy(new JlamaRestApi(m), APPLICATION_PATH); - ut.addResourcePrefixPath( - "/ui", - resource(new ClassPathResourceManager(ServeCommand.class.getClassLoader())) - .setDirectoryListingEnabled(true) - .addWelcomeFiles("index.html")); - - System.out.println("Chat UI: http://localhost:" + port + "/ui/index.html"); - ut.start(Undertow.builder().addHttpListener(port, "0.0.0.0")); - } catch (Exception e) { - e.printStackTrace(); - System.exit(2); - } - } -} diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/EmbedResource.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/EmbedResource.java deleted file mode 100644 index 1696ff8..0000000 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/EmbedResource.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2024 T Jake Luciani - * - * The Jlama Project licenses this file to you under the Apache License, - * version 2.0 (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.github.tjake.jlama.cli.serve; - -import javax.ws.rs.POST; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; -import javax.ws.rs.core.MediaType; - -@Path("/embed") -@Produces(MediaType.APPLICATION_JSON) -public class EmbedResource { - @POST - public String embed(String text) { - return text; - } -} diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateParams.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateParams.java deleted file mode 100644 index 859963a..0000000 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateParams.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2024 T Jake Luciani - * - * The Jlama Project licenses this file to you under the Apache License, - * version 2.0 (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.github.tjake.jlama.cli.serve; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; -import java.util.UUID; - -@JsonIgnoreProperties(ignoreUnknown = true) -public class GenerateParams { - @JsonProperty("session") - public UUID sessionId; - - @JsonProperty("prompt") - public String prompt; - - @JsonProperty("temperature") - public Float temp; -} diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateResource.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateResource.java deleted file mode 100644 index 310141f..0000000 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateResource.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2024 T Jake Luciani - * - * The Jlama Project licenses this file to you under the Apache License, - * version 2.0 (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.github.tjake.jlama.cli.serve; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.tjake.jlama.model.functions.Generator; -import com.github.tjake.jlama.safetensors.tokenizer.PromptSupport; -import java.io.IOException; -import java.util.UUID; -import javax.validation.constraints.NotNull; -import javax.ws.rs.*; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; -import javax.ws.rs.core.StreamingOutput; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@Path("/generate") -@Consumes(MediaType.APPLICATION_JSON) -public class GenerateResource { - private static final ObjectMapper om = new ObjectMapper(); - private static final Logger logger = LoggerFactory.getLogger(GenerateResource.class); - - final Generator model; - - public GenerateResource(Generator model) { - this.model = model; - } - - @POST - public Response generate(@NotNull GenerateParams params) { - logger.debug("Sending generate request: {}", params); - UUID sessionId = params.sessionId == null ? UUID.randomUUID() : params.sessionId; - - if (params.prompt == null) { - return Response.status(Response.Status.BAD_REQUEST) - .entity("prompt is required") - .build(); - } - - String prompt = params.prompt; - if (model.getTokenizer().promptSupport().isPresent()) { - PromptSupport.Builder builder = - model.getTokenizer().promptSupport().get().newBuilder(); - builder.addUserMessage(prompt); - prompt = builder.build(); - } - - final String finalPrompt = prompt; - - StreamingOutput so = - os -> model.generate(sessionId, finalPrompt, 0.7f, Integer.MAX_VALUE, false, (s, timing) -> { - try { - logger.info("'{}' took {}ms", s, timing); - os.write(om.writeValueAsBytes(new GenerateResponse(s, false))); - os.write("\n".getBytes()); - os.flush(); - } catch (IOException e) { - logger.warn("streaming exception", e); - } - }); - - return Response.ok(so, "application/x-ndjson").build(); - } -} diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateResponse.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateResponse.java deleted file mode 100644 index 1aaaaf4..0000000 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/GenerateResponse.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2024 T Jake Luciani - * - * The Jlama Project licenses this file to you under the Apache License, - * version 2.0 (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.github.tjake.jlama.cli.serve; - -import com.fasterxml.jackson.annotation.JsonProperty; - -public class GenerateResponse { - - @JsonProperty("response") - public final String response; - - @JsonProperty("done") - public final boolean done; - - public GenerateResponse(String response, boolean done) { - this.response = response; - this.done = done; - } -} diff --git a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/JlamaRestApi.java b/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/JlamaRestApi.java deleted file mode 100644 index 10b4aae..0000000 --- a/jlama-cli/src/main/java/com/github/tjake/jlama/cli/serve/JlamaRestApi.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2024 T Jake Luciani - * - * The Jlama Project licenses this file to you under the Apache License, - * version 2.0 (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package com.github.tjake.jlama.cli.serve; - -import com.github.tjake.jlama.model.functions.Generator; -import java.util.HashSet; -import java.util.Set; -import javax.ws.rs.core.Application; -import org.jboss.resteasy.plugins.providers.jackson.ResteasyJackson2Provider; - -public class JlamaRestApi extends Application { - - final Generator model; - - public JlamaRestApi(Generator model) { - this.model = model; - } - - @Override - public Set> getClasses() { - Set> resources = new HashSet<>(); - resources.add(ResteasyJackson2Provider.class); - return resources; - } - - @Override - public Set getSingletons() { - Set set = new HashSet<>(); - set.add(new GenerateResource(model)); - set.add(new EmbedResource()); - return set; - } -} diff --git a/jlama-core/pom.xml b/jlama-core/pom.xml index 48beafa..72214fd 100644 --- a/jlama-core/pom.xml +++ b/jlama-core/pom.xml @@ -43,4 +43,16 @@ 1.4.1 + + + + org.apache.maven.plugins + maven-compiler-plugin + + 21 + 21 + + + + diff --git a/jlama-core/src/main/java/com/github/tjake/jlama/tensor/operations/TensorOperationsProvider.java b/jlama-core/src/main/java/com/github/tjake/jlama/tensor/operations/TensorOperationsProvider.java index b2b6bf5..055aab7 100644 --- a/jlama-core/src/main/java/com/github/tjake/jlama/tensor/operations/TensorOperationsProvider.java +++ b/jlama-core/src/main/java/com/github/tjake/jlama/tensor/operations/TensorOperationsProvider.java @@ -66,7 +66,7 @@ private TensorOperations pickFastestImplementation() { ? new NaiveTensorOperations() : new PanamaTensorOperations(MachineSpec.VECTOR_TYPE); - logger.debug("Using {} ({})", pick.name(), "OffHeap"); + logger.info("Using {} ({})", pick.name(), "OffHeap"); return pick; } } diff --git a/jlama-net/openai_spec.yaml b/jlama-net/openai_spec.yaml new file mode 100644 index 0000000..c3edab7 --- /dev/null +++ b/jlama-net/openai_spec.yaml @@ -0,0 +1,3176 @@ +openapi: 3.0.0 +info: + title: OpenAI API + description: The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details. + version: "2.1.0" + termsOfService: https://openai.com/policies/terms-of-use + contact: + name: OpenAI Support + url: https://help.openai.com/ + license: + name: MIT + url: https://github.com/openai/openai-openapi/blob/master/LICENSE +servers: + - url: https://api.openai.com/v1 +tags: + - name: Assistants + description: Build Assistants that can call models and use tools. + - name: Audio + description: Turn audio into text or text into audio. + - name: Chat + description: Given a list of messages comprising a conversation, the model will return a response. + - name: Completions + description: Given a prompt, the model will return one or more predicted completions, and can also return the probabilities of alternative tokens at each position. + - name: Embeddings + description: Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. + - name: Fine-tuning + description: Manage fine-tuning jobs to tailor a model to your specific training data. + - name: Batch + description: Create large batches of API requests to run asynchronously. + - name: Files + description: Files are used to upload documents that can be used with features like Assistants and Fine-tuning. + - name: Uploads + description: Use Uploads to upload large files in multiple parts. + - name: Images + description: Given a prompt and/or an input image, the model will generate a new image. + - name: Models + description: List and describe the various models available in the API. + - name: Moderations + description: Given a input text, outputs if the model classifies it as potentially harmful. +paths: + # Note: When adding an endpoint, make sure you also add it in the `groups` section, in the end of this file, + # under the appropriate group + /chat/completions: + post: + operationId: createChatCompletion + tags: + - Chat + summary: Creates a model response for the given chat conversation. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateChatCompletionRequest" + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/CreateChatCompletionResponse" + + x-oaiMeta: + name: Create chat completion + group: chat + returns: | + Returns a [chat completion](/docs/api-reference/chat/object) object, or a streamed sequence of [chat completion chunk](/docs/api-reference/chat/streaming) objects if the request is streamed. + path: create + examples: + - title: Default + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_model_id", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" + } + ] + }' + python: | + from openai import OpenAI + client = OpenAI() + + completion = client.chat.completions.create( + model="VAR_model_id", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ] + ) + + print(completion.choices[0].message) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const completion = await openai.chat.completions.create({ + messages: [{ role: "system", content: "You are a helpful assistant." }], + model: "VAR_model_id", + }); + + console.log(completion.choices[0]); + } + + main(); + response: &chat_completion_example | + { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4o-mini", + "system_fingerprint": "fp_44709d6fcb", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "\n\nHello there, how may I assist you today?", + }, + "logprobs": null, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 12, + "total_tokens": 21 + } + } + - title: Image input + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + } + ] + } + ], + "max_tokens": 300 + }' + python: | + from openai import OpenAI + + client = OpenAI() + + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + }, + ], + } + ], + max_tokens=300, + ) + + print(response.choices[0]) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const response = await openai.chat.completions.create({ + model: "gpt-4-turbo", + messages: [ + { + role: "user", + content: [ + { type: "text", text: "What's in this image?" }, + { + type: "image_url", + image_url: + "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + }, + ], + }, + ], + }); + console.log(response.choices[0]); + } + main(); + response: &chat_completion_image_example | + { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-4o-mini", + "system_fingerprint": "fp_44709d6fcb", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "\n\nThis image shows a wooden boardwalk extending through a lush green marshland.", + }, + "logprobs": null, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 12, + "total_tokens": 21 + } + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_model_id", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" + } + ], + "stream": true + }' + python: | + from openai import OpenAI + client = OpenAI() + + completion = client.chat.completions.create( + model="VAR_model_id", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ], + stream=True + ) + + for chunk in completion: + print(chunk.choices[0].delta) + + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const completion = await openai.chat.completions.create({ + model: "VAR_model_id", + messages: [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ], + stream: true, + }); + + for await (const chunk of completion) { + console.log(chunk.choices[0].delta.content); + } + } + + main(); + response: &chat_completion_chunk_example | + {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]} + + {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + .... + + {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-4o-mini", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + - title: Functions + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": "What'\''s the weather like in Boston today?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "tool_choice": "auto" + }' + python: | + from openai import OpenAI + client = OpenAI() + + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + } + } + ] + messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] + completion = client.chat.completions.create( + model="VAR_model_id", + messages=messages, + tools=tools, + tool_choice="auto" + ) + + print(completion) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]; + const tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + } + } + ]; + + const response = await openai.chat.completions.create({ + model: "gpt-4-turbo", + messages: messages, + tools: tools, + tool_choice: "auto", + }); + + console.log(response); + } + + main(); + response: &chat_completion_function_example | + { + "id": "chatcmpl-abc123", + "object": "chat.completion", + "created": 1699896916, + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\n\"location\": \"Boston, MA\"\n}" + } + } + ] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 82, + "completion_tokens": 17, + "total_tokens": 99 + } + } + - title: Logprobs + request: + curl: | + curl https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_model_id", + "messages": [ + { + "role": "user", + "content": "Hello!" + } + ], + "logprobs": true, + "top_logprobs": 2 + }' + python: | + from openai import OpenAI + client = OpenAI() + + completion = client.chat.completions.create( + model="VAR_model_id", + messages=[ + {"role": "user", "content": "Hello!"} + ], + logprobs=True, + top_logprobs=2 + ) + + print(completion.choices[0].message) + print(completion.choices[0].logprobs) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const completion = await openai.chat.completions.create({ + messages: [{ role: "user", content: "Hello!" }], + model: "VAR_model_id", + logprobs: true, + top_logprobs: 2, + }); + + console.log(completion.choices[0]); + } + + main(); + response: | + { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1702685778, + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" + }, + "logprobs": { + "content": [ + { + "token": "Hello", + "logprob": -0.31725305, + "bytes": [72, 101, 108, 108, 111], + "top_logprobs": [ + { + "token": "Hello", + "logprob": -0.31725305, + "bytes": [72, 101, 108, 108, 111] + }, + { + "token": "Hi", + "logprob": -1.3190403, + "bytes": [72, 105] + } + ] + }, + { + "token": "!", + "logprob": -0.02380986, + "bytes": [ + 33 + ], + "top_logprobs": [ + { + "token": "!", + "logprob": -0.02380986, + "bytes": [33] + }, + { + "token": " there", + "logprob": -3.787621, + "bytes": [32, 116, 104, 101, 114, 101] + } + ] + }, + { + "token": " How", + "logprob": -0.000054669687, + "bytes": [32, 72, 111, 119], + "top_logprobs": [ + { + "token": " How", + "logprob": -0.000054669687, + "bytes": [32, 72, 111, 119] + }, + { + "token": "<|end|>", + "logprob": -10.953937, + "bytes": null + } + ] + }, + { + "token": " can", + "logprob": -0.015801601, + "bytes": [32, 99, 97, 110], + "top_logprobs": [ + { + "token": " can", + "logprob": -0.015801601, + "bytes": [32, 99, 97, 110] + }, + { + "token": " may", + "logprob": -4.161023, + "bytes": [32, 109, 97, 121] + } + ] + }, + { + "token": " I", + "logprob": -3.7697225e-6, + "bytes": [ + 32, + 73 + ], + "top_logprobs": [ + { + "token": " I", + "logprob": -3.7697225e-6, + "bytes": [32, 73] + }, + { + "token": " assist", + "logprob": -13.596657, + "bytes": [32, 97, 115, 115, 105, 115, 116] + } + ] + }, + { + "token": " assist", + "logprob": -0.04571125, + "bytes": [32, 97, 115, 115, 105, 115, 116], + "top_logprobs": [ + { + "token": " assist", + "logprob": -0.04571125, + "bytes": [32, 97, 115, 115, 105, 115, 116] + }, + { + "token": " help", + "logprob": -3.1089056, + "bytes": [32, 104, 101, 108, 112] + } + ] + }, + { + "token": " you", + "logprob": -5.4385737e-6, + "bytes": [32, 121, 111, 117], + "top_logprobs": [ + { + "token": " you", + "logprob": -5.4385737e-6, + "bytes": [32, 121, 111, 117] + }, + { + "token": " today", + "logprob": -12.807695, + "bytes": [32, 116, 111, 100, 97, 121] + } + ] + }, + { + "token": " today", + "logprob": -0.0040071653, + "bytes": [32, 116, 111, 100, 97, 121], + "top_logprobs": [ + { + "token": " today", + "logprob": -0.0040071653, + "bytes": [32, 116, 111, 100, 97, 121] + }, + { + "token": "?", + "logprob": -5.5247097, + "bytes": [63] + } + ] + }, + { + "token": "?", + "logprob": -0.0008108172, + "bytes": [63], + "top_logprobs": [ + { + "token": "?", + "logprob": -0.0008108172, + "bytes": [63] + }, + { + "token": "?\n", + "logprob": -7.184561, + "bytes": [63, 10] + } + ] + } + ] + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 9, + "total_tokens": 18 + }, + "system_fingerprint": null + } + + /completions: + post: + operationId: createCompletion + tags: + - Completions + summary: Creates a completion for the provided prompt and parameters. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateCompletionRequest" + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/CreateCompletionResponse" + x-oaiMeta: + name: Create completion + group: completions + returns: | + Returns a [completion](/docs/api-reference/completions/object) object, or a sequence of completion objects if the request is streamed. + legacy: true + examples: + - title: No streaming + request: + curl: | + curl https://api.openai.com/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_model_id", + "prompt": "Say this is a test", + "max_tokens": 7, + "temperature": 0 + }' + python: | + from openai import OpenAI + client = OpenAI() + + client.completions.create( + model="VAR_model_id", + prompt="Say this is a test", + max_tokens=7, + temperature=0 + ) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const completion = await openai.completions.create({ + model: "VAR_model_id", + prompt: "Say this is a test.", + max_tokens: 7, + temperature: 0, + }); + + console.log(completion); + } + main(); + response: | + { + "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", + "object": "text_completion", + "created": 1589478378, + "model": "VAR_model_id", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "text": "\n\nThis is indeed a test", + "index": 0, + "logprobs": null, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "total_tokens": 12 + } + } + - title: Streaming + request: + curl: | + curl https://api.openai.com/v1/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "VAR_model_id", + "prompt": "Say this is a test", + "max_tokens": 7, + "temperature": 0, + "stream": true + }' + python: | + from openai import OpenAI + client = OpenAI() + + for chunk in client.completions.create( + model="VAR_model_id", + prompt="Say this is a test", + max_tokens=7, + temperature=0, + stream=True + ): + print(chunk.choices[0].text) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const stream = await openai.completions.create({ + model: "VAR_model_id", + prompt: "Say this is a test.", + stream: true, + }); + + for await (const chunk of stream) { + console.log(chunk.choices[0].text) + } + } + main(); + response: | + { + "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe", + "object": "text_completion", + "created": 1690759702, + "choices": [ + { + "text": "This", + "index": 0, + "logprobs": null, + "finish_reason": null + } + ], + "model": "gpt-3.5-turbo-instruct" + "system_fingerprint": "fp_44709d6fcb", + } + + /embeddings: + post: + operationId: createEmbedding + tags: + - Embeddings + summary: Creates an embedding vector representing the input text. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateEmbeddingRequest" + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/CreateEmbeddingResponse" + x-oaiMeta: + name: Create embeddings + group: embeddings + returns: A list of [embedding](/docs/api-reference/embeddings/object) objects. + examples: + request: + curl: | + curl https://api.openai.com/v1/embeddings \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "input": "The food was delicious and the waiter...", + "model": "text-embedding-ada-002", + "encoding_format": "float" + }' + python: | + from openai import OpenAI + client = OpenAI() + + client.embeddings.create( + model="text-embedding-ada-002", + input="The food was delicious and the waiter...", + encoding_format="float" + ) + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const embedding = await openai.embeddings.create({ + model: "text-embedding-ada-002", + input: "The quick brown fox jumped over the lazy dog", + encoding_format: "float", + }); + + console.log(embedding); + } + + main(); + response: | + { + "object": "list", + "data": [ + { + "object": "embedding", + "embedding": [ + 0.0023064255, + -0.009327292, + .... (1536 floats total for ada-002) + -0.0028842222, + ], + "index": 0 + } + ], + "model": "text-embedding-ada-002", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + + /models: + get: + operationId: listModels + tags: + - Models + summary: Lists the currently available models, and provides basic information about each one such as the owner and availability. + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/ListModelsResponse" + x-oaiMeta: + name: List models + group: models + returns: A list of [model](/docs/api-reference/models/object) objects. + examples: + request: + curl: | + curl https://api.openai.com/v1/models \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: | + from openai import OpenAI + client = OpenAI() + + client.models.list() + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const list = await openai.models.list(); + + for await (const model of list) { + console.log(model); + } + } + main(); + response: | + { + "object": "list", + "data": [ + { + "id": "model-id-0", + "object": "model", + "created": 1686935002, + "owned_by": "organization-owner" + }, + { + "id": "model-id-1", + "object": "model", + "created": 1686935002, + "owned_by": "organization-owner", + }, + { + "id": "model-id-2", + "object": "model", + "created": 1686935002, + "owned_by": "openai" + }, + ], + "object": "list" + } + /models/{model}: + get: + operationId: retrieveModel + tags: + - Models + summary: Retrieves a model instance, providing basic information about the model such as the owner and permissioning. + parameters: + - in: path + name: model + required: true + schema: + type: string + # ideally this will be an actual ID, so this will always work from browser + example: gpt-3.5-turbo + description: The ID of the model to use for this request + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/Model" + x-oaiMeta: + name: Retrieve model + group: models + returns: The [model](/docs/api-reference/models/object) object matching the specified ID. + examples: + request: + curl: | + curl https://api.openai.com/v1/models/VAR_model_id \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: | + from openai import OpenAI + client = OpenAI() + + client.models.retrieve("VAR_model_id") + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const model = await openai.models.retrieve("VAR_model_id"); + + console.log(model); + } + + main(); + response: &retrieve_model_response | + { + "id": "VAR_model_id", + "object": "model", + "created": 1686935002, + "owned_by": "openai" + } + delete: + operationId: deleteModel + tags: + - Models + summary: Delete a fine-tuned model. You must have the Owner role in your organization to delete a model. + parameters: + - in: path + name: model + required: true + schema: + type: string + example: ft:gpt-3.5-turbo:acemeco:suffix:abc123 + description: The model to delete + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/DeleteModelResponse" + x-oaiMeta: + name: Delete a fine-tuned model + group: models + returns: Deletion status. + examples: + request: + curl: | + curl https://api.openai.com/v1/models/ft:gpt-3.5-turbo:acemeco:suffix:abc123 \ + -X DELETE \ + -H "Authorization: Bearer $OPENAI_API_KEY" + python: | + from openai import OpenAI + client = OpenAI() + + client.models.delete("ft:gpt-3.5-turbo:acemeco:suffix:abc123") + node.js: |- + import OpenAI from "openai"; + + const openai = new OpenAI(); + + async function main() { + const model = await openai.models.del("ft:gpt-3.5-turbo:acemeco:suffix:abc123"); + + console.log(model); + } + main(); + response: | + { + "id": "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "object": "model", + "deleted": true + } + +components: + securitySchemes: + ApiKeyAuth: + type: http + scheme: "bearer" + + schemas: + Error: + type: object + properties: + code: + type: string + nullable: true + message: + type: string + nullable: false + param: + type: string + nullable: true + type: + type: string + nullable: false + required: + - type + - message + - param + - code + ErrorResponse: + type: object + properties: + error: + $ref: "#/components/schemas/Error" + required: + - error + + ListModelsResponse: + type: object + properties: + object: + type: string + enum: [list] + data: + type: array + items: + $ref: "#/components/schemas/Model" + required: + - object + - data + DeleteModelResponse: + type: object + properties: + id: + type: string + deleted: + type: boolean + object: + type: string + required: + - id + - object + - deleted + + CreateCompletionRequest: + type: object + properties: + model: + description: &model_description | + ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. + anyOf: + - type: string + - type: string + enum: ["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"] + x-oaiTypeLabel: string + prompt: + description: &completions_prompt_description | + The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. + nullable: true + oneOf: + - type: string + default: "" + example: "This is a test." + - type: array + items: + type: string + default: "" + example: "This is a test." + - type: array + minItems: 1 + items: + type: integer + example: "[1212, 318, 257, 1332, 13]" + - type: array + minItems: 1 + items: + type: array + minItems: 1 + items: + type: integer + example: "[[1212, 318, 257, 1332, 13]]" + best_of: + type: integer + default: 1 + minimum: 0 + maximum: 20 + nullable: true + description: &completions_best_of_description | + Generates `best_of` completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + echo: + type: boolean + default: false + nullable: true + description: &completions_echo_description > + Echo back the prompt in addition to the completion + frequency_penalty: + type: number + default: 0 + minimum: -2 + maximum: 2 + nullable: true + description: &completions_frequency_penalty_description | + Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + + [See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details) + logit_bias: &completions_logit_bias + type: object + x-oaiTypeLabel: map + default: null + nullable: true + additionalProperties: + type: integer + description: &completions_logit_bias_description | + Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated. + logprobs: &completions_logprobs_configuration + type: integer + minimum: 0 + maximum: 5 + default: null + nullable: true + description: &completions_logprobs_description | + Include the log probabilities on the `logprobs` most likely output tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + max_tokens: + type: integer + minimum: 0 + default: 16 + example: 16 + nullable: true + description: &completions_max_tokens_description | + The maximum number of [tokens](/tokenizer) that can be generated in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. + n: + type: integer + minimum: 1 + maximum: 128 + default: 1 + example: 1 + nullable: true + description: &completions_completions_description | + How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`. + presence_penalty: + type: number + default: 0 + minimum: -2 + maximum: 2 + nullable: true + description: &completions_presence_penalty_description | + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + + [See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details) + seed: &completions_seed_param + type: integer + nullable: true + description: | + If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. + + Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + stop: + description: &completions_stop_description > + Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + default: null + nullable: true + oneOf: + - type: string + default: <|endoftext|> + example: "\n" + nullable: true + - type: array + minItems: 1 + maxItems: 4 + items: + type: string + example: '["\n"]' + stream: + description: > + Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + type: boolean + nullable: true + default: false + stream_options: + $ref: "#/components/schemas/ChatCompletionStreamOptions" + suffix: + description: | + The suffix that comes after a completion of inserted text. + + This parameter is only supported for `gpt-3.5-turbo-instruct`. + default: null + nullable: true + type: string + example: "test." + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: &completions_temperature_description | + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: &completions_top_p_description | + An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + user: &end_user_param_configuration + type: string + example: user-1234 + description: | + A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). + required: + - model + - prompt + + CreateCompletionResponse: + type: object + description: | + Represents a completion response from the API. Note: both the streamed and non-streamed response objects share the same shape (unlike the chat endpoint). + properties: + id: + type: string + description: A unique identifier for the completion. + choices: + type: array + description: The list of completion choices the model generated for the input prompt. + items: + type: object + required: + - finish_reason + - index + - logprobs + - text + properties: + finish_reason: + type: string + description: &completion_finish_reason_description | + The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence, + `length` if the maximum number of tokens specified in the request was reached, + or `content_filter` if content was omitted due to a flag from our content filters. + enum: ["stop", "length", "content_filter"] + index: + type: integer + logprobs: + type: object + nullable: true + properties: + text_offset: + type: array + items: + type: integer + token_logprobs: + type: array + items: + type: number + tokens: + type: array + items: + type: string + top_logprobs: + type: array + items: + type: object + additionalProperties: + type: number + text: + type: string + created: + type: integer + description: The Unix timestamp (in seconds) of when the completion was created. + model: + type: string + description: The model used for completion. + system_fingerprint: + type: string + description: | + This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. + object: + type: string + description: The object type, which is always "text_completion" + enum: [text_completion] + usage: + $ref: "#/components/schemas/CompletionUsage" + required: + - id + - object + - created + - model + - choices + x-oaiMeta: + name: The completion object + legacy: true + example: | + { + "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", + "object": "text_completion", + "created": 1589478378, + "model": "gpt-4-turbo", + "choices": [ + { + "text": "\n\nThis is indeed a test", + "index": 0, + "logprobs": null, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "total_tokens": 12 + } + } + + ChatCompletionRequestMessageContentPart: + oneOf: + - $ref: "#/components/schemas/ChatCompletionRequestMessageContentPartText" + - $ref: "#/components/schemas/ChatCompletionRequestMessageContentPartImage" + x-oaiExpandable: true + + ChatCompletionRequestMessageContentPartImage: + type: object + title: Image content part + properties: + type: + type: string + enum: ["image_url"] + description: The type of the content part. + image_url: + type: object + properties: + url: + type: string + description: Either a URL of the image or the base64 encoded image data. + format: uri + detail: + type: string + description: Specifies the detail level of the image. Learn more in the [Vision guide](/docs/guides/vision/low-or-high-fidelity-image-understanding). + enum: ["auto", "low", "high"] + default: "auto" + required: + - url + required: + - type + - image_url + + ChatCompletionRequestMessageContentPartText: + type: object + title: Text content part + properties: + type: + type: string + enum: ["text"] + description: The type of the content part. + text: + type: string + description: The text content. + required: + - type + - text + + ChatCompletionRequestMessage: + oneOf: + - $ref: "#/components/schemas/ChatCompletionRequestSystemMessage" + - $ref: "#/components/schemas/ChatCompletionRequestUserMessage" + - $ref: "#/components/schemas/ChatCompletionRequestAssistantMessage" + - $ref: "#/components/schemas/ChatCompletionRequestToolMessage" + - $ref: "#/components/schemas/ChatCompletionRequestFunctionMessage" + discriminator: + mapping: + system: "#/components/schemas/ChatCompletionRequestSystemMessage" + user: "#/components/schemas/ChatCompletionRequestUserMessage" + assistant: "#/components/schemas/ChatCompletionRequestAssistantMessage" + tool: "#/components/schemas/ChatCompletionRequestToolMessage" + function: "#/components/schemas/ChatCompletionRequestFunctionMessage" + propertyName: role + x-oaiExpandable: true + + ChatCompletionRequestSystemMessage: + type: object + title: System message + properties: + content: + description: The contents of the system message. + type: string + role: + type: string + default: "system" + description: The role of the messages author, in this case `system`. + name: + type: string + description: An optional name for the participant. Provides the model information to differentiate between participants of the same role. + required: + - content + - role + + ChatCompletionRequestUserMessage: + type: object + title: User message + properties: + content: + description: | + The contents of the user message. + oneOf: + - type: string + description: The text contents of the message. + title: Text content + - type: array + description: An array of content parts with a defined type, each can be of type `text` or `image_url` when passing in images. You can pass multiple images by adding multiple `image_url` content parts. Image input is only supported when using the `gpt-4o` model. + title: Array of content parts + items: + $ref: "#/components/schemas/ChatCompletionRequestMessageContentPart" + minItems: 1 + x-oaiExpandable: true + role: + type: string + default: "user" + description: The role of the messages author, in this case `user`. + name: + type: string + description: An optional name for the participant. Provides the model information to differentiate between participants of the same role. + required: + - content + - role + + ChatCompletionRequestAssistantMessage: + type: object + title: Assistant message + properties: + content: + nullable: true + type: string + description: | + The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified. + role: + type: string + default: "assistant" + description: The role of the messages author, in this case `assistant`. + name: + type: string + description: An optional name for the participant. Provides the model information to differentiate between participants of the same role. + tool_calls: + $ref: "#/components/schemas/ChatCompletionMessageToolCalls" + function_call: + type: object + deprecated: true + description: "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model." + nullable: true + properties: + arguments: + type: string + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + name: + type: string + description: The name of the function to call. + required: + - arguments + - name + required: + - role + + FineTuneChatCompletionRequestAssistantMessage: + allOf: + - type: object + title: Assistant message + deprecated: false + properties: + weight: + type: integer + enum: [0, 1] + description: "Controls whether the assistant message is trained against (0 or 1)" + - $ref: "#/components/schemas/ChatCompletionRequestAssistantMessage" + required: + - role + + ChatCompletionRequestToolMessage: + type: object + title: Tool message + properties: + role: + type: string + default: "tool" + description: The role of the messages author, in this case `tool`. + content: + type: string + description: The contents of the tool message. + tool_call_id: + type: string + description: Tool call that this message is responding to. + required: + - role + - content + - tool_call_id + + ChatCompletionRequestFunctionMessage: + type: object + title: Function message + deprecated: true + properties: + role: + type: string + default: "function" + description: The role of the messages author, in this case `function`. + content: + nullable: true + type: string + description: The contents of the function message. + name: + type: string + description: The name of the function to call. + required: + - role + - content + - name + + FunctionParameters: + type: object + description: "The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. \n\nOmitting `parameters` defines a function with an empty parameter list." + additionalProperties: true + + ChatCompletionFunctions: + type: object + deprecated: true + properties: + description: + type: string + description: A description of what the function does, used by the model to choose when and how to call the function. + name: + type: string + description: The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. + parameters: + $ref: "#/components/schemas/FunctionParameters" + required: + - name + + ChatCompletionFunctionCallOption: + type: object + description: > + Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. + properties: + name: + type: string + description: The name of the function to call. + required: + - name + + ChatCompletionTool: + type: object + properties: + type: + type: string + default: "function" + description: The type of the tool. Currently, only `function` is supported. + function: + $ref: "#/components/schemas/FunctionObject" + required: + - type + - function + + FunctionObject: + type: object + properties: + description: + type: string + description: A description of what the function does, used by the model to choose when and how to call the function. + name: + type: string + description: The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. + parameters: + $ref: "#/components/schemas/FunctionParameters" + required: + - name + + ChatCompletionToolChoiceOption: + description: | + Controls which (if any) tool is called by the model. + `none` means the model will not call any tool and instead generates a message. + `auto` means the model can pick between generating a message or calling one or more tools. + `required` means the model must call one or more tools. + Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools are present. + oneOf: + - type: string + description: > + `none` means the model will not call any tool and instead generates a message. + `auto` means the model can pick between generating a message or calling one or more tools. + `required` means the model must call one or more tools. + enum: [none, auto, required] + - $ref: "#/components/schemas/ChatCompletionNamedToolChoice" + x-oaiExpandable: true + + ChatCompletionNamedToolChoice: + type: object + description: Specifies a tool the model should use. Use to force the model to call a specific function. + properties: + type: + type: string + default: "function" + description: The type of the tool. Currently, only `function` is supported. + function: + type: object + properties: + name: + type: string + description: The name of the function to call. + required: + - name + required: + - type + - function + + ParallelToolCalls: + description: Whether to enable [parallel function calling](/docs/guides/function-calling/parallel-function-calling) during tool use. + type: boolean + default: true + + ChatCompletionMessageToolCalls: + type: array + description: The tool calls generated by the model, such as function calls. + items: + $ref: "#/components/schemas/ChatCompletionMessageToolCall" + + ChatCompletionMessageToolCall: + type: object + properties: + # TODO: index included when streaming + id: + type: string + description: The ID of the tool call. + type: + type: string + default: "function" + description: The type of the tool. Currently, only `function` is supported. + function: + type: object + description: The function that the model called. + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + required: + - name + - arguments + required: + - id + - type + - function + + ChatCompletionMessageToolCallChunk: + type: object + properties: + index: + type: integer + id: + type: string + description: The ID of the tool call. + type: + type: string + default: "function" + description: The type of the tool. Currently, only `function` is supported. + function: + type: object + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + required: + - index + + # Note, this isn't referenced anywhere, but is kept as a convenience to record all possible roles in one place. + ChatCompletionRole: + type: string + description: The role of the author of a message + enum: + - system + - user + - assistant + - tool + - function + + ChatCompletionStreamOptions: + description: | + Options for streaming response. Only set this when you set `stream: true`. + type: object + nullable: true + default: null + properties: + include_usage: + type: boolean + description: | + If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value. + + ChatCompletionResponseMessage: + type: object + description: A chat completion message generated by the model. + properties: + content: + type: string + description: The contents of the message. + nullable: true + tool_calls: + $ref: "#/components/schemas/ChatCompletionMessageToolCalls" + role: + type: string + enum: ["assistant"] + description: The role of the author of this message. + function_call: + type: object + deprecated: true + description: "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model." + properties: + arguments: + type: string + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + name: + type: string + description: The name of the function to call. + required: + - name + - arguments + required: + - role + - content + + ChatCompletionStreamResponseDelta: + type: object + description: A chat completion delta generated by streamed model responses. + properties: + content: + type: string + description: The contents of the chunk message. + nullable: true + function_call: + deprecated: true + type: object + description: "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model." + properties: + arguments: + type: string + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + name: + type: string + description: The name of the function to call. + tool_calls: + type: array + items: + $ref: "#/components/schemas/ChatCompletionMessageToolCallChunk" + role: + type: string + enum: ["system", "user", "assistant", "tool"] + description: The role of the author of this message. + + CreateChatCompletionRequest: + type: object + properties: + messages: + description: A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + type: array + minItems: 1 + items: + $ref: "#/components/schemas/ChatCompletionRequestMessage" + model: + description: ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API. + example: "gpt-4-turbo" + type: string + frequency_penalty: + type: number + default: 0 + minimum: -2 + maximum: 2 + nullable: true + description: *completions_frequency_penalty_description + logit_bias: + type: object + x-oaiTypeLabel: map + default: null + nullable: true + additionalProperties: + type: integer + description: | + Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. + logprobs: + description: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of `message`. + type: boolean + default: false + nullable: true + top_logprobs: + description: An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used. + type: integer + minimum: 0 + maximum: 20 + nullable: true + max_tokens: + description: | + The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. + + The total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. + type: integer + nullable: true + n: + type: integer + minimum: 1 + maximum: 128 + default: 1 + example: 1 + nullable: true + description: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs. + presence_penalty: + type: number + default: 0 + minimum: -2 + maximum: 2 + nullable: true + description: *completions_presence_penalty_description + response_format: + type: object + description: | + An object specifying the format that the model must output. Compatible with [GPT-4 Turbo](/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length. + properties: + type: + type: string + enum: ["text", "json_object"] + example: "json_object" + default: "text" + description: Must be one of `text` or `json_object`. + seed: + type: integer + nullable: true + description: | + This feature is in Beta. + If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. + Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend. + x-oaiMeta: + beta: true + service_tier: + description: | + Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service: + - If set to 'auto', the system will utilize scale tier credits until they are exhausted. + - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` utilized. + type: string + enum: ["auto", "default"] + nullable: true + default: null + stop: + description: | + Up to 4 sequences where the API will stop generating further tokens. + default: null + oneOf: + - type: string + nullable: true + - type: array + minItems: 1 + maxItems: 4 + items: + type: string + stream: + description: > + If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + type: boolean + nullable: true + default: false + stream_options: + $ref: "#/components/schemas/ChatCompletionStreamOptions" + temperature: + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + description: *completions_temperature_description + top_p: + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + description: *completions_top_p_description + tools: + type: array + description: > + A list of tools the model may call. Currently, only functions are supported as a tool. + Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported. + items: + $ref: "#/components/schemas/ChatCompletionTool" + tool_choice: + $ref: "#/components/schemas/ChatCompletionToolChoiceOption" + parallel_tool_calls: + $ref: "#/components/schemas/ParallelToolCalls" + user: *end_user_param_configuration + function_call: + deprecated: true + description: | + Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + `none` means the model will not call a function and instead generates a message. + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. + + `none` is the default when no functions are present. `auto` is the default if functions are present. + oneOf: + - type: string + description: > + `none` means the model will not call a function and instead generates a message. + `auto` means the model can pick between generating a message or calling a function. + enum: [none, auto] + - $ref: "#/components/schemas/ChatCompletionFunctionCallOption" + x-oaiExpandable: true + functions: + deprecated: true + description: | + Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + type: array + minItems: 1 + maxItems: 128 + items: + $ref: "#/components/schemas/ChatCompletionFunctions" + + required: + - model + - messages + + CreateChatCompletionResponse: + type: object + description: Represents a chat completion response returned by model, based on the provided input. + properties: + id: + type: string + description: A unique identifier for the chat completion. + choices: + type: array + description: A list of chat completion choices. Can be more than one if `n` is greater than 1. + items: + type: object + required: + - finish_reason + - index + - message + - logprobs + properties: + finish_reason: + type: string + description: &chat_completion_finish_reason_description | + The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence, + `length` if the maximum number of tokens specified in the request was reached, + `content_filter` if content was omitted due to a flag from our content filters, + `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function. + enum: + [ + "stop", + "length", + "tool_calls", + "content_filter", + "function_call", + ] + index: + type: integer + description: The index of the choice in the list of choices. + message: + $ref: "#/components/schemas/ChatCompletionResponseMessage" + logprobs: &chat_completion_response_logprobs + description: Log probability information for the choice. + type: object + nullable: true + properties: + content: + description: A list of message content tokens with log probability information. + type: array + items: + $ref: "#/components/schemas/ChatCompletionTokenLogprob" + nullable: true + required: + - content + created: + type: integer + description: The Unix timestamp (in seconds) of when the chat completion was created. + model: + type: string + description: The model used for the chat completion. + service_tier: + description: The service tier used for processing the request. This field is only included if the `service_tier` parameter is specified in the request. + type: string + enum: ["scale", "default"] + example: "scale" + nullable: true + system_fingerprint: + type: string + description: | + This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. + object: + type: string + description: The object type, which is always `chat.completion`. + enum: [chat.completion] + usage: + $ref: "#/components/schemas/CompletionUsage" + required: + - choices + - created + - id + - model + - object + x-oaiMeta: + name: The chat completion object + group: chat + example: *chat_completion_example + + CreateChatCompletionFunctionResponse: + type: object + description: Represents a chat completion response returned by model, based on the provided input. + properties: + id: + type: string + description: A unique identifier for the chat completion. + choices: + type: array + description: A list of chat completion choices. Can be more than one if `n` is greater than 1. + items: + type: object + required: + - finish_reason + - index + - message + - logprobs + properties: + finish_reason: + type: string + description: + &chat_completion_function_finish_reason_description | + The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence, `length` if the maximum number of tokens specified in the request was reached, `content_filter` if content was omitted due to a flag from our content filters, or `function_call` if the model called a function. + enum: + ["stop", "length", "function_call", "content_filter"] + index: + type: integer + description: The index of the choice in the list of choices. + message: + $ref: "#/components/schemas/ChatCompletionResponseMessage" + created: + type: integer + description: The Unix timestamp (in seconds) of when the chat completion was created. + model: + type: string + description: The model used for the chat completion. + system_fingerprint: + type: string + description: | + This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. + object: + type: string + description: The object type, which is always `chat.completion`. + enum: [chat.completion] + usage: + $ref: "#/components/schemas/CompletionUsage" + required: + - choices + - created + - id + - model + - object + x-oaiMeta: + name: The chat completion object + group: chat + example: *chat_completion_function_example + + ChatCompletionTokenLogprob: + type: object + properties: + token: &chat_completion_response_logprobs_token + description: The token. + type: string + logprob: &chat_completion_response_logprobs_token_logprob + description: The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. + type: number + bytes: &chat_completion_response_logprobs_bytes + description: A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token. + type: array + items: + type: integer + nullable: true + top_logprobs: + description: List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned. + type: array + items: + type: object + properties: + token: *chat_completion_response_logprobs_token + logprob: *chat_completion_response_logprobs_token_logprob + bytes: *chat_completion_response_logprobs_bytes + required: + - token + - logprob + - bytes + required: + - token + - logprob + - bytes + - top_logprobs + + CreateChatCompletionStreamResponse: + type: object + description: Represents a streamed chunk of a chat completion response returned by model, based on the provided input. + properties: + id: + type: string + description: A unique identifier for the chat completion. Each chunk has the same ID. + choices: + type: array + description: | + A list of chat completion choices. Can contain more than one elements if `n` is greater than 1. Can also be empty for the + last chunk if you set `stream_options: {"include_usage": true}`. + items: + type: object + required: + - delta + - finish_reason + - index + properties: + delta: + $ref: "#/components/schemas/ChatCompletionStreamResponseDelta" + logprobs: *chat_completion_response_logprobs + finish_reason: + type: string + description: *chat_completion_finish_reason_description + enum: + [ + "stop", + "length", + "tool_calls", + "content_filter", + "function_call", + ] + nullable: true + index: + type: integer + description: The index of the choice in the list of choices. + created: + type: integer + description: The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp. + model: + type: string + description: The model to generate the completion. + service_tier: + description: The service tier used for processing the request. This field is only included if the `service_tier` parameter is specified in the request. + type: string + enum: ["scale", "default"] + example: "scale" + nullable: true + system_fingerprint: + type: string + description: | + This fingerprint represents the backend configuration that the model runs with. + Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. + object: + type: string + description: The object type, which is always `chat.completion.chunk`. + enum: [chat.completion.chunk] + usage: + type: object + description: | + An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request. + When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request. + properties: + completion_tokens: + type: integer + description: Number of tokens in the generated completion. + prompt_tokens: + type: integer + description: Number of tokens in the prompt. + total_tokens: + type: integer + description: Total number of tokens used in the request (prompt + completion). + required: + - prompt_tokens + - completion_tokens + - total_tokens + required: + - choices + - created + - id + - model + - object + x-oaiMeta: + name: The chat completion chunk object + group: chat + example: *chat_completion_chunk_example + + CreateChatCompletionImageResponse: + type: object + description: Represents a streamed chunk of a chat completion response returned by model, based on the provided input. + x-oaiMeta: + name: The chat completion chunk object + group: chat + example: *chat_completion_image_example + + CreateImageRequest: + type: object + properties: + prompt: + description: A text description of the desired image(s). The maximum length is 1000 characters for `dall-e-2` and 4000 characters for `dall-e-3`. + type: string + example: "A cute baby sea otter" + model: + anyOf: + - type: string + - type: string + enum: ["dall-e-2", "dall-e-3"] + x-oaiTypeLabel: string + example: "dall-e-3" + nullable: true + description: The model to use for image generation. + n: &images_n + type: integer + minimum: 1 + maximum: 10 + default: 1 + example: 1 + nullable: true + description: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported. + quality: + type: string + enum: ["standard", "hd"] + default: "standard" + example: "standard" + description: The quality of the image that will be generated. `hd` creates images with finer details and greater consistency across the image. This param is only supported for `dall-e-3`. + response_format: &images_response_format + type: string + enum: ["url", "b64_json"] + default: "url" + example: "url" + nullable: true + description: The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated. + size: &images_size + type: string + enum: ["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] + default: "1024x1024" + example: "1024x1024" + nullable: true + description: The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models. + style: + type: string + enum: ["vivid", "natural"] + default: "vivid" + example: "vivid" + nullable: true + description: The style of the generated images. Must be one of `vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for `dall-e-3`. + user: *end_user_param_configuration + required: + - prompt + + ImagesResponse: + properties: + created: + type: integer + data: + type: array + items: + $ref: "#/components/schemas/Image" + required: + - created + - data + + Image: + type: object + description: Represents the url or the content of an image generated by the OpenAI API. + properties: + b64_json: + type: string + description: The base64-encoded JSON of the generated image, if `response_format` is `b64_json`. + url: + type: string + description: The URL of the generated image, if `response_format` is `url` (default). + revised_prompt: + type: string + description: The prompt that was used to generate the image, if there was any revision to the prompt. + x-oaiMeta: + name: The image object + example: | + { + "url": "...", + "revised_prompt": "..." + } + + CreateEmbeddingRequest: + type: object + additionalProperties: false + properties: + input: + description: | + Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. + example: "The quick brown fox jumped over the lazy dog" + oneOf: + - type: string + title: string + description: The string that will be turned into an embedding. + default: "" + example: "This is a test." + - type: array + title: array + description: The array of strings that will be turned into an embedding. + minItems: 1 + maxItems: 2048 + items: + type: string + default: "" + example: "['This is a test.']" + - type: array + title: array + description: The array of integers that will be turned into an embedding. + minItems: 1 + maxItems: 2048 + items: + type: integer + example: "[1212, 318, 257, 1332, 13]" + - type: array + title: array + description: The array of arrays containing integers that will be turned into an embedding. + minItems: 1 + maxItems: 2048 + items: + type: array + minItems: 1 + items: + type: integer + example: "[[1212, 318, 257, 1332, 13]]" + x-oaiExpandable: true + model: + description: *model_description + example: "text-embedding-3-small" + anyOf: + - type: string + - type: string + enum: + [ + "text-embedding-ada-002", + "text-embedding-3-small", + "text-embedding-3-large", + ] + x-oaiTypeLabel: string + encoding_format: + description: "The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/)." + example: "float" + default: "float" + type: string + enum: ["float", "base64"] + dimensions: + description: | + The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. + type: integer + minimum: 1 + user: *end_user_param_configuration + required: + - model + - input + + CreateEmbeddingResponse: + type: object + properties: + data: + type: array + description: The list of embeddings generated by the model. + items: + $ref: "#/components/schemas/Embedding" + model: + type: string + description: The name of the model used to generate the embedding. + object: + type: string + description: The object type, which is always "list". + enum: [list] + usage: + type: object + description: The usage information for the request. + properties: + prompt_tokens: + type: integer + description: The number of tokens used by the prompt. + total_tokens: + type: integer + description: The total number of tokens used by the request. + required: + - prompt_tokens + - total_tokens + required: + - object + - model + - data + - usage + + Model: + title: Model + description: Describes an OpenAI model offering that can be used with the API. + properties: + id: + type: string + description: The model identifier, which can be referenced in the API endpoints. + created: + type: integer + description: The Unix timestamp (in seconds) when the model was created. + object: + type: string + description: The object type, which is always "model". + enum: [model] + owned_by: + type: string + description: The organization that owns the model. + required: + - id + - object + - created + - owned_by + x-oaiMeta: + name: The model object + example: *retrieve_model_response + + Embedding: + type: object + description: | + Represents an embedding vector returned by embedding endpoint. + properties: + index: + type: integer + description: The index of the embedding in the list of embeddings. + embedding: + type: array + description: | + The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings). + items: + type: number + object: + type: string + description: The object type, which is always "embedding". + enum: [embedding] + required: + - index + - object + - embedding + x-oaiMeta: + name: The embedding object + example: | + { + "object": "embedding", + "embedding": [ + 0.0023064255, + -0.009327292, + .... (1536 floats total for ada-002) + -0.0028842222, + ], + "index": 0 + } + + CompletionUsage: + type: object + description: Usage statistics for the completion request. + properties: + completion_tokens: + type: integer + description: Number of tokens in the generated completion. + prompt_tokens: + type: integer + description: Number of tokens in the prompt. + total_tokens: + type: integer + description: Total number of tokens used in the request (prompt + completion). + required: + - prompt_tokens + - completion_tokens + - total_tokens + + RunCompletionUsage: + type: object + description: Usage statistics related to the run. This value will be `null` if the run is not in a terminal state (i.e. `in_progress`, `queued`, etc.). + properties: + completion_tokens: + type: integer + description: Number of completion tokens used over the course of the run. + prompt_tokens: + type: integer + description: Number of prompt tokens used over the course of the run. + total_tokens: + type: integer + description: Total number of tokens used (prompt + completion). + required: + - prompt_tokens + - completion_tokens + - total_tokens + nullable: true + + RunStepCompletionUsage: + type: object + description: Usage statistics related to the run step. This value will be `null` while the run step's status is `in_progress`. + properties: + completion_tokens: + type: integer + description: Number of completion tokens used over the course of the run step. + prompt_tokens: + type: integer + description: Number of prompt tokens used over the course of the run step. + total_tokens: + type: integer + description: Total number of tokens used (prompt + completion). + required: + - prompt_tokens + - completion_tokens + - total_tokens + nullable: true + + ErrorEvent: + type: object + properties: + event: + type: string + enum: ["error"] + data: + $ref: "#/components/schemas/Error" + required: + - event + - data + description: Occurs when an [error](/docs/guides/error-codes/api-errors) occurs. This can happen due to an internal server error or a timeout. + x-oaiMeta: + dataDescription: "`data` is an [error](/docs/guides/error-codes/api-errors)" + + DoneEvent: + type: object + properties: + event: + type: string + enum: ["done"] + data: + type: string + enum: ["[DONE]"] + required: + - event + - data + description: Occurs when a stream ends. + x-oaiMeta: + dataDescription: "`data` is `[DONE]`" + +security: + - ApiKeyAuth: [] + +x-oaiMeta: + navigationGroups: + - id: endpoints + title: Endpoints + - id: assistants + title: Assistants + - id: legacy + title: Legacy + groups: + # > General Notes + # The `groups` section is used to generate the API reference pages and navigation, in the same + # order listed below. Additionally, each `group` can have a list of `sections`, each of which + # will become a navigation subroute and subsection under the group. Each section has: + # - `type`: Currently, either an `endpoint` or `object`, depending on how the section needs to + # be rendered + # - `key`: The reference key that can be used to lookup the section definition + # - `path`: The path (url) of the section, which is used to generate the navigation link. + # + # > The `object` sections maps to a schema component and the following fields are read for rendering + # - `x-oaiMeta.name`: The name of the object, which will become the section title + # - `x-oaiMeta.example`: The example object, which will be used to generate the example sample (always JSON) + # - `description`: The description of the object, which will be used to generate the section description + # + # > The `endpoint` section maps to an operation path and the following fields are read for rendering: + # - `x-oaiMeta.name`: The name of the endpoint, which will become the section title + # - `x-oaiMeta.examples`: The endpoint examples, which can be an object (meaning a single variation, most + # endpoints, or an array of objects, meaning multiple variations, e.g. the + # chat completion and completion endpoints, with streamed and non-streamed examples. + # - `x-oaiMeta.returns`: text describing what the endpoint returns. + # - `summary`: The summary of the endpoint, which will be used to generate the section description + - id: audio + title: Audio + description: | + Learn how to turn audio into text or text into audio. + + Related guide: [Speech to text](/docs/guides/speech-to-text) + navigationGroup: endpoints + sections: + - type: endpoint + key: createSpeech + path: createSpeech + - type: endpoint + key: createTranscription + path: createTranscription + - type: endpoint + key: createTranslation + path: createTranslation + - type: object + key: CreateTranscriptionResponseJson + path: json-object + - type: object + key: CreateTranscriptionResponseVerboseJson + path: verbose-json-object + - id: chat + title: Chat + description: | + Given a list of messages comprising a conversation, the model will return a response. + + Related guide: [Chat Completions](/docs/guides/text-generation) + navigationGroup: endpoints + sections: + - type: endpoint + key: createChatCompletion + path: create + - type: object + key: CreateChatCompletionResponse + path: object + - type: object + key: CreateChatCompletionStreamResponse + path: streaming + - id: embeddings + title: Embeddings + description: | + Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. + + Related guide: [Embeddings](/docs/guides/embeddings) + navigationGroup: endpoints + sections: + - type: endpoint + key: createEmbedding + path: create + - type: object + key: Embedding + path: object + - id: fine-tuning + title: Fine-tuning + description: | + Manage fine-tuning jobs to tailor a model to your specific training data. + + Related guide: [Fine-tune models](/docs/guides/fine-tuning) + navigationGroup: endpoints + sections: + - type: endpoint + key: createFineTuningJob + path: create + - type: endpoint + key: listPaginatedFineTuningJobs + path: list + - type: endpoint + key: listFineTuningEvents + path: list-events + - type: endpoint + key: listFineTuningJobCheckpoints + path: list-checkpoints + - type: endpoint + key: retrieveFineTuningJob + path: retrieve + - type: endpoint + key: cancelFineTuningJob + path: cancel + - type: object + key: FinetuneChatRequestInput + path: chat-input + - type: object + key: FinetuneCompletionRequestInput + path: completions-input + - type: object + key: FineTuningJob + path: object + - type: object + key: FineTuningJobEvent + path: event-object + - type: object + key: FineTuningJobCheckpoint + path: checkpoint-object + - id: batch + title: Batch + description: | + Create large batches of API requests for asynchronous processing. The Batch API returns completions within 24 hours for a 50% discount. + + Related guide: [Batch](/docs/guides/batch) + navigationGroup: endpoints + sections: + - type: endpoint + key: createBatch + path: create + - type: endpoint + key: retrieveBatch + path: retrieve + - type: endpoint + key: cancelBatch + path: cancel + - type: endpoint + key: listBatches + path: list + - type: object + key: Batch + path: object + - type: object + key: BatchRequestInput + path: request-input + - type: object + key: BatchRequestOutput + path: request-output + - id: files + title: Files + description: | + Files are used to upload documents that can be used with features like [Assistants](/docs/api-reference/assistants), [Fine-tuning](/docs/api-reference/fine-tuning), and [Batch API](/docs/guides/batch). + navigationGroup: endpoints + sections: + - type: endpoint + key: createFile + path: create + - type: endpoint + key: listFiles + path: list + - type: endpoint + key: retrieveFile + path: retrieve + - type: endpoint + key: deleteFile + path: delete + - type: endpoint + key: downloadFile + path: retrieve-contents + - type: object + key: OpenAIFile + path: object + - id: uploads + title: Uploads + description: | + Allows you to upload large files in multiple parts. + navigationGroup: endpoints + sections: + - type: endpoint + key: createUpload + path: create + - type: endpoint + key: addUploadPart + path: add-part + - type: endpoint + key: completeUpload + path: complete + - type: endpoint + key: cancelUpload + path: cancel + - type: object + key: Upload + path: object + - type: object + key: UploadPart + path: part-object + - id: images + title: Images + description: | + Given a prompt and/or an input image, the model will generate a new image. + + Related guide: [Image generation](/docs/guides/images) + navigationGroup: endpoints + sections: + - type: endpoint + key: createImage + path: create + - type: endpoint + key: createImageEdit + path: createEdit + - type: endpoint + key: createImageVariation + path: createVariation + - type: object + key: Image + path: object + - id: models + title: Models + description: | + List and describe the various models available in the API. You can refer to the [Models](/docs/models) documentation to understand what models are available and the differences between them. + navigationGroup: endpoints + sections: + - type: endpoint + key: listModels + path: list + - type: endpoint + key: retrieveModel + path: retrieve + - type: endpoint + key: deleteModel + path: delete + - type: object + key: Model + path: object + - id: moderations + title: Moderations + description: | + Given some input text, outputs if the model classifies it as potentially harmful across several categories. + + Related guide: [Moderations](/docs/guides/moderation) + navigationGroup: endpoints + sections: + - type: endpoint + key: createModeration + path: create + - type: object + key: CreateModerationResponse + path: object + - id: assistants + title: Assistants + beta: true + description: | + Build assistants that can call models and use tools to perform tasks. + + [Get started with the Assistants API](/docs/assistants) + navigationGroup: assistants + sections: + - type: endpoint + key: createAssistant + path: createAssistant + - type: endpoint + key: listAssistants + path: listAssistants + - type: endpoint + key: getAssistant + path: getAssistant + - type: endpoint + key: modifyAssistant + path: modifyAssistant + - type: endpoint + key: deleteAssistant + path: deleteAssistant + - type: object + key: AssistantObject + path: object + - id: threads + title: Threads + beta: true + description: | + Create threads that assistants can interact with. + + Related guide: [Assistants](/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: createThread + path: createThread + - type: endpoint + key: getThread + path: getThread + - type: endpoint + key: modifyThread + path: modifyThread + - type: endpoint + key: deleteThread + path: deleteThread + - type: object + key: ThreadObject + path: object + - id: messages + title: Messages + beta: true + description: | + Create messages within threads + + Related guide: [Assistants](/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: createMessage + path: createMessage + - type: endpoint + key: listMessages + path: listMessages + - type: endpoint + key: getMessage + path: getMessage + - type: endpoint + key: modifyMessage + path: modifyMessage + - type: endpoint + key: deleteMessage + path: deleteMessage + - type: object + key: MessageObject + path: object + - id: runs + title: Runs + beta: true + description: | + Represents an execution run on a thread. + + Related guide: [Assistants](/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: createRun + path: createRun + - type: endpoint + key: createThreadAndRun + path: createThreadAndRun + - type: endpoint + key: listRuns + path: listRuns + - type: endpoint + key: getRun + path: getRun + - type: endpoint + key: modifyRun + path: modifyRun + - type: endpoint + key: submitToolOuputsToRun + path: submitToolOutputs + - type: endpoint + key: cancelRun + path: cancelRun + - type: object + key: RunObject + path: object + - id: run-steps + title: Run Steps + beta: true + description: | + Represents the steps (model and tool calls) taken during the run. + + Related guide: [Assistants](/docs/assistants/overview) + navigationGroup: assistants + sections: + - type: endpoint + key: listRunSteps + path: listRunSteps + - type: endpoint + key: getRunStep + path: getRunStep + - type: object + key: RunStepObject + path: step-object + - id: vector-stores + title: Vector Stores + beta: true + description: | + Vector stores are used to store files for use by the `file_search` tool. + + Related guide: [File Search](/docs/assistants/tools/file-search) + navigationGroup: assistants + sections: + - type: endpoint + key: createVectorStore + path: create + - type: endpoint + key: listVectorStores + path: list + - type: endpoint + key: getVectorStore + path: retrieve + - type: endpoint + key: modifyVectorStore + path: modify + - type: endpoint + key: deleteVectorStore + path: delete + - type: object + key: VectorStoreObject + path: object + - id: vector-stores-files + title: Vector Store Files + beta: true + description: | + Vector store files represent files inside a vector store. + + Related guide: [File Search](/docs/assistants/tools/file-search) + navigationGroup: assistants + sections: + - type: endpoint + key: createVectorStoreFile + path: createFile + - type: endpoint + key: listVectorStoreFiles + path: listFiles + - type: endpoint + key: getVectorStoreFile + path: getFile + - type: endpoint + key: deleteVectorStoreFile + path: deleteFile + - type: object + key: VectorStoreFileObject + path: file-object + - id: vector-stores-file-batches + title: Vector Store File Batches + beta: true + description: | + Vector store file batches represent operations to add multiple files to a vector store. + + Related guide: [File Search](/docs/assistants/tools/file-search) + navigationGroup: assistants + sections: + - type: endpoint + key: createVectorStoreFileBatch + path: createBatch + - type: endpoint + key: getVectorStoreFileBatch + path: getBatch + - type: endpoint + key: cancelVectorStoreFileBatch + path: cancelBatch + - type: endpoint + key: listFilesInVectorStoreBatch + path: listBatchFiles + - type: object + key: VectorStoreFileBatchObject + path: batch-object + - id: assistants-streaming + title: Streaming + beta: true + description: | + Stream the result of executing a Run or resuming a Run after submitting tool outputs. + + You can stream events from the [Create Thread and Run](/docs/api-reference/runs/createThreadAndRun), + [Create Run](/docs/api-reference/runs/createRun), and [Submit Tool Outputs](/docs/api-reference/runs/submitToolOutputs) + endpoints by passing `"stream": true`. The response will be a [Server-Sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) stream. + + Our Node and Python SDKs provide helpful utilities to make streaming easy. Reference the + [Assistants API quickstart](/docs/assistants/overview) to learn more. + navigationGroup: assistants + sections: + - type: object + key: MessageDeltaObject + path: message-delta-object + - type: object + key: RunStepDeltaObject + path: run-step-delta-object + - type: object + key: AssistantStreamEvent + path: events + - id: completions + title: Completions + legacy: true + navigationGroup: legacy + description: | + Given a prompt, the model will return one or more predicted completions along with the probabilities of alternative tokens at each position. Most developer should use our [Chat Completions API](/docs/guides/text-generation/text-generation-models) to leverage our best and newest models. + sections: + - type: endpoint + key: createCompletion + path: create + - type: object + key: CreateCompletionResponse + path: object diff --git a/jlama-net/pom.xml b/jlama-net/pom.xml index 39d2406..f1d7b15 100644 --- a/jlama-net/pom.xml +++ b/jlama-net/pom.xml @@ -12,6 +12,26 @@ jlama-net Jlama Net + + UTF-8 + 1.6.14 + 5.3.1 + 3.3.2 + 3.0.4 + + + + + + org.springframework.boot + spring-boot-dependencies + pom + ${spring-boot.version} + import + + + + com.github.tjake @@ -47,12 +67,144 @@ 6.0.53 provided + + + org.springframework.boot + spring-boot-starter-web + + + org.springframework.boot + spring-boot-starter-tomcat + + + + + org.springframework.boot + spring-boot-starter-undertow + + + org.springframework.boot + spring-boot-starter-logging + + + + org.springframework.data + spring-data-commons + 3.3.2 + + + + org.springdoc + springdoc-openapi-ui + ${springdoc.version} + + + + com.google.code.findbugs + jsr305 + 3.0.2 + + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + 2.15.2 + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + 2.15.2 + + + org.openapitools + jackson-databind-nullable + 0.2.6 + + + + + org.glassfish.jersey.core + jersey-client + ${jersey-version} + + + org.glassfish.jersey.connectors + jersey-apache-connector + ${jersey-version} + + + org.glassfish.jersey.inject + jersey-hk2 + ${jersey-version} + + + org.glassfish.jersey.media + jersey-media-multipart + ${jersey-version} + + + org.glassfish.jersey.media + jersey-media-json-jackson + ${jersey-version} + + + + + + jakarta.annotation + jakarta.annotation-api + 2.1.1 + + + + jakarta.servlet + jakarta.servlet-api + 6.1.0 + + + + + + org.hibernate.validator + hibernate-validator + 8.0.1.Final + + + org.hibernate.validator + hibernate-validator-annotation-processor + 8.0.1.Final + + + org.springframework.boot + spring-boot-starter-validation + + + com.fasterxml.jackson.core + jackson-databind + 2.15.2 + + + + + + org.springframework.boot + spring-boot-starter-test + test + + + io.grpc grpc-testing 1.59.0 test + + + io.github.stefanbratanov + jvm-openai + 0.10.0 + test + @@ -64,6 +216,25 @@ + + org.codehaus.mojo + build-helper-maven-plugin + 3.2.0 + + + add-source + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/openapi/java/main + + + + + org.xolstice.maven.plugins protobuf-maven-plugin @@ -86,6 +257,68 @@ + + org.openapitools + openapi-generator-maven-plugin + 7.7.0 + + + + generate + + + ${project.basedir}/openai_spec.yaml + java + jersey3 + true + false + true + false + com.github.tjake.jlama.net.openai.api + com.github.tjake.jlama.net.openai.model + ${project.build.directory}/generated-sources/openapi + + true + true + true + true + true + true + false + true + true + jackson + true + true + true + true + none + java/main + + + + + + + maven-surefire-plugin + 3.3.1 + + false + + + + default-test + test + + test + + + + true + + + + \ No newline at end of file diff --git a/jlama-net/src/main/java/com/github/tjake/jlama/net/openai/OpenAIChatService.java b/jlama-net/src/main/java/com/github/tjake/jlama/net/openai/OpenAIChatService.java new file mode 100644 index 0000000..c820c31 --- /dev/null +++ b/jlama-net/src/main/java/com/github/tjake/jlama/net/openai/OpenAIChatService.java @@ -0,0 +1,136 @@ +package com.github.tjake.jlama.net.openai; + +import com.github.tjake.jlama.model.AbstractModel; +import com.github.tjake.jlama.model.functions.Generator; +import com.github.tjake.jlama.net.openai.model.*; + +import com.github.tjake.jlama.safetensors.tokenizer.PromptSupport; +import jakarta.validation.Valid; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.HttpStatus; +import org.springframework.http.HttpStatusCode; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.HttpMediaTypeNotAcceptableException; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; + +import java.io.IOException; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; + +@RestController +@Validated +public class OpenAIChatService { + + @Autowired + private AbstractModel model; + + /** + * POST /chat/completions : Creates a model response for the given chat conversation. + * + * @param request (required) + * @return OK (status code 200) + */ + @RequestMapping( + method = RequestMethod.POST, + value = "/chat/completions", + produces = { "application/json", "text/event-stream" }, + consumes = { "application/json" } + ) + Object createChatCompletion( + @Valid @RequestBody CreateChatCompletionRequest request + ) { + + List messages = request.getMessages(); + + if (model.promptSupport().isEmpty()) { + return new ResponseEntity<>(HttpStatus.BAD_GATEWAY); + } + + UUID id = UUID.randomUUID(); + PromptSupport.Builder builder = model.promptSupport().get().newBuilder(); + + for (ChatCompletionRequestMessage m : messages) { + + if (m.getActualInstance() instanceof ChatCompletionRequestUserMessage) { + ChatCompletionRequestUserMessageContent content = m.getChatCompletionRequestUserMessage().getContent(); + + if (content.getActualInstance() instanceof String) { + builder.addUserMessage(content.getString()); + } else { + for (ChatCompletionRequestMessageContentPart p : content.getListChatCompletionRequestMessageContentPart()) { + if (p.getActualInstance() instanceof ChatCompletionRequestMessageContentPartText) { + builder.addUserMessage(p.getChatCompletionRequestMessageContentPartText().getText()); + } else { + //We don't support other types of content... yet... + return new ResponseEntity<>(HttpStatus.NOT_IMPLEMENTED); + } + } + } + } else if (m.getActualInstance() instanceof ChatCompletionRequestSystemMessage) { + builder.addSystemMessage(m.getChatCompletionRequestSystemMessage().getContent()); + } else if (m.getActualInstance() instanceof ChatCompletionRequestAssistantMessage) { + builder.addAssistantMessage(m.getChatCompletionRequestAssistantMessage().getContent()); + } else { + return new ResponseEntity<>(HttpStatus.NOT_IMPLEMENTED); + } + } + + float temperature = request.getTemperature() == null ? 0.3f : request.getTemperature().floatValue(); + int maxTokens = request.getMaxTokens() == null ? 1024 : request.getMaxTokens(); + + AtomicInteger index = new AtomicInteger(0); + if (request.getStream() != null && request.getStream()) { + SseEmitter emitter = new SseEmitter(); + CompletableFuture.supplyAsync( () -> model.generate(id, builder.build(), temperature, maxTokens, false, + (t, f) -> { + try { + emitter.send( + new CreateChatCompletionStreamResponse() + .id(id.toString()) + .choices(List.of(new CreateChatCompletionStreamResponseChoicesInner() + .index(index.getAndIncrement()) + .delta(new ChatCompletionStreamResponseDelta() + .content(t)))) + ); + } catch (IOException e) { + emitter.completeWithError(e); + } + })) + .handle((r, ex) -> { + try { + emitter.send(new CreateChatCompletionStreamResponse() + .id(id.toString()) + .choices(List.of(new CreateChatCompletionStreamResponseChoicesInner() + .finishReason(CreateChatCompletionStreamResponseChoicesInner.FinishReasonEnum.STOP))) + ); + + emitter.complete(); + } catch (IOException e) { + emitter.completeWithError(e); + } + + return null; + }); + + return emitter; + } + else + { + Generator.Response r = model.generate(id, builder.build(), temperature, maxTokens, false, (s, f) -> {}); + + CreateChatCompletionResponse out = new CreateChatCompletionResponse() + .id(id.toString()) + .choices(List.of(new CreateChatCompletionResponseChoicesInner() + .finishReason(CreateChatCompletionResponseChoicesInner.FinishReasonEnum.STOP) + .message(new ChatCompletionResponseMessage().content(r.text)))); + + return new ResponseEntity<>(out, HttpStatus.OK); + } + } + +} \ No newline at end of file diff --git a/jlama-net/src/test/java/com/github/tjake/jlama/net/JlamaServiceTest.java b/jlama-net/src/test/java/com/github/tjake/jlama/net/JlamaServiceTest.java index 79e19c5..3fbd698 100644 --- a/jlama-net/src/test/java/com/github/tjake/jlama/net/JlamaServiceTest.java +++ b/jlama-net/src/test/java/com/github/tjake/jlama/net/JlamaServiceTest.java @@ -123,7 +123,7 @@ public void testNorm() { assertThat(response4.resultNow().getSumSq()).isEqualTo(40);*/ } - class MockConfig extends Config { + public static class MockConfig extends Config { public MockConfig( int contextLength, int embeddingLength, @@ -148,7 +148,7 @@ public MockConfig( } } - class MockWeightLoader implements WeightLoader { + public static class MockWeightLoader implements WeightLoader { @Override public Map metadata() { return Collections.emptyMap(); @@ -173,7 +173,7 @@ public DType getModelDType() { public void close() throws Exception {} } - class MockTokenizer implements Tokenizer { + public static class MockTokenizer implements Tokenizer { @Override public List tokenize(String sentence) { @@ -201,8 +201,8 @@ public Optional promptSupport() { } } - class MockModel extends AbstractModel { - protected MockModel(Config c) { + public static class MockModel extends AbstractModel { + public MockModel(Config c) { super( InferenceType.INPUT_TO_EMBEDDING, c, diff --git a/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/ChatApiTest.java b/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/ChatApiTest.java new file mode 100644 index 0000000..66f0f6d --- /dev/null +++ b/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/ChatApiTest.java @@ -0,0 +1,80 @@ +package com.github.tjake.jlama.net.openai; + + +import io.github.stefanbratanov.jvm.openai.*; +import org.json.JSONException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.skyscreamer.jsonassert.JSONAssert; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.web.client.TestRestTemplate; +import org.springframework.boot.test.web.server.LocalServerPort; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpMethod; +import org.springframework.http.ResponseEntity; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import java.math.BigDecimal; +import java.util.List; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = MockedOpenAIServer.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, useMainMethod = SpringBootTest.UseMainMethod.ALWAYS) +public class ChatApiTest { + @LocalServerPort + private int port; + + TestRestTemplate restTemplate = new TestRestTemplate(); + + HttpHeaders headers = new HttpHeaders(); + + @Test + public void testChatCompletion() throws JSONException { + + OpenAI openAI = OpenAI.newBuilder("Fake key") + .baseUrl("http://localhost:" + port) + .build(); + + ChatClient client = openAI.chatClient(); + + CreateChatCompletionRequest request = CreateChatCompletionRequest.newBuilder() + .model(OpenAIModel.GPT_3_5_TURBO) + .stream(false) + .temperature(0.0f) + .message(ChatMessage.userMessage("Who won the world series in 2020?")) + .build(); + + ChatCompletion response = client.createChatCompletion(request); + + System.err.println(response); + + //JSONAssert.assertEquals(null, response.getBody(), false); + } + + @Test + public void testStreamingChatCompletion() throws JSONException { + + OpenAI openAI = OpenAI.newBuilder("Fake key") + .baseUrl("http://localhost:" + port) + .build(); + + ChatClient client = openAI.chatClient(); + + CreateChatCompletionRequest request = CreateChatCompletionRequest.newBuilder() + .model(OpenAIModel.GPT_3_5_TURBO) + .stream(true) + .temperature(0.0f) + .message(ChatMessage.userMessage("Who won the world series in 2020?")) + .build(); + + client.streamChatCompletion(request).forEach(System.err::println); + + + //JSONAssert.assertEquals(null, response.getBody(), false); + } + + private String createURLWithPort(String uri) { + return "http://localhost:" + port + uri; + } +} diff --git a/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/MockedOpenAIServer.java b/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/MockedOpenAIServer.java new file mode 100644 index 0000000..3cd283d --- /dev/null +++ b/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/MockedOpenAIServer.java @@ -0,0 +1,38 @@ +package com.github.tjake.jlama.net.openai; + +import com.github.tjake.jlama.model.AbstractModel; +import com.github.tjake.jlama.model.ModelSupport; +import com.github.tjake.jlama.net.JlamaServiceTest; +import com.github.tjake.jlama.safetensors.DType; +import com.github.tjake.jlama.safetensors.SafeTensorSupport; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.io.File; +import java.io.IOException; + +@SpringBootApplication +@SpringBootConfiguration +@Configuration +public class MockedOpenAIServer { + private final JlamaServiceTest.MockConfig modelConfig = new JlamaServiceTest.MockConfig(128, 4096, 8192, 16, 12, 1e5f); + + public static void main(String[] args) { + SpringApplication.run(MockedOpenAIServer.class, args); + } + + @Bean + public AbstractModel getModel() throws IOException { + String model = "tjake/TinyLlama-1.1B-Chat-v1.0-Jlama-Q4"; + String workingDirectory = "./models"; + + // Downloads the model or just returns the local path if it's already downloaded + File localModelPath = SafeTensorSupport.maybeDownloadModel(workingDirectory, model); + + // Loads the model + return ModelSupport.loadModel(localModelPath, DType.F32, DType.I8); + } +} diff --git a/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/OpenAIServiceTests.java b/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/OpenAIServiceTests.java new file mode 100644 index 0000000..f17c417 --- /dev/null +++ b/jlama-net/src/test/java/com/github/tjake/jlama/net/openai/OpenAIServiceTests.java @@ -0,0 +1,15 @@ +package com.github.tjake.jlama.net.openai; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +@ExtendWith(SpringExtension.class) +@SpringBootTest +public class OpenAIServiceTests { + + @Test + public void contextLoads() { + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index bd5997a..d3453f7 100644 --- a/pom.xml +++ b/pom.xml @@ -46,7 +46,7 @@ 1.7.1 ${os.detected.name}-${os.detected.arch} - + 6.2.7.Final 2.43.0 4.13.2 3.21.0 @@ -59,7 +59,12 @@ ch.qos.logback logback-classic - 1.4.14 + 1.5.6 + + + ch.qos.logback + logback-core + 1.5.6 @@ -117,12 +122,12 @@ maven-compiler-plugin 3.8.0 + 20 20 --add-modules=jdk.incubator.vector - 22 diff --git a/run-cli.sh b/run-cli.sh index 48410c2..5707cf3 100755 --- a/run-cli.sh +++ b/run-cli.sh @@ -28,7 +28,7 @@ JLAMA_RELATIVE_JAR="./jlama-cli/target/jlama-cli.jar" # Path to the logback.xml LOGBACK_CONFIG="./conf/logback2.xml" -JLAMA_JVM_ARGS="$JLAMA_JVM_ARGS -server -Dstdout.encoding=UTF-8 -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 --add-modules=jdk.incubator.vector --add-exports java.base/sun.nio.ch=ALL-UNNAMED --enable-preview --enable-native-access=ALL-UNNAMED \ +JLAMA_JVM_ARGS="$JLAMA_JVM_ARGS -server -Xmx12G -Dstdout.encoding=UTF-8 -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 --add-modules=jdk.incubator.vector --add-exports java.base/sun.nio.ch=ALL-UNNAMED --enable-preview --enable-native-access=ALL-UNNAMED \ -XX:+UnlockDiagnosticVMOptions -XX:CompilerDirectivesFile=./inlinerules.json -XX:+AlignVector -XX:+UseStringDeduplication \ -XX:+UseCompressedOops -XX:+UseCompressedClassPointers "