Skip to content

Commit

Permalink
Fix demo ui
Browse files Browse the repository at this point in the history
  • Loading branch information
tjake committed Aug 10, 2024
1 parent c887807 commit 0a7f7c5
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 33 deletions.
21 changes: 21 additions & 0 deletions jlama-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@
</dependencies>
<build>
<plugins>
<!--
  Copies the web UI sources from src/main/webapp/ into
  ${basedir}/target/classes/static by binding the resources plugin's
  copy-resources goal to the compile phase.
  NOTE(review): presumably this is what places the UI files on the runtime
  classpath under static/ (and therefore inside the packaged jar); confirm
  against the resource handler configuration and the shade plugin setup.
-->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
<executions>
<execution>
<id>copy-resources</id>
<!-- Run together with compilation so the copied files exist before later phases. -->
<phase>compile</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<!-- Destination: a "static" folder inside the module's target/classes directory. -->
<outputDirectory>${basedir}/target/classes/static</outputDirectory>
<resources>
<resource>
<!-- Source: everything under the module's webapp directory. -->
<directory>src/main/webapp/</directory>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,13 @@ public class ApiServiceCommand extends BaseCommand implements WebMvcConfigurer {

@Bean
public AbstractModel getModelBean() {
logger.info("Here! {}", m);
return m;
}

@Override
public void addResourceHandlers(ResourceHandlerRegistry registry) {
registry.addResourceHandler("/ui/**")
.addResourceLocations("/resources/");
.addResourceLocations("classpath:/static/ui/");
}

@Override
Expand All @@ -70,13 +69,13 @@ public void run() {
Optional.ofNullable(modelQuantization),
Optional.ofNullable(threadCount));

logger.info("m = {}", m);

System.out.println("Chat UI: http://localhost:" + port + "/ui/index.html");
System.out.println("Chat UI: http://localhost:" + port);

new SpringApplicationBuilder(ApiServiceCommand.class)
.lazyInitialization(true)
.properties("server.port", ""+port, "logging.level.org.springframework.web", "debug")
.properties(
"server.port", ""+port,
"logging.level.org.springframework.web", "debug")
.build()
.run();
} catch (Exception e) {
Expand Down
11 changes: 11 additions & 0 deletions jlama-cli/src/main/webapp/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- Redirect stub: a zero-second meta refresh sends the browser straight to /ui/index.html. -->
<meta http-equiv="refresh" content="0;url=/ui/index.html">
<title>Redirecting...</title>
</head>
<body>
<!-- Manual fallback link to the same target, shown if the automatic refresh does not happen. -->
<p>If you are not redirected automatically, follow this <a href="/ui/index.html">link to the ui</a>.</p>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@

// Function to send a POST request to the API
function postRequest(data, signal) {
const URL = `/api/generate`;
function postRequest(input, session, signal) {
const URL = `/v1/chat/completions`;
return fetch(URL, {
method: 'POST',
headers: {
'X-Jlama-Session': session,
'Content-Type': 'application/json'
},
body: JSON.stringify(data),
body: JSON.stringify({"model": "jlama", "messages": [{"role": "user", "content": input}], "stream": true }),
signal: signal
});
}
Expand All @@ -29,8 +30,13 @@ async function getResponse(response, callback) {

for (const line of lines) {
if (line.trim() === '') continue;
const parsedResponse = JSON.parse(line);
callback(parsedResponse); // Process each response word
if (line.startsWith('data:')) {
const parsedResponse = JSON.parse(line.slice(5));
callback(parsedResponse); // Process each response word
} else {
const parsedResponse = JSON.parse(line);
callback(parsedResponse); // Process each response word
}
}
}

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ async function submitRequest() {

const input = document.getElementById('user-input').value;
const context = document.getElementById('chat-history').context;
const data = { prompt: input, session: session };

// Create user message element and append to chat history
let chatHistory = document.getElementById('chat-history');
Expand Down Expand Up @@ -116,14 +115,14 @@ async function submitRequest() {
const sendButton = document.getElementById('send-button');
sendButton.insertAdjacentElement('beforebegin', stopButton);

// change autoScroller to keep track of our new responseDiv
// change autoScroller to keep track of our new responseDiv
autoScroller.observe(responseDiv);

postRequest(data, interrupt.signal)
postRequest(input, session, interrupt.signal)
.then(async response => {
await getResponse(response, parsedResponse => {
let word = parsedResponse.response;
if (parsedResponse.done) {
if (parsedResponse.choices[0].finish_reason !== null) {
chatHistory.context = parsedResponse.context;
// Copy button
let copyButton = document.createElement('button');
Expand All @@ -137,14 +136,17 @@ async function submitRequest() {
});
};
responseDiv.appendChild(copyButton);
}
// add word to response
if (word != undefined) {
if (responseDiv.hidden_text == undefined){
responseDiv.hidden_text = "";
} else {
let word = parsedResponse.choices[0].delta.content;

// add word to response
if (word != undefined) {
if (responseDiv.hidden_text == undefined) {
responseDiv.hidden_text = "";
}
responseDiv.hidden_text += word;
responseDiv.innerHTML = DOMPurify.sanitize(marked.parse(responseDiv.hidden_text)); // Append word to response container
}
responseDiv.hidden_text += word;
responseDiv.innerHTML = DOMPurify.sanitize(marked.parse(responseDiv.hidden_text)); // Append word to response container
}
});
})
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,14 @@
import jakarta.validation.Valid;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.http.HttpStatusCode;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.HttpMediaTypeNotAcceptableException;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
Expand All @@ -26,6 +24,8 @@
@Validated
public class OpenAIChatService {

private static final String JLAMA_SESSION_HEADER = "X-Jlama-Session";

@Autowired
private AbstractModel model;

Expand All @@ -37,11 +37,12 @@ public class OpenAIChatService {
*/
@RequestMapping(
method = RequestMethod.POST,
value = "/chat/completions",
value = "/v1/chat/completions",
produces = { "application/json", "text/event-stream" },
consumes = { "application/json" }
)
Object createChatCompletion(
@RequestHeader Map<String, String> headers,
@Valid @RequestBody CreateChatCompletionRequest request
) {

Expand All @@ -52,6 +53,17 @@ Object createChatCompletion(
}

UUID id = UUID.randomUUID();

if (headers.containsKey(JLAMA_SESSION_HEADER)) {
try {
id = UUID.fromString(headers.get(JLAMA_SESSION_HEADER));
} catch (IllegalArgumentException e) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}
}

UUID sessionId = id;

PromptSupport.Builder builder = model.promptSupport().get().newBuilder();

for (ChatCompletionRequestMessage m : messages) {
Expand Down Expand Up @@ -86,12 +98,12 @@ Object createChatCompletion(
AtomicInteger index = new AtomicInteger(0);
if (request.getStream() != null && request.getStream()) {
SseEmitter emitter = new SseEmitter();
CompletableFuture.supplyAsync( () -> model.generate(id, builder.build(), temperature, maxTokens, false,
CompletableFuture.supplyAsync( () -> model.generate(sessionId, builder.build(), temperature, maxTokens, false,
(t, f) -> {
try {
emitter.send(
new CreateChatCompletionStreamResponse()
.id(id.toString())
.id(sessionId.toString())
.choices(List.of(new CreateChatCompletionStreamResponseChoicesInner()
.index(index.getAndIncrement())
.delta(new ChatCompletionStreamResponseDelta()
Expand All @@ -104,7 +116,7 @@ Object createChatCompletion(
.handle((r, ex) -> {
try {
emitter.send(new CreateChatCompletionStreamResponse()
.id(id.toString())
.id(sessionId.toString())
.choices(List.of(new CreateChatCompletionStreamResponseChoicesInner()
.finishReason(CreateChatCompletionStreamResponseChoicesInner.FinishReasonEnum.STOP)))
);
Expand All @@ -121,10 +133,10 @@ Object createChatCompletion(
}
else
{
Generator.Response r = model.generate(id, builder.build(), temperature, maxTokens, false, (s, f) -> {});
Generator.Response r = model.generate(sessionId, builder.build(), temperature, maxTokens, false, (s, f) -> {});

CreateChatCompletionResponse out = new CreateChatCompletionResponse()
.id(id.toString())
.id(sessionId.toString())
.choices(List.of(new CreateChatCompletionResponseChoicesInner()
.finishReason(CreateChatCompletionResponseChoicesInner.FinishReasonEnum.STOP)
.message(new ChatCompletionResponseMessage().content(r.text))));
Expand Down
4 changes: 2 additions & 2 deletions run-cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ fi
# Define the path of the relative JAR
JLAMA_RELATIVE_JAR="./jlama-cli/target/jlama-cli.jar"
# Path to the logback.xml
LOGBACK_CONFIG="./conf/logback2.xml"
LOGBACK_CONFIG="./conf/logback.xml"

JLAMA_JVM_ARGS="$JLAMA_JVM_ARGS -server -Xmx12G -Dstdout.encoding=UTF-8 -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 --add-modules=jdk.incubator.vector --add-exports java.base/sun.nio.ch=ALL-UNNAMED --enable-preview --enable-native-access=ALL-UNNAMED \
JLAMA_JVM_ARGS="$JLAMA_JVM_ARGS -server -Dstdout.encoding=UTF-8 -Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0 --add-modules=jdk.incubator.vector --add-exports java.base/sun.nio.ch=ALL-UNNAMED --enable-preview --enable-native-access=ALL-UNNAMED \
-XX:+UnlockDiagnosticVMOptions -XX:CompilerDirectivesFile=./inlinerules.json -XX:+AlignVector -XX:+UseStringDeduplication \
-XX:+UseCompressedOops -XX:+UseCompressedClassPointers "

Expand Down

0 comments on commit 0a7f7c5

Please sign in to comment.