Skip to content

Commit

Permalink
Add audio transcription feature using OpenAI for emotion notes (#21)
Browse files Browse the repository at this point in the history
- Add `TranscriberController` for handling audio file uploads
- Implement `TranscriberService` with OpenAI Whisper integration
- Create model `TranscribedText` for the transcribed text response
- Update `Module.scala` for dependency injection configuration
- Add routes for transcribing audio files
- Integrate media recorder in note form component for recording and uploading audio
- Update frontend models and services to handle transcription response
- Add OpenAI dependency to `build.sbt`
  • Loading branch information
vega113 authored Jun 19, 2024
1 parent 0c8397a commit 7c59b6b
Show file tree
Hide file tree
Showing 14 changed files with 225 additions and 4 deletions.
1 change: 1 addition & 0 deletions app/Module.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ class Module extends AbstractModule with AkkaGuiceSupport {
bind(classOf[EmotionDetectionService]).annotatedWith(named("ChatGptAssistant")).
to(classOf[EmoDetectionServiceWithAssistantImpl])
}

}
39 changes: 39 additions & 0 deletions app/controllers/TranscriberController.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package controllers

import auth.AuthenticatedAction
import dao.model.TranscribedText
import play.api.libs.Files
import play.api.libs.Files.TemporaryFile
import play.api.libs.json.Json
import play.api.mvc.{Action, ControllerComponents, MultipartFormData}
import service.TranscriberService

import java.nio.file.Paths
import javax.inject.Inject
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future



class TranscriberController @Inject()(cc: ControllerComponents,
                                      authenticatedAction: AuthenticatedAction,
                                      transcriberService: TranscriberService)
  extends EmoBaseController(cc, authenticatedAction) {

  private lazy val logger: org.slf4j.Logger = org.slf4j.LoggerFactory.getLogger(this.getClass)

  /**
   * POST endpoint that accepts a multipart upload with an "audio" part and
   * responds with the transcription as JSON ({"text": "..."}).
   *
   * Responds 400 when the "audio" part is missing. Authentication is enforced
   * by composing `authenticatedAction` onto the multipart-parsing action.
   */
  def transcribeAudioToText(): Action[MultipartFormData[Files.TemporaryFile]] =
    Action(parse.multipartFormData) andThen authenticatedAction async { implicit request =>
      request.body.file("audio").map { audio =>
        logger.info(s"Transcribing audio file size: ${audio.fileSize}")
        val ref: TemporaryFile = audio.ref
        val tempFilePath = ref.path
        // The transcription backend infers the audio format from the file
        // extension, so rename the extension-less temp file to ".webm"
        // (the format the frontend records in) before handing it off.
        val newFilePath = Paths.get(tempFilePath.toString + ".webm")
        // Alias java.nio.file.Files locally so it does not shadow the
        // play.api.libs.Files import used in the action's type signature,
        // and allow overwrite in case a stale file with the same name exists.
        import java.nio.file.{Files => JFiles, StandardCopyOption}
        JFiles.move(tempFilePath, newFilePath, StandardCopyOption.REPLACE_EXISTING)
        transcriberService.transcribeAudioToText(newFilePath).map { transcribedText =>
          Ok(Json.toJson(transcribedText))
        }
      }.getOrElse {
        Future.successful(BadRequest("Missing file"))
      }
    }
}
2 changes: 2 additions & 0 deletions app/controllers/model.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ object model {
case class SubEmotionWrapper(subEmotion: SubEmotion, suggestedActions: List[SuggestedAction])
case class TagData(tagName: String, emotionRecordId: Long)



object EmotionData {
implicit val tagDataFormat: OFormat[TagData] = Json.format[TagData]
implicit val subEmotionActionFormat: OFormat[SubEmotionWrapper] = Json.format[SubEmotionWrapper]
Expand Down
8 changes: 7 additions & 1 deletion app/dao/model.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import java.time.{LocalDate, LocalDateTime}
import anorm.{~, _}
import anorm.SqlParser._
import auth.model.TokenData
import play.api.libs.json.{Format, Json}
import play.api.libs.json.{Format, Json, OFormat}

import scala.annotation.unused
import scala.language.postfixOps
Expand Down Expand Up @@ -190,6 +190,8 @@ object model {
tag: Option[String], elapsedTime: Option[Double], created: Option[LocalDateTime],
idempotenceKey: Option[String] = None)

// Response payload for the audio-transcription endpoint: the recognized text.
case class TranscribedText(text: String)


object User {
implicit val userFormat: Format[User] = Json.format[User]
Expand Down Expand Up @@ -547,4 +549,8 @@ object model {
implicit val requestsInFlightFormat: Format[RequestsInFlight] = Json.format[RequestsInFlight]
implicit val parser: RowParser[RequestsInFlight] = Macro.namedParser[RequestsInFlight](ColumnNaming.SnakeCase)
}

// JSON (de)serialization for TranscribedText, used when the transcription
// endpoint renders its response body.
object TranscribedText {
implicit val transcribedText: OFormat[TranscribedText] = Json.format[TranscribedText]
}
}
50 changes: 50 additions & 0 deletions app/service/TranscriberService.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package service

import com.google.inject.ImplementedBy
import dao.model.TranscribedText
import io.github.sashirestela.openai.SimpleOpenAI
import io.github.sashirestela.openai.domain.audio.TranscriptionRequest.TimestampGranularity
import io.github.sashirestela.openai.domain.audio.{AudioResponseFormat, Transcription, TranscriptionRequest}
import play.api.Configuration

import java.nio.file.{Path, Paths}
import javax.inject.{Inject, Named}
import util.RichCompletableFuture._

import java.net.http.HttpClient
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration

/**
 * Service contract for converting recorded audio into text.
 * Bound by default to [[OpenAiWhisperServiceImpl]] (OpenAI Whisper).
 */
@ImplementedBy(classOf[OpenAiWhisperServiceImpl])
trait TranscriberService {
  /** Transcribes the audio file at `path`; completes with the recognized text. */
  def transcribeAudioToText(path: Path): Future[TranscribedText]
}

/**
 * [[TranscriberService]] backed by OpenAI's Whisper model via the
 * simple-openai client.
 *
 * Configuration keys read: `openai.timeout` (HTTP connect timeout) and
 * `openai.apikey`.
 */
class OpenAiWhisperServiceImpl @Inject() (config: Configuration) extends TranscriberService {

  // HttpClient.Builder.executor only requires a java.util.concurrent.Executor,
  // and ExecutionContext.global already is one (it is an
  // ExecutionContextExecutor) — no unsafe cast to ExecutorService needed.
  private val executor: scala.concurrent.ExecutionContextExecutor = ExecutionContext.global

  // The client is reusable across requests; build it once lazily instead of
  // reconstructing an HttpClient and SimpleOpenAI instance on every call.
  private lazy val openAi: SimpleOpenAI = {
    val connectTimeout = java.time.Duration.ofSeconds(config.get[Duration]("openai.timeout").toSeconds)
    val httpClient = HttpClient.newBuilder()
      .connectTimeout(connectTimeout)
      .executor(executor)
      .build()
    SimpleOpenAI.builder()
      .apiKey(config.get[String]("openai.apikey"))
      .httpClient(httpClient)
      .build()
  }

  /** Uploads the audio file at `path` to Whisper and returns the transcript. */
  override def transcribeAudioToText(path: Path): Future[TranscribedText] = {
    // VERBOSE_JSON with word + segment granularities matches the original
    // request shape; the client accumulates both granularity calls.
    val audioRequest = TranscriptionRequest.builder()
      .file(path)
      .model("whisper-1")
      .responseFormat(AudioResponseFormat.VERBOSE_JSON)
      .temperature(0.2)
      .timestampGranularity(TimestampGranularity.WORD)
      .timestampGranularity(TimestampGranularity.SEGMENT)
      .build()

    openAi.audios.transcribe(audioRequest).asScala.map(response => TranscribedText(response.getText))
  }
}
1 change: 0 additions & 1 deletion app/service/ai/EmotionDetectionService.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package service.ai

import akka.actor.ActorSystem
import akka.actor.TypedActor.context
import com.google.inject.ImplementedBy
import dao.model.{EmotionDetectionResult, RequestsInFlight}
import play.api.Logger
Expand Down
5 changes: 5 additions & 0 deletions app/service/ai/SimpleOpenAiService.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package service.ai

// NOTE(review): empty marker trait with no members and no visible
// implementors in this changeset — presumably a placeholder for a future
// shared OpenAI client abstraction; confirm it is still needed or remove it.
trait SimpleOpenAiService {

}
19 changes: 19 additions & 0 deletions app/util/RichCompletableFuture.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Utilities for bridging Java's CompletableFuture into Scala's Future.
package util

import java.util.concurrent.CompletableFuture
import scala.concurrent.{Future, Promise}
import scala.concurrent.ExecutionContext.Implicits.global

object RichCompletableFuture {

  /** Enriches Java's [[CompletableFuture]] with conversion to a Scala [[Future]]. */
  implicit class RichCF[T](javaFuture: CompletableFuture[T]) {

    /**
     * Bridges the Java future into a Scala `Future` via a `Promise`.
     *
     * Dependent CompletionStages surface upstream failures wrapped in a
     * `CompletionException`; unwrap it so callers observe the original cause
     * (matching what they would see from the source stage directly).
     * No ExecutionContext is required: `whenComplete` runs the callback on
     * the completing thread.
     */
    def asScala: Future[T] = {
      val promise = Promise[T]()
      javaFuture.whenComplete { (result: T, exception: Throwable) =>
        exception match {
          case null =>
            promise.success(result)
          case ce: java.util.concurrent.CompletionException if ce.getCause != null =>
            promise.failure(ce.getCause)
          case other =>
            promise.failure(other)
        }
      }
      promise.future
    }
  }
}
4 changes: 3 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ libraryDependencies += "org.liquibase" % "liquibase-core" % "4.20.0"
libraryDependencies += "com.pauldijou" %% "jwt-core" % "5.0.0"
libraryDependencies += "com.pauldijou" %% "jwt-play-json" % "5.0.0"

libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.4.8"
libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.4.12"
libraryDependencies += "org.fusesource.jansi" % "jansi" % "2.4.0"
libraryDependencies += "com.google.inject" % "guice" % "5.1.0"

Expand All @@ -75,6 +75,8 @@ dependencyOverrides += "org.scala-lang.modules" %% "scala-parser-combinators" %

libraryDependencies += "io.honeybadger" % "honeybadger-java" % "2.1.2"

libraryDependencies += "io.github.sashirestela" % "simple-openai" % "3.5.0"


libraryDependencies ++= Seq(
"io.gatling.highcharts" % "gatling-charts-highcharts" % "3.9.5",
Expand Down
3 changes: 3 additions & 0 deletions conf/routes
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ POST /api/user/todo
PUT /api/user/todo controllers.UserTodoController.edit()
DELETE /api/user/todo/:userTodoId controllers.UserTodoController.delete(userTodoId: Long)

# Transcription routes
POST /api/transcribe controllers.TranscriberController.transcribeAudioToText()

# AI Admin routes
POST /api/ai/admin/assistant controllers.AiAdminController.createAssistant()
DELETE /api/ai/admin/assistant/:externalId controllers.AiAdminController.deleteAssistantByExternal(externalId)
Expand Down
4 changes: 4 additions & 0 deletions ui/src/app/models/emotion.model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,7 @@ export interface NoteTodoUpdate {
id: number;
isAccepted: boolean;
}

export interface TranscribedText {
text: string;
}
6 changes: 6 additions & 0 deletions ui/src/app/note-form/note-form.component.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
</mat-form-field>
</div>
<mat-action-row>
<button mat-icon-button (click)="startRecording()" *ngIf="!isRecording">
<mat-icon>mic</mat-icon>
</button>
<button mat-icon-button (click)="stopRecording()" *ngIf="isRecording">
<mat-icon>mic_off</mat-icon>
</button>
<button mat-raised-button color="primary" type="submit"
[disabled]="emotionForm.invalid || isSavingEmotionRecord">
<mat-spinner *ngIf="isSavingEmotionRecord" diameter="24"></mat-spinner>
Expand Down
34 changes: 33 additions & 1 deletion ui/src/app/note-form/note-form.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import {MatSnackBar} from "@angular/material/snack-bar";
import {DateService} from "../services/date.service";
import {from} from "rxjs";
import {Emotion, EmotionRecord, Note, SubEmotion, Tag, Trigger} from "../models/emotion.model";
import {MediaRecorderService} from '../services/media-recorder.service';


@Component({
selector: 'app-note-form',
Expand All @@ -19,23 +21,30 @@ export class NoteFormComponent {
isSavingEmotionRecord: boolean = false;
maxNoteLength = 2000; // TODO: Should be fetched from the backend
placeHolderText: string = "Try to describe how this emotion is affecting your daily activities or your interactions with others. Include more context or personal thoughts to convey your emotions more clearly. Are there any noticeable patterns or recurring events? How do you wish to feel instead? What steps do you think you could take to influence your emotional state? Remember, you can also use #hashtags to categorize or highlight key points in your note. To add a todo, simply enclose it in double square brackets like this: [[<your todo here>]]. ";

/** Length of the current note text, or 0 when the field is empty or unset. */
computeNoteLength(): number {
  const noteText: string | undefined = this.emotionForm.get('emotionNote')?.value;
  return noteText?.length ?? 0;
}


isRecording = false;
mediaRecorderService: MediaRecorderService;


constructor(private fb: FormBuilder, private emotionService: EmotionService,
private authService: AuthService,
private router: Router,
private snackBar: MatSnackBar,
private emotionStateService: EmotionStateService,
private dateService: DateService) {
private dateService: DateService,
mediaRecorderService: MediaRecorderService) {
this.emotionForm = this.fb.group({
emotionDate: [new Date()],
emotionNote: [''],
textTitle: [''],
emotionTime: [''],
});
this.mediaRecorderService = mediaRecorderService;
}

async onSubmit(): Promise<void> {
Expand Down Expand Up @@ -102,4 +111,27 @@ export class NoteFormComponent {
created: this.dateService.formatDateToIsoString(emotionFromData.emotionDate)
};
}

// Switches the component into recording mode and asks the media-recorder
// service to begin capturing microphone audio.
// NOTE(review): the log fires immediately, before the service has actually
// acquired the microphone (getUserMedia is asynchronous), and isRecording is
// set to true even if the user later denies permission — confirm intended.
startRecording() {
  this.isRecording = true;
  this.mediaRecorderService.startRecording();
  console.log('Recording started');
}

// Leaves recording mode, finalizes the capture, and sends the resulting audio
// blob off for transcription.
// NOTE(review): the promise's rejection path is unhandled — if the recorder
// was never started the promise may never settle; consider adding a catch.
stopRecording() {
  this.isRecording = false;
  this.mediaRecorderService.stopRecording().then(audioData => {
    this.textToSpeech(audioData);
    console.log('Recording stopped');
  });
}


// Sends recorded audio to the backend for transcription and appends the
// transcribed text to the note field.
// NOTE(review): the name is misleading — this performs speech-to-text, not
// text-to-speech; consider renaming (the only caller is stopRecording).
textToSpeech(audioData: Blob) {
  this.mediaRecorderService.transcribeAudio(audioData).subscribe(transcription => {
    console.log('Transcription: ', transcription);
    const currentNote = this.emotionForm.get('emotionNote')?.value || '';
    this.emotionForm.get('emotionNote')?.setValue(`${currentNote} ${transcription.text}`);
  });
}
}
53 changes: 53 additions & 0 deletions ui/src/app/services/media-recorder.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { Injectable } from '@angular/core';
import {Observable} from "rxjs";
import {HttpClient, HttpHeaders} from "@angular/common/http";
import {AuthService} from "./auth.service";
import {ErrorService} from "./error.service";
import {TranscribedText} from "../models/emotion.model";
import {environment} from "../../environments/environment";
import {catchError} from "rxjs/operators";

@Injectable({
  providedIn: 'root'
})
export class MediaRecorderService {
  private mediaRecorder: MediaRecorder | undefined;
  private audioChunks: Blob[] = [];
  // Kept so the microphone can be released (tracks stopped) when recording
  // ends; otherwise the browser's recording indicator stays on forever.
  private mediaStream: MediaStream | undefined;

  constructor(private http: HttpClient, private authService: AuthService, private errorService: ErrorService) {
  }

  /**
   * Requests microphone access and starts buffering audio chunks.
   * Resolves once recording has actually started; rejects if the user denies
   * microphone permission (previously the rejection was silently dropped).
   */
  startRecording(): Promise<void> {
    return navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
      this.mediaStream = stream;
      this.mediaRecorder = new MediaRecorder(stream);
      // Register the listener before start() so no chunk can be missed.
      this.mediaRecorder.addEventListener('dataavailable', event => {
        this.audioChunks.push(event.data);
      });
      this.mediaRecorder.start();
    });
  }

  /**
   * Stops recording and resolves with the captured audio as a webm blob.
   * Also stops the underlying media tracks so the browser releases the
   * microphone. Rejects immediately if recording was never started
   * (previously the returned promise would hang forever in that case).
   */
  stopRecording(): Promise<Blob> {
    return new Promise((resolve, reject) => {
      if (!this.mediaRecorder) {
        reject(new Error('stopRecording called before startRecording'));
        return;
      }
      this.mediaRecorder.addEventListener('stop', () => {
        const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' });
        this.audioChunks = [];
        // Release the microphone hardware.
        this.mediaStream?.getTracks().forEach(track => track.stop());
        this.mediaStream = undefined;
        resolve(audioBlob);
      });
      this.mediaRecorder.stop();
    });
  }

  /**
   * Uploads the audio blob to the backend transcription endpoint as
   * multipart form data (part name "audio") and emits the transcript.
   * Errors are routed through the shared ErrorService handler.
   */
  transcribeAudio(audioBlob: Blob): Observable<TranscribedText> {
    const headers: HttpHeaders = this.authService.getAuthorizationHeader();
    const formData = new FormData();
    formData.append('audio', audioBlob, 'audio.webm');
    return this.http
      .post<TranscribedText>(`${environment.baseUrl}/transcribe`, formData, {headers})
      .pipe(catchError(resp => {
        return this.errorService.handleError(resp);
      }));
  }
}

0 comments on commit 7c59b6b

Please sign in to comment.