diff --git a/app/Module.scala b/app/Module.scala
index 952281f..40991c3 100644
--- a/app/Module.scala
+++ b/app/Module.scala
@@ -13,4 +13,5 @@ class Module extends AbstractModule with AkkaGuiceSupport {
     bind(classOf[EmotionDetectionService]).annotatedWith(named("ChatGptAssistant")).
       to(classOf[EmoDetectionServiceWithAssistantImpl])
   }
+
 }
diff --git a/app/controllers/TranscriberController.scala b/app/controllers/TranscriberController.scala
new file mode 100644
index 0000000..d80004a
--- /dev/null
+++ b/app/controllers/TranscriberController.scala
@@ -0,0 +1,56 @@
+package controllers
+
+import auth.AuthenticatedAction
+import dao.model.TranscribedText
+import play.api.libs.Files
+import play.api.libs.Files.TemporaryFile
+import play.api.libs.json.Json
+import play.api.mvc.{Action, ControllerComponents, MultipartFormData}
+import service.TranscriberService
+
+import java.nio.file.Paths
+import javax.inject.Inject
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.concurrent.Future
+
+/**
+ * Controller exposing audio-to-text transcription, delegating the work to [[TranscriberService]].
+ */
+class TranscriberController @Inject()(cc: ControllerComponents,
+                                      authenticatedAction: AuthenticatedAction, transcriberService: TranscriberService)
+  extends EmoBaseController(cc, authenticatedAction) {
+
+  private lazy val logger: org.slf4j.Logger = org.slf4j.LoggerFactory.getLogger(this.getClass)
+
+  /**
+   * Transcribes the multipart form part named "audio".
+   *
+   * The uploaded temporary file is renamed with a ".webm" suffix, passed to the transcriber,
+   * and deleted once transcription has completed (successfully or not).
+   *
+   * @return `Ok` with the transcribed text as JSON, or `BadRequest("Missing file")` when the
+   *         form has no "audio" part
+   */
+  def transcribeAudioToText(): Action[MultipartFormData[Files.TemporaryFile]] = Action(parse.multipartFormData) andThen authenticatedAction async { implicit request =>
+    request.body.file("audio").map { audio =>
+      logger.info(s"Transcribing audio file size: ${audio.fileSize}")
+      val ref: TemporaryFile = audio.ref
+      val tempFilePath = ref.path
+      val newFilePath = Paths.get(tempFilePath.toString + ".webm")
+      import java.nio.file.{Files, Paths}
+      Files.move(tempFilePath, newFilePath)
+      // A synchronous failure is turned into a failed Future so the cleanup below still runs.
+      val transcription =
+        try transcriberService.transcribeAudioToText(newFilePath)
+        catch { case scala.util.control.NonFatal(e) => Future.failed(e) }
+      transcription.map(transcribedText => {
+        Ok(Json.toJson(transcribedText))
+      }).andThen { case _ =>
+        // Nothing else in this method removes the renamed file, so delete it here to avoid leaking uploads.
+        Files.deleteIfExists(newFilePath)
+      }
+    }.getOrElse {
+      Future.successful(BadRequest("Missing file"))
+    }
+  }
+}
diff --git a/app/controllers/model.scala b/app/controllers/model.scala
index 4a33bb2..1b2d38d 100644
--- a/app/controllers/model.scala
+++ b/app/controllers/model.scala
@@ -10,6 +10,8 @@ object model {
   case class SubEmotionWrapper(subEmotion: SubEmotion, suggestedActions: List[SuggestedAction])
   case class TagData(tagName: String, emotionRecordId: Long)
 
+
+
   object EmotionData {
     implicit val tagDataFormat: OFormat[TagData] = Json.format[TagData]
     implicit val subEmotionActionFormat: OFormat[SubEmotionWrapper] = Json.format[SubEmotionWrapper]
diff --git a/app/dao/model.scala b/app/dao/model.scala
index 247327c..b670a41 100644
--- a/app/dao/model.scala
+++ b/app/dao/model.scala
@@ -4,7 +4,7 @@ import java.time.{LocalDate, LocalDateTime}
 import anorm.{~, _}
 import anorm.SqlParser._
 import auth.model.TokenData
-import play.api.libs.json.{Format, Json}
+import play.api.libs.json.{Format, Json, OFormat}
 
 import scala.annotation.unused
 import scala.language.postfixOps
@@ -190,6 +190,8 @@ object model {
                                 tag: Option[String], elapsedTime: Option[Double],
                                 created: Option[LocalDateTime], idempotenceKey: Option[String] = None)
 
+  case class TranscribedText(text: String)
+
   object User {
     implicit val userFormat: Format[User] = Json.format[User]
 
@@ -547,4 +549,8 @@ object model {
     implicit val requestsInFlightFormat: Format[RequestsInFlight] = Json.format[RequestsInFlight]
     implicit val parser: RowParser[RequestsInFlight] = Macro.namedParser[RequestsInFlight](ColumnNaming.SnakeCase)
   }
+
+  object TranscribedText {
+    implicit val transcribedText: OFormat[TranscribedText] = Json.format[TranscribedText]
+  }
 }
\ No newline at end of file
diff --git a/app/service/TranscriberService.scala b/app/service/TranscriberService.scala
new file mode 100644
index 0000000..d0fef6c
--- /dev/null
+++ b/app/service/TranscriberService.scala
@@ -0,0 +1,65 @@
+package service
+
+import com.google.inject.ImplementedBy
+import dao.model.TranscribedText
+import io.github.sashirestela.openai.SimpleOpenAI
+import io.github.sashirestela.openai.domain.audio.TranscriptionRequest.TimestampGranularity
+import io.github.sashirestela.openai.domain.audio.{AudioResponseFormat, Transcription, TranscriptionRequest}
+import play.api.Configuration
+
+import java.nio.file.{Path, Paths}
+import javax.inject.{Inject, Named}
+import util.RichCompletableFuture._
+
+import java.net.http.HttpClient
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.concurrent.{ExecutionContext, Future}
+import scala.concurrent.duration.Duration
+
+/** Converts an audio file into text. */
+@ImplementedBy(classOf[OpenAiWhisperServiceImpl])
+trait TranscriberService {
+  /**
+   * Transcribes the audio file at `path`.
+   *
+   * @param path location of the audio file to transcribe
+   * @return the transcribed text, available once transcription finishes
+   */
+  def transcribeAudioToText(path: Path): Future[TranscribedText]
+}
+
+/**
+ * [[TranscriberService]] that submits audio to the "whisper-1" model through the
+ * simple-openai client.
+ *
+ * Reads `openai.apikey` and `openai.timeout` (used as the HTTP connect timeout) from the
+ * application configuration.
+ */
+class OpenAiWhisperServiceImpl @Inject() (config: Configuration) extends TranscriberService {
+
+  // ExecutionContext.global is already a java.util.concurrent.Executor, so no downcast is needed.
+  private val executor: java.util.concurrent.Executor = ExecutionContext.global
+
+  // Built lazily, once, and shared by every request instead of being rebuilt on each call.
+  private lazy val openAi: SimpleOpenAI = {
+    val connectTimeout = java.time.Duration.ofSeconds(config.get[Duration]("openai.timeout").toSeconds)
+    val httpClient = HttpClient.newBuilder()
+      .connectTimeout(connectTimeout)
+      .executor(executor)
+      .build()
+    SimpleOpenAI.builder()
+      .apiKey(config.get[String]("openai.apikey"))
+      .httpClient(httpClient)
+      .build()
+  }
+
+  override def transcribeAudioToText(path: Path): Future[TranscribedText] = {
+    val audioRequest = TranscriptionRequest.builder.file(path).model("whisper-1").
+      responseFormat(AudioResponseFormat.VERBOSE_JSON).temperature(0.2).timestampGranularity(TimestampGranularity.WORD).
+      timestampGranularity(TimestampGranularity.SEGMENT).build
+
+    openAi.audios.transcribe(audioRequest).asScala.map(response => {
+      TranscribedText(response.getText)
+    })
+  }
+}
diff --git a/app/service/ai/EmotionDetectionService.scala b/app/service/ai/EmotionDetectionService.scala
index 75edefd..f8305c6 100644
--- a/app/service/ai/EmotionDetectionService.scala
+++ b/app/service/ai/EmotionDetectionService.scala
@@ -1,7 +1,6 @@
 package service.ai
 
 import akka.actor.ActorSystem
-import akka.actor.TypedActor.context
 import com.google.inject.ImplementedBy
 import dao.model.{EmotionDetectionResult, RequestsInFlight}
 import play.api.Logger
diff --git a/app/service/ai/SimpleOpenAiService.scala b/app/service/ai/SimpleOpenAiService.scala
new file mode 100644
index 0000000..2dbe978
--- /dev/null
+++ b/app/service/ai/SimpleOpenAiService.scala
@@ -0,0 +1,5 @@
+package service.ai
+
+trait SimpleOpenAiService {
+
+}
diff --git a/app/util/RichCompletableFuture.scala b/app/util/RichCompletableFuture.scala
new file mode 100644
index 0000000..181685b
--- /dev/null
+++ b/app/util/RichCompletableFuture.scala
@@ -0,0 +1,25 @@
+// In a file named RichCompletableFuture.scala
+package util
+
+import java.util.concurrent.CompletableFuture
+import scala.concurrent.{Future, Promise}
+import scala.concurrent.ExecutionContext.Implicits.global
+
+/** Extension methods bridging Java `CompletableFuture`s to Scala `Future`s. */
+object RichCompletableFuture {
+  implicit class RichCF[T](javaFuture: CompletableFuture[T]) {
+    /**
+     * Adapts the wrapped `CompletableFuture` to a Scala `Future` that completes with the same
+     * value, or fails with the same exception.
+     */
+    def asScala: Future[T] = {
+      val promise = Promise[T]()
+      javaFuture.whenComplete { (result: T, exception: Throwable) =>
+        // A null exception is treated as successful completion.
+        if (exception == null) promise.success(result)
+        else promise.failure(exception)
+      }
+      promise.future
+    }
+  }
+}
\ No newline at end of file
diff --git a/build.sbt b/build.sbt
index a10827a..f141b49 100644
--- a/build.sbt
+++ b/build.sbt
@@ -63,7 +63,7 @@ libraryDependencies += "org.liquibase" % "liquibase-core" % "4.20.0"
 libraryDependencies += "com.pauldijou" %% "jwt-core" % "5.0.0"
 libraryDependencies += "com.pauldijou" %% "jwt-play-json" 
% "5.0.0" -libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.4.8" +libraryDependencies += "ch.qos.logback" % "logback-classic" % "1.4.12" libraryDependencies += "org.fusesource.jansi" % "jansi" % "2.4.0" libraryDependencies += "com.google.inject" % "guice" % "5.1.0" @@ -75,6 +75,8 @@ dependencyOverrides += "org.scala-lang.modules" %% "scala-parser-combinators" % libraryDependencies += "io.honeybadger" % "honeybadger-java" % "2.1.2" +libraryDependencies += "io.github.sashirestela" % "simple-openai" % "3.5.0" + libraryDependencies ++= Seq( "io.gatling.highcharts" % "gatling-charts-highcharts" % "3.9.5", diff --git a/conf/routes b/conf/routes index 33fb12f..4233bf8 100644 --- a/conf/routes +++ b/conf/routes @@ -42,6 +42,9 @@ POST /api/user/todo PUT /api/user/todo controllers.UserTodoController.edit() DELETE /api/user/todo/:userTodoId controllers.UserTodoController.delete(userTodoId: Long) +# Transcription routes +POST /api/transcribe controllers.TranscriberController.transcribeAudioToText() + # AI Admin routes POST /api/ai/admin/assistant controllers.AiAdminController.createAssistant() DELETE /api/ai/admin/assistant/:externalId controllers.AiAdminController.deleteAssistantByExternal(externalId) diff --git a/ui/src/app/models/emotion.model.ts b/ui/src/app/models/emotion.model.ts index b8560e4..2e1e0c0 100644 --- a/ui/src/app/models/emotion.model.ts +++ b/ui/src/app/models/emotion.model.ts @@ -228,3 +228,7 @@ export interface NoteTodoUpdate { id: number; isAccepted: boolean; } + +export interface TranscribedText { + text: string; +} diff --git a/ui/src/app/note-form/note-form.component.html b/ui/src/app/note-form/note-form.component.html index f6635ba..497b94b 100644 --- a/ui/src/app/note-form/note-form.component.html +++ b/ui/src/app/note-form/note-form.component.html @@ -12,6 +12,12 @@ + +