-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add audio transcription feature using OpenAI for emotion notes (#21)
- Add `TranscriberController` for handling audio file uploads
- Implement `TranscriberService` with OpenAI Whisper integration
- Create model `TranscribedText` for the transcribed text response
- Update `Module.scala` for dependency injection configuration
- Add routes for transcribing audio files
- Integrate media recorder in note form component for recording and uploading audio
- Update frontend models and services to handle transcription response
- Add OpenAI dependency to `build.sbt`
- Loading branch information
Showing
14 changed files
with
225 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package controllers | ||
|
||
import auth.AuthenticatedAction | ||
import dao.model.TranscribedText | ||
import play.api.libs.Files | ||
import play.api.libs.Files.TemporaryFile | ||
import play.api.libs.json.Json | ||
import play.api.mvc.{Action, ControllerComponents, MultipartFormData} | ||
import service.TranscriberService | ||
|
||
import java.nio.file.Paths | ||
import javax.inject.Inject | ||
import scala.concurrent.ExecutionContext.Implicits.global | ||
import scala.concurrent.Future | ||
|
||
|
||
|
||
/**
 * HTTP endpoint that accepts an uploaded audio recording and answers with its transcription.
 *
 * @param cc                  Play controller components, forwarded to the base controller
 * @param authenticatedAction action composed onto the endpoint (also forwarded to the base controller)
 * @param transcriberService  service that turns an audio file on disk into text
 */
class TranscriberController @Inject()(cc: ControllerComponents,
                                      authenticatedAction: AuthenticatedAction, transcriberService: TranscriberService)
  extends EmoBaseController(cc, authenticatedAction) {

  private lazy val logger: org.slf4j.Logger = org.slf4j.LoggerFactory.getLogger(this.getClass)

  /**
   * Transcribes the multipart part named `audio`.
   *
   * The uploaded temporary file is renamed so that it carries a `.webm` extension before being
   * handed to the transcriber, and the renamed file is removed once the transcription has
   * finished, whether it succeeded or failed.
   *
   * @return `200 OK` with the transcription rendered as JSON, or `400 Bad Request` with the body
   *         `Missing file` when the request has no `audio` part
   */
  def transcribeAudioToText(): Action[MultipartFormData[Files.TemporaryFile]] = Action(parse.multipartFormData) andThen authenticatedAction async { implicit request =>
    request.body.file("audio").map { audio =>
      logger.info(s"Transcribing audio file size: ${audio.fileSize}")
      val webmPath = moveToWebmFile(audio.ref)
      // Turn a synchronous failure into a failed Future so the cleanup below still runs.
      val transcription =
        try transcriberService.transcribeAudioToText(webmPath)
        catch { case scala.util.control.NonFatal(e) => Future.failed(e) }
      transcription
        .map(transcribedText => Ok(Json.toJson(transcribedText)))
        // The renamed file is no longer the path Play created, so it has to be deleted here.
        .andThen { case _ => deleteQuietly(webmPath) }
    }.getOrElse {
      Future.successful(BadRequest("Missing file"))
    }
  }

  /** Renames the uploaded temporary file to the same path with a `.webm` suffix and returns the new path. */
  private def moveToWebmFile(upload: TemporaryFile): java.nio.file.Path = {
    val source = upload.path
    val target = Paths.get(source.toString + ".webm")
    // Fully qualified: the unqualified name `Files` in this file refers to play.api.libs.Files.
    java.nio.file.Files.move(source, target)
  }

  /** Deletes `path` if it exists; a failure to delete is logged and otherwise ignored. */
  private def deleteQuietly(path: java.nio.file.Path): Unit =
    try {
      java.nio.file.Files.deleteIfExists(path)
      ()
    } catch {
      case scala.util.control.NonFatal(e) =>
        logger.warn(s"Could not delete temporary audio file $path", e)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package service | ||
|
||
import com.google.inject.ImplementedBy | ||
import dao.model.TranscribedText | ||
import io.github.sashirestela.openai.SimpleOpenAI | ||
import io.github.sashirestela.openai.domain.audio.TranscriptionRequest.TimestampGranularity | ||
import io.github.sashirestela.openai.domain.audio.{AudioResponseFormat, Transcription, TranscriptionRequest} | ||
import play.api.Configuration | ||
|
||
import java.nio.file.{Path, Paths} | ||
import javax.inject.{Inject, Named} | ||
import util.RichCompletableFuture._ | ||
|
||
import java.net.http.HttpClient | ||
import scala.concurrent.ExecutionContext.Implicits.global | ||
import scala.concurrent.{ExecutionContext, Future} | ||
import scala.concurrent.duration.Duration | ||
|
||
/**
 * Converts recorded audio into text.
 *
 * Bound by default to [[OpenAiWhisperServiceImpl]] through `@ImplementedBy`.
 */
@ImplementedBy(classOf[OpenAiWhisperServiceImpl])
trait TranscriberService {

  /**
   * Transcribes the audio file located at `path`.
   *
   * @param path location of the audio file to transcribe
   * @return a future holding the transcribed text
   */
  def transcribeAudioToText(path: Path): Future[TranscribedText]
}
|
||
/**
 * [[TranscriberService]] that sends audio files to the OpenAI `whisper-1` transcription model.
 *
 * Reads two configuration keys: `openai.timeout` (used as the HTTP connect timeout, with whole
 * second precision) and `openai.apikey`.
 *
 * @param config application configuration providing the keys above
 */
class OpenAiWhisperServiceImpl @Inject() (config: Configuration) extends TranscriberService {

  // ExecutionContext.global is statically an ExecutionContextExecutor, i.e. already a
  // java.util.concurrent.Executor, which is all HttpClient.Builder#executor asks for.
  // No downcast to ExecutorService (which depends on the runtime class) is needed.
  private val executor: java.util.concurrent.Executor = ExecutionContext.global

  // Built on first use and then shared by every request instead of creating a new HTTP client
  // per transcription. Kept lazy so that a missing configuration key still surfaces when
  // transcribeAudioToText is called, not when the class is instantiated.
  private lazy val openAi: SimpleOpenAI = {
    val connectTimeout = java.time.Duration.ofSeconds(config.get[Duration]("openai.timeout").toSeconds)
    val httpClient = HttpClient.newBuilder()
      .connectTimeout(connectTimeout)
      .executor(executor)
      .build()
    SimpleOpenAI.builder()
      .apiKey(config.get[String]("openai.apikey"))
      .httpClient(httpClient)
      .build()
  }

  /**
   * Requests a verbose-JSON transcription of the file at `path` and keeps only its text.
   *
   * @param path location of the audio file to upload
   * @return a future holding the transcribed text; it fails if the OpenAI call fails
   */
  override def transcribeAudioToText(path: Path): Future[TranscribedText] = {
    val audioRequest = TranscriptionRequest.builder
      .file(path)
      .model("whisper-1")
      .responseFormat(AudioResponseFormat.VERBOSE_JSON)
      .temperature(0.2)
      .timestampGranularity(TimestampGranularity.WORD)
      .timestampGranularity(TimestampGranularity.SEGMENT)
      .build

    openAi.audios.transcribe(audioRequest).asScala.map(response => TranscribedText(response.getText))
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package service.ai | ||
|
||
/**
 * Placeholder trait that currently declares no members.
 *
 * NOTE(review): presumably meant as a common base for OpenAI-backed services — confirm.
 */
trait SimpleOpenAiService
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
// In a file named RichCompletableFuture.scala | ||
package util | ||
|
||
import java.util.concurrent.CompletableFuture | ||
import scala.concurrent.{Future, Promise} | ||
import scala.concurrent.ExecutionContext.Implicits.global | ||
|
||
/** Syntax for viewing a Java `CompletableFuture` as a Scala `Future`. */
object RichCompletableFuture {

  /**
   * Adds `asScala` to any `CompletableFuture`.
   *
   * @param javaFuture the Java future to adapt
   * @tparam T type of the value the future produces
   */
  implicit class RichCF[T](javaFuture: CompletableFuture[T]) {

    /**
     * Returns a Scala `Future` that completes with the same value, or fails with the same
     * throwable, that `whenComplete` reports for the wrapped Java future.
     */
    def asScala: Future[T] = {
      val bridge = Promise[T]()
      javaFuture.whenComplete { (value: T, error: Throwable) =>
        // A null throwable is how CompletableFuture signals normal completion.
        Option(error) match {
          case Some(cause) => bridge.failure(cause)
          case None        => bridge.success(value)
        }
      }
      bridge.future
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import { Injectable } from '@angular/core'; | ||
import {Observable} from "rxjs"; | ||
import {HttpClient, HttpHeaders} from "@angular/common/http"; | ||
import {AuthService} from "./auth.service"; | ||
import {ErrorService} from "./error.service"; | ||
import {TranscribedText} from "../models/emotion.model"; | ||
import {environment} from "../../environments/environment"; | ||
import {catchError} from "rxjs/operators"; | ||
|
||
/**
 * Records audio from the user's microphone and uploads recordings for transcription.
 */
@Injectable({
  providedIn: 'root'
})
export class MediaRecorderService {
  private mediaRecorder: MediaRecorder | undefined
  private audioChunks: Blob[] = [];

  constructor(private http: HttpClient, private authService: AuthService, private errorService: ErrorService) {
  }

  /**
   * Asks for microphone access and, once granted, starts a new recording.
   *
   * Chunks left over from an earlier recording are discarded. If access is denied or no input
   * device is available, the failure is logged and no recording is started.
   */
  startRecording() {
    navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
      const recorder = new MediaRecorder(stream);
      this.audioChunks = [];

      recorder.addEventListener('dataavailable', event => {
        this.audioChunks.push(event.data);
      });
      // Release the microphone when the recording ends; otherwise the tracks stay live
      // and the browser keeps showing the device as in use.
      recorder.addEventListener('stop', () => {
        stream.getTracks().forEach(track => track.stop());
      }, { once: true });

      this.mediaRecorder = recorder;
      recorder.start();
    }).catch(error => {
      console.error('Unable to start audio recording', error);
    });
  }

  /**
   * Stops the current recording.
   *
   * @returns a promise resolving to the recorded audio as an `audio/webm` blob; it rejects when
   *          there is no active recording, instead of staying pending forever
   */
  stopRecording(): Promise<Blob> {
    return new Promise((resolve, reject) => {
      const recorder = this.mediaRecorder;
      if (!recorder || recorder.state === 'inactive') {
        reject(new Error('No active recording to stop'));
        return;
      }

      recorder.addEventListener('stop', () => {
        const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' });
        this.audioChunks = [];
        this.mediaRecorder = undefined;
        resolve(audioBlob);
      }, { once: true });

      recorder.stop();
    });
  }

  /**
   * Uploads a recording to the `/transcribe` endpoint as the multipart field `audio`.
   *
   * @param audioBlob the recorded audio to transcribe
   * @returns an observable of the transcription; HTTP errors are routed through `ErrorService`
   */
  transcribeAudio(audioBlob: Blob): Observable<TranscribedText> {
    const headers: HttpHeaders = this.authService.getAuthorizationHeader();
    const formData = new FormData();
    formData.append('audio', audioBlob, 'audio.webm');
    return this.http
      .post<TranscribedText>(`${environment.baseUrl}/transcribe`, formData, {headers})
      .pipe(catchError(resp => {
        return this.errorService.handleError(resp);
      }));
  }
}