-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
85 lines (64 loc) · 2.37 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
import time
# AssemblyAI v2 REST endpoints: `upload_endpoint` receives the POST made by
# `upload_file()`, `transcript_endpoint` receives the POST made by
# `request_transcript()`.
upload_endpoint = "https://api.assemblyai.com/v2/upload"
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
# Helper for `upload_file()`
def _read_file(filename, chunk_size=5242880):
    """Lazily yield the contents of ``filename`` as consecutive byte chunks.

    The file is opened in binary mode and read ``chunk_size`` bytes at a
    time (5242880 by default), so the whole file is never loaded at once.
    Iteration stops at the first read that returns no data.

    Args:
        filename: Path of the file to read.
        chunk_size: Maximum number of bytes per yielded chunk.

    Yields:
        ``bytes`` objects, each at most ``chunk_size`` long.
    """
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"".
        yield from iter(lambda: stream.read(chunk_size), b"")
# Uploads a file to AAI servers
def upload_file(audio_file, header):
    """POST the contents of ``audio_file`` to ``upload_endpoint``.

    The request body is the chunk generator returned by ``_read_file``, so
    the file is read piece by piece rather than loaded whole.

    Args:
        audio_file: Path of the file to upload.
        header: Headers passed to the POST request.

    Returns:
        The response body decoded from JSON.
    """
    body_chunks = _read_file(audio_file)
    response = requests.post(upload_endpoint, headers=header, data=body_chunks)
    return response.json()
# Request transcript for file uploaded to AAI servers
def request_transcript(upload_url, header, start_time, end_time):
    """POST a transcription request for ``upload_url`` to ``transcript_endpoint``.

    The JSON payload always enables ``speaker_labels`` and forwards
    ``start_time`` / ``end_time`` as ``audio_start_from`` / ``audio_end_at``.
    NOTE(review): the units expected for the start/end values are defined by
    the remote API and are not visible in this file -- confirm with callers.

    Args:
        upload_url: URL of the audio to transcribe.
        header: Headers passed to the POST request.
        start_time: Value sent as ``audio_start_from``.
        end_time: Value sent as ``audio_end_at``.

    Returns:
        The response body decoded from JSON.
    """
    payload = {
        "audio_url": upload_url,
        "speaker_labels": True,
        "audio_start_from": start_time,
        "audio_end_at": end_time,
    }
    response = requests.post(transcript_endpoint, json=payload, headers=header)
    return response.json()
# Make a polling endpoint
def make_polling_endpoint(transcript_response):
    """Build the URL used to poll a transcript.

    Args:
        transcript_response: Mapping whose ``'id'`` entry is a string.

    Returns:
        The transcript base URL followed by that id.

    Raises:
        KeyError: If ``transcript_response`` has no ``'id'`` entry.
    """
    transcript_id = transcript_response['id']
    return "https://api.assemblyai.com/v2/transcript/" + transcript_id
# Wait for the transcript to finish
def wait_for_completion(polling_endpoint, header):
    """Poll ``polling_endpoint`` until the transcript is completed.

    Each iteration issues a GET request, prints the reported ``status``,
    and sleeps 5 seconds before the next poll unless the transcript has
    reached a final state.

    Args:
        polling_endpoint: URL of the transcript resource to poll.
        header: Headers passed to every GET request.

    Returns:
        None, once the status is ``'completed'``.

    Raises:
        RuntimeError: If the status is ``'error'``. The message carries the
            ``error`` field of the response, which is also printed.
        KeyError: If a response has no ``'status'`` entry.
    """
    while True:
        polling_response = requests.get(polling_endpoint, headers=header).json()
        status = polling_response['status']
        print(status)

        if status == 'completed':
            return

        if status == 'error':
            # 'error' is a final state: the transcript will never reach
            # 'completed', so stop here instead of polling forever.
            error_detail = polling_response.get('error')
            print(error_detail)
            raise RuntimeError("Transcription failed: {}".format(error_detail))

        time.sleep(5)
# Get the paragraphs of the transcript
def get_paragraphs(polling_endpoint, header):
    """Fetch ``<polling_endpoint>/paragraphs`` and return its paragraphs.

    Args:
        polling_endpoint: URL of the transcript resource.
        header: Headers passed to the GET request.

    Returns:
        A new list holding the items of the ``'paragraphs'`` entry of the
        JSON response, in order.

    Raises:
        KeyError: If the response has no ``'paragraphs'`` entry.
    """
    response = requests.get(polling_endpoint + "/paragraphs", headers=header)
    return list(response.json()['paragraphs'])
# Get the sentences of the transcript
def get_sentences(polling_endpoint, header):
    """Fetch ``<polling_endpoint>/sentences`` and return its sentences.

    Args:
        polling_endpoint: URL of the transcript resource.
        header: Headers passed to the GET request.

    Returns:
        A new list holding the items of the ``'sentences'`` entry of the
        JSON response, in order.

    Raises:
        KeyError: If the response has no ``'sentences'`` entry.
    """
    response = requests.get(polling_endpoint + "/sentences", headers=header)
    return list(response.json()['sentences'])