forked from Brown-University-Library/iip-word-lists
-
Notifications
You must be signed in to change notification settings - Fork 1
/
untitled.py
53 lines (36 loc) · 1.04 KB
/
untitled.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import contextlib
import csv
import os
import sys
import xml.etree.ElementTree as ET

import requests

from iip_smr_web_app import settings_app
# Zero-based column positions within a CSV row. The visible code adds NEWBUFF
# to these before indexing, e.g. row[LATIN_LEMMA + NEWBUFF].
LATIN_TEXT = 0  # (+ NEWBUFF) column compared row-to-row to detect a new text
# NOTE(review): the next four are not used in the visible part of the file;
# presumably word number, surface form and two part-of-speech columns - confirm.
LATIN_WORDNUM = 1
LATIN_WORD = 7
LATIN_POS1 = 8
LATIN_POS2 = 9
LATIN_LEMMA = 10  # (+ NEWBUFF) column tested for empty / leading "?" values
# NOTE(review): XML1/XML2 are not used in the visible code; presumably further
# column positions - confirm against go_through_text.
XML1 = 11
XML2 = 12
# Offset added to the LATIN_* positions when indexing a row of the downloaded CSV.
NEWBUFF = 3
# NOTE(review): not used in the visible code; presumably a keyword-in-context
# window size - confirm.
KWIC_BUFF = 2
# Download the word-list CSV, group its usable rows by text, and pass each
# group to go_through_text() while stdout is redirected into
# latin_doubletree_data.txt.
#
# A row is usable when its lemma column (LATIN_LEMMA + NEWBUFF) is non-empty
# and does not start with "?". Consecutive usable rows sharing the same text
# column (LATIN_TEXT + NEWBUFF) form one group.
#
# NOTE(review): go_through_text is defined below this block, so as the file is
# laid out this module-level code runs before that name is bound. Move the
# definition above this block, or wrap this block in a function that is
# called at the end of the file.
with open('latin_doubletree_data.txt', 'w', encoding='utf-8') as f, \
        contextlib.redirect_stdout(f):
    # redirect_stdout restores the real stdout on exit, so nothing is left
    # pointing at the closed file once this block finishes.
    with requests.Session() as s:
        download = s.get(
            "https://raw.githubusercontent.com/Brown-University-Library/iip-word-lists/master/new%20version%20test/Step%204%20New%20Output.csv"
        )
        # Fail loudly on an HTTP error instead of parsing an error page as CSV.
        download.raise_for_status()
        decoded = download.content.decode('utf-8')

    csv_reader = csv.reader(decoded.splitlines(), delimiter=",")
    next(csv_reader, None)  # the first row is the header, never data

    lemma_col = LATIN_LEMMA + NEWBUFF
    text_col = LATIN_TEXT + NEWBUFF
    curtext = ""
    textrows = []
    for row in csv_reader:
        # Blank or truncated rows have no lemma column to inspect.
        if len(row) <= lemma_col:
            continue
        row_word = row[lemma_col]
        if not row_word or row_word.startswith("?"):
            continue
        if row[text_col] != curtext:
            # A new text starts here: flush the finished group first.
            if textrows:
                go_through_text(textrows)
            curtext = row[text_col]
            textrows = []
        # The row that opens a text belongs to that text, so it is appended
        # after the flush rather than discarded.
        textrows.append(row)

    # Flush the last group, which no following row triggers.
    if textrows:
        go_through_text(textrows)
def go_through_text(textrows):