forked from Rahi13/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
email_parser.py
28 lines (19 loc) · 902 Bytes
/
email_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# This code parses email addresses from an EndNote export text file. The code is written in Python 3. Code modified from https://www.tutorialspoint.com/python_text_processing/python_extract_emails_from_text.htm
import re
# Input text file to scan and output file that receives one address per line.
infile = '/Users/rsn13/Pictures/Desktop/all_proteoform_references.txt'
outfile = '/Users/rsn13/Pictures/Desktop/scripts/output/email_addresses_proteoform.txt'

# Compiled once and reused for every line. Upper-case letters are accepted so
# that an address such as "John.Smith@example.com" is captured whole instead
# of being truncated to "mith@example.com".
EMAIL_PATTERN = re.compile(
    r"[A-Za-z0-9\.\-+_]+@[A-Za-z0-9\.\-+_]+\.[A-Za-z]+"
)


def extract_emails(lines):
    """Return the unique email-like strings found in *lines*.

    Args:
        lines: Any iterable of strings, e.g. an open text file.

    Returns:
        A list of the matched addresses in order of first appearance, with
        exact duplicates removed (comparison is case-sensitive).
    """
    seen = set()
    unique = []
    for text in lines:
        for address in EMAIL_PATTERN.findall(text):
            # Set membership keeps de-duplication O(1) per address; the list
            # preserves first-seen order for the output file.
            if address not in seen:
                seen.add(address)
                unique.append(address)
    return unique


def main():
    """Read ``infile``, append each unique address to ``outfile``, print 'Done'.

    Raises:
        OSError: If ``infile`` cannot be read or ``outfile`` cannot be opened.
    """
    # Stream the input line by line instead of buffering it in a list first.
    with open(infile, encoding='utf8') as fh:
        emails = extract_emails(fh)

    # Only touch the output file when there is something to write. It is
    # opened once, in append mode so existing contents are kept, and the
    # context manager closes it even if a write fails.
    if emails:
        with open(outfile, 'a', encoding='utf8') as out:
            for email in emails:
                print(email, file=out)

    print('Done')


if __name__ == '__main__':
    main()