-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmimelook.py
executable file
·292 lines (224 loc) · 9.63 KB
/
mimelook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
#!/usr/bin/env python3
import os
import sys
import re
import base64
import subprocess
import html
from html.parser import HTMLParser
import mailparser # mail-parser
import markdown # Markdown
import magic # python-magic (for mimetypes)
QUOTE_ESCAPE = "MIMELOOK_QUOTES"
mime = magic.Magic(mime=True)
def export_inline_attachments(message, dstdir):
# find inline attachments in plaintext version
#inlines = re.findall("\[cid:.*?\]", message.body.split("--- mail_boundary ---")[0])
# .. or html version (probably safer?):
inlines = re.findall("src=\"cid:.*?\"", message.body.split("--- mail_boundary ---")[1])
# return list of tuples: (attachment id, exported file)
ret = []
for inline in inlines:
# find filename
name_match = re.search("cid:.*@", inline)
# find content id
id_match = re.search("@.*", inline)
attachment_id = inline[id_match.start()+1:-1]
# find corresponding attachment in the message
attachment_name = inline[name_match.start()+4:name_match.end()-1]
attachment = [x for x in message.attachments if x["content-id"].startswith("<{}".format(attachment_name))]
assert len(attachment) == 1, "Could not get attachment '{}'".format(attachment_name)
attachment = attachment[0]
# base64 decode the file and place it in dstdir
assert attachment["content_transfer_encoding"] == "base64", "Only base64 currently supported"
b = base64.decodebytes(bytes(attachment["payload"], "ascii"))
dstfile = os.path.join(dstdir, attachment_name)
with open(dstfile, "wb") as f:
f.write(b)
# same attachment might occur multiple times - only add it once
att = (attachment_id, dstfile)
if att not in ret:
ret.append(att)
return ret
# make "from: x, sent: d/m-y, to: z, ..." section in outlook style
# grabbed from what outlook webmail does
def format_insane_outlook_header(fromaddr, sent, to, cc, subject):
ret = """<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> {}<br>
<b>Sent:</b> {}<br>
""".format(fromaddr, sent)
if to is not None:
ret += "<b>To:</b> {}<br>\n".format(to)
if cc is not None:
ret += "<b>Cc:</b> {}<br>\n".format(cc)
ret += """<b>Subject:</b> {}</font>
<div> </div>
</div>
""".format(subject)
return ret
# get message from id
def message_from_msgid(msgid):
mucmd = "mu find msgid:{} --fields 'l'".format(msgid)
p = subprocess.Popen(mucmd.split(" "), stdout=subprocess.PIPE)
messagefiles, _ = p.communicate()
messagefiles = messagefiles.decode("utf-8").split("\n")
# check return code ok
assert p.returncode == 0, "mu find failed"
# expecting list of at least 1 message and one empty line
assert(len(messagefiles) > 1), "mu found no messages"
# use first hit and strip surrounding '
messagefile = messagefiles[0][1:-1]
# parse message and grab HTML
message = mailparser.parse_from_file(messagefile)
return message
# create crazy outlook-style html reply from message id and the desired html message
def format_outlook_reply(message, htmltoinsert):
message_html = message.body.split("--- mail_boundary ---")[1]
# convert CRLF to LF
message_html = message_html.replace("\r\n", "\n")
# grab header info
message_from = message.headers["From"]
message_to = message.headers["To"] if "To" in message.headers else None
message_subject = message.headers["Subject"]
message_date = message.date.strftime("%d %B %Y %H:%M:%S")
message_cc = message.headers["CC"] if "CC" in message.headers else None
outlook_madness = format_insane_outlook_header(message_from, message_date,
message_to, message_cc, message_subject)
# find body tag in html
m = re.search("<body.*?>", message_html)
assert m is not None, "No body tag found in parent HTML"
# format resulting html email:
# ..<body> from email being replied to
# reply message
# "from yada yada" section
# remainder of email being replied to
html = "{}\n{}\n{}\n{}".format(message_html[:m.end()],
htmltoinsert,
outlook_madness,
message_html[m.end():])
return html
# Convert "> "-style quotes into something else that passes untouched through html.escape()
# Escaped quotes look like this: [[MIMELOOK_QUOTES|X]] where X denotes the
# number of quotes that have been escaped
def escape_quotes(plaintext):
ret = ""
for line in plaintext.split("\n"):
if line.startswith(">"):
i = 0
while i < len(line) and line[i] == ">":
i += 1
ret += "[[{}|{}]]".format(QUOTE_ESCAPE, i)
ret += line[i:] + "\n"
else:
ret += line + "\n"
return ret
# Convert previously escaped "> "-style quotes back to their original form.
def unescape_quotes(string):
retstr = ""
i = 0
while i < len(string):
# find start position of next escaped quote group
p = string[i:].find("[[{}|".format(QUOTE_ESCAPE))
if p < 0:
# no more escaped quotes - grab the rest of the string and return
retstr += string[i:]
break
# found some escaped quotes, grab content of string upto them
retstr += string[i:i+p]
# find end of the escaped quote tag
tag_end_pos = string[i+p:].find("]]")
# grab the number from the tag
nquotes = int(string[i+p+2+len(QUOTE_ESCAPE)+1:i+p+tag_end_pos])
# append that number of quotes
retstr += ">"*nquotes
# advance to the character just after the tag
i += p+tag_end_pos+2
return retstr
def escape_signature_linebreaks(plaintext):
m = re.search("^-- ", plaintext, re.MULTILINE)
if m is not None:
content = plaintext[:m.start()]
signature = plaintext[m.start():]
signature = signature.replace("\n", " \n")
return content + signature
else:
return plaintext
# Find MIME parts in the plaintext
# Returns plaintext without parts and list of parts
# Warning: assumes only valid <#part...><#/part> tags after the first occurence
# of "<#part" !
def find_mime_parts(plaintext):
parts = re.findall("<#part.*?<#/part>", plaintext, re.DOTALL)
m = re.search("<#part.*?<#/part>", plaintext, re.DOTALL)
if m is not None:
text = plaintext[:m.start()]
else:
text = plaintext
return text, parts
# Escape HTML. Don't escape lines that start with four spaces, since
# these will be wrapped by <pre> tags by the Markdown-to-html
# conversion.
def html_escape(text):
ret = ""
for line in text.split("\n"):
if not line.startswith(" "):
ret += html.escape(line) + "\n"
else:
ret += line + "\n"
return ret
# Take desired plaintext message and id of message being replied to
# and format a multipart message with sane plaintext section and
# insane outlook-style html section. The plaintext message is converted
# to HTML supporting markdown syntax.
def plain2fancy(plaintext, msgid):
# find and strip MIME parts in the ending of the plaintext
plaintext, parts = find_mime_parts(plaintext)
# escape HTML in the plaintext, handling quoted content explicitly
escaped_plaintext = unescape_quotes(html_escape(escape_quotes(plaintext)))
# handle signature - we expect linebreaks to be preserved in the signature,
# but let everything else wrap (reminder: Markdown preserves linebreaks if
# there's two spaces at the end of a line)
escaped_plaintext = escape_signature_linebreaks(escaped_plaintext)
# plaintext is converted to html, supporting markdown syntax
# loosely inspired by http://webcache.googleusercontent.com/search?q=cache:R1RQkhWqwEgJ:tess.oconnor.cx/2008/01/html-email-composition-in-emacs
text2html = markdown.markdown(escaped_plaintext)
# get message from message id
message = message_from_msgid(msgid)
# insane outlook-style html reply
madness = format_outlook_reply(message, text2html)
# find inline attachments and export them to a temporary dir
attdir = "/dev/shm/mu4e-{}".format(msgid)
if not os.path.isdir(attdir):
os.mkdir(attdir)
attachments = export_inline_attachments(message, attdir)
# build string of <#part type=x filename=y disposition=inline><#/part> for each
# attachment, separated by newlines
attachment_str = ""
for attachment in attachments:
mimetype = mime.from_file(attachment[1])
attachment_str += "<#part type=\"{}\" filename=\"{}\" disposition=inline id=\"{}@{}\"><#/part>\n"\
.format(mimetype, attachment[1], os.path.basename(attachment[1]), attachment[0])
# also include attachments that were already present in the plaintext
attachment_str += "\n".join(parts)
# write html message to file for inspection before sending
with open("/dev/shm/mimelook-madness.html", "w") as f:
f.write(madness)
# return the multipart message
multimsg = """<#multipart type=alternative>
<#part type=text/plain>
{}
<#/part>
<#part type=text/html>
{}
<#/part>
<#/multipart>
{}""".format(plaintext, madness, attachment_str)
return multimsg
if __name__ == '__main__':
stdin = sys.stdin.read()
msgid_end_char = stdin.find("\n")
# expecting first line to be the message id
msgid = stdin[:msgid_end_char]
# expecting rest to be plaintext version of the message
plaintext = stdin[msgid_end_char+1:]
print(plain2fancy(plaintext, msgid))