From 151d2dcbc0d67bc99ef6953705b21812b1ef56a7 Mon Sep 17 00:00:00 2001 From: erankor Date: Mon, 7 Aug 2023 20:31:30 +0300 Subject: [PATCH] add io.Reader interface for attachment content (#296) Can provide a significant optimization in memory usage when encoding large attachments - the attachment is stored only once (in the final message), instead of 3 times (part.Content, the base64 encoded buffer and the final message) --- builder.go | 11 ++++++++ encode.go | 72 +++++++++++++++++++++++++++++++++++++++++++++-- encode_test.go | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ part.go | 7 +++-- 4 files changed, 161 insertions(+), 5 deletions(-) diff --git a/builder.go b/builder.go index 25f4aeef..238a42fe 100644 --- a/builder.go +++ b/builder.go @@ -3,6 +3,7 @@ package enmime import ( "bytes" "errors" + "io" "io/ioutil" "math/rand" "mime" @@ -228,6 +229,16 @@ func (p MailBuilder) AddAttachment(b []byte, contentType string, fileName string return p } +// AddAttachmentWithReader returns a copy of MailBuilder that includes the specified attachment, using an io.Reader to pull the content of the attachment. +func (p MailBuilder) AddAttachmentWithReader(r io.Reader, contentType string, fileName string) MailBuilder { + part := NewPart(contentType) + part.ContentReader = r + part.FileName = fileName + part.Disposition = cdAttachment + p.attachments = append(p.attachments, part) + return p +} + // AddFileAttachment returns a copy of MailBuilder that includes the specified attachment. // fileName, will be populated from the base name of path. Content type will be detected from the // path extension. 
diff --git a/encode.go b/encode.go index 46dfeba7..02f89a86 100644 --- a/encode.go +++ b/encode.go @@ -26,6 +26,13 @@ const ( teBase64 ) +const ( + base64EncodedLineLen = 76 + base64DecodedLineLen = base64EncodedLineLen * 3 / 4 // this is ok since lineLen is divisible by 4 + linesPerChunk = 128 + readChunkSize = base64DecodedLineLen * linesPerChunk +) + var crnl = []byte{'\r', '\n'} // Encode writes this Part and all its children to the specified writer in MIME format. @@ -33,6 +40,15 @@ func (p *Part) Encode(writer io.Writer) error { if p.Header == nil { p.Header = make(textproto.MIMEHeader) } + if p.ContentReader != nil { + // read some data in order to check whether the content is empty + p.Content = make([]byte, readChunkSize) + n, err := p.ContentReader.Read(p.Content) + if err != nil && err != io.EOF { + return err + } + p.Content = p.Content[:n] + } cte := p.setupMIMEHeaders() // Encode this part. b := bufio.NewWriter(writer) @@ -87,7 +103,7 @@ func (p *Part) setupMIMEHeaders() transferEncoding { cte := te7Bit if len(p.Content) > 0 { cte = teBase64 - if p.TextContent() { + if p.TextContent() && p.ContentReader == nil { cte = selectTransferEncoding(p.Content, false) if p.Charset == "" { p.Charset = utf8 @@ -174,11 +190,15 @@ func (p *Part) encodeHeader(b *bufio.Writer) error { // encodeContent writes out the content in the selected encoding. func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error) { + if p.ContentReader != nil { + return p.encodeContentFromReader(b) + } + switch cte { case teBase64: enc := base64.StdEncoding text := make([]byte, enc.EncodedLen(len(p.Content))) - base64.StdEncoding.Encode(text, p.Content) + enc.Encode(text, p.Content) // Wrap lines. lineLen := 76 for len(text) > 0 { @@ -205,6 +225,54 @@ func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error) return err } +// encodeContentFromReader writes out the content read from the reader using base64 encoding. 
+func (p *Part) encodeContentFromReader(b *bufio.Writer) error { + text := make([]byte, base64EncodedLineLen) // a single base64 encoded line + enc := base64.StdEncoding + + chunk := make([]byte, readChunkSize) // contains a whole number of lines + copy(chunk, p.Content) // copy the data of the initial read that was issued by `Encode` + n := len(p.Content) + + for { + // call read until we get a full chunk / error + for n < len(chunk) { + c, err := p.ContentReader.Read(chunk[n:]) + if err != nil { + if err == io.EOF { + break + } + return err + } + + n += c + } + + for i := 0; i < n; i += base64DecodedLineLen { + size := n - i + if size > base64DecodedLineLen { + size = base64DecodedLineLen + } + + enc.Encode(text, chunk[i:i+size]) + if _, err := b.Write(text[:enc.EncodedLen(size)]); err != nil { + return err + } + if _, err := b.Write(crnl); err != nil { + return err + } + } + + if n < len(chunk) { + break + } + + n = 0 + } + + return nil +} + // selectTransferEncoding scans content for non-ASCII characters and selects 'b' or 'q' encoding. func selectTransferEncoding(content []byte, quoteLineBreaks bool) transferEncoding { if len(content) == 0 { diff --git a/encode_test.go b/encode_test.go index c4e39dba..b545502d 100644 --- a/encode_test.go +++ b/encode_test.go @@ -2,6 +2,8 @@ package enmime_test import ( "bytes" + "io" + "math/rand" "testing" "time" @@ -104,6 +106,80 @@ func TestEncodePartQuotedPrintableHeaders(t *testing.T) { test.DiffGolden(t, b.Bytes(), "testdata", "encode", "part-quoted-printable-headers.golden") } +// oneByOneReader implements io.Reader over a byte slice, returns a single byte on every Read request. 
+// This object is used to validate that partial reads (=read calls that return n<len(p)) are handled correctly. +type oneByOneReader struct { + content []byte + pos int +} + +func (r *oneByOneReader) Read(p []byte) (int, error) { + if r.pos >= len(r.content) { + return 0, io.EOF + } + p[0] = r.content[r.pos] + r.pos++ + return 1, nil +} + +func TestEncodePartContentReader(t *testing.T) { + contentLengths := []int{ + 0, 1, 2, 3, 4, // empty / nearly empty + 55, 56, 57, 58, 59, 60, // lengths close to the length of a single line (57) + 7294, 7295, 7296, 7297, 7298, // lengths close to the length of a single chunk (7296) + } + + for _, oneByOne := range []bool{false, true} { + for _, contentLength := range contentLengths { + // create a part with random content + p := enmime.NewPart("application/zip") + p.Boundary = "enmime-abcdefg0123456789" + p.Charset = "binary" + p.ContentID = "mycontentid" + p.ContentTypeParams["param1"] = "myparameter1" + p.ContentTypeParams["param2"] = "myparameter2" + p.Disposition = "attachment" + p.FileName = "stuff.zip" + p.FileModDate, _ = time.Parse(time.RFC822, "01 Feb 03 04:05 GMT") + + p.Content = make([]byte, contentLength) + _, err := rand.Read(p.Content) + if err != nil { + t.Fatal(err) + } + + // encode the part using `Content` byte slice stored in the Part + b1 := &bytes.Buffer{} + err = p.Encode(b1) + if err != nil { + t.Fatal(err) + } + + // encode the part using io.reader + if oneByOne { + p.ContentReader = &oneByOneReader{content: p.Content} + } else { + p.ContentReader = bytes.NewReader(p.Content) + } + p.Content = nil + + b2 := &bytes.Buffer{} + err = p.Encode(b2) + if err != nil { + t.Fatal(err) + } + + // compare the results + if !bytes.Equal(b1.Bytes(), b2.Bytes()) { + t.Errorf("[]byte encode and io.Reader encode produced different results for length %d", contentLength) + } + } + } +} + func TestEncodePartBinaryHeader(t *testing.T) { p := enmime.NewPart("text/plain") p.Header.Set("Subject", "¡Hola, señor!") diff --git a/part.go b/part.go index ccf8bb75..60dd887f 100644 --- a/part.go +++ b/part.go @@ -44,9 +44,10 @@ type Part struct { Charset string // The content charset
encoding, may differ from charset in header. OrigCharset string // The original content charset when a different charset was detected. - Errors []*Error // Errors encountered while parsing this part. - Content []byte // Content after decoding, UTF-8 conversion if applicable. - Epilogue []byte // Epilogue contains data following the closing boundary marker. + Errors []*Error // Errors encountered while parsing this part. + Content []byte // Content after decoding, UTF-8 conversion if applicable. + ContentReader io.Reader // Reader interface for pulling the content for encoding. + Epilogue []byte // Epilogue contains data following the closing boundary marker. parser *Parser // Provides access to parsing options.