add io.Reader interface for attachment content (#296)
Can provide a significant reduction in memory usage when encoding
large attachments - the attachment content is held only once (in the final
message) instead of three times (part.Content, the base64-encoded buffer,
and the final message)
erankor authored Aug 7, 2023
1 parent 9d622b5 commit 151d2dc
Showing 4 changed files with 161 additions and 5 deletions.
11 changes: 11 additions & 0 deletions builder.go
@@ -3,6 +3,7 @@ package enmime
import (
"bytes"
"errors"
"io"
"io/ioutil"
"math/rand"
"mime"
@@ -228,6 +229,16 @@ func (p MailBuilder) AddAttachment(b []byte, contentType string, fileName string
return p
}

// AddAttachmentWithReader returns a copy of MailBuilder that includes the specified attachment, using an io.Reader to pull the content of the attachment.
func (p MailBuilder) AddAttachmentWithReader(r io.Reader, contentType string, fileName string) MailBuilder {
part := NewPart(contentType)
part.ContentReader = r
part.FileName = fileName
part.Disposition = cdAttachment
p.attachments = append(p.attachments, part)
return p
}

// AddFileAttachment returns a copy of MailBuilder that includes the specified attachment.
fileName will be populated from the base name of path. Content type will be detected from the
// path extension.
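
For illustration, a minimal sketch of the new builder method in use. It assumes the usual MailBuilder chain (Builder, From, To, Subject, Text, Build); the file path and addresses are placeholders, not part of this commit:

package main

import (
	"log"
	"os"

	"github.com/jhillyerd/enmime"
)

func main() {
	// Open the attachment and let the builder stream it, instead of
	// loading the whole file into memory for AddAttachment.
	f, err := os.Open("large-report.zip") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	root, err := enmime.Builder().
		From("Sender", "sender@example.com").
		To("Recipient", "recipient@example.com").
		Subject("Report attached").
		Text([]byte("See the attached archive.")).
		AddAttachmentWithReader(f, "application/zip", "large-report.zip").
		Build()
	if err != nil {
		log.Fatal(err)
	}
	if err := root.Encode(os.Stdout); err != nil {
		log.Fatal(err)
	}
}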
72 changes: 70 additions & 2 deletions encode.go
@@ -26,13 +26,29 @@ const (
teBase64
)

const (
base64EncodedLineLen = 76
base64DecodedLineLen = base64EncodedLineLen * 3 / 4 // exact, since base64EncodedLineLen is divisible by 4
linesPerChunk = 128
readChunkSize = base64DecodedLineLen * linesPerChunk
)

var crnl = []byte{'\r', '\n'}

// Encode writes this Part and all its children to the specified writer in MIME format.
func (p *Part) Encode(writer io.Writer) error {
if p.Header == nil {
p.Header = make(textproto.MIMEHeader)
}
if p.ContentReader != nil {
// read some data in order to check whether the content is empty
p.Content = make([]byte, readChunkSize)
n, err := p.ContentReader.Read(p.Content)
if err != nil && err != io.EOF {
return err
}
p.Content = p.Content[:n]
}
cte := p.setupMIMEHeaders()
// Encode this part.
b := bufio.NewWriter(writer)
@@ -87,7 +103,7 @@ func (p *Part) setupMIMEHeaders() transferEncoding {
cte := te7Bit
if len(p.Content) > 0 {
cte = teBase64
if p.TextContent() {
if p.TextContent() && p.ContentReader == nil {
cte = selectTransferEncoding(p.Content, false)
if p.Charset == "" {
p.Charset = utf8
@@ -174,11 +190,15 @@ func (p *Part) encodeHeader(b *bufio.Writer) error {

// encodeContent writes out the content in the selected encoding.
func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error) {
if p.ContentReader != nil {
return p.encodeContentFromReader(b)
}

switch cte {
case teBase64:
enc := base64.StdEncoding
text := make([]byte, enc.EncodedLen(len(p.Content)))
base64.StdEncoding.Encode(text, p.Content)
enc.Encode(text, p.Content)
// Wrap lines.
lineLen := 76
for len(text) > 0 {
@@ -205,6 +225,54 @@ func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error)
return err
}

// encodeContentFromReader writes out the content read from the reader using base64 encoding.
func (p *Part) encodeContentFromReader(b *bufio.Writer) error {
text := make([]byte, base64EncodedLineLen) // a single base64 encoded line
enc := base64.StdEncoding

chunk := make([]byte, readChunkSize) // contains a whole number of lines
copy(chunk, p.Content) // copy the data of the initial read that was issued by `Encode`
n := len(p.Content)

for {
// call read until we get a full chunk / error
for n < len(chunk) {
c, err := p.ContentReader.Read(chunk[n:])
if err != nil {
if err == io.EOF {
break
}
return err
}

n += c
}

for i := 0; i < n; i += base64DecodedLineLen {
size := n - i
if size > base64DecodedLineLen {
size = base64DecodedLineLen
}

enc.Encode(text, chunk[i:i+size])
if _, err := b.Write(text[:enc.EncodedLen(size)]); err != nil {
return err
}
if _, err := b.Write(crnl); err != nil {
return err
}
}

if n < len(chunk) {
break
}

n = 0
}

return nil
}

// selectTransferEncoding scans content for non-ASCII characters and selects 'b' or 'q' encoding.
func selectTransferEncoding(content []byte, quoteLineBreaks bool) transferEncoding {
if len(content) == 0 {
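
A lower-level sketch without the builder, mirroring the test setup below: set ContentReader on a Part and call Encode. The file name and content type are placeholders; the comments describe the chunked encoding added in encode.go above.

package main

import (
	"log"
	"os"

	"github.com/jhillyerd/enmime"
)

func main() {
	f, err := os.Open("video.mp4") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	p := enmime.NewPart("video/mp4")
	p.Disposition = "attachment"
	p.FileName = "video.mp4"
	p.ContentReader = f // content is pulled on demand during Encode

	// Encode reads the reader in 7296-byte chunks (128 decoded lines of
	// 57 bytes each) and writes 76-character base64 lines, so the raw
	// content is buffered one chunk at a time rather than in full.
	if err := p.Encode(os.Stdout); err != nil {
		log.Fatal(err)
	}
}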
76 changes: 76 additions & 0 deletions encode_test.go
@@ -2,6 +2,8 @@ package enmime_test

import (
"bytes"
"io"
"math/rand"
"testing"
"time"

@@ -104,6 +106,80 @@ func TestEncodePartQuotedPrintableHeaders(t *testing.T) {
test.DiffGolden(t, b.Bytes(), "testdata", "encode", "part-quoted-printable-headers.golden")
}

// oneByOneReader implements io.Reader over a byte slice, returns a single byte on every Read request.
// This type is used to validate that partial reads (i.e., Read calls that return n < len(p)) are handled correctly.
type oneByOneReader struct {
content []byte
pos int
}

func (r *oneByOneReader) Read(p []byte) (n int, err error) {
if len(p) == 0 {
return 0, nil
}
if r.pos >= len(r.content) {
return 0, io.EOF
}
p[0] = r.content[r.pos]
r.pos++
return 1, nil
}

func TestEncodePartContentReader(t *testing.T) {
contentLengths := []int{
0, 1, 2, 3, 4, // empty / nearly empty
55, 56, 57, 58, 59, 60, // lengths close to the length of a single line (57)
7294, 7295, 7296, 7297, 7298, // lengths close to the length of a single chunk (7296)
}

for _, oneByOne := range []bool{false, true} {
for _, contentLength := range contentLengths {
// create a part with random content
p := enmime.NewPart("application/zip")
p.Boundary = "enmime-abcdefg0123456789"
p.Charset = "binary"
p.ContentID = "mycontentid"
p.ContentTypeParams["param1"] = "myparameter1"
p.ContentTypeParams["param2"] = "myparameter2"
p.Disposition = "attachment"
p.FileName = "stuff.zip"
p.FileModDate, _ = time.Parse(time.RFC822, "01 Feb 03 04:05 GMT")

p.Content = make([]byte, contentLength)
_, err := rand.Read(p.Content)
if err != nil {
t.Fatal(err)
}

// encode the part using the `Content` byte slice stored in the Part
b1 := &bytes.Buffer{}
err = p.Encode(b1)
if err != nil {
t.Fatal(err)
}

// encode the part using the io.Reader
if oneByOne {
p.ContentReader = &oneByOneReader{content: p.Content}
} else {
p.ContentReader = bytes.NewReader(p.Content)
}
p.Content = nil

b2 := &bytes.Buffer{}
err = p.Encode(b2)
if err != nil {
t.Fatal(err)
}

// compare the results
if !bytes.Equal(b1.Bytes(), b2.Bytes()) {
t.Errorf("[]byte encode and io.Reader encode produced different results for length %d", contentLength)
}
}
}
}

func TestEncodePartBinaryHeader(t *testing.T) {
p := enmime.NewPart("text/plain")
p.Header.Set("Subject", "¡Hola, señor!")
7 changes: 4 additions & 3 deletions part.go
@@ -44,9 +44,10 @@ type Part struct {
Charset string // The content charset encoding, may differ from charset in header.
OrigCharset string // The original content charset when a different charset was detected.

Errors []*Error // Errors encountered while parsing this part.
Content []byte // Content after decoding, UTF-8 conversion if applicable.
Epilogue []byte // Epilogue contains data following the closing boundary marker.
Errors []*Error // Errors encountered while parsing this part.
Content []byte // Content after decoding, UTF-8 conversion if applicable.
ContentReader io.Reader // Reader interface for pulling the content for encoding.
Epilogue []byte // Epilogue contains data following the closing boundary marker.

parser *Parser // Provides access to parsing options.

