add io.Reader interface for attachment content (#296)
Can provide a significant reduction in memory usage when encoding
large attachments - the attachment content is held only once (in the final
message) instead of three times (part.Content, the base64-encoded buffer,
and the final message)
erankor authored Aug 7, 2023
1 parent 9d622b5 commit 151d2dc
Showing 4 changed files with 161 additions and 5 deletions.
11 changes: 11 additions & 0 deletions builder.go
@@ -3,6 +3,7 @@ package enmime
import (
"bytes"
"errors"
"io"
"io/ioutil"
"math/rand"
"mime"
@@ -228,6 +229,16 @@ func (p MailBuilder) AddAttachment(b []byte, contentType string, fileName string
return p
}

// AddAttachmentWithReader returns a copy of MailBuilder that includes the specified attachment, using an io.Reader to pull the content of the attachment.
func (p MailBuilder) AddAttachmentWithReader(r io.Reader, contentType string, fileName string) MailBuilder {
part := NewPart(contentType)
part.ContentReader = r
part.FileName = fileName
part.Disposition = cdAttachment
p.attachments = append(p.attachments, part)
return p
}

// AddFileAttachment returns a copy of MailBuilder that includes the specified attachment.
fileName will be populated from the base name of path. Content type will be detected from the
// path extension.
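
For illustration, a minimal sketch of the new builder method in use. It assumes the usual MailBuilder chain (Builder, From, To, Subject, Text, Build); the file path and addresses are placeholders, not part of this commit:

package main

import (
	"log"
	"os"

	"github.com/jhillyerd/enmime"
)

func main() {
	// Open the attachment and let the builder stream it, instead of
	// loading the whole file into memory for AddAttachment.
	f, err := os.Open("large-report.zip") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	root, err := enmime.Builder().
		From("Sender", "sender@example.com").
		To("Recipient", "recipient@example.com").
		Subject("Report attached").
		Text([]byte("See the attached archive.")).
		AddAttachmentWithReader(f, "application/zip", "large-report.zip").
		Build()
	if err != nil {
		log.Fatal(err)
	}
	if err := root.Encode(os.Stdout); err != nil {
		log.Fatal(err)
	}
}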
72 changes: 70 additions & 2 deletions encode.go
@@ -26,13 +26,29 @@ const (
teBase64
)

const (
base64EncodedLineLen = 76
base64DecodedLineLen = base64EncodedLineLen * 3 / 4 // exact, since base64EncodedLineLen is divisible by 4
linesPerChunk = 128
readChunkSize = base64DecodedLineLen * linesPerChunk
)

var crnl = []byte{'\r', '\n'}

// Encode writes this Part and all its children to the specified writer in MIME format.
func (p *Part) Encode(writer io.Writer) error {
if p.Header == nil {
p.Header = make(textproto.MIMEHeader)
}
if p.ContentReader != nil {
// read some data in order to check whether the content is empty
p.Content = make([]byte, readChunkSize)
n, err := p.ContentReader.Read(p.Content)
if err != nil && err != io.EOF {
return err
}
p.Content = p.Content[:n]
}
cte := p.setupMIMEHeaders()
// Encode this part.
b := bufio.NewWriter(writer)
@@ -87,7 +103,7 @@ func (p *Part) setupMIMEHeaders() transferEncoding {
cte := te7Bit
if len(p.Content) > 0 {
cte = teBase64
if p.TextContent() {
if p.TextContent() && p.ContentReader == nil {
cte = selectTransferEncoding(p.Content, false)
if p.Charset == "" {
p.Charset = utf8
@@ -174,11 +190,15 @@ func (p *Part) encodeHeader(b *bufio.Writer) error {

// encodeContent writes out the content in the selected encoding.
func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error) {
if p.ContentReader != nil {
return p.encodeContentFromReader(b)
}

switch cte {
case teBase64:
enc := base64.StdEncoding
text := make([]byte, enc.EncodedLen(len(p.Content)))
base64.StdEncoding.Encode(text, p.Content)
enc.Encode(text, p.Content)
// Wrap lines.
lineLen := 76
for len(text) > 0 {
@@ -205,6 +225,54 @@ func (p *Part) encodeContent(b *bufio.Writer, cte transferEncoding) (err error)
return err
}

// encodeContentFromReader writes out the content read from the reader using base64 encoding.
func (p *Part) encodeContentFromReader(b *bufio.Writer) error {
text := make([]byte, base64EncodedLineLen) // a single base64 encoded line
enc := base64.StdEncoding

chunk := make([]byte, readChunkSize) // contains a whole number of lines
copy(chunk, p.Content) // copy the data of the initial read that was issued by `Encode`
n := len(p.Content)

for {
// call read until we get a full chunk / error
for n < len(chunk) {
c, err := p.ContentReader.Read(chunk[n:])
if err != nil {
if err == io.EOF {
break
}
return err
}

n += c
}

for i := 0; i < n; i += base64DecodedLineLen {
size := n - i
if size > base64DecodedLineLen {
size = base64DecodedLineLen
}

enc.Encode(text, chunk[i:i+size])
if _, err := b.Write(text[:enc.EncodedLen(size)]); err != nil {
return err
}
if _, err := b.Write(crnl); err != nil {
return err
}
}

if n < len(chunk) {
break
}

n = 0
}

return nil
}

// selectTransferEncoding scans content for non-ASCII characters and selects 'b' or 'q' encoding.
func selectTransferEncoding(content []byte, quoteLineBreaks bool) transferEncoding {
if len(content) == 0 {
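
A lower-level sketch without the builder, mirroring the test setup below: set ContentReader on a Part and call Encode. The file name and content type are placeholders; the comments describe the chunked encoding added in encode.go above.

package main

import (
	"log"
	"os"

	"github.com/jhillyerd/enmime"
)

func main() {
	f, err := os.Open("video.mp4") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	p := enmime.NewPart("video/mp4")
	p.Disposition = "attachment"
	p.FileName = "video.mp4"
	p.ContentReader = f // content is pulled on demand during Encode

	// Encode reads the reader in 7296-byte chunks (128 decoded lines of
	// 57 bytes each) and writes 76-character base64 lines, so the raw
	// content is buffered one chunk at a time rather than in full.
	if err := p.Encode(os.Stdout); err != nil {
		log.Fatal(err)
	}
}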
76 changes: 76 additions & 0 deletions encode_test.go
@@ -2,6 +2,8 @@ package enmime_test

import (
"bytes"
"io"
"math/rand"
"testing"
"time"

@@ -104,6 +106,80 @@ func TestEncodePartQuotedPrintableHeaders(t *testing.T) {
test.DiffGolden(t, b.Bytes(), "testdata", "encode", "part-quoted-printable-headers.golden")
}

// oneByOneReader implements io.Reader over a byte slice, returns a single byte on every Read request.
// This type is used to validate that partial reads (i.e., Read calls that return n < len(p)) are handled correctly.
type oneByOneReader struct {
content []byte
pos int
}

func (r *oneByOneReader) Read(p []byte) (n int, err error) {
if len(p) == 0 {
return 0, nil
}
if r.pos >= len(r.content) {
return 0, io.EOF
}
p[0] = r.content[r.pos]
r.pos++
return 1, nil
}

func TestEncodePartContentReader(t *testing.T) {
contentLengths := []int{
0, 1, 2, 3, 4, // empty / nearly empty
55, 56, 57, 58, 59, 60, // lengths close to the length of a single line (57)
7294, 7295, 7296, 7297, 7298, // lengths close to the length of a single chunk (7296)
}

for _, oneByOne := range []bool{false, true} {
for _, contentLength := range contentLengths {
// create a part with random content
p := enmime.NewPart("application/zip")
p.Boundary = "enmime-abcdefg0123456789"
p.Charset = "binary"
p.ContentID = "mycontentid"
p.ContentTypeParams["param1"] = "myparameter1"
p.ContentTypeParams["param2"] = "myparameter2"
p.Disposition = "attachment"
p.FileName = "stuff.zip"
p.FileModDate, _ = time.Parse(time.RFC822, "01 Feb 03 04:05 GMT")

p.Content = make([]byte, contentLength)
_, err := rand.Read(p.Content)
if err != nil {
t.Fatal(err)
}

// encode the part using the `Content` byte slice stored in the Part
b1 := &bytes.Buffer{}
err = p.Encode(b1)
if err != nil {
t.Fatal(err)
}

// encode the part using the io.Reader
if oneByOne {
p.ContentReader = &oneByOneReader{content: p.Content}
} else {
p.ContentReader = bytes.NewReader(p.Content)
}
p.Content = nil

b2 := &bytes.Buffer{}
err = p.Encode(b2)
if err != nil {
t.Fatal(err)
}

// compare the results
if !bytes.Equal(b1.Bytes(), b2.Bytes()) {
t.Errorf("[]byte encode and io.Reader encode produced different results for length %d", contentLength)
}
}
}
}

func TestEncodePartBinaryHeader(t *testing.T) {
p := enmime.NewPart("text/plain")
p.Header.Set("Subject", "¡Hola, señor!")
7 changes: 4 additions & 3 deletions part.go
@@ -44,9 +44,10 @@ type Part struct {
Charset string // The content charset encoding, may differ from charset in header.
OrigCharset string // The original content charset when a different charset was detected.

Errors []*Error // Errors encountered while parsing this part.
Content []byte // Content after decoding, UTF-8 conversion if applicable.
Epilogue []byte // Epilogue contains data following the closing boundary marker.
Errors []*Error // Errors encountered while parsing this part.
Content []byte // Content after decoding, UTF-8 conversion if applicable.
ContentReader io.Reader // Reader interface for pulling the content for encoding.
Epilogue []byte // Epilogue contains data following the closing boundary marker.

parser *Parser // Provides access to parsing options.

