-
Notifications
You must be signed in to change notification settings - Fork 0
/
compress.go
302 lines (254 loc) · 8.31 KB
/
compress.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
// These functions in `compress.go` and `decompress.go` are the primary interfaces to the xz compression and decompression
// functions. All these functions call a single stage set of goroutines that handle the setup of `liblzma` and manage the
// movement of streaming data.
//
// Use the `XZReader` and `XZWriter` structs to manage direct streaming of compression and decompression. The
// buffer sizes your own code uses for reads and writes are independent of the internal buffers that liblzma
// uses, and your own buffer sizes will be respected.
//
// Use the CompressionStrategy type to fine-tune the lzma compression strategy used. The default is CompressionMulti, which
// is a good balance between speed and compression ratio. For memory-restricted and single-core systems, use the Simple
// compression options to reduce the memory requirements of liblzma.
package safexz
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
/* trunk-ignore(golangci-lint/typecheck) */
internal "github.com/christoofar/safexz/internal"
)
// CompressString compresses a string using the xz format and returns the compressed string.
//
// The optional strategy selects the liblzma compression strategy; it defaults to
// CompressionMulti when omitted, and only the first value supplied is used.
func CompressString(s string, strategy ...CompressionStrategy) (string, error) {
	use_strategy := CompressionMulti
	if len(strategy) > 0 {
		use_strategy = strategy[0]
	}

	readchan := make(chan []byte, 1)
	writechan := make(chan []byte, 1)

	// compressdone synchronizes access to funcerr: the goroutine closes it only
	// after funcerr has been assigned, so reading funcerr after <-compressdone is
	// race-free. (Previously funcerr was read with no happens-before edge.)
	compressdone := make(chan struct{})
	var funcerr error
	go func() {
		funcerr = internal.CompressIn(readchan, writechan, int(use_strategy))
		close(compressdone)
	}()

	// Feed the input string to the compressor in MAX_BUF_SIZE chunks, then
	// close readchan to signal end of input.
	go func() {
		for i := 0; i < len(s); i += internal.MAX_BUF_SIZE {
			end := i + internal.MAX_BUF_SIZE
			if end > len(s) {
				end = len(s)
			}
			readchan <- []byte(s[i:end])
		}
		close(readchan)
	}()

	// Accumulate compressed output in a buffer; string concatenation in a loop
	// is quadratic in the number of chunks.
	var compressed bytes.Buffer
	for data := range writechan {
		compressed.Write(data)
	}
	<-compressdone
	return compressed.String(), funcerr
}
// CompressBytes compresses a byte slice using the xz format and returns the compressed byte slice.
//
// If the byte slice is huge, consider CompressFile or CompressStream instead: the
// compression process can greatly expand the amount of memory consumed depending on
// the CompressionStrategy used. The optional strategy defaults to CompressionMulti.
func CompressBytes(b []byte, strategy ...CompressionStrategy) ([]byte, error) {
	use_strategy := CompressionMulti
	if len(strategy) > 0 {
		use_strategy = strategy[0]
	}

	readchan := make(chan []byte, 1)
	writechan := make(chan []byte, 1)

	// compressdone synchronizes access to funcerr: it is closed only after the
	// assignment, so reading funcerr after <-compressdone is race-free.
	// (Previously funcerr was read with no happens-before edge.)
	compressdone := make(chan struct{})
	var funcerr error
	go func() {
		funcerr = internal.CompressIn(readchan, writechan, int(use_strategy))
		close(compressdone)
	}()

	// Feed the input slice to the compressor in MAX_BUF_SIZE chunks, then
	// close readchan to signal end of input.
	go func() {
		for i := 0; i < len(b); i += internal.MAX_BUF_SIZE {
			end := i + internal.MAX_BUF_SIZE
			if end > len(b) {
				end = len(b)
			}
			readchan <- b[i:end]
		}
		close(readchan)
	}()

	var compressed []byte
	for data := range writechan {
		compressed = append(compressed, data...)
	}
	<-compressdone
	return compressed, funcerr
}
// CompressFile compresses a file using the xz format and writes the compressed data
// to the output file. The output file must end with the `.xz` extension.
func CompressFile(inpath string, outpath string, strategy ...CompressionStrategy) error {
	// Delegate to the progress-aware variant with no callback; it applies the
	// same default (CompressionMulti) and uses only the first strategy value.
	return CompressFileWithProgress(inpath, outpath, nil, strategy...)
}
// CompressFileWithProgress compresses a file using the xz format and writes the compressed data to the output file.
// The output file must end with the `.xz` extension.
//
// The progress callback, if non-nil, is called with the number of bytes read from the
// input file and the number of bytes written to the output file so far — useful for
// progress bars, ETA calculations, etc.
func CompressFileWithProgress(inpath string, outpath string, progress func(uint64, uint64), strategy ...CompressionStrategy) error {
	use_strategy := CompressionMulti
	if len(strategy) > 0 {
		use_strategy = strategy[0]
	}

	// Require a `.xz` extension. filepath.Ext returns "" for extensionless paths,
	// so this comparison also rejects them safely (the previous extension[1:]
	// slice panicked when outpath had no extension at all).
	if filepath.Ext(outpath) != ".xz" {
		return fmt.Errorf("output file [%s] must have an xz extension", outpath)
	}

	f, err := os.Open(inpath)
	if err != nil {
		return err
	}
	defer f.Close() // the input file was previously never closed (fd leak)

	readchan := make(chan []byte, 1)
	writechan := make(chan []byte, 1)

	// compressdone lets us read compresserr race-free after writechan drains,
	// instead of printing the error to stdout and discarding it.
	compressdone := make(chan struct{})
	var compresserr error
	go func() {
		compresserr = internal.CompressIn(readchan, writechan, int(use_strategy))
		close(compressdone)
	}()

	var readCount uint64
	var writeCount uint64
	readfunc := func() {
		readbuf := make([]byte, internal.MAX_BUF_SIZE)
		for {
			bytes, err := f.Read(readbuf)
			readCount += uint64(bytes)
			// Throttle read-side progress reports to 4096-byte boundaries.
			// NOTE(review): readCount/writeCount are touched from two goroutines;
			// the progress values are best-effort, as in the original.
			if progress != nil && readCount%4096 == 0 {
				progress(readCount, writeCount)
			}
			if err != nil { // EOF (or read error): send the final batch and stop
				readchan <- readbuf[:bytes]
				close(readchan)
				break
			}
			// Copy the chunk out: readbuf is reused on the next iteration.
			data := make([]byte, bytes)
			copy(data, readbuf)
			readchan <- data
		}
	}

	// If the output path already exists, delete it before recreating it.
	if _, err := os.Stat(outpath); err == nil {
		if err := os.Remove(outpath); err != nil {
			return err
		}
	}
	outfile, err := os.Create(outpath)
	if err != nil {
		return err
	}

	go readfunc()

	donewrite := make(chan bool, 1)
	var writeerr error
	go func() {
		for data := range writechan {
			// Record the first write failure instead of silently dropping it.
			if _, err := outfile.Write(data); err != nil && writeerr == nil {
				writeerr = err
			}
			if len(data) > 0 {
				writeCount += uint64(len(data))
				if progress != nil {
					progress(readCount, writeCount)
				}
			}
		}
		donewrite <- true
	}()
	<-donewrite

	// Close errors matter here: a failed close can mean unflushed data.
	if err := outfile.Close(); err != nil && writeerr == nil {
		writeerr = err
	}
	if writeerr != nil {
		return writeerr
	}
	<-compressdone
	return compresserr
}
// CompressFileToMemory compresses a file using the xz format and returns the compressed data
// as a byte slice. It can be handy for preparing uncompressed data for transmission over a network.
//
// The optional strategy defaults to CompressionMulti; only the first value is used.
func CompressFileToMemory(path string, strategy ...CompressionStrategy) ([]byte, error) {
	use_strategy := CompressionMulti
	if len(strategy) > 0 {
		use_strategy = strategy[0]
	}

	f, err := os.Open(path)
	if err != nil {
		return []byte{}, err
	}
	defer f.Close() // the input file was previously never closed (fd leak)

	readchan := make(chan []byte, 1)
	writechan := make(chan []byte, 1)

	// compressdone lets us read compresserr race-free after writechan drains,
	// instead of printing the error to stdout and discarding it.
	compressdone := make(chan struct{})
	var compresserr error
	go func() {
		compresserr = internal.CompressIn(readchan, writechan, int(use_strategy))
		close(compressdone)
	}()

	readfunc := func() {
		readbuf := make([]byte, internal.MAX_BUF_SIZE)
		for {
			bytes, err := f.Read(readbuf)
			if err != nil { // EOF (or read error): send the final batch and stop
				readchan <- readbuf[:bytes]
				close(readchan)
				break
			}
			// Copy the chunk out: readbuf is reused on the next iteration.
			data := make([]byte, bytes)
			copy(data, readbuf)
			readchan <- data
		}
	}

	membuffer := bytes.Buffer{}
	go readfunc()

	donewrite := make(chan bool, 1)
	go func() {
		for data := range writechan {
			membuffer.Write(data)
		}
		donewrite <- true
	}()
	<-donewrite
	<-compressdone
	return membuffer.Bytes(), compresserr
}
// CompressStream skips a call to io.Copy() by just compressing whatever stream you put in the
// input reader and writing it to the output writer. If you hold the input stream open and keep writing to it,
// this call will block until you close the input stream. This is useful for compressing data on the fly, such
// as the case with a logger stream that keeps the most recent events in RAM then shunts new entries off to
// a goroutine that's keeping a compressed version of it on disk.
//
// Note: Neither CompressStream nor DecompressStream
// actually use XZReader or XZWriter. They are just there for the sake of the ABI.
func CompressStream(input io.Reader, output io.Writer, strategy ...CompressionStrategy) error {
	use_strategy := CompressionMulti
	if len(strategy) > 0 {
		use_strategy = strategy[0]
	}

	readchan := make(chan []byte, 1)
	writechan := make(chan []byte, 1)

	// compressdone lets us read compresserr race-free after writechan drains,
	// instead of printing the error to stdout and always returning nil.
	compressdone := make(chan struct{})
	var compresserr error
	go func() {
		compresserr = internal.CompressIn(readchan, writechan, int(use_strategy))
		close(compressdone)
	}()

	readfunc := func() {
		readbuf := make([]byte, internal.MAX_BUF_SIZE)
		for {
			bytes, err := input.Read(readbuf)
			if err != nil { // EOF (or read error): send the final batch and stop
				readchan <- readbuf[:bytes]
				close(readchan)
				break
			}
			// Copy the chunk out: readbuf is reused on the next iteration.
			data := make([]byte, bytes)
			copy(data, readbuf)
			readchan <- data
		}
	}

	go readfunc()

	donewrite := make(chan bool, 1)
	var writeerr error
	go func() {
		for data := range writechan {
			// Record the first write failure instead of silently dropping it.
			if _, err := output.Write(data); err != nil && writeerr == nil {
				writeerr = err
			}
		}
		donewrite <- true
	}()
	<-donewrite

	if writeerr != nil {
		return writeerr
	}
	<-compressdone
	return compresserr
}