Releasing the first working version

ajaxray · Jan 25, 2018 · e150213 · e150213
1 parent 053df92
commit e150213
Show file tree

Hide file tree

Showing 4 changed files with 219 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,6 @@
 
 # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
 .glide/
+
+# Project specific
+merge2pdf
diff --git a/README.md b/README.md
@@ -41,7 +41,9 @@ If you fix a bug or want to add/improve a feature,
 and it's aligned to the focus (merging with ease) of this tool, 
 I will be glad to accept your PR.  
 
-Thanks
+### Thanks
+
+This tool was made using the beautiful [Unidoc](https://unidoc.io/) library. Thanks to **Unidoc**.
 
 ---
 > "This is the Book about which there is no doubt, a guidance for those conscious of Allah" - [Al-Quran](http://quran.com)
diff --git a/main.go b/main.go
@@ -0,0 +1,168 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+
+	unicommon "github.com/unidoc/unidoc/common"
+	pdf "github.com/unidoc/unidoc/pdf/model"
+)
+
+// init installs a unidoc console logger configured at the debug log level.
+func init() {
+	logger := unicommon.NewConsoleLogger(unicommon.LogLevelDebug)
+	unicommon.SetLogger(logger)
+}
+
+// main parses the command line and merges the requested inputs into one PDF.
+//
+// Usage: merge2pdf output.pdf input1.pdf input2.pdf~1,2,3 ...
+//
+// The first argument is the output path. Each following argument is an input
+// path, optionally followed by "~" and a comma-separated list of page numbers
+// to take from that input. An input without a page list gets an empty page
+// slice. The process exits with status 1 on a malformed page list or when
+// mergePdf returns an error.
+func main() {
+	// NOTE(review): this guard lets a single input path through, while the
+	// message below asks for two input paths. Confirm which one is intended.
+	if len(os.Args) < 3 {
+		fmt.Printf("Requires at least 3 arguments: output_path and 2 input paths(and optional page numbers) \n")
+		fmt.Printf("Usage: merge2pdf output.pdf input1.pdf input2.pdf~1,2,3 ...\n")
+		os.Exit(0)
+	}
+
+	outputPath := os.Args[1]
+	inputArgs := os.Args[2:]
+	inputPaths := make([]string, 0, len(inputArgs))
+	inputPages := make([][]int, 0, len(inputArgs))
+
+	// Split every input argument into its path and its optional page list.
+	for _, arg := range inputArgs {
+		fileInputParts := strings.Split(arg, "~")
+		inputPaths = append(inputPaths, fileInputParts[0])
+		pages := []int{}
+
+		if len(fileInputParts) > 1 {
+			for _, e := range strings.Split(fileInputParts[1], ",") {
+				pageNo, err := strconv.Atoi(strings.Trim(e, " \n"))
+				if err != nil {
+					// fmt.Errorf only builds an error value; print the
+					// message so the user sees why the program exits.
+					fmt.Printf("Invalid format! Example of a file input with page numbers: path/to/abc.pdf~1,2,3,5,6\n")
+					os.Exit(1)
+				}
+				pages = append(pages, pageNo)
+			}
+		}
+
+		inputPages = append(inputPages, pages)
+	}
+
+	if err := mergePdf(inputPaths, inputPages, outputPath); err != nil {
+		fmt.Printf("Error: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Complete, see output file: %s\n", outputPath)
+}
+
+// mergePdf appends the pages of every input file to one PDF and writes the
+// result to outputPath.
+//
+// inputPaths[i] is paired with inputPages[i]. An empty or missing page list
+// means that all pages of that input are added. Only inputs detected as
+// "application/pdf" are merged; directories, images and any other detected
+// type make the function return an error. The first error encountered is
+// returned and nothing is written in that case.
+func mergePdf(inputPaths []string, inputPages [][]int, outputPath string) error {
+	pdfWriter := pdf.NewPdfWriter()
+
+	for i, inputPath := range inputPaths {
+		f, err := os.Open(inputPath)
+		if err != nil {
+			return err
+		}
+		// Kept open until the function returns instead of closing per
+		// iteration.
+		// NOTE(review): presumably the writer may still read from the input
+		// files during Write. Confirm against unidoc before closing earlier.
+		defer f.Close()
+
+		fileType, err := getFileType(f)
+		if err != nil {
+			// Report the failure instead of ending the merge silently.
+			return err
+		}
+
+		// Tolerate a page list that is shorter than the path list.
+		var pages []int
+		if i < len(inputPages) {
+			pages = inputPages[i]
+		}
+
+		switch fileType {
+		case "directory":
+			// @TODO : Read all files in directory
+			return errors.New(inputPath + " is a drectory.")
+		case "application/pdf":
+			if err := addPdfPages(f, pages, &pdfWriter); err != nil {
+				return err
+			}
+		case "image/jpg", "image/jpeg", "image/png":
+			return errors.New(inputPath + " Images is not supproted yet.")
+		default:
+			// Dropping an input silently would produce an incomplete merge.
+			return fmt.Errorf("%s has unsupported file type %s", inputPath, fileType)
+		}
+	}
+
+	fWrite, err := os.Create(outputPath)
+	if err != nil {
+		return err
+	}
+
+	if err := pdfWriter.Write(fWrite); err != nil {
+		fWrite.Close()
+		return err
+	}
+
+	// A failing Close can mean the output was not fully written.
+	return fWrite.Close()
+}
+
+// getReader builds a PDF reader on top of rs.
+//
+// If the document reports itself as encrypted, decryption is attempted with
+// an empty password; when that does not authenticate, an error is returned.
+// Any error from the underlying reader is passed through unchanged.
+func getReader(rs io.ReadSeeker) (*pdf.PdfReader, error) {
+	reader, err := pdf.NewPdfReader(rs)
+	if err != nil {
+		return nil, err
+	}
+
+	encrypted, err := reader.IsEncrypted()
+	if err != nil {
+		return nil, err
+	}
+	if !encrypted {
+		return reader, nil
+	}
+
+	// Encrypted document: try the empty password.
+	authenticated, err := reader.Decrypt([]byte(""))
+	switch {
+	case err != nil:
+		return nil, err
+	case !authenticated:
+		return nil, errors.New("Cannot merge encrypted, password protected document")
+	}
+
+	return reader, nil
+}
+
+// addPdfPages reads a PDF from file and adds pages from it to writer.
+//
+// pages lists the page numbers to add, in the given order. When pages is
+// empty, every page from 1 up to the page count reported by the reader is
+// added. The first error from opening the document, fetching a page or
+// adding it to the writer is returned.
+func addPdfPages(file io.ReadSeeker, pages []int, writer *pdf.PdfWriter) error {
+	pdfReader, err := getReader(file)
+	if err != nil {
+		return err
+	}
+
+	// addPage copies one page into the writer and reports any failure from
+	// either step, so both branches below check the AddPage error.
+	addPage := func(pageNo int) error {
+		page, err := pdfReader.GetPage(pageNo)
+		if err != nil {
+			return err
+		}
+		return writer.AddPage(page)
+	}
+
+	if len(pages) > 0 {
+		for _, pageNo := range pages {
+			if err := addPage(pageNo); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	numPages, err := pdfReader.GetNumPages()
+	if err != nil {
+		return err
+	}
+	for pageNo := 1; pageNo <= numPages; pageNo++ {
+		if err := addPage(pageNo); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/util.go b/util.go
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"net/http"
+	"os"
+	"reflect"
+)
+
+// in_array reports whether val occurs in array and, if so, at which index.
+//
+// array is expected to be a slice. val is compared with each element using
+// reflect.DeepEqual, and the index of the first match is returned together
+// with true. When nothing matches, or when array is nil or not a slice, the
+// result is (false, -1).
+func in_array(val interface{}, array interface{}) (exists bool, index int) {
+	// reflect.TypeOf(nil) returns a nil Type, and calling Kind on it panics.
+	if array == nil {
+		return false, -1
+	}
+	if reflect.TypeOf(array).Kind() != reflect.Slice {
+		return false, -1
+	}
+
+	s := reflect.ValueOf(array)
+	for i := 0; i < s.Len(); i++ {
+		if reflect.DeepEqual(val, s.Index(i).Interface()) {
+			return true, i
+		}
+	}
+
+	return false, -1
+}
+
+// getFileType reports what kind of entry an open file is.
+//
+// It returns "directory" for a directory. For anything else it reads up to
+// 512 bytes from the file's current offset and returns the content type
+// that http.DetectContentType derives from the bytes actually read. When
+// Stat or Read fails, for example with io.EOF on an empty file, it returns
+// "error" together with that error.
+//
+// The read advances the file offset; callers that need the content from the
+// start must seek back themselves.
+func getFileType(file *os.File) (string, error) {
+	info, statErr := file.Stat()
+	if statErr != nil {
+		return "error", statErr
+	}
+	if info.IsDir() {
+		return "directory", nil
+	}
+
+	// Only the first 512 bytes are used to sniff the content type.
+	buffer := make([]byte, 512)
+	n, readErr := file.Read(buffer)
+	if readErr != nil {
+		return "error", readErr
+	}
+
+	// Pass only the bytes that were read: the zero padding of a short read
+	// would otherwise make text content look like binary data.
+	// Always returns a valid content-type and "application/octet-stream" if no others seemed to match.
+	return http.DetectContentType(buffer[:n]), nil
+}