-
Notifications
You must be signed in to change notification settings - Fork 4
/
search.go
131 lines (113 loc) · 4.06 KB
/
search.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Copyright 2019 PaperCut Software International Pty Ltd. All rights reserved.
package main
import (
"flag"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/papercutsoftware/pdfsearch"
"github.com/papercutsoftware/pdfsearch/examples/cmd_utils"
)
// usage is the help text that main hands to cmd_utils.MakeUsage before the flags are parsed.
const usage = `Usage: go run search.go [OPTIONS] PaperCut NG
Performs a full text search for "PaperCut NG" over PDF pages in the current index.
`
// main parses the command line, joins the non-flag arguments into a search term, runs the search
// with runSearchShow and then passes the marked-up PDF to pdfsearch.CopyMarkedupResults.
// It exits with status 1 when no search term is given or when either step returns an error.
func main() {
	var (
		serialize bool
		nameOnly  bool
	)
	var (
		persistDir       = filepath.Join(pdfsearch.DefaultPersistRoot, "my.computer")
		outPath          = "search.results.pdf"
		outDir           = "search.history"
		maxSearchResults = 10
	)

	// Every flag defaults to the initial value of the variable it is bound to.
	// NOTE: the value of -m is parsed but is not read anywhere in main.
	flag.StringVar(&outPath, "o", outPath, "Name of PDF that will show marked up results.")
	flag.StringVar(&persistDir, "s", persistDir, "The on-disk index is stored here.")
	flag.BoolVar(&serialize, "m", serialize, "Serialize in-memory index to byte array.")
	flag.BoolVar(&nameOnly, "l", nameOnly, "Show matching file names only.")
	flag.IntVar(&maxSearchResults, "n", maxSearchResults, "Max number of search results to return.")
	cmd_utils.MakeUsage(usage)
	flag.Parse()
	pdfsearch.InitLogging()

	// At least one non-flag argument is needed to build a search term.
	if flag.NArg() < 1 {
		flag.Usage()
		os.Exit(1)
	}

	// We always want to see all errors in our testing.
	pdfsearch.ExposeErrors()

	words := flag.Args()
	// The term to search for: the arguments separated by spaces.
	term := strings.Join(words, " ")
	// The same arguments separated by dots; handed to CopyMarkedupResults below.
	termExt := strings.Join(words, ".")

	// In file-names-only mode the -n limit is replaced by a very large one.
	limit := maxSearchResults
	if nameOnly {
		limit = 1e9
	}

	// Run the search and show the results.
	err := runSearchShow(term, persistDir, nameOnly, limit, outPath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "runSearchShow failed. err=%v\n", err)
		os.Exit(1)
	}

	// Save a copy of the marked up file for posterity.
	err = pdfsearch.CopyMarkedupResults(outDir, outPath, "", termExt)
	if err != nil {
		fmt.Fprintf(os.Stderr, "copyMarkedupResults failed. err=%v\n", err)
		os.Exit(1)
	}
}
// runSearchShow runs a search for `term` with runSearch and passes the outcome to showResults,
// which prints the results and writes a marked-up PDF to `outPath`.
// An error from runSearch is returned as is and showResults is not called in that case.
//
// `persistDir`: Directory holding the index to search.
// `nameOnly`: Show matching file names only.
// `maxResults`: Max number of search results to return.
func runSearchShow(term, persistDir string, nameOnly bool, maxResults int, outPath string) error {
	matches, elapsed, err := runSearch(term, persistDir, maxResults)
	if err != nil {
		return err
	}
	return showResults(matches, elapsed, nameOnly, maxResults, outPath)
}
// runSearch searches for `term` in the PDF index stored in directory `persistDir` and returns the
// search results together with the elapsed time.
// This is the main function. It shows you how to search a persistent index.
//
// The elapsed time covers both the pdfsearch.ReuseIndex call and the Search call.
//
// `maxResults`: Max number of search results to return.
func runSearch(term, persistDir string, maxResults int) (
	results pdfsearch.PdfMatchSet, dt time.Duration, err error) {
	start := time.Now()
	index := pdfsearch.ReuseIndex(persistDir)
	results, err = index.Search(term, maxResults)
	return results, time.Since(start), err
}
// showResults prints `results` to stdout, writes a marked-up PDF for them to `outPath` with
// pdfsearch.MarkupPdfResults, and then reports the search duration and `outPath` on stderr.
// An error from MarkupPdfResults is returned as is; the stderr summary is skipped in that case.
//
// `dt` is the duration of the search.
// `nameOnly`: Print only the file names from `results.Files()`, numbered from 0 and cut off after
// `maxResults` entries. Otherwise `results` is printed in full with the %+v verb.
// `maxResults`: Max number of file names to print when `nameOnly` is set.
func showResults(results pdfsearch.PdfMatchSet, dt time.Duration, nameOnly bool, maxResults int, outPath string) error {
	if !nameOnly {
		fmt.Printf("%+v\n", results)
	} else {
		names := results.Files()
		if len(names) > maxResults {
			names = names[:maxResults]
		}
		for idx, name := range names {
			fmt.Printf("%4d: %q\n", idx, name)
		}
	}

	err := pdfsearch.MarkupPdfResults(results, outPath)
	if err != nil {
		return err
	}

	fmt.Fprintf(os.Stderr, "Duration=%.1f sec\nMarked up search results in %q\n", dt.Seconds(), outPath)
	return nil
}
// report writes `msg` to stderr on a line of its own, prefixed with ">> ".
// It is meant as a progress callback for IndexPdfMem; no call to it appears in this example.
func report(msg string) {
	line := fmt.Sprintf(">> %s\n", msg)
	// Fprint emits a single string operand unchanged, so any '%' in msg is kept literally.
	fmt.Fprint(os.Stderr, line)
}