Skip to content

Commit

Permalink
perf(extractor): 改为异步处理 (switch to asynchronous processing)
Browse files (browse the repository at this point in the history)
  • Loading branch information
kwaziidev committed Feb 27, 2020
1 parent 10e8466 commit 210acf9
Show file tree
Hide file tree
Showing 2 changed files with 704 additions and 16 deletions.
42 changes: 26 additions & 16 deletions extractor.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@ package textractor

import (
"strings"
"sync"

"github.com/PuerkitoBio/goquery"
)
Expand Down Expand Up @@ -39,23 +40,32 @@ func Extract(source string) (*Text, error) {
}
body := dom.Find("body")
normalize(body)
content := contentExtract(body)
publishTime := timeExtract(body)
author := authorExtract(body)
title := titleExtract(dom.Selection, content.node)
result := &Text{}
result.Content = content.density.tiText
result.ContentHTML, _ = content.node.Html()
var imgs []string
content.node.Find("img").Each(func(i int, s *goquery.Selection) {
if src, ok := s.Attr("src"); ok {
imgs = append(imgs, src)
}
})
result.Image = imgs
result.PublishTime = publishTime
result.Author = author
result.Title = title
wg := &sync.WaitGroup{}
wg.Add(3)
go func() {
result.PublishTime = timeExtract(body)
wg.Done()
}()
go func() {
result.Author = authorExtract(body)
wg.Done()
}()
go func() {
content := contentExtract(body)
result.Title = titleExtract(dom.Selection, content.node)
result.Content = content.density.tiText
result.ContentHTML, _ = content.node.Html()
var imgs []string
content.node.Find("img").Each(func(i int, s *goquery.Selection) {
if src, ok := s.Attr("src"); ok {
imgs = append(imgs, src)
}
})
result.Image = imgs
wg.Done()
}()
wg.Wait()
return result, nil
}

Expand Down
Loading

0 comments on commit 210acf9

Please sign in to comment.