Skip to content

Commit

Permalink
episode ranges
Browse files Browse the repository at this point in the history
  • Loading branch information
robinovitch61 committed Dec 25, 2023
1 parent 4412eed commit 6ac152f
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 73 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ Download [webtoon](https://www.webtoons.com/en/) comics as PDFs using a terminal
## Usage

```shell
webtoon-dl <your-webtoon-url>
# download single episodes
webtoon-dl <your-webtoon-episode-url>

# download entire series
webtoon-dl <your-webtoon-series-url>

# create a single pdf from a range of episodes (inclusive)
webtoon-dl --min-ep=10 --max-ep=20 <your-webtoon-series-url>
```

## Installation
Expand Down
183 changes: 111 additions & 72 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ package main

import (
"bytes"
"flag"
"fmt"
"github.com/anaskhan96/soup"
"github.com/signintech/gopdf"
"image"
"io"
"math"
"net/http"
"os"
"regexp"
Expand All @@ -18,7 +20,7 @@ import (

func getImgLinksForEpisode(url string) []string {
resp, err := soup.Get(url)
time.Sleep(500 * time.Millisecond)
time.Sleep(200 * time.Millisecond)
if err != nil {
fmt.Println(fmt.Sprintf("Error fetching page: %v", err))
os.Exit(1)
Expand All @@ -37,7 +39,7 @@ func getImgLinksForEpisode(url string) []string {

func getEpisodeLinksForPage(url string) ([]string, error) {
resp, err := soup.Get(url)
time.Sleep(500 * time.Millisecond)
time.Sleep(200 * time.Millisecond)
if err != nil {
return []string{}, fmt.Errorf("error fetching page: %v", err)
}
Expand All @@ -52,61 +54,83 @@ func getEpisodeLinksForPage(url string) ([]string, error) {
return links, nil
}

func getImgLinks(url string) []string {
func getImgLinks(url string, minEp, maxEp int) ([]string, int, int) {
if strings.Contains(url, "/viewer") {
// assume viewing single episode
return getImgLinksForEpisode(url)
return getImgLinksForEpisode(url), episodeNo(url), episodeNo(url)
} else {
// assume viewing list of episodes
re := regexp.MustCompile("&page=[0-9]+")
allEpisodeLinks := make(map[string]struct{})
foundLastPage := false
for page := 1; !foundLastPage; page++ {
url = re.ReplaceAllString(url, "") + fmt.Sprintf("&page=%d", page)
episodeLinks, err := getEpisodeLinksForPage(url)
if err != nil {
break
}
for _, episodeLink := range episodeLinks {
// when you go past the last page, it just rerenders the last page
if _, ok := allEpisodeLinks[episodeLink]; ok {
foundLastPage = true
break
}
allEpisodeLinks[episodeLink] = struct{}{}
}
if !foundLastPage {
println(url)
println("scanning all pages to get all episode links")
allEpisodeLinks := getAllEpisodeLinks(url)
println(fmt.Sprintf("found %d total episodes", len(allEpisodeLinks)))

var desiredEpisodeLinks []string
for _, episodeLink := range allEpisodeLinks {
epNo := episodeNo(episodeLink)
if epNo >= minEp && epNo <= maxEp {
desiredEpisodeLinks = append(desiredEpisodeLinks, episodeLink)
}
}
keys := make([]string, 0, len(allEpisodeLinks))
for k := range allEpisodeLinks {
keys = append(keys, k)

return getImgLinksForEpisodes(desiredEpisodeLinks), episodeNo(desiredEpisodeLinks[0]), episodeNo(desiredEpisodeLinks[len(desiredEpisodeLinks)-1])
}
}

func getAllEpisodeLinks(url string) []string {
re := regexp.MustCompile("&page=[0-9]+")
episodeLinkSet := make(map[string]struct{})
foundLastPage := false
for page := 1; !foundLastPage; page++ {
url = re.ReplaceAllString(url, "") + fmt.Sprintf("&page=%d", page)
episodeLinks, err := getEpisodeLinksForPage(url)
if err != nil {
break
}
// extract episode_no from url and sort by it
re = regexp.MustCompile("episode_no=([0-9]+)")
episodeNo := func(episodeLink string) int {
matches := re.FindStringSubmatch(episodeLink)
if len(matches) != 2 {
return 0
}
episodeNo, err := strconv.Atoi(matches[1])
if err != nil {
return 0
for _, episodeLink := range episodeLinks {
// when you go past the last page, it just rerenders the last page
if _, ok := episodeLinkSet[episodeLink]; ok {
foundLastPage = true
break
}
return episodeNo
episodeLinkSet[episodeLink] = struct{}{}
}
sort.Slice(keys, func(i, j int) bool {
return episodeNo(keys[i]) < episodeNo(keys[j])
})

var allImgLinks []string
for _, episodeLink := range keys {
println(episodeLink)
allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...)
if !foundLastPage {
println(url)
}
return allImgLinks
}

allEpisodeLinks := make([]string, 0, len(episodeLinkSet))
for episodeLink := range episodeLinkSet {
allEpisodeLinks = append(allEpisodeLinks, episodeLink)
}

// extract episode_no from url and sort by it
sort.Slice(allEpisodeLinks, func(i, j int) bool {
return episodeNo(allEpisodeLinks[i]) < episodeNo(allEpisodeLinks[j])
})
return allEpisodeLinks
}

// episodeNoRe matches the episode_no query parameter in an episode URL.
// Compiled once at package scope: episodeNo is called repeatedly in loops
// and inside a sort comparator, so per-call compilation would be wasteful.
var episodeNoRe = regexp.MustCompile("episode_no=([0-9]+)")

// episodeNo returns the episode number embedded in episodeLink's
// episode_no query parameter, or 0 when the parameter is absent or its
// value does not parse as an int (e.g. it overflows).
func episodeNo(episodeLink string) int {
	matches := episodeNoRe.FindStringSubmatch(episodeLink)
	if len(matches) != 2 {
		return 0
	}
	n, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}
	return n
}

// getImgLinksForEpisodes fetches and concatenates the image links for every
// episode in episodeLinks, preserving the given episode order. Returns nil
// when episodeLinks is empty.
func getImgLinksForEpisodes(episodeLinks []string) []string {
	if len(episodeLinks) == 0 {
		return nil
	}
	// Loop-invariant: the last episode number only needs computing once,
	// not on every iteration.
	lastEp := episodeNo(episodeLinks[len(episodeLinks)-1])
	var allImgLinks []string
	for _, episodeLink := range episodeLinks {
		println(fmt.Sprintf("fetching images for episode %d (last episode %d)", episodeNo(episodeLink), lastEp))
		allImgLinks = append(allImgLinks, getImgLinksForEpisode(episodeLink)...)
	}
	return allImgLinks
}

func fetchImage(imgLink string) []byte {
Expand Down Expand Up @@ -139,45 +163,55 @@ func fetchImage(imgLink string) []byte {
return buff.Bytes()
}

// addImgToPdf downloads the image at imgLink and appends it to pdf as a new
// page sized to the image's dimensions.
func addImgToPdf(pdf *gopdf.GoPdf, imgLink string) error {
	raw := fetchImage(imgLink)

	holder, err := gopdf.ImageHolderByBytes(raw)
	if err != nil {
		return err
	}
	cfg, _, err := image.DecodeConfig(bytes.NewReader(raw))
	if err != nil {
		return err
	}

	// gopdf assumes dpi 128 https://github.com/signintech/gopdf/issues/168
	// W and H are in points, 1 point = 1/72 inch
	// convert pixels (Width and Height) to points
	// subtract 1 point to account for margins
	pageSize := gopdf.Rect{
		W: float64(cfg.Width)*72/128 - 1,
		H: float64(cfg.Height)*72/128 - 1,
	}
	pdf.AddPageWithOption(gopdf.PageOption{PageSize: &pageSize})

	return pdf.ImageByHolder(holder, 0, 0, nil)
}

func main() {
if len(os.Args) < 2 {
fmt.Println("Usage: webtoon-dl <url>")
os.Exit(1)
}
url := os.Args[1]
imgLinks := getImgLinks(url)
minEp := flag.Int("min-ep", 0, "Minimum episode number to download (inclusive)")
maxEp := flag.Int("max-ep", math.MaxInt, "Maximum episode number to download (inclusive)")
flag.Parse()
if *minEp > *maxEp {
fmt.Println("min-ep must be less than or equal to max-ep")
os.Exit(1)
}

url := os.Args[len(os.Args)-1]
imgLinks, actualMinEp, actualMaxEp := getImgLinks(url, *minEp, *maxEp)
fmt.Println(fmt.Sprintf("found %d pages", len(imgLinks)))

pdf := gopdf.GoPdf{}
pdf.Start(gopdf.Config{Unit: gopdf.UnitPT, PageSize: *gopdf.PageSizeA4})
for _, imgLink := range imgLinks {
fmt.Println(imgLink)
img := fetchImage(imgLink)
holder, err := gopdf.ImageHolderByBytes(img)
if err != nil {
fmt.Println(err.Error())
os.Exit(1)
}

d, _, err := image.DecodeConfig(bytes.NewReader(img))
if err != nil {
fmt.Println(err.Error())
os.Exit(1)
}

// gopdf assumes dpi 128 https://github.com/signintech/gopdf/issues/168
// W and H are in points, 1 point = 1/72 inch
// convert pixels (Width and Height) to points
// subtract 1 point to account for margins
pdf.AddPageWithOption(gopdf.PageOption{PageSize: &gopdf.Rect{
W: float64(d.Width)*72/128 - 1,
H: float64(d.Height)*72/128 - 1,
}})
err = pdf.ImageByHolder(holder, 0, 0, nil)
for idx, imgLink := range imgLinks {
err := addImgToPdf(&pdf, imgLink)
if err != nil {
fmt.Println(err.Error())
os.Exit(1)
}
fmt.Println(fmt.Sprintf("added page %d/%d", idx+1, len(imgLinks)))
}

outURL := strings.ReplaceAll(url, "http://", "")
Expand All @@ -187,6 +221,11 @@ func main() {
outURL = strings.Split(outURL, "?")[0]
outURL = strings.ReplaceAll(outURL, "/viewer", "")
outURL = strings.ReplaceAll(outURL, "/", "-")
if actualMinEp != actualMaxEp {
outURL = fmt.Sprintf("%s-ep%d-%d", outURL, actualMinEp, actualMaxEp)
} else {
outURL = fmt.Sprintf("%s-ep%d", outURL, actualMinEp)
}
outPath := outURL + ".pdf"
err := pdf.WritePdf(outPath)
if err != nil {
Expand Down

0 comments on commit 6ac152f

Please sign in to comment.