Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

Commit

Permalink
Merge pull request #25 from mBaratta96/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
mBaratta96 authored Aug 15, 2023
2 parents f4624f5 + 43a8d89 commit 81c13e8
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 71 deletions.
10 changes: 9 additions & 1 deletion app.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ func checkIndex(index int) int {
return index
}

// RYM is the website that requires more configuration (cookies, credits scraping, etc.).
// However, we still perform similar operations for both websites: search for an artist,
// select an artist, select an album, get album data. The similarity of these operations is what led to
// the introduction of the scraper.Scraper interface.
func app(s scraper.Scraper) {
data := scraper.ScrapeData(s.SearchBand)
index := -1
Expand All @@ -35,12 +39,14 @@ func app(s scraper.Scraper) {
}
index = checkIndex(index)
s.SetLink(data.Links[index])
// Scrape the albums of an artist
data = scraper.ScrapeData(s.AlbumList)
for true {
cli.CallClear()
cli.PrintMap(s.StyleColor(), data.Metadata)
index = checkIndex(cli.PrintTable(data.Rows, data.Columns.Title, data.Columns.Width))
s.SetLink(data.Links[index])
// Scrape album data
albumData := scraper.ScrapeData(s.Album)
cli.CallClear()
if albumData.Image != nil {
Expand Down Expand Up @@ -108,7 +114,7 @@ func app(s scraper.Scraper) {
s.SetLink(similData.Links[similIndex])
data = scraper.ScrapeData(s.AlbumList)
goingBack = true
} else { // get back to current artist and do nothing
} else { // similIndex is the "Go back" option. Get back to current artist and do nothing
s.SetLink(data.Links[index])
}
}
Expand All @@ -122,6 +128,7 @@ func app(s scraper.Scraper) {
func main() {
website := flag.String("website", "", "Desired Website ('metallum' or 'rym')")
rymCredits := flag.Bool("credits", false, "Display RYM credits")
expand := flag.Bool("expand", false, "Expand RYM albums")
flag.Parse()
if len(flag.Args()) == 0 {
os.Exit(1)
Expand All @@ -146,6 +153,7 @@ func main() {
r := &scraper.RateYourMusic{}
r.Link = search
r.GetCredits = *rymCredits
r.Expand = *expand
config, _ := scraper.ReadUserConfiguration(configFilePath)
r.Delay = config.Delay
if config.Authenticate {
Expand Down
26 changes: 17 additions & 9 deletions scraper/metallum.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ type Metallum struct {
Link string
}

// Metadata contains info such as the country of origin on band pages and the label on album pages.
// For reference, inspect:
// https://www.metal-archives.com/bands/Emperor/30
// https://www.metal-archives.com/albums/Emperor/Anthems_to_the_Welkin_at_Dusk/92
func getMetadata(h *colly.HTMLElement, metadata *orderedmap.OrderedMap[string, string]) {
keys, values := []string{}, []string{}
h.ForEach("dt", func(_ int, h *colly.HTMLElement) {
Expand All @@ -52,6 +56,8 @@ func getMetadata(h *colly.HTMLElement, metadata *orderedmap.OrderedMap[string, s
}
}

// The Metallum search page renders the results of a query from a JSON payload.
// https://www.metal-archives.com/search?searchString=emperor&type=band_name
func (m *Metallum) SearchBand(data *ScrapedData) ([]int, []string) {
c := colly.NewCollector()
data.Links = make([]string, 0)
Expand All @@ -61,8 +67,9 @@ func (m *Metallum) SearchBand(data *ScrapedData) ([]int, []string) {
if err := json.Unmarshal(r.Body, &response); err != nil {
fmt.Println("Can not unmarshal JSON")
}

for _, el := range response.AaData {
// Search results are contained in the first element of the JSON array as an HTML string.
// We parse it and get the data.
doc, err := goquery.NewDocumentFromReader(strings.NewReader(el[0]))
if err != nil {
fmt.Println("Error on response")
Expand All @@ -80,15 +87,17 @@ func (m *Metallum) SearchBand(data *ScrapedData) ([]int, []string) {
return mBandColWidths[:], mAlbumColTitles[:]
}

// https://www.metal-archives.com/bands/Emperor/30
func (m *Metallum) AlbumList(data *ScrapedData) ([]int, []string) {
c := colly.NewCollector()
data.Links = make([]string, 0)
data.Metadata = orderedmap.New[string, string]()

// Get link to table with all albums
c.OnHTML("#band_disco a[href*='all']", func(e *colly.HTMLElement) {
e.Request.Visit(e.Attr("href"))
})

// Scrape the table
c.OnHTML("table.display.discog tbody tr", func(h *colly.HTMLElement) {
var row [4]string
h.ForEach(".album,.demo,.other,td a[href]", func(i int, h *colly.HTMLElement) {
Expand All @@ -107,6 +116,7 @@ func (m *Metallum) AlbumList(data *ScrapedData) ([]int, []string) {
return mAlbumlistColWidths[:], mAlbumlistColTitles[:]
}

// https://www.metal-archives.com/albums/Emperor/Anthems_to_the_Welkin_at_Dusk/92
func (m *Metallum) Album(data *ScrapedData) ([]int, []string) {
c := colly.NewCollector()
data.Links = make([]string, 0)
Expand All @@ -119,16 +129,14 @@ func (m *Metallum) Album(data *ScrapedData) ([]int, []string) {
})
data.Rows = append(data.Rows, row[:])
})

// Get band id (useful if you want to check similar bands later)
c.OnHTML("h2.band_name > a", func(h *colly.HTMLElement) {
data.Metadata.Set("ID", path.Base(h.Attr("href")))
})

c.OnHTML("a#cover.image", func(h *colly.HTMLElement) {
image_src := h.ChildAttr("img", "src")
h.Request.Visit(image_src)
})

c.OnResponse(func(r *colly.Response) {
if r.Headers.Get("content-type") == "image/jpeg" {
var err error
Expand All @@ -138,7 +146,6 @@ func (m *Metallum) Album(data *ScrapedData) ([]int, []string) {
}
}
})

c.OnHTML("dl.float_right,dl.float_left", func(h *colly.HTMLElement) {
getMetadata(h, data.Metadata)
})
Expand All @@ -155,6 +162,7 @@ func (m *Metallum) SetLink(link string) {
m.Link = link
}

// https://www.metal-archives.com/albums/Emperor/Anthems_to_the_Welkin_at_Dusk/92
func (m *Metallum) ReviewsList(data *ScrapedData) ([]int, []string) {
c := colly.NewCollector()
data.Links = make([]string, 0)
Expand All @@ -172,7 +180,6 @@ func (m *Metallum) ReviewsList(data *ScrapedData) ([]int, []string) {
})
data.Rows = append(data.Rows, row[:])
})

c.OnHTML("div.reviewBox", func(h *colly.HTMLElement) {
review := h.ChildText("h3.reviewTitle") + "\n"
review += h.ChildText("div:not([attr_all])") + "\n"
Expand All @@ -184,6 +191,7 @@ func (m *Metallum) ReviewsList(data *ScrapedData) ([]int, []string) {
return mReviewColWidths[:], mReviewColTitles[:]
}

// https://www.metal-archives.com/albums/Emperor/Anthems_to_the_Welkin_at_Dusk/92
func (m *Metallum) Credits() *orderedmap.OrderedMap[string, string] {
c := colly.NewCollector()
credits := orderedmap.New[string, string]()
Expand Down Expand Up @@ -212,8 +220,8 @@ func (m *Metallum) similarArtists(data *ScrapedData) ([]int, []string) {
})
data.Rows = append(data.Rows, row[:])
})

c.OnScraped(func(_ *colly.Response) { // This makes len(data.Rown) = len(data.Links) + 1 (see app.go)
// This makes len(data.Rows) = len(data.Links) + 1 (see app.go)
c.OnScraped(func(_ *colly.Response) {
data.Rows = append(data.Rows, []string{"Go back to choices", "", "", ""})
})

Expand Down
3 changes: 0 additions & 3 deletions scraper/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,11 @@ func credentials() (string, string, error) {
if err != nil {
return "", "", err
}

fmt.Print("Enter Password: ")
bytePassword, err := term.ReadPassword(int(syscall.Stdin))
if err != nil {
return "", "", err
}

password := string(bytePassword)
return strings.TrimSpace(username), strings.TrimSpace(password), nil
}
Expand Down Expand Up @@ -84,7 +82,6 @@ func SaveCookie(cookies map[string]string, path string) {
panic(err)
}
defer f.Close()

as_json, err := json.MarshalIndent(cookies, "", "\t")
if err != nil {
panic(err)
Expand Down
Loading

0 comments on commit 81c13e8

Please sign in to comment.