-
Notifications
You must be signed in to change notification settings - Fork 0
/
sitemeta.go
39 lines (33 loc) · 967 Bytes
/
sitemeta.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
package main
import "regexp"
// SiteMeta groups the pieces of page-level metadata that NewSiteMeta
// extracts from an HTML document. Every field is a plain string.
type SiteMeta struct {
	// Title is filled from the page's <title> element.
	Title string
	// Author is filled from the <meta name="author"> tag's content attribute.
	Author string
	// Description is filled from the <meta name="description"> tag's
	// content attribute.
	Description string
	// BaseLink is filled from the href attribute of the <base> tag.
	BaseLink string
}
// NewSiteMeta builds a SiteMeta from the raw markup of an HTML page.
//
// Each field is populated by its own helper: the title via htmlTitle, the
// "description" and "author" meta tags via htmlMetaAttr, and the base link
// via htmlBaseLink. The result is returned by value.
func NewSiteMeta(html string) SiteMeta {
	var result SiteMeta

	result.Title = htmlTitle(html)
	result.Description = htmlMetaAttr(html, "description")
	result.Author = htmlMetaAttr(html, "author")
	result.BaseLink = htmlBaseLink(html)

	return result
}
// htmlMetaAttr searches html for a tag of the exact form
// <meta name="metaName" content="..."> and passes the compiled pattern to
// regExpMatch, returning whatever that helper yields for the content value.
//
// metaName is escaped with regexp.QuoteMeta so that it is always matched
// literally: a name containing regex metacharacters can neither alter the
// pattern nor make MustCompile panic.
//
// The capture is non-greedy so it ends at the first closing quote after
// content=", instead of running on to the last quote on the line and
// swallowing any attributes that follow.
func htmlMetaAttr(html string, metaName string) string {
	pattern := `<meta name="` + regexp.QuoteMeta(metaName) + `" content="(.*?)"`
	re := regexp.MustCompile(pattern)
	return regExpMatch(html, re)
}
// htmlTitleRe matches a <title>...</title> element that sits on one line.
// The capture is non-greedy so that, when several </title> tags share a line
// (for example minified markup containing inline SVG <title> elements), only
// the text up to the first closing tag is captured.
// It is compiled once at package scope rather than on every call.
var htmlTitleRe = regexp.MustCompile(`<title>(.*?)</title>`)

// htmlTitle extracts the page title from html by handing htmlTitleRe to
// regExpMatch and returning that helper's result.
func htmlTitle(html string) string {
	return regExpMatch(html, htmlTitleRe)
}
// htmlBaseLinkRe matches the href attribute of a <base> tag written as
// <base href="...". The capture is non-greedy so it stops at the quote that
// closes href; a greedy capture would run to the last quote on the line and
// include trailing attributes such as target="_blank".
// It is compiled once at package scope rather than on every call.
var htmlBaseLinkRe = regexp.MustCompile(`<base href="(.*?)"`)

// htmlBaseLink extracts the base link of an HTML page by handing
// htmlBaseLinkRe to regExpMatch and returning that helper's result.
func htmlBaseLink(html string) string {
	return regExpMatch(html, htmlBaseLinkRe)
}