Skip to content

Commit

Permalink
Add abstractions for validator
Browse files Browse the repository at this point in the history
Signed-off-by: Saswata Mukherjee <saswataminsta@yahoo.com>
  • Loading branch information
saswatamcode committed Jun 3, 2021
1 parent 7536052 commit 98e791a
Show file tree
Hide file tree
Showing 6 changed files with 230 additions and 167 deletions.
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ This directive runs executable with arguments and put its stderr and stdout outp
"Absolute path links will be converted to relative links to anchor dir as well.").Regexp()
// TODO(bwplotka): Add cache in file?
linksValidateEnabled := cmd.Flag("links.validate", "If true, all links will be validated").Short('l').Bool()
linksValidateConfig := extflag.RegisterPathOrContent(cmd, "links.validate.config", "YAML file for skipping link check, with spec defined in github.com/bwplotka/mdox/pkg/linktransformer.Config", false, true)
linksValidateConfig := extflag.RegisterPathOrContent(cmd, "links.validate.config", "YAML file for skipping link check, with spec defined in github.com/bwplotka/mdox/pkg/linktransformer.Config", extflag.WithEnvSubstitution(true))

cmd.Run(func(ctx context.Context, logger log.Logger) (err error) {
var opts []mdformatter.Option
Expand Down
56 changes: 36 additions & 20 deletions pkg/extflag/pathorcontent.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"io/ioutil"
"os"
"regexp"
"strings"

"github.com/pkg/errors"
"gopkg.in/alecthomas/kingpin.v2"
Expand All @@ -29,12 +28,15 @@ type PathOrContent struct {
content *string
}

// Option is a functional option type for PathOrContent objects.
type Option func(*PathOrContent)

type FlagClause interface {
Flag(name, help string) *kingpin.FlagClause
}

// RegisterPathOrContent registers PathOrContent flag in kingpinCmdClause.
func RegisterPathOrContent(cmd FlagClause, flagName string, help string, required bool, envSubstitution bool) *PathOrContent {
func RegisterPathOrContent(cmd FlagClause, flagName string, help string, opts ...Option) *PathOrContent {
fileFlagName := fmt.Sprintf("%s-file", flagName)
contentFlagName := flagName

Expand All @@ -44,13 +46,17 @@ func RegisterPathOrContent(cmd FlagClause, flagName string, help string, require
contentHelp := fmt.Sprintf("Alternative to '%s' flag (mutually exclusive). Content of %s", fileFlagName, help)
contentFlag := cmd.Flag(contentFlagName, contentHelp).PlaceHolder("<content>").String()

return &PathOrContent{
p := &PathOrContent{
flagName: flagName,
required: required,
path: fileFlag,
content: contentFlag,
envSubstitution: envSubstitution,
required: false,
envSubstitution: false,
}
for _, opt := range opts {
opt(p)
}
return p
}

// Content returns the content of the file when given or directly the content that has been passed to the flag.
Expand Down Expand Up @@ -80,24 +86,34 @@ func (p *PathOrContent) Content() ([]byte, error) {
return nil, errors.Errorf("flag %s or %s is required for running this command and content cannot be empty.", fileFlagName, p.flagName)
}
if p.envSubstitution {
content = substituteEnvVars(string(content))
content = expandEnv(content)
}
return content, nil
}

// substituteEnvVars returns content of YAML file with substituted environment variables.
// Will be substituted with empty string if env var isn't set.
// Follows K8s convention, i.e $(...), as mentioned here https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/.
func substituteEnvVars(content string) []byte {
var replaceWithEnv []string
// Match env variable syntax.
envVarName := regexp.MustCompile(`\$\((?P<var>[a-zA-Z_]+[a-zA-Z0-9_]*)\)`)
loc := envVarName.FindAllStringSubmatchIndex(content, -1)
for i := range loc {
// Add pair to be replaced.
replaceWithEnv = append(replaceWithEnv, content[loc[i][0]:loc[i][1]], os.Getenv(content[loc[i][2]:loc[i][3]]))
// WithRequired allows you to override default required option.
func WithRequired(r bool) Option {
return func(p *PathOrContent) {
p.required = r
}
replacer := strings.NewReplacer(replaceWithEnv...)
contentWithEnv := replacer.Replace(content)
return []byte(contentWithEnv)
}

// WithRequired allows you to override default envSubstitution option.
func WithEnvSubstitution(e bool) Option {
return func(p *PathOrContent) {
p.envSubstitution = e
}
}

// expandEnv returns content of YAML file with substituted environment variables.
// Follows K8s convention, i.e $(...), as mentioned here https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/.
func expandEnv(b []byte) []byte {
var envRe = regexp.MustCompile(`\$\(([a-zA-Z_0-9]+)\)`)
r := envRe.ReplaceAllFunc(b, func(n []byte) []byte {
n = n[2 : len(n)-1]

v := os.Getenv(string(n))
return []byte(v)
})
return r
}
134 changes: 124 additions & 10 deletions pkg/mdformatter/linktransformer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,65 @@ package linktransformer

import (
"bytes"
"encoding/json"
"fmt"
"math"
"net/http"
"regexp"
"strconv"

"github.com/pkg/errors"
"gopkg.in/yaml.v3"
)

const roundtrip ValidatorType = "roundtrip"

// const github ValidatorType = "github"

type Config struct {
Version int

Validators []Validator `yaml:"validators"`
Validators []ValidatorConfig `yaml:"validators"`
}

type Validator struct {
_regex *regexp.Regexp
_maxNum int
type ValidatorConfig struct {
// Regex for type github is reponame matcher, like `bwplotka\/mdox`.
Regex string `yaml:"regex"`
// By default type is `roundtrip`. Could be `github`.
// By default type is `ignore`. Could be `github` or `roundtrip`.
Type ValidatorType `yaml:"type"`
// GitHub repo token to avoid getting rate limited.
Token string `yaml:"token"`

ghValidator GitHubValidator
rtValidator RoundTripValidator
igValidator IgnoreValidator
}

type RoundTripValidator struct {
_regex *regexp.Regexp
}

type GitHubValidator struct {
_regex *regexp.Regexp
_maxNum int
}

type IgnoreValidator struct {
_regex *regexp.Regexp
}

type ValidatorType string

const (
roundtripValidator ValidatorType = "roundtrip"
githubValidator ValidatorType = "github"
ignoreValidator ValidatorType = "ignore"
)

const (
gitHubAPIURL = "https://api.github.com/repos/%v/%v?sort=created&direction=desc&per_page=1"
)

type GitHubResponse struct {
Number int `json:"number"`
}

func ParseConfig(c []byte) (Config, error) {
cfg := Config{}
dec := yaml.NewDecoder(bytes.NewReader(c))
Expand All @@ -46,8 +76,92 @@ func ParseConfig(c []byte) (Config, error) {
return Config{}, errors.New("No validator provided")
}

// Evaluate regex for given validators.
for i := range cfg.Validators {
cfg.Validators[i]._regex = regexp.MustCompile(cfg.Validators[i].Regex)
switch cfg.Validators[i].Type {
case roundtripValidator:
cfg.Validators[i].rtValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex)
case githubValidator:
regex, maxNum, err := getGitHubRegex(cfg.Validators[i].Regex, cfg.Validators[i].Token)
if err != nil {
return Config{}, errors.Wrapf(err, "parsing GitHub Regex %v", err)
}
cfg.Validators[i].ghValidator._regex = regex
cfg.Validators[i].ghValidator._maxNum = maxNum
case ignoreValidator:
cfg.Validators[i].igValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex)
default:
return Config{}, errors.New("Validator type not supported")
}
}
return cfg, nil
}

// getGitHubRegex returns GitHub pulls/issues regex from repo name.
func getGitHubRegex(repoRe string, repoToken string) (*regexp.Regexp, int, error) {
// Get reponame from regex.
getRepo := regexp.MustCompile(`(?P<org>[A-Za-z0-9_.-]+)\\\/(?P<repo>[A-Za-z0-9_.-]+)`)
match := getRepo.FindStringSubmatch(repoRe)
if len(match) != 3 {
return nil, math.MaxInt64, errors.New("repo name regex not valid")
}
reponame := match[1] + "/" + match[2]

var pullNum []GitHubResponse
var issueNum []GitHubResponse
max := 0
// All GitHub API reqs need to have User-Agent: https://docs.github.com/en/rest/overview/resources-in-the-rest-api#user-agent-required.
client := &http.Client{}

// Check latest pull request number.
reqPull, err := http.NewRequest("GET", fmt.Sprintf(gitHubAPIURL, reponame, "pulls"), nil)
if err != nil {
return nil, math.MaxInt64, err
}
reqPull.Header.Set("User-Agent", "mdox")

// Check latest issue number and return whichever is greater.
reqIssue, err := http.NewRequest("GET", fmt.Sprintf(gitHubAPIURL, reponame, "issues"), nil)
if err != nil {
return nil, math.MaxInt64, err
}
reqIssue.Header.Set("User-Agent", "mdox")

if repoToken != "" {
reqPull.Header.Set("Authorization", "Bearer "+repoToken)
reqIssue.Header.Set("Authorization", "Bearer "+repoToken)
}

respPull, err := client.Do(reqPull)
if err != nil {
return nil, math.MaxInt64, err
}
if respPull.StatusCode != 200 {
return nil, math.MaxInt64, errors.New("pulls API request failed. status code: " + strconv.Itoa(respPull.StatusCode))
}
defer respPull.Body.Close()
if err := json.NewDecoder(respPull.Body).Decode(&pullNum); err != nil {
return nil, math.MaxInt64, err
}

respIssue, err := client.Do(reqIssue)
if err != nil {
return nil, math.MaxInt64, err
}
if respIssue.StatusCode != 200 {
return nil, math.MaxInt64, errors.New("issues API request failed. status code: " + strconv.Itoa(respIssue.StatusCode))
}
defer respIssue.Body.Close()
if err := json.NewDecoder(respIssue.Body).Decode(&issueNum); err != nil {
return nil, math.MaxInt64, err
}

if len(pullNum) > 0 {
max = pullNum[0].Number
}
if len(issueNum) > 0 && issueNum[0].Number > max {
max = issueNum[0].Number
}

return regexp.MustCompile(`(^http[s]?:\/\/)(www\.)?(github\.com\/)(` + repoRe + `)(\/pull\/|\/issues\/)`), max, nil
}
18 changes: 8 additions & 10 deletions pkg/mdformatter/linktransformer/link.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ type futureResult struct {
// NewValidator returns mdformatter.LinkTransformer that crawls all links.
// TODO(bwplotka): Add optimization and debug modes - this is the main source of latency and pain.
func NewValidator(logger log.Logger, linksValidateConfig []byte, anchorDir string) (mdformatter.LinkTransformer, error) {
var config Config
var err error
config := Config{}
if string(linksValidateConfig) != "" {
config, err = ParseConfig(linksValidateConfig)
if err != nil {
Expand Down Expand Up @@ -177,10 +177,6 @@ func NewValidator(logger log.Logger, linksValidateConfig []byte, anchorDir strin
defer v.rMu.Unlock()
v.remoteLinks[response.Ctx.Get(originalURLKey)] = errors.Wrapf(err, "%q not accessible; status code %v", response.Request.URL.String(), response.StatusCode)
})
err = v.validateConfig.validateGH()
if err != nil {
return nil, err
}
return v, nil
}

Expand Down Expand Up @@ -244,11 +240,6 @@ func (v *validator) visit(filepath string, dest string) {
return
}
v.destFutures[k] = &futureResult{cases: 1, resultFn: func() error { return nil }}
validator := v.validateConfig.GetValidatorForURL(dest)
if validator.Matched {
return
}

matches := remoteLinkPrefixRe.FindAllStringIndex(dest, 1)
if matches == nil {
// Relative or absolute path. Check if exists.
Expand All @@ -260,6 +251,13 @@ func (v *validator) visit(filepath string, dest string) {
}
return
}
validator := v.validateConfig.GetValidatorForURL(dest)
if validator != nil {
matched, err := validator.IsValid(dest)
if matched && err == nil {
return
}
}

// Result will be in future.
v.destFutures[k].resultFn = func() error { return v.remoteLinks[dest] }
Expand Down
14 changes: 8 additions & 6 deletions pkg/mdformatter/linktransformer/link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,21 +230,21 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, fmt.Sprintf("%v%v: %v%v: \"https://bwplotka.dev/does-not-exists\" not accessible; status code 404: Not Found", tmpDir, filePath, relDirPath, filePath), err.Error())
})

t.Run("check 404 link with validate config", func(t *testing.T) {
t.Run("check valid & 404 link with validate config", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-link2.md")
testutil.Ok(t, ioutil.WriteFile(testFile, []byte("https://bwplotka.dev/does-not-exists\n"), os.ModePerm))
testutil.Ok(t, ioutil.WriteFile(testFile, []byte("https://www.github.com/ https://bwplotka.dev/does-not-exits\n"), os.ModePerm))

diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: '://bwplotka.dev'\n type: 'roundtrip'\n"), anchorDir),
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: 'github'\n type: 'roundtrip'\n"), anchorDir),
))
testutil.Ok(t, err)
})

t.Run("check links with validate config", func(t *testing.T) {
t.Run("check 404 links with ignore validate config", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "links.md")
testutil.Ok(t, ioutil.WriteFile(testFile, []byte("https://fakelink1.com/ http://fakelink2.com/ https://www.fakelink3.com/\n"), os.ModePerm))

Expand All @@ -253,21 +253,23 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: '(^http[s]?:\\/\\/)(www\\.)?(fakelink[0-9]\\.com\\/)'\n type: 'roundtrip'\n"), anchorDir),
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: '(^http[s]?:\\/\\/)(www\\.)?(fakelink[0-9]\\.com\\/)'\n type: 'ignore'\n"), anchorDir),
))
testutil.Ok(t, err)
})

t.Run("check github links with validate config", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "github-link.md")
testutil.Ok(t, ioutil.WriteFile(testFile, []byte("https://github.com/bwplotka/mdox/issues/23 https://github.com/bwplotka/mdox/pull/32 https://github.com/bwplotka/mdox/pull/27#pullrequestreview-659598194\n"), os.ModePerm))
// This is substituted in config using PathorContent flag. But need to pass it directly here.
repoToken := os.Getenv("GITHUB_TOKEN")

diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: 'bwplotka\\/mdox'\n type: 'github'\n"), anchorDir),
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: 'bwplotka\\/mdox'\n token: '"+repoToken+"'\n type: 'github'\n"), anchorDir),
))
testutil.Ok(t, err)
})
Expand Down
Loading

0 comments on commit 98e791a

Please sign in to comment.