Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add links.validate.config flag #33

Merged
merged 9 commits into from
Jun 5, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ jobs:
- name: Run unit tests.
env:
GOBIN: /tmp/.bin
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: make test
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,15 @@ Flags:
exists).Absolute path links will be converted
to relative links to anchor dir as well.
-l, --links.validate If true, all links will be validated
--links.validate.without-address-regex=^$
If specified, all links will be validated,
except those matching the given target address.
--links.validate.config-file=<file-path>
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💪🏽

Path to YAML file for skipping link check, with
spec defined in
github.com/bwplotka/mdox/pkg/linktransformer.Config
--links.validate.config=<content>
Alternative to 'links.validate.config-file'
flag (mutually exclusive). Content of YAML file
for skipping link check, with spec defined in
github.com/bwplotka/mdox/pkg/linktransformer.Config

Args:
<files> Markdown file(s) to process.
Expand Down
9 changes: 7 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"syscall"

"github.com/bwplotka/mdox/pkg/clilog"
"github.com/bwplotka/mdox/pkg/extflag"
"github.com/bwplotka/mdox/pkg/extkingpin"
"github.com/bwplotka/mdox/pkg/mdformatter"
"github.com/bwplotka/mdox/pkg/mdformatter/linktransformer"
Expand Down Expand Up @@ -126,7 +127,7 @@ This directive runs executable with arguments and put its stderr and stdout outp
"Absolute path links will be converted to relative links to anchor dir as well.").Regexp()
// TODO(bwplotka): Add cache in file?
linksValidateEnabled := cmd.Flag("links.validate", "If true, all links will be validated").Short('l').Bool()
linksValidateExceptDomains := cmd.Flag("links.validate.without-address-regex", "If specified, all links will be validated, except those matching the given target address.").Default(`^$`).Regexp()
linksValidateConfig := extflag.RegisterPathOrContent(cmd, "links.validate.config", "YAML file for skipping link check, with spec defined in github.com/bwplotka/mdox/pkg/linktransformer.Config", false)

cmd.Run(func(ctx context.Context, logger log.Logger) (err error) {
var opts []mdformatter.Option
Expand All @@ -151,7 +152,11 @@ This directive runs executable with arguments and put its stderr and stdout outp

var linkTr []mdformatter.LinkTransformer
if *linksValidateEnabled {
v, err := linktransformer.NewValidator(logger, *linksValidateExceptDomains, anchorDir)
validateConfigContent, err := linksValidateConfig.Content()
if err != nil {
return err
}
v, err := linktransformer.NewValidator(logger, validateConfigContent, anchorDir)
if err != nil {
return err
}
Expand Down
79 changes: 79 additions & 0 deletions pkg/extflag/pathorcontent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) Bartłomiej Płotka @bwplotka
// Licensed under the Apache License 2.0.

// Taken from Thanos project.
//
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.
package extflag

import (
"fmt"
"io/ioutil"

"github.com/pkg/errors"
"gopkg.in/alecthomas/kingpin.v2"
)

// PathOrContent is a flag type that defines two flags to fetch bytes. Either from file (*-file flag) or content (* flag).
type PathOrContent struct {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add env substitution here, and then we can think about proposing this upstream.

So the idea would be to add env substitution using e.g Kubernetes format, which is $(...) and document it.

e.g our config could be Token: $(GITHUB_TOKEN)

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then we can upstream it e.g through this: efficientgo/tools#9

flagName string

required bool

path *string
content *string
}

type FlagClause interface {
Flag(name, help string) *kingpin.FlagClause
}

// RegisterPathOrContent registers PathOrContent flag in kingpinCmdClause.
func RegisterPathOrContent(cmd FlagClause, flagName string, help string, required bool) *PathOrContent {
fileFlagName := fmt.Sprintf("%s-file", flagName)
contentFlagName := flagName

fileHelp := fmt.Sprintf("Path to %s", help)
fileFlag := cmd.Flag(fileFlagName, fileHelp).PlaceHolder("<file-path>").String()

contentHelp := fmt.Sprintf("Alternative to '%s' flag (mutually exclusive). Content of %s", fileFlagName, help)
contentFlag := cmd.Flag(contentFlagName, contentHelp).PlaceHolder("<content>").String()

return &PathOrContent{
flagName: flagName,
required: required,
path: fileFlag,
content: contentFlag,
}
}

// Content returns the content of the file when given or directly the content that has been passed to the flag.
// It returns an error when:
// * The file and content flags are both not empty.
// * The file flag is not empty but the file can't be read.
// * The content is empty and the flag has been defined as required.
func (p *PathOrContent) Content() ([]byte, error) {
fileFlagName := fmt.Sprintf("%s-file", p.flagName)

if len(*p.path) > 0 && len(*p.content) > 0 {
return nil, errors.Errorf("both %s and %s flags set.", fileFlagName, p.flagName)
}

var content []byte
if len(*p.path) > 0 {
c, err := ioutil.ReadFile(*p.path)
if err != nil {
return nil, errors.Wrapf(err, "loading file %s for %s", *p.path, fileFlagName)
}
content = c
} else {
content = []byte(*p.content)
}

if len(content) == 0 && p.required {
return nil, errors.Errorf("flag %s or %s is required for running this command and content cannot be empty.", fileFlagName, p.flagName)
}

return content, nil
}
51 changes: 51 additions & 0 deletions pkg/mdformatter/linktransformer/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) Bartłomiej Płotka @bwplotka
// Licensed under the Apache License 2.0.

package linktransformer

import (
"bytes"
"regexp"

"github.com/pkg/errors"
"gopkg.in/yaml.v3"
)

const roundtrip ValidatorType = "roundtrip"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's common to put this just below the ValidatorType definition. Let's also group those into one const ( ).

Suggested change
const roundtrip ValidatorType = "roundtrip"
const roundtripValidator ValidatorType = "roundtrip"


// const github ValidatorType = "github"

type Config struct {
Version int

Validators []Validator `yaml:"validators"`
}

type Validator struct {
_regex *regexp.Regexp
_maxNum int
// Regex for type github is reponame matcher, like `bwplotka\/mdox`.
Regex string `yaml:"regex"`
// By default type is `roundtrip`. Could be `github`.
Type ValidatorType `yaml:"type"`
}

type ValidatorType string

func ParseConfig(c []byte) (Config, error) {
cfg := Config{}
dec := yaml.NewDecoder(bytes.NewReader(c))
dec.KnownFields(true)
if err := dec.Decode(&cfg); err != nil {
return Config{}, errors.Wrapf(err, "parsing YAML content %q", string(c))
}

if len(cfg.Validators) <= 0 {
return Config{}, errors.New("No validator provided")
}

for i := range cfg.Validators {
cfg.Validators[i]._regex = regexp.MustCompile(cfg.Validators[i].Regex)
}
bwplotka marked this conversation as resolved.
Show resolved Hide resolved
return cfg, nil
}
41 changes: 27 additions & 14 deletions pkg/mdformatter/linktransformer/link.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ func (l *localizer) TransformDestination(ctx mdformatter.SourceContext, destinat
func (l *localizer) Close(mdformatter.SourceContext) error { return nil }

type validator struct {
logger log.Logger
anchorDir string
except *regexp.Regexp
logger log.Logger
anchorDir string
validateConfig Config

localLinks localLinksCache
rMu sync.RWMutex
Expand All @@ -135,15 +135,23 @@ type futureResult struct {

// NewValidator returns mdformatter.LinkTransformer that crawls all links.
// TODO(bwplotka): Add optimization and debug modes - this is the main source of latency and pain.
func NewValidator(logger log.Logger, except *regexp.Regexp, anchorDir string) (mdformatter.LinkTransformer, error) {
func NewValidator(logger log.Logger, linksValidateConfig []byte, anchorDir string) (mdformatter.LinkTransformer, error) {
var config Config
var err error
if string(linksValidateConfig) != "" {
config, err = ParseConfig(linksValidateConfig)
if err != nil {
return nil, err
}
}
v := &validator{
logger: logger,
anchorDir: anchorDir,
except: except,
localLinks: map[string]*[]string{},
remoteLinks: map[string]error{},
c: colly.NewCollector(colly.Async()),
destFutures: map[futureKey]*futureResult{},
logger: logger,
anchorDir: anchorDir,
validateConfig: config,
localLinks: map[string]*[]string{},
remoteLinks: map[string]error{},
c: colly.NewCollector(colly.Async()),
destFutures: map[futureKey]*futureResult{},
}
// Set very soft limits.
// E.g github has 50-5000 https://docs.github.com/en/free-pro-team@latest/rest/reference/rate-limit limit depending
Expand All @@ -169,12 +177,16 @@ func NewValidator(logger log.Logger, except *regexp.Regexp, anchorDir string) (m
defer v.rMu.Unlock()
v.remoteLinks[response.Ctx.Get(originalURLKey)] = errors.Wrapf(err, "%q not accessible; status code %v", response.Request.URL.String(), response.StatusCode)
})
err = v.validateConfig.validateGH()
if err != nil {
return nil, err
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
err = v.validateConfig.validateGH()
if err != nil {
return nil, err
}
return v.validateConfig.validateGH()

return v, nil
}

// MustNewValidator returns mdformatter.LinkTransformer that crawls all links.
func MustNewValidator(logger log.Logger, except *regexp.Regexp, anchorDir string) mdformatter.LinkTransformer {
v, err := NewValidator(logger, except, anchorDir)
func MustNewValidator(logger log.Logger, linksValidateConfig []byte, anchorDir string) mdformatter.LinkTransformer {
v, err := NewValidator(logger, linksValidateConfig, anchorDir)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -232,7 +244,8 @@ func (v *validator) visit(filepath string, dest string) {
return
}
v.destFutures[k] = &futureResult{cases: 1, resultFn: func() error { return nil }}
if v.except.MatchString(dest) {
validator := v.validateConfig.GetValidatorForURL(dest)
if validator.Matched {
return
}

Expand Down
42 changes: 35 additions & 7 deletions pkg/mdformatter/linktransformer/link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, 0, len(diff), diff.String())

diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, regexp.MustCompile(`^$`), anchorDir),
MustNewValidator(logger, []byte(""), anchorDir),
))
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())
Expand All @@ -157,7 +157,7 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, 0, len(diff), diff.String())

diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile, testFile2}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, regexp.MustCompile(`^$`), anchorDir),
MustNewValidator(logger, []byte(""), anchorDir),
))
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())
Expand All @@ -175,7 +175,7 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, 0, len(diff), diff.String())

diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, regexp.MustCompile(`^$`), anchorDir),
MustNewValidator(logger, []byte(""), anchorDir),
))
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())
Expand All @@ -198,7 +198,7 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, regexp.MustCompile(`^$`), anchorDir),
MustNewValidator(logger, []byte(""), anchorDir),
))
testutil.NotOk(t, err)

Expand All @@ -224,13 +224,13 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, regexp.MustCompile(`^$`), anchorDir),
MustNewValidator(logger, []byte(""), anchorDir),
))
testutil.NotOk(t, err)
testutil.Equals(t, fmt.Sprintf("%v%v: %v%v: \"https://bwplotka.dev/does-not-exists\" not accessible; status code 404: Not Found", tmpDir, filePath, relDirPath, filePath), err.Error())
})

t.Run("check 404 link, ignored", func(t *testing.T) {
t.Run("check 404 link with validate config", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-link2.md")
testutil.Ok(t, ioutil.WriteFile(testFile, []byte("https://bwplotka.dev/does-not-exists\n"), os.ModePerm))

Expand All @@ -239,7 +239,35 @@ func TestValidator_TransformDestination(t *testing.T) {
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, regexp.MustCompile(`://bwplotka.dev`), anchorDir),
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: '://bwplotka.dev'\n type: 'roundtrip'\n"), anchorDir),
))
testutil.Ok(t, err)
})

t.Run("check links with validate config", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "links.md")
testutil.Ok(t, ioutil.WriteFile(testFile, []byte("https://fakelink1.com/ http://fakelink2.com/ https://www.fakelink3.com/\n"), os.ModePerm))

diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: '(^http[s]?:\\/\\/)(www\\.)?(fakelink[0-9]\\.com\\/)'\n type: 'roundtrip'\n"), anchorDir),
))
testutil.Ok(t, err)
})

t.Run("check github links with validate config", func(t *testing.T) {
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "github-link.md")
testutil.Ok(t, ioutil.WriteFile(testFile, []byte("https://github.com/bwplotka/mdox/issues/23 https://github.com/bwplotka/mdox/pull/32 https://github.com/bwplotka/mdox/pull/27#pullrequestreview-659598194\n"), os.ModePerm))

diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
testutil.Ok(t, err)
testutil.Equals(t, 0, len(diff), diff.String())

_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
MustNewValidator(logger, []byte("version: 1\n\nvalidators:\n - regex: 'bwplotka\\/mdox'\n type: 'github'\n"), anchorDir),
))
testutil.Ok(t, err)
})
Expand Down
Loading