Skip to content

Commit

Permalink
[maketmpl] add readme notebook parsing and writing (#351)
Browse files Browse the repository at this point in the history
a python notebook can be converted into two different flavors of
markdown files: one is GitHub flavored, for checking in and updating
in the source; the other is for releasing, with all images inlined, to be
used in anyscale product and docs pages
  • Loading branch information
aslonnie authored Sep 26, 2024
1 parent dab6de6 commit e94f75f
Show file tree
Hide file tree
Showing 5 changed files with 356 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/premerge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
run: bash -ec "(cd ci; go fmt ./... ; git diff --exit-code)"

- name: Test
run: bash -ec "(cd ci; go test -v ./...)"
run: CI=1 bash -ec "(cd ci; go test -v ./...)"

- name: Build templates
run: bash build.sh
152 changes: 152 additions & 0 deletions ci/maketmpl/readme.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package maketmpl

import (
"fmt"
"io"
"log"
"os"
"os/exec"
"path/filepath"
)

// readmeFile is an in-memory markdown README together with the images
// that were located inside it, so that the document can be written back
// out with its images rewritten.
type readmeFile struct {
// notebookFile is the path of the notebook this README was converted
// from. It is set by readmeFromNotebook and left empty by readReadmeFile.
notebookFile string

// md is the raw content of the markdown file.
md []byte
// imgs holds the images returned by parseMdImages for md. writeInto uses
// each image's start and end as offsets into md.
imgs []*mdImage
}

// writeInto streams the markdown in f.md to w, replacing every image in
// f.imgs with the rendering produced by the image's own writeInto under
// imgOpts. Text between images is copied through unchanged.
//
// It returns the first error hit while writing text or an image.
func (f *readmeFile) writeInto(w io.Writer, imgOpts *writeImgOptions) error {
	// copyText emits f.md[from:to] verbatim; empty or inverted ranges are
	// skipped without touching the writer.
	copyText := func(from, to int) error {
		if from >= to {
			return nil
		}
		if _, err := w.Write(f.md[from:to]); err != nil {
			return fmt.Errorf("write markdown: %w", err)
		}
		return nil
	}

	// pos is the offset in f.md up to which the source has been consumed.
	pos := 0
	for idx := range f.imgs {
		image := f.imgs[idx]

		// Flush the plain text that sits in front of this image.
		if err := copyText(pos, image.start); err != nil {
			return err
		}

		// Emit the rewritten image in place of its original markup.
		if err := image.writeInto(w, imgOpts); err != nil {
			return fmt.Errorf("write image %d: %w", idx, err)
		}

		// Skip past the original image markup in the source.
		pos = image.end
	}

	// Whatever follows the last image is plain text.
	return copyText(pos, len(f.md))
}

// writeIntoFile creates (or truncates) the file at p and writes the
// readme into it with images rewritten per imgOpts. The file is synced
// and closed before returning, and failures of either step are reported.
func (f *readmeFile) writeIntoFile(p string, imgOpts *writeImgOptions) error {
	dst, createErr := os.Create(p)
	if createErr != nil {
		return fmt.Errorf("create output file: %w", createErr)
	}
	// Safety net for the early-return paths below; its result is ignored.
	defer dst.Close()

	if writeErr := f.writeInto(dst, imgOpts); writeErr != nil {
		return writeErr
	}

	if syncErr := dst.Sync(); syncErr != nil {
		return fmt.Errorf("sync output file: %w", syncErr)
	}

	// Close explicitly so that a close failure is surfaced to the caller.
	if closeErr := dst.Close(); closeErr != nil {
		return fmt.Errorf("close output file: %w", closeErr)
	}

	return nil
}

// writeGitHubMD writes the readme to path in the GitHub flavor: image
// sources are kept as references and sizes are not moved into styles,
// because GitHub flavored markdown does not support inline images and
// forbids inlined styles.
func (f *readmeFile) writeGitHubMD(path string) error {
	var opts writeImgOptions
	opts.inlineSrc = false
	opts.sizeInStyle = false

	return f.writeIntoFile(path, &opts)
}

// writeReleaseMD writes the readme to path in the release flavor, meant
// for doc pages and other Web sites. Image sources are inlined, with
// baseDir passed on as the inline source directory, and sizes are put in
// styles, so that the output is reliable, consistent and self-contained.
func (f *readmeFile) writeReleaseMD(path, baseDir string) error {
	var opts writeImgOptions
	opts.inlineSrc = true
	opts.inlineSrcDir = baseDir
	opts.sizeInStyle = true

	return f.writeIntoFile(path, &opts)
}

// readReadmeFile loads the markdown file at path and parses the images
// in it with parseMdImages. The returned readmeFile has no notebookFile
// set.
func readReadmeFile(path string) (*readmeFile, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("read file: %w", err)
	}

	images, err := parseMdImages(raw)
	if err != nil {
		return nil, fmt.Errorf("parse images: %w", err)
	}

	readme := new(readmeFile)
	readme.md = raw
	readme.imgs = images
	return readme, nil
}

// readmeFromNotebook converts the notebook at f to markdown by running
// "jupyter nbconvert" into a temporary directory, then loads the
// generated README.md with readReadmeFile. The temporary directory is
// removed before returning. The converter's stdout and stderr are
// forwarded to this process, and the command line is logged.
func readmeFromNotebook(f string) (*readmeFile, error) {
	info, err := os.Stat(f)
	switch {
	case err != nil:
		return nil, fmt.Errorf("stat notebook file: %w", err)
	case info.IsDir():
		return nil, fmt.Errorf("notebook is a directory")
	}

	workDir, err := os.MkdirTemp("", "maketmpl_*")
	if err != nil {
		return nil, fmt.Errorf("create temp dir: %w", err)
	}
	defer os.RemoveAll(workDir)

	args := []string{
		"nbconvert", "--to", "markdown",
		f, "--output", "README",
		"--output-dir", workDir,
	}
	convert := exec.Command("jupyter", args...)
	convert.Stdout = os.Stdout
	convert.Stderr = os.Stderr
	log.Println(convert.Args)

	if err := convert.Run(); err != nil {
		return nil, fmt.Errorf("convert notebook: %w", err)
	}

	// The conversion above was asked to write README.md into workDir.
	readme, err := readReadmeFile(filepath.Join(workDir, "README.md"))
	if err != nil {
		return nil, err
	}
	readme.notebookFile = f

	return readme, nil
}
172 changes: 172 additions & 0 deletions ci/maketmpl/readme_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
package maketmpl

import (
"errors"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)

// TestReadmeFile_writeGitHubMD checks that markdown-style and HTML-style
// images are both rewritten to <img> tags that keep their original
// source references, and that the surrounding text is left untouched.
func TestReadmeFile_writeGitHubMD(t *testing.T) {
	dir := t.TempDir()
	srcPath := filepath.Join(dir, "README.md")
	dstPath := filepath.Join(dir, "README.github.md")

	srcLines := []string{
		"# example",
		"",
		"![img1](img1.png)",
		"![img2](img2.png)",
		`<img src='img3.png' width=400px />`,
		"some extra text",
	}
	wantLines := []string{
		"# example",
		"",
		`<img src="img1.png" alt="img1" />`,
		`<img src="img2.png" alt="img2" />`,
		`<img src="img3.png" width="400px" />`,
		"some extra text",
	}

	src := strings.Join(srcLines, "\n")
	if err := os.WriteFile(srcPath, []byte(src), 0600); err != nil {
		t.Fatal("write readme: ", err)
	}

	readme, err := readReadmeFile(srcPath)
	if err != nil {
		t.Fatal("read readme: ", err)
	}

	if err := readme.writeGitHubMD(dstPath); err != nil {
		t.Fatal("write github md: ", err)
	}

	got, err := os.ReadFile(dstPath)
	if err != nil {
		t.Fatal("read output: ", err)
	}

	if want := strings.Join(wantLines, "\n"); string(got) != want {
		t.Errorf("got:\n---\n%s\n---\nwant:\n---\n%s\n---\n", got, want)
	}
}

// TestReadmeFile_writeReleaseMD checks that the release flavor inlines
// image files as base64 data URIs and moves the width attribute into an
// inline style, including when images and text share a single line.
func TestReadmeFile_writeReleaseMD(t *testing.T) {
	dir := t.TempDir()

	readmeSrc := strings.Join([]string{
		"# example",
		"",
		`![img1](img1.png)<img src='img2.png' width=400px />extra text`,
	}, "\n")

	// Lay out the fake image files and the readme side by side.
	type fixture struct {
		name, content string
	}
	fixtures := []fixture{
		{name: "img1.png", content: "img1"},
		{name: "img2.png", content: "img2"},
		{name: "README.md", content: readmeSrc},
	}
	for _, fx := range fixtures {
		target := filepath.Join(dir, fx.name)
		if err := os.WriteFile(target, []byte(fx.content), 0600); err != nil {
			t.Fatalf("write %q: %s", fx.name, err)
		}
	}

	readme, err := readReadmeFile(filepath.Join(dir, "README.md"))
	if err != nil {
		t.Fatal("read readme: ", err)
	}

	dstPath := filepath.Join(dir, "README.release.md")
	if err := readme.writeReleaseMD(dstPath, dir); err != nil {
		t.Fatal("write release md: ", err)
	}

	got, err := os.ReadFile(dstPath)
	if err != nil {
		t.Fatal("read output: ", err)
	}

	// Both images and the trailing text stay on one line in the output.
	imageLine := strings.Join([]string{
		`<img src="data:image/png;base64,aW1nMQ==" alt="img1" />`,
		`<img src="data:image/png;base64,aW1nMg==" style="width: 400px" />`,
		`extra text`,
	}, "")
	want := strings.Join([]string{"# example", "", imageLine}, "\n")

	if string(got) != want {
		t.Errorf("got:\n---\n%s\n---\nwant:\n---\n%s\n---\n", got, want)
	}
}

// findJupyter reports whether a "jupyter" executable can be located via
// exec.LookPath. A lookup that fails with exec.ErrNotFound is reported as
// (false, nil); any other lookup failure is returned as an error.
func findJupyter() (bool, error) {
	_, err := exec.LookPath("jupyter")
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, exec.ErrNotFound):
		return false, nil
	default:
		return false, err
	}
}

// checkJupyterOrSkipOnLocal makes sure jupyter is available for the
// calling test. When jupyter is missing, the test is skipped if the CI
// environment variable is empty, and failed otherwise. A failure to look
// jupyter up at all always fails the test.
func checkJupyterOrSkipOnLocal(t *testing.T) {
	t.Helper()

	found, err := findJupyter()
	if err != nil {
		t.Fatal(err)
	}
	if found {
		return
	}

	// A non-empty CI variable means a missing jupyter is a hard failure.
	if os.Getenv("CI") != "" {
		t.Fatal("jupyter not found")
	}
	t.Skip("jupyter not found; skip the test as it is not on CI.")
}

// TestReadmeFromNotebook converts the notebook fixture under testdata
// with the real jupyter tool and checks the GitHub-flavored output. It
// needs jupyter to be available; see checkJupyterOrSkipOnLocal.
func TestReadmeFromNotebook(t *testing.T) {
	checkJupyterOrSkipOnLocal(t)

	const notebook = "testdata/reefy-ray/README.ipynb"
	dstPath := filepath.Join(t.TempDir(), "README.github.md")

	readme, err := readmeFromNotebook(notebook)
	if err != nil {
		t.Fatal("read readme from notebook: ", err)
	}

	if err := readme.writeGitHubMD(dstPath); err != nil {
		t.Fatal("write github md: ", err)
	}

	got, err := os.ReadFile(dstPath)
	if err != nil {
		t.Fatal("read output: ", err)
	}

	// The trailing empty element makes the expected text end in a newline.
	wantLines := []string{
		"# Test example",
		"",
		`<img src="a.png" width="400px" />`,
		"",
		"and some text",
		"",
		"",
		"```python",
		`print("this is just an example")`,
		"```",
		"",
	}

	if want := strings.Join(wantLines, "\n"); string(got) != want {
		t.Errorf("got:\n---\n%s\n---\nwant:\n---\n%s\n---\n", got, want)
	}
}
31 changes: 31 additions & 0 deletions ci/maketmpl/testdata/reefy-ray/README.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test example\n",
"\n",
"<img src=\"a.png\" width=\"400px\" />\n",
"\n",
"and some text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"this is just an example\")"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added ci/maketmpl/testdata/reefy-ray/a.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit e94f75f

Please sign in to comment.