-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[maketmpl] add readme notebook parsing and writing (#351)
A Python notebook can be converted into two different flavors of markdown files: one is GitHub-flavored, for checking in and updating in the source; the other is for releasing, with all images inlined, to be used in the Anyscale product and docs pages.
- Loading branch information
Showing
5 changed files
with
356 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
package maketmpl | ||
|
||
import ( | ||
"fmt" | ||
"io" | ||
"log" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
) | ||
|
||
type readmeFile struct { | ||
notebookFile string | ||
|
||
md []byte | ||
imgs []*mdImage | ||
} | ||
|
||
func (f *readmeFile) writeInto(w io.Writer, imgOpts *writeImgOptions) error { | ||
// Copies the markdown file into writer w, but rewrites all images | ||
// in the markdown file following options in imgOpts | ||
|
||
// cursor is the point where things have copied. It is an offset in the | ||
// source markdown file, which is f.md | ||
cursor := 0 | ||
|
||
for i, img := range f.imgs { | ||
// Copy any part that is before the next image first. | ||
// img.start is the starting point of the markdown image. | ||
if cursor < img.start { | ||
if _, err := w.Write(f.md[cursor:img.start]); err != nil { | ||
return fmt.Errorf("write markdown: %w", err) | ||
} | ||
} | ||
|
||
// Write out the image. | ||
if err := img.writeInto(w, imgOpts); err != nil { | ||
return fmt.Errorf("write image %d: %w", i, err) | ||
} | ||
|
||
// Forward the read cursor to the end of the image. | ||
cursor = img.end | ||
} | ||
|
||
// Copy the rest. | ||
if cursor < len(f.md) { | ||
if _, err := w.Write(f.md[cursor:]); err != nil { | ||
return fmt.Errorf("write markdown: %w", err) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (f *readmeFile) writeIntoFile(p string, imgOpts *writeImgOptions) error { | ||
out, err := os.Create(p) | ||
if err != nil { | ||
return fmt.Errorf("create output file: %w", err) | ||
} | ||
defer out.Close() | ||
|
||
if err := f.writeInto(out, imgOpts); err != nil { | ||
return err | ||
} | ||
|
||
if err := out.Sync(); err != nil { | ||
return fmt.Errorf("sync output file: %w", err) | ||
} | ||
|
||
if err := out.Close(); err != nil { | ||
return fmt.Errorf("close output file: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (f *readmeFile) writeGitHubMD(path string) error { | ||
// GitHub flavored markdown does not support inline images | ||
// and forbids having inlined styles. | ||
imgOpts := &writeImgOptions{ | ||
inlineSrc: false, | ||
sizeInStyle: false, | ||
} | ||
return f.writeIntoFile(path, imgOpts) | ||
} | ||
|
||
func (f *readmeFile) writeReleaseMD(path, baseDir string) error { | ||
// This is for rendering in doc pages and other Web sites. | ||
// and we want the file to be reliable, consistent and self-contained. | ||
imgOpts := &writeImgOptions{ | ||
inlineSrc: true, | ||
inlineSrcDir: baseDir, | ||
sizeInStyle: true, | ||
} | ||
return f.writeIntoFile(path, imgOpts) | ||
} | ||
|
||
func readReadmeFile(path string) (*readmeFile, error) { | ||
md, err := os.ReadFile(path) | ||
if err != nil { | ||
return nil, fmt.Errorf("read file: %w", err) | ||
} | ||
|
||
imgs, err := parseMdImages(md) | ||
if err != nil { | ||
return nil, fmt.Errorf("parse images: %w", err) | ||
} | ||
|
||
return &readmeFile{ | ||
md: md, | ||
imgs: imgs, | ||
}, nil | ||
} | ||
|
||
func readmeFromNotebook(f string) (*readmeFile, error) { | ||
stat, err := os.Stat(f) | ||
if err != nil { | ||
return nil, fmt.Errorf("stat notebook file: %w", err) | ||
} | ||
if stat.IsDir() { | ||
return nil, fmt.Errorf("notebook is a directory") | ||
} | ||
|
||
tmpDir, err := os.MkdirTemp("", "maketmpl_*") | ||
if err != nil { | ||
return nil, fmt.Errorf("create temp dir: %w", err) | ||
} | ||
defer os.RemoveAll(tmpDir) | ||
|
||
cmd := exec.Command( | ||
"jupyter", "nbconvert", "--to", "markdown", | ||
f, "--output", "README", | ||
"--output-dir", tmpDir, | ||
) | ||
cmd.Stdout = os.Stdout | ||
cmd.Stderr = os.Stderr | ||
log.Println(cmd.Args) | ||
|
||
if err := cmd.Run(); err != nil { | ||
return nil, fmt.Errorf("convert notebook: %w", err) | ||
} | ||
|
||
outputFile := filepath.Join(tmpDir, "README.md") | ||
|
||
readme, err := readReadmeFile(outputFile) | ||
if err != nil { | ||
return nil, err | ||
} | ||
readme.notebookFile = f | ||
|
||
return readme, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
package maketmpl | ||
|
||
import ( | ||
"errors" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
"strings" | ||
"testing" | ||
) | ||
|
||
func TestReadmeFile_writeGitHubMD(t *testing.T) { | ||
tmp := t.TempDir() | ||
|
||
content := strings.Join([]string{ | ||
"# example", | ||
"", | ||
"![img1](img1.png)", | ||
"![img2](img2.png)", | ||
`<img src='img3.png' width=400px />`, | ||
"some extra text", | ||
}, "\n") | ||
|
||
path := filepath.Join(tmp, "README.md") | ||
if err := os.WriteFile(path, []byte(content), 0600); err != nil { | ||
t.Fatal("write readme: ", err) | ||
} | ||
|
||
readme, err := readReadmeFile(path) | ||
if err != nil { | ||
t.Fatal("read readme: ", err) | ||
} | ||
|
||
output := filepath.Join(tmp, "README.github.md") | ||
if err := readme.writeGitHubMD(output); err != nil { | ||
t.Fatal("write github md: ", err) | ||
} | ||
|
||
got, err := os.ReadFile(output) | ||
if err != nil { | ||
t.Fatal("read output: ", err) | ||
} | ||
|
||
want := strings.Join([]string{ | ||
"# example", | ||
"", | ||
`<img src="img1.png" alt="img1" />`, | ||
`<img src="img2.png" alt="img2" />`, | ||
`<img src="img3.png" width="400px" />`, | ||
"some extra text", | ||
}, "\n") | ||
|
||
if string(got) != want { | ||
t.Errorf("got:\n---\n%s\n---\nwant:\n---\n%s\n---\n", got, want) | ||
} | ||
} | ||
|
||
func TestReadmeFile_writeReleaseMD(t *testing.T) { | ||
tmp := t.TempDir() | ||
|
||
content := strings.Join([]string{ | ||
"# example", | ||
"", | ||
`![img1](img1.png)<img src='img2.png' width=400px />extra text`, | ||
}, "\n") | ||
|
||
for _, file := range []struct { | ||
name, content string | ||
}{ | ||
{"img1.png", "img1"}, | ||
{"img2.png", "img2"}, | ||
{"README.md", content}, | ||
} { | ||
path := filepath.Join(tmp, file.name) | ||
if err := os.WriteFile(path, []byte(file.content), 0600); err != nil { | ||
t.Fatalf("write %q: %s", file.name, err) | ||
} | ||
} | ||
|
||
input := filepath.Join(tmp, "README.md") | ||
readme, err := readReadmeFile(input) | ||
if err != nil { | ||
t.Fatal("read readme: ", err) | ||
} | ||
|
||
output := filepath.Join(tmp, "README.release.md") | ||
if err := readme.writeReleaseMD(output, tmp); err != nil { | ||
t.Fatal("write release md: ", err) | ||
} | ||
|
||
got, err := os.ReadFile(output) | ||
if err != nil { | ||
t.Fatal("read output: ", err) | ||
} | ||
|
||
want := strings.Join([]string{ | ||
"# example", | ||
"", | ||
strings.Join([]string{ | ||
`<img src="data:image/png;base64,aW1nMQ==" alt="img1" />`, | ||
`<img src="data:image/png;base64,aW1nMg==" style="width: 400px" />`, | ||
`extra text`, | ||
}, ""), | ||
}, "\n") | ||
|
||
if string(got) != want { | ||
t.Errorf("got:\n---\n%s\n---\nwant:\n---\n%s\n---\n", got, want) | ||
} | ||
} | ||
|
||
// findJupyter reports whether a "jupyter" executable can be resolved with
// exec.LookPath. A plain not-found result yields (false, nil); any other
// lookup failure is returned as the error.
func findJupyter() (bool, error) {
	_, err := exec.LookPath("jupyter")
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, exec.ErrNotFound):
		return false, nil
	default:
		return false, err
	}
}
|
||
func checkJupyterOrSkipOnLocal(t *testing.T) { | ||
t.Helper() | ||
|
||
if ok, err := findJupyter(); err != nil { | ||
t.Fatal(err) | ||
} else if !ok { | ||
if os.Getenv("CI") == "" { | ||
t.Skip("jupyter not found; skip the test as it is not on CI.") | ||
} else { | ||
t.Fatal("jupyter not found") | ||
} | ||
} | ||
} | ||
|
||
func TestReadmeFromNotebook(t *testing.T) { | ||
checkJupyterOrSkipOnLocal(t) | ||
|
||
tmp := t.TempDir() | ||
|
||
f, err := readmeFromNotebook("testdata/reefy-ray/README.ipynb") | ||
if err != nil { | ||
t.Fatal("read readme from notebook: ", err) | ||
} | ||
|
||
output := filepath.Join(tmp, "README.github.md") | ||
if err := f.writeGitHubMD(output); err != nil { | ||
t.Fatal("write github md: ", err) | ||
} | ||
|
||
got, err := os.ReadFile(output) | ||
if err != nil { | ||
t.Fatal("read output: ", err) | ||
} | ||
|
||
want := strings.Join([]string{ | ||
"# Test example", | ||
"", | ||
`<img src="a.png" width="400px" />`, | ||
"", | ||
"and some text", | ||
"", | ||
"", | ||
"```python", | ||
`print("this is just an example")`, | ||
"```", | ||
"", | ||
}, "\n") | ||
|
||
if string(got) != want { | ||
t.Errorf("got:\n---\n%s\n---\nwant:\n---\n%s\n---\n", got, want) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Test example\n", | ||
"\n", | ||
"<img src=\"a.png\" width=\"400px\" />\n", | ||
"\n", | ||
"and some text" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"print(\"this is just an example\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.