From 4ad178cd054d336e1e78342ac30cd53987f860b1 Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Mon, 30 Sep 2024 11:42:41 -0700 Subject: [PATCH 1/9] Add action to generate pdf diff --- .github/CODEOWNERS | 2 + .github/workflows/markdown-diff.yml | 72 +++++++++++++++++++++++++++++ README.md | 6 +++ 3 files changed, 80 insertions(+) create mode 100644 .github/CODEOWNERS create mode 100644 .github/workflows/markdown-diff.yml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..7952df2 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# The roar-irb-maintainers team will own all markdown files. +*.md @yeatmanlab/roar-irb-maintainers diff --git a/.github/workflows/markdown-diff.yml b/.github/workflows/markdown-diff.yml new file mode 100644 index 0000000..ce2735e --- /dev/null +++ b/.github/workflows/markdown-diff.yml @@ -0,0 +1,72 @@ +name: Markdown Diff to PDF + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: + - main + +jobs: + check-markdown: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install Dependencies + run: | + sudo apt-get update + sudo apt-get install -y pandoc diffpdf + + - name: Check for modified Markdown files + id: markdown_check + run: | + MODIFIED_MARKDOWN=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '\.md' || true) + echo "MODIFIED_MARKDOWN=$MODIFIED_MARKDOWN" >> $GITHUB_ENV + + - name: Generate individual PDFs for modified markdown files + if: env.MODIFIED_MARKDOWN != '' + run: | + mkdir -p pdfs + for FILE in $MODIFIED_MARKDOWN; do + # Generate PDF of the current version of the file + pandoc "$FILE" -o "pdfs/$(basename $FILE .md)_current.pdf" + # Generate PDF of the previous version of the file + git show ${{ github.event.pull_request.base.sha }}:"$FILE" > "old_$FILE" + pandoc "old_$FILE" -o "pdfs/$(basename $FILE .md)_previous.pdf" + # Create diff PDF + diffpdf "pdfs/$(basename $FILE .md)_previous.pdf" "pdfs/$(basename $FILE .md)_current.pdf" "pdfs/$(basename $FILE .md)_diff.pdf" + done + + - name: Combine PDFs with page breaks + if: env.MODIFIED_MARKDOWN != '' + run: | + pdftk pdfs/*_diff.pdf cat output combined_diffs.pdf + + - name: Upload PDFs as artifacts + if: env.MODIFIED_MARKDOWN != '' + uses: actions/upload-artifact@v3 + with: + name: markdown_diffs + path: pdfs/ + + - name: Upload combined diff PDF as artifact + if: env.MODIFIED_MARKDOWN != '' + uses: actions/upload-artifact@v3 + with: + name: combined_markdown_diff + path: combined_diffs.pdf + + - name: Post PR comment with download links + if: env.MODIFIED_MARKDOWN != '' + run: | + LINKS="" + for FILE in $MODIFIED_MARKDOWN; do + LINKS="${LINKS}- [Download diff for $(basename $FILE)](${{ github.server_url }}/${{ github.repository }}/actions/artifacts/markdown_diffs/$(basename $FILE .md)_diff.pdf)\n" + done + LINKS="${LINKS}\n- [Download combined diff](${{ github.server_url }}/${{ github.repository }}/actions/artifacts/combined_markdown_diff/combined_diffs.pdf)" + curl -X POST -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Content-Type: application/json" \ + -d "{\"body\": \"### Markdown Diff PDFs:\n\n${LINKS}\"}" \ + "${{ github.event.pull_request.url }}/comments" diff --git a/README.md b/README.md index 0f13f31..b24bb4e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,13 @@ # roar-legal-documents + A repository that hosts all relevant legal documents for ROAR research. This repository currently includes: + - Consolidated Assent - Terms of Service - District 2 Consent Form - Web-Based Behavioral Consent - Web-Based Behavioral Consent (Eye-Tracking) + +## Deployment + +Insert steps here for how to modify a markdown file. From 79c93e89cfd5725b3cb4272f13bb1cb27ab8bccd Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Mon, 30 Sep 2024 15:15:48 -0700 Subject: [PATCH 2/9] Check out entire history in GitHub action --- .github/workflows/markdown-diff.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/markdown-diff.yml b/.github/workflows/markdown-diff.yml index ce2735e..9d0846a 100644 --- a/.github/workflows/markdown-diff.yml +++ b/.github/workflows/markdown-diff.yml @@ -13,6 +13,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch the entire history to avoid shallow clone issues - name: Install Dependencies run: | From 245bfaebf4e27c75411e7edf692992120eeee291 Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Mon, 30 Sep 2024 15:18:29 -0700 Subject: [PATCH 3/9] Install pdflatex --- .github/workflows/markdown-diff.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/markdown-diff.yml b/.github/workflows/markdown-diff.yml index 9d0846a..5da61ad 100644 --- a/.github/workflows/markdown-diff.yml +++ b/.github/workflows/markdown-diff.yml @@ -19,7 +19,7 @@ jobs: - name: Install Dependencies run: | sudo apt-get update - sudo apt-get install -y pandoc diffpdf + sudo apt-get install -y pandoc texlive-latex-base diffpdf - name: Check for modified Markdown files id: markdown_check From 3b12ef727d3b78d3cef11a5a3438dc8336b558d6 Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Mon, 30 Sep 2024 15:21:29 -0700 Subject: [PATCH 4/9] Ugh, install other LaTeX packages --- .github/workflows/markdown-diff.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/markdown-diff.yml b/.github/workflows/markdown-diff.yml index 5da61ad..4795910 100644 --- a/.github/workflows/markdown-diff.yml +++ b/.github/workflows/markdown-diff.yml @@ -19,7 +19,8 @@ jobs: - name: Install Dependencies run: | sudo apt-get update - sudo apt-get install -y pandoc texlive-latex-base diffpdf + sudo apt-get install -y pandoc texlive-latex-base texlive-latex-recommended texlive-fonts-recommended texlive-fonts-extra diffpdf + - name: Check for modified Markdown files id: markdown_check From 9f9db923e1c5a06fef99754b8a73cd26806ea3d7 Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Mon, 30 Sep 2024 15:32:24 -0700 Subject: [PATCH 5/9] Use pdfdiff instead of diffpdf --- .github/workflows/markdown-diff.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/markdown-diff.yml b/.github/workflows/markdown-diff.yml index 4795910..5aa08e8 100644 --- a/.github/workflows/markdown-diff.yml +++ b/.github/workflows/markdown-diff.yml @@ -19,8 +19,7 @@ jobs: - name: Install Dependencies run: | sudo apt-get update - sudo apt-get install -y pandoc texlive-latex-base texlive-latex-recommended texlive-fonts-recommended texlive-fonts-extra diffpdf - + sudo apt-get install -y pandoc texlive-latex-base texlive-latex-recommended texlive-fonts-recommended texlive-fonts-extra poppler-utils - name: Check for modified Markdown files id: markdown_check @@ -39,7 +38,7 @@ jobs: git show ${{ github.event.pull_request.base.sha }}:"$FILE" > "old_$FILE" pandoc "old_$FILE" -o "pdfs/$(basename $FILE .md)_previous.pdf" # Create diff PDF - diffpdf "pdfs/$(basename $FILE .md)_previous.pdf" "pdfs/$(basename $FILE .md)_current.pdf" "pdfs/$(basename $FILE .md)_diff.pdf" + pdfdiff "pdfs/$(basename $FILE .md)_previous.pdf" "pdfs/$(basename $FILE .md)_current.pdf" "pdfs/$(basename $FILE .md)_diff.pdf" done - name: Combine PDFs with page breaks From 72873e9b2471a12abccf1299adf7cc6c2a3f643d Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Mon, 30 Sep 2024 15:39:26 -0700 Subject: [PATCH 6/9] use diff2html instead --- .github/workflows/markdown-diff.yml | 129 ++++++++++++++-------------- 1 file changed, 64 insertions(+), 65 deletions(-) diff --git a/.github/workflows/markdown-diff.yml b/.github/workflows/markdown-diff.yml index 5aa08e8..148722f 100644 --- a/.github/workflows/markdown-diff.yml +++ b/.github/workflows/markdown-diff.yml @@ -1,74 +1,73 @@ -name: Markdown Diff to PDF +name: Generate Diff PDFs on: pull_request: types: [opened, synchronize, reopened] branches: - main - + jobs: - check-markdown: + generate_pdfs: runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Fetch the entire history to avoid shallow clone issues - - - name: Install Dependencies - run: | - sudo apt-get update - sudo apt-get install -y pandoc texlive-latex-base texlive-latex-recommended texlive-fonts-recommended texlive-fonts-extra poppler-utils - - - name: Check for modified Markdown files - id: markdown_check - run: | - MODIFIED_MARKDOWN=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '\.md' || true) - echo "MODIFIED_MARKDOWN=$MODIFIED_MARKDOWN" >> $GITHUB_ENV - - - name: Generate individual PDFs for modified markdown files - if: env.MODIFIED_MARKDOWN != '' - run: | - mkdir -p pdfs - for FILE in $MODIFIED_MARKDOWN; do - # Generate PDF of the current version of the file - pandoc "$FILE" -o "pdfs/$(basename $FILE .md)_current.pdf" - # Generate PDF of the previous version of the file - git show ${{ github.event.pull_request.base.sha }}:"$FILE" > "old_$FILE" - pandoc "old_$FILE" -o "pdfs/$(basename $FILE .md)_previous.pdf" - # Create diff PDF - pdfdiff "pdfs/$(basename $FILE .md)_previous.pdf" "pdfs/$(basename $FILE .md)_current.pdf" "pdfs/$(basename $FILE .md)_diff.pdf" - done - - - name: Combine PDFs with page breaks - if: env.MODIFIED_MARKDOWN != '' - run: | - pdftk pdfs/*_diff.pdf cat output combined_diffs.pdf - - - name: Upload PDFs as artifacts - if: env.MODIFIED_MARKDOWN != '' - uses: actions/upload-artifact@v3 - with: - name: markdown_diffs - path: pdfs/ - - - name: Upload combined diff PDF as artifact - if: env.MODIFIED_MARKDOWN != '' - uses: actions/upload-artifact@v3 - with: - name: combined_markdown_diff - path: combined_diffs.pdf - - - name: Post PR comment with download links - if: env.MODIFIED_MARKDOWN != '' - run: | - LINKS="" - for FILE in $MODIFIED_MARKDOWN; do - LINKS="${LINKS}- [Download diff for $(basename $FILE)](${{ github.server_url }}/${{ github.repository }}/actions/artifacts/markdown_diffs/$(basename $FILE .md)_diff.pdf)\n" - done - LINKS="${LINKS}\n- [Download combined diff](${{ github.server_url }}/${{ github.repository }}/actions/artifacts/combined_markdown_diff/combined_diffs.pdf)" - curl -X POST -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ - -H "Content-Type: application/json" \ - -d "{\"body\": \"### Markdown Diff PDFs:\n\n${LINKS}\"}" \ - "${{ github.event.pull_request.url }}/comments" + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch full history to compare branches + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: '16' + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y wkhtmltopdf + npm install -g diff2html-cli + - name: Fetch main branch + run: | + git fetch origin main + - name: Generate diffs and PDFs + run: | + # Get list of modified markdown files + MODIFIED_FILES=$(git diff --name-only origin/main...HEAD -- '*.md') + echo "Modified markdown files:" + echo "$MODIFIED_FILES" + mkdir pdfs + # Generate individual PDFs + for FILE in $MODIFIED_FILES; do + echo "Processing $FILE" + # Generate diff in HTML + git diff origin/main...HEAD -- "$FILE" > diff.txt + diff2html -i file --file diff.txt -F diff.html -s line + # Convert HTML to PDF + OUTPUT_FILE="pdfs/${FILE//\//_}.pdf" + wkhtmltopdf diff.html "$OUTPUT_FILE" + done + # Generate combined PDF + COMBINED_HTML="combined_diff.html" + echo "" > $COMBINED_HTML + for FILE in $MODIFIED_FILES; do + echo "

Diff for $FILE


" >> $COMBINED_HTML + git diff origin/main...HEAD -- "$FILE" > diff.txt + diff2html -i file --file diff.txt -s line >> $COMBINED_HTML + echo "
" >> $COMBINED_HTML + done + echo "" >> $COMBINED_HTML + # Convert combined HTML to PDF + wkhtmltopdf $COMBINED_HTML "pdfs/combined_diff.pdf" + - name: Upload PDFs + uses: actions/upload-artifact@v3 + with: + name: diff_pdfs + path: pdfs/ + - name: Comment on PR with download link + uses: actions/github-script@v6 + with: + script: | + const artifactUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${process.env.GITHUB_RUN_ID}`; + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `📄 Diff PDFs have been generated and uploaded as artifacts for this workflow run. You can download them from the [Artifacts section of the workflow run](${artifactUrl}).` + }); From c1b1b7699da0148bcbba1add195472769d15d043 Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Mon, 30 Sep 2024 15:45:53 -0700 Subject: [PATCH 7/9] Correctly pass input file --- .github/workflows/markdown-diff.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/markdown-diff.yml b/.github/workflows/markdown-diff.yml index 148722f..1d1eb41 100644 --- a/.github/workflows/markdown-diff.yml +++ b/.github/workflows/markdown-diff.yml @@ -32,13 +32,12 @@ jobs: MODIFIED_FILES=$(git diff --name-only origin/main...HEAD -- '*.md') echo "Modified markdown files:" echo "$MODIFIED_FILES" - mkdir pdfs + mkdir -p pdfs # Generate individual PDFs for FILE in $MODIFIED_FILES; do echo "Processing $FILE" - # Generate diff in HTML - git diff origin/main...HEAD -- "$FILE" > diff.txt - diff2html -i file --file diff.txt -F diff.html -s line + # Generate diff and convert to HTML + git diff origin/main...HEAD -- "$FILE" | diff2html -i stdin -s line -o stdout > diff.html # Convert HTML to PDF OUTPUT_FILE="pdfs/${FILE//\//_}.pdf" wkhtmltopdf diff.html "$OUTPUT_FILE" @@ -48,8 +47,7 @@ jobs: echo "" > $COMBINED_HTML for FILE in $MODIFIED_FILES; do echo "

Diff for $FILE


" >> $COMBINED_HTML - git diff origin/main...HEAD -- "$FILE" > diff.txt - diff2html -i file --file diff.txt -s line >> $COMBINED_HTML + git diff origin/main...HEAD -- "$FILE" | diff2html -i stdin -s line -o stdout >> $COMBINED_HTML echo "
" >> $COMBINED_HTML done echo "" >> $COMBINED_HTML From 048a5450cd9f59886a66bbca98a7b7a0a74bd1c7 Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Tue, 1 Oct 2024 06:01:04 -0700 Subject: [PATCH 8/9] Try new workflow for pdf diff --- .github/workflows/pdf-diff.yml | 91 ++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 .github/workflows/pdf-diff.yml diff --git a/.github/workflows/pdf-diff.yml b/.github/workflows/pdf-diff.yml new file mode 100644 index 0000000..afc8501 --- /dev/null +++ b/.github/workflows/pdf-diff.yml @@ -0,0 +1,91 @@ +name: Generate Rendered Diff PDFs + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: + - main + +jobs: + generate_pdfs: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch full history to compare branches + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y pandoc texlive-latex-base texlive-latex-extra texlive-fonts-recommended latexdiff latexmk ghostscript + mkdir -p old_version new_version diffs pdfs + + - name: Fetch main branch + run: | + git fetch origin main + + - name: Generate rendered diffs and PDFs + run: | + # Get list of modified markdown files + MODIFIED_FILES=$(git diff --name-only origin/main...HEAD -- '*.md') + echo "Modified markdown files:" + echo "$MODIFIED_FILES" + mkdir -p pdfs + COMBINED_DIFFS="" + for FILE in $MODIFIED_FILES; do + echo "Processing $FILE" + + # Extract old version from main branch + git show origin/main:"$FILE" > old_version/"$(basename "$FILE")" + + # Copy new version + cp "$FILE" new_version/ + + # Convert both versions to LaTeX + pandoc old_version/"$(basename "$FILE")" -o old_version/"$(basename "$FILE" .md)".tex + pandoc new_version/"$(basename "$FILE")" -o new_version/"$(basename "$FILE" .md)".tex + + # Run latexdiff to get the diffed LaTeX file + latexdiff old_version/"$(basename "$FILE" .md)".tex new_version/"$(basename "$FILE" .md)".tex > diffs/"$(basename "$FILE" .md)"_diff.tex + + # Compile the diffed LaTeX file to PDF + cd diffs + latexmk -pdf -quiet "$(basename "$FILE" .md)"_diff.tex + cd .. + + # Move the generated PDF to the pdfs directory + mv diffs/"$(basename "$FILE" .md)"_diff.pdf pdfs/ + + # Append to combined diffs + COMBINED_DIFFS="$COMBINED_DIFFS \includepdf[pages=-]{pdfs/$(basename "$FILE" .md)_diff.pdf}" + done + + # Generate combined PDF if there are diffs + if [ -n "$COMBINED_DIFFS" ]; then + echo "\documentclass{article} + \usepackage{pdfpages} + \begin{document} + $COMBINED_DIFFS + \end{document}" > combined_diff.tex + latexmk -pdf -quiet combined_diff.tex + mv combined_diff.pdf pdfs/ + fi + + - name: Upload PDFs + uses: actions/upload-artifact@v3 + with: + name: rendered_diff_pdfs + path: pdfs/ + + - name: Comment on PR with download link + uses: actions/github-script@v6 + with: + script: | + const artifactUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${process.env.GITHUB_RUN_ID}`; + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `📄 Rendered diff PDFs have been generated and uploaded as artifacts for this workflow run. You can download them from the [Artifacts section of the workflow run](${artifactUrl}).` + }); From b41207c8b0c0c3d72c7e63b87bde1e103c8ba46e Mon Sep 17 00:00:00 2001 From: Adam Richie-Halford Date: Tue, 1 Oct 2024 06:07:31 -0700 Subject: [PATCH 9/9] Use xelatex instead --- .github/workflows/pdf-diff.yml | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pdf-diff.yml b/.github/workflows/pdf-diff.yml index afc8501..7e7fd5b 100644 --- a/.github/workflows/pdf-diff.yml +++ b/.github/workflows/pdf-diff.yml @@ -18,7 +18,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y pandoc texlive-latex-base texlive-latex-extra texlive-fonts-recommended latexdiff latexmk ghostscript + sudo apt-get install -y pandoc texlive-latex-base texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-fonts-extra latexdiff latexmk ghostscript mkdir -p old_version new_version diffs pdfs - name: Fetch main branch @@ -27,6 +27,12 @@ jobs: - name: Generate rendered diffs and PDFs run: | + # Create a LaTeX preamble with necessary packages + echo "\usepackage{fontspec} + \setmainfont{DejaVu Serif} + \setsansfont{DejaVu Sans} + \setmonofont{DejaVu Sans Mono}" > preamble.tex + # Get list of modified markdown files MODIFIED_FILES=$(git diff --name-only origin/main...HEAD -- '*.md') echo "Modified markdown files:" @@ -42,16 +48,16 @@ jobs: # Copy new version cp "$FILE" new_version/ - # Convert both versions to LaTeX - pandoc old_version/"$(basename "$FILE")" -o old_version/"$(basename "$FILE" .md)".tex - pandoc new_version/"$(basename "$FILE")" -o new_version/"$(basename "$FILE" .md)".tex + # Convert both versions to LaTeX with preamble + pandoc old_version/"$(basename "$FILE")" --include-in-header=preamble.tex -o old_version/"$(basename "$FILE" .md)".tex + pandoc new_version/"$(basename "$FILE")" --include-in-header=preamble.tex -o new_version/"$(basename "$FILE" .md)".tex - # Run latexdiff to get the diffed LaTeX file - latexdiff old_version/"$(basename "$FILE" .md)".tex new_version/"$(basename "$FILE" .md)".tex > diffs/"$(basename "$FILE" .md)"_diff.tex + # Run latexdiff to get the diffed LaTeX file, including preamble + latexdiff --encoding=utf8 --preamble "\input{preamble.tex}" old_version/"$(basename "$FILE" .md)".tex new_version/"$(basename "$FILE" .md)".tex > diffs/"$(basename "$FILE" .md)"_diff.tex - # Compile the diffed LaTeX file to PDF + # Compile the diffed LaTeX file to PDF using xelatex cd diffs - latexmk -pdf -quiet "$(basename "$FILE" .md)"_diff.tex + latexmk -pdf -xelatex -interaction=nonstopmode -quiet "$(basename "$FILE" .md)"_diff.tex cd .. # Move the generated PDF to the pdfs directory @@ -68,7 +74,7 @@ jobs: \begin{document} $COMBINED_DIFFS \end{document}" > combined_diff.tex - latexmk -pdf -quiet combined_diff.tex + latexmk -pdf -xelatex -interaction=nonstopmode -quiet combined_diff.tex mv combined_diff.pdf pdfs/ fi