Skip to content

Commit

Permalink
Merge pull request #14 from kmlbgn/bugfix/notion-links
Browse files Browse the repository at this point in the history
Bugfix/notion links
  • Loading branch information
kmlbgn authored Dec 19, 2023
2 parents 00e43f4 + 932fef5 commit 6e76350
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 48 deletions.
4 changes: 2 additions & 2 deletions docu-notion.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ const dummyMarkdownModifier: IPlugin = {
const config: IDocuNotionConfig = {
plugins: [
// here we're adding a plugin that needs a parameter for customization
dummyBlockModifier("foobar"),
// dummyBlockModifier("foobar"),
// here we're adding a plugin that doesn't take any customization
dummyMarkdownModifier,
// dummyMarkdownModifier,
],
};

Expand Down
82 changes: 68 additions & 14 deletions src/NotionPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import { GetPageResponse } from "@notionhq/client/build/src/api-endpoints";
import { parseLinkId } from "./plugins/internalLinks";
import { ListBlockChildrenResponseResults } from "notion-to-md/build/types";
import { error, warning, verbose } from "./log";

// Notion has 2 kinds of pages: a normal one which is just content, and what I'm calling a "database page", which has whatever properties you put on it.
// docu-notion supports the latter via links from outline pages. That is, you put the database pages in a database, then separately, in the outline, you
Expand Down Expand Up @@ -44,7 +45,7 @@ export class NotionPage {
const { baseLinkId } = parseLinkId(id);

const match =
baseLinkId === this.pageId || // from a link_to_page.pageId, which still has the dashes
baseLinkId === this.pageId || // from a mention.pageId, which still has the dashes
baseLinkId === this.pageId.replaceAll("-", ""); // from inline links, which are lacking the dashes

// logDebug(
Expand Down Expand Up @@ -249,28 +250,81 @@ export class NotionPage {
}
}

// @@@ docu-notion original
///
// public async getContentInfo(
// children: ListBlockChildrenResponseResults
// ): Promise<{
// childPageIdsAndOrder: { id: string; order: number }[];
// linksPageIdsAndOrder: { id: string; order: number }[];
// hasParagraphs: boolean;
// }> {
// for (let i = 0; i < children.length; i++) {
// (children[i] as any).order = i;
// if ((children[i] as any).type === "mention") {
// }
// }
// return {
// childPageIdsAndOrder: children
// .filter((b: any) => b.type === "child_page")
// .map((b: any) => ({ id: b.id, order: b.order })),
// linksPageIdsAndOrder: children
// .filter((b: any) => b.type === "mention")
// .map((b: any) => ({ id: b.mention.page.id, order: b.order })),
// hasParagraphs: children.some(
// b =>
// (b as any).type === "paragraph" &&
// (b as any).paragraph.rich_text.length > 0
// ),
// };
// }

public async getContentInfo(
children: ListBlockChildrenResponseResults
): Promise<{
childPageIdsAndOrder: { id: string; order: number }[];
linksPageIdsAndOrder: { id: string; order: number }[];
hasParagraphs: boolean;
}> {
const childPageIdsAndOrder: { id: string; order: number }[] = [];
const linksPageIdsAndOrder: { id: string; order: number }[] = [];
let hasParagraphs = false;

for (let i = 0; i < children.length; i++) {
(children[i] as any).order = i;
const block = children[i] as any;
block.order = i;

// Check if the block is a paragraph with rich_text
if (block.type === "paragraph" && block.paragraph.rich_text) {

// Initially set hasParagraphs to true if rich_text is not empty
hasParagraphs = hasParagraphs || block.paragraph.rich_text.length > 0;

// Filter to find mentions of type 'page'
const pageMentions = block.paragraph.rich_text.filter((element: any) => element.type === "mention" && element.mention?.type === "page");

// Filter to find empty text nodes
const emptyTextNodes = block.paragraph.rich_text.filter((element: any) => element.type === "text" && element.text.content.trim() === "");

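// Check if there is exactly one page mention and one empty text node: if so, the paragraph is a bare link to page, so it is recorded in linksPageIdsAndOrder and hasParagraphs is reset to false.
// If not, the mention sits within other text and the paragraph counts as regular content.
// NOTE(review): resetting hasParagraphs to false here also discards a true value set by an earlier paragraph — confirm this is intended.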
if (pageMentions.length === 1 && emptyTextNodes.length === 1) {
linksPageIdsAndOrder.push({ id: pageMentions[0].mention.page.id, order: i });
hasParagraphs = false;
verbose(`Found a link to page with page ID: ${pageMentions[0].mention.page.id}`);
}
}

// Add child pages to the childPageIdsAndOrder array
if (block.type === "child_page") {
childPageIdsAndOrder.push({ id: block.id, order: block.order });
}
}

return {
childPageIdsAndOrder: children
.filter((b: any) => b.type === "child_page")
.map((b: any) => ({ id: b.id, order: b.order })),
linksPageIdsAndOrder: children
.filter((b: any) => b.type === "link_to_page")
.map((b: any) => ({ id: b.link_to_page.page_id, order: b.order })),
hasParagraphs: children.some(
b =>
(b as any).type === "paragraph" &&
(b as any).paragraph.rich_text.length > 0
),
childPageIdsAndOrder,
linksPageIdsAndOrder,
hasParagraphs,
};
}
}
}
1 change: 1 addition & 0 deletions src/plugins/internalLinks.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ test("raw link to an existing page on this site that has a slug", async () => {
last_edited_time: "2023-06-14T20:09:00.000Z",
has_children: false,
archived: false,
// TODO: mention has replaced link_to_page
type: "link_to_page",
link_to_page: {
type: "page_id",
Expand Down
8 changes: 4 additions & 4 deletions src/plugins/internalLinks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ function convertInternalLink(
function convertLinkLabel(targetPage: NotionPage, text: string): string {
// In Notion, if you just add a link to a page without linking it to any text, then in Notion
// you see the name of the page as the text of the link. But when Notion gives us that same
// link, it uses "link_to_page" as the text. So we have to look up the name of the page in
// link, it uses "mention" as the text. So we have to look up the name of the page in
// order to fix that.
if (text !== "link_to_page") return text;
if (text !== "mention") return text;
else return targetPage.nameOrTitle;
}
function convertLinkHref(
Expand Down Expand Up @@ -123,9 +123,9 @@ export const standardInternalLinkConversion: IPlugin = {
linkModifier: {
// from notion (or notion-md?) we get slightly different hrefs depending on whether the link is "inline"
// (has some other text that's been turned into a link) or "raw".
// Raw links come in without a leading slash, e.g. [link_to_page](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
// Raw links come in without a leading slash, e.g. [mention](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
// Inline links come in with a leading slash, e.g. [pointer to the introduction](/4a6de8c0b90b444b8a7bd534d6ec71a4)
match: /\[([^\]]+)?\]\((?!mailto:)(\/?[^),^/]+)\)/,
convert: convertInternalLink,
},
};
};
1 change: 0 additions & 1 deletion src/plugins/pluginTestRun.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ export async function blocksToMarkdown(
output_normally: 0,
skipped_because_empty: 0,
skipped_because_status: 0,
skipped_because_level_cannot_have_content: 0,
},
// enhance: this needs more thinking, how we want to do logging in tests
// one thing is to avoid a situation where we break people's tests that
Expand Down
28 changes: 10 additions & 18 deletions src/pull.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ const counts = {
output_normally: 0,
skipped_because_empty: 0,
skipped_because_status: 0,
skipped_because_level_cannot_have_content: 0,
};

export async function notionPull(options: DocuNotionOptions): Promise<void> {
Expand Down Expand Up @@ -191,18 +190,17 @@ async function getPagesRecursively(

const r = await getBlockChildren(pageInTheOutline.pageId);
const pageInfo = await pageInTheOutline.getContentInfo(r);
verbose(`Links Page ids :${pageInfo.linksPageIdsAndOrder.length}`);
verbose(`Childs Page ids :${pageInfo.childPageIdsAndOrder.length}`);
verbose(`Has paragraphs ? > ${pageInfo.hasParagraphs}`);


if (
!rootLevel &&
pageInfo.hasParagraphs &&
pageInfo.childPageIdsAndOrder.length
) {
(pageInfo.childPageIdsAndOrder.length > 0 || pageInfo.linksPageIdsAndOrder.length > 0)
){
warning(`Note: The page "${pageInTheOutline.nameOrTitle}" contains both childrens and content so it should produce a level with an index page`);
// error(
// `Skipping "${pageInTheOutline.nameOrTitle}" and its children. docu-notion does not support pages that are both levels and have content at the same time.`
// );
// ++counts.skipped_because_level_cannot_have_content;
// return;

// set IsCategory flag
(pageInTheOutline.metadata as any).parent.IsCategory = true;
Expand Down Expand Up @@ -241,24 +239,18 @@ async function getPagesRecursively(
}
}

// Simple content pages are pushed here
if (!rootLevel && pageInfo.hasParagraphs) {
warning(`Note: The page "${pageInTheOutline.nameOrTitle}" is a simple content page.`);
pages.push(pageInTheOutline);

// The best practice is to keep content pages in the "database" (e.g. kanban board), but we do allow people to make pages in the outline directly.
// So how can we tell the difference between a page that is supposed to be content and one that is meant to form the sidebar? If it
// has only links, then it's a page for forming the sidebar. If it has contents and no links, then it's a content page. But what if
// it has both? Well then we assume it's a content page.
if (pageInfo.linksPageIdsAndOrder?.length) {
warning(
`Note: The page "${pageInTheOutline.nameOrTitle}" is in the outline, has content and also points at other pages but doesn't have childrens. It will be treated as a simple content page. This is no problem, unless you intended to have all your content pages in the database (kanban workflow) section.`
);
}
}

// a normal outline page that exists just to create the level, pointing at database pages that belong in this level
else if (
pageInfo.childPageIdsAndOrder.length ||
pageInfo.linksPageIdsAndOrder.length
) {
warning(`Note: The page "${pageInTheOutline.nameOrTitle}" only has child pages or links to page; it's a level without index.`);
let layoutContext = incomingContext;
// don't make a level for "Outline" page at the root
if (!rootLevel && pageInTheOutline.nameOrTitle !== "Outline") {
Expand Down
33 changes: 25 additions & 8 deletions src/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,31 @@ export async function getMarkdownFromNotionBlocks(
config: IDocuNotionConfig,
blocks: Array<NotionBlock>
): Promise<string> {
// Filter out child page blocks before converting to markdown because there is no case where we want the content of an actual child page to be appended to the index.md of its category level index.md
const filteredBlocks = blocks.filter(block => block.type !== 'child_page');

const containsColumnList = filteredBlocks.some(block => block.type === 'column_list');
verbose(
`Column_list type detected? >> "${containsColumnList}" `
);

// Level page index.md content filter: drop child_page blocks and paragraphs that consist solely of one page mention plus one empty text node (i.e. a bare link to page); keep every other block.
// Note: this filter is applied to EVERY page. We assume child_page blocks and bare page-mention paragraphs are used only for the purpose of creating a new page.
// If you want to use links to other pages, you'll have to put a bit of text in the block.
const filteredBlocks = blocks.filter((block: any) => {
// Filter out 'child_page' type blocks
if (block.type === 'child_page') {
return false;
}

// For paragraph blocks, check if they consist of a mention and an empty text node
if (block.type === 'paragraph' && block.paragraph.rich_text.length === 2) {
const [element1, element2] = block.paragraph.rich_text;

// Check for one mention of type 'page' and one empty text node
const isPageMention = (element: any) => element.type === 'mention' && element.mention?.type === 'page';
const isEmptyTextNode = (element: any) => element.type === 'text' && element.text?.content.trim() === '';

if ((isPageMention(element1) && isEmptyTextNode(element2)) || (isPageMention(element2) && isEmptyTextNode(element1))) {
// Filter out this block
return false;
}
}
return true;
});

// changes to the blocks we get from notion API
doNotionBlockTransforms(filteredBlocks, config);
Expand Down Expand Up @@ -191,7 +208,7 @@ async function doNotionToMarkdown(
// corrections to links after they are converted to markdown
// Note: from notion (or notion-md?) we get slightly different hrefs depending on whether the link is "inline"
// (has some other text that's been turned into a link) or "raw".
// Raw links come in without a leading slash, e.g. [link_to_page](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
// Raw links come in without a leading slash, e.g. [mention](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
// Inline links come in with a leading slash, e.g. [pointer to the introduction](/4a6de8c0b90b444b8a7bd534d6ec71a4)
function doLinkFixes(
context: IDocuNotionContext,
Expand Down
1 change: 0 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@ export type ICounts = {
output_normally: number;
skipped_because_empty: number;
skipped_because_status: number;
skipped_because_level_cannot_have_content: number;
};

0 comments on commit 6e76350

Please sign in to comment.