Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ViewText (distribution document) parser #79

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .pnp.cjs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"@rollup/plugin-commonjs": "^15.0.0",
"@rollup/plugin-node-resolve": "^9.0.0",
"@rollup/plugin-strip": "^2.0.0",
"@types/aes-js": "^3",
"@types/jest": "^26.0.10",
"@types/pako": "^1.0.1",
"@typescript-eslint/eslint-plugin": "^3.6.1",
Expand All @@ -63,6 +64,7 @@
"typescript": "^4.0.2"
},
"dependencies": {
"aes-js": "^3.1.2",
"cfb": "^1.2.0",
"pako": "^1.0.11"
}
Expand Down
19 changes: 14 additions & 5 deletions src/parser/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import HWPVersion from '../models/version'
import Section from '../models/section'
import DocInfoParser from './DocInfoParser'
import SectionParser from './SectionParser'
import parseViewText from './parseViewText'
import ByteReader from '../utils/byteReader'
import { getBitValue } from '../utils/bitUtils'

Expand Down Expand Up @@ -119,17 +120,25 @@ function parseSection(container: CFB$Container, sectionNumber: number): Section
return new SectionParser(decodedContent).parse()
}

/**
 * Parses every body section of a document.
 *
 * Calls `parseSection` once per section index, from 0 up to (but excluding)
 * `docInfo.sectionSize`, and collects the results in index order.
 *
 * @param container Compound file container the sections are read from.
 * @param docInfo Parsed document info; only `sectionSize` is used here.
 * @returns The parsed sections, in ascending index order.
 */
function parseBodyText(container: CFB$Container, docInfo: DocInfo): Section[] {
  const parsed: Section[] = []
  let index = 0

  while (index < docInfo.sectionSize) {
    const section = parseSection(container, index)
    parsed.push(section)
    index += 1
  }

  return parsed
}

/**
 * Parses an HWP compound file into an `HWPDocument`.
 *
 * The container is opened with `read`, then the file header and the document
 * info are parsed. The sections are loaded by `parseViewText` when the header
 * flags the file as a distribution document, and by `parseBodyText` otherwise.
 *
 * @param input Raw compound-file data handed to `read`.
 * @param options Optional parsing options forwarded to `read`.
 * @returns The document built from the header, document info and sections.
 */
function parse(input: CFB$Blob, options?: CFB$ParsingOptions): HWPDocument {
  const container: CFB$Container = read(input, options)

  const header = parseFileHeader(container)
  const docInfo = parseDocInfo(container, header)

  // `sections` is declared exactly once: the former inline section loop lives
  // in parseBodyText, so keeping it here would redeclare the constant.
  const sections: Section[] = header.properties.distribution
    ? parseViewText(container)
    : parseBodyText(container, docInfo)

  return new HWPDocument(header, docInfo, sections)
}
Expand Down
78 changes: 78 additions & 0 deletions src/parser/parseViewText.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Reference 1 : https://cdn.hancom.com/link/docs/%ED%95%9C%EA%B8%80%EB%AC%B8%EC%84%9C%ED%8C%8C%EC%9D%BC%ED%98%95%EC%8B%9D_%EB%B0%B0%ED%8F%AC%EC%9A%A9%EB%AC%B8%EC%84%9C_revision1.2.hwp
// Reference 2 : https://groups.google.com/g/hwp-foss/c/d2KL2ypR89Q

import {
find,
CFB$Container,
CFB$Entry,
} from 'cfb'
import { inflate } from 'pako'
import * as aesjs from 'aes-js'

import Section from '../models/section'
import SectionParser from './SectionParser'
import ByteReader from '../utils/byteReader'

/**
 * Creates a deterministic pseudo-random number generator.
 *
 * Every call advances the internal state with
 * `state * 214013 + 2531011` (truncated to 32 bits) and returns bits 16..30
 * of the new state, i.e. an integer between 0 and 0x7FFF inclusive.
 *
 * @param seed Initial generator state (defaults to 1).
 * @returns A function that yields the next value of the sequence on each call.
 */
function createRand(seed = 1) {
  let state = seed

  const next = () => {
    const advanced = state * 214013 + 2531011
    state = advanced & 0xFFFFFFFF
    return (state >> 16) & 0x7FFF
  }

  return next
}

/**
 * Decrypts a buffer with the aes-js ECB mode of operation.
 *
 * @param cipherText Encrypted bytes.
 * @param decKey Raw key bytes used to build the cipher.
 * @returns The decrypted bytes produced by aes-js.
 */
function decrypt(cipherText: ArrayBuffer, decKey: ArrayBuffer) {
  const keyBytes = new Uint8Array(decKey)
  // eslint-disable-next-line new-cap
  const cipher = new aesjs.ModeOfOperation.ecb(keyBytes)
  const cipherBytes = new Uint8Array(cipherText)
  return cipher.decrypt(cipherBytes)
}

/**
 * Derives the 16-byte decryption key from the payload of the leading record
 * of a ViewText section stream.
 *
 * Layout handled here:
 * - bytes 0..3 are read as a little-endian int32 seed;
 * - the seed drives `createRand`, which yields (mask, run length) pairs: each
 *   run of 1..16 consecutive bytes is XOR-ed with the same mask byte, for at
 *   most the first 256 bytes of the payload;
 * - the key is the 16 unmasked bytes starting at `4 + (seed & 0xF)`.
 *
 * NOTE(review): judging by the original local names, the unmasked region up
 * to byte 80 presumably holds a SHA-1 digest stored as UCS-2 text — confirm
 * against the distribution-document specification.
 *
 * @param data Record payload (256 bytes are unmasked at most).
 * @returns A new 16-byte `ArrayBuffer` holding the key.
 * @throws RangeError If `data` is too short to contain the seed or the key.
 */
function getDecryptionKey(data: ArrayBuffer) : ArrayBuffer {
  const SEED_BYTES = 4
  const MASKED_BYTES = 256
  const KEY_BYTES = 16

  if (data.byteLength < SEED_BYTES) {
    throw new RangeError('Distribution data is too short to contain a seed')
  }

  const encoded = new Uint8Array(data)
  // Read the seed straight from the buffer; no intermediate copy is needed.
  const seed = new DataView(data).getInt32(0, true)
  const offset = SEED_BYTES + (seed & 0xF)

  if (encoded.length < offset + KEY_BYTES) {
    // Fail here with a clear message instead of handing a truncated key to
    // the cipher.
    throw new RangeError('Distribution data is too short to contain a key')
  }

  // Never index past the payload, even if it is shorter than 256 bytes.
  const limit = Math.min(MASKED_BYTES, encoded.length)
  const decoded = new Uint8Array(limit)
  const rand = createRand(seed)

  let mask = 0
  let remaining = 0
  for (let j = 0; j < limit; j += 1) {
    if (remaining === 0) {
      // Order matters: the mask is drawn first, then the run length.
      mask = rand() & 0xFF
      remaining = (rand() & 0xF) + 1
    }
    decoded[j] = encoded[j] ^ mask
    remaining -= 1
  }

  // Return a real ArrayBuffer of exactly KEY_BYTES, as the signature states.
  // `offset` is at most 19, so the key always lies inside the first 80 bytes.
  const key = new ArrayBuffer(KEY_BYTES)
  new Uint8Array(key).set(decoded.subarray(offset, offset + KEY_BYTES))
  return key
}

/**
 * Decodes a single ViewText section stream into a `Section`.
 *
 * Steps, in order:
 * 1. read the leading record header and take its size field;
 * 2. read that many bytes as the key-derivation payload;
 * 3. treat every remaining byte of the stream as the encrypted body;
 * 4. decrypt the body, inflate it with `windowBits: -15`, and hand the
 *    result to `SectionParser`.
 *
 * @param entry Compound-file entry of the section stream.
 * @returns The parsed section.
 */
function parseViewTextSection(entry: CFB$Entry): Section {
  // Copy the entry content so the reader works on a buffer of its own.
  const raw = new Uint8Array(entry.content)
  const reader = new ByteReader(raw.buffer)

  // Only the third element (size) of the record header is needed.
  const record = reader.readRecord()
  const payloadSize = record[2]

  const keyPayload = reader.read(payloadSize)
  const cipherText = reader.read(reader.remainByte())

  const plain = decrypt(cipherText, getDecryptionKey(keyPayload))
  const inflated: Uint8Array = inflate(plain, { windowBits: -15 })

  return new SectionParser(inflated).parse()
}

/**
 * Parses all sections stored under `Root Entry/ViewText/`.
 *
 * The number of sections is the number of container paths starting with
 * `Root Entry/ViewText/Section`. Sections are looked up by index
 * (`Section0`, `Section1`, ...); an index without a matching entry is
 * skipped. An empty array is returned when the ViewText storage is missing
 * or holds no section path.
 *
 * @param container Compound file container to read from.
 * @returns The parsed sections, in ascending index order.
 */
function parseViewText(container: CFB$Container): Section[] {
  const viewTextRoot = find(container, 'Root Entry/ViewText/')
  const sectionCount = container.FullPaths
    .filter((path: string) => path.startsWith('Root Entry/ViewText/Section'))
    .length

  if (!viewTextRoot || sectionCount === 0) {
    return []
  }

  const parsed: Section[] = []
  for (let index = 0; index < sectionCount; index += 1) {
    const entry = find(container, `Root Entry/ViewText/Section${index}`)
    if (entry !== null && entry !== undefined) {
      parsed.push(parseViewTextSection(entry))
    }
  }

  return parsed
}

export default parseViewText
16 changes: 16 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3393,6 +3393,13 @@ __metadata:
languageName: node
linkType: hard

"@types/aes-js@npm:^3":
version: 3.1.1
resolution: "@types/aes-js@npm:3.1.1"
checksum: 388d21adbaa70548f9e94947430f53b54d13f99408fc6461227c2529b9dbaa5397a135e8bb4584206c65a3ef837b868b34602816dc7c20f1537ea9ef23a74bd9
languageName: node
linkType: hard

"@types/babel__core@npm:^7.0.0, @types/babel__core@npm:^7.1.7":
version: 7.1.9
resolution: "@types/babel__core@npm:7.1.9"
Expand Down Expand Up @@ -4324,6 +4331,13 @@ __metadata:
languageName: node
linkType: hard

"aes-js@npm:^3.1.2":
version: 3.1.2
resolution: "aes-js@npm:3.1.2"
checksum: 062154d50b1e433cc8c3b8ca7879f3a6375d5e79c2a507b2b6c4ec920b4cd851bf2afa7f65c98761a9da89c0ab618cbe6529e8e9a1c71f93290b53128fb8f712
languageName: node
linkType: hard

"agent-base@npm:6, agent-base@npm:^6.0.2":
version: 6.0.2
resolution: "agent-base@npm:6.0.2"
Expand Down Expand Up @@ -10935,10 +10949,12 @@ fsevents@~2.3.2:
"@rollup/plugin-commonjs": ^15.0.0
"@rollup/plugin-node-resolve": ^9.0.0
"@rollup/plugin-strip": ^2.0.0
"@types/aes-js": ^3
"@types/jest": ^26.0.10
"@types/pako": ^1.0.1
"@typescript-eslint/eslint-plugin": ^3.6.1
"@typescript-eslint/parser": ^3.9.1
aes-js: ^3.1.2
babel-jest: ^26.3.0
cfb: ^1.2.0
eslint: ^7.7.0
Expand Down