refactor(assembler): tokenizer and parser
exuanbo committed Nov 28, 2023
1 parent d411b6c commit 9e0628f
Showing 6 changed files with 148 additions and 106 deletions.
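The new tokenizer API appears in this diff only through its call sites (createTokenizer, hasCurrent, peekNext, consume, advance, match); the tokenizer.ts changes themselves are in the sixth file, which is not rendered below. As a reading aid, here is a rough sketch of what that interface might look like, inferred purely from how this commit uses it — the member names come from the diff, but the exact signatures and the throwing behaviour of consume are assumptions:

// Sketch only: inferred from call sites in this commit, not the actual tokenizer.ts.
// Token and TokenType are the existing exports of '@/features/assembler/core/tokenizer'.
interface Tokenizer {
  // True while at least one token is left to consume (used by the test helper and parse).
  readonly hasCurrent: boolean
  // Look at the token after the current one without consuming anything (used by parseLabel).
  peekNext(): Token | undefined
  // Return the current token and move past it; presumably throws the new
  // EndOfTokenStreamError when the stream is already exhausted.
  consume(): Token
  // Skip the current token without returning it (used to drop the colon after a label).
  advance(): void
  // Consume the current token, expecting the given type; on mismatch, throw the
  // error produced by the supplied factory (used for the comma between operands).
  match(expectedType: TokenType, onTypeMismatch: (token: Token) => Error): Token
}

declare function createTokenizer(source: string): Tokenizer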
5 changes: 1 addition & 4 deletions __tests__/features/assembler/parser.test.ts
@@ -1,11 +1,8 @@
-import { tokenize } from '@/features/assembler/core/tokenizer'
-import { Statement, parse as __parse } from '@/features/assembler/core/parser'
+import { parse } from '@/features/assembler/core/parser'
 import { shortArraySerializer } from '../../snapshotSerializers'
 
 expect.addSnapshotSerializer(shortArraySerializer)
 
-const parse = (input: string): Statement[] => __parse(tokenize(input))
-
 describe('parser', () => {
   it('should parse instruction with no operand', () => {
     expect(
11 changes: 10 additions & 1 deletion __tests__/features/assembler/tokenizer.test.ts
@@ -1,8 +1,17 @@
-import { tokenize } from '@/features/assembler/core/tokenizer'
+import { Token, createTokenizer } from '@/features/assembler/core/tokenizer'
 import { shortArraySerializer } from '../../snapshotSerializers'
 
 expect.addSnapshotSerializer(shortArraySerializer)
 
+const tokenize = (source: string): Token[] => {
+  const tokenizer = createTokenizer(source)
+  const tokens: Token[] = []
+  while (tokenizer.hasCurrent) {
+    tokens.push(tokenizer.consume())
+  }
+  return tokens
+}
+
 describe('tokenizer', () => {
   it('should skip whitespace', () => {
     expect(tokenize(' \t\ndone: \t\nend')).toMatchSnapshot()
10 changes: 10 additions & 0 deletions src/features/assembler/core/exceptions.ts
@@ -28,6 +28,16 @@ export abstract class AssemblerError extends Error {
   }
 }
 
+class TokenizeError extends AssemblerError {
+  public name = 'TokenizeError'
+}
+
+export class EndOfTokenStreamError extends TokenizeError {
+  constructor() {
+    super('Unexpected end of token stream.')
+  }
+}
+
 class ParseError extends AssemblerError {
   public name = 'ParseError'
 
3 changes: 1 addition & 2 deletions src/features/assembler/core/index.ts
@@ -1,4 +1,3 @@
-import { tokenize } from './tokenizer'
 import { OperandType, Operand, Statement, parse } from './parser'
 import {
   DuplicateLabelError,
@@ -55,7 +54,7 @@ export interface AddressToStatementMap {
 export type AssembleResult = [AddressToMachineCodeMap, Partial<AddressToStatementMap>]
 
 export const assemble = (input: string): AssembleResult => {
-  const statements = parse(tokenize(input))
+  const statements = parse(input)
   const labelToAddressMap = getLabelToAddressMap(statements)
   const addressToMachineCodeMap: AddressToMachineCodeMap = {}
   const addressToStatementMap: AddressToStatementMap = {}
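Since parse now takes the raw source string, assemble simply forwards the editor text. A minimal usage sketch, assuming the '@/features/assembler/core' alias resolves to this index module the same way the test aliases do; the two-token program mirrors the snippet from tokenizer.test.ts above:

import { assemble } from '@/features/assembler/core'

// 'done:' defines a label and 'end' terminates the program, as in the tokenizer test.
const [addressToMachineCodeMap, addressToStatementMap] = assemble('done: \t\nend')
console.log(addressToMachineCodeMap, addressToStatementMap)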
97 changes: 37 additions & 60 deletions src/features/assembler/core/parser.ts
@@ -4,9 +4,9 @@ import type {
   MnemonicWithOneOperand,
   MnemonicWithTwoOperands
 } from './types'
-import { TokenType, Token } from './tokenizer'
+import { TokenType, Token, Tokenizer, createTokenizer } from './tokenizer'
 import {
-  AssemblerError,
+  EndOfTokenStreamError,
   InvalidLabelError,
   StatementError,
   MissingEndError,
@@ -142,11 +142,15 @@ const validateLabel = (token: Token): Token => {
   return token
 }
 
-const parseLabel = (tokens: Token[], index: number): Label | null => {
-  if (tokens[index + 1]?.type !== TokenType.Colon) {
+const parseLabel = (tokenizer: Tokenizer): Label | null => {
+  const nextToken = tokenizer.peekNext()
+  if (nextToken?.type !== TokenType.Colon) {
     return null
   }
-  return createLabel(validateLabel(tokens[index]))
+  const token = tokenizer.consume()
+  const label = createLabel(validateLabel(token))
+  tokenizer.advance()
+  return label
 }
 
 const validateNumber = (token: Token): Token => {
@@ -171,13 +175,9 @@ const NUMBER_REGEXP = /^[\dA-F]+$/
 const REGISTER_REGEXP = /^[A-D]L$/
 
 const parseSingleOperand =
-  (tokens: Token[], index: number) =>
+  (tokenizer: Tokenizer) =>
   <T extends OperandType>(...expectedTypes: T[]): Operand<T> => {
-    if (index >= tokens.length) {
-      throw new MissingEndError()
-    }
-    const token = tokens[index]
-
+    const token = tokenizer.consume()
     let t: OperandType
 
     const isExpectedType = (type: OperandType): type is T =>
@@ -236,68 +236,38 @@ const parseSingleOperand =
     throw new OperandTypeError(token, ...expectedTypes)
   }
 
-const checkComma = (tokens: Token[], index: number): AssemblerError | null => {
-  if (index >= tokens.length) {
-    return new MissingEndError()
-  }
-  const token = tokens[index]
-  if (token.type !== TokenType.Comma) {
-    return new MissingCommaError(token)
-  }
-  return null
-}
-
 const parseDoubleOperands =
-  (tokens: Token[], index: number) =>
+  (tokenizer: Tokenizer) =>
   <T1 extends OperandType, T2 extends OperandType>(
     ...expectedTypePairs: Array<[firstOperandType: T1, secondOperandType: T2]>
   ): [firstOperand: Operand<T1>, secondOperand: Operand<T2>] => {
+    const parseOperand = parseSingleOperand(tokenizer)
     const possibleFirstOperandTypes: T1[] = []
     expectedTypePairs.forEach(([firstOperandType]) => {
       if (!possibleFirstOperandTypes.includes(firstOperandType)) {
        possibleFirstOperandTypes.push(firstOperandType)
      }
    })
-    const firstOperand = parseSingleOperand(tokens, index)(...possibleFirstOperandTypes)
-    const error = checkComma(tokens, index + 1)
-    if (error !== null) {
-      throw error
-    }
+    const firstOperand = parseOperand(...possibleFirstOperandTypes)
+    tokenizer.match(TokenType.Comma, token => new MissingCommaError(token))
     const possibleSecondOperandTypes: T2[] = []
     expectedTypePairs.forEach(([firstOperandType, secondOperandType]) => {
       if (firstOperandType === firstOperand.type) {
         possibleSecondOperandTypes.push(secondOperandType)
       }
     })
-    const secondOperand = parseSingleOperand(tokens, index + 2)(...possibleSecondOperandTypes)
+    const secondOperand = parseOperand(...possibleSecondOperandTypes)
     return [firstOperand, secondOperand]
   }
 
-const parseStatement = (
-  tokens: Token[],
-  __index: number
-): [statement: Statement, consumed: number] => {
-  const getIndex = (): number => __index + consumedTokenCount
-
-  let consumedTokenCount = 0
-  const consumeToken = (count: number): void => {
-    consumedTokenCount += count
-  }
-
-  const label = parseLabel(tokens, getIndex())
+const parseStatement = (tokenizer: Tokenizer): Statement => {
+  const label = parseLabel(tokenizer)
   const hasLabel = label !== null
-  if (hasLabel) {
-    consumeToken(2) // Label + Colon
-  }
 
-  const token = tokens[getIndex()]
-  if (token === undefined) {
-    throw new MissingEndError()
-  }
+  const token = tokenizer.consume()
   if (token.type !== TokenType.Unknown || !(token.value in Mnemonic)) {
     throw new StatementError(token, hasLabel)
   }
-  consumeToken(1) // instruction
 
   const instruction = createInstruction(token)
   const setOpcode = (opcode: Opcode | null): void => {
Expand All @@ -319,7 +289,7 @@ const parseStatement = (
}
case 1: {
let opcode, operand
const parseOperand = parseSingleOperand(tokens, getIndex())
const parseOperand = parseSingleOperand(tokenizer)

switch (mnemonic as MnemonicWithOneOperand) {
case Mnemonic.INC:
@@ -414,12 +384,11 @@
 
       setOpcode(opcode)
       setOperands(operand)
-      consumeToken(1) // Operand
       break
     }
     case 2: {
       let opcode, firstOperand, secondOperand
-      const parseOperands = parseDoubleOperands(tokens, getIndex())
+      const parseOperands = parseDoubleOperands(tokenizer)
 
       switch (mnemonic as MnemonicWithTwoOperands) {
         case Mnemonic.ADD:
@@ -588,21 +557,29 @@
 
       setOpcode(opcode)
       setOperands(firstOperand, secondOperand)
-      consumeToken(3) // Operand + Comma + Operand
       break
     }
   }
 
-  const statement = createStatement(label, instruction, operands)
-  return [statement, consumedTokenCount]
+  return createStatement(label, instruction, operands)
 }
 
-export const parse = (tokens: Token[]): Statement[] => {
+export const parse = (source: string): Statement[] => {
+  const tokenizer = createTokenizer(source)
   const statements: Statement[] = []
-  for (let index = 0; index < tokens.length; ) {
-    const [statement, consumed] = parseStatement(tokens, index)
-    statements.push(statement)
-    index += consumed
+  while (tokenizer.hasCurrent) {
+    try {
+      const statement = parseStatement(tokenizer)
+      statements.push(statement)
+      if (statement.instruction.mnemonic === Mnemonic.END) {
+        break
+      }
+    } catch (error) {
+      if (error instanceof EndOfTokenStreamError) {
+        throw new MissingEndError()
+      }
+      throw error
+    }
   }
   if (
     statements.length > 0 &&
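With the index and consumed-count bookkeeping gone, parse can also be called directly on source text. A small sketch of how a caller might handle the errors involved — the program string is hypothetical, and catching the exported AssemblerError base class (imported here the same way the tests import the other core modules) is an assumption about how callers are expected to report failures:

import { parse } from '@/features/assembler/core/parser'
import { AssemblerError } from '@/features/assembler/core/exceptions'

try {
  // A program missing its terminating 'end' should now surface as MissingEndError,
  // translated from the tokenizer's EndOfTokenStreamError inside parse.
  const statements = parse('done:')
  console.log(statements)
} catch (error) {
  if (error instanceof AssemblerError) {
    console.error(error.message)
  } else {
    throw error
  }
}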
(diff for the sixth changed file — presumably src/features/assembler/core/tokenizer.ts — not rendered on this page)
