Skip to content

experimental compiler of minimalistic language to nasm x86_64 for linux

Notifications You must be signed in to change notification settings

calint/compiler-2

Repository files navigation

compiler-2: baz

experimental compiler of minimalistic language to nasm x86_64 for linux

intention:

  • minimalistic language
  • gain experience writing compilers
  • generate handwritten-like assembler compiled by nasm for x86_64
  • super loop program with non-reentrant inlined functions
  • support re-entrant non-inlined functions

supports:

  • built-in integer types (64, 32, 16, 8 bit)
  • built-in boolean type
  • user defined types
  • inlined functions
  • keywords: inline, func, field, var, loop, if, else, continue, break

howto:

  • run 'make.sh' to build the compiler, which then compiles 'prog.baz' and assembles the generated code
  • after that, use './run-baz.sh myprogram.baz' to compile and run a given program, or './run-baz.sh' without an argument to compile and run 'prog.baz'
  • to run the tests, run 'qa/coverage/run-tests.sh' and see the coverage report in 'qa/coverage/report/'

sample code:

field   hello = "hello world from baz\n"
field   input = "............................................................"
field prompt1 = "enter name:\n"
field prompt2 = "that is not a name.\n"
field prompt3 = "hello "
field     dot = "."
field      nl = "\n"

func inline exit(v : reg_rdi) {
    mov(rax, 60)  # exit system call
    mov(rdi, v)   # return code
    syscall()
}

func inline print(len : reg_rdx, ptr : reg_rsi) {
    mov(rax, 1)   # write system call
    mov(rdi, 1)   # file descriptor for standard out
    mov(rsi, ptr) # buffer address 
    mov(rdx, len) # buffer size
    syscall()
}

func inline read(len : reg_rdx, ptr : reg_rsi) : i64 nbytes {
    mov(rax, 0)   # read system call
    mov(rdi, 0)   # file descriptor for standard input
    mov(rsi, ptr) # buffer address
    mov(rdx, len) # buffer size
    syscall()
    mov(nbytes, rax) # return value
}

func assert(expr : bool) {
    if not expr exit(1)
}

type point {x, y}

type object {pos : point, color : i32}

func inline foo(p : point) {
    p.x = 0b10    # binary value 2
    p.y = 0xb     # hex value 11
}

func inline main() {
    var p : point = {0, 0}
    foo(p)
    # inlined functions can write to the arguments
    assert(p.x == 2)
    assert(p.y == 0xb)

    var x = 1
    var y = 2

    var o1 : object = {{x * 10, y}, 0xff0000}
    assert(o1.pos.x == 10)
    assert(o1.pos.y == 2)
    assert(o1.color == 0xff0000)

    var p1 : point = {-x, -y}
    o1.pos = p1
    assert(o1.pos.x == -1)
    assert(o1.pos.y == -2)

    var o2 : object = o1
    assert(o2.pos.x == -1)
    assert(o2.pos.y == -2)
    assert(o2.color == 0xff0000)

    print(hello.len, hello)
    loop {
        print(prompt1.len, prompt1)
        var len = read(input.len, input) - 1    # -1 don't include the '\n'
        if len == 0 {
            break
        }
        if len <= 4 {
            print(prompt2.len, prompt2)
            continue
        }
        print(prompt3.len, prompt3)
        print(len, input)
        print(dot.len, dot)
        print(nl.len, nl)
    }
}

generates:

; generated by baz

true equ 1
false equ 0

section .data
align 4
;[1:1] field hello = "hello world from baz\n" 
hello: db 'hello world from baz',10,''
hello.len equ $-hello
;[2:1] field input = "............................................................" 
input: db '............................................................'
input.len equ $-input
;[3:1] field prompt1 = "enter name:\n" 
prompt1: db 'enter name:',10,''
prompt1.len equ $-prompt1
;[4:1] field prompt2 = "that is not a name.\n" 
prompt2: db 'that is not a name.',10,''
prompt2.len equ $-prompt2
;[5:1] field prompt3 = "hello " 
prompt3: db 'hello '
prompt3.len equ $-prompt3
;[6:1] field dot = "." 
dot: db '.'
dot.len equ $-dot
;[7:1] field nl = "\n" 
nl: db '',10,''
nl.len equ $-nl

section .bss
align 4
stk resd 1024
stk.end:

section .text
align 4
bits 64
global _start
_start:
mov rsp,stk.end
mov rbp,rsp
jmp main

assert:
;  assert(expr : bool) 
   push rbp
   mov rbp, rsp
;  expr: byte[rbp+16]
   if_33_8:
;  [33:8] ? not expr 
;  [33:8] ? not expr 
   cmp_33_8:
   cmp byte[rbp+16], 0
   jne if_33_5_end
   if_33_8_code:  ; opt1
;    [33:17] exit(1) 
;    exit(v : reg_rdi) 
;      inline: 33_17
;      alloc rdi
;      alias v -> rdi
       mov rdi, 1
;      [10:5] mov(rax, 60) 
       mov rax, 60
;      [10:19] # exit system call 
;      [11:5] mov(rdi, v) 
;      [11:19] # return code 
;      [12:5] syscall() 
       syscall
;      free rdi
     exit_33_17_end:
   if_33_5_end:
   pop rbp
   ret

main:
;  p: qword[rbp-16]
;  [46:5] var p : point = {0, 0}
;  [46:9] p : point = {0, 0}
;  [46:21] {0, 0}
;    [46:21] {0, 0}
;    [46:22] 0
;    [46:22] 0
;    [46:22] p.x = 0
     mov qword[rbp-16], 0
;    [46:25] 0
;    [46:25] 0
;    [46:25] p.y = 0
     mov qword[rbp-8], 0
;  [47:5] foo(p) 
;  foo(p : point) 
;    inline: 47_5
;    alias p -> p
;    [41:5] p.x = 0b10 
;    [41:11] 0b10 
;    [41:11] 0b10 
;    [41:11] p.x = 0b10 
     mov qword[rbp-16], 0b10
;    [41:19] # binary value 2 
;    [42:5] p.y = 0xb 
;    [42:11] 0xb 
;    [42:11] 0xb 
;    [42:11] p.y = 0xb 
     mov qword[rbp-8], 0xb
;    [42:19] # hex value 11 
   foo_47_5_end:
;  [48:5] # inlined functions can write to the arguments 
;  [49:5] assert(p.x == 2) 
   sub rsp, 16
;  alloc r15
;    [49:12] p.x == 2
;    [49:12] ? p.x == 2
;    [49:12] ? p.x == 2
     cmp_49_12:
     cmp qword[rbp-16], 2
     jne bool_false_49_12
     bool_true_49_12:  ; opt1
     mov r15, true
     jmp bool_end_49_12
     bool_false_49_12:
     mov r15, false
     bool_end_49_12:
   push r15
;  free r15
   call assert
   add rsp, 24
;  [50:5] assert(p.y == 0xb) 
   sub rsp, 16
;  alloc r15
;    [50:12] p.y == 0xb
;    [50:12] ? p.y == 0xb
;    [50:12] ? p.y == 0xb
     cmp_50_12:
     cmp qword[rbp-8], 0xb
     jne bool_false_50_12
     bool_true_50_12:  ; opt1
     mov r15, true
     jmp bool_end_50_12
     bool_false_50_12:
     mov r15, false
     bool_end_50_12:
   push r15
;  free r15
   call assert
   add rsp, 24
;  x: qword[rbp-24]
;  [52:5] var x = 1 
;  [52:9] x = 1 
;  [52:13] 1 
;  [52:13] 1 
;  [52:13] x = 1 
   mov qword[rbp-24], 1
;  y: qword[rbp-32]
;  [53:5] var y = 2 
;  [53:9] y = 2 
;  [53:13] 2 
;  [53:13] 2 
;  [53:13] y = 2 
   mov qword[rbp-32], 2
;  o1: qword[rbp-52]
;  [55:5] var o1 : object = {{x * 10, y}, 0xff0000}
;  [55:9] o1 : object = {{x * 10, y}, 0xff0000}
;  [55:23] {{x * 10, y}, 0xff0000}
;    [55:23] {{x * 10, y}, 0xff0000}
;      [55:24] {x * 10, y}
;      [55:25] x * 10
;      alloc r15
;      [55:25] x * 10
;      [55:25] r15 = x 
       mov r15, qword[rbp-24]
;      [55:29] r15 * 10
       imul r15, 10
       mov qword[rbp-52], r15
;      free r15
;      [55:33] y
;      [55:33] y
;      [55:33] o1.pos.y = y
;      alloc r15
       mov r15, qword[rbp-32]
       mov qword[rbp-44], r15
;      free r15
;    [55:37] 0xff0000
;    [55:37] 0xff0000
;    [55:37] o1.color = 0xff0000
     mov dword[rbp-36], 0xff0000
;  [56:5] assert(o1.pos.x == 10) 
   sub rsp, 52
;  alloc r15
;    [56:12] o1.pos.x == 10
;    [56:12] ? o1.pos.x == 10
;    [56:12] ? o1.pos.x == 10
     cmp_56_12:
     cmp qword[rbp-52], 10
     jne bool_false_56_12
     bool_true_56_12:  ; opt1
     mov r15, true
     jmp bool_end_56_12
     bool_false_56_12:
     mov r15, false
     bool_end_56_12:
   push r15
;  free r15
   call assert
   add rsp, 60
;  [57:5] assert(o1.pos.y == 2) 
   sub rsp, 52
;  alloc r15
;    [57:12] o1.pos.y == 2
;    [57:12] ? o1.pos.y == 2
;    [57:12] ? o1.pos.y == 2
     cmp_57_12:
     cmp qword[rbp-44], 2
     jne bool_false_57_12
     bool_true_57_12:  ; opt1
     mov r15, true
     jmp bool_end_57_12
     bool_false_57_12:
     mov r15, false
     bool_end_57_12:
   push r15
;  free r15
   call assert
   add rsp, 60
;  [58:5] assert(o1.color == 0xff0000) 
   sub rsp, 52
;  alloc r15
;    [58:12] o1.color == 0xff0000
;    [58:12] ? o1.color == 0xff0000
;    [58:12] ? o1.color == 0xff0000
     cmp_58_12:
     cmp dword[rbp-36], 0xff0000
     jne bool_false_58_12
     bool_true_58_12:  ; opt1
     mov r15, true
     jmp bool_end_58_12
     bool_false_58_12:
     mov r15, false
     bool_end_58_12:
   push r15
;  free r15
   call assert
   add rsp, 60
;  p1: qword[rbp-68]
;  [60:5] var p1 : point = {-x, -y}
;  [60:9] p1 : point = {-x, -y}
;  [60:22] {-x, -y}
;    [60:22] {-x, -y}
;    [60:23] -x
;    [60:23] -x
;    [60:24] p1.x = -x
;    alloc r15
     mov r15, qword[rbp-24]
     mov qword[rbp-68], r15
;    free r15
     neg qword[rbp-68]
;    [60:27] -y
;    [60:27] -y
;    [60:28] p1.y = -y
;    alloc r15
     mov r15, qword[rbp-32]
     mov qword[rbp-60], r15
;    free r15
     neg qword[rbp-60]
;  [61:5] o1.pos = p1 
;  [61:14] p1 
;    [61:14] p1 
;    alloc r15
     mov r15, qword[rbp-68]
     mov qword[rbp-52], r15
;    free r15
;    alloc r15
     mov r15, qword[rbp-60]
     mov qword[rbp-44], r15
;    free r15
;  [62:5] assert(o1.pos.x == -1) 
   sub rsp, 68
;  alloc r15
;    [62:12] o1.pos.x == -1
;    [62:12] ? o1.pos.x == -1
;    [62:12] ? o1.pos.x == -1
     cmp_62_12:
     cmp qword[rbp-52], -1
     jne bool_false_62_12
     bool_true_62_12:  ; opt1
     mov r15, true
     jmp bool_end_62_12
     bool_false_62_12:
     mov r15, false
     bool_end_62_12:
   push r15
;  free r15
   call assert
   add rsp, 76
;  [63:5] assert(o1.pos.y == -2) 
   sub rsp, 68
;  alloc r15
;    [63:12] o1.pos.y == -2
;    [63:12] ? o1.pos.y == -2
;    [63:12] ? o1.pos.y == -2
     cmp_63_12:
     cmp qword[rbp-44], -2
     jne bool_false_63_12
     bool_true_63_12:  ; opt1
     mov r15, true
     jmp bool_end_63_12
     bool_false_63_12:
     mov r15, false
     bool_end_63_12:
   push r15
;  free r15
   call assert
   add rsp, 76
;  o2: qword[rbp-88]
;  [65:5] var o2 : object = o1 
;  [65:9] o2 : object = o1 
;  [65:23] o1 
;    [65:23] o1 
;      [65:23] o1 
;      alloc r15
       mov r15, qword[rbp-52]
       mov qword[rbp-88], r15
;      free r15
;      alloc r15
       mov r15, qword[rbp-44]
       mov qword[rbp-80], r15
;      free r15
;    alloc r15
     mov r15d, dword[rbp-36]
     mov dword[rbp-72], r15d
;    free r15
;  [66:5] assert(o2.pos.x == -1) 
   sub rsp, 88
;  alloc r15
;    [66:12] o2.pos.x == -1
;    [66:12] ? o2.pos.x == -1
;    [66:12] ? o2.pos.x == -1
     cmp_66_12:
     cmp qword[rbp-88], -1
     jne bool_false_66_12
     bool_true_66_12:  ; opt1
     mov r15, true
     jmp bool_end_66_12
     bool_false_66_12:
     mov r15, false
     bool_end_66_12:
   push r15
;  free r15
   call assert
   add rsp, 96
;  [67:5] assert(o2.pos.y == -2) 
   sub rsp, 88
;  alloc r15
;    [67:12] o2.pos.y == -2
;    [67:12] ? o2.pos.y == -2
;    [67:12] ? o2.pos.y == -2
     cmp_67_12:
     cmp qword[rbp-80], -2
     jne bool_false_67_12
     bool_true_67_12:  ; opt1
     mov r15, true
     jmp bool_end_67_12
     bool_false_67_12:
     mov r15, false
     bool_end_67_12:
   push r15
;  free r15
   call assert
   add rsp, 96
;  [68:5] assert(o2.color == 0xff0000) 
   sub rsp, 88
;  alloc r15
;    [68:12] o2.color == 0xff0000
;    [68:12] ? o2.color == 0xff0000
;    [68:12] ? o2.color == 0xff0000
     cmp_68_12:
     cmp dword[rbp-72], 0xff0000
     jne bool_false_68_12
     bool_true_68_12:  ; opt1
     mov r15, true
     jmp bool_end_68_12
     bool_false_68_12:
     mov r15, false
     bool_end_68_12:
   push r15
;  free r15
   call assert
   add rsp, 96
;  [70:5] print(hello.len, hello) 
;  print(len : reg_rdx, ptr : reg_rsi) 
;    inline: 70_5
;    alloc rdx
;    alias len -> rdx
     mov rdx, hello.len
;    alloc rsi
;    alias ptr -> rsi
     mov rsi, hello
;    [16:5] mov(rax, 1) 
     mov rax, 1
;    [16:19] # write system call 
;    [17:5] mov(rdi, 1) 
     mov rdi, 1
;    [17:19] # file descriptor for standard out 
;    [18:5] mov(rsi, ptr) 
;    [18:19] # buffer address 
;    [19:5] mov(rdx, len) 
;    [19:19] # buffer size 
;    [20:5] syscall() 
     syscall
;    free rsi
;    free rdx
   print_70_5_end:
;  [71:5] loop
   loop_71_5:
;    [72:9] print(prompt1.len, prompt1) 
;    print(len : reg_rdx, ptr : reg_rsi) 
;      inline: 72_9
;      alloc rdx
;      alias len -> rdx
       mov rdx, prompt1.len
;      alloc rsi
;      alias ptr -> rsi
       mov rsi, prompt1
;      [16:5] mov(rax, 1) 
       mov rax, 1
;      [16:19] # write system call 
;      [17:5] mov(rdi, 1) 
       mov rdi, 1
;      [17:19] # file descriptor for standard out 
;      [18:5] mov(rsi, ptr) 
;      [18:19] # buffer address 
;      [19:5] mov(rdx, len) 
;      [19:19] # buffer size 
;      [20:5] syscall() 
       syscall
;      free rsi
;      free rdx
     print_72_9_end:
;    len: qword[rbp-96]
;    [73:9] var len = read(input.len, input) - 1 
;    [73:13] len = read(input.len, input) - 1 
;    [73:19] read(input.len, input) - 1 
;    [73:19] read(input.len, input) - 1 
;    [73:19] len = read(input.len, input) 
;    [73:19] read(input.len, input) 
;    read(len : reg_rdx, ptr : reg_rsi) : i64 nbytes 
;      inline: 73_19
;      alias nbytes -> len
;      alloc rdx
;      alias len -> rdx
       mov rdx, input.len
;      alloc rsi
;      alias ptr -> rsi
       mov rsi, input
;      [24:5] mov(rax, 0) 
       mov rax, 0
;      [24:19] # read system call 
;      [25:5] mov(rdi, 0) 
       mov rdi, 0
;      [25:19] # file descriptor for standard input 
;      [26:5] mov(rsi, ptr) 
;      [26:19] # buffer address 
;      [27:5] mov(rdx, len) 
;      [27:19] # buffer size 
;      [28:5] syscall() 
       syscall
;      [29:5] mov(nbytes, rax) 
       mov qword[rbp-96], rax
;      [29:22] # return value 
;      free rsi
;      free rdx
     read_73_19_end:
;    [73:44] len - 1 
     sub qword[rbp-96], 1
;    [73:49] # -1 don't include the '\n' 
     if_74_12:
;    [74:12] ? len == 0 
;    [74:12] ? len == 0 
     cmp_74_12:
     cmp qword[rbp-96], 0
     jne if_74_9_end
     if_74_12_code:  ; opt1
;      [75:13] break 
       jmp loop_71_5_end
     if_74_9_end:
     if_77_12:
;    [77:12] ? len <= 4 
;    [77:12] ? len <= 4 
     cmp_77_12:
     cmp qword[rbp-96], 4
     jg if_77_9_end
     if_77_12_code:  ; opt1
;      [78:13] print(prompt2.len, prompt2) 
;      print(len : reg_rdx, ptr : reg_rsi) 
;        inline: 78_13
;        alloc rdx
;        alias len -> rdx
         mov rdx, prompt2.len
;        alloc rsi
;        alias ptr -> rsi
         mov rsi, prompt2
;        [16:5] mov(rax, 1) 
         mov rax, 1
;        [16:19] # write system call 
;        [17:5] mov(rdi, 1) 
         mov rdi, 1
;        [17:19] # file descriptor for standard out 
;        [18:5] mov(rsi, ptr) 
;        [18:19] # buffer address 
;        [19:5] mov(rdx, len) 
;        [19:19] # buffer size 
;        [20:5] syscall() 
         syscall
;        free rsi
;        free rdx
       print_78_13_end:
;      [79:13] continue 
       jmp loop_71_5
     if_77_9_end:
;    [81:9] print(prompt3.len, prompt3) 
;    print(len : reg_rdx, ptr : reg_rsi) 
;      inline: 81_9
;      alloc rdx
;      alias len -> rdx
       mov rdx, prompt3.len
;      alloc rsi
;      alias ptr -> rsi
       mov rsi, prompt3
;      [16:5] mov(rax, 1) 
       mov rax, 1
;      [16:19] # write system call 
;      [17:5] mov(rdi, 1) 
       mov rdi, 1
;      [17:19] # file descriptor for standard out 
;      [18:5] mov(rsi, ptr) 
;      [18:19] # buffer address 
;      [19:5] mov(rdx, len) 
;      [19:19] # buffer size 
;      [20:5] syscall() 
       syscall
;      free rsi
;      free rdx
     print_81_9_end:
;    [82:9] print(len, input) 
;    print(len : reg_rdx, ptr : reg_rsi) 
;      inline: 82_9
;      alloc rdx
;      alias len -> rdx
       mov rdx, qword[rbp-96]
;      alloc rsi
;      alias ptr -> rsi
       mov rsi, input
;      [16:5] mov(rax, 1) 
       mov rax, 1
;      [16:19] # write system call 
;      [17:5] mov(rdi, 1) 
       mov rdi, 1
;      [17:19] # file descriptor for standard out 
;      [18:5] mov(rsi, ptr) 
;      [18:19] # buffer address 
;      [19:5] mov(rdx, len) 
;      [19:19] # buffer size 
;      [20:5] syscall() 
       syscall
;      free rsi
;      free rdx
     print_82_9_end:
;    [83:9] print(dot.len, dot) 
;    print(len : reg_rdx, ptr : reg_rsi) 
;      inline: 83_9
;      alloc rdx
;      alias len -> rdx
       mov rdx, dot.len
;      alloc rsi
;      alias ptr -> rsi
       mov rsi, dot
;      [16:5] mov(rax, 1) 
       mov rax, 1
;      [16:19] # write system call 
;      [17:5] mov(rdi, 1) 
       mov rdi, 1
;      [17:19] # file descriptor for standard out 
;      [18:5] mov(rsi, ptr) 
;      [18:19] # buffer address 
;      [19:5] mov(rdx, len) 
;      [19:19] # buffer size 
;      [20:5] syscall() 
       syscall
;      free rsi
;      free rdx
     print_83_9_end:
;    [84:9] print(nl.len, nl) 
;    print(len : reg_rdx, ptr : reg_rsi) 
;      inline: 84_9
;      alloc rdx
;      alias len -> rdx
       mov rdx, nl.len
;      alloc rsi
;      alias ptr -> rsi
       mov rsi, nl
;      [16:5] mov(rax, 1) 
       mov rax, 1
;      [16:19] # write system call 
;      [17:5] mov(rdi, 1) 
       mov rdi, 1
;      [17:19] # file descriptor for standard out 
;      [18:5] mov(rsi, ptr) 
;      [18:19] # buffer address 
;      [19:5] mov(rdx, len) 
;      [19:19] # buffer size 
;      [20:5] syscall() 
       syscall
;      free rsi
;      free rdx
     print_84_9_end:
   jmp loop_71_5
   loop_71_5_end:
; main end

; system call: exit 0
mov rax, 60
mov rdi, 0
syscall

; max scratch registers in use: 1
;            max frames in use: 7
;        max inline stack size: 96 B

About

experimental compiler of minimalistic language to nasm x86_64 for linux

Topics

Resources

Stars

Watchers

Forks

Packages

No packages published

Languages