diff --git a/.eslintrc.json b/.eslintrc.json
index f326b306..cc4eddae 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -30,6 +30,8 @@
     "ignorePatterns": [
         "src/compile/inference/file_level_definitions_parser_header.ts",
         "src/compile/inference/file_level_definitions_parser.ts",
+        "src/ast/comments/comments_parser_header.ts",
+        "src/ast/comments/comments_parser.ts",
         "test/utils/typeStrings/typeString_parser_header.ts",
         "test/utils/typeStrings/typeString_parser.ts"
     ]
diff --git a/.gitignore b/.gitignore
index b131ed93..c7c0a4e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,6 @@ docs
 coverage
 *.tgz
 src/compile/inference/file_level_definitions_parser.ts
+src/ast/comments/comments_parser.ts
 test/utils/typeStrings/typeString_parser.ts
 .idea
diff --git a/.nycrc.json b/.nycrc.json
index 79af7b85..bae2453f 100644
--- a/.nycrc.json
+++ b/.nycrc.json
@@ -7,7 +7,8 @@
         "**/coverage/**",
         "**/docs/**",
         "**/.compiler_cache/**",
-        "src/compile/inference/file_level_definitions_parser*.ts"
+        "src/compile/inference/file_level_definitions_parser*.ts",
+        "src/ast/comments/comments_parser*.ts"
     ],
     "reporter": ["lcov", "text-summary"],
     "all": true,
diff --git a/package.json b/package.json
index 87a90082..ebe6c464 100644
--- a/package.json
+++ b/package.json
@@ -16,7 +16,8 @@
         "transpile": "tsc",
         "build-type-parser": "tspegjs -o test/utils/typeStrings/typeString_parser.ts --custom-header-file test/utils/typeStrings/typeString_parser_header.ts --cache test/utils/typeStrings/typeString_grammar.pegjs",
         "build-file-level-definitions-parser": "tspegjs -o src/compile/inference/file_level_definitions_parser.ts --custom-header-file src/compile/inference/file_level_definitions_parser_header.ts --cache src/compile/inference/file_level_definitions.pegjs",
-        "build": "npm run clean && npm run build-file-level-definitions-parser && npm run transpile && chmod u+x dist/bin/compile.js",
+        "build-comments-parser": "tspegjs -o src/ast/comments/comments_parser.ts --custom-header-file src/ast/comments/comments_parser_header.ts --cache src/ast/comments/comments_grammar.pegjs",
+        "build": "npm run clean && npm run build-comments-parser && npm run build-file-level-definitions-parser && npm run transpile && chmod u+x dist/bin/compile.js",
         "lint": "eslint src/ test/ --ext=ts",
         "lint:fix": "eslint src/ test/ --ext=ts --fix",
         "test": "npm run build-type-parser && NODE_OPTIONS='--max-old-space-size=2048' nyc mocha",
diff --git a/src/ast/ast_node.ts b/src/ast/ast_node.ts
index ed2721f1..d14a0e14 100644
--- a/src/ast/ast_node.ts
+++ b/src/ast/ast_node.ts
@@ -1,6 +1,6 @@
 import { ASTNodeFormatter } from "./ast_node_formatter";
 import { ASTContext } from "./ast_reader";
-import { parseSourceLocation, SourceLocation } from "./utils";
+import { SourceLocation, parseSourceLocation } from "./utils";
 
 export type ASTNodeCallback = (node: ASTNode) => void;
 export type ASTNodeSelector = (node: ASTNode) => boolean;
@@ -328,10 +328,10 @@ export class ASTNode {
      *
-     * In other words, returns corresponding code fragment substring.
+     * In other words, returns the corresponding slice of the source bytes.
      */
-    extractSourceFragment(source: string): string {
+    extractSourceFragment(source: Uint8Array): Uint8Array {
         const { offset, length } = this.sourceInfo;
 
-        return source.substr(offset, length);
+        return source.slice(offset, offset + length);
     }
 
     private createWalker(callback: ASTNodeCallback): ASTNodeCallback {
diff --git a/src/ast/ast_reader.ts b/src/ast/ast_reader.ts
index 3a51d424..74ea531a 100644
--- a/src/ast/ast_reader.ts
+++ b/src/ast/ast_reader.ts
@@ -5,6 +5,10 @@ import { ModernConfiguration } from "./modern";
 import { DefaultNodePostprocessorList } from "./postprocessing";
 import { sequence } from "./utils";
 
+// We store source files as byte arrays since AST src maps are byte-offset
+// based.
+export type FileMap = Map<string, Uint8Array>;
+
 export interface ASTNodeProcessor<T extends ASTNode> {
     process(
         reader: ASTReader,
@@ -14,7 +18,7 @@ export interface ASTNodeProcessor<T extends ASTNode> {
 }
 
 export interface ASTNodePostprocessor<T extends ASTNode> {
-    process(node: T, context: ASTContext, sources?: Map<string, string>): void;
+    process(node: T, context: ASTContext, sources?: FileMap): void;
     isSupportedNode(node: ASTNode): node is T;
 }
 
@@ -133,7 +137,7 @@ export class ASTPostprocessor {
         );
     }
 
-    processNode(node: ASTNode, context: ASTContext, sources?: Map<string, string>): void {
+    processNode(node: ASTNode, context: ASTContext, sources?: FileMap): void {
         const postprocessors = this.getPostprocessorsForNode(node);
 
         for (const postprocessor of postprocessors) {
@@ -141,7 +145,7 @@ export class ASTPostprocessor {
         }
     }
 
-    processContext(context: ASTContext, sources?: Map<string, string>): void {
+    processContext(context: ASTContext, sources?: FileMap): void {
         for (const postprocessor of this.nodePostprocessors) {
             for (const node of context.nodes) {
                 if (postprocessor.isSupportedNode(node)) {
@@ -185,7 +189,7 @@ export class ASTReader {
      *
      * @returns An array of `SourceUnit`s for each of the source entries in the input.
      */
-    read(data: any, kind = ASTKind.Any, sources?: Map<string, string>): SourceUnit[] {
+    read(data: any, kind = ASTKind.Any, sources?: FileMap): SourceUnit[] {
         const entries: Array<[string, any]> = Object.entries(data.sources);
         const rootNodeTypeName = "SourceUnit";
         const result: SourceUnit[] = [];
diff --git a/src/ast/comments/comment.ts b/src/ast/comments/comment.ts
new file mode 100644
index 00000000..2e3a9a3c
--- /dev/null
+++ b/src/ast/comments/comment.ts
@@ -0,0 +1,35 @@
+import { RawCommentKind } from "../constants";
+
+export interface CommentLoc {
+    start: number; // offset at which the comment starts in the parsed text
+    end: number; // offset at which the comment ends in the parsed text
+}
+
+export class RawComment {
+    /**
+     * Kind of comment (plain line, plain block, line-group NatSpec or block NatSpec)
+     */
+    kind: RawCommentKind;
+
+    /**
+     * The entire text of the comment, including the `*`s and `/`s
+     */
+    text: string;
+
+    /**
+     * The text of the comment without the `*` and `/` markers, i.e. only the actual comment body
+     */
+    internalText: string;
+
+    /**
+     * The location of this comment: start/end offsets within the parsed text
+     */
+    loc: CommentLoc;
+
+    constructor(kind: RawCommentKind, text: string, internalText: string, loc: CommentLoc) {
+        this.kind = kind;
+        this.text = text;
+        this.internalText = internalText;
+        this.loc = loc;
+    }
+}
diff --git a/src/ast/comments/comments_grammar.pegjs b/src/ast/comments/comments_grammar.pegjs
new file mode 100644
index 00000000..406c7ba2
--- /dev/null
+++ b/src/ast/comments/comments_grammar.pegjs
@@ -0,0 +1,168 @@
+{
+    expected;
+    error;
+    peg$anyExpectation;
+    peg$parse__;
+}
+
+CommentSoup =
+    t: (
+        ([^"'/]+ (!("//" / "///" / "/*") "/")?) { return text(); } // non-comment, non-string-literal anything
+        / StringLiteral { return text(); } // string literal
+        / (c: Comment __ { return c; }) // comment
+    )* { return t; }
+
+Comment
+    = BlockComment
+    / NatspecLineGroup
+    / LineComment
+
+// Block comments are matched line by line; inner/last lines drop a leading `<ws>*` prefix.
+FirstBlockLine = "/*" body: ((!"*/" NonLineTerminator)* { return text(); }) LineTerminator { return body; }
+BlockLine = (PrimitiveWhiteSpace* (!"*/" "*"))? body: ((!"*/" NonLineTerminator)* { return text(); }) LineTerminator { return body; }
+LastBlockLine = (PrimitiveWhiteSpace* (!"*/" "*"))? body: ((!"*/" NonLineTerminator)* { return text(); }) "*/" { return body; }
+
+MultiLineBlockComment = start: FirstBlockLine inner: BlockLine* last: LastBlockLine {
+    const isNatSpec = start[0] === "*";
+
+    // For NatSpec comments we strip 1 space from each inner line (if present)
+    // to be compatible with the Solidity compiler's behavior
+    if (isNatSpec) {
+        inner = inner.map((l: string) => l.startsWith(" ") ? l.slice(1) : l);
+        last = last.startsWith(" ") ? last.slice(1) : last;
+    }
+
+    let body = [start, ...inner, last].join("\n")
+
+    // for natspec skip the second *
+    body = isNatSpec ? body.slice(1) : body;
+
+    const kind = isNatSpec ? RawCommentKind.BlockNatSpec : RawCommentKind.BlockComment;
+
+    return new RawComment(kind, text(), body, mkLoc(location()) )
+}
+
+SingleLineBlockComment = "/*" body: ((!"*/" NonLineTerminator)* { return text(); }) "*/" {
+    const isNatSpec = body[0] === "*";
+    return new RawComment(
+        isNatSpec ? RawCommentKind.BlockNatSpec : RawCommentKind.BlockComment,
+        text(),
+        isNatSpec ? body.slice(1) : body, // for natspec skip the second *
+        mkLoc(location())
+    );
+}
+
+BlockComment = MultiLineBlockComment / SingleLineBlockComment
+
+NonLineTerminator =
+    [^\n\r\u2028\u2029]
+
+LineComment = // plain "//" comment; its closing LineTerminator is required and part of text()
+    "//" body: (NonLineTerminator* { return text(); }) LineTerminator {
+        return new RawComment(RawCommentKind.SingleLineComment, text(), body, mkLoc(location()));
+    }
+
+LineNatspec = // one "///" line; yields its body minus one leading space (if present)
+    PrimitiveWhiteSpace* "///" body: (NonLineTerminator* { return text(); }) LineTerminator {
+        return body.startsWith(" ") ? body.slice(1) : body;
+    }
+
+NatspecLineGroup = // consecutive "///" lines become one RawComment; bodies are joined by "\n"
+    bodies: LineNatspec+ {
+        return new RawComment(RawCommentKind.LineGroupNatSpec, text(), bodies.join("\n"), mkLoc(location()));
+    }
+
+// ==== White space
+
+PrimitiveWhiteSpace =
+    "\t"
+    / "\v"
+    / "\f"
+    / " "
+    / "\u00A0"
+    / "\uFEFF"
+    / Zs
+
+// Separator, Space
+Zs =
+    [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
+
+LineTerminator =
+    [\n\r\u2028\u2029]
+
+__ =
+    (PrimitiveWhiteSpace / LineTerminator)*
+
+StringLiteral =
+    "'" chars: SingleStringChar* "'" { return chars.join(""); }
+    / '"' chars: DoubleStringChar* '"' { return chars.join(""); }
+
+AnyChar =
+    .
+
+DoubleStringChar =
+    !('"' / "\\" / LineTerminator) AnyChar { return text(); }
+    / "\\" sequence: EscapeSequence { return sequence; }
+    / LineContinuation
+
+SingleStringChar =
+    !("'" / "\\" / LineTerminator) AnyChar { return text(); }
+    / "\\" sequence: EscapeSequence { return sequence; }
+    / LineContinuation
+
+LineContinuation =
+    "\\" LineTerminatorSequence { return ""; }
+
+EscapeSequence =
+    CharEscapeSequence
+    / "0" !DecDigit { return "\0"; }
+    / HexEscapeSequence
+    / UnicodeEscapeSequence
+    / AnyChar // Allow invalid hex sequences as a fallback
+
+CharEscapeSequence =
+    SingleEscapeChar
+    / NonEscapeChar
+
+SingleEscapeChar =
+    "'"
+    / '"'
+    / "\\"
+    / "b"  { return "\b"; }
+    / "f"  { return "\f"; }
+    / "n"  { return "\n"; }
+    / "r"  { return "\r"; }
+    / "t"  { return "\t"; }
+    / "v"  { return "\v"; }
+
+NonEscapeChar =
+    !(EscapeChar / LineTerminator) AnyChar { return text(); }
+
+HexDigit =
+    [0-9a-f]i
+
+DecDigit =
+    [0-9]
+
+EscapeChar =
+    SingleEscapeChar
+    / DecDigit
+    / "x"
+    / "u"
+
+HexEscapeSequence =
+    "x" digits:$(HexDigit HexDigit) {
+        return String.fromCharCode(parseInt(digits, 16));
+    }
+
+UnicodeEscapeSequence =
+    "u" digits:$(HexDigit HexDigit HexDigit HexDigit) {
+        return String.fromCharCode(parseInt(digits, 16));
+    }
+
+LineTerminatorSequence =
+    "\n"
+    / "\r\n"
+    / "\r"
+    / "\u2028"
+    / "\u2029"
diff --git a/src/ast/comments/comments_parser_header.ts b/src/ast/comments/comments_parser_header.ts
new file mode 100644
index 00000000..0ba0b4e8
--- /dev/null
+++ b/src/ast/comments/comments_parser_header.ts
@@ -0,0 +1,11 @@
+import { CommentLoc, RawComment } from "./comment";
+import { RawCommentKind } from "../constants";
+
+function mkLoc(raw: any): CommentLoc {
+    return { start: raw.start.offset, end: raw.end.offset }; // keep only the offsets
+}
+
+export function parseComments(contents: string): (RawComment | string)[] {
+    // @ts-ignore: `parse` is not declared here; presumably supplied by the generated parser
+    return parse(contents);
+}
diff --git a/src/ast/comments/index.ts b/src/ast/comments/index.ts
new file mode 100644
index 00000000..054b6629
--- /dev/null
+++ b/src/ast/comments/index.ts
@@ -0,0 +1,2 @@
+export * from "./comment";
+export { parseComments } from "./comments_parser";
diff --git a/src/ast/constants.ts b/src/ast/constants.ts
index 5fc2f489..2d7a3319 100644
--- a/src/ast/constants.ts
+++ b/src/ast/constants.ts
@@ -89,6 +89,13 @@ export enum TimeUnit {
     Years = "years"
 }
 
+export enum RawCommentKind {
+    SingleLineComment = "single_line", // `// ...`
+    BlockComment = "block_comment", // `/* ... */`
+    LineGroupNatSpec = "line_group_natspec", // run of consecutive `/// ...` lines
+    BlockNatSpec = "block_natspec" // `/** ... */`
+}
+
 export const PossibleDataLocations = new Set<string>(Object.values(DataLocation));
 
 export const PossibleFunctionVisibilities = new Set<string>(Object.values(FunctionVisibility));
diff --git a/src/ast/index.ts b/src/ast/index.ts
index cfd9140e..a5a5fd35 100644
--- a/src/ast/index.ts
+++ b/src/ast/index.ts
@@ -15,3 +15,4 @@ export * from "./dispatch";
 export * from "./definitions";
 export * from "./utils";
 export * from "./xpath";
+export * from "./comments";
diff --git a/src/ast/postprocessing/structured_documentation_reconstruction.ts b/src/ast/postprocessing/structured_documentation_reconstruction.ts
index 3c148f22..06c17f65 100644
--- a/src/ast/postprocessing/structured_documentation_reconstruction.ts
+++ b/src/ast/postprocessing/structured_documentation_reconstruction.ts
@@ -1,5 +1,8 @@
+import { strByteLen, toUTF8 } from "../../misc";
 import { ASTNode } from "../ast_node";
-import { ASTContext, ASTNodePostprocessor } from "../ast_reader";
+import { ASTContext, ASTNodePostprocessor, FileMap } from "../ast_reader";
+import { RawComment, parseComments } from "../comments";
+import { RawCommentKind } from "../constants";
 import {
     ContractDefinition,
     EnumDefinition,
@@ -25,23 +28,50 @@ export class StructuredDocumentationReconstructor {
      */
     fragmentCoordsToStructDoc(
         coords: FragmentCoordinates,
-        source: string
+        source: Uint8Array
     ): StructuredDocumentation | undefined {
         const [from, to, sourceIndex] = coords;
-        const fragment = source.slice(from, to);
-        const comments = this.extractComments(fragment);
-        const docBlock = comments.length > 0 ? this.detectDocumentationBlock(comments) : undefined;
+        const fragment = toUTF8(source.slice(from, to));
 
-        if (docBlock === undefined) {
+        const parsedCommentsSoup = parseComments(fragment);
+
+        // The parser yields a "soup" of plain strings (non-comment text) and RawComments.
+        // Find the trailing run of comments. NOTE: if every entry is a comment the index
+        // ends at -1 and slice(-1) keeps just the last entry - the only one used below.
+        let commentsStartIdx = parsedCommentsSoup.length - 1;
+        for (; commentsStartIdx >= 0; commentsStartIdx--) {
+            if (!(parsedCommentsSoup[commentsStartIdx] instanceof RawComment)) {
+                commentsStartIdx++;
+                break;
+            }
+        }
+
+        const parsedComments = parsedCommentsSoup.slice(
+            commentsStartIdx,
+            parsedCommentsSoup.length
+        ) as RawComment[];
+
+        // No comments found in the gap
+        if (parsedComments.length === 0) {
+            return undefined;
+        }
+
+        const lastComment = parsedComments[parsedComments.length - 1];
+
+        // The last comment in the gap is not a docstring
+        if (
+            lastComment.kind !== RawCommentKind.BlockNatSpec &&
+            lastComment.kind !== RawCommentKind.LineGroupNatSpec
+        ) {
             return undefined;
         }
 
-        const offset = from + fragment.indexOf(docBlock);
-        const length = docBlock.length;
+        const byteOffsetFromFragment = strByteLen(fragment.slice(0, lastComment.loc.start));
+        const offset = from + byteOffsetFromFragment;
+        const length = strByteLen(lastComment.text);
         const src = `${offset}:${length}:${sourceIndex}`;
-        const text = this.extractText(docBlock);
 
-        return new StructuredDocumentation(0, src, text);
+        return new StructuredDocumentation(0, src, lastComment.internalText.trim());
     }
 
     getPrecedingGapCoordinates(node: ASTNode): FragmentCoordinates {
@@ -76,7 +106,8 @@ export class StructuredDocumentationReconstructor {
     getDanglingGapCoordinates(node: ASTNode): FragmentCoordinates {
         const curInfo = node.sourceInfo;
 
-        const to = curInfo.offset + curInfo.length;
+        // Skip final }
+        const to = curInfo.offset + curInfo.length - 1;
         const sourceIndex = curInfo.sourceIndex;
 
         const lastChild = node.lastChild;
@@ -93,116 +124,6 @@ export class StructuredDocumentationReconstructor {
 
         return [from, to, sourceIndex];
     }
-
-    private extractComments(fragment: string): string[] {
-        const rx = /(\/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/)|([^\n\r]*\/\/.*[\n\r]+)|[\n\r]/g;
-        const result: string[] = [];
-
-        let match = rx.exec(fragment);
-
-        while (match !== null) {
-            result.push(match[0]);
-
-            match = rx.exec(fragment);
-        }
-
-        return result;
-    }
-
-    private detectDocumentationBlock(comments: string[]): string | undefined {
-        const buffer: string[] = [];
-
-        comments.reverse();
-
-        let stopOnNextGap = false;
-
-        const rxCleanBeforeSlash = /^[^/]+/;
-
-        for (const comment of comments) {
-            /**
-             * Remove ANY leading characters before first `/` character.
-             *
-             * This is mostly actual for dangling documentation candidates,
-             * as their source range starts from very beginnig of parent node.
-             * This leads to an effect that part of parent source symbols are
-             * preceding the `///` or `/**`. Skip them for detection reasons.
-             *
-             * Consider following example:
-             * ```
-             * unchecked {
-             *     /// dangling
-             * }
-             * ```
-             * Source range would include the `unchecked {` part,
-             * however interesting part for us starts only since `///`.
-             */
-            const cleanComment = comment.replace(rxCleanBeforeSlash, "");
-
-            /**
-             * Consider if comment is valid single-line or multi-line DocBlock
-             */
-            if (cleanComment.startsWith("/**")) {
-                buffer.push(comment);
-
-                break;
-            } else if (cleanComment.startsWith("///")) {
-                buffer.push(comment);
-
-                stopOnNextGap = true;
-            } else if (stopOnNextGap) {
-                break;
-            }
-        }
-
-        if (buffer.length === 0) {
-            return undefined;
-        }
-
-        if (buffer.length > 1) {
-            buffer.reverse();
-        }
-
-        /**
-         * When joining back DocBlock, remove leading garbage characters again,
-         * but only before first `/` (not in each line, like before).
-         *
-         * Need to preserve whitespace charactes in multiline comments like
-         * ```
-         * {
-         *      /// A
-         *          /// B
-         *              /// C
-         * }
-         * ```
-         * to have following result
-         * ```
-         * /// A
-         *          /// B
-         *              /// C
-         * ```
-         * NOTE that this is affecting documentation node source range.
-         */
-        return buffer.join("").trim().replace(rxCleanBeforeSlash, "");
-    }
-
-    private extractText(docBlock: string): string {
-        const result: string[] = [];
-
-        const replacers = docBlock.startsWith("///") ? ["/// ", "///"] : ["/**", "*/", "* ", "*"];
-        const lines = docBlock.split("\n");
-
-        for (let line of lines) {
-            line = line.trimStart();
-
-            for (const replacer of replacers) {
-                line = line.replace(replacer, "");
-            }
-
-            result.push(line);
-        }
-
-        return result.join("\n").trim();
-    }
 }
 
 type SupportedNode =
@@ -222,7 +143,7 @@ export class StructuredDocumentationReconstructingPostprocessor
 {
     private reconstructor = new StructuredDocumentationReconstructor();
 
-    process(node: SupportedNode, context: ASTContext, sources?: Map<string, string>): void {
+    process(node: SupportedNode, context: ASTContext, sources?: FileMap): void {
         if (sources === undefined) {
             return;
         }
diff --git a/src/ast/writing/ast_mapping.ts b/src/ast/writing/ast_mapping.ts
index bab40b50..b6e51f2a 100644
--- a/src/ast/writing/ast_mapping.ts
+++ b/src/ast/writing/ast_mapping.ts
@@ -625,7 +625,7 @@ class VariableDeclarationStatementWriter extends SimpleStatementWriter<VariableD
 
 /**
- * Compound statemetns don't have their own semicolons. However if a
- * child has a semi-colon, we must make sure to exclude it from our soruce map.
+ * Compound statements don't have their own semicolons. However if a
+ * child has a semi-colon, we must make sure to exclude it from our source map.
  */
 abstract class CompoundStatementWriter<
     T extends CompoundStatement
diff --git a/src/ast/writing/writer.ts b/src/ast/writing/writer.ts
index d6017ec6..0e4a92d9 100644
--- a/src/ast/writing/writer.ts
+++ b/src/ast/writing/writer.ts
@@ -1,3 +1,4 @@
+import { strByteLen } from "../../misc";
 import { ASTNode, ASTNodeConstructor } from "../ast_node";
 import { YulNode } from "../implementation/statement/inline_assembly";
 import { SourceFormatter } from "./formatter";
@@ -110,19 +111,20 @@ export class ASTWriter {
      */
     descToSourceString(desc: SrcDesc, sourceMap: SrcRangeMap): string {
         let source = "";
+        let size = 0;
 
         const helper = (current: SrcDesc): void => {
             for (const element of current) {
                 if (typeof element === "string") {
                     source += element;
+                    size += strByteLen(element);
                 } else {
                     const [node, nodeDesc] = element;
-                    const start = source.length;
+                    const start = size;
 
                     helper(nodeDesc);
 
-                    const length = source.length - start;
-
+                    const length = size - start;
                     sourceMap.set(node, [start, length]);
                 }
             }
diff --git a/src/bin/compile.ts b/src/bin/compile.ts
index 6db43674..7c018f55 100644
--- a/src/bin/compile.ts
+++ b/src/bin/compile.ts
@@ -32,6 +32,7 @@ import {
     PrettyFormatter,
     SourceUnit,
     StateVariableVisibility,
+    toUTF8,
     VariableDeclaration,
     XPath
 } from "..";
@@ -325,7 +326,7 @@ function error(message: string): never {
                     data.sources[key] = {};
                 }
 
-                data.sources[key].source = value;
+                data.sources[key].source = toUTF8(value);
             }
         }
 
diff --git a/src/compile/compiler_selection.ts b/src/compile/compiler_selection.ts
index d1ead226..1c8d0884 100644
--- a/src/compile/compiler_selection.ts
+++ b/src/compile/compiler_selection.ts
@@ -1,3 +1,4 @@
+import { toUTF8 } from "../misc";
 import { CompilerSeries, CompilerVersions } from "./constants";
 import { extractSpecifiersFromSource, getCompilerVersionsBySpecifiers } from "./version";
 
@@ -74,8 +75,12 @@ export class VersionDetectionStrategy implements CompilerVersionSelectionStrateg
     fallback: CompilerVersionSelectionStrategy;
     descending: boolean;
 
-    constructor(sources: string[], fallback: CompilerVersionSelectionStrategy, descending = true) {
-        this.sources = sources;
+    constructor(
+        sources: Uint8Array[],
+        fallback: CompilerVersionSelectionStrategy,
+        descending = true
+    ) {
+        this.sources = sources.map(toUTF8); // decode each source's bytes to text up front
         this.fallback = fallback;
         this.descending = descending;
     }
diff --git a/src/compile/inference/imports.ts b/src/compile/inference/imports.ts
index e6192b0e..6919dc1c 100644
--- a/src/compile/inference/imports.ts
+++ b/src/compile/inference/imports.ts
@@ -1,7 +1,7 @@
 import fse from "fs-extra";
 import { dirname, normalize } from "path";
 import { CompileInferenceError, ImportResolver, Remapping } from "..";
-import { assert } from "../..";
+import { FileMap, assert, toUTF8 } from "../..";
 import {
     AnyFileLevelNode,
     FileLevelNodeKind,
@@ -95,12 +95,12 @@ function computeSourceUnitName(
 async function resolveSourceUnitName(
     sourceUnitName: string,
     resolvers: ImportResolver[]
-): Promise<[string, string] | undefined> {
+): Promise<[Uint8Array, string] | undefined> {
     for (const resolver of resolvers) {
         const resolvedPath = resolver.resolve(sourceUnitName);
 
         if (resolvedPath !== undefined) {
-            const contents = await fse.readFile(resolvedPath, "utf-8");
+            const contents = await fse.readFile(resolvedPath);
 
             return [contents, resolvedPath];
         }
@@ -117,7 +117,7 @@ async function resolveSourceUnitName(
  * add a mapping from its source unit name to the actual file name in `fileNames`.
  */
 export async function findAllFiles(
-    files: Map<string, string>,
+    files: FileMap,
     fileNames: Map<string, string>,
     remappings: Remapping[],
     resolvers: ImportResolver[],
@@ -161,7 +161,7 @@ export async function findAllFiles(
         let flds: AnyFileLevelNode[];
 
         try {
-            flds = parseFileLevelDefinitions(content);
+            flds = parseFileLevelDefinitions(toUTF8(content));
         } catch (e: any) {
             if (e instanceof PeggySyntaxError) {
                 const start = e.location.start.offset;
diff --git a/src/compile/input.ts b/src/compile/input.ts
index 5c540bae..743d13c5 100644
--- a/src/compile/input.ts
+++ b/src/compile/input.ts
@@ -1,3 +1,5 @@
+import { FileMap } from "../ast";
+import { toUTF8 } from "../misc";
 import { CompilationOutput } from "./constants";
 
 export interface PartialSolcInput {
@@ -41,7 +43,7 @@ function mergeCompilerSettings<T extends SolcInput>(input: T, settings: any): T
  * This handles the differences in the JSON input between different compiler versions.
  */
 export function createCompilerInput(
-    files: Map<string, string>,
+    files: FileMap,
     remappings: string[],
     output: CompilationOutput[],
     compilerSettings: any
@@ -79,7 +81,7 @@ export function createCompilerInput(
     partialInp.sources = {};
 
     for (const [fileName, content] of files.entries()) {
-        partialInp.sources[fileName] = { content };
+        partialInp.sources[fileName] = { content: toUTF8(content) };
     }
 
     const input = partialInp as SolcInput;
diff --git a/src/compile/utils.ts b/src/compile/utils.ts
index 6893516d..1ef02eee 100644
--- a/src/compile/utils.ts
+++ b/src/compile/utils.ts
@@ -1,7 +1,7 @@
 import fse from "fs-extra";
 import path from "path";
 import { FileSystemResolver, getCompilerForVersion, LocalNpmResolver } from ".";
-import { assert } from "../misc";
+import { assert, fromUTF8 } from "../misc";
 import {
     CompilerVersionSelectionStrategy,
     LatestVersionInEachSeriesStrategy,
@@ -12,6 +12,7 @@ import { CompilationOutput, CompilerKind } from "./constants";
 import { Remapping } from "./import_resolver";
 import { findAllFiles } from "./inference";
 import { createCompilerInput } from "./input";
+import { FileMap } from "../ast";
 
 export interface PathOptions {
     remapping?: string[];
@@ -40,7 +41,7 @@ export interface CompileResult {
      * Map from file-names (either passed in by caller, or source unit names of imported files)
      * to the contents of the respective files.
      */
-    files: Map<string, string>;
+    files: FileMap;
 
     /**
      * Map from file-names appearing in the `files` map, to the
@@ -110,19 +111,16 @@ export function parsePathRemapping(remapping: string[]): Remapping[] {
     return result;
 }
 
-function fillFilesFromSources(
-    files: Map<string, string>,
-    sources: { [fileName: string]: any }
-): void {
+function fillFilesFromSources(files: FileMap, sources: { [fileName: string]: any }): void {
     for (const [fileName, section] of Object.entries(sources)) {
         if (section && typeof section.source === "string") {
-            files.set(fileName, section.source);
+            files.set(fileName, fromUTF8(section.source)); // FileMap values are UTF-8 bytes
         }
     }
 }
 
 function getCompilerVersionStrategy(
-    sources: string[],
+    sources: Uint8Array[],
     versionOrStrategy: string | CompilerVersionSelectionStrategy
 ): CompilerVersionSelectionStrategy {
     if (versionOrStrategy === "auto") {
@@ -137,7 +135,7 @@ function getCompilerVersionStrategy(
 }
 
 export async function compile(
-    files: Map<string, string>,
+    files: FileMap,
     remapping: string[],
     version: string,
     compilationOutput: CompilationOutput[] = [CompilationOutput.ALL],
@@ -211,7 +209,7 @@ export async function compileSourceString(
     const resolvers = [fsResolver, npmResolver];
 
     const parsedRemapping = parsePathRemapping(remapping);
-    const files = new Map([[fileName, sourceCode]]);
+    const files = new Map([[fileName, fromUTF8(sourceCode)]]);
     const resolvedFileNames = new Map([[fileName, fileName]]);
 
     await findAllFiles(files, resolvedFileNames, parsedRemapping, resolvers);
@@ -283,7 +281,7 @@ export async function compileSol(
     const remapping = pathOptions.remapping || [];
     const parsedRemapping = parsePathRemapping(remapping);
 
-    const files = new Map<string, string>();
+    const files: FileMap = new Map();
     const resolvedFileNames = new Map<string, string>();
     const visited = new Set<string>();
 
@@ -294,7 +292,7 @@ export async function compileSol(
 
         assert(resolvedFileName !== undefined, `Unable to find "${fileName}"`);
 
-        const sourceCode = await fse.readFile(resolvedFileName, "utf-8");
+        const sourceCode = await fse.readFile(resolvedFileName);
 
         if (isDynamicBasePath) {
             const basePath = path.dirname(resolvedFileName);
@@ -358,7 +356,7 @@ export async function compileJsonData(
     compilerSettings?: any,
     kind?: CompilerKind
 ): Promise<CompileResult> {
-    const files = new Map<string, string>();
+    const files: FileMap = new Map();
 
     if (!(data instanceof Object && data.sources instanceof Object)) {
         throw new Error(`Unable to find required properties in "${fileName}"`);
@@ -388,7 +386,7 @@ export async function compileJsonData(
 
     if (consistentlyContainsOneOf(sources, "source")) {
         for (const [fileName, fileData] of Object.entries<{ source: string }>(sources)) {
-            files.set(fileName, fileData.source);
+            files.set(fileName, fromUTF8(fileData.source));
         }
 
         const compilerVersionStrategy = getCompilerVersionStrategy([...files.values()], version);
diff --git a/src/misc/index.ts b/src/misc/index.ts
index be92e63f..fcdf29a2 100644
--- a/src/misc/index.ts
+++ b/src/misc/index.ts
@@ -4,3 +4,4 @@ export * from "./pretty_printing";
 export * from "./srcmap";
 export * from "./struct_equality";
 export * from "./utils";
+export * from "./unicode";
diff --git a/src/misc/unicode.ts b/src/misc/unicode.ts
new file mode 100644
index 00000000..5c21133b
--- /dev/null
+++ b/src/misc/unicode.ts
@@ -0,0 +1,27 @@
+// `ignoreBOM: true` keeps a leading byte-order mark in the decoded text. The
+// default decoder drops it, which would make the string 3 bytes shorter than
+// the buffer and shift every byte offset computed from it.
+const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
+const encoder = new TextEncoder();
+
+/**
+ * Decodes UTF-8 bytes into a string, preserving a leading BOM (if any) so
+ * that `fromUTF8(toUTF8(buf))` yields the same bytes for valid UTF-8 input.
+ */
+export function toUTF8(buf: Uint8Array): string {
+    return decoder.decode(buf);
+}
+
+/**
+ * Encodes a string as UTF-8 bytes.
+ */
+export function fromUTF8(str: string): Uint8Array {
+    return encoder.encode(str);
+}
+
+/**
+ * Returns the number of bytes `str` occupies when encoded as UTF-8.
+ */
+export function strByteLen(str: string): number {
+    return fromUTF8(str).length;
+}
diff --git a/test/integration/compile/04.spec.ts b/test/integration/compile/04.spec.ts
index 832c458d..0e907ba9 100644
--- a/test/integration/compile/04.spec.ts
+++ b/test/integration/compile/04.spec.ts
@@ -6,14 +6,15 @@ import {
     CompilerVersions04,
     compileSol,
     detectCompileErrors,
+    FileMap,
     PossibleCompilerKinds,
     SourceUnit
 } from "../../../src";
 import { createImprint } from "./common";
 
 const sample = "./test/samples/solidity/compile_04.sol";
-const content = fse.readFileSync(sample).toString();
-const expectedFiles = new Map<string, string>([[sample, content]]);
+const content = fse.readFileSync(sample);
+const expectedFiles: FileMap = new Map([[sample, content]]);
 
 const encounters = new Map<string, number>([
     ["SourceUnit", 1],
diff --git a/test/integration/compile/05.spec.ts b/test/integration/compile/05.spec.ts
index 39d5de07..93d3b057 100644
--- a/test/integration/compile/05.spec.ts
+++ b/test/integration/compile/05.spec.ts
@@ -7,14 +7,15 @@ import {
     CompilerVersions05,
     compileSol,
     detectCompileErrors,
+    FileMap,
     PossibleCompilerKinds,
     SourceUnit
 } from "../../../src";
 import { createImprint } from "./common";
 
 const sample = "./test/samples/solidity/compile_05.sol";
-const content = fse.readFileSync(sample).toString();
-const expectedFiles = new Map<string, string>([[sample, content]]);
+const content = fse.readFileSync(sample);
+const expectedFiles: FileMap = new Map([[sample, content]]);
 
 const encounters = new Map<string, number>([
     ["SourceUnit", 1],
diff --git a/test/integration/compile/06.spec.ts b/test/integration/compile/06.spec.ts
index fe9a6894..cb8b4140 100644
--- a/test/integration/compile/06.spec.ts
+++ b/test/integration/compile/06.spec.ts
@@ -7,14 +7,15 @@ import {
     CompilerVersions06,
     compileSol,
     detectCompileErrors,
+    FileMap,
     PossibleCompilerKinds,
     SourceUnit
 } from "../../../src";
 import { createImprint } from "./common";
 
 const sample = "./test/samples/solidity/compile_06.sol";
-const content = fse.readFileSync(sample).toString();
-const expectedFiles = new Map<string, string>([[sample, content]]);
+const content = fse.readFileSync(sample);
+const expectedFiles: FileMap = new Map([[sample, content]]);
 
 const encounters = new Map<string, number>([
     ["SourceUnit", 1],
diff --git a/test/integration/compile/kinds.spec.ts b/test/integration/compile/kinds.spec.ts
index f5fefea8..4fbc88bd 100644
--- a/test/integration/compile/kinds.spec.ts
+++ b/test/integration/compile/kinds.spec.ts
@@ -165,7 +165,7 @@ describe(`Native and WASM compilers produce the same results for all files`, ()
         }
 
         it(fileName, async () => {
-            const source = fse.readFileSync(sample, { encoding: "utf8" });
+            const source = fse.readFileSync(sample);
             const args = additionalArgs.get(fileName);
             const versionStrategy = new VersionDetectionStrategy(
                 [source],
diff --git a/test/integration/compile/latest_06.spec.ts b/test/integration/compile/latest_06.spec.ts
index dba102d9..e03269ae 100644
--- a/test/integration/compile/latest_06.spec.ts
+++ b/test/integration/compile/latest_06.spec.ts
@@ -7,14 +7,15 @@ import {
     CompilerVersions06,
     compileSol,
     detectCompileErrors,
+    FileMap,
     PossibleCompilerKinds,
     SourceUnit
 } from "../../../src";
 import { createImprint } from "./common";
 
 const sample = "./test/samples/solidity/latest_06.sol";
-const content = fse.readFileSync(sample).toString();
-const expectedFiles = new Map<string, string>([[sample, content]]);
+const content = fse.readFileSync(sample);
+const expectedFiles: FileMap = new Map([[sample, content]]);
 const compilerVersion = CompilerVersions06[CompilerVersions06.length - 1];
 
 const encounters = new Map<string, number>([
diff --git a/test/integration/compile/latest_07.spec.ts b/test/integration/compile/latest_07.spec.ts
index af20ab12..b923747a 100644
--- a/test/integration/compile/latest_07.spec.ts
+++ b/test/integration/compile/latest_07.spec.ts
@@ -7,14 +7,15 @@ import {
     CompilerVersions07,
     compileSol,
     detectCompileErrors,
+    FileMap,
     PossibleCompilerKinds,
     SourceUnit
 } from "../../../src";
 import { createImprint } from "./common";
 
 const sample = "./test/samples/solidity/latest_07.sol";
-const content = fse.readFileSync(sample).toString();
-const expectedFiles = new Map<string, string>([[sample, content]]);
+const content = fse.readFileSync(sample);
+const expectedFiles: FileMap = new Map([[sample, content]]);
 const compilerVersion = CompilerVersions07[CompilerVersions07.length - 1];
 
 const encounters = new Map<string, number>([
diff --git a/test/integration/compile/latest_08.spec.ts b/test/integration/compile/latest_08.spec.ts
index ca887406..9de9eafe 100644
--- a/test/integration/compile/latest_08.spec.ts
+++ b/test/integration/compile/latest_08.spec.ts
@@ -7,17 +7,18 @@ import {
     CompilerVersions08,
     compileSol,
     detectCompileErrors,
+    FileMap,
     PossibleCompilerKinds,
     SourceUnit
 } from "../../../src";
 import { createImprint } from "./common";
 
 const mainSample = "./test/samples/solidity/latest_08.sol";
-const mainContent = fse.readFileSync(mainSample).toString();
+const mainContent = fse.readFileSync(mainSample);
 const importSample = "./test/samples/solidity/latest_imports_08.sol";
-const importContent = fse.readFileSync(importSample).toString();
+const importContent = fse.readFileSync(importSample);
 
-const expectedFiles = new Map<string, string>([
+const expectedFiles: FileMap = new Map([
     [mainSample, mainContent],
     [importSample, importContent]
 ]);
diff --git a/test/integration/sol-ast-compile/xpath/struct_doc_recovery.spec.ts b/test/integration/sol-ast-compile/xpath/struct_doc_recovery.spec.ts
index f6458109..fb348f79 100644
--- a/test/integration/sol-ast-compile/xpath/struct_doc_recovery.spec.ts
+++ b/test/integration/sol-ast-compile/xpath/struct_doc_recovery.spec.ts
@@ -41,22 +41,26 @@ for (const kind of PossibleCompilerKinds) {
 
         it("STDOUT is correct", () => {
             const cases: Array<[string, string]> = [
-                ["X\\nY\\nZ", "4:37:0"],
-                ["C", "85:5:0"],
-                ["A", "122:5:0"],
-                ["B", "184:20:0"],
-                ["A\\nB", "235:15:0"],
-                ["B", "291:20:0"],
-                ["C", "458:5:0"],
-                ["A", "482:5:0"],
-                ["B", "532:20:0"],
-                ["A\\nB", "571:15:0"],
-                ["B", "615:20:0"],
-                ["C", "746:5:0"],
-                ["A", "788:5:0"],
-                ["B", "855:20:0"],
-                ["A\\nB", "911:15:0"],
-                ["B", "972:20:0"]
+                ["X\\nY\\nZ", "4:38:0"],
+                ["C", "114:6:0"],
+                ["A", "151:6:0"],
+                ["B", "213:20:0"],
+                ["A\\nB", "264:16:0"],
+                ["B", "320:20:0"],
+                ["C", "487:6:0"],
+                ["A", "511:6:0"],
+                ["B", "561:20:0"],
+                ["A\\nB", "600:16:0"],
+                ["B", "644:20:0"],
+                ["C", "775:6:0"],
+                ["A", "817:6:0"],
+                ["B", "884:20:0"],
+                ["A\\nB", "940:16:0"],
+                ["B", "1001:20:0"],
+                ["goose", "1209:12:0"],
+                ["comment", "1236:14:0"],
+                ["this is a docstring", "1312:26:0"],
+                ["this is also a docstring", "1411:32:0"]
             ];
 
             for (const [src, text] of cases) {
diff --git a/test/samples/solidity/struct_doc_recovery.sol b/test/samples/solidity/struct_doc_recovery.sol
index 06b4a629..a4f8e5fb 100644
--- a/test/samples/solidity/struct_doc_recovery.sol
+++ b/test/samples/solidity/struct_doc_recovery.sol
@@ -1,7 +1,7 @@
     /// X
         /// Y
             /// Z
-contract Test {
+contract /** random */ Test /** garbage */ {
     /// A
         // B
     /// C
@@ -107,5 +107,18 @@ contract Test {
     /*
      * /// A
      */
-    function fG() external {}
+    function fG() external {
+        uint x;
+        if /** silly */ ( (x = 1) < 0) /** goose */ {
+            /** comment */
+        }
+
+        for /** haha"" 'asdfsad' */ (
+            /** this is a docstring */ x = 0 ;
+            x > 0;
+            /* fml /// "asd" */)
+            /** this is also a docstring  */ {
+
+        }
+    }
 }
diff --git a/test/samples/solidity/struct_docs_04.sourced.sol b/test/samples/solidity/struct_docs_04.sourced.sol
index 1a12b2a9..366a0189 100644
--- a/test/samples/solidity/struct_docs_04.sourced.sol
+++ b/test/samples/solidity/struct_docs_04.sourced.sol
@@ -10,8 +10,6 @@ contract StmtDocs04 {
         X,
         Y,
         Z
-        /// Enum
-        /// 
         /// Dangling
         /// Docstring
     }
@@ -22,8 +20,6 @@ contract StmtDocs04 {
     /// Docstring
     struct StructABC {
         uint a;
-        /// Struct
-        /// 
         /// Dangling
         /// Docstring
     }
diff --git a/test/samples/solidity/struct_docs_05.sourced.sol b/test/samples/solidity/struct_docs_05.sourced.sol
index 29cb9581..26b196fb 100644
--- a/test/samples/solidity/struct_docs_05.sourced.sol
+++ b/test/samples/solidity/struct_docs_05.sourced.sol
@@ -10,8 +10,6 @@ contract StmtDocs04 {
         X,
         Y,
         Z
-        /// Enum
-        /// 
         /// Dangling
         /// Docstring
     }
@@ -22,8 +20,6 @@ contract StmtDocs04 {
     /// Docstring
     struct StructABC {
         uint a;
-        /// Struct
-        /// 
         /// Dangling
         /// Docstring
     }
@@ -78,8 +74,6 @@ contract StmtDocs04 {
         return;
         /// Function body docstring
     }
-    /// Contract
-    /// 
     /// Dangling
     /// Docstring
 }
diff --git a/test/samples/solidity/unicode.sol b/test/samples/solidity/unicode.sol
new file mode 100644
index 00000000..5267f8d3
--- /dev/null
+++ b/test/samples/solidity/unicode.sol
@@ -0,0 +1,18 @@
+pragma solidity 0.8.22;
+
+contract Test {
+    function some() public {
+        /**
+         * no unicode chars 1
+         */
+        uint a = 0;
+        /**
+         *	😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀
+         */
+        uint b = 0;
+        /**
+         * no unicode chars 2
+         */
+        uint c = 0;
+    }
+}
\ No newline at end of file
diff --git a/test/samples/solidity/unicode_big.sol b/test/samples/solidity/unicode_big.sol
new file mode 100644
index 00000000..1af51def
--- /dev/null
+++ b/test/samples/solidity/unicode_big.sol
@@ -0,0 +1,393 @@
+pragma solidity 0.8.22;
+pragma abicoder v2;
+
+/**
+ * Struct
+ * 😀😀😀😀😀😀😀😀😀😀😀
+ * Doc
+ */
+struct Some {
+    uint x;
+
+    /**
+     * Struct 😀😀😀😀😀😀😀😀😀😀😀
+     *  Dangling
+     *   Doc
+     */
+}
+
+/**😀😀😀😀😀😀😀😀😀😀😀
+ * Enum 
+ * Doc
+ */
+enum EnumABC {
+    A, B, C
+
+    /**
+     * Enum
+     *  Dangling
+     *   Doc
+     */
+}
+
+contract UncheckedMathExample {
+    function test() public pure returns (uint) {
+        uint x = 0;
+
+        unchecked {
+            x--;
+        }
+
+        return x;
+    }
+}
+
+contract UsesNewAddressMembers {
+    function test() public {
+        bytes memory code = address(0).code;
+        bytes32 codeHash = address(0).codehash;
+    }
+}
+
+contract CatchPanic {
+    function test() public {
+        UsesNewAddressMembers c = new UsesNewAddressMembers();
+
+        try c.test() {
+            
+        } catch Error(string memory reason) {
+            revert(reason);
+        } catch Panic(uint _code) {
+            if (_code == 0x01) {
+                revert("Assertion failed");
+            } else if (_code == 0x11) {
+                revert("Underflow/overflow");
+            }
+        } catch {
+            revert("Internal error");
+        }
+    }
+}
+
+contract Features082 {
+    event Ev(uint a);
+
+    enum EnumXYZ {
+        X, Y, Z
+    }
+
+    modifier modStructDocs() {
+        /// PlaceholderState😀😀😀😀😀😀😀😀😀😀😀ment docstring
+        _;
+    }
+
+    function stmtStructDocs() modStructDocs() public {
+        /// VariableDeclarationStatement docstring
+        (uint a) = (1);
+
+        /// Expression😀😀😀😀😀😀😀😀😀😀😀Statement docstring
+        1;
+
+        /// Block docstring
+        {}
+
+        /// EmitStatement doc😀😀😀😀😀😀😀😀😀😀😀string
+        emit Ev(1);
+
+        /// WhileStatement docstring
+        while(false)
+        /// Body Block docstring
+        {
+            /// Continue docstring
+            continue;
+        }
+
+        /// DoWhile😀😀😀😀😀😀😀😀😀😀😀Statement docstring
+        do
+        /// Body Block docstring
+        {
+            /// Break docstring
+            break;
+        }
+        while(true);
+
+        /// ForStatement docstring
+        for (
+            /// Init VariableDeclarati😀😀😀😀😀😀😀😀😀😀😀onStatement docstring
+            (uint n) = (1);
+            /// Expression docstring
+            n < 1;
+            /// Post-lo😀😀😀😀😀😀😀😀😀😀😀op ExpressionStatement docstring
+            n++
+        ) 
+        /// Body😀😀😀😀😀😀😀😀😀😀😀 Block docstring
+        {}
+
+        /// IfStatement docstring
+        if (false)
+        /// True body Block docstring
+        {}
+        else
+        /// False body Block docstring
+        {}
+
+        CatchPanic cp = new CatchPanic();
+
+        /// TryStatement docstring
+        try cp.test()
+        /// Call TryCatchClause Block docstring
+        {}
+        /// Error TryCatchClause docstring
+        catch Error(string memory reason)
+        /// Error TryCatchClause Block docstring
+        {}
+        /// Panic TryCatchClause docstring
+        catch Panic(uint _code)
+        /// Panic TryCatchClau😀😀😀😀😀😀😀😀😀😀😀se Block docstring
+        {}
+        /// Fallback TryCatchClause docstring
+        catch
+        /// Fallback TryCatchClause Block docstring
+        {}
+
+        /// InlineAssembly docstring
+        assembly {}
+
+        /// UncheckedBlock docstring
+        unchecked {}
+
+        /// Return😀😀😀😀😀😀😀😀😀😀😀 docstring
+        return;
+    }
+}
+
+/// UnitLevelError e😀😀😀😀😀😀😀😀😀😀😀rror docstring
+error UnitLevelError084(uint code);
+
+library LibErrors084 {
+    /// LibErrors084.Li😀😀😀😀😀😀😀😀😀😀😀b error docstring
+    error Lib(bytes b);
+}
+
+contract Features084 {
+    /// Features😀😀😀😀😀😀😀😀😀😀😀084.Own error docstring
+    error Own();
+
+    function testAssemblyHexLiterals() public {
+        assembly {
+            let a := "test"
+            let x := hex"112233445566778899aabbccddeeff6677889900"
+            let y := hex"1234_abcd"
+            let z := "\xc3"
+
+            sstore(0, x)
+            sstore(1, y)
+
+            pop(hex"2233")
+        }
+    }
+
+    function testBytesConcatBuiltin(bytes memory a, bytes memory b) public pure returns (bytes memory c) {
+        return bytes.concat(a, b);
+    }
+
+    function testVariableDeclarationStatementDocString() public {
+        /// VariableDeclar😀😀😀😀😀😀😀😀😀😀😀ationStatement docstring
+        uint a = 10;
+    }
+
+    function revertWithLib() public {
+        /// RevertSt😀😀😀😀😀😀😀😀😀😀😀atement docstring
+        revert LibErrors084.Lib(hex"001122");
+    }
+
+    function revertWithOwn() public {
+        revert Own();
+    }
+
+    function revertWithUnitLevelError() public {
+        revert UnitLevelError084(1);
+    }
+}
+
+contract Features087 {
+    function basefeeGlobal() external view returns (uint) {
+        return block.basefee;
+    }
+
+    function basefeeInlineAssembly() external view returns (uint ret) {
+        assembly {
+            ret := basefee()
+        }
+    }
+}
+
+type Price is uint128;
+type Quantity is uint128;
+
+library LibWithUDVT_088 {
+    type UFixed is uint256;
+
+    uint constant multiplier = 10**18;
+
+    function add(UFixed a, UFixed b) internal pure returns (UFixed) {
+        return UFixed.wrap(UFixed.unwrap(a) + UFixed.unwrap(b));
+    }
+
+    function mul(UFixed a, uint256 b) internal pure returns (UFixed) {
+        return UFixed.wrap(UFixed.unwrap(a) * b);
+    }
+
+    function floor(UFixed a) internal pure returns (uint256) {
+        return UFixed.unwrap(a) / multiplier;
+    }
+
+    function toUFixed(uint256 a) internal pure returns (UFixed) {
+        return UFixed.wrap(a * multiplier);
+    }
+}
+
+interface InterfaceWithUDTV_088 {
+    /// Interface😀😀😀😀😀😀😀😀😀😀😀WithUDTV_088.EntityReference docstring
+    type EntityReference is address payable;
+
+    function balance(EntityReference er) external view returns(uint);
+}
+
+contract EnumTypeMinMax_088 {
+    function testEnumMinMax() public pure {
+        assert(type(EnumABC).min == EnumABC.A);
+        assert(type(EnumABC).max == EnumABC.C);
+    }
+}
+
+contract ExternalFnSelectorAndAddress_0810 {
+    function testFunction() external {}
+
+    function test(address newAddress, uint32 newSelector) view public returns (address adr, bytes4 sel) {
+        function() external fp = this.testFunction;
+
+        assembly {
+            let o := fp.address
+            let s := fp.selector
+
+            fp.address := newAddress
+            fp.selector := newSelector
+        }
+
+        return (fp.address, fp.selector);
+    }
+}
+
+contract Builtins_0811 {
+    function some(uint a, int b, bytes2 c) external pure returns(bytes2 x, int y, uint z) {
+        return (c, b, a);
+    }
+
+    function test() public view {
+        bytes memory payload = abi.encodeCall(this.some, (1, -1, 0xFFFF));
+    }
+}
+
+contract Features_0812 {
+    function () external externalStorage;
+
+    function comparePtr() public {
+        function () external externalLocal1;
+        function () external externalLocal2;
+
+        externalLocal1 == externalLocal2;
+        externalLocal1 != externalLocal2;
+
+        externalLocal1 == externalStorage;
+        externalStorage != externalLocal2;
+
+        abi.encodeCall(Builtins_0811.some, (1, -1, 0x0102));
+
+        string memory a = "abc";
+        string memory b = "def";
+        string memory c = string.concat(a, b);
+    }
+}
+
+type RestrictedNumber_0813 is int256;
+
+using A_0813 for RestrictedNumber_0813;
+using { plusOne, minusOne, A_0813.add } for RestrictedNumber_0813 global;
+
+function plusOne(RestrictedNumber_0813 x) pure returns (RestrictedNumber_0813) {
+    unchecked {
+        return RestrictedNumber_0813.wrap(RestrictedNumber_0813.unwrap(x) + 1);
+    }
+}
+
+function minusOne(RestrictedNumber_0813 x) pure returns (RestrictedNumber_0813) {
+    unchecked {
+        return RestrictedNumber_0813.wrap(RestrictedNumber_0813.unwrap(x) - 1);
+    }
+}
+
+function createRestrictedNumber_0813(int256 value) pure returns (RestrictedNumber_0813) {
+    require((value <= 100) && ((-value) <= 100));
+    return RestrictedNumber_0813.wrap(value);
+}
+
+library A_0813 {
+    function add(RestrictedNumber_0813 a, RestrictedNumber_0813 b) internal returns (RestrictedNumber_0813 c) {
+        c = RestrictedNumber_0813.wrap(RestrictedNumber_0813.unwrap(a) + RestrictedNumber_0813.unwrap(b));
+    }
+}
+
+contract Features_0813 {
+    function memorySafeAsm() public {
+        assembly ("memory-safe") {}
+        /// @solidity memory-safe-assembly
+        assembly {}
+    }
+}
+
+contract Features_0815 {
+    event SomeEvent(address indexed addr, uint indexed v);
+    error SomeError(address addr, uint v);
+
+    function privateFunc(uint x) private pure returns (uint z) {}
+
+    function checkSelectors() pure public returns (bytes32 ev, bytes4 er) {
+        ev = SomeEvent.selector;
+        er = SomeError.selector;
+
+        privateFunc(1);
+
+        assert(ev == 0xdde371250dcd21c331edbb965b9163f4898566e8c60e28868533281edf66ab03);
+        assert(er == 0x399802c9);
+    }
+}
+
+
+interface IntEvents {
+    event X(uint a);
+}
+
+library LibEvents {
+    event X(uint a);
+}
+
+event X(uint a);
+event Y(uint a) anonymous;
+
+contract Features_0822 {
+    event X(uint a);
+
+    function main() public {
+        emit IntEvents.X(1);
+        emit LibEvents.X(2);
+        /**
+         * Both followin😀😀😀😀😀😀😀😀😀😀😀g emits are referring to an event
+         * that is defined by contract (due to shadowing).
+         */
+        emit X(3);
+        emit Features_0822.X(4);
+
+        emit Y(5);
+    }
+}
diff --git a/test/unit/ast/ast_node.spec.ts b/test/unit/ast/ast_node.spec.ts
index e809bf6d..43747e33 100644
--- a/test/unit/ast/ast_node.spec.ts
+++ b/test/unit/ast/ast_node.spec.ts
@@ -5,13 +5,14 @@ import {
     ASTReader,
     Block,
     compileJson,
+    fromUTF8,
     FunctionDefinition,
     Literal,
     SourceUnit
 } from "../../../src";
 
 describe("ASTNode", () => {
-    const source = fse.readFileSync("test/samples/solidity/node.sol", { encoding: "utf-8" });
+    const source = new Uint8Array(fse.readFileSync("test/samples/solidity/node.sol"));
     const samples = new Map([
         ["0.4.13", "test/samples/solidity/node_0413.json"],
         ["0.5.0", "test/samples/solidity/node_050.json"]
@@ -101,7 +102,7 @@ describe("ASTNode", () => {
             it("extractSourceFragment()", () => {
                 const increment = nodes[nodes.length - 2];
 
-                expect(increment.extractSourceFragment(source)).toEqual("a++");
+                expect(increment.extractSourceFragment(source)).toEqual(fromUTF8("a++"));
             });
         });
     }
diff --git a/test/unit/ast/unicode.spec.ts b/test/unit/ast/unicode.spec.ts
new file mode 100644
index 00000000..7b95ac13
--- /dev/null
+++ b/test/unit/ast/unicode.spec.ts
@@ -0,0 +1,163 @@
+import expect from "expect";
+import fse from "fs-extra";
+import {
+    ASTKind,
+    ASTReader,
+    ASTWriter,
+    DefaultASTWriterMapping,
+    FileMap,
+    FunctionTypeName,
+    ParameterList,
+    PrettyFormatter,
+    SourceUnit,
+    SrcRangeMap,
+    StructuredDocumentation,
+    assert,
+    compileSol,
+    compileSourceString,
+    detectCompileErrors,
+    fromUTF8,
+    toUTF8
+} from "../../../src";
+
+const samples: string[] = [
+    "./test/samples/solidity/unicode.sol",
+    "./test/samples/solidity/unicode_big.sol"
+];
+/** Compiles `contents` as file `name`, asserts success, and reads back its only SourceUnit. */
+async function strToAst(
+    name: string,
+    contents: string,
+    version: string
+): Promise<[SourceUnit, ASTReader]> {
+    const sources: FileMap = new Map([[name, fromUTF8(contents)]]);
+    const canonicalResult = await compileSourceString(name, contents, version);
+
+    const errors = detectCompileErrors(canonicalResult.data);
+    assert(errors.length === 0, `Unexpected errors when compiling ${name}`);
+
+    const reader = new ASTReader();
+    const units = reader.read(canonicalResult.data, ASTKind.Modern, sources);
+
+    assert(units.length === 1, `Expected a single unit in ${name}`);
+
+    return [units[0], reader];
+}
+/** Renders `unit` to source text; also returns the SrcRangeMap handed to `writer.write()`. */
+function writeUnit(unit: SourceUnit, version: string): [string, SrcRangeMap] {
+    const formatter = new PrettyFormatter(4, 0);
+    const writer = new ASTWriter(DefaultASTWriterMapping, formatter, version);
+
+    const srcMap: SrcRangeMap = new Map();
+    return [writer.write(unit, srcMap), srcMap];
+}
+// Checks that source ranges stay valid as byte offsets when samples contain multi-byte characters
+describe("Unicode tests", () => {
+    for (const sample of samples) {
+        describe(sample, () => {
+            let unit: SourceUnit;
+            let reader: ASTReader;
+            let version: string;
+            let contents: Uint8Array;
+            let sources: FileMap;
+
+            before(async () => {
+                contents = fse.readFileSync(sample);
+                sources = new Map([[sample, contents]]);
+
+                const result = await compileSol(sample, "auto");
+
+                const errors = detectCompileErrors(result.data);
+                expect(errors).toHaveLength(0);
+
+                reader = new ASTReader();
+                const units = reader.read(result.data, ASTKind.Modern, sources);
+
+                expect(units).toHaveLength(1);
+
+                unit = units[0];
+
+                version = result.compilerVersion as string;
+            });
+
+            it("StructuredDocumentation source locations are computed correctly", () => {
+                const docs = [...unit.getChildrenByType(StructuredDocumentation)];
+
+                for (const doc of docs) {
+                    const coords = doc.sourceInfo;
+                    const actual = toUTF8(
+                        contents.slice(coords.offset, coords.offset + coords.length)
+                    ).trim();
+                    // NOTE(review): startsWith("/") already covers "/*"; "///" may be intended
+                    // The actual fragment should start with a comment
+                    expect(actual.startsWith("/*") || actual.startsWith("/")).toBeTruthy();
+
+                    // The actual fragment should be well terminated for block comments
+                    if (actual.startsWith("/*")) {
+                        expect(actual.endsWith("*/")).toBeTruthy();
+                    }
+
+                    // The actual fragment contains the text of the node
+                    // We need to check that the actual range in the file includes each line
+                    // of the computed text of the StructuredDocumentation. We do this line by line
+                    // since for block comments the .text doesn't include the * at the start of each line
+                    for (const line of doc.text.split("\n")) {
+                        expect(actual.includes(line)).toBeTruthy();
+                    }
+                }
+            });
+
+            it("Writer produces correct source maps", async () => {
+                const [canonicalContents] = writeUnit(unit, version);
+                const [canonicalUnit, canonicalReader] = await strToAst(
+                    sample,
+                    canonicalContents,
+                    version
+                );
+                // Write the canonical unit again and recompile; ids are then matched across both
+                const [newContents, newContentsSrcMap] = writeUnit(canonicalUnit, version);
+
+                const [newUnit] = await strToAst(sample, newContents, version);
+
+                for (const newNode of newUnit.getChildren(true)) {
+                    if (
+                        newNode instanceof ParameterList ||
+                        newNode instanceof StructuredDocumentation
+                    ) {
+                        continue;
+                    }
+
+                    const oldNode = canonicalReader.context.locate(newNode.id);
+
+                    assert(oldNode !== undefined, `No old node for id ${newNode.id} ${newNode}`);
+
+                    assert(
+                        oldNode.constructor.name === newNode.constructor.name,
+                        `Mismatch between {0} and {1}`,
+                        oldNode,
+                        newNode
+                    );
+
+                    const writerSrc = newContentsSrcMap.get(oldNode);
+
+                    assert(writerSrc !== undefined, `Missing writer src for {0}`, oldNode);
+
+                    const readSrc = newNode.sourceInfo;
+                    // writerSrc is compared as [offset, length] against the compiler-reported range
+                    expect(writerSrc[0]).toEqual(readSrc.offset);
+
+                    if (Math.abs(writerSrc[1] - readSrc.length) > 1) {
+                        // The solidity compiler erroneously includes the variable name in the source map
+                        // TODO: We should file a solidity bug
+                        if (newNode instanceof FunctionTypeName) {
+                            continue;
+                        }
+                    }
+
+                    // The length may be off by 1 since we may be a little inaccurate in source maps w.r.t ;
+                    expect(Math.abs(writerSrc[1] - readSrc.length)).toBeLessThanOrEqual(1);
+                }
+            });
+        });
+    }
+});
diff --git a/test/unit/compile/compiler_selection.spec.ts b/test/unit/compile/compiler_selection.spec.ts
index 61113f19..45308e57 100644
--- a/test/unit/compile/compiler_selection.spec.ts
+++ b/test/unit/compile/compiler_selection.spec.ts
@@ -7,6 +7,7 @@ import {
     CompilerVersions07,
     CompilerVersions08,
     CompilerVersionSelectionStrategy,
+    fromUTF8,
     LatestAndFirstVersionInEachSeriesStrategy,
     LatestCompilerVersion,
     LatestVersionInEachSeriesStrategy,
@@ -179,7 +180,7 @@ describe("VersionDetectionStrategy", () => {
             it(`Returns ${JSON.stringify(range)} for ${JSON.stringify(source)} and ${
                 fallback.constructor.name
             } in constructor`, () => {
-                const strategy = new VersionDetectionStrategy([source], fallback);
+                const strategy = new VersionDetectionStrategy([fromUTF8(source)], fallback);
 
                 expect(strategy.select()).toEqual(range);
             });
diff --git a/test/unit/compile/inference/findAllFiles.spec.ts b/test/unit/compile/inference/findAllFiles.spec.ts
index 6b848e4d..b7b34803 100644
--- a/test/unit/compile/inference/findAllFiles.spec.ts
+++ b/test/unit/compile/inference/findAllFiles.spec.ts
@@ -1,7 +1,7 @@
 import expect from "expect";
 import fse from "fs-extra";
 import { join } from "path";
-import { FileSystemResolver, findAllFiles } from "../../../../src";
+import { FileMap, FileSystemResolver, findAllFiles, fromUTF8 } from "../../../../src";
 
 const SAMPLES_DIR = join("test", "samples", "solidity");
 
@@ -48,8 +48,8 @@ const samples: Array<[string, string[]]> = [
 describe("findAllFiles() find all needed imports", () => {
     for (const [fileName, expectedAllFiles] of samples) {
         it(`All imports for ${fileName} should be ${expectedAllFiles.join(", ")}`, async () => {
-            const contents = fse.readFileSync(fileName).toString();
-            const files = new Map<string, string>([[fileName, contents]]);
+            const contents = fse.readFileSync(fileName);
+            const files: FileMap = new Map([[fileName, contents]]);
 
             await findAllFiles(files, new Map(), [], [new FileSystemResolver()]);
 
@@ -60,13 +60,13 @@ describe("findAllFiles() find all needed imports", () => {
 
 describe("findAllFiles() throws proper errors", () => {
     it("Parsing error", async () => {
-        const files = new Map<string, string>([
+        const files: FileMap = new Map([
             [
                 "foo.sol",
-                `import a
+                fromUTF8(`import a
 contract Foo {
 }
-`
+`)
             ]
         ]);
 
@@ -76,13 +76,13 @@ contract Foo {
     });
 
     it("Missing file error", async () => {
-        const files = new Map<string, string>([
+        const files: FileMap = new Map([
             [
                 "foo.sol",
-                `import "a.sol";
+                fromUTF8(`import "a.sol";
 contract Foo {
 }
-`
+`)
             ]
         ]);
 
diff --git a/test/unit/compile/utils.spec.ts b/test/unit/compile/utils.spec.ts
index a8abae2c..2216f93d 100644
--- a/test/unit/compile/utils.spec.ts
+++ b/test/unit/compile/utils.spec.ts
@@ -4,6 +4,8 @@ import {
     compileJson,
     CompilerKind,
     detectCompileErrors,
+    FileMap,
+    fromUTF8,
     getCompilerForVersion,
     LatestAndFirstVersionInEachSeriesStrategy,
     LatestCompilerVersion,
@@ -97,12 +99,12 @@ describe("Compile general utils", () => {
     });
 
     describe("compileJson()", () => {
-        const expectedFiles = new Map([
+        const expectedFiles: FileMap = new Map([
             [
                 "./test/sol_files/json_code/B.sol",
-                "import './A.sol';\n\ncontract B {\n    int16 test;\n}\n"
+                fromUTF8("import './A.sol';\n\ncontract B {\n    int16 test;\n}\n")
             ],
-            ["./test/sol_files/json_code/A.sol", "contract A {\n    uint8 test;\n}\n"]
+            ["./test/sol_files/json_code/A.sol", fromUTF8("contract A {\n    uint8 test;\n}\n")]
         ]);
 
         const cases: Array<[string, string | undefined, RegExp | undefined]> = [
diff --git a/tsconfig.json b/tsconfig.json
index 460726ee..272d88b5 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -25,5 +25,6 @@
         "resolveJsonModule": true
     },
 
-    "include": ["src/**/*"]
+    "include": ["src/**/*"],
+    "exclude": ["src/ast/comments/comments_parser_header.ts"]
 }