diff --git a/README.md b/README.md index 4c7c0fb2f..1e7496b57 100644 --- a/README.md +++ b/README.md @@ -20,22 +20,23 @@ JPlag is a system that finds similarities among multiple sets of source code fil In the following, a list of all supported languages with their supported language version is provided. A language can be selected from the command line using subcommands (jplag [jplag options] [language options]). Alternatively you can use the legacy "-l" argument. | Language | Version | CLI Argument Name | [state](https://github.com/jplag/JPlag/wiki/2.-Supported-Languages) | parser | -|--------------------------------------------------------|--------:|-------------------|:----------------------------------------------------------------:|:---------:| -| [Java](https://www.java.com) | 17 | java | mature | JavaC | -| [C/C++](https://isocpp.org) | 11 | cpp | legacy | JavaCC | -| [C/C++](https://isocpp.org) | 14 | cpp2 | beta | ANTLR 4 | -| [C#](https://docs.microsoft.com/en-us/dotnet/csharp/) | 6 | csharp | beta | ANTLR 4 | -| [Go](https://go.dev) | 1.17 | golang | beta | ANTLR 4 | -| [Kotlin](https://kotlinlang.org) | 1.3 | kotlin | beta | ANTLR 4 | -| [Python](https://www.python.org) | 3.6 | python3 | legacy | ANTLR 4 | -| [R](https://www.r-project.org/) | 3.5.0 | rlang | beta | ANTLR 4 | -| [Rust](https://www.rust-lang.org/) | 1.60.0 | rust | beta | ANTLR 4 | -| [Scala](https://www.scala-lang.org) | 2.13.8 | scala | beta | Scalameta | -| [Scheme](http://www.scheme-reports.org) | ? 
| scheme | unknown | JavaCC | -| [Swift](https://www.swift.org) | 5.4 | swift | beta | ANTLR 4 | -| [EMF Metamodel](https://www.eclipse.org/modeling/emf/) | 2.25.0 | emf | beta | EMF | -| [EMF Model](https://www.eclipse.org/modeling/emf/) | 2.25.0 | emf-model | alpha | EMF | -| Text (naive) | - | text | legacy | CoreNLP | +|--------------------------------------------------------|--------:|-------------------|:-------------------------------------------------------------------:|:---------:| +| [Java](https://www.java.com) | 17 | java | mature | JavaC | +| [C/C++](https://isocpp.org) | 11 | cpp | legacy | JavaCC | +| [C/C++](https://isocpp.org) | 14 | cpp2 | beta | ANTLR 4 | +| [C#](https://docs.microsoft.com/en-us/dotnet/csharp/) | 6 | csharp | beta | ANTLR 4 | +| [Go](https://go.dev) | 1.17 | golang | beta | ANTLR 4 | +| [Kotlin](https://kotlinlang.org) | 1.3 | kotlin | beta | ANTLR 4 | +| [Python](https://www.python.org) | 3.6 | python3 | legacy | ANTLR 4 | +| [R](https://www.r-project.org/) | 3.5.0 | rlang | beta | ANTLR 4 | +| [Rust](https://www.rust-lang.org/) | 1.60.0 | rust | beta | ANTLR 4 | +| [Scala](https://www.scala-lang.org) | 2.13.8 | scala | beta | Scalameta | +| [Scheme](http://www.scheme-reports.org) | ? | scheme | unknown | JavaCC | +| [Swift](https://www.swift.org) | 5.4 | swift | beta | ANTLR 4 | +| [EMF Metamodel](https://www.eclipse.org/modeling/emf/) | 2.25.0 | emf | beta | EMF | +| [EMF Model](https://www.eclipse.org/modeling/emf/) | 2.25.0 | emf-model | alpha | EMF | +| [LLVM IR](https://llvm.org) | 15 | llvmir | beta | ANTLR 4 | +| Text (naive) | - | text | legacy | CoreNLP | ## Download and Installation You need Java SE 17 to run or build JPlag. 
@@ -151,6 +152,7 @@ Commands: go java kotlin + llvmir python3 rlang rust diff --git a/cli/pom.xml b/cli/pom.xml index df278ecb7..3cf3fce27 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -102,6 +102,11 @@ emf-model ${revision} + + de.jplag + llvmir + ${revision} + org.kohsuke.metainf-services diff --git a/cli/src/test/java/de/jplag/cli/LanguageTest.java b/cli/src/test/java/de/jplag/cli/LanguageTest.java index e95b80cef..1ed766329 100644 --- a/cli/src/test/java/de/jplag/cli/LanguageTest.java +++ b/cli/src/test/java/de/jplag/cli/LanguageTest.java @@ -26,7 +26,7 @@ void testInvalidLanguage() { @Test void testLoading() { var languages = LanguageLoader.getAllAvailableLanguages(); - assertEquals(16, languages.size(), "Loaded Languages: " + languages.keySet()); + assertEquals(17, languages.size(), "Loaded Languages: " + languages.keySet()); } @Test diff --git a/coverage-report/pom.xml b/coverage-report/pom.xml index b385967b6..34f0a7fb7 100644 --- a/coverage-report/pom.xml +++ b/coverage-report/pom.xml @@ -121,6 +121,11 @@ emf-model ${revision} + + de.jplag + llvmir + ${revision} + diff --git a/languages/llvmir/README.md b/languages/llvmir/README.md new file mode 100644 index 000000000..6d5af395d --- /dev/null +++ b/languages/llvmir/README.md @@ -0,0 +1,28 @@ +# JPlag LLVM IR language module + +The JPlag LLVM IR module allows the use of JPlag with submissions in the LLVM IR.
+It is based on the [LLVM IR ANTLR4 grammar](https://github.com/antlr/grammars-v4/tree/master/llvm-ir), licensed under MIT. + +### LLVM IR specification compatibility + +The grammar definition targets LLVM 15, released in September 2022. + +The grammar in this repo contains a fix; see the comment in the [LLVM IR grammar](src/main/antlr4/de/jplag/llvmir/grammar/LLVMIR.g4). + +If the grammar is updated to a more recent<sup>1</sup> syntax definition, this module should be updated as well. + + +### Token Extraction + +The choice of tokens includes nesting tokens for functions and basic blocks and separate tokens for various elements. +These include binary and bitwise instructions (like `add` and `or`), memory operations (like `load` and `store`), terminator instructions (like branches), conversions, global variables, type definitions, constants and others. + + +### Usage + +To use the LLVM IR module, add the `-l llvmir` flag in the CLI, or use a `JPlagOptions` object with `new de.jplag.llvmir.LLVMIRLanguage()` as `language` in the Java API as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag). + +
+ +#### Footnotes +
<sup>1</sup> The grammar files are taken from [grammars-v4](https://github.com/antlr/grammars-v4), with the most recent modification in commit 768b12e from August 2023.
\ No newline at end of file diff --git a/languages/llvmir/pom.xml b/languages/llvmir/pom.xml new file mode 100644 index 000000000..c5306fb4c --- /dev/null +++ b/languages/llvmir/pom.xml @@ -0,0 +1,39 @@ + + + 4.0.0 + + + de.jplag + languages + ${revision} + + llvmir + + + + org.antlr + antlr4-runtime + + + de.jplag + language-antlr-utils + ${revision} + + + + + + + org.antlr + antlr4-maven-plugin + + + + antlr4 + + + + + + + diff --git a/languages/llvmir/src/main/antlr4/de/jplag/llvmir/grammar/LLVMIR.g4 b/languages/llvmir/src/main/antlr4/de/jplag/llvmir/grammar/LLVMIR.g4 new file mode 100644 index 000000000..ffb1eab85 --- /dev/null +++ b/languages/llvmir/src/main/antlr4/de/jplag/llvmir/grammar/LLVMIR.g4 @@ -0,0 +1,1445 @@ +/* + MIT License + + Copyright (c) 2023 邱维东 + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+ + Modifications: + Rename 'case' rule to 'case_' to match the generated case_() method in the LLVMIRParser + with the JavaDoc of the enterCase() and exitCase() methods of the LLVMIRListener + to fix a JavaDoc issue. + - Niklas Heneka + */ + +grammar LLVMIR; + +compilationUnit: topLevelEntity* EOF; + +targetDef: targetDataLayout | targetTriple; +sourceFilename: 'source_filename' '=' StringLit; +targetDataLayout: 'target' 'datalayout' '=' StringLit; +targetTriple: 'target' 'triple' '=' StringLit; + +topLevelEntity: + sourceFilename + | targetDef + | moduleAsm + | typeDef + | comdatDef + | globalDecl + | globalDef + | indirectSymbolDef + | funcDecl + | funcDef + | attrGroupDef + | namedMetadataDef + | metadataDef + | useListOrder + | useListOrderBB; +moduleAsm: 'module' 'asm' StringLit; +typeDef: LocalIdent '=' 'type' type; +comdatDef: + ComdatName '=' 'comdat' selectionKind = ( + 'any' + | 'exactmatch' + | 'largest' + | 'nodeduplicate' + | 'samesize' + ); +globalDecl: + GlobalIdent '=' externalLinkage preemption? visibility? dllStorageClass? threadLocal? + unnamedAddr? addrSpace? externallyInitialized? immutable type ( + ',' globalField + )* (',' metadataAttachment)* funcAttribute*; +globalDef: + GlobalIdent '=' internalLinkage? preemption? visibility? dllStorageClass? threadLocal? + unnamedAddr? addrSpace? externallyInitialized? immutable type constant ( + ',' globalField + )* (',' metadataAttachment)* funcAttribute*; + +indirectSymbolDef: + GlobalIdent '=' linkage? preemption? visibility? dllStorageClass? threadLocal? unnamedAddr? + indirectSymbolKind = ('alias' | 'ifunc') type ',' indirectSymbol ( + ',' partition + )*; + +funcDecl: 'declare' metadataAttachment* funcHeader; +funcDef: 'define' funcHeader metadataAttachment* funcBody; +attrGroupDef: + 'attributes' AttrGroupId '=' '{' funcAttribute* '}'; +namedMetadataDef: + MetadataName '=' '!' '{' (metadataNode (',' metadataNode)*)? '}'; +metadataDef: + MetadataId '=' distinct? 
(mdTuple | specializedMDNode); +useListOrder: + 'uselistorder' typeValue ',' '{' IntLit (',' IntLit)* '}'; +useListOrderBB: + 'uselistorder_bb' GlobalIdent ',' LocalIdent ',' '{' IntLit ( + ',' IntLit + )* '}'; + +funcHeader: + linkage? preemption? visibility? dllStorageClass? callingConv? returnAttribute* type GlobalIdent + '(' params ')' unnamedAddr? addrSpace? funcHdrField*; +indirectSymbol: + typeConst + | bitCastExpr + | getElementPtrExpr + | addrSpaceCastExpr + | intToPtrExpr; +callingConv: callingConvEnum | callingConvInt; +callingConvInt: 'cc' IntLit; +funcHdrField: + funcAttribute + | section + | partition + | comdat + | align + | gc + | prefix + | prologue + | personality; +gc: 'gc' StringLit; +prefix: 'prefix' typeConst; +prologue: 'prologue' typeConst; +personality: 'personality' typeConst; +returnAttribute: + returnAttr + | dereferenceable + | align; +funcBody: '{' basicBlock+ useListOrder* '}'; +basicBlock: LabelIdent? instruction* terminator; +instruction: // Instructions producing values. + localDefInst + | valueInstruction + // Instructions not producing values. + | storeInst + | fenceInst; +terminator: + // Terminators producing values. + localDefTerm + | valueTerminator + // Terminators not producing values. + | retTerm + | brTerm + | condBrTerm + | switchTerm + | indirectBrTerm + | resumeTerm + | catchRetTerm + | cleanupRetTerm + | unreachableTerm; +localDefTerm: LocalIdent '=' valueTerminator; +valueTerminator: invokeTerm | callBrTerm | catchSwitchTerm; +retTerm: + 'ret' 'void' (',' metadataAttachment)* + // Value return. + | 'ret' concreteType value (',' metadataAttachment)*; +brTerm: 'br' label (',' metadataAttachment)*; +condBrTerm: + 'br' IntType value ',' label ',' label ( + ',' metadataAttachment + )*; +switchTerm: + 'switch' typeValue ',' label '[' case_* ']' ( + ',' metadataAttachment + )*; +indirectBrTerm: + 'indirectbr' typeValue ',' '[' (label (',' label)?)? 
']' ( + ',' metadataAttachment + )*; +resumeTerm: 'resume' typeValue (',' metadataAttachment)*; +catchRetTerm: + 'catchret' 'from' value 'to' label (',' metadataAttachment)*; +cleanupRetTerm: + 'cleanupret' 'from' value 'unwind' unwindTarget ( + ',' metadataAttachment + )*; +unreachableTerm: 'unreachable' (',' metadataAttachment)*; +invokeTerm: + 'invoke' callingConv? returnAttribute* addrSpace? type value '(' args ')' funcAttribute* ( + '[' (operandBundle ',')+ ']' + )? 'to' label 'unwind' label (',' metadataAttachment)*; +callBrTerm: + 'callbr' callingConv? returnAttribute* addrSpace? type value '(' args ')' funcAttribute* ( + '[' (operandBundle ',')+ ']' + )? 'to' label '[' (label (',' label)*)? ']' ( + ',' metadataAttachment + )*; +catchSwitchTerm: + 'catchswitch' 'within' exceptionPad '[' handlers ']' 'unwind' unwindTarget ( + ',' metadataAttachment + )*; +label: 'label' LocalIdent; +case_: typeConst ',' label; +unwindTarget: 'to' 'caller' | label; +handlers: label (',' label)*; +metadataNode: + MetadataId + // Parse DIExpressions inline as a special case. They are still MDNodes, so they can still + // appear in named metadata. Remove this logic if they become plain Metadata. + | diExpression; +diExpression: + '!DIExpression' '(' ( + diExpressionField (',' diExpressionField)* + )? 
')'; +diExpressionField: IntLit | DwarfAttEncoding | DwarfOp; + +globalField: + section + | partition + | comdat + | align + | sanitizerKind = ( + 'no_sanitize_address' + | 'no_sanitize_hwaddress' + | 'sanitize_address_dyninit' + | 'sanitize_memtag' + ); +section: 'section' StringLit; +comdat: 'comdat' ('(' ComdatName ')')?; +partition: 'partition' StringLit; + +constant: + boolConst + | intConst + | floatConst + | nullConst + | noneConst + | structConst + | arrayConst + | vectorConst + | zeroInitializerConst + // @42 @foo + | GlobalIdent + | undefConst + | poisonConst + | blockAddressConst + | dsoLocalEquivalentConst + | noCFIConst + | constantExpr; +boolConst: 'true' | 'false'; +intConst: IntLit; +floatConst: FloatLit; +nullConst: 'null'; +noneConst: 'none'; +structConst: + '{' (typeConst (',' typeConst)*)? '}' + | '<' '{' ( typeConst (',' typeConst)*)? '}' '>'; +arrayConst: + 'c' StringLit + | '[' (typeConst (',' typeConst)*)? ']'; +vectorConst: '<' (typeConst (',' typeConst)*)? '>'; +zeroInitializerConst: 'zeroinitializer'; +undefConst: 'undef'; +poisonConst: 'poison'; +blockAddressConst: + 'blockaddress' '(' GlobalIdent ',' LocalIdent ')'; +dsoLocalEquivalentConst: 'dso_local_equivalent' GlobalIdent; +noCFIConst: 'no_cfi' GlobalIdent; +constantExpr: + // Unary expressions + fNegExpr + // Binary expressions + | addExpr + | subExpr + | mulExpr + // Bitwise expressions + | shlExpr + | lShrExpr + | aShrExpr + | andExpr + | orExpr + | xorExpr + // Vector expressions + | extractElementExpr + | insertElementExpr + | shuffleVectorExpr + // Memory expressions + | getElementPtrExpr + // Conversion expressions + | truncExpr + | zExtExpr + | sExtExpr + | fpTruncExpr + | fpExtExpr + | fpToUiExpr + | fpToSiExpr + | uiToFpExpr + | siToFpExpr + | ptrToIntExpr + | intToPtrExpr + | bitCastExpr + | addrSpaceCastExpr + // Other expressions + | iCmpExpr + | fCmpExpr + | selectExpr; +typeConst: firstClassType constant; + +metadataAttachment: MetadataName mdNode; +mdNode: + mdTuple 
+ // !42 + | MetadataId + //!{ ... } + | specializedMDNode; +mdTuple: '!' '{' (mdField (',' mdField)*)? '}'; +// metadataID: MetadataId; +metadata: + typeValue + | mdString + // !{ ... } + | mdTuple + // !7 + | MetadataId + | diArgList + | specializedMDNode; +diArgList: '!DIArgList' '(' (typeValue (',' typeValue)*)? ')'; +typeValue: firstClassType value; +value: + constant + // %42 %foo + | LocalIdent + // TODO: Move InlineAsm from Value to Callee and Invokee? Inline assembler expressions may only + // be used as the callee operand of a call or an invoke instruction. + | inlineAsm; +inlineAsm: + 'asm' sideEffect = 'sideeffect'? alignStackTok = 'alignstack'? intelDialect = 'inteldialect'? + unwind = 'unwind'? StringLit ',' StringLit; +mdString: '!' StringLit; +mdFieldOrInt: IntLit | mdField; +diSPFlag: IntLit | DispFlag; +funcAttribute: + attrString + | attrPair + // not used in attribute groups. + | AttrGroupId + // used in functions. | align # NOTE: removed to resolve reduce/reduce conflict, see above. used + // in attribute groups. + | alignPair + | alignStack + | alignStackPair + | allocKind + | allocSize + | funcAttr + | preallocated + | unwindTable + | vectorScaleRange; +type: + 'void' + | 'opaque' + | type '(' params ')' + | intType + | floatType + | type addrSpace? '*' + | opaquePointerType + | vectorType + | labelType + | arrayType + | structType + | namedType + | mmxType + | tokenType + | metadataType; +params: + ellipsis = '...'? 
+ | param (',' param)* (',' ellipsis = '...')?; +param: type paramAttribute* LocalIdent?; +paramAttribute: + attrString + | attrPair + | align + | alignStack + | byRefAttr + | byval + | dereferenceable + | elementType + | inAlloca + | paramAttr + | preallocated + | structRetAttr; +attrString: StringLit; +attrPair: StringLit '=' StringLit; +align: 'align' IntLit | 'align' '(' IntLit ')'; +alignPair: 'align' '=' IntLit; +alignStack: 'alignstack' '(' IntLit ')'; +alignStackPair: 'alignstack' '=' IntLit; +allocKind: 'allockind' '(' StringLit ')'; +allocSize: 'allocsize' '(' IntLit (',' IntLit)? ')'; +unwindTable: + 'uwtable' + | 'uwtable' '(' unwindTableKind = ('async' | 'sync') ')'; +vectorScaleRange: + 'vscale_range' ('(' (IntLit | IntLit ',' IntLit) ')')?; +byRefAttr: 'byref' '(' type ')'; +byval: 'byval' ( '(' type ')')?; +dereferenceable: + 'dereferenceable' '(' IntLit ')' + | 'dereferenceable_or_null' '(' IntLit ')'; +elementType: 'elementtype' '(' type ')'; +inAlloca: 'inalloca' '(' type ')'; +paramAttr: + 'allocalign' + | 'allocptr' + | 'immarg' + | 'inreg' + | 'nest' + | 'noalias' + | 'nocapture' + | 'nofree' + | 'nonnull' + | 'noundef' + | 'readnone' + | 'readonly' + | 'returned' + | 'signext' + | 'swiftasync' + | 'swifterror' + | 'swiftself' + | 'writeonly' + | 'zeroext'; +preallocated: 'preallocated' '(' type ')'; +structRetAttr: 'sret' '(' type ')'; + +// funcType: type '(' params ')'; +firstClassType: concreteType | metadataType; +concreteType: + intType + | floatType + | pointerType + | vectorType + | labelType + | arrayType + | structType + | namedType + | mmxType + | tokenType; + +intType: IntType; +floatType: floatKind; +pointerType: type addrSpace? '*' | opaquePointerType; +vectorType: + '<' IntLit 'x' type '>' + | '<' 'vscale' 'x' IntLit 'x' type '>'; +labelType: 'label'; +arrayType: '[' IntLit 'x' type ']'; +structType: + '{' (type (',' type)*)? '}' + | '<' '{' (type (',' type)*)? 
'}' '>'; +namedType: LocalIdent; +mmxType: 'x86_mmx'; +tokenType: 'token'; + +opaquePointerType: 'ptr' addrSpace?; +addrSpace: 'addrspace' '(' IntLit ')'; +threadLocal: 'thread_local' ('(' tlsModel ')')?; +metadataType: 'metadata'; + +// expr +bitCastExpr: 'bitcast' '(' typeConst 'to' type ')'; +getElementPtrExpr: + 'getelementptr' inBounds? '(' type ',' typeConst ( + ',' gepIndex + )* ')'; +gepIndex: inRange = 'inrange'? typeConst; +addrSpaceCastExpr: 'addrspacecast' '(' typeConst 'to' type ')'; +intToPtrExpr: 'inttoptr' '(' typeConst 'to' type ')'; +iCmpExpr: 'icmp' iPred '(' typeConst ',' typeConst ')'; +fCmpExpr: 'fcmp' fPred '(' typeConst ',' typeConst ')'; +selectExpr: + 'select' '(' typeConst ',' typeConst ',' typeConst ')'; + +truncExpr: 'trunc' '(' typeConst 'to' type ')'; +zExtExpr: 'zext' '(' typeConst 'to' type ')'; +sExtExpr: 'sext' '(' typeConst 'to' type ')'; +fpTruncExpr: 'fptrunc' '(' typeConst 'to' type ')'; +fpExtExpr: 'fpext' '(' typeConst 'to' type ')'; +fpToUiExpr: 'fptoui' '(' typeConst 'to' type ')'; +fpToSiExpr: 'fptosi' '(' typeConst 'to' type ')'; +uiToFpExpr: 'uitofp' '(' typeConst 'to' type ')'; +siToFpExpr: 'sitofp' '(' typeConst 'to' type ')'; +ptrToIntExpr: 'ptrtoint' '(' typeConst 'to' type ')'; +extractElementExpr: + 'extractelement' '(' typeConst ',' typeConst ')'; +insertElementExpr: + 'insertelement' '(' typeConst ',' typeConst ',' typeConst ')'; +shuffleVectorExpr: + 'shufflevector' '(' typeConst ',' typeConst ',' typeConst ')'; +shlExpr: 'shl' overflowFlag* '(' typeConst ',' typeConst ')'; +lShrExpr: + 'lshr' exact = 'exact'? '(' typeConst ',' typeConst ')'; +aShrExpr: + 'ashr' exact = 'exact'? 
'(' typeConst ',' typeConst ')'; +andExpr: 'and' '(' typeConst ',' typeConst ')'; +orExpr: 'or' '(' typeConst ',' typeConst ')'; +xorExpr: 'xor' '(' typeConst ',' typeConst ')'; +addExpr: 'add' overflowFlag* '(' typeConst ',' typeConst ')'; +subExpr: 'sub' overflowFlag* '(' typeConst ',' typeConst ')'; +mulExpr: 'mul' overflowFlag* '(' typeConst ',' typeConst ')'; +fNegExpr: 'fneg' '(' typeConst ')'; + +// instructions +localDefInst: LocalIdent '=' valueInstruction; +valueInstruction: + // Unary instructions + fNegInst + // Binary instructions + | addInst + | fAddInst + | subInst + | fSubInst + | mulInst + | fMulInst + | uDivInst + | sDivInst + | fDivInst + | uRemInst + | sRemInst + | fRemInst + // Bitwise instructions + | shlInst + | lShrInst + | aShrInst + | andInst + | orInst + | xorInst + // Vector instructions + | extractElementInst + | insertElementInst + | shuffleVectorInst + // Aggregate instructions + | extractValueInst + | insertValueInst + // Memory instructions + | allocaInst + | loadInst + | cmpXchgInst + | atomicRMWInst + | getElementPtrInst + // Conversion instructions + | truncInst + | zExtInst + | sExtInst + | fpTruncInst + | fpExtInst + | fpToUiInst + | fpToSiInst + | uiToFpInst + | siToFpInst + | ptrToIntInst + | intToPtrInst + | bitCastInst + | addrSpaceCastInst + // Other instructions + | iCmpInst + | fCmpInst + | phiInst + | selectInst + | freezeInst + | callInst + | vaargInst + | landingPadInst + | catchPadInst + | cleanupPadInst; +storeInst: + // Store. + 'store' volatile = 'volatile'? typeValue ',' typeValue ( + ',' align + )? (',' metadataAttachment)* + // atomic='atomic' store. + | 'store' atomic = 'atomic' volatile = 'volatile'? typeValue ',' typeValue syncScope? + atomicOrdering (',' align)? (',' metadataAttachment)*; + +syncScope: 'syncscope' '(' StringLit ')'; + +fenceInst: + 'fence' syncScope? 
atomicOrdering (',' metadataAttachment)*; +fNegInst: + 'fneg' fastMathFlag* typeValue (',' metadataAttachment)*; +addInst: + 'add' overflowFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +fAddInst: + 'fadd' fastMathFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +subInst: + 'sub' overflowFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +fSubInst: + 'fsub' fastMathFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +mulInst: + 'mul' overflowFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +fMulInst: + 'fmul' fastMathFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +uDivInst: + 'udiv' exact = 'exact'? typeValue ',' value ( + ',' metadataAttachment + )*; +sDivInst: + 'sdiv' exact = 'exact'? typeValue ',' value ( + ',' metadataAttachment + )*; +fDivInst: + 'fdiv' fastMathFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +uRemInst: 'urem' typeValue ',' value ( ',' metadataAttachment)*; +sRemInst: 'srem' typeValue ',' value ( ',' metadataAttachment)*; +fRemInst: + 'frem' fastMathFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +shlInst: + 'shl' overflowFlag* typeValue ',' value ( + ',' metadataAttachment + )*; +lShrInst: + 'lshr' exact = 'exact'? typeValue ',' value ( + ',' metadataAttachment + )*; +aShrInst: + 'ashr' exact = 'exact'? 
typeValue ',' value ( + ',' metadataAttachment + )*; +andInst: 'and' typeValue ',' value ( ',' metadataAttachment)*; +orInst: 'or' typeValue ',' value ( ',' metadataAttachment)*; +xorInst: 'xor' typeValue ',' value ( ',' metadataAttachment)*; +extractElementInst: + 'extractelement' typeValue ',' typeValue ( + ',' metadataAttachment + )*; +insertElementInst: + 'insertelement' typeValue ',' typeValue ',' typeValue ( + ',' metadataAttachment + )*; +shuffleVectorInst: + 'shufflevector' typeValue ',' typeValue ',' typeValue ( + ',' metadataAttachment + )*; +extractValueInst: + 'extractvalue' typeValue (',' IntLit)+ ( + ',' metadataAttachment + )*; +insertValueInst: + 'insertvalue' typeValue ',' typeValue (',' IntLit)+ ( + ',' metadataAttachment + )*; +allocaInst: + 'alloca' inAllocaTok = 'inalloca'? swiftError = 'swifterror'? type ( + ',' typeValue + )? (',' align)? (',' addrSpace)? (',' metadataAttachment)*; +loadInst: + // Load. + 'load' volatile = 'volatile'? type ',' typeValue (',' align)? ( + ',' metadataAttachment + )* + // atomic='atomic' load. + | 'load' atomic = 'atomic' volatile = 'volatile'? type ',' typeValue syncScope? atomicOrdering ( + ',' align + )? (',' metadataAttachment)*; +cmpXchgInst: + 'cmpxchg' weak = 'weak'? volatile = 'volatile'? typeValue ',' typeValue ',' typeValue syncScope? + atomicOrdering atomicOrdering (',' align)? ( + ',' metadataAttachment + )*; +atomicRMWInst: + 'atomicrmw' volatile = 'volatile'? atomicOp typeValue ',' typeValue syncScope? atomicOrdering ( + ',' align + )? (',' metadataAttachment)*; +getElementPtrInst: + 'getelementptr' inBounds? 
type ',' typeValue (',' typeValue)* ( + ',' metadataAttachment + )*; +truncInst: + 'trunc' typeValue 'to' type (',' metadataAttachment)*; +zExtInst: 'zext' typeValue 'to' type ( ',' metadataAttachment)*; +sExtInst: 'sext' typeValue 'to' type ( ',' metadataAttachment)*; +fpTruncInst: + 'fptrunc' typeValue 'to' type (',' metadataAttachment)*; +fpExtInst: + 'fpext' typeValue 'to' type (',' metadataAttachment)*; +fpToUiInst: + 'fptoui' typeValue 'to' type (',' metadataAttachment)*; +fpToSiInst: + 'fptosi' typeValue 'to' type (',' metadataAttachment)*; +uiToFpInst: + 'uitofp' typeValue 'to' type (',' metadataAttachment)*; +siToFpInst: + 'sitofp' typeValue 'to' type (',' metadataAttachment)*; +ptrToIntInst: + 'ptrtoint' typeValue 'to' type (',' metadataAttachment)*; +intToPtrInst: + 'inttoptr' typeValue 'to' type (',' metadataAttachment)*; +bitCastInst: + 'bitcast' typeValue 'to' type (',' metadataAttachment)*; +addrSpaceCastInst: + 'addrspacecast' typeValue 'to' type (',' metadataAttachment)*; +iCmpInst: + 'icmp' iPred typeValue ',' value (',' metadataAttachment)*; +fCmpInst: + 'fcmp' fastMathFlag* fPred typeValue ',' value ( + ',' metadataAttachment + )*; +phiInst: + 'phi' fastMathFlag* type (inc (',' inc)*) ( + ',' metadataAttachment + )*; +selectInst: + 'select' fastMathFlag* typeValue ',' typeValue ',' typeValue ( + ',' metadataAttachment + )*; +freezeInst: 'freeze' typeValue; +callInst: + tail = ('musttail' | 'notail' | 'tail')? 'call' fastMathFlag* callingConv? returnAttribute* + addrSpace? type value '(' args ')' funcAttribute* ( + '[' operandBundle (',' operandBundle)* ']' + )? (',' metadataAttachment)*; +vaargInst: + 'va_arg' typeValue ',' type (',' metadataAttachment)*; +landingPadInst: + 'landingpad' type cleanUp = 'cleanup'? clause* ( + ',' metadataAttachment + )*; +catchPadInst: + 'catchpad' 'within' LocalIdent '[' ( + exceptionArg (',' exceptionArg)* + )? 
']' (',' metadataAttachment)*; +cleanupPadInst: + 'cleanuppad' 'within' exceptionPad '[' ( + exceptionArg (',' exceptionArg)* + )? ']' (',' metadataAttachment)*; + +inc: '[' value ',' LocalIdent ']'; + +operandBundle: StringLit '(' (typeValue (',' typeValue)*)? ')'; +clause: clauseType = ('catch' | 'filter') typeValue; + +args: + ellipsis = '...'? + | arg (',' arg)* (',' ellipsis = '...')?; +arg: concreteType paramAttribute* value | metadataType metadata; + +exceptionArg: concreteType value | metadataType metadata; +exceptionPad: noneConst | LocalIdent; + +externalLinkage: 'extern_weak' | 'external'; +internalLinkage: + 'appending' + | 'available_externally' + | 'common' + | 'internal' + | 'linkonce' + | 'linkonce_odr' + | 'private' + | 'weak' + | 'weak_odr'; +linkage: internalLinkage | externalLinkage; +preemption: 'dso_local' | 'dso_preemptable'; +visibility: 'default' | 'hidden' | 'protected'; +dllStorageClass: 'dllexport' | 'dllimport'; +tlsModel: 'initialexec' | 'localdynamic' | 'localexec'; +unnamedAddr: 'local_unnamed_addr' | 'unnamed_addr'; +externallyInitialized: 'externally_initialized'; +immutable: 'constant' | 'global'; +funcAttr: + 'alwaysinline' + | 'argmemonly' + | 'builtin' + | 'cold' + | 'convergent' + | 'disable_sanitizer_instrumentation' + | 'fn_ret_thunk_extern' + | 'hot' + | 'inaccessiblemem_or_argmemonly' + | 'inaccessiblememonly' + | 'inlinehint' + | 'jumptable' + | 'minsize' + | 'mustprogress' + | 'naked' + | 'nobuiltin' + | 'nocallback' + | 'nocf_check' + | 'noduplicate' + | 'nofree' + | 'noimplicitfloat' + | 'noinline' + | 'nomerge' + | 'nonlazybind' + | 'noprofile' + | 'norecurse' + | 'noredzone' + | 'noreturn' + | 'nosanitize_bounds' + | 'nosanitize_coverage' + | 'nosync' + | 'nounwind' + | 'null_pointer_is_valid' + | 'optforfuzzing' + | 'optnone' + | 'optsize' + | 'presplitcoroutine' + | 'readnone' + | 'readonly' + | 'returns_twice' + | 'safestack' + | 'sanitize_address' + | 'sanitize_hwaddress' + | 'sanitize_memory' + | 
'sanitize_memtag' + | 'sanitize_thread' + | 'shadowcallstack' + | 'speculatable' + | 'speculative_load_hardening' + | 'ssp' + | 'sspreq' + | 'sspstrong' + | 'strictfp' + | 'willreturn' + | 'writeonly'; +distinct: 'distinct'; +inBounds: 'inbounds'; +returnAttr: + 'inreg' + | 'noalias' + | 'nonnull' + | 'noundef' + | 'signext' + | 'zeroext'; +overflowFlag: 'nsw' | 'nuw'; +iPred: + 'eq' + | 'ne' + | 'sge' + | 'sgt' + | 'sle' + | 'slt' + | 'uge' + | 'ugt' + | 'ule' + | 'ult'; +fPred: + 'false' + | 'oeq' + | 'oge' + | 'ogt' + | 'ole' + | 'olt' + | 'one' + | 'ord' + | 'true' + | 'ueq' + | 'uge' + | 'ugt' + | 'ule' + | 'ult' + | 'une' + | 'uno'; +atomicOrdering: + 'acq_rel' + | 'acquire' + | 'monotonic' + | 'release' + | 'seq_cst' + | 'unordered'; +callingConvEnum: + 'aarch64_sve_vector_pcs' + | 'aarch64_vector_pcs' + | 'amdgpu_cs' + | 'amdgpu_es' + | 'amdgpu_gfx' + | 'amdgpu_gs' + | 'amdgpu_hs' + | 'amdgpu_kernel' + | 'amdgpu_ls' + | 'amdgpu_ps' + | 'amdgpu_vs' + | 'anyregcc' + | 'arm_aapcs_vfpcc' + | 'arm_aapcscc' + | 'arm_apcscc' + | 'avr_intrcc' + | 'avr_signalcc' + | 'ccc' + | 'cfguard_checkcc' + | 'coldcc' + | 'cxx_fast_tlscc' + | 'fastcc' + | 'ghccc' + | 'hhvm_ccc' + | 'hhvmcc' + | 'intel_ocl_bicc' + | 'msp430_intrcc' + | 'preserve_allcc' + | 'preserve_mostcc' + | 'ptx_device' + | 'ptx_kernel' + | 'spir_func' + | 'spir_kernel' + | 'swiftcc' + | 'swifttailcc' + | 'tailcc' + | 'webkit_jscc' + | 'win64cc' + | 'x86_64_sysvcc' + | 'x86_fastcallcc' + | 'x86_intrcc' + | 'x86_regcallcc' + | 'x86_stdcallcc' + | 'x86_thiscallcc' + | 'x86_vectorcallcc'; + +fastMathFlag: + 'afn' + | 'arcp' + | 'contract' + | 'fast' + | 'ninf' + | 'nnan' + | 'nsz' + | 'reassoc'; +atomicOp: + 'add' + | 'and' + | 'fadd' + | 'fmax' + | 'fmin' + | 'fsub' + | 'max' + | 'min' + | 'nand' + | 'or' + | 'sub' + | 'umax' + | 'umin' + | 'xchg' + | 'xor'; +floatKind: + 'half' + | 'bfloat' + | 'float' + | 'double' + | 'x86_fp80' + | 'fp128' + | 'ppc_fp128'; +/*看不懂,直接抄过来的 */ +specializedMDNode: + diBasicType 
+ | diCommonBlock // not in spec as of 2019-12-05 + | diCompileUnit + | diCompositeType + | diDerivedType + | diEnumerator + | diExpression + | diFile + | diGlobalVariable + | diGlobalVariableExpression + | diImportedEntity + | diLabel // not in spec as of 2018-10-14, still not in spec as of 2019-12-05 + | diLexicalBlock + | diLexicalBlockFile + | diLocalVariable + | diLocation + | diMacro + | diMacroFile + | diModule // not in spec as of 2018-02-21, still not in spec as of 2019-12-05 + | diNamespace + | diObjCProperty + | diStringType + | diSubprogram + | diSubrange + | diSubroutineType + | diTemplateTypeParameter + | diTemplateValueParameter + | genericDiNode; // not in spec as of 2018-02-21, still not in spec as of 2019-12-05 + +diBasicType: + '!DIBasicType' '(' (diBasicTypeField (',' diBasicTypeField)*)? ')'; +diCommonBlock: + '!DICommonBlock' '(' ( + diCommonBlockField (',' diCommonBlockField)* + )? ')'; +diCompileUnit: + '!DICompileUnit' '(' ( + diCompileUnitField (',' diCompileUnitField)* + )? ')'; +diCompositeType: + '!DICompositeType' '(' ( + diCompositeTypeField (',' diCompositeTypeField)* + )? ')'; +diCompositeTypeField: + tagField + | nameField + | scopeField + | fileField + | lineField + | baseTypeField + | sizeField + | alignField + | offsetField + | flagsField + | elementsField + | runtimeLangField + | vtableHolderField + | templateParamsField + | identifierField + | discriminatorField + | dataLocationField + | associatedField + | allocatedField + | rankField + | annotationsField; +diDerivedType: + '!DIDerivedType' '(' ( + diDerivedTypeField (',' diDerivedTypeField)* + )? ')'; +diDerivedTypeField: + tagField + | nameField + | scopeField + | fileField + | lineField + | baseTypeField + | sizeField + | alignField + | offsetField + | flagsField + | extraDataField + | dwarfAddressSpaceField + | annotationsField; +diEnumerator: + '!DIEnumerator' '(' ( + diEnumeratorField (',' diEnumeratorField)* + )? 
')'; +diEnumeratorField: nameField | valueIntField | isUnsignedField; +diFile: '!DIFile' '(' (diFileField (',' diFileField)*)? ')'; +diFileField: + filenameField + | directoryField + | checksumkindField + | checksumField + | sourceField; +diGlobalVariable: + '!DIGlobalVariable' '(' ( + diGlobalVariableField (',' diGlobalVariableField)* + )? ')'; +diGlobalVariableField: + nameField + | scopeField + | linkageNameField + | fileField + | lineField + | typeField + | isLocalField + | isDefinitionField + | templateParamsField + | declarationField + | alignField + | annotationsField; +diGlobalVariableExpression: + '!DIGlobalVariableExpression' '(' ( + diGlobalVariableExpressionField ( + ',' diGlobalVariableExpressionField + )* + )? ')'; +diGlobalVariableExpressionField: varField | exprField; +diImportedEntity: + '!DIImportedEntity' '(' ( + diImportedEntityField (',' diImportedEntityField)* + )? ')'; +diImportedEntityField: + tagField + | scopeField + | entityField + | fileField + | lineField + | nameField + | elementsField; + +diLabel: '!DILabel' '(' (diLabelField (',' diLabelField)*)? ')'; +diLabelField: scopeField | nameField | fileField | lineField; +diLexicalBlock: + '!DILexicalBlock' '(' ( + diLexicalBlockField (',' diLexicalBlockField)* + )? ')'; +diLexicalBlockField: + scopeField + | fileField + | lineField + | columnField; +diLexicalBlockFile: + '!DILexicalBlockFile' '(' ( + diLexicalBlockFileField (',' diLexicalBlockFileField)* + )? ')'; +diLexicalBlockFileField: + scopeField + | fileField + | discriminatorIntField; +diLocalVariable: + '!DILocalVariable' '(' ( + diLocalVariableField (',' diLocalVariableField)* + )? ')'; +diLocalVariableField: + scopeField + | nameField + | argField + | fileField + | lineField + | typeField + | flagsField + | alignField + | annotationsField; +diLocation: + '!DILocation' '(' (diLocationField (',' diLocationField)*)? 
')'; +diLocationField: + lineField + | columnField + | scopeField + | inlinedAtField + | isImplicitCodeField; +diMacro: '!DIMacro' '(' (diMacroField (',' diMacroField)*)? ')'; +diMacroField: + typeMacinfoField + | lineField + | nameField + | valueStringField; +diMacroFile: + '!DIMacroFile' '(' (diMacroFileField (',' diMacroFileField)*)? ')'; +diMacroFileField: + typeMacinfoField + | lineField + | fileField + | nodesField; +diModule: + '!DIModule' '(' (diModuleField (',' diModuleField)*)? ')'; +diModuleField: + scopeField + | nameField + | configMacrosField + | includePathField + | apiNotesField + | fileField + | lineField + | isDeclField; +diNamespace: + '!DINamespace' '(' (diNamespaceField (',' diNamespaceField)*)? ')'; +diNamespaceField: scopeField | nameField | exportSymbolsField; +diObjCProperty: + '!DIObjCProperty' '(' ( + diObjCPropertyField (',' diObjCPropertyField)* + )? ')'; +diObjCPropertyField: + nameField + | fileField + | lineField + | setterField + | getterField + | attributesField + | typeField; +diStringType: + '!DIStringType' '(' ( + diStringTypeField (',' diStringTypeField)* + )? ')'; +diStringTypeField: + tagField + | nameField + | stringLengthField + | stringLengthExpressionField + | stringLocationExpressionField + | sizeField + | alignField + | encodingField; +diSubprogram: + '!DISubprogram' '(' ( + diSubprogramField (',' diSubprogramField)* + )? ')'; +diSubprogramField: + scopeField + | nameField + | linkageNameField + | fileField + | lineField + | typeField + | isLocalField + | isDefinitionField + | scopeLineField + | containingTypeField + | virtualityField + | virtualIndexField + | thisAdjustmentField + | flagsField + | spFlagsField + | isOptimizedField + | unitField + | templateParamsField + | declarationField + | retainedNodesField + | thrownTypesField + | annotationsField + | targetFuncNameField; +diSubrange: + '!DISubrange' '(' (diSubrangeField (',' diSubrangeField)*)? 
')';
+diSubrangeField:
+    countField
+    | lowerBoundField
+    | upperBoundField
+    | strideField;
+diSubroutineType:
+    '!DISubroutineType' '(' (
+        diSubroutineTypeField (',' diSubroutineTypeField)*
+    )? ')';
+diTemplateTypeParameter:
+    '!DITemplateTypeParameter' '(' (
+        diTemplateTypeParameterField (
+            ',' diTemplateTypeParameterField
+        )*
+    )? ')';
+diTemplateValueParameter: // comma-separated field list of any length, same shape as the sibling DI* node rules
+    '!DITemplateValueParameter' '(' (
+        diTemplateValueParameterField (
+            ',' diTemplateValueParameterField
+        )*
+    )? ')';
+genericDiNode:
+    '!GenericDINode' '(' (
+        genericDINodeField (',' genericDINodeField)*
+    )? ')';
+
+diTemplateTypeParameterField:
+    nameField
+    | typeField
+    | defaultedField;
+diCompileUnitField:
+    languageField
+    | fileField
+    | producerField
+    | isOptimizedField
+    | flagsStringField
+    | runtimeVersionField
+    | splitDebugFilenameField
+    | emissionKindField
+    | enumsField
+    | retainedTypesField
+    | globalsField
+    | importsField
+    | macrosField
+    | dwoIdField
+    | splitDebugInliningField
+    | debugInfoForProfilingField
+    | nameTableKindField
+    | rangesBaseAddressField
+    | sysrootField
+    | sdkField;
+diCommonBlockField:
+    scopeField
+    | declarationField
+    | nameField
+    | fileField
+    | lineField;
+diBasicTypeField:
+    tagField
+    | nameField
+    | sizeField
+    | alignField
+    | encodingField
+    | flagsField;
+genericDINodeField: tagField | headerField | operandsField;
+tagField: 'tag:' DwarfTag;
+headerField: 'header:' StringLit;
+operandsField: 'operands:' '{' (mdField (',' mdField)*)? 
'}';
+diTemplateValueParameterField:
+    tagField
+    | nameField
+    | typeField
+    | defaultedField
+    | valueField;
+nameField: 'name:' StringLit;
+typeField: 'type:' mdField;
+defaultedField: 'defaulted:' boolConst;
+valueField: 'value:' mdField;
+mdField: nullConst | metadata;
+diSubroutineTypeField: flagsField | ccField | typesField;
+flagsField: 'flags:' diFlags;
+diFlags: DiFlag ('|' DiFlag)*;
+ccField: 'cc:' (DwarfCc | IntLit); // grouped so the 'cc:' key prefixes both value forms, as in encodingField
+alignField: 'align:' IntLit;
+allocatedField: 'allocated:' mdField;
+annotationsField: 'annotations:' mdField;
+argField: 'arg:' IntLit;
+associatedField: 'associated:' mdField;
+attributesField: 'attributes:' IntLit;
+baseTypeField: 'baseType:' mdField;
+checksumField: 'checksum:' StringLit;
+checksumkindField: 'checksumkind:' ChecksumKind;
+columnField: 'column:' IntLit;
+configMacrosField: 'configMacros:' StringLit;
+containingTypeField: 'containingType:' mdField;
+countField: 'count:' mdFieldOrInt;
+debugInfoForProfilingField: 'debugInfoForProfiling:' boolConst;
+declarationField: 'declaration:' mdField;
+directoryField: 'directory:' StringLit;
+discriminatorField: 'discriminator:' mdField;
+dataLocationField: 'dataLocation:' mdField;
+discriminatorIntField: 'discriminator:' IntLit;
+dwarfAddressSpaceField: 'dwarfAddressSpace:' IntLit;
+dwoIdField: 'dwoId:' IntLit;
+elementsField: 'elements:' mdField;
+emissionKindField:
+    'emissionKind:' emissionKind = (
+        'DebugDirectivesOnly'
+        | 'FullDebug'
+        | 'LineTablesOnly'
+        | 'NoDebug'
+    );
+encodingField: 'encoding:' (IntLit | DwarfAttEncoding);
+entityField: 'entity:' mdField;
+enumsField: 'enums:' mdField;
+exportSymbolsField: 'exportSymbols:' boolConst;
+exprField: 'expr:' mdField;
+extraDataField: 'extraData:' mdField;
+fileField: 'file:' mdField;
+filenameField: 'filename:' StringLit;
+flagsStringField: 'flags:' StringLit;
+getterField: 'getter:' StringLit;
+globalsField: 'globals:' mdField;
+identifierField: 'identifier:' StringLit;
+importsField: 'imports:' mdField;
+includePathField: 
'includePath:' StringLit; +inlinedAtField: 'inlinedAt:' mdField; +isDeclField: 'isDecl:' boolConst; +isDefinitionField: 'isDefinition:' boolConst; +isImplicitCodeField: 'isImplicitCode:' boolConst; +isLocalField: 'isLocal:' boolConst; +isOptimizedField: 'isOptimized:' boolConst; +isUnsignedField: 'isUnsigned:' boolConst; +apiNotesField: 'apinotes:' StringLit; +languageField: 'language:' DwarfLang; +lineField: 'line:' IntLit; +linkageNameField: 'linkageName:' StringLit; +lowerBoundField: 'lowerBound:' mdFieldOrInt; +macrosField: 'macros:' mdField; +nameTableKindField: + 'nameTableKind:' nameTableKind = ('GNU' | 'None' | 'Default'); +nodesField: 'nodes:' mdField; +offsetField: + // TODO: rename OffsetField= attribute to Offset= when inspirer/textmapper#13 is resolved + 'offset:' IntLit; +producerField: 'producer:' StringLit; +rangesBaseAddressField: 'rangesBaseAddress:' boolConst; +rankField: 'rank:' mdFieldOrInt; +retainedNodesField: 'retainedNodes:' mdField; +retainedTypesField: 'retainedTypes:' mdField; +runtimeLangField: 'runtimeLang:' DwarfLang; +runtimeVersionField: 'runtimeVersion:' IntLit; +scopeField: 'scope:' mdField; +scopeLineField: 'scopeLine:' IntLit; +sdkField: 'sdk:' StringLit; +setterField: 'setter:' StringLit; +sizeField: 'size:' IntLit; +sourceField: 'source:' StringLit; +spFlagsField: 'spFlags:' (diSPFlag ('|' diSPFlag)*); +splitDebugFilenameField: 'splitDebugFilename:' StringLit; +splitDebugInliningField: 'splitDebugInlining:' boolConst; +strideField: 'stride:' mdFieldOrInt; +stringLengthField: 'stringLength:' mdField; +stringLengthExpressionField: 'stringLengthExpression:' mdField; +stringLocationExpressionField: + 'stringLocationExpression:' mdField; +sysrootField: 'sysroot:' StringLit; +targetFuncNameField: 'targetFuncName:' StringLit; +templateParamsField: 'templateParams:' mdField; +thisAdjustmentField: 'thisAdjustment:' IntLit; +thrownTypesField: 'thrownTypes:' mdField; +typeMacinfoField: 'type:' DwarfMacinfo; +typesField: 'types:' mdField; 
+unitField: 'unit:' mdField;
+upperBoundField: 'upperBound:' mdFieldOrInt;
+valueIntField: 'value:' IntLit;
+valueStringField: 'value:' StringLit;
+varField: 'var:' mdField;
+virtualIndexField: 'virtualIndex:' IntLit;
+virtualityField: 'virtuality:' DwarfVirtuality;
+vtableHolderField: 'vtableHolder:' mdField;
+
+fragment AsciiLetter: [A-Za-z];
+fragment Letter: AsciiLetter | [-$._];
+fragment EscapeLetter: Letter | '\\';
+fragment DecimalDigit: [0-9];
+fragment HexDigit: [A-Fa-f] | DecimalDigit;
+fragment Decimals: DecimalDigit+;
+fragment Name: Letter (Letter | DecimalDigit)*;
+fragment EscapeName:
+    EscapeLetter (EscapeLetter | DecimalDigit)*;
+fragment Id: Decimals;
+fragment IntHexLit: [us] '0x' HexDigit+;
+// Floating-point constants
+fragment Sign: [+-];
+fragment FracLit: Sign? Decimals '.' DecimalDigit*;
+fragment SciLit: FracLit [eE] Sign? Decimals;
+/*
+ HexFPConstant 0x{_hex_digit}+ // 16 hex digits
+ HexFP80Constant 0xK{_hex_digit}+ // 20 hex digits
+ HexFP128Constant 0xL{_hex_digit}+ // 32 hex digits
+ HexPPC128Constant 0xM{_hex_digit}+ // 32 hex
+ digits
+ HexHalfConstant 0xH{_hex_digit}+ // 4 hex digits
+ HexBFloatConstant 0xR{_hex_digit}+ // 4
+ hex digits
+ */
+fragment FloatHexLit: '0x' [KLMHR]? HexDigit+;
+fragment GlobalName: '@' (Name | QuotedString);
+fragment GlobalId: '@' Id;
+fragment LocalName: '%' (Name | QuotedString);
+fragment LocalId: '%' Id;
+fragment QuotedString: '"' (~["\r\n])* '"';
+Comment: ';' ~[\r\n]* -> channel(HIDDEN); // ends before the line break (left to WhiteSpace), so a comment on a final line without a newline still lexes
+WhiteSpace: [ \t\n\r]+ -> skip;
+IntLit: '-'? DecimalDigit+ | IntHexLit;
+FloatLit: FracLit | SciLit | FloatHexLit;
+StringLit: QuotedString;
+GlobalIdent: GlobalName | GlobalId;
+LocalIdent: LocalName | LocalId;
+LabelIdent: (Letter | DecimalDigit)+ ':' | QuotedString ':';
+AttrGroupId: '#' Id;
+ComdatName: '$' (Name | QuotedString);
+MetadataName: '!' EscapeName;
+MetadataId: '!' 
Id; +IntType: 'i' DecimalDigit+; +DwarfTag: 'DW_TAG_' (AsciiLetter | DecimalDigit | '_')*; +DwarfAttEncoding: 'DW_ATE_' (AsciiLetter | DecimalDigit | '_')*; +DiFlag: 'DIFlag' (AsciiLetter | DecimalDigit | '_')*; +DispFlag: 'DISPFlag' (AsciiLetter | DecimalDigit | '_')*; +DwarfLang: 'DW_LANG_' (AsciiLetter | DecimalDigit | '_')*; +DwarfCc: 'DW_CC_' (AsciiLetter | DecimalDigit | '_')*; +ChecksumKind: 'CSK_' (AsciiLetter | DecimalDigit | '_')*; +DwarfVirtuality: + 'DW_VIRTUALITY_' (AsciiLetter | DecimalDigit | '_')*; +DwarfMacinfo: 'DW_MACINFO_' (AsciiLetter | DecimalDigit | '_')*; +DwarfOp: 'DW_OP_' (AsciiLetter | DecimalDigit | '_')*; diff --git a/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRLanguage.java b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRLanguage.java new file mode 100644 index 000000000..8f0d65830 --- /dev/null +++ b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRLanguage.java @@ -0,0 +1,42 @@ +package de.jplag.llvmir; + +import org.kohsuke.MetaInfServices; + +import de.jplag.Language; +import de.jplag.antlr.AbstractAntlrLanguage; + +/** + * The entry point for the ANTLR parser based LLVM IR language module. 
+ */ +@MetaInfServices(Language.class) +public class LLVMIRLanguage extends AbstractAntlrLanguage { + + private static final String NAME = "LLVMIR Parser"; + private static final String IDENTIFIER = "llvmir"; + private static final int DEFAULT_MIN_TOKEN_MATCH = 40; + private static final String[] FILE_EXTENSIONS = {".ll"}; + + public LLVMIRLanguage() { + super(new LLVMIRParserAdapter()); + } + + @Override + public String[] suffixes() { + return FILE_EXTENSIONS; + } + + @Override + public String getName() { + return NAME; + } + + @Override + public String getIdentifier() { + return IDENTIFIER; + } + + @Override + public int minimumTokenMatch() { + return DEFAULT_MIN_TOKEN_MATCH; + } +} diff --git a/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRListener.java b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRListener.java new file mode 100644 index 000000000..ed0efc65f --- /dev/null +++ b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRListener.java @@ -0,0 +1,242 @@ +package de.jplag.llvmir; + +import static de.jplag.llvmir.LLVMIRTokenType.*; +import static de.jplag.llvmir.grammar.LLVMIRParser.AShrExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AShrInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AddExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AddInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AddrSpaceCastExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AddrSpaceCastInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AllocaInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AndExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AndInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ArrayConstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AtomicOrderingContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.AtomicRMWInstContext; +import static 
de.jplag.llvmir.grammar.LLVMIRParser.BasicBlockContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.BitCastExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.BitCastInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.BrTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CallBrTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CallInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.Case_Context; +import static de.jplag.llvmir.grammar.LLVMIRParser.CatchPadInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CatchRetTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CatchSwitchTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ClauseContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CleanupPadInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CleanupRetTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CmpXchgInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.CondBrTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ExtractElementExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ExtractElementInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ExtractValueInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FAddInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FCmpExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FCmpInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FDivInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FMulInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FRemInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FSubInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FenceInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FpExtExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FpExtInstContext; +import static 
de.jplag.llvmir.grammar.LLVMIRParser.FpToSiExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FpToSiInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FpToUiExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FpToUiInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FpTruncExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FpTruncInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FuncBodyContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FuncDeclContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.FuncDefContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.GetElementPtrExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.GetElementPtrInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.GlobalDeclContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.GlobalDefContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ICmpExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ICmpInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.IndirectBrTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.InlineAsmContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.InsertElementExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.InsertElementInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.InsertValueInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.IntToPtrExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.IntToPtrInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.InvokeTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.LShrExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.LShrInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.LandingPadInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.LoadInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ModuleAsmContext; 
+import static de.jplag.llvmir.grammar.LLVMIRParser.MulExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.MulInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.OrExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.OrInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.PhiInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.PtrToIntExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.PtrToIntInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ResumeTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.RetTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SDivInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SExtExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SExtInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SRemInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SelectExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SelectInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ShlExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ShlInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ShuffleVectorExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ShuffleVectorInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SiToFpExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SiToFpInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SourceFilenameContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.StoreInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.StructConstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SubExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SubInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.SwitchTermContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.TruncExprContext; +import static 
de.jplag.llvmir.grammar.LLVMIRParser.TruncInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.TypeDefContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.UDivInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.URemInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.UiToFpExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.UiToFpInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.VaargInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.VectorConstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.XorExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.XorInstContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ZExtExprContext; +import static de.jplag.llvmir.grammar.LLVMIRParser.ZExtInstContext; + +import java.io.File; + +import de.jplag.antlr.AbstractAntlrListener; +import de.jplag.antlr.TokenCollector; + +/** + * Extracts tokens from the ANTLR parse tree. The token abstraction includes nesting tokens for functions and basic + * blocks and separate tokens for different elements. These include binary and bitwise instructions, memory operations, + * terminator instructions, conversions, global variables, type definitions, constants, and others. 
+ */ +public class LLVMIRListener extends AbstractAntlrListener { + + /** + * New instance + * @param collector The token collector the token will be added to + * @param currentFile The currently processed file + */ + public LLVMIRListener(TokenCollector collector, File currentFile) { + super(collector, currentFile); + + this.mapEnter(SourceFilenameContext.class, FILENAME); + this.mapRange(ModuleAsmContext.class, ASSEMBLY); + this.mapEnter(TypeDefContext.class, TYPE_DEFINITION); + this.mapRange(GlobalDeclContext.class, GLOBAL_VARIABLE); + this.mapRange(GlobalDefContext.class, GLOBAL_VARIABLE); + this.mapRange(FuncDeclContext.class, FUNCTION_DECLARATION); + this.mapRange(FuncDefContext.class, FUNCTION_DEFINITION); + this.mapEnterExit(FuncBodyContext.class, FUNCTION_BODY_BEGIN, FUNCTION_BODY_END); + this.mapEnterExit(BasicBlockContext.class, BASIC_BLOCK_BEGIN, BASIC_BLOCK_END); + this.mapRange(RetTermContext.class, RETURN); + this.mapRange(BrTermContext.class, BRANCH); + this.mapRange(CondBrTermContext.class, CONDITIONAL_BRANCH); + this.mapRange(SwitchTermContext.class, SWITCH); + this.mapRange(IndirectBrTermContext.class, BRANCH); + this.mapRange(ResumeTermContext.class, RESUME); + this.mapRange(CatchRetTermContext.class, CATCH_RETURN); + this.mapRange(CleanupRetTermContext.class, CLEAN_UP_RETURN); + this.mapRange(InvokeTermContext.class, INVOKE); + this.mapRange(CallBrTermContext.class, CALL_BRANCH); + this.mapRange(CatchSwitchTermContext.class, CATCH_SWITCH); + this.mapRange(Case_Context.class, CASE); + this.mapRange(StructConstContext.class, STRUCTURE); + this.mapRange(ArrayConstContext.class, ARRAY); + this.mapRange(VectorConstContext.class, VECTOR); + this.mapRange(InlineAsmContext.class, ASSEMBLY); + this.mapRange(BitCastExprContext.class, BITCAST); + this.mapRange(GetElementPtrExprContext.class, GET_ELEMENT_POINTER); + this.mapEnter(AddrSpaceCastExprContext.class, CONVERSION); + this.mapEnter(IntToPtrExprContext.class, CONVERSION); + 
this.mapRange(ICmpExprContext.class, COMPARISON); + this.mapRange(FCmpExprContext.class, COMPARISON); + this.mapRange(SelectExprContext.class, SELECT); + this.mapEnter(TruncExprContext.class, CONVERSION); + this.mapEnter(ZExtExprContext.class, CONVERSION); + this.mapEnter(SExtExprContext.class, CONVERSION); + this.mapEnter(FpTruncExprContext.class, CONVERSION); + this.mapEnter(FpExtExprContext.class, CONVERSION); + this.mapEnter(FpToUiExprContext.class, CONVERSION); + this.mapEnter(FpToSiExprContext.class, CONVERSION); + this.mapEnter(UiToFpExprContext.class, CONVERSION); + this.mapEnter(SiToFpExprContext.class, CONVERSION); + this.mapEnter(PtrToIntExprContext.class, CONVERSION); + this.mapEnter(ExtractElementExprContext.class, EXTRACT_ELEMENT); + this.mapEnter(InsertElementExprContext.class, INSERT_ELEMENT); + this.mapEnter(ShuffleVectorExprContext.class, SHUFFLE_VECTOR); + this.mapRange(ShlExprContext.class, SHIFT); + this.mapRange(LShrExprContext.class, SHIFT); + this.mapRange(AShrExprContext.class, SHIFT); + this.mapRange(AndExprContext.class, AND); + this.mapRange(OrExprContext.class, OR); + this.mapRange(XorExprContext.class, XOR); + this.mapRange(AddExprContext.class, ADDITION); + this.mapRange(SubExprContext.class, SUBTRACTION); + this.mapRange(MulExprContext.class, MULTIPLICATION); + this.mapRange(StoreInstContext.class, STORE); + this.mapRange(FenceInstContext.class, FENCE); + this.mapRange(AddInstContext.class, ADDITION); + this.mapRange(FAddInstContext.class, ADDITION); + this.mapRange(SubInstContext.class, SUBTRACTION); + this.mapRange(FSubInstContext.class, SUBTRACTION); + this.mapRange(MulInstContext.class, MULTIPLICATION); + this.mapRange(FMulInstContext.class, MULTIPLICATION); + this.mapRange(UDivInstContext.class, DIVISION); + this.mapRange(SDivInstContext.class, DIVISION); + this.mapRange(FDivInstContext.class, DIVISION); + this.mapRange(URemInstContext.class, REMAINDER); + this.mapRange(SRemInstContext.class, REMAINDER); + 
this.mapRange(FRemInstContext.class, REMAINDER); + this.mapRange(ShlInstContext.class, SHIFT); + this.mapRange(LShrInstContext.class, SHIFT); + this.mapRange(AShrInstContext.class, SHIFT); + this.mapRange(AndInstContext.class, AND); + this.mapRange(OrInstContext.class, OR); + this.mapRange(XorInstContext.class, XOR); + this.mapEnter(ExtractElementInstContext.class, EXTRACT_ELEMENT); + this.mapEnter(InsertElementInstContext.class, INSERT_ELEMENT); + this.mapEnter(ShuffleVectorInstContext.class, SHUFFLE_VECTOR); + this.mapRange(ExtractValueInstContext.class, EXTRACT_VALUE); + this.mapRange(InsertValueInstContext.class, INSERT_VALUE); + this.mapRange(AllocaInstContext.class, ALLOCATION); + this.mapRange(LoadInstContext.class, LOAD); + this.mapRange(CmpXchgInstContext.class, COMPARE_EXCHANGE); + this.mapRange(AtomicRMWInstContext.class, ATOMIC_READ_MODIFY_WRITE); + this.mapRange(GetElementPtrInstContext.class, GET_ELEMENT_POINTER); + this.mapEnter(TruncInstContext.class, CONVERSION); + this.mapEnter(ZExtInstContext.class, CONVERSION); + this.mapEnter(SExtInstContext.class, CONVERSION); + this.mapEnter(FpTruncInstContext.class, CONVERSION); + this.mapEnter(FpExtInstContext.class, CONVERSION); + this.mapEnter(FpToUiInstContext.class, CONVERSION); + this.mapEnter(FpToSiInstContext.class, CONVERSION); + this.mapEnter(UiToFpInstContext.class, CONVERSION); + this.mapEnter(SiToFpInstContext.class, CONVERSION); + this.mapEnter(PtrToIntInstContext.class, CONVERSION); + this.mapEnter(IntToPtrInstContext.class, CONVERSION); + this.mapRange(BitCastInstContext.class, BITCAST); + this.mapEnter(AddrSpaceCastInstContext.class, CONVERSION); + this.mapRange(ICmpInstContext.class, COMPARISON); + this.mapRange(FCmpInstContext.class, COMPARISON); + this.mapRange(PhiInstContext.class, PHI); + this.mapRange(SelectInstContext.class, SELECT); + this.mapRange(CallInstContext.class, CALL); + this.mapRange(VaargInstContext.class, VARIABLE_ARGUMENT); + this.mapRange(LandingPadInstContext.class, 
LANDING_PAD); + this.mapRange(CatchPadInstContext.class, CATCH_PAD); + this.mapRange(CleanupPadInstContext.class, CLEAN_UP_PAD); + this.mapRange(ClauseContext.class, CLAUSE); + this.mapRange(AtomicOrderingContext.class, ATOMIC_ORDERING); + } +} diff --git a/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRParserAdapter.java b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRParserAdapter.java new file mode 100644 index 000000000..0d96e301e --- /dev/null +++ b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRParserAdapter.java @@ -0,0 +1,37 @@ +package de.jplag.llvmir; + +import java.io.File; + +import org.antlr.v4.runtime.*; + +import de.jplag.AbstractParser; +import de.jplag.antlr.AbstractAntlrListener; +import de.jplag.antlr.AbstractAntlrParserAdapter; +import de.jplag.antlr.TokenCollector; +import de.jplag.llvmir.grammar.LLVMIRLexer; +import de.jplag.llvmir.grammar.LLVMIRParser; + +/** + * The adapter between {@link AbstractParser} and the ANTLR based parser of this language module. 
+ */ +public class LLVMIRParserAdapter extends AbstractAntlrParserAdapter { + @Override + protected Lexer createLexer(CharStream input) { + return new LLVMIRLexer(input); + } + + @Override + protected LLVMIRParser createParser(CommonTokenStream tokenStream) { + return new LLVMIRParser(tokenStream); + } + + @Override + protected ParserRuleContext getEntryContext(LLVMIRParser parser) { + return parser.compilationUnit(); + } + + @Override + protected AbstractAntlrListener createListener(TokenCollector collector, File currentFile) { + return new LLVMIRListener(collector, currentFile); + } +} diff --git a/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRTokenType.java b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRTokenType.java new file mode 100644 index 000000000..0935fa4cc --- /dev/null +++ b/languages/llvmir/src/main/java/de/jplag/llvmir/LLVMIRTokenType.java @@ -0,0 +1,99 @@ +package de.jplag.llvmir; + +import de.jplag.TokenType; + +/** + * LLVM IR token types extracted by this language module. 
+ */ +public enum LLVMIRTokenType implements TokenType { + + FILENAME("FILENAME"), + + // Functions + FUNCTION_BODY_BEGIN("FUNC{"), + FUNCTION_BODY_END("}FUNC"), + BASIC_BLOCK_BEGIN("BLOCK{"), + BASIC_BLOCK_END("}BLOCK"), + FUNCTION_DECLARATION("FUNC_DECL"), + FUNCTION_DEFINITION("FUNC_DEF"), + + GLOBAL_VARIABLE("GLOB_VAR"), + ASSEMBLY("ASM"), + TYPE_DEFINITION("TYPE_DEF"), + + // Constants + STRUCTURE("STRUCT"), + ARRAY("ARR"), + VECTOR("VEC"), + + // Terminator Instructions + RETURN("RET"), + BRANCH("BR"), + SWITCH("SWITCH"), + CASE("CASE"), + CONDITIONAL_BRANCH("COND_BR"), + INVOKE("INVOKE"), + CALL_BRANCH("CALL_BR"), + RESUME("RESUME"), + CATCH_SWITCH("CATCH_SWITCH"), + CATCH_RETURN("CATCH_RET"), + CLEAN_UP_RETURN("CLEAN_UP_RET"), + + // Binary Operations + ADDITION("ADD"), + SUBTRACTION("SUB"), + MULTIPLICATION("MUL"), + DIVISION("DIV"), + REMAINDER("REM"), + + // Bitwise instruction + SHIFT("SHIFT"), + AND("AND"), + OR("OR"), + XOR("XOR"), + + // Vector operations + EXTRACT_ELEMENT("EXTRACT_ELEM"), + INSERT_ELEMENT("INSERT_ELEM"), + SHUFFLE_VECTOR("SHUFFLE_VEC"), + + // Aggregate Operations + EXTRACT_VALUE("EXTRACT_VAL"), + INSERT_VALUE("INSERT_VAL"), + + // Memory Operations + ALLOCATION("ALLOC"), + LOAD("LOAD"), + STORE("STORE"), + FENCE("FENCE"), + COMPARE_EXCHANGE("CMP_XCHG"), + ATOMIC_READ_MODIFY_WRITE("ATOMIC_RMW"), + ATOMIC_ORDERING("ATOMIC"), + GET_ELEMENT_POINTER("GET_ELEMENT_PTR"), + + // Conversion Operations + BITCAST("BITCAST"), + CONVERSION("CONV"), + + // Other Operations + COMPARISON("COMP"), + PHI("PHI"), + SELECT("SELECT"), + CALL("CALL"), + VARIABLE_ARGUMENT("VA_ARG"), + LANDING_PAD("LANDING_PAD"), + CLAUSE("CLAUSE"), + CATCH_PAD("CATCH_PAD"), + CLEAN_UP_PAD("CLEAN_UP_PAD"); + + private final String description; + + LLVMIRTokenType(String description) { + this.description = description; + } + + @Override + public String getDescription() { + return description; + } +} diff --git 
a/languages/llvmir/src/test/java/de/jplag/llvmir/LLVMIRLanguageTest.java b/languages/llvmir/src/test/java/de/jplag/llvmir/LLVMIRLanguageTest.java
new file mode 100644
index 000000000..b34956af9
--- /dev/null
+++ b/languages/llvmir/src/test/java/de/jplag/llvmir/LLVMIRLanguageTest.java
@@ -0,0 +1,42 @@
+package de.jplag.llvmir;
+
+import static de.jplag.llvmir.LLVMIRTokenType.*;
+
+import java.util.Arrays;
+import java.util.List;
+
+import de.jplag.testutils.LanguageModuleTest;
+import de.jplag.testutils.datacollector.TestDataCollector;
+import de.jplag.testutils.datacollector.TestSourceIgnoredLinesCollector;
+
+/**
+ * Provides tests for the LLVM IR language module.
+ */
+class LLVMIRLanguageTest extends LanguageModuleTest {
+    public LLVMIRLanguageTest() {
+        super(new LLVMIRLanguage(), LLVMIRTokenType.class);
+    }
+
+    @Override
+    protected void collectTestData(TestDataCollector collector) {
+        // Tokens that are not expected in Complete.ll; they are checked against NewExceptionHandling.ll instead.
+        List<LLVMIRTokenType> missingTokens = List.of(CATCH_SWITCH, CATCH_RETURN, CLEAN_UP_RETURN, CATCH_PAD, CLEAN_UP_PAD);
+        LLVMIRTokenType[] expectedTokens = Arrays.stream(LLVMIRTokenType.values()).filter(it -> !missingTokens.contains(it))
+                .toArray(LLVMIRTokenType[]::new);
+
+        collector.testFile("Complete.ll").testSourceCoverage().testContainedTokens(expectedTokens);
+
+        // Finding an example for the new exception handling instructions was difficult.
+        // Therefore, the NewExceptionHandling.ll file can only be parsed and not executed.
+        collector.testFile("NewExceptionHandling.ll").testSourceCoverage().testContainedTokens(missingTokens.toArray(LLVMIRTokenType[]::new));
+
+    }
+
+    @Override
+    protected void configureIgnoredLines(TestSourceIgnoredLinesCollector collector) {
+        collector.ignoreLinesByPrefix(";");
+        collector.ignoreLinesByPrefix("target datalayout");
+        collector.ignoreLinesByPrefix("target triple");
+        collector.ignoreLinesByPrefix("unreachable");
+    }
+}
\ No newline at end of file
diff --git a/languages/llvmir/src/test/resources/de/jplag/llvmir/Complete.ll b/languages/llvmir/src/test/resources/de/jplag/llvmir/Complete.ll
new file mode 100644
index 000000000..2ccaf60ee
--- /dev/null
+++ b/languages/llvmir/src/test/resources/de/jplag/llvmir/Complete.ll
@@ -0,0 +1,161 @@
+; ModuleID = 'Complete.c'
+source_filename = "Complete.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx13.0.0"
+
+@Global_Var = private unnamed_addr constant [14 x i8] c"Hello World!\0A\00", align 1
+@struct.const = private constant {i32, double} {i32 4, double 8.12}
+
+%struct.Struct = type { i32 }
+@_ZTIi = external constant ptr
+
+; Function Attrs: noinline nounwind optnone ssp uwtable
+define i32 @main() #0 {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca float, align 4
+  %5 = alloca %struct.Struct, align 4
+  store i32 0, ptr %1, align 4
+  store i32 4, ptr %2, align 4
+  %6 = load i32, ptr %2, align 4
+  %7 = add nsw i32 5, %6
+  store i32 %7, ptr %3, align 4
+  %8 = load i32, ptr %2, align 4
+  %9 = sub nsw i32 5, %8
+  store i32 %9, ptr %3, align 4
+  %10 = load i32, ptr %2, align 4
+  %11 = mul nsw i32 5, %10
+  store i32 %11, ptr %3, align 4
+  %12 = load i32, ptr %2, align 4
+  %13 = sdiv i32 5, %12
+  %14 = sitofp i32 %13 to float
+  store float %14, ptr %4, align 4
+  %15 = load i32, ptr %2, align 4
+  %16 = srem i32 5, %15
+  store i32 %16, ptr %3, align 4
+  %17 = load i32, ptr %2, align 4
+  %18 = shl i32
%17, 5 + store i32 %18, ptr %3, align 4 + %19 = load i32, ptr %2, align 4 + %20 = and i32 %19, 5 + store i32 %20, ptr %3, align 4 + %21 = load i32, ptr %2, align 4 + %22 = or i32 %21, 5 + store i32 %22, ptr %3, align 4 + %23 = load i32, ptr %2, align 4 + %24 = xor i32 %23, 5 + store i32 %24, ptr %3, align 4 + %25 = call i32 (ptr, ...) @printf(ptr noundef @Global_Var) + %cast = bitcast i8 255 to i8 + br label %vectors + +vectors: + %26 = call i32 @vector(<4 x i32> , <4 x i32> ) + switch i32 %26, label %vectors [ i32 10, label %aggregates + i32 1, label %vectors + i32 2, label %vectors ] + +aggregates: + %struc = insertvalue {i32} undef, i32 1, 0 + %27 = extractvalue {i32} %struc, 0 + callbr void asm "", "r,!i"(i32 0) to label %memory [label %aggregates] + +memory: + fence acquire + br label %entry + +entry: + %28 = getelementptr inbounds %struct.Struct, ptr %5, i32 0, i32 0 + %tmp = va_arg ptr %28, i32 + store i32 1, ptr %28, align 4 + %old = atomicrmw add ptr %28, i32 1 acquire + %orig = load atomic i32, ptr %28 unordered, align 4 + br label %loop + +loop: + %cmp = phi i32 [ %orig, %entry ], [%value_loaded, %loop] + %squared = mul i32 %cmp, %cmp + %val_success = cmpxchg ptr %28, i32 %cmp, i32 %squared acq_rel monotonic + %value_loaded = extractvalue { i32, i1 } %val_success, 0 + %success = extractvalue { i32, i1 } %val_success, 1 + br i1 %success, label %done, label %loop + +done: + %false = icmp eq i32 4, 5 + %first = select i1 true, i8 17, i8 42 + call void @Exception() + ret i32 0 +} + +define i32 @vector(<4 x i32> %v1, <4 x i32> %v2) { + %vec = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> + %vec_ins = insertelement <4 x i32> %vec, i32 10, i32 0 + %elem = extractelement <4 x i32> %vec_ins, i32 0 + ret i32 %elem +} + +; Function Attrs: mustprogress noinline norecurse optnone ssp uwtable +define noundef i32 @Exception() personality ptr @__gxx_personality_v0 { + %1 = alloca i32, align 4 + %2 = alloca ptr, align 8 + %3 = alloca i32, align 4 + %4 = alloca 
i32, align 4 + store i32 0, ptr %1, align 4 + %5 = call ptr @__cxa_allocate_exception(i64 4) + store i32 5, ptr %5, align 16 + invoke void @__cxa_throw(ptr %5, ptr @_ZTIi, ptr null) to label %25 unwind label %6 + +6: + %7 = landingpad { ptr, i32 } + catch ptr @_ZTIi + %8 = extractvalue { ptr, i32 } %7, 0 + store ptr %8, ptr %2, align 8 + %9 = extractvalue { ptr, i32 } %7, 1 + store i32 %9, ptr %3, align 4 + br label %10 + +10: + %11 = load i32, ptr %3, align 4 + %12 = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) + %13 = icmp eq i32 %11, %12 + br i1 %13, label %14, label %20 + +14: + %15 = load ptr, ptr %2, align 8 + %16 = call ptr @__cxa_begin_catch(ptr %15) + %17 = load i32, ptr %16, align 4 + store i32 %17, ptr %4, align 4 + store i32 0, ptr %1, align 4 + call void @__cxa_end_catch() + br label %18 + +18: + %19 = load i32, ptr %1, align 4 + ret i32 %19 + +20: + %21 = load ptr, ptr %2, align 8 + %22 = load i32, ptr %3, align 4 + %23 = insertvalue { ptr, i32 } undef, ptr %21, 0 + %24 = insertvalue { ptr, i32 } %23, i32 %22, 1 + resume { ptr, i32 } %24 + +25: + unreachable +} + +declare i32 @printf(ptr noundef, ...) + +declare ptr @__cxa_allocate_exception(i64) + +declare void @__cxa_throw(ptr, ptr, ptr) + +declare i32 @__gxx_personality_v0(...) 
+ +; Function Attrs: nounwind readnone +declare i32 @llvm.eh.typeid.for(ptr) + +declare ptr @__cxa_begin_catch(ptr) + +declare void @__cxa_end_catch() \ No newline at end of file diff --git a/languages/llvmir/src/test/resources/de/jplag/llvmir/NewExceptionHandling.ll b/languages/llvmir/src/test/resources/de/jplag/llvmir/NewExceptionHandling.ll new file mode 100644 index 000000000..85f0f02e8 --- /dev/null +++ b/languages/llvmir/src/test/resources/de/jplag/llvmir/NewExceptionHandling.ll @@ -0,0 +1,41 @@ +define i32 @f() nounwind personality i32 (...)* @__CxxFrameHandler3 { +entry: + %obj = alloca %struct.Cleanup, align 4 + %e = alloca i32, align 4 + %call = invoke %struct.Cleanup* @"??0Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) + to label %invoke.cont unwind label %lpad.catch + +invoke.cont: ; preds = %entry + invoke void @"?may_throw@@YAXXZ"() + to label %invoke.cont.2 unwind label %lpad.cleanup + +invoke.cont.2: ; preds = %invoke.cont + call void @"??_DCleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind + br label %return + +return: ; preds = %invoke.cont.3, %invoke.cont.2 + %retval.0 = phi i32 [ 0, %invoke.cont.2 ], [ %3, %invoke.cont.3 ] + ret i32 %retval.0 + +lpad.cleanup: ; preds = %invoke.cont.2 + %0 = cleanuppad within none [] + call void @"??1Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind + cleanupret from %0 unwind label %lpad.catch + +lpad.catch: ; preds = %lpad.cleanup, %entry + %1 = catchswitch within none [label %catch.body] unwind label %lpad.terminate + +catch.body: ; preds = %lpad.catch + %catch = catchpad within %1 [%rtti.TypeDescriptor2* @"??_R0H@8", i32 0, i32* %e] + invoke void @"?may_throw@@YAXXZ"() + to label %invoke.cont.3 unwind label %lpad.terminate + +invoke.cont.3: ; preds = %catch.body + %2 = load i32, i32* %e, align 4 + catchret from %catch to label %return + +lpad.terminate: ; preds = %catch.body, %lpad.catch + cleanuppad within none [] + call void @"?terminate@@YAXXZ"() + unreachable +} \ No newline at end of 
file
diff --git a/languages/pom.xml b/languages/pom.xml
index f241a3205..2de4e639e 100644
--- a/languages/pom.xml
+++ b/languages/pom.xml
@@ -26,6 +26,7 @@
         <module>scxml</module>
         <module>swift</module>
         <module>text</module>
+        <module>llvmir</module>