Skip to content

Commit

Permalink
New: Exe Extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
BenediktHeinrichs committed Nov 21, 2023
1 parent 563cb29 commit 45a8e9d
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 2 deletions.
55 changes: 55 additions & 0 deletions MetadataExtractor/Extractors/Data/ExeExtract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from .IDataExtract import IDataExtract
import pefile
from MetadataExtractor.Util import metadataCreation, metadataFormatter
import logging

log = logging.getLogger(__name__)

class ExeExtract(IDataExtract):
    """Extract header and section metadata from Windows PE (exe) files.

    The DOS header, file header, optional header and section table are read
    with ``pefile`` and emitted as predicate/object pairs in the ``exe``
    ontology namespace.
    """

    def registerMimeTypes(self):
        """Register the MIME type handled by this extractor."""
        self.mimeTypes["concrete"] = "application/x-msdownload"

    @staticmethod
    def _decodeSectionName(rawName):
        """Return a printable section name from the raw section-table bytes.

        Section names are fixed-width byte fields padded with NUL bytes.
        ``str.strip()`` does not remove NUL characters, so the padding is
        removed explicitly before decoding. Undecodable bytes are replaced
        rather than raising, so one odd section name (common in packed
        binaries) does not abort the whole extraction.
        """
        return rawName.rstrip(b"\x00").decode("utf-8", errors="replace").strip()

    def extract(self, fileInfo):
        """Extract PE metadata for the file described by ``fileInfo``.

        Args:
            fileInfo: Mapping that provides at least ``"file"`` (path of the
                executable to parse) and ``"identifier"`` (used as the graph
                identifier).

        Returns:
            A tuple ``("", metadata)`` where ``metadata`` is whatever
            ``metadataCreation.addEntryToFileGraph`` returns. The first
            element is always the empty string (presumably the extracted
            text content -- confirm against other extractors).

        Raises:
            Any exception raised by ``pefile.PE`` when the file cannot be
            parsed is propagated unchanged to the caller.
        """
        log.info("Extracting metadata from exe file: %s", fileInfo["file"])
        pe = pefile.PE(fileInfo["file"])

        try:
            # DOS header, file header and optional header metadata.
            values = [
                {"predicate": "exe:e_magic", "object": hex(pe.DOS_HEADER.e_magic)},
                {"predicate": "exe:e_cblp", "object": pe.DOS_HEADER.e_cblp},
                {"predicate": "exe:machine", "object": hex(pe.FILE_HEADER.Machine)},
                {
                    "predicate": "exe:numberOfSections",
                    "object": pe.FILE_HEADER.NumberOfSections,
                },
                {
                    "predicate": "exe:addressOfEntryPoint",
                    "object": hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint),
                },
                {
                    "predicate": "exe:imageBase",
                    "object": hex(pe.OPTIONAL_HEADER.ImageBase),
                },
            ]

            # Section table metadata: three entries per section, in table order.
            for section in pe.sections:
                values.extend(
                    [
                        {
                            "predicate": "exe:sectionName",
                            "object": self._decodeSectionName(section.Name),
                        },
                        {
                            "predicate": "exe:virtualSize",
                            "object": section.Misc_VirtualSize,
                        },
                        {
                            "predicate": "exe:virtualAddress",
                            "object": section.VirtualAddress,
                        },
                    ]
                )

            # Additional metadata can be added here
        finally:
            # Release the memory-mapped file promptly instead of waiting for
            # garbage collection; all needed values are plain Python objects
            # by this point.
            pe.close()

        # Create graph options
        graphOptions = {
            "additionalPrefixes": [
                "@prefix exe: <{}ontologies/exe#>".format(
                    metadataFormatter.getBaseUrl(self._IExtract__config)
                )
            ],
            "identifier": fileInfo["identifier"],
            "ontology": "exe",
            "values": values,
        }

        # Add metadata to graph
        metadata = metadataCreation.addEntryToFileGraph(
            fileInfo, self._IExtract__config, graphOptions
        )

        return "", metadata
2 changes: 2 additions & 0 deletions MetadataExtractor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
from logging import NullHandler

from ._version import __version__

# This is a namespace package, don't put any functional code in here besides the
# declare_namespace call, or it will disappear on install. See:
# https://setuptools.readthedocs.io/en/latest/setuptools.html#namespace-packages
Expand Down
2 changes: 1 addition & 1 deletion MetadataExtractor/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.2"
__version__ = "0.4.3"
6 changes: 5 additions & 1 deletion defaultConfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,11 @@ def getDefaultConfig():
# "SummaryExtract",
],
"Triples": [],
"Data": ["FcsExtract", "Hdf5Extract"],
"Data": [
"ExeExtract",
"FcsExtract",
"Hdf5Extract"
],
"Image": [
"DescriptiveImageExtract",
"ObjectExtract",
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ pandas==1.5.1
flowkit==1.0.0
openai-whisper
soundfile
pefile==2023.2.7

0 comments on commit 45a8e9d

Please sign in to comment.