Skip to content

Commit

Permalink
Let Barbie extraction proceed by skipping over the offending bytecode…
Browse files Browse the repository at this point in the history
… with odd datum types.
  • Loading branch information
npjg committed Jun 5, 2024
1 parent b9595ab commit eccf44b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
37 changes: 35 additions & 2 deletions src/MediaStation/Context.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,41 @@ def read_header_section(self, chunk, reading_stage = False):
return False

elif (Context.SectionType.FUNCTION == section_type):
function = Script(chunk, in_independent_asset_chunk = True)
self.assets.update({function.id: function})
try:
function = Script(chunk, in_independent_asset_chunk = True)
self.assets.update({function.id: function})
except BinaryParsingError as e:
# TODO: This check exists due to an odd bytecode sequence in Barbie
# (117.CXT), around 0x188d and 0x18d9 in "function_5ps1_GetSavedGames".
# Seemingly nonsensical datums of type 0x0230 are provided right in
# the middle of otherwise normal bytecode.
#
# Here is an example of what happens, where the datum
# type is indicated by `^` and the value is indicated by `-`.
# The offending sequence is indicated by `!`.
# 0300 0A00 0200 0603 0001 0030 0200 0603 0001 00
# ^ - ^ - ^ - !! !!!! !!^ -
# 30 0200 0603 0001 00
# !! !!!! !!^ -
# 30 0200 0603 0001 00
# !! !!!! !!^ -
# 30 0200 0603 0001 00
# !! !!!! !!^ -
# 30 0200 0603 0001 00
# !! !!!! !!^ -
# 30 0200 0603 0001 00
# !! !!!! !!^ -
# 30 0200 0603 0001 00
# !! !!!! !!^ -
# 02 0006 0300 0100 3002 0006 0300 0100 3003 0067 0003 00DB 0003 00BA 00
# ^ - ^ - !!!! !!!! ^ - !!^ - ^ - ^ -
# It is perfectly acceptable for single-byte datums to throw
# off the alignment until the end of the chunk, so that's not
# the problem. I haven't been able to figure out what it is, so
# to allow extraction to proceed we will just skip the bytecode
# for now.
print(f'WARNING: Parsing error in bytecode. The entire bytecode chunk will be skipped. {e}')
chunk.skip()

elif (Context.SectionType.END == section_type):
# TODO: Figure out what these are.
Expand Down
6 changes: 6 additions & 0 deletions src/MediaStation/Riff/Chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def __init__(self, stream, fourcc_length = 4):
raise ZeroLengthChunkError('Encountered a zero-length chunk. This usually indicates corrupted data - maybe a CD-ROM read error.')
self.data_start_pointer = stream.tell()

## Skips over the rest of the chunk. Any bytes in the chunk not yet read are
## discarded, leaving the stream at the chunk's end pointer (the start of the
## next chunk/subfile).
##
## If the stream is already at or past the chunk's end pointer, nothing is read
## and the stream position is left unchanged.
def skip(self):
    bytes_remaining_in_chunk = self.end_pointer - self.stream.tell()
    # A negative size passed to read() means "read everything up to the end of
    # the stream", which would swallow all following chunks. Only read when
    # there is actually something left in this chunk.
    if bytes_remaining_in_chunk > 0:
        self.stream.read(bytes_remaining_in_chunk)

## Reads the given number of bytes from the chunk, or throws an error if there is an attempt
## to read past the end of the chunk. Generally this is the only byte reading method that should
## be called directly because it includes this protection.
Expand Down

0 comments on commit eccf44b

Please sign in to comment.