
Commit

Switched from using u64 for file_data idxs to usize, as in a memory map we need to use usize anyway. This conversion isn't finished, however; it has mainly been done in the parts affecting the validators, to avoid the mess of "as u64".
Will-Banksy committed Apr 10, 2024
1 parent 57cee30 commit 3ab7ed1
Showing 8 changed files with 53 additions and 55 deletions.
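The mmap rationale in the commit message comes down to this: a memory-mapped image dereferences to a byte slice, and slice indexing in Rust takes usize, so keeping u64 offsets forces a cast at every access. A toy illustration (a sketch only; it assumes the memmap2 crate, which the message doesn't name):

```rust
use memmap2::Mmap;
use std::fs::File;

fn byte_at(path: &str, offset: u64) -> std::io::Result<u8> {
	let file = File::open(path)?;
	// Safety: the underlying file must not be truncated while mapped
	let mmap = unsafe { Mmap::map(&file)? };
	// Mmap derefs to &[u8], and slice indexing takes usize, so a u64
	// offset needs a cast (or a fallible conversion) at every use
	Ok(mmap[offset as usize])
}
```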
3 changes: 3 additions & 0 deletions libsearchlight/src/lib.rs
@@ -1,4 +1,7 @@
// TODO: Either use u64 or usize, don't use them interchangably. We probably have to stick to usize as memory maps would require that. Some fs operations require u64/i64 though (seeking)
+// Okay so we're using usize pretty much all the time in the validators, but we do need to go over everything and make sure we're only using u64 when necessary, and stick to usize
+// for everything else. We can cast safely (panicking if the value doesn't fit) with .try_into().unwrap() (maybe add wrapper .assert_into() since we use .try_into().unwrap() so much
+// lol)

pub mod search;
pub mod error;
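The `.assert_into()` wrapper mused about in the comment above could be a small extension trait along these lines (a hypothetical sketch, not part of this commit; the trait and method names are invented):

```rust
use std::convert::TryInto;
use std::fmt::Debug;

/// Hypothetical helper: a named, panicking alternative to scattering
/// `.try_into().unwrap()` around every u64 <-> usize boundary
pub trait AssertInto<T> {
	fn assert_into(self) -> T;
}

impl<T, U> AssertInto<U> for T
where
	T: TryInto<U>,
	T::Error: Debug,
{
	fn assert_into(self) -> U {
		// Panics if the value doesn't fit the target type, e.g. a u64 file
		// offset larger than usize::MAX on a 32-bit target
		self.try_into().expect("assert_into: value did not fit in target type")
	}
}

// Usage: let idx: usize = some_u64_offset.assert_into();
```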
12 changes: 6 additions & 6 deletions libsearchlight/src/search/pairing.rs
@@ -10,8 +10,8 @@ use super::{match_id_hash_slice_u16, Match};
#[derive(PartialEq)]
pub struct MatchPair<'a> {
pub file_type: &'a FileType,
-pub start_idx: u64,
-pub end_idx: u64
+pub start_idx: usize,
+pub end_idx: usize
}

impl fmt::Debug for MatchPair<'_> {
@@ -24,16 +24,16 @@ impl<'a> MatchPair<'a> {
pub fn new(file_type: &'a FileType, start: &Match, end: &Match) -> Self {
MatchPair {
file_type,
-start_idx: start.start_idx,
-end_idx: end.end_idx
+start_idx: start.start_idx as usize,
+end_idx: end.end_idx as usize
}
}

pub fn new_sized(file_type: &'a FileType, start: &Match, size: u64) -> Self {
MatchPair {
file_type,
-start_idx: start.start_idx,
-end_idx: start.start_idx + size
+start_idx: start.start_idx as usize,
+end_idx: (start.start_idx + size) as usize
}
}
}
4 changes: 2 additions & 2 deletions libsearchlight/src/searchlight.rs
@@ -173,7 +173,7 @@ impl Searchlight {
let mut num_carved_files = 0;

for pot_file in match_pairs {
-let validation = validator.validate(&mmap, &pot_file, &matches, cluster_size, &self.config);
+let validation = validator.validate(&mmap, &pot_file, &matches, cluster_size as usize, &self.config);

debug!("Potential file at {}-{} (type id {}) validated as: {}, with fragments {:?}", pot_file.start_idx, pot_file.end_idx + 1, pot_file.file_type.type_id, validation.validation_type, validation.fragments);

@@ -203,7 +203,7 @@ impl Searchlight {
)?;

file.write_vectored(
-&fragments.iter().map(|frag| IoSlice::new(&mmap[frag.start as usize..frag.end as usize])).collect::<Vec<IoSlice>>()
+&fragments.iter().map(|frag| IoSlice::new(&mmap[frag.start..frag.end])).collect::<Vec<IoSlice>>()
)?;

num_carved_files += 1;
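One caveat on the vectored write above: `write_vectored` is not guaranteed to flush every slice (it returns the number of bytes written), so a robust carve needs a loop or fallback. A sketch of the gather-write pattern over mmap-backed fragments (hypothetical helper, not from this commit):

```rust
use std::io::{self, IoSlice, Write};
use std::ops::Range;

/// Write each fragment of `data` (e.g. a memory-mapped disk image) to `out`
/// as one gathered write, falling back to per-fragment writes on a short write
fn carve_fragments<W: Write>(out: &mut W, data: &[u8], fragments: &[Range<usize>]) -> io::Result<()> {
	let slices: Vec<IoSlice> = fragments.iter().map(|frag| IoSlice::new(&data[frag.clone()])).collect();
	let total: usize = fragments.iter().map(|frag| frag.len()).sum();
	let written = out.write_vectored(&slices)?;
	if written < total {
		// Short write: write out whatever remains, fragment by fragment
		let mut skip = written;
		for frag in fragments {
			if skip >= frag.len() {
				skip -= frag.len();
				continue;
			}
			out.write_all(&data[(frag.start + skip)..frag.end])?;
			skip = 0;
		}
	}
	Ok(())
}
```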
18 changes: 3 additions & 15 deletions libsearchlight/src/utils.rs
@@ -25,13 +25,13 @@ pub fn file_len(file: &mut File) -> Result<u64, io::Error> {

/// Calculates the next multiple of `multiple` from `num`. E.g. `next_multiple_of(7, 3) == 9`,
/// `next_multiple_of(9, 3) == 12`
-pub fn next_multiple_of(num: u64, multiple: u64) -> u64 {
+pub fn next_multiple_of(num: usize, multiple: usize) -> usize {
((num / multiple) + 1) * multiple
}

/// Calculates the previous multiple of `multiple` from `num`. E.g. `prev_multiple_of(7, 3) == 6`,
/// `prev_multiple_of(9, 3) == 9`
-pub fn prev_multiple_of(num: u64, multiple: u64) -> u64 {
+pub fn prev_multiple_of(num: usize, multiple: usize) -> usize {
(num / multiple) * multiple
}

@@ -120,7 +120,7 @@ pub fn generate_fragmentations(cluster_size: usize, fragmentation_range: Range<u

while gap_idx <= clusters.len() - gap_len {
// Get all the clusters that are not in the gap, and simplify
-let mut file_clusters: Vec<Range<u64>> = clusters.iter().enumerate().filter(|(i, _)| *i < gap_idx || *i >= (gap_idx + gap_len)).map(|(_, c)| c.start as u64..c.end as u64).collect();
+let mut file_clusters: Vec<Range<usize>> = clusters.iter().enumerate().filter(|(i, _)| *i < gap_idx || *i >= (gap_idx + gap_len)).map(|(_, c)| c.clone()).collect();
simplify_ranges(&mut file_clusters);

res.push(file_clusters);
@@ -145,18 +145,6 @@ pub fn simplify_ranges<T>(ranges: &mut Vec<Range<T>>) where T: PartialEq {
}
}

-/// Combines a list of ranges of indexes and a slice of data that is referred to by those indexes to produce a list of slices of that data
-// NOTE: Is this useful?
-pub fn idxs_to_slice<'a, T>(data: &'a [T], idxs: &[Range<usize>]) -> Vec<&'a [T]> {
-let mut res = Vec::with_capacity(idxs.len());
-
-for range in idxs {
-res.push(&data[range.clone()])
-}
-
-res
-}
-
#[cfg(test)]
mod test {
use crate::{search::Match, utils::estimate_cluster_size};
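Worth noting in passing: `next_multiple_of` here differs from std's `usize::next_multiple_of`, which returns `num` unchanged when it is already a multiple; the version above always steps forward to the next boundary. A quick illustration (hypothetical values, not from the commit):

```rust
fn next_multiple_of(num: usize, multiple: usize) -> usize {
	((num / multiple) + 1) * multiple
}

fn prev_multiple_of(num: usize, multiple: usize) -> usize {
	(num / multiple) * multiple
}

fn main() {
	let cluster_size = 4096;
	// First cluster boundary strictly after a match at offset 5000
	assert_eq!(next_multiple_of(5000, cluster_size), 8192);
	// Start of the cluster containing offset 5000
	assert_eq!(prev_multiple_of(5000, cluster_size), 4096);
	// An offset already on a boundary still advances a full cluster
	assert_eq!(next_multiple_of(4096, cluster_size), 8192);
}
```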
6 changes: 3 additions & 3 deletions libsearchlight/src/validation.rs
@@ -15,10 +15,10 @@ pub trait FileValidator {
///
/// `cluster_size` is given to aid reconstruction logic. It must not be assumed that cluster_size is any sensible value, as users can pass in anything. Additionally, a cluster size of
/// 1 indicates that files in the image aren't allocated on cluster boundaries
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo;
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo;
}

-pub type Fragment = Range<u64>;
+pub type Fragment = Range<usize>;

pub struct FileValidationInfo {
/// The result of validating the data - Whether it is recognised as fully present and correct, partial, corrupted, etc
@@ -98,7 +98,7 @@ impl DelegatingValidator {
}

impl FileValidator for DelegatingValidator {
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo {
if let Some(validator) = self.validators.get(&file_match.file_type.type_id) {
validator.validate(file_data, file_match, all_matches, cluster_size, config)
} else {
18 changes: 9 additions & 9 deletions libsearchlight/src/validation/jpeg.rs
@@ -30,10 +30,10 @@ impl JpegValidator {
/// Attempt to reconstruct JPEG scan data, assuming that all fragments are in-order, by looping through clusters and attempting to classify them
/// as either JPEG scan data or not
fn reconstruct_scan_data(file_data: &[u8], scan_marker_idx: usize, cluster_size: usize, config: &SearchlightConfig) -> JpegScanReconstructionInfo {
-let fragmentation_start = utils::next_multiple_of(scan_marker_idx as u64 + 1, cluster_size as u64) as usize;
+let fragmentation_start = utils::next_multiple_of(scan_marker_idx + 1, cluster_size) as usize;

let mut fragments = vec![
-(scan_marker_idx as u64)..(fragmentation_start as u64)
+scan_marker_idx..fragmentation_start
];

let mut cluster_idx = fragmentation_start;
@@ -56,10 +56,10 @@
()
}
(true, None) => {
-fragments.push((cluster_idx as u64)..((cluster_idx + cluster_size) as u64));
+fragments.push(cluster_idx..(cluster_idx + cluster_size));
}
(true, Some(next_marker)) => {
-fragments.push((cluster_idx as u64)..(next_marker + cluster_idx) as u64);
+fragments.push((cluster_idx)..(next_marker + cluster_idx));
utils::simplify_ranges(&mut fragments);

return JpegScanReconstructionInfo::Success {
@@ -80,7 +80,7 @@
impl FileValidator for JpegValidator {
// Written using https://www.w3.org/Graphics/JPEG/jfif3.pdf,
// https://www.w3.org/Graphics/JPEG/itu-t81.pdf and https://stackoverflow.com/questions/32873541/scanning-a-jpeg-file-for-markers
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo {
let start = file_match.start_idx as usize;

// Mandatory segments for a complete JPEG file
@@ -98,12 +98,12 @@
// that up. Then again I can't see anything in any documentation to say that segments necessarily have lengths
if (file_data[i + 1] ^ 0xd0 < 0x09) || file_data[i + 1] == 0x01 {
// Move on to the next segment
-fragments.push(i as u64..(i as u64 + 2));
+fragments.push(i..(i + 2));
utils::simplify_ranges(&mut fragments);
i += 2;
continue;
} else if file_data[i + 1] == JPEG_EOI {
-fragments.push(i as u64..(i as u64 + 2 + cluster_size)); // NOTE: We're carving an extra cluster here which isn't necessary for the image but often metadata is stored past EOI so this will catch (some of) that
+fragments.push(i..(i + 2 + cluster_size)); // NOTE: We're carving an extra cluster here which isn't necessary for the image but often metadata is stored past EOI so this will catch (some of) that
utils::simplify_ranges(&mut fragments);

// Return that this is a complete file with length start - i
@@ -122,7 +122,7 @@
i = next_chunk_idx;
},
JpegScanReconstructionInfo::Failure { failure_idx } => {
-fragments.push(i as u64..failure_idx as u64);
+fragments.push(i..failure_idx);

break FileValidationInfo {
validation_type: FileValidationType::Partial,
@@ -139,7 +139,7 @@
// Parse the length and skip the segment
let segment_len = u16::from_be_bytes(file_data[(i + 2)..=(i + 3)].try_into().unwrap());

-fragments.push(i as u64..(i as u64 + segment_len as u64 + 2));
+fragments.push(i..(i + segment_len as usize + 2));
utils::simplify_ranges(&mut fragments);

i += segment_len as usize + 2;
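For context on the marker test in the hunk above: `file_data[i + 1] ^ 0xd0 < 0x09` matches markers 0xd0 through 0xd8 (RST0-RST7 plus SOI), which, like TEM (0x01), carry no length field; every other segment the validator walks has a big-endian u16 length that counts itself but not the marker bytes. A minimal sketch of that classification (not part of this commit):

```rust
/// True for markers with no length field: RST0-RST7 (0xd0-0xd7), SOI (0xd8)
/// and TEM (0x01), mirroring the XOR trick used by the validator
fn is_standalone_marker(marker: u8) -> bool {
	(marker ^ 0xd0) < 0x09 || marker == 0x01
}

/// Total bytes to skip for a length-prefixed segment starting at `i`:
/// 2 marker bytes plus the u16 length (which includes its own 2 bytes)
fn segment_size(file_data: &[u8], i: usize) -> usize {
	u16::from_be_bytes(file_data[(i + 2)..(i + 4)].try_into().unwrap()) as usize + 2
}
```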
34 changes: 17 additions & 17 deletions libsearchlight/src/validation/png.rs
@@ -70,22 +70,22 @@ struct ChunkValidationInfo {
validation_type: FileValidationType,
chunk_type: u32,
chunk_frags: Vec<Fragment>,
-next_chunk_idx: Option<u64>,
+next_chunk_idx: Option<usize>,
}

impl ChunkValidationInfo {
pub fn new_unfragmented(validation_type: FileValidationType, chunk_type: u32, chunk_idx: usize, data_len: u32, should_continue: bool) -> Self {
-let next_chunk_idx = chunk_idx as u64 + 12 + data_len as u64;
+let next_chunk_idx = chunk_idx + 12 + data_len as usize;

ChunkValidationInfo {
validation_type,
chunk_type,
-chunk_frags: vec![chunk_idx as u64..next_chunk_idx],
-next_chunk_idx: if should_continue { Some(chunk_idx as u64 + 12 + data_len as u64) } else { None }
+chunk_frags: vec![chunk_idx..next_chunk_idx],
+next_chunk_idx: if should_continue { Some(chunk_idx + 12 + data_len as usize) } else { None }
}
}

-pub fn new_fragmented(validation_type: FileValidationType, chunk_type: u32, fragments: Vec<Fragment>, next_chunk_idx: Option<u64>) -> Self {
+pub fn new_fragmented(validation_type: FileValidationType, chunk_type: u32, fragments: Vec<Fragment>, next_chunk_idx: Option<usize>) -> Self {
ChunkValidationInfo {
validation_type,
chunk_type,
@@ -98,7 +98,7 @@
enum ChunkReconstructionInfo {
Success {
chunk_frags: Vec<Fragment>,
-next_chunk_idx: u64
+next_chunk_idx: usize
},
Failure
}
@@ -110,7 +110,7 @@ impl PngValidator {

/// Validates and reconstructs PNG chunk at `chunk_idx` in `file_data`, where `file_data` has a cluster size of `cluster_size`, so files can be assumed
/// to be allocated in blocks of `cluster_size`. `chunk_idx` refers to the very start of a chunk, where a chunk is \[`len`\]\[`type`\]\[`data`\]\[`crc`\].
-fn validate_chunk(requires_plte: &mut bool, plte_forbidden: &mut bool, file_data: &[u8], chunk_idx: usize, cluster_size: u64, max_search_len: u64) -> ChunkValidationInfo {
+fn validate_chunk(requires_plte: &mut bool, plte_forbidden: &mut bool, file_data: &[u8], chunk_idx: usize, cluster_size: usize, max_search_len: usize) -> ChunkValidationInfo {
/// Macro to make extracting fields a bit more readable: file_data[(chunk_idx + 4)..(chunk_idx + 8)] -> chunk_data[4, 8]
macro_rules! chunk_data {
[$start: expr, $end: expr] => {
@@ -181,9 +181,9 @@
} else {
(
vec![
-(chunk_idx as u64)..(unfrag_crc_offset as u64 + 4)
+chunk_idx..(unfrag_crc_offset + 4)
],
-unfrag_crc_offset as u64 + 4
+unfrag_crc_offset + 4
)
};

@@ -208,7 +208,7 @@
/// Attempts to reconstruct a fragmented PNG chunk, assuming that the length, chunk type, and CRC are not fragmented and that all
/// fragments of the chunk are in-order (limitations) by searching forwards for a valid chunk type, decoding the CRC that should occur just before it,
/// and enumerating the possible cluster arrangements between the start of the chunk data and the decoded CRC for a matching calculated CRC
-fn reconstruct_chunk(file_data: &[u8], chunk_idx: usize, chunk_data_len: usize, cluster_size: u64, max_search_len: u64) -> ChunkReconstructionInfo {
+fn reconstruct_chunk(file_data: &[u8], chunk_idx: usize, chunk_data_len: usize, cluster_size: usize, max_search_len: usize) -> ChunkReconstructionInfo {
let unfrag_crc_offset = chunk_idx + chunk_data_len + 8;

let mut next_chunk_type_offset = unfrag_crc_offset + 8;
@@ -230,8 +230,8 @@
let stored_crc = u32::from_be_bytes(file_data[(next_chunk_type_offset - 8)..(next_chunk_type_offset - 4)].try_into().unwrap());

// Calculate the fragmentation points
-let fragmentation_start = utils::next_multiple_of(chunk_idx as u64 + 8, cluster_size) as usize;
-let fragmentation_end = utils::prev_multiple_of(next_chunk_type_offset as u64 - 8, cluster_size) as usize;
+let fragmentation_start = utils::next_multiple_of(chunk_idx + 8, cluster_size) as usize;
+let fragmentation_end = utils::prev_multiple_of(next_chunk_type_offset - 8, cluster_size) as usize;

// Calculate the number of clusters that were skipped, i.e. the number of irrelevant chunks
let clusters_skipped = (next_chunk_type_offset - (unfrag_crc_offset + 8)) / cluster_size as usize;
@@ -268,12 +268,12 @@ impl PngValidator {
}

if let Some(mut data_frags) = correct_fragmentation {
-data_frags.insert(0, chunk_idx as u64..fragmentation_start as u64);
-data_frags.push(fragmentation_end as u64..(next_chunk_type_offset - 4) as u64);
+data_frags.insert(0, chunk_idx..fragmentation_start);
+data_frags.push(fragmentation_end..(next_chunk_type_offset - 4));

utils::simplify_ranges(&mut data_frags);

-ChunkReconstructionInfo::Success { chunk_frags: data_frags, next_chunk_idx: next_chunk_type_offset as u64 - 4 }
+ChunkReconstructionInfo::Success { chunk_frags: data_frags, next_chunk_idx: next_chunk_type_offset - 4 }
} else {
ChunkReconstructionInfo::Failure
}
@@ -336,7 +336,7 @@ impl PngValidator {

impl FileValidator for PngValidator {
// Written using https://www.w3.org/TR/png-3/
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo {
let mut chunk_idx = file_match.start_idx as usize + 8;

let mut requires_plte = false;
@@ -362,7 +362,7 @@ impl FileValidator for PngValidator {
let mut fragments: Vec<Fragment> = vec![ file_match.start_idx..(file_match.start_idx + 8) ];

loop {
-let mut chunk_info = Self::validate_chunk(&mut requires_plte, &mut plte_forbidden, &file_data, chunk_idx, cluster_size, config.max_reconstruction_search_len.unwrap_or(u64::MAX));
+let mut chunk_info = Self::validate_chunk(&mut requires_plte, &mut plte_forbidden, &file_data, chunk_idx, cluster_size, config.max_reconstruction_search_len.unwrap_or(u64::MAX) as usize);

fragments.append(&mut chunk_info.chunk_frags);
utils::simplify_ranges(&mut fragments);
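To spell out the chunk layout this validator leans on: a PNG chunk is [u32 length][4-byte type][data][u32 CRC], i.e. 12 bytes of overhead around the data, with the CRC-32 computed over the type and data fields only. A minimal check for an unfragmented chunk might look like this (a sketch assuming the crc32fast crate, not necessarily the CRC routine the codebase actually uses):

```rust
/// Verify the stored CRC of an unfragmented chunk starting at `chunk_idx`.
/// Layout: [u32 len][4-byte type][data; len][u32 crc], all big-endian.
fn chunk_crc_ok(file_data: &[u8], chunk_idx: usize) -> bool {
	let data_len = u32::from_be_bytes(file_data[chunk_idx..(chunk_idx + 4)].try_into().unwrap()) as usize;
	let crc_offset = chunk_idx + 8 + data_len;
	let stored_crc = u32::from_be_bytes(file_data[crc_offset..(crc_offset + 4)].try_into().unwrap());
	// CRC-32 (ISO-HDLC polynomial) over chunk type + chunk data, per the PNG spec
	crc32fast::hash(&file_data[(chunk_idx + 4)..crc_offset]) == stored_crc
}
```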
13 changes: 10 additions & 3 deletions libsearchlight/src/validation/zip.rs
@@ -11,6 +11,7 @@ const ZIP_LOCAL_FILE_HEADER_SIG_ID: u64 = 0x04034b50; // TODO: Calculate this

const ZIP_LOCAL_FILE_HEADER_SIZE: usize = 30;
const ZIP_CENTRAL_DIR_HEADER_SIZE: usize = 46;
+const ZIP_END_OF_CENTRAL_DIR_SIZE: usize = 22;

const ZIP_DATA_DESCRIPTOR_FLAG: u16 = 3;

@@ -248,7 +249,7 @@ impl ZipValidator {

impl FileValidator for ZipValidator {
// Written using: https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt and https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], _cluster_size: u64, _config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], _cluster_size: usize, _config: &SearchlightConfig) -> FileValidationInfo {
// Since ZIP files may have multiple headers before 1 footer, and so we can only assume that 1 footer = 1 zip file, this match pair
// may well span the nth file in the zip to the EOCD signature. We can check the number of entries we come across however against
// the number of entries in the central directory and if they don't match, and no other problems have been encountered, then we can
@@ -264,15 +265,18 @@
// 4. For each file, put their fragments in order of the offsets in the central directory
// 5. As one last thing, go through the fragments and check that all the offsets are correct. If they are not, validate the ZIP as either Partial or Corrupted

-let eocd_idx = file_match.end_idx as usize - file_match.file_type.footers[0].len() + 1;
+let eocd_idx = file_match.end_idx - file_match.file_type.footers[0].len() + 1;

-if (eocd_idx + 22) > file_data.len() {
+if (eocd_idx + ZIP_END_OF_CENTRAL_DIR_SIZE) > file_data.len() {
return FileValidationInfo {
validation_type: FileValidationType::Partial,
..Default::default()
}
}

+let eocd_comment_len = u16::from_le_bytes(file_data[(eocd_idx + 0x14)..(eocd_idx + 0x16)].try_into().unwrap()) as usize;
+let eocd_len = eocd_comment_len + ZIP_END_OF_CENTRAL_DIR_SIZE;
+
// Check the signature - we only want to handle the case of EOCD
let signature = &file_data[eocd_idx..(eocd_idx + 4)];
assert_eq!(signature, &[ 0x50, 0x4b, 0x05, 0x06 ]);
@@ -329,6 +333,9 @@
lfhs
};

+let frag_eocd = eocd_idx..(eocd_idx + eocd_len);
+let frag_cd = central_directory_idx..eocd_idx;
+
// TODO: Go through the local file headers and validate/reconstruct each file data segment

todo!()
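For reference, the EOCD record anchored on above is a fixed 22-byte structure plus a variable-length comment whose u16 length sits at offset 0x14 (exactly what the new `eocd_comment_len` line reads). A sketch of pulling out the fields the planned reconstruction would need (not part of this commit; the struct and field names are invented):

```rust
const ZIP_END_OF_CENTRAL_DIR_SIZE: usize = 22;

/// End Of Central Directory fields, per APPNOTE.txt; offsets are relative
/// to the EOCD signature PK\x05\x06
struct EndOfCentralDirectory {
	total_entries: u16, // 0x0a: total entries in the central directory
	cd_size: u32,       // 0x0c: size of the central directory in bytes
	cd_offset: u32,     // 0x10: offset of the central directory in the image
	comment_len: u16,   // 0x14: length of the trailing comment
}

fn parse_eocd(file_data: &[u8], eocd_idx: usize) -> Option<EndOfCentralDirectory> {
	if file_data.len() < eocd_idx + ZIP_END_OF_CENTRAL_DIR_SIZE
		|| file_data[eocd_idx..(eocd_idx + 4)] != [0x50, 0x4b, 0x05, 0x06]
	{
		return None;
	}
	let le16 = |off| u16::from_le_bytes(file_data[(eocd_idx + off)..(eocd_idx + off + 2)].try_into().unwrap());
	let le32 = |off| u32::from_le_bytes(file_data[(eocd_idx + off)..(eocd_idx + off + 4)].try_into().unwrap());
	Some(EndOfCentralDirectory {
		total_entries: le16(0x0a),
		cd_size: le32(0x0c),
		cd_offset: le32(0x10),
		comment_len: le16(0x14),
	})
}
```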
