
Commit

Switched from using u64 for file_data idxs to usize, as in a memory map we need to use usize anyway. This conversion isn't finished, however; it has mainly been done in the parts affecting the validators, to avoid the mess of "as u64".
Will-Banksy committed Apr 10, 2024
1 parent 57cee30 commit 3ab7ed1
Showing 8 changed files with 53 additions and 55 deletions.
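The mmap rationale in the commit message comes down to this: a memory-mapped image dereferences to a byte slice, and slice indexing in Rust takes usize, so keeping u64 offsets forces a cast at every access. A toy illustration (a sketch only; it assumes the memmap2 crate, which the message doesn't name):

```rust
use memmap2::Mmap;
use std::fs::File;

fn byte_at(path: &str, offset: u64) -> std::io::Result<u8> {
	let file = File::open(path)?;
	// Safety: the underlying file must not be truncated while mapped
	let mmap = unsafe { Mmap::map(&file)? };
	// Mmap derefs to &[u8], and slice indexing takes usize, so a u64
	// offset needs a cast (or a fallible conversion) at every use
	Ok(mmap[offset as usize])
}
```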
3 changes: 3 additions & 0 deletions libsearchlight/src/lib.rs
@@ -1,4 +1,7 @@
// TODO: Either use u64 or usize, don't use them interchangably. We probably have to stick to usize as memory maps would require that. Some fs operations require u64/i64 though (seeking)
+// Okay so we're using usize pretty much all the time in the validators, but we do need to go over everything and make sure we're only using u64 when necessary, and stick to usize
+// for everything else. We can cast safely (panicking if the value doesn't fit) with .try_into().unwrap() (maybe add wrapper .assert_into() since we use .try_into().unwrap() so much
+// lol)

pub mod search;
pub mod error;
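The `.assert_into()` wrapper mused about in the comment above could be a small extension trait along these lines (a hypothetical sketch, not part of this commit; the trait and method names are invented):

```rust
use std::convert::TryInto;
use std::fmt::Debug;

/// Hypothetical helper: a named, panicking alternative to scattering
/// `.try_into().unwrap()` around every u64 <-> usize boundary
pub trait AssertInto<T> {
	fn assert_into(self) -> T;
}

impl<T, U> AssertInto<U> for T
where
	T: TryInto<U>,
	T::Error: Debug,
{
	fn assert_into(self) -> U {
		// Panics if the value doesn't fit the target type, e.g. a u64 file
		// offset larger than usize::MAX on a 32-bit target
		self.try_into().expect("assert_into: value did not fit in target type")
	}
}

// Usage: let idx: usize = some_u64_offset.assert_into();
```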
12 changes: 6 additions & 6 deletions libsearchlight/src/search/pairing.rs
@@ -10,8 +10,8 @@ use super::{match_id_hash_slice_u16, Match};
#[derive(PartialEq)]
pub struct MatchPair<'a> {
pub file_type: &'a FileType,
-pub start_idx: u64,
-pub end_idx: u64
+pub start_idx: usize,
+pub end_idx: usize
}

impl fmt::Debug for MatchPair<'_> {
@@ -24,16 +24,16 @@ impl<'a> MatchPair<'a> {
pub fn new(file_type: &'a FileType, start: &Match, end: &Match) -> Self {
MatchPair {
file_type,
-start_idx: start.start_idx,
-end_idx: end.end_idx
+start_idx: start.start_idx as usize,
+end_idx: end.end_idx as usize
}
}

pub fn new_sized(file_type: &'a FileType, start: &Match, size: u64) -> Self {
MatchPair {
file_type,
-start_idx: start.start_idx,
-end_idx: start.start_idx + size
+start_idx: start.start_idx as usize,
+end_idx: (start.start_idx + size) as usize
}
}
}
4 changes: 2 additions & 2 deletions libsearchlight/src/searchlight.rs
@@ -173,7 +173,7 @@ impl Searchlight {
let mut num_carved_files = 0;

for pot_file in match_pairs {
-let validation = validator.validate(&mmap, &pot_file, &matches, cluster_size, &self.config);
+let validation = validator.validate(&mmap, &pot_file, &matches, cluster_size as usize, &self.config);

debug!("Potential file at {}-{} (type id {}) validated as: {}, with fragments {:?}", pot_file.start_idx, pot_file.end_idx + 1, pot_file.file_type.type_id, validation.validation_type, validation.fragments);

@@ -203,7 +203,7 @@ impl Searchlight {
)?;

file.write_vectored(
-&fragments.iter().map(|frag| IoSlice::new(&mmap[frag.start as usize..frag.end as usize])).collect::<Vec<IoSlice>>()
+&fragments.iter().map(|frag| IoSlice::new(&mmap[frag.start..frag.end])).collect::<Vec<IoSlice>>()
)?;

num_carved_files += 1;
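One caveat on the vectored write above: `write_vectored` is not guaranteed to flush every slice (it returns the number of bytes written), so a robust carve needs a loop or fallback. A sketch of the gather-write pattern over mmap-backed fragments (hypothetical helper, not from this commit):

```rust
use std::io::{self, IoSlice, Write};
use std::ops::Range;

/// Write each fragment of `data` (e.g. a memory-mapped disk image) to `out`
/// as one gathered write, falling back to per-fragment writes on a short write
fn carve_fragments<W: Write>(out: &mut W, data: &[u8], fragments: &[Range<usize>]) -> io::Result<()> {
	let slices: Vec<IoSlice> = fragments.iter().map(|frag| IoSlice::new(&data[frag.clone()])).collect();
	let total: usize = fragments.iter().map(|frag| frag.len()).sum();
	let written = out.write_vectored(&slices)?;
	if written < total {
		// Short write: write out whatever remains, fragment by fragment
		let mut skip = written;
		for frag in fragments {
			if skip >= frag.len() {
				skip -= frag.len();
				continue;
			}
			out.write_all(&data[(frag.start + skip)..frag.end])?;
			skip = 0;
		}
	}
	Ok(())
}
```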
18 changes: 3 additions & 15 deletions libsearchlight/src/utils.rs
@@ -25,13 +25,13 @@ pub fn file_len(file: &mut File) -> Result<u64, io::Error> {

/// Calculates the next multiple of `multiple` from `num`. E.g. `next_multiple_of(7, 3) == 9`,
/// `next_multiple_of(9, 3) == 12`
-pub fn next_multiple_of(num: u64, multiple: u64) -> u64 {
+pub fn next_multiple_of(num: usize, multiple: usize) -> usize {
((num / multiple) + 1) * multiple
}

/// Calculates the previous multiple of `multiple` from `num`. E.g. `prev_multiple_of(7, 3) == 6`,
/// `prev_multiple_of(9, 3) == 9`
-pub fn prev_multiple_of(num: u64, multiple: u64) -> u64 {
+pub fn prev_multiple_of(num: usize, multiple: usize) -> usize {
(num / multiple) * multiple
}

@@ -120,7 +120,7 @@ pub fn generate_fragmentations(cluster_size: usize, fragmentation_range: Range<u

while gap_idx <= clusters.len() - gap_len {
// Get all the clusters that are not in the gap, and simplify
-let mut file_clusters: Vec<Range<u64>> = clusters.iter().enumerate().filter(|(i, _)| *i < gap_idx || *i >= (gap_idx + gap_len)).map(|(_, c)| c.start as u64..c.end as u64).collect();
+let mut file_clusters: Vec<Range<usize>> = clusters.iter().enumerate().filter(|(i, _)| *i < gap_idx || *i >= (gap_idx + gap_len)).map(|(_, c)| c.clone()).collect();
simplify_ranges(&mut file_clusters);

res.push(file_clusters);
@@ -145,18 +145,6 @@ pub fn simplify_ranges<T>(ranges: &mut Vec<Range<T>>) where T: PartialEq {
}
}

-/// Combines a list of ranges of indexes and a slice of data that is referred to by those indexes to produce a list of slices of that data
-// NOTE: Is this useful?
-pub fn idxs_to_slice<'a, T>(data: &'a [T], idxs: &[Range<usize>]) -> Vec<&'a [T]> {
-let mut res = Vec::with_capacity(idxs.len());
-
-for range in idxs {
-res.push(&data[range.clone()])
-}
-
-res
-}
-
#[cfg(test)]
mod test {
use crate::{search::Match, utils::estimate_cluster_size};
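Worth noting in passing: `next_multiple_of` here differs from std's `usize::next_multiple_of`, which returns `num` unchanged when it is already a multiple; the version above always steps forward to the next boundary. A quick illustration (hypothetical values, not from the commit):

```rust
fn next_multiple_of(num: usize, multiple: usize) -> usize {
	((num / multiple) + 1) * multiple
}

fn prev_multiple_of(num: usize, multiple: usize) -> usize {
	(num / multiple) * multiple
}

fn main() {
	let cluster_size = 4096;
	// First cluster boundary strictly after a match at offset 5000
	assert_eq!(next_multiple_of(5000, cluster_size), 8192);
	// Start of the cluster containing offset 5000
	assert_eq!(prev_multiple_of(5000, cluster_size), 4096);
	// An offset already on a boundary still advances a full cluster
	assert_eq!(next_multiple_of(4096, cluster_size), 8192);
}
```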
6 changes: 3 additions & 3 deletions libsearchlight/src/validation.rs
@@ -15,10 +15,10 @@ pub trait FileValidator {
///
/// `cluster_size` is given to aid reconstruction logic. It must not be assumed that cluster_size is any sensible value, as users can pass in anything. Additionally, a cluster size of
/// 1 indicates that files in the image aren't allocated on cluster boundaries
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo;
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo;
}

-pub type Fragment = Range<u64>;
+pub type Fragment = Range<usize>;

pub struct FileValidationInfo {
/// The result of validating the data - Whether it is recognised as fully present and correct, partial, corrupted, etc
@@ -98,7 +98,7 @@ impl DelegatingValidator {
}

impl FileValidator for DelegatingValidator {
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo {
if let Some(validator) = self.validators.get(&file_match.file_type.type_id) {
validator.validate(file_data, file_match, all_matches, cluster_size, config)
} else {
18 changes: 9 additions & 9 deletions libsearchlight/src/validation/jpeg.rs
@@ -30,10 +30,10 @@ impl JpegValidator {
/// Attempt to reconstruct JPEG scan data, assuming that all fragments are in-order, by looping through clusters and attempting to classify them
/// as either JPEG scan data or not
fn reconstruct_scan_data(file_data: &[u8], scan_marker_idx: usize, cluster_size: usize, config: &SearchlightConfig) -> JpegScanReconstructionInfo {
-let fragmentation_start = utils::next_multiple_of(scan_marker_idx as u64 + 1, cluster_size as u64) as usize;
+let fragmentation_start = utils::next_multiple_of(scan_marker_idx + 1, cluster_size) as usize;

let mut fragments = vec![
-(scan_marker_idx as u64)..(fragmentation_start as u64)
+scan_marker_idx..fragmentation_start
];

let mut cluster_idx = fragmentation_start;
@@ -56,10 +56,10 @@
()
}
(true, None) => {
-fragments.push((cluster_idx as u64)..((cluster_idx + cluster_size) as u64));
+fragments.push(cluster_idx..(cluster_idx + cluster_size));
}
(true, Some(next_marker)) => {
-fragments.push((cluster_idx as u64)..(next_marker + cluster_idx) as u64);
+fragments.push((cluster_idx)..(next_marker + cluster_idx));
utils::simplify_ranges(&mut fragments);

return JpegScanReconstructionInfo::Success {
@@ -80,7 +80,7 @@
impl FileValidator for JpegValidator {
// Written using https://www.w3.org/Graphics/JPEG/jfif3.pdf,
// https://www.w3.org/Graphics/JPEG/itu-t81.pdf and https://stackoverflow.com/questions/32873541/scanning-a-jpeg-file-for-markers
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo {
let start = file_match.start_idx as usize;

// Mandatory segments for a complete JPEG file
@@ -98,12 +98,12 @@
// that up. Then again I can't see anything in any documentation to say that segments necessarily have lengths
if (file_data[i + 1] ^ 0xd0 < 0x09) || file_data[i + 1] == 0x01 {
// Move on to the next segment
-fragments.push(i as u64..(i as u64 + 2));
+fragments.push(i..(i + 2));
utils::simplify_ranges(&mut fragments);
i += 2;
continue;
} else if file_data[i + 1] == JPEG_EOI {
-fragments.push(i as u64..(i as u64 + 2 + cluster_size)); // NOTE: We're carving an extra cluster here which isn't necessary for the image but often metadata is stored past EOI so this will catch (some of) that
+fragments.push(i..(i + 2 + cluster_size)); // NOTE: We're carving an extra cluster here which isn't necessary for the image but often metadata is stored past EOI so this will catch (some of) that
utils::simplify_ranges(&mut fragments);

// Return that this is a complete file with length start - i
@@ -122,7 +122,7 @@
i = next_chunk_idx;
},
JpegScanReconstructionInfo::Failure { failure_idx } => {
-fragments.push(i as u64..failure_idx as u64);
+fragments.push(i..failure_idx);

break FileValidationInfo {
validation_type: FileValidationType::Partial,
@@ -139,7 +139,7 @@
// Parse the length and skip the segment
let segment_len = u16::from_be_bytes(file_data[(i + 2)..=(i + 3)].try_into().unwrap());

-fragments.push(i as u64..(i as u64 + segment_len as u64 + 2));
+fragments.push(i..(i + segment_len as usize + 2));
utils::simplify_ranges(&mut fragments);

i += segment_len as usize + 2;
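For context on the marker test in the hunk above: `file_data[i + 1] ^ 0xd0 < 0x09` matches markers 0xd0 through 0xd8 (RST0-RST7 plus SOI), which, like TEM (0x01), carry no length field; every other segment the validator walks has a big-endian u16 length that counts itself but not the marker bytes. A minimal sketch of that classification (not part of this commit):

```rust
/// True for markers with no length field: RST0-RST7 (0xd0-0xd7), SOI (0xd8)
/// and TEM (0x01), mirroring the XOR trick used by the validator
fn is_standalone_marker(marker: u8) -> bool {
	(marker ^ 0xd0) < 0x09 || marker == 0x01
}

/// Total bytes to skip for a length-prefixed segment starting at `i`:
/// 2 marker bytes plus the u16 length (which includes its own 2 bytes)
fn segment_size(file_data: &[u8], i: usize) -> usize {
	u16::from_be_bytes(file_data[(i + 2)..(i + 4)].try_into().unwrap()) as usize + 2
}
```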
34 changes: 17 additions & 17 deletions libsearchlight/src/validation/png.rs
@@ -70,22 +70,22 @@ struct ChunkValidationInfo {
validation_type: FileValidationType,
chunk_type: u32,
chunk_frags: Vec<Fragment>,
-next_chunk_idx: Option<u64>,
+next_chunk_idx: Option<usize>,
}

impl ChunkValidationInfo {
pub fn new_unfragmented(validation_type: FileValidationType, chunk_type: u32, chunk_idx: usize, data_len: u32, should_continue: bool) -> Self {
-let next_chunk_idx = chunk_idx as u64 + 12 + data_len as u64;
+let next_chunk_idx = chunk_idx + 12 + data_len as usize;

ChunkValidationInfo {
validation_type,
chunk_type,
-chunk_frags: vec![chunk_idx as u64..next_chunk_idx],
-next_chunk_idx: if should_continue { Some(chunk_idx as u64 + 12 + data_len as u64) } else { None }
+chunk_frags: vec![chunk_idx..next_chunk_idx],
+next_chunk_idx: if should_continue { Some(chunk_idx + 12 + data_len as usize) } else { None }
}
}

-pub fn new_fragmented(validation_type: FileValidationType, chunk_type: u32, fragments: Vec<Fragment>, next_chunk_idx: Option<u64>) -> Self {
+pub fn new_fragmented(validation_type: FileValidationType, chunk_type: u32, fragments: Vec<Fragment>, next_chunk_idx: Option<usize>) -> Self {
ChunkValidationInfo {
validation_type,
chunk_type,
@@ -98,7 +98,7 @@
enum ChunkReconstructionInfo {
Success {
chunk_frags: Vec<Fragment>,
-next_chunk_idx: u64
+next_chunk_idx: usize
},
Failure
}
@@ -110,7 +110,7 @@ impl PngValidator {

/// Validates and reconstructs PNG chunk at `chunk_idx` in `file_data`, where `file_data` has a cluster size of `cluster_size`, so files can be assumed
/// to be allocated in blocks of `cluster_size`. `chunk_idx` refers to the very start of a chunk, where a chunk is \[`len`\]\[`type`\]\[`data`\]\[`crc`\].
-fn validate_chunk(requires_plte: &mut bool, plte_forbidden: &mut bool, file_data: &[u8], chunk_idx: usize, cluster_size: u64, max_search_len: u64) -> ChunkValidationInfo {
+fn validate_chunk(requires_plte: &mut bool, plte_forbidden: &mut bool, file_data: &[u8], chunk_idx: usize, cluster_size: usize, max_search_len: usize) -> ChunkValidationInfo {
/// Macro to make extracting fields a bit more readable: file_data[(chunk_idx + 4)..(chunk_idx + 8)] -> chunk_data[4, 8]
macro_rules! chunk_data {
[$start: expr, $end: expr] => {
@@ -181,9 +181,9 @@
} else {
(
vec![
-(chunk_idx as u64)..(unfrag_crc_offset as u64 + 4)
+chunk_idx..(unfrag_crc_offset + 4)
],
-unfrag_crc_offset as u64 + 4
+unfrag_crc_offset + 4
)
};

@@ -208,7 +208,7 @@
/// Attempts to reconstruct a fragmented PNG chunk, assuming that the length, chunk type, and CRC are not fragmented and that all
/// fragments of the chunk are in-order (limitations) by searching forwards for a valid chunk type, decoding the CRC that should occur just before it,
/// and enumerating the possible cluster arrangements between the start of the chunk data and the decoded CRC for a matching calculated CRC
-fn reconstruct_chunk(file_data: &[u8], chunk_idx: usize, chunk_data_len: usize, cluster_size: u64, max_search_len: u64) -> ChunkReconstructionInfo {
+fn reconstruct_chunk(file_data: &[u8], chunk_idx: usize, chunk_data_len: usize, cluster_size: usize, max_search_len: usize) -> ChunkReconstructionInfo {
let unfrag_crc_offset = chunk_idx + chunk_data_len + 8;

let mut next_chunk_type_offset = unfrag_crc_offset + 8;
@@ -230,8 +230,8 @@
let stored_crc = u32::from_be_bytes(file_data[(next_chunk_type_offset - 8)..(next_chunk_type_offset - 4)].try_into().unwrap());

// Calculate the fragmentation points
-let fragmentation_start = utils::next_multiple_of(chunk_idx as u64 + 8, cluster_size) as usize;
-let fragmentation_end = utils::prev_multiple_of(next_chunk_type_offset as u64 - 8, cluster_size) as usize;
+let fragmentation_start = utils::next_multiple_of(chunk_idx + 8, cluster_size) as usize;
+let fragmentation_end = utils::prev_multiple_of(next_chunk_type_offset - 8, cluster_size) as usize;

// Calculate the number of clusters that were skipped, i.e. the number of irrelevant chunks
let clusters_skipped = (next_chunk_type_offset - (unfrag_crc_offset + 8)) / cluster_size as usize;
@@ -268,12 +268,12 @@ impl PngValidator {
}

if let Some(mut data_frags) = correct_fragmentation {
-data_frags.insert(0, chunk_idx as u64..fragmentation_start as u64);
-data_frags.push(fragmentation_end as u64..(next_chunk_type_offset - 4) as u64);
+data_frags.insert(0, chunk_idx..fragmentation_start);
+data_frags.push(fragmentation_end..(next_chunk_type_offset - 4));

utils::simplify_ranges(&mut data_frags);

-ChunkReconstructionInfo::Success { chunk_frags: data_frags, next_chunk_idx: next_chunk_type_offset as u64 - 4 }
+ChunkReconstructionInfo::Success { chunk_frags: data_frags, next_chunk_idx: next_chunk_type_offset - 4 }
} else {
ChunkReconstructionInfo::Failure
}
@@ -336,7 +336,7 @@ impl PngValidator {

impl FileValidator for PngValidator {
// Written using https://www.w3.org/TR/png-3/
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: u64, config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, _all_matches: &[Match], cluster_size: usize, config: &SearchlightConfig) -> FileValidationInfo {
let mut chunk_idx = file_match.start_idx as usize + 8;

let mut requires_plte = false;
@@ -362,7 +362,7 @@ impl FileValidator for PngValidator {
let mut fragments: Vec<Fragment> = vec![ file_match.start_idx..(file_match.start_idx + 8) ];

loop {
-let mut chunk_info = Self::validate_chunk(&mut requires_plte, &mut plte_forbidden, &file_data, chunk_idx, cluster_size, config.max_reconstruction_search_len.unwrap_or(u64::MAX));
+let mut chunk_info = Self::validate_chunk(&mut requires_plte, &mut plte_forbidden, &file_data, chunk_idx, cluster_size, config.max_reconstruction_search_len.unwrap_or(u64::MAX) as usize);

fragments.append(&mut chunk_info.chunk_frags);
utils::simplify_ranges(&mut fragments);
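To spell out the chunk layout this validator leans on: a PNG chunk is [u32 length][4-byte type][data][u32 CRC], i.e. 12 bytes of overhead around the data, with the CRC-32 computed over the type and data fields only. A minimal check for an unfragmented chunk might look like this (a sketch assuming the crc32fast crate, not necessarily the CRC routine the codebase actually uses):

```rust
/// Verify the stored CRC of an unfragmented chunk starting at `chunk_idx`.
/// Layout: [u32 len][4-byte type][data; len][u32 crc], all big-endian.
fn chunk_crc_ok(file_data: &[u8], chunk_idx: usize) -> bool {
	let data_len = u32::from_be_bytes(file_data[chunk_idx..(chunk_idx + 4)].try_into().unwrap()) as usize;
	let crc_offset = chunk_idx + 8 + data_len;
	let stored_crc = u32::from_be_bytes(file_data[crc_offset..(crc_offset + 4)].try_into().unwrap());
	// CRC-32 (ISO-HDLC polynomial) over chunk type + chunk data, per the PNG spec
	crc32fast::hash(&file_data[(chunk_idx + 4)..crc_offset]) == stored_crc
}
```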
13 changes: 10 additions & 3 deletions libsearchlight/src/validation/zip.rs
@@ -11,6 +11,7 @@ const ZIP_LOCAL_FILE_HEADER_SIG_ID: u64 = 0x04034b50; // TODO: Calculate this

const ZIP_LOCAL_FILE_HEADER_SIZE: usize = 30;
const ZIP_CENTRAL_DIR_HEADER_SIZE: usize = 46;
+const ZIP_END_OF_CENTRAL_DIR_SIZE: usize = 22;

const ZIP_DATA_DESCRIPTOR_FLAG: u16 = 3;

@@ -248,7 +249,7 @@ impl ZipValidator {

impl FileValidator for ZipValidator {
// Written using: https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt and https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html
-fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], _cluster_size: u64, _config: &SearchlightConfig) -> FileValidationInfo {
+fn validate(&self, file_data: &[u8], file_match: &MatchPair, all_matches: &[Match], _cluster_size: usize, _config: &SearchlightConfig) -> FileValidationInfo {
// Since ZIP files may have multiple headers before 1 footer, and so we can only assume that 1 footer = 1 zip file, this match pair
// may well span the nth file in the zip to the EOCD signature. We can check the number of entries we come across however against
// the number of entries in the central directory and if they don't match, and no other problems have been encountered, then we can
@@ -264,15 +265,18 @@
// 4. For each file, put their fragments in order of the offsets in the central directory
// 5. As one last thing, go through the fragments and check that all the offsets are correct. If they are not, validate the ZIP as either Partial or Corrupted

-let eocd_idx = file_match.end_idx as usize - file_match.file_type.footers[0].len() + 1;
+let eocd_idx = file_match.end_idx - file_match.file_type.footers[0].len() + 1;

-if (eocd_idx + 22) > file_data.len() {
+if (eocd_idx + ZIP_END_OF_CENTRAL_DIR_SIZE) > file_data.len() {
return FileValidationInfo {
validation_type: FileValidationType::Partial,
..Default::default()
}
}

+let eocd_comment_len = u16::from_le_bytes(file_data[(eocd_idx + 0x14)..(eocd_idx + 0x16)].try_into().unwrap()) as usize;
+let eocd_len = eocd_comment_len + ZIP_END_OF_CENTRAL_DIR_SIZE;
+
// Check the signature - we only want to handle the case of EOCD
let signature = &file_data[eocd_idx..(eocd_idx + 4)];
assert_eq!(signature, &[ 0x50, 0x4b, 0x05, 0x06 ]);
@@ -329,6 +333,9 @@
lfhs
};

+let frag_eocd = eocd_idx..(eocd_idx + eocd_len);
+let frag_cd = central_directory_idx..eocd_idx;
+
// TODO: Go through the local file headers and validate/reconstruct each file data segment

todo!()
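For reference, the EOCD record anchored on above is a fixed 22-byte structure plus a variable-length comment whose u16 length sits at offset 0x14 (exactly what the new `eocd_comment_len` line reads). A sketch of pulling out the fields the planned reconstruction would need (not part of this commit; the struct and field names are invented):

```rust
const ZIP_END_OF_CENTRAL_DIR_SIZE: usize = 22;

/// End Of Central Directory fields, per APPNOTE.txt; offsets are relative
/// to the EOCD signature PK\x05\x06
struct EndOfCentralDirectory {
	total_entries: u16, // 0x0a: total entries in the central directory
	cd_size: u32,       // 0x0c: size of the central directory in bytes
	cd_offset: u32,     // 0x10: offset of the central directory in the image
	comment_len: u16,   // 0x14: length of the trailing comment
}

fn parse_eocd(file_data: &[u8], eocd_idx: usize) -> Option<EndOfCentralDirectory> {
	if file_data.len() < eocd_idx + ZIP_END_OF_CENTRAL_DIR_SIZE
		|| file_data[eocd_idx..(eocd_idx + 4)] != [0x50, 0x4b, 0x05, 0x06]
	{
		return None;
	}
	let le16 = |off| u16::from_le_bytes(file_data[(eocd_idx + off)..(eocd_idx + off + 2)].try_into().unwrap());
	let le32 = |off| u32::from_le_bytes(file_data[(eocd_idx + off)..(eocd_idx + off + 4)].try_into().unwrap());
	Some(EndOfCentralDirectory {
		total_entries: le16(0x0a),
		cd_size: le32(0x0c),
		cd_offset: le32(0x10),
		comment_len: le16(0x14),
	})
}
```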
