Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for DW_LNCT_LLVM_source #728

Merged
merged 12 commits into from
Jun 28, 2024
3 changes: 3 additions & 0 deletions src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1052,7 +1052,10 @@ DwLnct(u16) {
DW_LNCT_timestamp = 0x3,
DW_LNCT_size = 0x4,
DW_LNCT_MD5 = 0x5,
// DW_LNCT_source = 0x6,
DW_LNCT_lo_user = 0x2000,
// We currently only implement the LLVM embedded source code extension for DWARF v5.
DW_LNCT_LLVM_source = 0x2001,
DW_LNCT_hi_user = 0x3fff,
});

Expand Down
50 changes: 49 additions & 1 deletion src/read/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,13 @@ where
.any(|x| x.content_type == constants::DW_LNCT_MD5)
}

/// Report whether this header's file name entry format declares an
/// embedded-source field, i.e. one whose content type is
/// `DW_LNCT_LLVM_source`.
pub fn file_has_source(&self) -> bool {
    let wanted = constants::DW_LNCT_LLVM_source;
    self.file_name_entry_format
        .iter()
        .any(|format| format.content_type == wanted)
}

/// Get the list of source files that appear in this header's line program.
pub fn file_names(&self) -> &[FileEntry<R, Offset>] {
&self.file_names[..]
Expand Down Expand Up @@ -1380,6 +1387,7 @@ where
timestamp: 0,
size: 0,
md5: [0; 16],
source: None,
});

file_name_entry_format = Vec::new();
Expand Down Expand Up @@ -1579,6 +1587,7 @@ where
timestamp: u64,
size: u64,
md5: [u8; 16],
source: Option<AttributeValue<R, Offset>>,
}

impl<R, Offset> FileEntry<R, Offset>
Expand All @@ -1598,6 +1607,7 @@ where
timestamp,
size,
md5: [0; 16],
source: None,
};

Ok(entry)
Expand Down Expand Up @@ -1667,6 +1677,16 @@ where
pub fn md5(&self) -> &[u8; 16] {
&self.md5
}

/// The embedded source code of this file: a UTF-8 source text string that
/// uses "\n" line endings.
///
/// Returns `None` when no source attribute was recorded for this entry.
///
/// Note: for DWARF v5 files the attribute may be present but empty, which
/// indicates that no source code is available. That case is reported as
/// `Some(<zero-length attr>)` rather than `None`.
pub fn source(&self) -> Option<AttributeValue<R, Offset>> {
    // Hand back an owned copy so the caller does not borrow from the entry.
    self.source.as_ref().cloned()
}
}

/// The format of a component of an include directory or file name entry.
Expand Down Expand Up @@ -1733,6 +1753,7 @@ fn parse_file_v5<R: Reader>(
let mut timestamp = 0;
let mut size = 0;
let mut md5 = [0; 16];
let mut source = None;

for format in formats {
let value = parse_attribute(input, encoding, format.form)?;
Expand Down Expand Up @@ -1760,6 +1781,9 @@ fn parse_file_v5<R: Reader>(
}
}
}
constants::DW_LNCT_LLVM_source => {
source = Some(value);
}
// Ignore unknown content types.
_ => {}
}
Expand All @@ -1771,6 +1795,7 @@ fn parse_file_v5<R: Reader>(
timestamp,
size,
md5,
source,
})
}

Expand Down Expand Up @@ -1986,13 +2011,15 @@ mod tests {
timestamp: 0,
size: 0,
md5: [0; 16],
source: None,
},
FileEntry {
path_name: AttributeValue::String(EndianSlice::new(b"bar.h", LittleEndian)),
directory_index: 1,
timestamp: 0,
size: 0,
md5: [0; 16],
source: None,
},
];
assert_eq!(header.file_names(), &expected_file_names);
Expand Down Expand Up @@ -2151,13 +2178,15 @@ mod tests {
timestamp: 0,
size: 0,
md5: [0; 16],
source: None,
},
FileEntry {
path_name: AttributeValue::String(EndianSlice::new(b"bar.rs", LittleEndian)),
directory_index: 0,
timestamp: 0,
size: 0,
md5: [0; 16],
source: None,
},
],
include_directories: vec![],
Expand Down Expand Up @@ -2404,6 +2433,7 @@ mod tests {
timestamp: 1,
size: 2,
md5: [0; 16],
source: None,
}),
);

Expand All @@ -2427,6 +2457,7 @@ mod tests {
timestamp: 0,
size: 0,
md5: [0; 16],
source: None,
};

let mut header = make_test_header(EndianSlice::new(&[], LittleEndian));
Expand Down Expand Up @@ -2855,6 +2886,7 @@ mod tests {
timestamp: 0,
size: 0,
md5: [0; 16],
source: None,
};

let opcode = LineInstruction::DefineFile(file);
Expand Down Expand Up @@ -2916,6 +2948,10 @@ mod tests {
timestamp: 0,
size: 0,
md5: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
source: Some(AttributeValue::String(EndianSlice::new(
b"foobar",
LittleEndian,
))),
},
FileEntry {
path_name: AttributeValue::String(EndianSlice::new(b"file2", LittleEndian)),
Expand All @@ -2925,6 +2961,10 @@ mod tests {
md5: [
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
],
source: Some(AttributeValue::String(EndianSlice::new(
b"quux",
LittleEndian,
))),
},
];

Expand Down Expand Up @@ -2967,21 +3007,25 @@ mod tests {
.append_bytes(b"dir1\0")
.append_bytes(b"dir2\0")
// File entry format count.
.D8(3)
.D8(4)
.uleb(constants::DW_LNCT_path.0 as u64)
.uleb(constants::DW_FORM_string.0 as u64)
.uleb(constants::DW_LNCT_directory_index.0 as u64)
.uleb(constants::DW_FORM_data1.0 as u64)
.uleb(constants::DW_LNCT_MD5.0 as u64)
.uleb(constants::DW_FORM_data16.0 as u64)
.uleb(constants::DW_LNCT_LLVM_source.0 as u64)
.uleb(constants::DW_FORM_string.0 as u64)
// File count.
.D8(2)
.append_bytes(b"file1\0")
.D8(0)
.append_bytes(&expected_file_names[0].md5)
.append_bytes(b"foobar\0")
.append_bytes(b"file2\0")
.D8(1)
.append_bytes(&expected_file_names[1].md5)
.append_bytes(b"quux\0")
.mark(&header_end)
// Dummy line program data.
.append_bytes(expected_program)
Expand Down Expand Up @@ -3033,6 +3077,10 @@ mod tests {
FileEntryFormat {
content_type: constants::DW_LNCT_MD5,
form: constants::DW_FORM_data16,
},
FileEntryFormat {
content_type: constants::DW_LNCT_LLVM_source,
form: constants::DW_FORM_string,
}
]
);
Expand Down
70 changes: 66 additions & 4 deletions src/write/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ pub struct LineProgram {
/// For version 5, this controls whether to emit `DW_LNCT_MD5`.
pub file_has_md5: bool,

/// True if the file entries have embedded source code.
///
/// For version <= 4, this is ignored.
/// For version 5, this controls whether to emit `DW_LNCT_LLVM_source`.
pub file_has_source: bool,

prev_row: LineRow,
row: LineRow,
// TODO: this probably should be either rows or sequences instead
Expand Down Expand Up @@ -119,6 +125,7 @@ impl LineProgram {
file_has_timestamp: false,
file_has_size: false,
file_has_md5: false,
file_has_source: false,
};
// For all DWARF versions, directory index 0 is comp_dir.
// For version <= 4, the entry is implicit. We still add
Expand Down Expand Up @@ -153,6 +160,7 @@ impl LineProgram {
file_has_timestamp: false,
file_has_size: false,
file_has_md5: false,
file_has_source: false,
}
}

Expand Down Expand Up @@ -592,7 +600,8 @@ impl LineProgram {
let count = 2
+ if self.file_has_timestamp { 1 } else { 0 }
+ if self.file_has_size { 1 } else { 0 }
+ if self.file_has_md5 { 1 } else { 0 };
+ if self.file_has_md5 { 1 } else { 0 }
+ if self.file_has_source { 1 } else { 0 };
w.write_u8(count)?;
w.write_uleb128(u64::from(constants::DW_LNCT_path.0))?;
let file_form = self.comp_file.0.form();
Expand All @@ -611,6 +620,10 @@ impl LineProgram {
w.write_uleb128(u64::from(constants::DW_LNCT_MD5.0))?;
w.write_uleb128(constants::DW_FORM_data16.0.into())?;
}
if self.file_has_source {
w.write_uleb128(u64::from(constants::DW_LNCT_LLVM_source.0))?;
w.write_uleb128(constants::DW_FORM_string.0.into())?;
}

// File name entries.
w.write_uleb128(self.files.len() as u64 + 1)?;
Expand All @@ -632,6 +645,20 @@ impl LineProgram {
if self.file_has_md5 {
w.write(&info.md5)?;
}
if self.file_has_source {
// Note: An empty DW_LNCT_LLVM_source is interpreted as missing
// source code. Included source code should always be
// terminated by a "\n" line ending.
let empty_str = LineString::String(Vec::new());
let source = info.source.as_ref().unwrap_or(&empty_str);
source.write(
w,
constants::DW_FORM_string,
Mrmaxmeier marked this conversation as resolved.
Show resolved Hide resolved
self.encoding,
debug_line_str_offsets,
debug_str_offsets,
)?;
}
Ok(())
};
write_file(&self.comp_file.0, DirectoryId(0), &self.comp_file.1)?;
Expand Down Expand Up @@ -937,7 +964,7 @@ mod id {
pub use self::id::*;

/// Extra information for file in a `LineProgram`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct FileInfo {
/// The implementation defined timestamp of the last modification of the file,
/// or 0 if not available.
Expand All @@ -950,6 +977,15 @@ pub struct FileInfo {
///
/// Only used if version >= 5 and `LineProgram::file_has_md5` is `true`.
pub md5: [u8; 16],

/// Optional embedded source code.
///
/// Only used if version >= 5 and `LineProgram::file_has_source` is `true`.
///
/// NOTE: This currently only supports the `LineString::String` variant,
/// since we're encoding the string with `DW_FORM_string`.
/// Other variants will result in a `LineStringFormMismatch` error.
pub source: Option<LineString>,
}

define_section!(
Expand Down Expand Up @@ -999,6 +1035,15 @@ mod convert {
timestamp: comp_file.timestamp(),
size: comp_file.size(),
md5: *comp_file.md5(),
source: match comp_file.source() {
Some(source) => Some(LineString::from(
source,
dwarf,
line_strings,
strings,
)?),
None => None,
},
}),
)
}
Expand Down Expand Up @@ -1040,6 +1085,7 @@ mod convert {
program.file_has_timestamp = from_header.file_has_timestamp();
program.file_has_size = from_header.file_has_size();
program.file_has_md5 = from_header.file_has_md5();
program.file_has_source = from_header.file_has_source();
for from_file in from_header.file_names().iter().skip(file_skip) {
let from_name =
LineString::from(from_file.path_name(), dwarf, line_strings, strings)?;
Expand All @@ -1052,6 +1098,12 @@ mod convert {
timestamp: from_file.timestamp(),
size: from_file.size(),
md5: *from_file.md5(),
source: match from_file.source() {
Some(source) => {
Some(LineString::from(source, dwarf, line_strings, strings)?)
}
None => None,
},
Mrmaxmeier marked this conversation as resolved.
Show resolved Hide resolved
});
files.push(program.add_file(from_name, from_dir, from_info));
}
Expand Down Expand Up @@ -1190,6 +1242,13 @@ mod tests {
program.file_has_md5 = true;
}

// Note: Embedded source code is an accepted extension
// that will become part of DWARF v6. We're using the LLVM extension
// here for v5.
if encoding.version >= 5 {
program.file_has_source = true;
}

let dir_id = program.add_directory(dir2.clone());
assert_eq!(&dir2, program.get_directory(dir_id));
assert_eq!(dir_id, program.add_directory(dir2.clone()));
Expand All @@ -1202,8 +1261,11 @@ mod tests {
} else {
[0; 16]
},
source: (encoding.version >= 5)
.then(|| LineString::String(b"the source code\n".to_vec())),
};
let file_id = program.add_file(file2.clone(), dir_id, Some(file_info));
let file_id =
program.add_file(file2.clone(), dir_id, Some(file_info.clone()));
assert_eq!((&file2, dir_id), program.get_file(file_id));
assert_eq!(file_info, *program.get_file_info(file_id));

Expand All @@ -1213,7 +1275,7 @@ mod tests {
assert_ne!(file_info, *program.get_file_info(file_id));
assert_eq!(
file_id,
program.add_file(file2.clone(), dir_id, Some(file_info))
program.add_file(file2.clone(), dir_id, Some(file_info.clone()))
);
assert_eq!(file_info, *program.get_file_info(file_id));

Expand Down
Loading