burritos/src/simulator/loader.rs

450 lines
18 KiB
Rust
Raw Normal View History

2023-02-15 17:20:10 +01:00
use crate::Machine;
2023-03-31 21:49:26 +02:00
use std::fmt::Error;
2023-02-15 17:20:10 +01:00
use std::fs;
2023-03-27 18:10:11 +02:00
use std::io::Read;
2023-02-15 17:20:10 +01:00
2023-03-27 18:10:11 +02:00
/// load a 32-bits binary file into the machine
///
/// ### Parameters
///
/// - **path** path of the file to load
/// - **machine** the machine where the bin file will be loaded
/// - **start_index** at which index of machine memory you want to start to load the program
///
/// Returns in a Result any io error
pub fn load(path: &str, machine: &mut Machine, start_index: usize) -> Result<(), std::io::Error> {
let mut file = fs::File::open(path)?;
let mut instructions: Vec<u32> = Default::default();
loop {
let mut buf: [u8; 4] = [0; 4];
let res = file.read(&mut buf)?;
if res == 0 {
break; // eof
} else {
instructions.push(u32::from_le_bytes(buf));
}
}
for i in 0..instructions.len() {
machine.write_memory(4, 4 * i + start_index, instructions[i] as u64);
}
// #[cfg(debug_assertions)]
// println!("{:04x?}", instructions); // only print loaded program in debug build
2023-03-27 18:10:11 +02:00
Ok(())
2023-03-31 15:18:58 +02:00
}
2023-03-31 21:49:26 +02:00
/// Values decoded from the header found at the very beginning of an ELF file.
///
/// Instances are built from the raw file bytes with `ElfHeader::try_from`.
#[derive(Debug)]
pub struct ElfHeader {
    /// true if the file declares itself big endian (byte 5 equals 2), false otherwise
    pub endianess: bool,
    /// true if the file is a 32 bits ELF (byte 4 equals 1), false otherwise
    pub is_32bits: bool,
    /// ELF version stored in byte 6 (should be 1)
    pub version: u8,
    /// true if the target ABI (byte 7) is System V
    pub sys_v_abi: bool,
    /// true if the target instruction set (16-bit value at 0x12) is RISC-V (0xf3)
    pub is_riscv_target: bool,
    /// Address of the entry point from where the process starts its execution
    pub entrypoint: u64,
    /// Size of this header, normally 0x40 for 64 bits files and 0x34 for 32 bits files
    pub elf_header_size: u16,
    /// Location of the start of the program header table
    pub program_header_location: u64,
    /// Number of entries in the program header table
    pub program_header_entries: u16,
    /// Size of one program header table entry
    pub program_header_size: u16,
    /// Location of the start of the section header table
    pub section_header_location: u64,
    /// Number of entries in the section header table
    pub section_header_entries: u16,
    /// Size of one section header table entry
    pub section_header_size: u16,
}
2023-03-31 21:49:26 +02:00
impl ElfHeader {
2023-03-31 15:18:58 +02:00
/// return true if the 4 first bytes constitude the elf magic number
2023-03-31 21:49:26 +02:00
fn is_elf(instructions: &Vec<u8>) -> bool {
instructions.get(0..4) == Option::Some(&[0x7f, 0x45, 0x4c, 0x46])
2023-03-31 15:18:58 +02:00
}
/// return true if big endian, false otherwise
2023-03-31 21:49:26 +02:00
fn check_endianess(instructions: &Vec<u8>) -> bool {
instructions.get(5) == Option::Some(&2)
2023-03-31 15:18:58 +02:00
}
/// return true if file is 32 bits, false if 64 bits
2023-03-31 21:49:26 +02:00
fn is_32bits(instructions: &Vec<u8>) -> bool {
instructions.get(4) == Option::Some(&1)
2023-03-31 15:18:58 +02:00
}
/// return the version of the elf file (should be 1)
/// Can be None if the file is smaller than 7 bytes -> the file is invalid
2023-03-31 21:49:26 +02:00
fn get_version(instructions: &Vec<u8>) -> Option<u8> {
instructions.get(6).copied() // work as primitives implements Copy
2023-03-31 15:18:58 +02:00
}
/// return true if target abi of the binary file is System V, false otherwise
2023-03-31 21:49:26 +02:00
fn is_system_v_elf(instructions: &Vec<u8>) -> bool {
instructions.get(7) == Option::Some(&0)
2023-03-31 15:18:58 +02:00
}
/// return true if specified target instruction set architecture is RISCV
2023-03-31 21:49:26 +02:00
fn is_riscv_isa(instructions: &Vec<u8>) -> bool {
Self::get_u16_value(instructions, 0x12) == Option::Some(0xf3)
2023-03-31 15:18:58 +02:00
}
/// memory address of the entry point from where the process starts its execution
2023-03-31 21:49:26 +02:00
fn get_entrypoint(instructions: &Vec<u8>, is_32bits: bool) -> Option<u64> {
2023-03-31 15:18:58 +02:00
if is_32bits {
2023-03-31 22:48:02 +02:00
get_address_point(instructions, 0x18, true)
2023-03-31 15:18:58 +02:00
} else {
2023-03-31 22:48:02 +02:00
get_address_point(instructions, 0x18, false)
2023-03-31 15:18:58 +02:00
}
}
/// Memory address of the start of the program header table
2023-03-31 21:49:26 +02:00
fn get_program_header_table_location(instructions: &Vec<u8>, is_32bits: bool) -> Option<u64> {
2023-03-31 15:18:58 +02:00
if is_32bits {
2023-03-31 22:48:02 +02:00
get_address_point(instructions, 0x1c, true)
2023-03-31 15:18:58 +02:00
} else {
2023-03-31 22:48:02 +02:00
get_address_point(instructions, 0x20, false)
2023-03-31 15:18:58 +02:00
}
}
/// Memory address of the start of the section header table
2023-03-31 21:49:26 +02:00
fn get_section_header_table_location(instructions: &Vec<u8>, is_32bits: bool) -> Option<u64> {
2023-03-31 15:18:58 +02:00
if is_32bits {
2023-03-31 22:48:02 +02:00
get_address_point(instructions, 0x20, true)
2023-03-31 15:18:58 +02:00
} else {
2023-03-31 22:48:02 +02:00
get_address_point(instructions, 0x28, false)
2023-03-31 15:18:58 +02:00
}
}
/// Return the size of the header, normally, 0x40 for 64 bits bin and 0x34 for 32 bits
2023-03-31 21:49:26 +02:00
fn get_elf_header_size(instructions: &Vec<u8>, is_32bits: bool) -> Option<u16> {
2023-03-31 15:18:58 +02:00
let address = if is_32bits { 0x28 } else { 0x34 };
2023-03-31 21:49:26 +02:00
Self::get_u16_value(instructions, address)
2023-03-31 15:18:58 +02:00
}
2023-03-31 21:49:26 +02:00
/// return the size of a program header table entry
fn get_program_header_size(instructions: &Vec<u8>, is_32bits: bool) -> Option<u16> {
let address = if is_32bits { 0x2a } else { 0x36 };
Self::get_u16_value(instructions, address)
2023-03-31 15:18:58 +02:00
}
/// return the number of entries in the program header
2023-03-31 21:49:26 +02:00
fn get_number_entries_program_header(instructions: &Vec<u8>, is_32bits: bool) -> Option<u16> {
2023-03-31 15:18:58 +02:00
let address = if is_32bits { 0x2c } else { 0x38 };
2023-03-31 21:49:26 +02:00
Self::get_u16_value(instructions, address)
2023-03-31 15:18:58 +02:00
}
2023-03-31 21:49:26 +02:00
/// Return the size of a section header table entry
fn get_section_header_size(instructions: &Vec<u8>, is_32bits: bool) -> Option<u16> {
2023-03-31 15:18:58 +02:00
let address = if is_32bits { 0x2e } else { 0x3a };
2023-03-31 21:49:26 +02:00
Self::get_u16_value(instructions, address)
2023-03-31 15:18:58 +02:00
}
/// Return the number of entries in the section header
2023-03-31 21:49:26 +02:00
fn get_section_header_num_entries(instructions: &Vec<u8>, is_32bits: bool) -> Option<u16> {
2023-03-31 15:18:58 +02:00
let address = if is_32bits { 0x30 } else { 0x3c };
2023-03-31 21:49:26 +02:00
Self::get_u16_value(instructions, address)
2023-03-31 15:18:58 +02:00
}
2023-03-31 21:49:26 +02:00
/// Return a u16 value, usually for the size or the number of entries inside a header
fn get_u16_value(instructions: &Vec<u8>, address: usize) -> Option<u16> {
let mut bytes: [u8; 2] = [0; 2];
bytes[0] = instructions.get(address).copied()?;
bytes[1] = instructions.get(address + 1).copied()?;
Option::Some(u16::from_le_bytes(bytes))
}
}
/// Parse the ELF header from the raw bytes of a file.
///
/// Fails with `()` when the buffer does not start with the ELF magic number or when
/// it is too short to contain one of the header fields.
impl TryFrom<&Vec<u8>> for ElfHeader {
    type Error = ();

    fn try_from(instructions: &Vec<u8>) -> Result<Self, Self::Error> {
        if !Self::is_elf(instructions) {
            return Err(());
        }

        // Most field offsets and widths depend on the file class, so decode it first.
        let is_32bits = Self::is_32bits(instructions);

        Ok(ElfHeader {
            endianess: Self::check_endianess(instructions),
            is_32bits,
            version: Self::get_version(instructions).ok_or(())?,
            sys_v_abi: Self::is_system_v_elf(instructions),
            is_riscv_target: Self::is_riscv_isa(instructions),
            entrypoint: Self::get_entrypoint(instructions, is_32bits).ok_or(())?,
            elf_header_size: Self::get_elf_header_size(instructions, is_32bits).ok_or(())?,
            program_header_location: Self::get_program_header_table_location(instructions, is_32bits)
                .ok_or(())?,
            program_header_entries: Self::get_number_entries_program_header(instructions, is_32bits)
                .ok_or(())?,
            program_header_size: Self::get_program_header_size(instructions, is_32bits).ok_or(())?,
            section_header_location: Self::get_section_header_table_location(instructions, is_32bits)
                .ok_or(())?,
            section_header_entries: Self::get_section_header_num_entries(instructions, is_32bits)
                .ok_or(())?,
            section_header_size: Self::get_section_header_size(instructions, is_32bits).ok_or(())?,
        })
    }
}
/// Bits that can be set in the `flags` field of a section header.
///
/// Each variant's discriminant is the bit mask itself, so `variant as u64` can be
/// and-ed directly with `SectionHeader::flags`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FlagValue {
    /// The section is writable during execution
    ShfWrite = 0x1,
    /// The section occupies memory during execution
    ShfAlloc = 0x2,
    /// The section contains executable instructions
    ShfExecinstr = 0x4,
    /// The section data may be merged
    ShfMerge = 0x10,
    /// The section contains null-terminated strings
    ShfStrings = 0x20,
    /// The `section_info` field holds a section header table index
    ShfInfoLink = 0x40,
    /// The section order must be preserved after combining
    ShfLinkOrder = 0x80,
    // Other flags exist but are not relevant for this loader
}
2023-03-31 22:48:02 +02:00
/// One entry of the section header table of an ELF file.
///
/// 32 bits wide fields of 32 bits files are zero-extended to `u64`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionHeader {
    /// Value of the first field of the entry (offset of the section name)
    pub name_offset: u32,
    /// Type of the section
    pub header_type: u32,
    /// Attribute bits of the section, see `FlagValue`
    pub flags: u64,
    /// Virtual address of the section in memory
    pub virt_addr: u64,
    /// Offset of the section content inside the file
    pub image_offset: u64,
    /// Size of the section in bytes
    pub section_size: u64,
    /// Section index of an associated section
    pub section_link: u32,
    /// Extra information about the section
    pub section_info: u32,
    /// Required alignment of the section
    pub required_align: u64,
    /// Size of each entry, for sections that hold fixed-size entries
    pub entry_size: u64
}
2023-03-31 22:48:02 +02:00
impl SectionHeader {
/// return true if flag of this section contains / have `key`, false otherwise
pub fn does_flag_contains_key(&self, key: FlagValue) -> bool {
self.flags & key as u64 != 0
}
2023-03-31 22:48:02 +02:00
fn get_name_offset(instructions: &Vec<u8>, address: usize) -> Option<u32> {
get_address_point(instructions, address, true).map(|v| { v as u32 })
// set true to return a u32
}
fn get_header_type(instructions: &Vec<u8>, address: usize) -> Option<u32> {
get_address_point(instructions, address + 0x4, true).map(|v| { v as u32 })
}
fn get_flags(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u64> {
2023-03-31 22:48:02 +02:00
get_address_point(instructions, address + 0x8, is_32bits)
}
fn get_virtual_address(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u64> {
get_address_point(instructions, address + if is_32bits { 0x0C } else { 0x10 }, is_32bits)
}
fn get_image_offset(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u64> {
get_address_point(instructions, address + if is_32bits { 0x10 } else { 0x18 }, is_32bits)
}
fn get_section_size(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u64> {
get_address_point(instructions, address + if is_32bits { 0x14 } else { 0x20 }, is_32bits)
}
fn get_section_link(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u32> {
get_address_point(instructions, address + if is_32bits { 0x18 } else { 0x28 }, false).map(|v| { v as u32 })
}
fn get_section_info(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u32> {
get_address_point(instructions, address + if is_32bits { 0x1C } else { 0x2C }, false).map(|v| { v as u32 })
}
fn get_required_align(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u64> {
get_address_point(instructions, address + if is_32bits { 0x20 } else { 0x30 }, is_32bits)
}
fn get_entry_size(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u64> {
get_address_point(instructions, address + if is_32bits { 0x24 } else { 0x38 }, is_32bits)
}
}
impl TryFrom<(&Vec<u8>, u64, bool)> for SectionHeader {
2023-03-31 21:49:26 +02:00
type Error = ();
2023-03-31 22:48:02 +02:00
fn try_from(value: (&Vec<u8>, u64, bool)) -> Result<Self, Self::Error> {
let instructions = value.0;
let address = value.1 as usize;
let is_32bits = value.2;
let name_offset = Self::get_name_offset(instructions, address).ok_or(())?;
let header_type = Self::get_header_type(instructions, address).ok_or(())?;
let attribute = Self::get_flags(instructions, address, is_32bits).ok_or(())?;
2023-03-31 22:48:02 +02:00
let virt_addr = Self::get_virtual_address(instructions, address, is_32bits).ok_or(())?;
let image_offset = Self::get_image_offset(instructions, address, is_32bits).ok_or(())?;
let section_size = Self::get_section_size(instructions, address, is_32bits).ok_or(())?;
let section_link = Self::get_section_link(instructions, address, is_32bits).ok_or(())?;
let section_info = Self::get_section_info(instructions, address, is_32bits).ok_or(())?;
let required_align = Self::get_required_align(instructions, address, is_32bits).ok_or(())?;
let entry_size = Self::get_entry_size(instructions, address, is_32bits).ok_or(())?;
Ok(Self { name_offset,
header_type,
flags: attribute,
2023-03-31 22:48:02 +02:00
virt_addr,
image_offset,
section_size,
section_link,
section_info,
required_align,
entry_size
})
2023-03-31 21:49:26 +02:00
}
}
/// An ELF file read into memory together with its parsed headers.
pub struct Loader {
/// Raw content of the file, byte for byte
bytes: Vec<u8>,
/// Values decoded from the ELF file header
pub elf_header: ElfHeader,
/// One entry per section header parsed from the section header table
pub sections: Vec<SectionHeader>
}
2023-03-31 22:48:02 +02:00
#[derive(Debug)]
2023-03-31 21:49:26 +02:00
/// Errors reported by `Loader` while reading and loading an ELF file.
pub enum LoaderError {
/// The file could not be opened or read; wraps the underlying io error
IOError(std::io::Error),
/// The ELF header or a section header could not be parsed, or a byte of an
/// allocatable section lies outside of the file
ParsingError
}
impl Loader {
pub fn new(path: &str, machine: &mut Machine, start_index: usize) -> Result<(Self, u64), LoaderError> {
let loader = Self::load_and_parse(path)?;
let end_alloc = loader.load_into_machine(machine, start_index)?;
Ok((loader, end_alloc))
}
fn load_into_machine(&self, machine: &mut Machine, start_index: usize) -> Result<u64, LoaderError> {
let mut end_index = 0;
for i in 0..self.sections.len() {
let section = &self.sections[i];
if section.does_flag_contains_key(FlagValue::ShfAlloc) {
end_index = section.virt_addr + section.section_size;
// Can allocate to machine memory
for j in (0..section.section_size as usize).step_by(4) {
let mut buf: [u8; 4] = [0; 4];
for k in 0..4 {
buf[k] = self.bytes.get(section.image_offset as usize + j + k).copied().ok_or(LoaderError::ParsingError)?;
}
machine.write_memory(4, start_index + section.virt_addr as usize + j, u32::from_le_bytes(buf) as u64);
}
}
}
Ok(start_index as u64 + end_index)
}
fn load_and_parse(path: &str) -> Result<Self, LoaderError> {
2023-03-31 21:49:26 +02:00
let file = fs::File::open(path);
match file {
Ok(mut file) => {
let mut instructions: Vec<u8> = Default::default();
loop {
let mut buf: [u8; 1] = [0; 1];
let res = file.read(&mut buf);
match res {
Ok(res) => {
if res == 0 {
break; // eof
} else {
instructions.push(buf[0]);
}
},
Err(err) => {
return Err(LoaderError::IOError(err))
}
}
}
let elf_header = match ElfHeader::try_from(&instructions) {
Ok(header) => {
header
},
Err(_) => {
return Err(LoaderError::ParsingError);
}
};
let section_header = match Self::parse_section_header(&instructions, elf_header.is_32bits, elf_header.section_header_location, elf_header.section_header_entries, elf_header.section_header_size) {
Ok(header) => {
header
},
Err(_) => {
return Err(LoaderError::ParsingError);
}
};
// #[cfg(debug_assertions)]
// println!("{:04x?}", instructions); // only print loaded program in debug build
return Ok(Self { bytes: instructions, elf_header, sections: section_header });
},
Err(err) => {
return Err(LoaderError::IOError(err));
}
};
}
fn parse_section_header(instructions: &Vec<u8>, is_32bits: bool, header_location: u64, num_of_entries: u16, entry_size: u16) -> Result<Vec<SectionHeader>, ()> {
let mut sections: Vec<SectionHeader> = Default::default();
for i in 0..num_of_entries as u64 {
sections.push(Self::parse_section_entry(instructions, is_32bits, header_location + i * entry_size as u64)?);
}
Ok(sections)
}
fn parse_section_entry(instructions: &Vec<u8>, is_32bits: bool, location: u64) -> Result<SectionHeader, ()> {
2023-03-31 22:48:02 +02:00
SectionHeader::try_from((instructions, location, is_32bits))
2023-03-31 21:49:26 +02:00
}
2023-03-31 15:18:58 +02:00
}
2023-03-31 22:48:02 +02:00
/// Decode the little-endian unsigned integer stored at `address`.
///
/// Reads 4 bytes when `is_32bits` is true and 8 bytes otherwise; a 4 bytes value is
/// zero-extended to `u64`.
/// Returns None if the buffer ends before the last byte of the value, in this case
/// the elf header is incorrect.
fn get_address_point(instructions: &Vec<u8>, address: usize, is_32bits: bool) -> Option<u64> {
    let width = if is_32bits { 4 } else { 8 };
    let end = address.checked_add(width)?;
    let field = instructions.get(address..end)?;

    // Copy into the low bytes of a zeroed 8 bytes buffer: the untouched high bytes
    // are what zero-extends a 4 bytes value.
    let mut raw = [0u8; 8];
    raw[..width].copy_from_slice(field);
    Some(u64::from_le_bytes(raw))
}
2023-03-31 15:18:58 +02:00
#[cfg(test)]
mod test {
    use crate::simulator::{loader::Loader, machine::Machine};

    /// Parse a 64 bits RISC-V test binary, load it at index 0 and check every field
    /// decoded from its ELF header.
    #[test]
    #[ignore = "CI gitlab a modifié"]
    fn test_parse_elf() {
        let mut machine = Machine::init_machine();
        let loader = Loader::load_and_parse("./test/riscv_instructions/simple_arithmetics/unsigned_addition").expect("IO Error");
        loader.load_into_machine(&mut machine, 0).expect("Parsing error");

        let header = &loader.elf_header;

        // Identification bytes
        assert!(!header.is_32bits);
        assert!(!header.endianess);
        assert!(header.sys_v_abi);
        assert!(header.is_riscv_target);
        assert_eq!(1, header.version);

        // Entry point and table layout
        assert_eq!(0x4000, header.entrypoint);
        assert_eq!(64, header.elf_header_size);
        assert_eq!(64, header.program_header_location);
        assert_eq!(18984, header.section_header_location);
        assert_eq!(56, header.program_header_size);
        assert_eq!(64, header.section_header_size);
        assert_eq!(4, header.program_header_entries);
        assert_eq!(9, header.section_header_entries);

        // Section headers are only printed, not asserted
        println!("{:#x?}", loader.sections);
    }
}