use crate::Machine; use std::fs; use std::io::Read; /// load a 32-bits binary file into the machine /// /// ### Parameters /// /// - **path** path of the file to load /// - **machine** the machine where the bin file will be loaded /// - **start_index** at which index of machine memory you want to start to load the program /// /// Returns in a Result any io error pub fn load(path: &str, machine: &mut Machine, start_index: usize) -> Result<(), std::io::Error> { let mut file = fs::File::open(path)?; let mut instructions: Vec = Default::default(); loop { let mut buf: [u8; 4] = [0; 4]; let res = file.read(&mut buf)?; if res == 0 { break; // eof } else { instructions.push(u32::from_le_bytes(buf)); } } for (i, inst) in instructions.iter().enumerate() { machine.write_memory(4, 4 * i + start_index, inst.to_owned() as u64); } // #[cfg(debug_assertions)] // println!("{:04x?}", instructions); // only print loaded program in debug build Ok(()) } /// The elf header defines principes aspects of the binary files, it's place at the start of the file /// see for more informations pub struct ElfHeader { /// Defines whether the file is big or little endian /// true correspond to big endian, false otherwise /// /// Offset: 0x05, size: 1 byte pub endianess: bool, /// Defines whether the file is 32 bits or 64 bits /// /// Offset: 0x04, size: 1 byte pub is_32bits: bool, /// Version of the elf file, current version is 1 /// /// Offset: 0x06, size: 1 byte pub version: u8, /// Identifies the target ABI. /// /// In this implementation: Defines if the target abi is system V compliant /// /// Offset: 0x07, size: 1 byte pub sys_v_abi: bool, /// Identifies target ISA, 0xF3 correspond to RISC-V /// /// In this implementatio, true if target isa is RISC-V, false otherwise /// /// Offset: 0x12, size: 2 bytes pub is_riscv_target: bool, /// Memory address of the entry point from w bool { instructions.get(0..4) == Option::Some(&[0x7f, 0x45, 0x4c, 0x46]) } /// return true if big endian, false otherwise fn check_endianess(instructions: &[u8]) -> bool { instructions.get(5) == Option::Some(&2) } /// return true if file is 32 bits, false if 64 bits fn is_32bits(instructions: &[u8]) -> bool { instructions.get(4) == Option::Some(&1) } /// return the version of the elf file (should be 1) /// Can be None if the file is smaller than 7 bytes -> the file is invalid fn get_version(instructions: &[u8]) -> Option { instructions.get(6).copied() // work as primitives implements Copy } /// return true if target abi of the binary file is System V, false otherwise fn is_system_v_elf(instructions: &[u8]) -> bool { instructions.get(7) == Option::Some(&0) } /// return true if specified target instruction set architecture is RISCV fn is_riscv_isa(instructions: &[u8]) -> bool { Self::get_u16_value(instructions, 0x12) == Option::Some(0xf3) } /// memory address of the entry point from where the process starts its execution /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_entrypoint(instructions: &[u8], is_32bits: bool) -> Option { if is_32bits { get_address_point(instructions, 0x18, true) } else { get_address_point(instructions, 0x18, false) } } /// Memory address of the start of the program header table /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_program_header_table_location(instructions: &[u8], is_32bits: bool) -> Option { if is_32bits { get_address_point(instructions, 0x1c, true) } else { get_address_point(instructions, 0x20, false) } } /// Memory address of the start of the section header table /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_section_header_table_location(instructions: &[u8], is_32bits: bool) -> Option { if is_32bits { get_address_point(instructions, 0x20, true) } else { get_address_point(instructions, 0x28, false) } } /// Return the size of the header, normally, 0x40 for 64 bits bin and 0x34 for 32 bits /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_elf_header_size(instructions: &[u8], is_32bits: bool) -> Option { let address = if is_32bits { 0x28 } else { 0x34 }; Self::get_u16_value(instructions, address) } /// return the size of a program header table entry /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_program_header_size(instructions: &[u8], is_32bits: bool) -> Option { let address = if is_32bits { 0x2a } else { 0x36 }; Self::get_u16_value(instructions, address) } /// return the number of entries in the program header /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_number_entries_program_header(instructions: &[u8], is_32bits: bool) -> Option { let address = if is_32bits { 0x2c } else { 0x38 }; Self::get_u16_value(instructions, address) } /// Return the size of a section header table entry /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_section_header_size(instructions: &[u8], is_32bits: bool) -> Option { let address = if is_32bits { 0x2e } else { 0x3a }; Self::get_u16_value(instructions, address) } /// Return the number of entries in the section header /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **is_32bits** defines whether the binary file is 32 bits or 64 bits fn get_section_header_num_entries(instructions: &[u8], is_32bits: bool) -> Option { let address = if is_32bits { 0x30 } else { 0x3c }; Self::get_u16_value(instructions, address) } /// Return a u16 value, usually for the size or the number of entries inside a header /// /// This method retrieve 2 bytes and concatenate them assuming the file is little endian /// /// ## Arguments: /// /// **instructions** List of bytes of the loaded binary file /// **address** Position of the first byte fn get_u16_value(instructions: &[u8], address: usize) -> Option { let mut bytes: [u8; 2] = [0; 2]; bytes[0] = instructions.get(address).copied()?; bytes[1] = instructions.get(address + 1).copied()?; Option::Some(u16::from_le_bytes(bytes)) } } impl TryFrom<&Vec> for ElfHeader { type Error = (); fn try_from(instructions: &Vec) -> Result { if Self::is_elf(instructions) { let format = Self::is_32bits(instructions); let endianess = Self::check_endianess(instructions); let version = Self::get_version(instructions).ok_or(())?; let is_sys_v_abi = Self::is_system_v_elf(instructions); let is_rv_target = Self::is_riscv_isa(instructions); let entrypoint = Self::get_entrypoint(instructions, format).ok_or(())?; let elf_header_size = Self::get_elf_header_size(instructions, format).ok_or(())?; let program_header_location = Self::get_program_header_table_location(instructions, format).ok_or(())?; let program_header_entries = Self::get_number_entries_program_header(instructions, format).ok_or(())? ; let program_header_size = Self::get_program_header_size(instructions, format).ok_or(())?; let section_header_location = Self::get_section_header_table_location(instructions, format).ok_or(())?; let section_header_entries = Self::get_section_header_num_entries(instructions, format).ok_or(())?; let section_header_size = Self::get_section_header_size(instructions, format).ok_or(())?; Ok(ElfHeader { endianess, is_32bits: format, version, sys_v_abi: is_sys_v_abi, is_riscv_target: is_rv_target, entrypoint, elf_header_size, program_header_location, program_header_entries, program_header_size, section_header_location, section_header_entries, section_header_size }) } else { Err(()) } } } /// Flag of a section, a section can have multiples flags by adding the values #[allow(clippy::enum_variant_names)] #[allow(dead_code)] pub enum FlagValue { /// The section is writable ShfWrite = 0x1, /// The section need to be allocate/occupe memory during execution ShfAlloc = 0x2, /// The section need to be executable ShfExecinstr = 0x4, /// Section might ber merged ShfMerge = 0x10, /// Contain null-terminated (\0) strings ShfStrings = 0x20, // There is others but are unrelevant (I think) } /// Section header entry, contains useful informations for each sections of the binary file /// /// see #[derive(Debug)] pub struct SectionHeader { /// Offset to a string in .shstrtab section that represent the name of this section /// /// Offset: 0x0, size: 4 bytes pub name_offset: u32, /// Identify the type of this header /// /// Offset: 0x4, size: 4 bytes pub header_type: u32, /// Identify the atributes of this section /// /// see `Self::does_flag_contains_key(self, FlagValue)` /// /// Offset: 0x8, size: 4 (32 bits) or 8 (64 bits) bytes pub flags: u64, /// Virtual address of the section in memory if section is loaded, 0x0 otherwise /// /// Offset: 0x0C (32 bits) or 0x10 (64 bits), size: 4 (32 bits) or 8 (64 bits) bytes pub virt_addr: u64, /// Offset of the section in the file image (binary file) /// /// Offset: 0x10 (32 bits) or 0x18 (64 bits), size: 4 (32 bits) or 8 (64 bits) bytes pub image_offset: u64, /// Size of the section in the file image, may be 0 /// /// Offset: 0x14 (32 bits) or 0x20 (64 bits), size: 4 (32 bits) or 8 (64 bits) bytes pub section_size: u64, pub section_link: u32, pub section_info: u32, /// Contain the required alignment of the section, must be a power of 2 /// /// Offset: 0x20 (32 bits) or 0x30 (64 bits), size: 4 (32 bits) or 8 (64 bits) bytes pub required_align: u64, /// Contain the size of each entry, for sections that contain fixed size entries, otherwise 0 /// /// Offset: 0x24 (32 bits) or 0x38 (64 bits), size: 4 (32 bits) or 8 (64 bits) bytes pub entry_size: u64 } impl SectionHeader { /// return true if flag of this section contains / have `key`, false otherwise pub fn does_flag_contains_key(&self, key: FlagValue) -> bool { self.flags & key as u64 != 0 } /// Return the offset to a string in .shstrtab that represents the name of this section fn get_name_offset(instructions: &[u8], address: usize) -> Option { get_address_point(instructions, address, true).map(|v| { v as u32 }) // set true to return a u32 } /// Return the type of header of the section fn get_header_type(instructions: &[u8], address: usize) -> Option { get_address_point(instructions, address + 0x4, true).map(|v| { v as u32 }) } /// Return the flags of the section, can hold multiples values, see [`FlagValue`] fn get_flags(instructions: &[u8], address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + 0x8, is_32bits) } /// Return the virtual address of the section in memory if the sectino is loaded(see section flag), otherwise 0 fn get_virtual_address(instructions: &[u8], address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + if is_32bits { 0x0C } else { 0x10 }, is_32bits) } fn get_image_offset(instructions: &Vec, address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + if is_32bits { 0x10 } else { 0x18 }, is_32bits) } fn get_section_size(instructions: &Vec, address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + if is_32bits { 0x14 } else { 0x20 }, is_32bits) } fn get_section_link(instructions: &Vec, address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + if is_32bits { 0x18 } else { 0x28 }, false).map(|v| { v as u32 }) } fn get_section_info(instructions: &Vec, address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + if is_32bits { 0x1C } else { 0x2C }, false).map(|v| { v as u32 }) } fn get_required_align(instructions: &Vec, address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + if is_32bits { 0x20 } else { 0x30 }, is_32bits) } fn get_entry_size(instructions: &Vec, address: usize, is_32bits: bool) -> Option { get_address_point(instructions, address + if is_32bits { 0x24 } else { 0x38 }, is_32bits) } } impl TryFrom<(&Vec, u64, bool)> for SectionHeader { type Error = (); fn try_from(value: (&Vec, u64, bool)) -> Result { let instructions = value.0; let address = value.1 as usize; let is_32bits = value.2; let name_offset = Self::get_name_offset(instructions, address).ok_or(())?; let header_type = Self::get_header_type(instructions, address).ok_or(())?; let attribute = Self::get_flags(instructions, address, is_32bits).ok_or(())?; let virt_addr = Self::get_virtual_address(instructions, address, is_32bits).ok_or(())?; let image_offset = Self::get_image_offset(instructions, address, is_32bits).ok_or(())?; let section_size = Self::get_section_size(instructions, address, is_32bits).ok_or(())?; let section_link = Self::get_section_link(instructions, address, is_32bits).ok_or(())?; let section_info = Self::get_section_info(instructions, address, is_32bits).ok_or(())?; let required_align = Self::get_required_align(instructions, address, is_32bits).ok_or(())?; let entry_size = Self::get_entry_size(instructions, address, is_32bits).ok_or(())?; Ok(Self { name_offset, header_type, flags: attribute, virt_addr, image_offset, section_size, section_link, section_info, required_align, entry_size }) } } pub struct Loader { bytes: Vec, pub elf_header: ElfHeader, pub sections: Vec } #[derive(Debug)] pub enum LoaderError { IOError(std::io::Error), ParsingError } impl Loader { pub fn new(path: &str, machine: &mut Machine, start_index: usize) -> Result<(Self, u64), LoaderError> { let loader = Self::load_and_parse(path)?; let end_alloc = loader.load_into_machine(machine, start_index)?; Ok((loader, end_alloc)) } fn load_into_machine(&self, machine: &mut Machine, start_index: usize) -> Result { let mut end_index = 0; for i in 0..self.sections.len() { let section = &self.sections[i]; if section.does_flag_contains_key(FlagValue::ShfAlloc) { end_index = section.virt_addr + section.section_size; // Can allocate to machine memory for j in (0..section.section_size as usize).step_by(4) { let mut buf: [u8; 4] = [0; 4]; for k in 0..4 { buf[k] = self.bytes.get(section.image_offset as usize + j + k).copied().ok_or(LoaderError::ParsingError)?; } machine.write_memory(4, start_index + section.virt_addr as usize + j, u32::from_le_bytes(buf) as u64); } } } Ok(start_index as u64 + end_index) } fn load_and_parse(path: &str) -> Result { let file = fs::File::open(path); match file { Ok(mut file) => { let mut instructions: Vec = Default::default(); loop { let mut buf: [u8; 1] = [0; 1]; let res = file.read(&mut buf); match res { Ok(res) => { if res == 0 { break; // eof } else { instructions.push(buf[0]); } }, Err(err) => { return Err(LoaderError::IOError(err)) } } } let elf_header = match ElfHeader::try_from(&instructions) { Ok(header) => { header }, Err(_) => { return Err(LoaderError::ParsingError); } }; let section_header = match Self::parse_section_header(&instructions, elf_header.is_32bits, elf_header.section_header_location, elf_header.section_header_entries, elf_header.section_header_size) { Ok(header) => { header }, Err(_) => { return Err(LoaderError::ParsingError); } }; // #[cfg(debug_assertions)] // println!("{:04x?}", instructions); // only print loaded program in debug build return Ok(Self { bytes: instructions, elf_header, sections: section_header }); }, Err(err) => { return Err(LoaderError::IOError(err)); } }; } fn parse_section_header(instructions: &Vec, is_32bits: bool, header_location: u64, num_of_entries: u16, entry_size: u16) -> Result, ()> { let mut sections: Vec = Default::default(); for i in 0..num_of_entries as u64 { sections.push(Self::parse_section_entry(instructions, is_32bits, header_location + i * entry_size as u64)?); } Ok(sections) } fn parse_section_entry(instructions: &Vec, is_32bits: bool, location: u64) -> Result { SectionHeader::try_from((instructions, location, is_32bits)) } } /// return the memory address of something stored at address /// Can return None if the file is smaller than adress + 3 (or 7 if 64 bits), in this case, the elf header is incorrect fn get_address_point(instructions: &[u8], address: usize, is_32bits: bool) -> Option { if is_32bits { let mut bytes: [u8; 4] = [0; 4]; bytes[0] = instructions.get(address).copied()?; bytes[1] = instructions.get(address + 1).copied()?; bytes[2] = instructions.get(address + 2).copied()?; bytes[3] = instructions.get(address + 3).copied()?; Option::Some(u32::from_le_bytes(bytes) as u64) } else { let mut bytes: [u8; 8] = [0; 8]; bytes[0] = instructions.get(address).copied()?; bytes[1] = instructions.get(address + 1).copied()?; bytes[2] = instructions.get(address + 2).copied()?; bytes[3] = instructions.get(address + 3).copied()?; bytes[4] = instructions.get(address + 4).copied()?; bytes[5] = instructions.get(address + 5).copied()?; bytes[6] = instructions.get(address + 6).copied()?; bytes[7] = instructions.get(address + 7).copied()?; Option::Some(u64::from_le_bytes(bytes)) } } #[cfg(test)] mod test { use crate::simulator::{loader::Loader, machine::Machine}; #[test] #[ignore = "CI gitlab a modifié"] fn test_parse_elf() { let mut machine = Machine::init_machine(); let loader = Loader::load_and_parse("./test/riscv_instructions/simple_arithmetics/unsigned_addition").expect("IO Error"); loader.load_into_machine(&mut machine, 0).expect("Parsing error"); assert!(!loader.elf_header.is_32bits); assert!(!loader.elf_header.endianess); assert!(loader.elf_header.sys_v_abi); assert!(loader.elf_header.is_riscv_target); assert_eq!(1, loader.elf_header.version); assert_eq!(0x4000, loader.elf_header.entrypoint); assert_eq!(64, loader.elf_header.elf_header_size); assert_eq!(64, loader.elf_header.program_header_location); assert_eq!(18984, loader.elf_header.section_header_location); assert_eq!(56, loader.elf_header.program_header_size); assert_eq!(64, loader.elf_header.section_header_size); assert_eq!(4, loader.elf_header.program_header_entries); assert_eq!(9, loader.elf_header.section_header_entries); println!("{:#x?}", loader.sections); } }