parse zip64 extra field

This commit is contained in:
Jean-Marie Mineau 2024-01-15 11:08:20 +01:00
parent 1e46baa7ef
commit 3aebd51768
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2

View file

@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
use androscalpel_serializer::{ReadSeek, Result, Serializable};
@ -14,6 +14,142 @@ enum Encoding {
//struct Signature(pub [u8; 4]);
struct Signature(pub u32);
/// One entry of a ZIP "extra field" area: either parsed into a known
/// structured representation, or kept raw as header id + payload bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ExtraField {
    // Zip64 extended information field (header id 0x0001), parsed.
    Zip64(Zip64ExtraField),
    // Any other (or not-yet-promoted) field, kept as raw id + data.
    Generic(GenericExtraField),
}
impl ExtraField {
fn to_generic(&self) -> Result<GenericExtraField> {
match self {
Self::Zip64(field) => field.to_generic_field(),
Self::Generic(field) => Ok(field.clone()),
}
}
}
impl Serializable for ExtraField {
    /// Serialize by first lowering to the raw (id + bytes) form.
    fn serialize(&self, output: &mut dyn Write) -> Result<()> {
        self.to_generic()?.serialize(output)
    }

    /// Deserialize as a raw field only. Promotion of known ids (e.g. Zip64)
    /// to structured variants is done by the caller, which knows which
    /// header fields are saturated and therefore how to read the payload.
    fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> {
        Ok(Self::Generic(GenericExtraField::deserialize(input)?))
    }

    /// Serialized size in bytes (4-byte id+length header plus payload).
    fn size(&self) -> usize {
        // A field held by this type always lowers cleanly back to its
        // generic form; a failure here is a programming error, not an
        // I/O condition, so state the invariant instead of a bare unwrap.
        self.to_generic()
            .expect("extra field could not be lowered to its generic form")
            .size()
    }
}
/// Zip64 extended information extra field (header id 0x0001).
///
/// Each member appears in the on-disk payload only when the corresponding
/// 32/16-bit field of the file header is saturated (`u32::MAX` / `u16::MAX`),
/// which is why every member is an `Option`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Zip64ExtraField {
    // Uncompressed file size; present when the header field is u32::MAX.
    original_size: Option<u64>,
    // Compressed data size; present when the header field is u32::MAX.
    compressed_size: Option<u64>,
    // Offset of the local file header; present when the header field is u32::MAX.
    offset_header: Option<u64>,
    // Disk number where the file starts; present when the header field is u16::MAX.
    disk_number: Option<u32>,
}
impl Zip64ExtraField {
    /// Header id of the Zip64 extended information field.
    const ID: u16 = 0x0001;

    /// Lower to a raw field: concatenate the members that are present, in
    /// this fixed order — original size, compressed size, header offset,
    /// disk number — under id 0x0001. Absent members contribute no bytes,
    /// so the payload is not self-describing; see `from_generic`.
    fn to_generic_field(&self) -> Result<GenericExtraField> {
        let mut data = Cursor::new(Vec::<u8>::new());
        if let Some(original_size) = self.original_size {
            original_size.serialize(&mut data)?;
        }
        if let Some(compressed_size) = self.compressed_size {
            compressed_size.serialize(&mut data)?;
        }
        if let Some(offset_header) = self.offset_header {
            offset_header.serialize(&mut data)?;
        }
        if let Some(disk_number) = self.disk_number {
            disk_number.serialize(&mut data)?;
        }
        Ok(GenericExtraField {
            id: Self::ID,
            data: data.into_inner(),
        })
    }

    /// Parse a raw id-0x0001 field.
    ///
    /// The payload carries no markers for which members are present, so the
    /// caller passes one boolean per member (derived from which file-header
    /// fields are saturated) and the payload is consumed in the same fixed
    /// order used by `to_generic_field`.
    ///
    /// Panics if `field.id` is not `Self::ID`; calling this on a non-Zip64
    /// field is a caller bug, not a recoverable condition.
    fn from_generic(
        field: &GenericExtraField,
        original_size: bool,
        compressed_size: bool,
        offset_header: bool,
        disk_number: bool,
    ) -> Result<Self> {
        assert_eq!(field.id, Self::ID);
        let mut data = Cursor::new(&field.data);
        // Read order must match the write order in `to_generic_field`.
        let original_size = if original_size {
            Some(u64::deserialize(&mut data)?)
        } else {
            None
        };
        let compressed_size = if compressed_size {
            Some(u64::deserialize(&mut data)?)
        } else {
            None
        };
        let offset_header = if offset_header {
            Some(u64::deserialize(&mut data)?)
        } else {
            None
        };
        let disk_number = if disk_number {
            Some(u32::deserialize(&mut data)?)
        } else {
            None
        };
        Ok(Self {
            original_size,
            compressed_size,
            offset_header,
            disk_number,
        })
    }
}
/// An extra field kept in raw form: the 16-bit header id and the payload
/// bytes, with no interpretation of the payload.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GenericExtraField {
    // Header id identifying the field type (e.g. 0x0001 for Zip64).
    id: u16,
    // Raw payload bytes; length is written as a u16 on disk.
    data: Vec<u8>,
}
impl Serializable for GenericExtraField {
    /// Write the field as: id (u16), payload length (u16), payload bytes.
    fn serialize(&self, output: &mut dyn Write) -> Result<()> {
        self.id.serialize(output)?;
        // The on-disk length field is only 16 bits; a silent `as` cast
        // would truncate an oversized payload and write a corrupt archive,
        // so fail loudly instead.
        assert!(
            self.data.len() <= u16::MAX as usize,
            "extra field data longer than u16::MAX bytes"
        );
        (self.data.len() as u16).serialize(output)?;
        for c in &self.data {
            c.serialize(output)?;
        }
        Ok(())
    }

    /// Read a field: id, 16-bit payload length, then exactly that many bytes.
    fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> {
        let id = u16::deserialize(input)?;
        let data_size = u16::deserialize(input)?;
        // The payload length is known up front; reserve once instead of
        // growing the vector byte by byte.
        let mut data = Vec::with_capacity(data_size as usize);
        for _ in 0..data_size {
            data.push(u8::deserialize(input)?);
        }
        Ok(Self { id, data })
    }

    /// Serialized size: 2-byte id + 2-byte length + payload.
    fn size(&self) -> usize {
        4 + self.data.len()
    }
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct FileHeader {
// signature: Signature(0x02014b50)
@ -34,9 +170,9 @@ struct FileHeader {
external_file_attributes: u32,
offset_local_header: u32,
file_name: Vec<u8>,
extra_field: Vec<u8>, // TODO:
// zip64 https://github.com/python/cpython/blob/cc11c76da7a099bb4535ab8ca507e55263fc35b6/Lib/zipfile/__init__.py#L500
// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT 4.5 Extensible data fields
extra_field: Vec<ExtraField>,
/// Remaining bytes in the extra_fields that could not be parsed as ExtraField
malformed_extra_field: Vec<u8>,
file_comment: Vec<u8>,
}
@ -65,6 +201,9 @@ impl Serializable for FileHeader {
for c in &self.extra_field {
c.serialize(output)?;
}
for c in &self.malformed_extra_field {
c.serialize(output)?;
}
for c in &self.file_comment {
c.serialize(output)?;
}
@ -91,18 +230,10 @@ impl Serializable for FileHeader {
let external_file_attributes = u32::deserialize(input)?;
let offset_local_header = u32::deserialize(input)?;
let mut file_name = vec![];
let mut extra_field = vec![];
let mut file_comment = vec![];
for _ in 0..file_name_length {
file_name.push(u8::deserialize(input)?);
}
for _ in 0..extra_field_length {
extra_field.push(u8::deserialize(input)?);
}
for _ in 0..file_comment_length {
file_comment.push(u8::deserialize(input)?);
}
Ok(Self {
let mut header = Self {
version_made_by,
version_needed_to_extract,
general_purpose_flag,
@ -117,13 +248,78 @@ impl Serializable for FileHeader {
external_file_attributes,
offset_local_header,
file_name,
extra_field,
file_comment,
})
extra_field: vec![],
malformed_extra_field: vec![],
file_comment: vec![],
};
//let end_of_extra_field = input.stream_position().unwrap() + extra_field_length as u64;
let extra_field_off = input.stream_position().unwrap();
let mut extra_size_read = 0;
while extra_size_read < extra_field_length as usize {
let field_off = input.stream_position().unwrap();
let field = ExtraField::deserialize(input);
if let Err(err) = field {
println!(
"Failed to parsed extra field in {}: {err:?}",
header.get_name()
);
input.seek(SeekFrom::Start(field_off)).unwrap();
break;
} else {
let field = field.unwrap();
extra_size_read += field.size();
header.extra_field.push(field);
}
}
if extra_size_read > extra_field_length as usize {
println!("Failed to parsed last extra field in {}", header.get_name());
let size = header.extra_field.pop().unwrap().size();
input.seek(SeekFrom::Current(-(size as i64))).unwrap();
}
let mut extra_size_read = input.stream_position().unwrap() - extra_field_off;
while extra_size_read < extra_field_length as u64 {
header.malformed_extra_field.push(u8::deserialize(input)?);
extra_size_read += 1;
}
//input.seek(SeekFrom::Start(end_of_extra_field)).unwrap();
for _ in 0..file_comment_length {
header.file_comment.push(u8::deserialize(input)?);
}
for field in &mut header.extra_field {
if let ExtraField::Generic(GenericExtraField {
id: Zip64ExtraField::ID,
data,
}) = field
{
let original_size = uncompressed_size == u32::MAX;
let compressed_size = compressed_size == u32::MAX;
let offset_header = offset_local_header == u32::MAX;
let disk_number = disk_number_start == u16::MAX;
let zip64_filed = Zip64ExtraField::from_generic(
&GenericExtraField {
id: Zip64ExtraField::ID,
data: data.clone(),
},
original_size,
compressed_size,
offset_header,
disk_number,
)
.unwrap();
*field = ExtraField::Zip64(zip64_filed);
}
}
Ok(header)
}
fn size(&self) -> usize {
Self::MIN_SIZE + self.file_name.len() + self.extra_field.len() + self.file_comment.len()
Self::MIN_SIZE
+ self.file_name.len()
+ self.extra_field.iter().map(|f| f.size()).sum::<usize>()
+ self.malformed_extra_field.len()
+ self.file_comment.len()
}
}
@ -147,6 +343,75 @@ impl FileHeader {
Encoding::CP437 => cp437::cp437_to_string(&self.file_name),
}
}
#[allow(dead_code)]
fn get_uncompressed_size(&self) -> u64 {
if self.uncompressed_size != u32::MAX {
self.uncompressed_size as u64
} else if let Some(ExtraField::Zip64(Zip64ExtraField {
original_size: Some(original_size),
..
})) = self
.extra_field
.iter()
.find(|f| matches!(f, ExtraField::Zip64(_)))
{
*original_size
} else {
self.uncompressed_size as u64
}
}
#[allow(dead_code)]
fn get_compressed_size(&self) -> u64 {
if self.compressed_size != u32::MAX {
self.compressed_size as u64
} else if let Some(ExtraField::Zip64(Zip64ExtraField {
compressed_size: Some(compressed_size),
..
})) = self
.extra_field
.iter()
.find(|f| matches!(f, ExtraField::Zip64(_)))
{
*compressed_size
} else {
self.compressed_size as u64
}
}
#[allow(dead_code)]
fn get_offset_local_header(&self) -> u64 {
if self.offset_local_header != u32::MAX {
self.offset_local_header as u64
} else if let Some(ExtraField::Zip64(Zip64ExtraField {
offset_header: Some(offset_header),
..
})) = self
.extra_field
.iter()
.find(|f| matches!(f, ExtraField::Zip64(_)))
{
*offset_header
} else {
self.offset_local_header as u64
}
}
#[allow(dead_code)]
fn get_disk_number_start(&self) -> u32 {
if self.disk_number_start != u16::MAX {
self.disk_number_start as u32
} else if let Some(ExtraField::Zip64(Zip64ExtraField {
disk_number: Some(disk_number),
..
})) = self
.extra_field
.iter()
.find(|f| matches!(f, ExtraField::Zip64(_)))
{
*disk_number
} else {
self.disk_number_start as u32
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
@ -361,22 +626,23 @@ impl<T: Read + Seek> ZipFile<T> {
let cd_size = zip_file.get_ed_size();
while size_read < cd_size {
let file_header = FileHeader::deserialize(&mut zip_file.data).unwrap();
println!("{file_header:#?}");
size_read += file_header.size() as u64;
zip_file.files.push(file_header);
}
assert_eq!(size_read, cd_size);
for f in &zip_file.files {
println!("{f:?}");
println!("{f:#?}");
}
zip_file
}
/*
#[allow(dead_code)]
fn is_zip64(&self) -> bool {
self.zip64_end_of_central_directory.is_some()
}
*/
#[allow(dead_code)]
fn get_disk_num(&self) -> u32 {
if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory {
zip64_end_of_central_directory.number_of_this_disk
@ -385,6 +651,7 @@ impl<T: Read + Seek> ZipFile<T> {
}
}
#[allow(dead_code)]
fn get_disk_ed_start(&self) -> u32 {
if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory {
zip64_end_of_central_directory.disk_number_of_central_directory_start
@ -394,6 +661,7 @@ impl<T: Read + Seek> ZipFile<T> {
}
}
#[allow(dead_code)]
fn get_number_entries_on_disk(&self) -> u64 {
if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory {
zip64_end_of_central_directory.number_entry_in_central_directory_on_this_disk
@ -403,6 +671,7 @@ impl<T: Read + Seek> ZipFile<T> {
}
}
#[allow(dead_code)]
fn get_number_entries(&self) -> u64 {
if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory {
zip64_end_of_central_directory.number_entry_in_central_directory