1use std::{
8 cell::RefCell,
9 collections::{BTreeMap, HashMap},
10 mem::size_of,
11 os::unix::ffi::OsStrExt,
12 rc::Rc,
13};
14
15use log::trace;
16use xxhash_rust::xxh32::xxh32;
17use zerocopy::{Immutable, IntoBytes};
18
19use crate::{
20 erofs::{composefs::OverlayMetacopy, format, reader::round_up},
21 fsverity::FsVerityHashValue,
22 tree,
23};
24
25#[derive(Clone, Copy, Debug)]
26enum Offset {
27 Header,
28 Superblock,
29 Inode,
30 XAttr,
31 Block,
32 End,
33}
34
35trait Output {
36 fn note_offset(&mut self, offset_type: Offset);
37 fn get(&self, offset_type: Offset, idx: usize) -> usize;
38 fn write(&mut self, data: &[u8]);
39 fn pad(&mut self, alignment: usize);
40 fn len(&self) -> usize;
41
42 fn get_div(&self, offset_type: Offset, idx: usize, div: usize) -> usize {
43 let offset = self.get(offset_type, idx);
44 assert_eq!(offset % div, 0);
45 offset / div
46 }
47
48 fn get_nid(&self, idx: usize) -> u64 {
49 self.get_div(Offset::Inode, idx, 32) as u64
50 }
51
52 fn get_xattr(&self, idx: usize) -> u32 {
53 self.get_div(Offset::XAttr, idx, 4).try_into().unwrap()
54 }
55
56 fn write_struct(&mut self, st: impl IntoBytes + Immutable) {
57 self.write(st.as_bytes());
58 }
59}
60
61#[derive(PartialOrd, PartialEq, Eq, Ord, Clone)]
62struct XAttr {
63 prefix: u8,
64 suffix: Box<[u8]>,
65 value: Box<[u8]>,
66}
67
68#[derive(Clone, Default)]
69struct InodeXAttrs {
70 shared: Vec<usize>,
71 local: Vec<XAttr>,
72 filter: u32,
73}
74
75#[derive(Debug)]
76struct DirEnt<'a> {
77 name: &'a [u8],
78 inode: usize,
79 file_type: format::FileType,
80}
81
82#[derive(Debug, Default)]
83struct Directory<'a> {
84 blocks: Box<[Box<[DirEnt<'a>]>]>,
85 inline: Box<[DirEnt<'a>]>,
86 size: usize,
87 nlink: usize,
88}
89
90#[derive(Debug)]
91struct Leaf<'a, ObjectID: FsVerityHashValue> {
92 content: &'a tree::LeafContent<ObjectID>,
93 nlink: usize,
94}
95
96#[derive(Debug)]
97enum InodeContent<'a, ObjectID: FsVerityHashValue> {
98 Directory(Directory<'a>),
99 Leaf(Leaf<'a, ObjectID>),
100}
101
102struct Inode<'a, ObjectID: FsVerityHashValue> {
103 stat: &'a tree::Stat,
104 xattrs: InodeXAttrs,
105 content: InodeContent<'a, ObjectID>,
106}
107
108impl XAttr {
109 pub fn write(&self, output: &mut impl Output) {
110 output.write_struct(format::XAttrHeader {
111 name_len: self.suffix.len() as u8,
112 name_index: self.prefix,
113 value_size: (self.value.len() as u16).into(),
114 });
115 output.write(&self.suffix);
116 output.write(&self.value);
117 output.pad(4);
118 }
119}
120
121impl InodeXAttrs {
122 fn add(&mut self, name: &[u8], value: &[u8]) {
123 for (idx, prefix) in format::XATTR_PREFIXES.iter().enumerate().rev() {
124 if let Some(suffix) = name.strip_prefix(*prefix) {
125 self.filter |= 1 << (xxh32(suffix, format::XATTR_FILTER_SEED + idx as u32) % 32);
126 self.local.push(XAttr {
127 prefix: idx as u8,
128 suffix: Box::from(suffix),
129 value: Box::from(value),
130 });
131 return;
132 }
133 }
134 unreachable!("{:?}", std::str::from_utf8(name)); }
136
137 fn write(&self, output: &mut impl Output) {
138 if self.filter != 0 {
139 trace!(" write xattrs block");
140 output.write_struct(format::InodeXAttrHeader {
141 name_filter: (!self.filter).into(),
142 shared_count: self.shared.len() as u8,
143 ..Default::default()
144 });
145 for idx in &self.shared {
146 trace!(" shared {} @{}", idx, output.len());
147 output.write(&output.get_xattr(*idx).to_le_bytes());
148 }
149 for attr in &self.local {
150 trace!(" local @{}", output.len());
151 attr.write(output);
152 }
153 }
154 }
156}
157
158impl<'a> Directory<'a> {
159 pub fn from_entries(entries: Vec<DirEnt<'a>>) -> Self {
160 let mut blocks = vec![];
161 let mut rest = vec![];
162
163 let mut n_bytes = 0;
164 let mut nlink = 0;
165
166 trace!("Directory with {} items", entries.len());
167
168 for entry in entries.into_iter() {
171 let entry_size = size_of::<format::DirectoryEntryHeader>() + entry.name.len();
172 assert!(entry_size <= 4096);
173
174 trace!(" {:?}", entry.file_type);
175
176 if matches!(entry.file_type, format::FileType::Directory) {
177 nlink += 1;
178 }
179
180 n_bytes += entry_size;
181 if n_bytes <= 4096 {
182 rest.push(entry);
183 } else {
184 trace!(" block {}", rest.len());
186 blocks.push(rest.into_boxed_slice());
187
188 rest = vec![entry];
190 n_bytes = entry_size;
191 }
192 }
193
194 if n_bytes > 2048 {
196 blocks.push(rest.into_boxed_slice());
197 rest = vec![];
198 n_bytes = 0;
199 }
200
201 trace!(
202 " blocks {} inline {} inline_size {n_bytes}",
203 blocks.len(),
204 rest.len()
205 );
206
207 let size = format::BLOCK_SIZE * blocks.len() + n_bytes;
208 Self {
209 blocks: blocks.into_boxed_slice(),
210 inline: rest.into_boxed_slice(),
211 size,
212 nlink,
213 }
214 }
215
216 fn write_block(&self, output: &mut impl Output, block: &[DirEnt]) {
217 trace!(" write dir block {} @{}", block.len(), output.len());
218 let mut nameofs = size_of::<format::DirectoryEntryHeader>() * block.len();
219
220 for entry in block {
221 trace!(
222 " entry {:?} name {} @{}",
223 entry.file_type,
224 nameofs,
225 output.len()
226 );
227 output.write_struct(format::DirectoryEntryHeader {
228 name_offset: (nameofs as u16).into(),
229 inode_offset: output.get_nid(entry.inode).into(),
230 file_type: entry.file_type.into(),
231 ..Default::default()
232 });
233 nameofs += entry.name.len();
234 }
235
236 for entry in block {
237 trace!(" name @{}", output.len());
238 output.write(entry.name.as_bytes());
239 }
240 }
241
242 fn write_inline(&self, output: &mut impl Output) {
243 trace!(
244 " write inline len {} expected size {} of {}",
245 self.inline.len(),
246 self.size % 4096,
247 self.size
248 );
249 self.write_block(output, &self.inline);
250 }
251
252 fn write_blocks(&self, output: &mut impl Output) {
253 for block in &self.blocks {
254 assert_eq!(output.len() % format::BLOCK_SIZE, 0);
255 self.write_block(output, block);
256 output.pad(format::BLOCK_SIZE);
257 }
258 }
259
260 fn inode_meta(&self, block_offset: usize) -> (format::DataLayout, u32, u64, usize) {
261 let (layout, u) = if self.inline.is_empty() {
262 (format::DataLayout::FlatPlain, block_offset as u32 / 4096)
263 } else if !self.blocks.is_empty() {
264 (format::DataLayout::FlatInline, block_offset as u32 / 4096)
265 } else {
266 (format::DataLayout::FlatInline, 0)
267 };
268 (layout, u, self.size as u64, self.nlink)
269 }
270}
271
272impl<ObjectID: FsVerityHashValue> Leaf<'_, ObjectID> {
273 fn inode_meta(&self) -> (format::DataLayout, u32, u64, usize) {
274 let (layout, u, size) = match &self.content {
275 tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => {
276 if data.is_empty() {
277 (format::DataLayout::FlatPlain, 0, data.len() as u64)
278 } else {
279 (format::DataLayout::FlatInline, 0, data.len() as u64)
280 }
281 }
282 tree::LeafContent::Regular(tree::RegularFile::External(.., size)) => {
283 (format::DataLayout::ChunkBased, 31, *size)
284 }
285 tree::LeafContent::CharacterDevice(rdev) | tree::LeafContent::BlockDevice(rdev) => {
286 (format::DataLayout::FlatPlain, *rdev as u32, 0)
287 }
288 tree::LeafContent::Fifo | tree::LeafContent::Socket => {
289 (format::DataLayout::FlatPlain, 0, 0)
290 }
291 tree::LeafContent::Symlink(target) => {
292 (format::DataLayout::FlatInline, 0, target.len() as u64)
293 }
294 };
295 (layout, u, size, self.nlink)
296 }
297
298 fn write_inline(&self, output: &mut impl Output) {
299 output.write(match self.content {
300 tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => data,
301 tree::LeafContent::Regular(tree::RegularFile::External(..)) => b"\xff\xff\xff\xff", tree::LeafContent::Symlink(target) => target.as_bytes(),
303 _ => &[],
304 });
305 }
306}
307
308impl<ObjectID: FsVerityHashValue> Inode<'_, ObjectID> {
309 fn file_type(&self) -> format::FileType {
310 match &self.content {
311 InodeContent::Directory(..) => format::FileType::Directory,
312 InodeContent::Leaf(leaf) => match &leaf.content {
313 tree::LeafContent::Regular(..) => format::FileType::RegularFile,
314 tree::LeafContent::CharacterDevice(..) => format::FileType::CharacterDevice,
315 tree::LeafContent::BlockDevice(..) => format::FileType::BlockDevice,
316 tree::LeafContent::Fifo => format::FileType::Fifo,
317 tree::LeafContent::Socket => format::FileType::Socket,
318 tree::LeafContent::Symlink(..) => format::FileType::Symlink,
319 },
320 }
321 }
322
323 fn write_inode(&self, output: &mut impl Output, idx: usize) {
324 let (layout, u, size, nlink) = match &self.content {
325 InodeContent::Directory(dir) => dir.inode_meta(output.get(Offset::Block, idx)),
326 InodeContent::Leaf(leaf) => leaf.inode_meta(),
327 };
328
329 let xattr_size = {
330 let mut xattr = FirstPass::default();
331 self.xattrs.write(&mut xattr);
332 xattr.offset
333 };
334
335 output.pad(32);
337 if matches!(layout, format::DataLayout::FlatInline) {
338 let inode_and_xattr_size = size_of::<format::ExtendedInodeHeader>() + xattr_size;
339 let inline_start = output.len() + inode_and_xattr_size;
340 let end_of_metadata = inline_start - 1;
341 let inline_end = inline_start + (size as usize % format::BLOCK_SIZE);
342 if end_of_metadata / format::BLOCK_SIZE != inline_end / format::BLOCK_SIZE {
343 let pad = vec![0; 4096 - end_of_metadata % 4096];
347 trace!("added pad {}", pad.len());
348 output.write(&pad);
349 output.pad(32);
350 }
351 }
352
353 let format = format::InodeLayout::Extended | layout;
354
355 trace!(
356 "write inode {idx} nid {} {:?} {:?} xattrsize{xattr_size} icount{} inline{} @{}",
357 output.len() / 32,
358 format,
359 self.file_type(),
360 match xattr_size {
361 0 => 0,
362 n => (1 + (n - 12) / 4) as u16,
363 },
364 size % 4096,
365 output.len()
366 );
367
368 output.note_offset(Offset::Inode);
369 output.write_struct(format::ExtendedInodeHeader {
370 format,
371 xattr_icount: match xattr_size {
372 0 => 0,
373 n => (1 + (n - 12) / 4) as u16,
374 }
375 .into(),
376 mode: self.file_type() | self.stat.st_mode,
377 size: size.into(),
378 u: u.into(),
379 ino: ((output.len() / 32) as u32).into(),
380 uid: self.stat.st_uid.into(),
381 gid: self.stat.st_gid.into(),
382 mtime: (self.stat.st_mtim_sec as u64).into(),
383 nlink: (nlink as u32).into(),
384 ..Default::default()
385 });
386
387 self.xattrs.write(output);
388
389 match &self.content {
390 InodeContent::Directory(dir) => dir.write_inline(output),
391 InodeContent::Leaf(leaf) => leaf.write_inline(output),
392 };
393
394 output.pad(32);
395 }
396
397 fn write_blocks(&self, output: &mut impl Output) {
398 if let InodeContent::Directory(dir) = &self.content {
399 dir.write_blocks(output);
400 }
401 }
402}
403
404struct InodeCollector<'a, ObjectID: FsVerityHashValue> {
405 inodes: Vec<Inode<'a, ObjectID>>,
406 hardlinks: HashMap<*const tree::Leaf<ObjectID>, usize>,
407}
408
409impl<'a, ObjectID: FsVerityHashValue> InodeCollector<'a, ObjectID> {
410 fn push_inode(&mut self, stat: &'a tree::Stat, content: InodeContent<'a, ObjectID>) -> usize {
411 let mut xattrs = InodeXAttrs::default();
412
413 if let InodeContent::Leaf(Leaf {
415 content: tree::LeafContent::Regular(tree::RegularFile::External(id, ..)),
416 ..
417 }) = content
418 {
419 xattrs.add(
420 b"trusted.overlay.metacopy",
421 OverlayMetacopy::new(id).as_bytes(),
422 );
423
424 let redirect = format!("/{}", id.to_object_pathname());
425 xattrs.add(b"trusted.overlay.redirect", redirect.as_bytes());
426 }
427
428 for (name, value) in RefCell::borrow(&stat.xattrs).iter() {
430 let name = name.as_bytes();
431
432 if let Some(escapee) = name.strip_prefix(b"trusted.overlay.") {
433 let escaped = [b"trusted.overlay.overlay.", escapee].concat();
434 xattrs.add(&escaped, value);
435 } else {
436 xattrs.add(name, value);
437 }
438 }
439
440 let inode = self.inodes.len();
443 self.inodes.push(Inode {
444 stat,
445 xattrs,
446 content,
447 });
448 inode
449 }
450
451 fn collect_leaf(&mut self, leaf: &'a Rc<tree::Leaf<ObjectID>>) -> usize {
452 let nlink = Rc::strong_count(leaf);
453
454 if nlink > 1 {
455 if let Some(inode) = self.hardlinks.get(&Rc::as_ptr(leaf)) {
456 return *inode;
457 }
458 }
459
460 let inode = self.push_inode(
461 &leaf.stat,
462 InodeContent::Leaf(Leaf {
463 content: &leaf.content,
464 nlink,
465 }),
466 );
467
468 if nlink > 1 {
469 self.hardlinks.insert(Rc::as_ptr(leaf), inode);
470 }
471
472 inode
473 }
474
475 fn insert_sorted(
476 entries: &mut Vec<DirEnt<'a>>,
477 name: &'a [u8],
478 inode: usize,
479 file_type: format::FileType,
480 ) {
481 let entry = DirEnt {
482 name,
483 inode,
484 file_type,
485 };
486 let point = entries.partition_point(|e| e.name < entry.name);
487 entries.insert(point, entry);
488 }
489
490 fn collect_dir(&mut self, dir: &'a tree::Directory<ObjectID>, parent: usize) -> usize {
491 let me = self.push_inode(&dir.stat, InodeContent::Directory(Directory::default()));
494
495 let mut entries = vec![];
496
497 for (name, inode) in dir.sorted_entries() {
498 let child = match inode {
499 tree::Inode::Directory(dir) => self.collect_dir(dir, me),
500 tree::Inode::Leaf(leaf) => self.collect_leaf(leaf),
501 };
502 entries.push(DirEnt {
503 name: name.as_bytes(),
504 inode: child,
505 file_type: self.inodes[child].file_type(),
506 });
507 }
508
509 Self::insert_sorted(&mut entries, b".", me, format::FileType::Directory);
511 Self::insert_sorted(&mut entries, b"..", parent, format::FileType::Directory);
512
513 self.inodes[me].content = InodeContent::Directory(Directory::from_entries(entries));
515 me
516 }
517
518 pub fn collect(fs: &'a tree::FileSystem<ObjectID>) -> Vec<Inode<'a, ObjectID>> {
519 let mut this = Self {
520 inodes: vec![],
521 hardlinks: HashMap::new(),
522 };
523
524 let root_inode = this.collect_dir(&fs.root, 0);
526 assert_eq!(root_inode, 0);
527
528 this.inodes
529 }
530}
531
532fn share_xattrs(inodes: &mut [Inode<impl FsVerityHashValue>]) -> Vec<XAttr> {
535 let mut xattrs: BTreeMap<XAttr, usize> = BTreeMap::new();
536
537 for inode in inodes.iter() {
539 for attr in &inode.xattrs.local {
540 if let Some(count) = xattrs.get_mut(attr) {
541 *count += 1;
542 } else {
543 xattrs.insert(attr.clone(), 1);
544 }
545 }
546 }
547
548 xattrs.retain(|_k, v| *v > 1);
550
551 for (idx, value) in xattrs.values_mut().enumerate() {
553 *value = idx;
554 }
555
556 for inode in inodes.iter_mut() {
558 inode.xattrs.local.retain(|attr| {
559 if let Some(idx) = xattrs.get(attr) {
560 inode.xattrs.shared.push(*idx);
561 false } else {
563 true }
565 });
566 }
567
568 xattrs.into_keys().collect()
570}
571
572fn write_erofs(
573 output: &mut impl Output,
574 inodes: &[Inode<impl FsVerityHashValue>],
575 xattrs: &[XAttr],
576) {
577 output.note_offset(Offset::Header);
579 output.write_struct(format::ComposefsHeader {
580 magic: format::COMPOSEFS_MAGIC,
581 version: format::VERSION,
582 flags: 0.into(),
583 composefs_version: format::COMPOSEFS_VERSION,
584 ..Default::default()
585 });
586 output.pad(1024);
587
588 output.note_offset(Offset::Superblock);
590 output.write_struct(format::Superblock {
591 magic: format::MAGIC_V1,
592 blkszbits: format::BLOCK_BITS,
593 feature_compat: format::FEATURE_COMPAT_MTIME | format::FEATURE_COMPAT_XATTR_FILTER,
594 root_nid: (output.get_nid(0) as u16).into(),
595 inos: (inodes.len() as u64).into(),
596 blocks: ((output.get(Offset::End, 0) / format::BLOCK_SIZE) as u32).into(),
597 ..Default::default()
598 });
599
600 for (idx, inode) in inodes.iter().enumerate() {
602 inode.write_inode(output, idx);
604 }
605
606 for xattr in xattrs {
608 output.note_offset(Offset::XAttr);
609 xattr.write(output);
610 }
611
612 output.pad(4096);
614 for inode in inodes.iter() {
615 output.note_offset(Offset::Block);
616 inode.write_blocks(output);
617 }
618
619 output.note_offset(Offset::End);
621}
622
623#[derive(Default)]
624struct Layout {
625 offset_types: Vec<usize>,
626 offsets: Vec<usize>,
627}
628
629#[derive(Default)]
630struct FirstPass {
631 offset: usize,
632 layout: Layout,
633}
634
635struct SecondPass {
636 output: Vec<u8>,
637 layout: Layout,
638}
639
640impl Output for SecondPass {
641 fn note_offset(&mut self, _offset_type: Offset) {
642 }
644
645 fn get(&self, offset_type: Offset, idx: usize) -> usize {
646 let start = self.layout.offset_types[offset_type as usize];
647 self.layout.offsets[start + idx]
648 }
649
650 fn write(&mut self, data: &[u8]) {
651 self.output.extend_from_slice(data);
652 }
653
654 fn pad(&mut self, alignment: usize) {
655 self.output
656 .resize(round_up(self.output.len(), alignment), 0);
657 }
658
659 fn len(&self) -> usize {
660 self.output.len()
661 }
662}
663
664impl Output for FirstPass {
665 fn note_offset(&mut self, offset_type: Offset) {
666 while self.layout.offset_types.len() <= offset_type as usize {
667 self.layout.offset_types.push(self.layout.offsets.len());
668 }
669 assert_eq!(self.layout.offset_types.len(), offset_type as usize + 1);
670
671 trace!(
672 "{:?} #{} @{}",
673 offset_type,
674 self.layout.offsets.len() - self.layout.offset_types[offset_type as usize],
675 self.offset
676 );
677 self.layout.offsets.push(self.offset);
678 }
679
680 fn get(&self, _: Offset, _: usize) -> usize {
681 0 }
683
684 fn write(&mut self, data: &[u8]) {
685 self.offset += data.len();
686 }
687
688 fn pad(&mut self, alignment: usize) {
689 self.offset = round_up(self.offset, alignment);
690 }
691
692 fn len(&self) -> usize {
693 self.offset
694 }
695}
696
697pub fn mkfs_erofs<ObjectID: FsVerityHashValue>(fs: &tree::FileSystem<ObjectID>) -> Box<[u8]> {
705 let mut inodes = InodeCollector::collect(fs);
707 let xattrs = share_xattrs(&mut inodes);
708
709 let mut first_pass = FirstPass::default();
711 write_erofs(&mut first_pass, &inodes, &xattrs);
712
713 let mut second_pass = SecondPass {
715 output: vec![],
716 layout: first_pass.layout,
717 };
718 write_erofs(&mut second_pass, &inodes, &xattrs);
719
720 second_pass.output.into_boxed_slice()
722}