composefs_oci/
tar.rs

1//! TAR archive processing and split stream conversion.
2//!
3//! This module handles the conversion of tar archives (container image layers) into composefs split streams.
4//! It provides both synchronous and asynchronous tar processing, intelligently deciding whether to store
5//! file content inline in the split stream or externally in the object store based on file size.
6//!
7//! Key components include the `split()` and `split_async()` functions for converting tar streams,
8//! `get_entry()` for reading back tar entries from split streams, and comprehensive support for
9//! tar format features including GNU long names, PAX extensions, and various file types.
10//! The `TarEntry` and `TarItem` types represent processed tar entries in composefs format.
11
12use std::{
13    cell::RefCell,
14    collections::BTreeMap,
15    ffi::{OsStr, OsString},
16    fmt,
17    io::Read,
18    os::unix::prelude::{OsStrExt, OsStringExt},
19    path::PathBuf,
20};
21
22use anyhow::{bail, ensure, Result};
23use rustix::fs::makedev;
24use tar::{EntryType, Header, PaxExtensions};
25use tokio::io::{AsyncRead, AsyncReadExt};
26
27use composefs::{
28    dumpfile,
29    fsverity::FsVerityHashValue,
30    splitstream::{SplitStreamData, SplitStreamReader, SplitStreamWriter},
31    tree::{LeafContent, RegularFile, Stat},
32    util::{read_exactish, read_exactish_async},
33    INLINE_CONTENT_MAX,
34};
35
36fn read_header<R: Read>(reader: &mut R) -> Result<Option<Header>> {
37    let mut header = Header::new_gnu();
38    if read_exactish(reader, header.as_mut_bytes())? {
39        Ok(Some(header))
40    } else {
41        Ok(None)
42    }
43}
44
45async fn read_header_async(reader: &mut (impl AsyncRead + Unpin)) -> Result<Option<Header>> {
46    let mut header = Header::new_gnu();
47    if read_exactish_async(reader, header.as_mut_bytes()).await? {
48        Ok(Some(header))
49    } else {
50        Ok(None)
51    }
52}
53
54/// Splits the tar file from tar_stream into a Split Stream.  The store_data function is
55/// responsible for ensuring that "external data" is in the composefs repository and returns the
56/// fsverity hash value of that data.
57pub fn split(
58    tar_stream: &mut impl Read,
59    writer: &mut SplitStreamWriter<impl FsVerityHashValue>,
60) -> Result<()> {
61    while let Some(header) = read_header(tar_stream)? {
62        // the header always gets stored as inline data
63        writer.write_inline(header.as_bytes());
64
65        if header.as_bytes() == &[0u8; 512] {
66            continue;
67        }
68
69        // read the corresponding data, if there is any
70        let actual_size = header.entry_size()? as usize;
71        let storage_size = (actual_size + 511) & !511;
72        let mut buffer = vec![0u8; storage_size];
73        tar_stream.read_exact(&mut buffer)?;
74
75        if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
76            // non-empty regular file: store the data in the object store
77            let padding = buffer.split_off(actual_size);
78            writer.write_external(&buffer, padding)?;
79        } else {
80            // else: store the data inline in the split stream
81            writer.write_inline(&buffer);
82        }
83    }
84    Ok(())
85}
86
87/// Asynchronously splits a tar archive into a composefs split stream.
88///
89/// Similar to `split()` but processes the tar stream asynchronously. Files larger than
90/// `INLINE_CONTENT_MAX` are stored externally in the object store, while smaller files
91/// and metadata are stored inline in the split stream.
92///
93/// Returns an error if the tar stream is malformed or if writing to the split stream fails.
94pub async fn split_async(
95    mut tar_stream: impl AsyncRead + Unpin,
96    writer: &mut SplitStreamWriter<impl FsVerityHashValue>,
97) -> Result<()> {
98    while let Some(header) = read_header_async(&mut tar_stream).await? {
99        // the header always gets stored as inline data
100        writer.write_inline(header.as_bytes());
101
102        if header.as_bytes() == &[0u8; 512] {
103            continue;
104        }
105
106        // read the corresponding data, if there is any
107        let actual_size = header.entry_size()? as usize;
108        let storage_size = (actual_size + 511) & !511;
109        let mut buffer = vec![0u8; storage_size];
110        tar_stream.read_exact(&mut buffer).await?;
111
112        if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
113            // non-empty regular file: store the data in the object store
114            let padding = buffer.split_off(actual_size);
115            writer.write_external_async(buffer, padding).await?;
116        } else {
117            // else: store the data inline in the split stream
118            writer.write_inline(&buffer);
119        }
120    }
121    Ok(())
122}
123
124/// Represents the content type of a tar entry.
125///
126/// Tar entries can be directories, regular files/symlinks/devices (leaf nodes), or hardlinks
127/// to existing files. This enum captures the different types of content that can appear in a tar archive.
128#[derive(Debug)]
129pub enum TarItem<ObjectID: FsVerityHashValue> {
130    /// A directory entry.
131    Directory,
132    /// A leaf node (regular file, symlink, device, or fifo).
133    Leaf(LeafContent<ObjectID>),
134    /// A hardlink pointing to another path.
135    Hardlink(OsString),
136}
137
138/// Represents a complete tar entry extracted from a split stream.
139///
140/// Contains the full metadata and content for a single file or directory from a tar archive,
141/// including its path, stat information (permissions, ownership, timestamps), and the actual content.
142#[derive(Debug)]
143pub struct TarEntry<ObjectID: FsVerityHashValue> {
144    /// The absolute path of the entry in the filesystem.
145    pub path: PathBuf,
146    /// File metadata (mode, uid, gid, mtime, xattrs).
147    pub stat: Stat,
148    /// The content or type of this entry.
149    pub item: TarItem<ObjectID>,
150}
151
152impl<ObjectID: FsVerityHashValue> fmt::Display for TarEntry<ObjectID> {
153    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
154        match self.item {
155            TarItem::Hardlink(ref target) => dumpfile::write_hardlink(fmt, &self.path, target),
156            TarItem::Directory => dumpfile::write_directory(fmt, &self.path, &self.stat, 1),
157            TarItem::Leaf(ref content) => {
158                dumpfile::write_leaf(fmt, &self.path, &self.stat, content, 1)
159            }
160        }
161    }
162}
163
/// Builds the absolute path of a tar entry.
///
/// The name is taken from the first available source in this order: the PAX name (`pax`), the
/// GNU long name (`gnu`, if non-empty), then the name stored in the header itself (`short`).
/// A leading `/` is prepended and a single trailing `/` (as found on directory names) is removed.
fn path_from_tar(pax: Option<Box<[u8]>>, gnu: Vec<u8>, short: &[u8]) -> PathBuf {
    let name: &[u8] = match pax.as_deref() {
        Some(pax_name) => pax_name,
        None if !gnu.is_empty() => &gnu,
        None => short,
    };

    // Prepend leading /
    let mut bytes = Vec::with_capacity(name.len() + 1);
    bytes.push(b'/');
    bytes.extend_from_slice(name);

    // Drop one trailing '/' (directories); this is a Vec<u8>, so that is a single byte.
    if bytes.ends_with(b"/") {
        bytes.pop();
    }

    OsString::from_vec(bytes).into()
}
184
/// Picks the target of a symlink entry.
///
/// The target is taken from the first available source in this order: the PAX link path (`pax`),
/// the GNU long link name (`gnu`, if non-empty), then the link name stored in the header itself
/// (`short`). The bytes are returned unmodified.
fn symlink_target_from_tar(pax: Option<Box<[u8]>>, gnu: Vec<u8>, short: &[u8]) -> Box<OsStr> {
    let target: &[u8] = match pax.as_deref() {
        Some(pax_target) => pax_target,
        None if !gnu.is_empty() => &gnu,
        None => short,
    };
    Box::from(OsStr::from_bytes(target))
}
194
195/// Reads and parses the next tar entry from a split stream.
196///
197/// Decodes tar headers and data from a composefs split stream, handling both inline and
198/// external content storage. Supports GNU long name/link extensions, PAX headers, and
199/// extended attributes. Returns `None` when the end of the archive is reached.
200///
201/// Returns the parsed tar entry, or `None` if the end of the stream is reached.
202pub fn get_entry<R: Read, ObjectID: FsVerityHashValue>(
203    reader: &mut SplitStreamReader<R, ObjectID>,
204) -> Result<Option<TarEntry<ObjectID>>> {
205    let mut gnu_longlink: Vec<u8> = vec![];
206    let mut gnu_longname: Vec<u8> = vec![];
207    let mut pax_longlink: Option<Box<[u8]>> = None;
208    let mut pax_longname: Option<Box<[u8]>> = None;
209    let mut xattrs = BTreeMap::new();
210
211    loop {
212        let mut buf = [0u8; 512];
213        if !reader.read_inline_exact(&mut buf)? || buf == [0u8; 512] {
214            return Ok(None);
215        }
216
217        let header = tar::Header::from_byte_slice(&buf);
218
219        let size = header.entry_size()?;
220
221        let item = match reader.read_exact(size as usize, ((size + 511) & !511) as usize)? {
222            SplitStreamData::External(id) => match header.entry_type() {
223                EntryType::Regular | EntryType::Continuous => {
224                    ensure!(
225                        size as usize > INLINE_CONTENT_MAX,
226                        "Splitstream incorrectly stored a small ({size} byte) file external"
227                    );
228                    TarItem::Leaf(LeafContent::Regular(RegularFile::External(id, size)))
229                }
230                _ => bail!("Unsupported external-chunked entry {header:?} {id:?}"),
231            },
232            SplitStreamData::Inline(content) => match header.entry_type() {
233                EntryType::GNULongLink => {
234                    gnu_longlink.extend(content);
235
236                    // NOTE: We use a custom tar parser since splitstreams are not actual tar archives
237                    // The `tar` crate does have a higher level `path` function that would do this for us.
238                    // See: https://github.com/alexcrichton/tar-rs/blob/a1c3036af48fa02437909112239f0632e4cfcfae/src/header.rs#L1532
239                    // Similar operation is performed for GNULongName
240                    gnu_longlink.pop_if(|x| *x == b'\0');
241
242                    continue;
243                }
244                EntryType::GNULongName => {
245                    gnu_longname.extend(content);
246                    gnu_longname.pop_if(|x| *x == b'\0');
247                    continue;
248                }
249                EntryType::XGlobalHeader => {
250                    todo!();
251                }
252                EntryType::XHeader => {
253                    for item in PaxExtensions::new(&content) {
254                        let extension = item?;
255                        let key = extension.key()?;
256                        let value = Box::from(extension.value_bytes());
257
258                        if key == "path" {
259                            pax_longname = Some(value);
260                        } else if key == "linkpath" {
261                            pax_longlink = Some(value);
262                        } else if let Some(xattr) = key.strip_prefix("SCHILY.xattr.") {
263                            xattrs.insert(Box::from(OsStr::new(xattr)), value);
264                        }
265                    }
266                    continue;
267                }
268                EntryType::Directory => TarItem::Directory,
269                EntryType::Regular | EntryType::Continuous => {
270                    ensure!(
271                        content.len() <= INLINE_CONTENT_MAX,
272                        "Splitstream incorrectly stored a large ({} byte) file inline",
273                        content.len()
274                    );
275                    TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(content)))
276                }
277                EntryType::Link => TarItem::Hardlink({
278                    let Some(link_name) = header.link_name_bytes() else {
279                        bail!("link without a name?")
280                    };
281                    OsString::from(path_from_tar(pax_longlink, gnu_longlink, &link_name))
282                }),
283                EntryType::Symlink => TarItem::Leaf(LeafContent::Symlink({
284                    let Some(link_name) = header.link_name_bytes() else {
285                        bail!("symlink without a name?")
286                    };
287                    symlink_target_from_tar(pax_longlink, gnu_longlink, &link_name)
288                })),
289                EntryType::Block => TarItem::Leaf(LeafContent::BlockDevice(
290                    match (header.device_major()?, header.device_minor()?) {
291                        (Some(major), Some(minor)) => makedev(major, minor),
292                        _ => bail!("Device entry without device numbers?"),
293                    },
294                )),
295                EntryType::Char => TarItem::Leaf(LeafContent::CharacterDevice(
296                    match (header.device_major()?, header.device_minor()?) {
297                        (Some(major), Some(minor)) => makedev(major, minor),
298                        _ => bail!("Device entry without device numbers?"),
299                    },
300                )),
301                EntryType::Fifo => TarItem::Leaf(LeafContent::Fifo),
302                _ => {
303                    todo!("Unsupported entry {:?}", header);
304                }
305            },
306        };
307
308        return Ok(Some(TarEntry {
309            path: path_from_tar(pax_longname, gnu_longname, &header.path_bytes()),
310            stat: Stat {
311                st_uid: header.uid()? as u32,
312                st_gid: header.gid()? as u32,
313                st_mode: header.mode()?,
314                st_mtim_sec: header.mtime()? as i64,
315                xattrs: RefCell::new(xattrs),
316            },
317            item,
318        }));
319    }
320}
321
322#[cfg(test)]
323mod tests {
324    use super::*;
325    use composefs::{
326        fsverity::Sha256HashValue, generic_tree::LeafContent, repository::Repository,
327        splitstream::SplitStreamReader,
328    };
329    use std::{io::Cursor, path::Path, sync::Arc};
330    use tar::Builder;
331
332    use once_cell::sync::Lazy;
333    use std::sync::Mutex;
334
335    static TEST_TEMPDIRS: Lazy<Mutex<Vec<tempfile::TempDir>>> =
336        Lazy::new(|| Mutex::new(Vec::new()));
337
338    pub(crate) fn create_test_repository() -> Result<Arc<Repository<Sha256HashValue>>> {
339        // Create a temporary repository for testing and store it in static
340        let tempdir = tempfile::TempDir::new().unwrap();
341        let fd = rustix::fs::open(
342            tempdir.path(),
343            rustix::fs::OFlags::CLOEXEC | rustix::fs::OFlags::PATH,
344            0.into(),
345        )?;
346
347        // Store tempdir in static to keep it alive
348        {
349            let mut guard = TEST_TEMPDIRS.lock().unwrap();
350            guard.push(tempdir);
351        }
352
353        let mut repo = Repository::open_path(&fd, ".").unwrap();
354        repo.set_insecure(true);
355
356        Ok(Arc::new(repo))
357    }
358
359    /// Helper method to append a file to a tar builder with sensible defaults
360    fn append_file(
361        builder: &mut Builder<&mut Vec<u8>>,
362        path: &str,
363        content: &[u8],
364    ) -> Result<tar::Header> {
365        let mut header = tar::Header::new_gnu();
366        header.set_mode(0o644);
367        header.set_uid(1000);
368        header.set_gid(1000);
369        header.set_mtime(1234567890);
370        header.set_size(content.len() as u64);
371        header.set_entry_type(tar::EntryType::Regular);
372        builder.append_data(&mut header, path, content)?;
373        Ok(header)
374    }
375
376    /// Helper method to process tar data through split/get_entry pipeline
377    fn read_all_via_splitstream(tar_data: Vec<u8>) -> Result<Vec<TarEntry<Sha256HashValue>>> {
378        let mut tar_cursor = Cursor::new(tar_data);
379        let repo = create_test_repository()?;
380        let mut writer = repo.create_stream(None, None);
381
382        split(&mut tar_cursor, &mut writer)?;
383        let object_id = writer.done()?;
384
385        let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
386            SplitStreamReader::new(repo.open_object(&object_id)?.into())?;
387
388        let mut entries = Vec::new();
389        while let Some(entry) = get_entry(&mut reader)? {
390            entries.push(entry);
391        }
392        Ok(entries)
393    }
394
395    #[test]
396    fn test_empty_tar() {
397        let mut tar_data = Vec::new();
398        {
399            let mut builder = Builder::new(&mut tar_data);
400            builder.finish().unwrap();
401        }
402
403        let mut tar_cursor = Cursor::new(tar_data);
404        let repo = create_test_repository().unwrap();
405        let mut writer = repo.create_stream(None, None);
406
407        split(&mut tar_cursor, &mut writer).unwrap();
408        let object_id = writer.done().unwrap();
409
410        let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
411            SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
412        assert!(get_entry(&mut reader).unwrap().is_none());
413    }
414
415    #[test]
416    fn test_single_small_file() {
417        let mut tar_data = Vec::new();
418        let original_header = {
419            let mut builder = Builder::new(&mut tar_data);
420
421            // Add one small regular file
422            let content = b"Hello, World!";
423            let header = append_file(&mut builder, "hello.txt", content).unwrap();
424
425            builder.finish().unwrap();
426            header
427        };
428
429        let mut tar_cursor = Cursor::new(tar_data);
430        let repo = create_test_repository().unwrap();
431        let mut writer = repo.create_stream(None, None);
432
433        split(&mut tar_cursor, &mut writer).unwrap();
434        let object_id = writer.done().unwrap();
435
436        let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
437            SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
438
439        // Should have exactly one entry
440        let entry = get_entry(&mut reader)
441            .unwrap()
442            .expect("Should have one entry");
443        assert_eq!(entry.path, PathBuf::from("/hello.txt"));
444        assert!(matches!(
445            entry.item,
446            TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(_)))
447        ));
448
449        // Use the helper to compare header and stat
450        assert_header_stat_equal(&original_header, &entry.stat, "hello.txt");
451
452        if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) = entry.item {
453            assert_eq!(content.as_ref(), b"Hello, World!");
454        }
455
456        // Should be no more entries
457        assert!(get_entry(&mut reader).unwrap().is_none());
458    }
459
460    #[test]
461    fn test_inline_threshold() {
462        let mut tar_data = Vec::new();
463        let (threshold_header, over_threshold_header) = {
464            let mut builder = Builder::new(&mut tar_data);
465
466            // File exactly at the threshold should be inline
467            let threshold_content = vec![b'X'; INLINE_CONTENT_MAX];
468            let header1 =
469                append_file(&mut builder, "threshold_file.txt", &threshold_content).unwrap();
470
471            // File just over threshold should be external
472            let over_threshold_content = vec![b'Y'; INLINE_CONTENT_MAX + 1];
473            let header2 = append_file(
474                &mut builder,
475                "over_threshold_file.txt",
476                &over_threshold_content,
477            )
478            .unwrap();
479
480            builder.finish().unwrap();
481            (header1, header2)
482        };
483
484        let mut tar_cursor = Cursor::new(tar_data);
485        let repo = create_test_repository().unwrap();
486        let mut writer = repo.create_stream(None, None);
487
488        split(&mut tar_cursor, &mut writer).unwrap();
489        let object_id = writer.done().unwrap();
490
491        let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
492            SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
493        let mut entries = Vec::new();
494
495        while let Some(entry) = get_entry(&mut reader).unwrap() {
496            entries.push(entry);
497        }
498
499        assert_eq!(entries.len(), 2);
500
501        // First file should be inline
502        assert_eq!(entries[0].path, PathBuf::from("/threshold_file.txt"));
503        assert_header_stat_equal(&threshold_header, &entries[0].stat, "threshold_file.txt");
504        if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) =
505            entries[0].item
506        {
507            assert_eq!(content.len(), INLINE_CONTENT_MAX);
508            assert_eq!(content[0], b'X');
509        } else {
510            panic!("Expected inline regular file for threshold file");
511        }
512
513        // Second file should be external
514        assert_eq!(entries[1].path, PathBuf::from("/over_threshold_file.txt"));
515        assert_header_stat_equal(
516            &over_threshold_header,
517            &entries[1].stat,
518            "over_threshold_file.txt",
519        );
520        if let TarItem::Leaf(LeafContent::Regular(RegularFile::External(_, size))) = entries[1].item
521        {
522            assert_eq!(size, (INLINE_CONTENT_MAX + 1) as u64);
523        } else {
524            panic!("Expected external regular file for over-threshold file");
525        }
526    }
527
528    #[test]
529    fn test_round_trip_simple() {
530        // Create a simple tar with various file types
531        let mut original_tar = Vec::new();
532        let (small_header, large_header) = {
533            let mut builder = Builder::new(&mut original_tar);
534
535            // Add a small file
536            let small_content = b"Small file content";
537            let header1 = append_file(&mut builder, "small.txt", small_content).unwrap();
538
539            // Add a large file
540            let large_content = vec![b'L'; INLINE_CONTENT_MAX + 100];
541            let header2 = append_file(&mut builder, "large.txt", &large_content).unwrap();
542
543            builder.finish().unwrap();
544            (header1, header2)
545        };
546
547        // Split the tar
548        let mut tar_cursor = Cursor::new(original_tar.clone());
549        let repo = create_test_repository().unwrap();
550        let mut writer = repo.create_stream(None, None);
551        split(&mut tar_cursor, &mut writer).unwrap();
552        let object_id = writer.done().unwrap();
553
554        // Read back entries and compare with original headers
555        let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
556            SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
557        let mut entries = Vec::new();
558
559        while let Some(entry) = get_entry(&mut reader).unwrap() {
560            entries.push(entry);
561        }
562
563        assert_eq!(entries.len(), 2, "Should have exactly 2 entries");
564
565        // Compare small file
566        assert_eq!(entries[0].path, PathBuf::from("/small.txt"));
567        assert_header_stat_equal(&small_header, &entries[0].stat, "small.txt");
568
569        if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) =
570            entries[0].item
571        {
572            assert_eq!(content.as_ref(), b"Small file content");
573        } else {
574            panic!("Expected inline regular file for small.txt");
575        }
576
577        // Compare large file
578        assert_eq!(entries[1].path, PathBuf::from("/large.txt"));
579        assert_header_stat_equal(&large_header, &entries[1].stat, "large.txt");
580
581        if let TarItem::Leaf(LeafContent::Regular(RegularFile::External(ref id, size))) =
582            entries[1].item
583        {
584            assert_eq!(size, (INLINE_CONTENT_MAX + 100) as u64);
585            // Verify the external content matches
586            use std::io::Read;
587            let mut external_data = Vec::new();
588            std::fs::File::from(repo.open_object(id).unwrap())
589                .read_to_end(&mut external_data)
590                .unwrap();
591            let expected_content = vec![b'L'; INLINE_CONTENT_MAX + 100];
592            assert_eq!(
593                external_data, expected_content,
594                "External file content should match"
595            );
596        } else {
597            panic!("Expected external regular file for large.txt");
598        }
599    }
600
601    #[test]
602    fn test_special_filename_cases() {
603        let mut tar_data = Vec::new();
604        {
605            let mut builder = Builder::new(&mut tar_data);
606
607            // Test file with special characters
608            let content1 = b"Special chars content";
609            append_file(&mut builder, "file-with_special.chars@123", content1).unwrap();
610
611            // Test file with long filename
612            let long_name = "a".repeat(100);
613            let content2 = b"Long filename content";
614            append_file(&mut builder, &long_name, content2).unwrap();
615
616            builder.finish().unwrap();
617        };
618
619        let entries = read_all_via_splitstream(tar_data).unwrap();
620        assert_eq!(entries.len(), 2);
621
622        // Verify special characters filename
623        assert_eq!(
624            entries[0].path,
625            PathBuf::from("/file-with_special.chars@123")
626        );
627        assert_eq!(
628            entries[0].path.file_name().unwrap(),
629            "file-with_special.chars@123"
630        );
631
632        // Verify long filename
633        let expected_long_path = format!("/{}", "a".repeat(100));
634        assert_eq!(entries[1].path, PathBuf::from(expected_long_path));
635        assert_eq!(entries[1].path.file_name().unwrap(), &*"a".repeat(100));
636    }
637
638    #[test]
639    fn test_gnu_long_filename_reproduction() {
640        // Create a very long path that will definitely trigger GNU long name extensions
641        let very_long_path = format!(
642            "very/long/path/that/exceeds/the/normal/tar/header/limit/{}",
643            "x".repeat(120)
644        );
645        let content = b"Content for very long path";
646
647        // Use append_data to create a tar with a very long filename that triggers GNU extensions
648        let mut tar_data = Vec::new();
649        {
650            let mut builder = Builder::new(&mut tar_data);
651            append_file(&mut builder, &very_long_path, content).unwrap();
652            builder.finish().unwrap();
653        };
654
655        let entries = read_all_via_splitstream(tar_data).unwrap();
656        assert_eq!(entries.len(), 1);
657        let abspath = format!("/{very_long_path}");
658        assert_eq!(entries[0].path, Path::new(&abspath));
659    }
660
661    #[test]
662    fn test_gnu_longlink() {
663        let very_long_path = format!(
664            "very/long/path/that/exceeds/the/normal/tar/header/limit/{}",
665            "x".repeat(120)
666        );
667
668        // Use append_data to create a tar with a very long filename that triggers GNU extensions
669        let mut tar_data = Vec::new();
670        {
671            let mut builder = Builder::new(&mut tar_data);
672            let mut header = tar::Header::new_gnu();
673            header.set_mode(0o777);
674            header.set_entry_type(EntryType::Symlink);
675            header.set_size(0);
676            header.set_uid(0);
677            header.set_gid(0);
678            builder
679                .append_link(&mut header, "long-symlink", &very_long_path)
680                .unwrap();
681            builder.finish().unwrap();
682        };
683
684        let entries = read_all_via_splitstream(tar_data).unwrap();
685        assert_eq!(entries.len(), 1);
686        match &entries[0].item {
687            TarItem::Leaf(LeafContent::Symlink(ref target)) => {
688                assert_eq!(&**target, OsStr::new(&very_long_path));
689            }
690            _ => unreachable!(),
691        };
692    }
693
694    /// Compare a tar::Header with a composefs Stat structure for equality
695    fn assert_header_stat_equal(header: &tar::Header, stat: &Stat, msg_prefix: &str) {
696        assert_eq!(
697            header.mode().unwrap(),
698            stat.st_mode,
699            "{}: mode mismatch",
700            msg_prefix
701        );
702        assert_eq!(
703            header.uid().unwrap() as u32,
704            stat.st_uid,
705            "{}: uid mismatch",
706            msg_prefix
707        );
708        assert_eq!(
709            header.gid().unwrap() as u32,
710            stat.st_gid,
711            "{}: gid mismatch",
712            msg_prefix
713        );
714        assert_eq!(
715            header.mtime().unwrap() as i64,
716            stat.st_mtim_sec,
717            "{}: mtime mismatch",
718            msg_prefix
719        );
720    }
721}