From e255766ab86a1e60b30a22d48c7c3af1f1d4bb85 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 22:38:05 +0000 Subject: [PATCH 01/23] Initial plan From ed6162e5fc9be7a5741535be3e3d2d1baaf1fd0e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 22:53:25 +0000 Subject: [PATCH 02/23] fix(hardlink): use (device, inode) key for deduplication to prevent cross-filesystem false deduplication Co-authored-by: KSXGitHub <11488886+KSXGitHub@users.noreply.github.com> --- src/hardlink/aware.rs | 3 ++- src/hardlink/hardlink_list.rs | 22 ++++++++++++++--- src/hardlink/hardlink_list/iter.rs | 8 +++--- src/hardlink/hardlink_list/reflection.rs | 17 ++++++++++--- src/hardlink/hardlink_list/test.rs | 31 ++++++++++++------------ 5 files changed, 54 insertions(+), 27 deletions(-) diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs index 36dedbc0..688a85ec 100644 --- a/src/hardlink/aware.rs +++ b/src/hardlink/aware.rs @@ -82,8 +82,9 @@ where })); let ino = InodeNumber::get(stats); + let dev = stats.dev(); self.record - .add(ino, size, links, path) + .add(ino, dev, size, links, path) .map_err(ReportHardlinksError::AddToRecord) } } diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index c955de8e..02efe145 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -20,6 +20,20 @@ use pipe_trait::Pipe; #[cfg(any(unix, test))] use std::path::Path; +/// Internal key used to uniquely identify an inode across all filesystems. +/// +/// Hardlinks cannot span filesystems, so including the device number prevents +/// false deduplication of files from different filesystems that happen to share +/// the same inode number. Both du-dust and dua-cli track `(device, inode)` pairs +/// for the same reason. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +struct InodeKey { + /// Device number of the filesystem the inode belongs to. + dev: u64, + /// Inode number within the device. + ino: InodeNumber, +} + /// Map value in [`HardlinkList`]. #[derive(Debug, Clone)] struct Value { @@ -38,8 +52,8 @@ struct Value { /// [`Reflection`] which implement these traits. #[derive(Debug, SmartDefault, Clone)] pub struct HardlinkList( - /// Map an inode number to its size, number of links, and detected paths. - DashMap>, + /// Map an inode key (device + inode number) to its size, number of links, and detected paths. + DashMap>, ); impl HardlinkList { @@ -112,13 +126,15 @@ where pub(crate) fn add( &self, ino: InodeNumber, + dev: u64, size: Size, links: u64, path: &Path, ) -> Result<(), AddError> { + let key = InodeKey { dev, ino }; let mut assertions = Ok(()); self.0 - .entry(ino) + .entry(key) .and_modify(|recorded| { if size != recorded.size { assertions = Err(AddError::SizeConflict(SizeConflictError { diff --git a/src/hardlink/hardlink_list/iter.rs b/src/hardlink/hardlink_list/iter.rs index 4b2c1b3c..69d0f10c 100644 --- a/src/hardlink/hardlink_list/iter.rs +++ b/src/hardlink/hardlink_list/iter.rs @@ -1,4 +1,4 @@ -use super::{HardlinkList, Value}; +use super::{HardlinkList, InodeKey, Value}; use crate::{hardlink::LinkPathList, inode::InodeNumber}; use dashmap::{iter::Iter as DashIter, mapref::multiple::RefMulti}; use pipe_trait::Pipe; @@ -7,7 +7,7 @@ use pipe_trait::Pipe; #[derive(derive_more::Debug)] #[debug(bound())] #[debug("Iter(..)")] -pub struct Iter<'a, Size>(DashIter<'a, InodeNumber, Value>); +pub struct Iter<'a, Size>(DashIter<'a, InodeKey, Value>); impl HardlinkList { /// Iterate over the recorded entries. @@ -20,7 +20,7 @@ impl HardlinkList { #[derive(derive_more::Debug)] #[debug(bound())] #[debug("Item(..)")] -pub struct Item<'a, Size>(RefMulti<'a, InodeNumber, Value>); +pub struct Item<'a, Size>(RefMulti<'a, InodeKey, Value>); impl<'a, Size> Iterator for Iter<'a, Size> { type Item = Item<'a, Size>; @@ -33,7 +33,7 @@ impl<'a, Size> Item<'a, Size> { /// The inode number of the file. #[inline] pub fn ino(&self) -> InodeNumber { - *self.0.key() + self.0.key().ino } /// Size of the file. diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index c190041b..5fd20605 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -1,4 +1,4 @@ -use super::{HardlinkList, Value}; +use super::{HardlinkList, InodeKey, Value}; use crate::{hardlink::LinkPathListReflection, inode::InodeNumber}; use dashmap::DashMap; use derive_more::{Display, Error, Into, IntoIterator}; @@ -12,7 +12,10 @@ use serde::{Deserialize, Serialize}; /// internal content. /// /// **Guarantees:** -/// * Every inode number is unique. +/// * Every inode number is unique within the scope of a single scan (files are keyed by +/// `(device, inode)` in the underlying [`HardlinkList`], but the reflection stores +/// only the inode number; entries from different filesystems with the same inode number +/// are an unsupported edge case in JSON round-trips). /// * The internal list is always sorted by inode numbers. /// /// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits. @@ -96,7 +99,7 @@ impl From>> for Reflection { impl From> for Reflection { fn from(HardlinkList(list): HardlinkList) -> Self { list.into_iter() - .map(|(ino, value)| ReflectionEntry::new(ino, value)) + .map(|(key, value)| ReflectionEntry::new(key.ino, value)) .collect::>() .pipe(Reflection::from) } @@ -119,7 +122,13 @@ impl TryFrom> for HardlinkList { for entry in entries { let (ino, value) = entry.dissolve(); - if map.insert(ino, value).is_some() { + // Device number is unknown when loading from a reflection (e.g. JSON input); + // use dev=0 as a placeholder. This means that when reloading JSON output that + // was produced by scanning multiple filesystems, files from different devices + // sharing the same inode number will be incorrectly merged into a single entry. + // This is an unsupported edge case: the JSON format does not carry device info. + let key = InodeKey { dev: 0, ino }; + if map.insert(key, value).is_some() { return ino.pipe(ConversionError::DuplicatedInode).pipe(Err); } } diff --git a/src/hardlink/hardlink_list/test.rs b/src/hardlink/hardlink_list/test.rs index 8e6878d2..6d9be448 100644 --- a/src/hardlink/hardlink_list/test.rs +++ b/src/hardlink/hardlink_list/test.rs @@ -3,21 +3,22 @@ use crate::size::Bytes; use pipe_trait::Pipe; use pretty_assertions::{assert_eq, assert_ne}; -const TABLE: &[(u64, u64, u64, &str)] = &[ - (241, 3652, 1, "a"), - (569, 2210, 1, "b"), - (110, 2350, 3, "c"), - (110, 2350, 3, "c1"), - (778, 1110, 1, "d"), - (274, 6060, 2, "e"), - (274, 6060, 2, "e1"), - (883, 4530, 1, "f"), +const TABLE: &[(u64, u64, u64, u64, &str)] = &[ + // dev, ino, size, links, path + (0, 241, 3652, 1, "a"), + (0, 569, 2210, 1, "b"), + (0, 110, 2350, 3, "c"), + (0, 110, 2350, 3, "c1"), + (0, 778, 1110, 1, "d"), + (0, 274, 6060, 2, "e"), + (0, 274, 6060, 2, "e1"), + (0, 883, 4530, 1, "f"), ]; fn add(list: HardlinkList) -> HardlinkList { let values = TABLE[ROW]; - let (ino, size, links, path) = values; - if let Err(error) = list.add(ino.into(), size.into(), links, path.as_ref()) { + let (dev, ino, size, links, path) = values; + if let Err(error) = list.add(ino.into(), dev, size.into(), links, path.as_ref()) { panic!("Failed to add {values:?} (index: {ROW}) to the list: {error}"); } list @@ -119,10 +120,10 @@ fn insertion_difference_cause_inequality() { #[test] fn detect_size_change() { let list = HardlinkList::::new(); - list.add(123.into(), 100.into(), 1, "a".as_ref()) + list.add(123.into(), 0, 100.into(), 1, "a".as_ref()) .expect("add the first path"); let actual = list - .add(123.into(), 110.into(), 1, "b".as_ref()) + .add(123.into(), 0, 110.into(), 1, "b".as_ref()) .expect_err("add the second path"); let expected = AddError::SizeConflict(SizeConflictError { ino: 123.into(), @@ -135,10 +136,10 @@ fn detect_size_change() { #[test] fn detect_number_of_links_change() { let list = HardlinkList::::new(); - list.add(123.into(), 100.into(), 1, "a".as_ref()) + list.add(123.into(), 0, 100.into(), 1, "a".as_ref()) .expect("add the first path"); let actual = list - .add(123.into(), 100.into(), 2, "b".as_ref()) + .add(123.into(), 0, 100.into(), 2, "b".as_ref()) .expect_err("add the second path"); let expected = AddError::NumberOfLinksConflict(NumberOfLinksConflictError { ino: 123.into(), From 38aacab4f41e61d805149deb81164f56371a8b86 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 23:14:30 +0000 Subject: [PATCH 03/23] test(hardlink): add test proving device number is used in deduplication key Co-authored-by: KSXGitHub <11488886+KSXGitHub@users.noreply.github.com> --- src/hardlink/hardlink_list/test.rs | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/hardlink/hardlink_list/test.rs b/src/hardlink/hardlink_list/test.rs index 6d9be448..b98845a6 100644 --- a/src/hardlink/hardlink_list/test.rs +++ b/src/hardlink/hardlink_list/test.rs @@ -148,3 +148,38 @@ fn detect_number_of_links_change() { }); assert_eq!(actual, expected); } + +/// Files on different devices may share the same inode number, but they are +/// unrelated — hardlinks cannot span filesystem boundaries. Verify that two +/// files with the same inode number but different device numbers produce +/// separate entries in the list (i.e. the device number is actually used in +/// the deduplication key). +#[test] +fn same_ino_on_different_devices_are_treated_separately() { + let list = HardlinkList::::new(); + + // dev=1, ino=100 — first filesystem + list.add(100.into(), 1, 50.into(), 2, "dev1/file_a".as_ref()) + .expect("add dev1/file_a"); + list.add(100.into(), 1, 50.into(), 2, "dev1/file_b".as_ref()) + .expect("add dev1/file_b (same dev+ino → same inode group)"); + + // dev=2, ino=100 — second filesystem, coincidentally same inode number + list.add(100.into(), 2, 80.into(), 2, "dev2/file_c".as_ref()) + .expect("add dev2/file_c (different dev → separate inode group)"); + list.add(100.into(), 2, 80.into(), 2, "dev2/file_d".as_ref()) + .expect("add dev2/file_d (same dev+ino → same inode group as file_c)"); + + // Each device should produce its own entry, so the list should have 2 entries. + assert_eq!(list.len(), 2, "expected one entry per (dev, ino) pair"); + + let reflection = list.into_reflection(); + // Both entries expose ino=100 in the reflection (device is not part of the + // public JSON format), so there are still 2 entries in the vector. + assert_eq!(reflection.len(), 2); + + // Paths are grouped per (dev, ino): each group has exactly 2 paths. + for entry in reflection.iter() { + assert_eq!(entry.paths.len(), 2); + } +} From eb92bdcad9fa308bb054ffa27d2fbab679b92623 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 23:40:44 +0000 Subject: [PATCH 04/23] fix(reflection): address doc and comment accuracy for Reflection and TryFrom Co-authored-by: KSXGitHub <11488886+KSXGitHub@users.noreply.github.com> --- src/hardlink/hardlink_list/reflection.rs | 30 +++++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index 5fd20605..e19ca084 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -12,11 +12,14 @@ use serde::{Deserialize, Serialize}; /// internal content. /// /// **Guarantees:** -/// * Every inode number is unique within the scope of a single scan (files are keyed by -/// `(device, inode)` in the underlying [`HardlinkList`], but the reflection stores -/// only the inode number; entries from different filesystems with the same inode number -/// are an unsupported edge case in JSON round-trips). -/// * The internal list is always sorted by inode numbers. +/// * Every `(device, inode)` pair is unique within the scope of a single scan, but inode +/// numbers alone are **not** guaranteed to be unique: when scanning multiple filesystems, +/// two unrelated files on different devices can share the same inode number and will each +/// produce a separate entry. The reflection stores only the inode number (the JSON format +/// does not carry device information), so round-tripping a multi-filesystem scan through +/// JSON is an unsupported edge case. +/// * The internal list is always sorted by inode numbers (and by device number as a +/// tie-breaker when two entries share the same inode number). /// /// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits. /// @@ -98,10 +101,17 @@ impl From>> for Reflection { impl From> for Reflection { fn from(HardlinkList(list): HardlinkList) -> Self { - list.into_iter() + // Collect to a vec, sort by (ino, dev) for a stable, deterministic order, then + // strip dev before wrapping. Sorting here (with dev still available) avoids the + // nondeterminism that would arise from an unstable sort on ino alone when two + // entries from different filesystems share the same inode number. + let mut pairs: Vec<(InodeKey, Value)> = list.into_iter().collect(); + pairs.sort_unstable_by_key(|(key, _)| (u64::from(key.ino), key.dev)); + pairs + .into_iter() .map(|(key, value)| ReflectionEntry::new(key.ino, value)) .collect::>() - .pipe(Reflection::from) + .pipe(Reflection) } } @@ -125,8 +135,10 @@ impl TryFrom> for HardlinkList { // Device number is unknown when loading from a reflection (e.g. JSON input); // use dev=0 as a placeholder. This means that when reloading JSON output that // was produced by scanning multiple filesystems, files from different devices - // sharing the same inode number will be incorrectly merged into a single entry. - // This is an unsupported edge case: the JSON format does not carry device info. + // sharing the same inode number cannot be distinguished and therefore cannot + // all be represented. Such duplicates cause a ConversionError::DuplicatedInode + // and are treated as an unsupported edge case, since the JSON format does not + // carry device information. let key = InodeKey { dev: 0, ino }; if map.insert(key, value).is_some() { return ino.pipe(ConversionError::DuplicatedInode).pipe(Err); From e7e5b5ca35f6c6ff4a8b41a6ec7fb97528eb437a Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 09:13:31 +0000 Subject: [PATCH 05/23] fix(hardlink)!: include dev in ReflectionEntry and JSON schema The Copilot PR kept dev out of the reflection/JSON layer, introducing a dev=0 placeholder hack, double-allocation sorting, and "unsupported edge case" disclaimers. Since the reflection is meant to mirror the internal HardlinkList, just include dev in ReflectionEntry. This simplifies From back to a single-pass map+collect, removes the dev=0 workaround in TryFrom, and makes multi-filesystem JSON round-tripping correct. Bump SCHEMA_VERSION to 2026-04-02 for the new field. https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list.rs | 2 +- src/hardlink/hardlink_list/iter.rs | 6 +++ src/hardlink/hardlink_list/reflection.rs | 51 +++++++-------------- src/hardlink/hardlink_list/test.rs | 14 +++--- src/json_data/schema_version.rs | 2 +- tests/_utils.rs | 9 ++++ tests/hardlinks_deduplication.rs | 31 +++++++++++++ tests/hardlinks_deduplication_multi_args.rs | 15 ++++++ 8 files changed, 88 insertions(+), 42 deletions(-) diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index 02efe145..39044063 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -27,7 +27,7 @@ use std::path::Path; /// the same inode number. Both du-dust and dua-cli track `(device, inode)` pairs /// for the same reason. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -struct InodeKey { +pub(crate) struct InodeKey { /// Device number of the filesystem the inode belongs to. dev: u64, /// Inode number within the device. diff --git a/src/hardlink/hardlink_list/iter.rs b/src/hardlink/hardlink_list/iter.rs index 69d0f10c..04e1de77 100644 --- a/src/hardlink/hardlink_list/iter.rs +++ b/src/hardlink/hardlink_list/iter.rs @@ -30,6 +30,12 @@ impl<'a, Size> Iterator for Iter<'a, Size> { } impl<'a, Size> Item<'a, Size> { + /// The device number of the filesystem the inode belongs to. + #[inline] + pub fn dev(&self) -> u64 { + self.0.key().dev + } + /// The inode number of the file. #[inline] pub fn ino(&self) -> InodeNumber { diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index e19ca084..e902c080 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -12,14 +12,8 @@ use serde::{Deserialize, Serialize}; /// internal content. /// /// **Guarantees:** -/// * Every `(device, inode)` pair is unique within the scope of a single scan, but inode -/// numbers alone are **not** guaranteed to be unique: when scanning multiple filesystems, -/// two unrelated files on different devices can share the same inode number and will each -/// produce a separate entry. The reflection stores only the inode number (the JSON format -/// does not carry device information), so round-tripping a multi-filesystem scan through -/// JSON is an unsupported edge case. -/// * The internal list is always sorted by inode numbers (and by device number as a -/// tie-breaker when two entries share the same inode number). +/// * Every `(device, inode)` pair is unique. +/// * The internal list is always sorted by inode numbers (with device number as tie-breaker). /// /// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits. /// @@ -54,6 +48,8 @@ impl Reflection { #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "json", derive(Deserialize, Serialize))] pub struct ReflectionEntry { + /// Device number of the filesystem the inode belongs to. + pub dev: u64, /// The inode number of the file. pub ino: InodeNumber, /// Size of the file. @@ -67,9 +63,10 @@ pub struct ReflectionEntry { impl ReflectionEntry { /// Create a new entry. #[inline] - fn new(ino: InodeNumber, Value { size, links, paths }: Value) -> Self { + fn new(InodeKey { dev, ino }: InodeKey, Value { size, links, paths }: Value) -> Self { let paths = paths.into(); ReflectionEntry { + dev, ino, size, links, @@ -77,41 +74,35 @@ impl ReflectionEntry { } } - /// Dissolve [`ReflectionEntry`] into a pair of [`InodeNumber`] and [`Value`]. + /// Dissolve [`ReflectionEntry`] into a pair of [`InodeKey`] and [`Value`]. #[inline] - fn dissolve(self) -> (InodeNumber, Value) { + fn dissolve(self) -> (InodeKey, Value) { let ReflectionEntry { + dev, ino, size, links, paths, } = self; let paths = paths.into(); - (ino, Value { size, links, paths }) + (InodeKey { dev, ino }, Value { size, links, paths }) } } impl From>> for Reflection { - /// Sort the list by inode numbers, then create the reflection. + /// Sort the list by `(inode, device)`, then create the reflection. fn from(list: Vec>) -> Self { - list.into_sorted_unstable_by_key(|entry| u64::from(entry.ino)) + list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), entry.dev)) .pipe(Reflection) } } impl From> for Reflection { fn from(HardlinkList(list): HardlinkList) -> Self { - // Collect to a vec, sort by (ino, dev) for a stable, deterministic order, then - // strip dev before wrapping. Sorting here (with dev still available) avoids the - // nondeterminism that would arise from an unstable sort on ino alone when two - // entries from different filesystems share the same inode number. - let mut pairs: Vec<(InodeKey, Value)> = list.into_iter().collect(); - pairs.sort_unstable_by_key(|(key, _)| (u64::from(key.ino), key.dev)); - pairs - .into_iter() - .map(|(key, value)| ReflectionEntry::new(key.ino, value)) + list.into_iter() + .map(|(key, value)| ReflectionEntry::new(key, value)) .collect::>() - .pipe(Reflection) + .pipe(Reflection::from) } } @@ -131,17 +122,9 @@ impl TryFrom> for HardlinkList { let map = DashMap::with_capacity(entries.len()); for entry in entries { - let (ino, value) = entry.dissolve(); - // Device number is unknown when loading from a reflection (e.g. JSON input); - // use dev=0 as a placeholder. This means that when reloading JSON output that - // was produced by scanning multiple filesystems, files from different devices - // sharing the same inode number cannot be distinguished and therefore cannot - // all be represented. Such duplicates cause a ConversionError::DuplicatedInode - // and are treated as an unsupported edge case, since the JSON format does not - // carry device information. - let key = InodeKey { dev: 0, ino }; + let (key, value) = entry.dissolve(); if map.insert(key, value).is_some() { - return ino.pipe(ConversionError::DuplicatedInode).pipe(Err); + return key.ino.pipe(ConversionError::DuplicatedInode).pipe(Err); } } diff --git a/src/hardlink/hardlink_list/test.rs b/src/hardlink/hardlink_list/test.rs index b98845a6..e7cd064a 100644 --- a/src/hardlink/hardlink_list/test.rs +++ b/src/hardlink/hardlink_list/test.rs @@ -174,12 +174,14 @@ fn same_ino_on_different_devices_are_treated_separately() { assert_eq!(list.len(), 2, "expected one entry per (dev, ino) pair"); let reflection = list.into_reflection(); - // Both entries expose ino=100 in the reflection (device is not part of the - // public JSON format), so there are still 2 entries in the vector. assert_eq!(reflection.len(), 2); - // Paths are grouped per (dev, ino): each group has exactly 2 paths. - for entry in reflection.iter() { - assert_eq!(entry.paths.len(), 2); - } + // Sorted by (ino, dev), so dev=1 comes first. + let entries: Vec<_> = reflection.iter().collect(); + assert_eq!(entries[0].dev, 1); + assert_eq!(entries[0].ino, 100.into()); + assert_eq!(entries[0].paths.len(), 2); + assert_eq!(entries[1].dev, 2); + assert_eq!(entries[1].ino, 100.into()); + assert_eq!(entries[1].paths.len(), 2); } diff --git a/src/json_data/schema_version.rs b/src/json_data/schema_version.rs index a392f1d6..6ac50bf4 100644 --- a/src/json_data/schema_version.rs +++ b/src/json_data/schema_version.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use std::convert::TryFrom; /// Content of [`SchemaVersion`]. -pub const SCHEMA_VERSION: &str = "2024-11-02"; +pub const SCHEMA_VERSION: &str = "2026-04-02"; /// Verifying schema version. #[derive(Debug, Clone, Copy)] diff --git a/tests/_utils.rs b/tests/_utils.rs index 3a9c785a..ff361c93 100644 --- a/tests/_utils.rs +++ b/tests/_utils.rs @@ -583,3 +583,12 @@ pub fn read_inode_number(path: &Path) -> u64 { .unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}")) .ino() } + +/// Read [dev](std::os::unix::fs::MetadataExt::dev) of a path. +#[cfg(unix)] +pub fn read_device_number(path: &Path) -> u64 { + use std::os::unix::fs::MetadataExt; + path.pipe(symlink_metadata) + .unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}")) + .dev() +} diff --git a/tests/hardlinks_deduplication.rs b/tests/hardlinks_deduplication.rs index 73274458..c33a010e 100644 --- a/tests/hardlinks_deduplication.rs +++ b/tests/hardlinks_deduplication.rs @@ -85,6 +85,8 @@ fn simple_tree_with_some_hardlinks() { .pipe(InodeNumber::from) }; + let dev = read_device_number(&workspace); + let shared_paths = |suffices: &[&str]| { suffices .iter() @@ -190,6 +192,7 @@ fn simple_tree_with_some_hardlinks() { .collect(); let expected_shared_details = [ ReflectionEntry { + dev, ino: file_inode("one-internal-hardlink.txt"), size: file_size("one-internal-hardlink.txt"), links: 1 + 1, @@ -199,6 +202,7 @@ fn simple_tree_with_some_hardlinks() { ]), }, ReflectionEntry { + dev, ino: file_inode("two-internal-hardlinks.txt"), size: file_size("two-internal-hardlinks.txt"), links: 1 + 2, @@ -209,12 +213,14 @@ fn simple_tree_with_some_hardlinks() { ]), }, ReflectionEntry { + dev, ino: file_inode("one-external-hardlink.txt"), size: file_size("one-external-hardlink.txt"), links: 1 + 1, paths: shared_paths(&["sources/one-external-hardlink.txt"]), }, ReflectionEntry { + dev, ino: file_inode("one-internal-one-external-hardlinks.txt"), size: file_size("one-internal-one-external-hardlinks.txt"), links: 1 + 1 + 1, @@ -338,6 +344,8 @@ fn multiple_hardlinks_to_a_single_file() { .pipe_as_ref(read_inode_number) .pipe(InodeNumber::from); + let dev = read_device_number(&workspace); + let actual_size = tree.size; let expected_size = workspace .pipe_as_ref(read_apparent_size) @@ -374,6 +382,7 @@ fn multiple_hardlinks_to_a_single_file() { .cloned() .collect(); let expected_shared_details = [ReflectionEntry { + dev, ino: file_inode, size: file_size, links: 1 + links, @@ -470,6 +479,8 @@ fn complex_tree_with_shared_and_unique_files() { .pipe(Bytes::new) }; + let dev = read_device_number(&workspace); + let actual_size = tree.size; // The following formula treat the first file as "real" and @@ -562,6 +573,7 @@ fn complex_tree_with_shared_and_unique_files() { .find(|item| starts_with_path(item, "some-hardlinks/file-0.txt")) .cloned(); let expected = Some(ReflectionEntry { + dev, ino: workspace .join("some-hardlinks/file-0.txt") .pipe_as_ref(read_inode_number) @@ -591,6 +603,7 @@ fn complex_tree_with_shared_and_unique_files() { .find(|item| starts_with_path(item, &format!("some-hardlinks/file-{file_index}.txt"))) .cloned(); let expected = Some(ReflectionEntry { + dev, ino: workspace .join(format!("some-hardlinks/file-{file_index}.txt")) .pipe_as_ref(read_inode_number) @@ -695,6 +708,8 @@ fn hardlinks_and_non_hardlinks() { .pipe(InodeNumber::from) }; + let dev = read_device_number(&workspace); + let shared_paths = |file_names: &[&str]| { file_names .iter() @@ -717,12 +732,14 @@ fn hardlinks_and_non_hardlinks() { .collect(); let expected_shared_details = [ ReflectionEntry { + dev, ino: file_inode("file-0.txt"), size: file_size, links: 3, paths: shared_paths(&["file-0.txt", "link0-file0.txt", "link1-file0.txt"]), }, ReflectionEntry { + dev, ino: file_inode("file-1.txt"), size: file_size, links: 2, @@ -730,24 +747,28 @@ fn hardlinks_and_non_hardlinks() { }, // ... file-2.txt and file-3.txt don't have hardlinks so they shouldn't appear here ... ReflectionEntry { + dev, ino: file_inode("file-4.txt"), size: file_size, links: 2, paths: shared_paths(&["file-4.txt"]), }, ReflectionEntry { + dev, ino: file_inode("file-5.txt"), size: file_size, links: 2, paths: shared_paths(&["file-5.txt"]), }, ReflectionEntry { + dev, ino: file_inode("file-6.txt"), size: file_size, links: 2, paths: shared_paths(&["file-6.txt"]), }, ReflectionEntry { + dev, ino: file_inode("file-7.txt"), size: file_size, links: 2, @@ -898,6 +919,8 @@ fn exclusive_hardlinks_only() { .pipe(InodeNumber::from) }; + let dev = read_device_number(&workspace); + let shared_paths = |file_names: &[&str]| { file_names .iter() @@ -921,6 +944,7 @@ fn exclusive_hardlinks_only() { let expected_shared_details = (0..files_per_branch) .par_bridge() .map(|index| ReflectionEntry { + dev, ino: file_inode(&format!("file-{index}.txt")), size: file_size, links: 2, @@ -1024,6 +1048,8 @@ fn exclusive_only_and_external_only_hardlinks() { .pipe(InodeNumber::from) }; + let dev = read_device_number(&workspace); + let shared_paths = |file_names: &[&str]| { file_names .iter() @@ -1050,6 +1076,7 @@ fn exclusive_only_and_external_only_hardlinks() { (0..(files_per_branch / 2)) .par_bridge() .map(|index| ReflectionEntry { + dev, ino: file_inode(&format!("link0-{index}.txt")), size: file_size, links: 2, @@ -1060,6 +1087,7 @@ fn exclusive_only_and_external_only_hardlinks() { ((files_per_branch / 2)..files_per_branch) .par_bridge() .map(|index| ReflectionEntry { + dev, ino: file_inode(&format!("link0-{index}.txt")), size: file_size, links: 2, @@ -1187,6 +1215,8 @@ fn external_hardlinks_only() { .pipe(InodeNumber::from) }; + let dev = read_device_number(&workspace); + let shared_paths = |file_names: &[&str]| { file_names .iter() @@ -1210,6 +1240,7 @@ fn external_hardlinks_only() { let expected_shared_details = (0..files_per_branch) .par_bridge() .map(|index| ReflectionEntry { + dev, ino: file_inode(&format!("linkX-{index}.txt")), size: file_size, links: 2, diff --git a/tests/hardlinks_deduplication_multi_args.rs b/tests/hardlinks_deduplication_multi_args.rs index 4851aff1..531dbd57 100644 --- a/tests/hardlinks_deduplication_multi_args.rs +++ b/tests/hardlinks_deduplication_multi_args.rs @@ -82,6 +82,8 @@ fn simple_tree_with_some_hardlinks() { .pipe(InodeNumber::from) }; + let dev = read_device_number(&workspace); + let shared_paths = |suffices: &[&str]| { suffices .iter() @@ -144,6 +146,7 @@ fn simple_tree_with_some_hardlinks() { .collect(); let expected_shared_details = [ ReflectionEntry { + dev, ino: file_inode("one-internal-hardlink.txt"), size: file_size("one-internal-hardlink.txt"), links: 1 + 1, @@ -153,6 +156,7 @@ fn simple_tree_with_some_hardlinks() { ]), }, ReflectionEntry { + dev, ino: file_inode("two-internal-hardlinks.txt"), size: file_size("two-internal-hardlinks.txt"), links: 1 + 2, @@ -163,12 +167,14 @@ fn simple_tree_with_some_hardlinks() { ]), }, ReflectionEntry { + dev, ino: file_inode("one-external-hardlink.txt"), size: file_size("one-external-hardlink.txt"), links: 1 + 1, paths: shared_paths(&["sources/one-external-hardlink.txt"]), }, ReflectionEntry { + dev, ino: file_inode("one-internal-one-external-hardlinks.txt"), size: file_size("one-internal-one-external-hardlinks.txt"), links: 1 + 1 + 1, @@ -295,6 +301,8 @@ fn multiple_hardlinks_to_a_single_file() { .pipe_as_ref(read_inode_number) .pipe(InodeNumber::from); + let dev = read_device_number(&workspace); + let actual_size = tree.size; let expected_size = file_size; assert_eq!(actual_size, expected_size); @@ -319,6 +327,7 @@ fn multiple_hardlinks_to_a_single_file() { .cloned() .collect(); let expected_shared_details = [ReflectionEntry { + dev, ino: file_inode, size: file_size, links: 1 + links, @@ -437,6 +446,8 @@ fn multiple_duplicated_arguments() { .pipe(InodeNumber::from) }; + let dev = read_device_number(&workspace); + let shared_paths = |suffices: &[&str]| { suffices .iter() @@ -501,6 +512,7 @@ fn multiple_duplicated_arguments() { .collect(); let expected_shared_details = [ ReflectionEntry { + dev, ino: file_inode("one-internal-hardlink.txt"), size: file_size("one-internal-hardlink.txt"), links: 1 + 1, @@ -510,6 +522,7 @@ fn multiple_duplicated_arguments() { ]), }, ReflectionEntry { + dev, ino: file_inode("two-internal-hardlinks.txt"), size: file_size("two-internal-hardlinks.txt"), links: 1 + 2, @@ -520,12 +533,14 @@ fn multiple_duplicated_arguments() { ]), }, ReflectionEntry { + dev, ino: file_inode("one-external-hardlink.txt"), size: file_size("one-external-hardlink.txt"), links: 1 + 1, paths: shared_paths(&["main/sources/one-external-hardlink.txt"]), }, ReflectionEntry { + dev, ino: file_inode("one-internal-one-external-hardlinks.txt"), size: file_size("one-internal-one-external-hardlinks.txt"), links: 1 + 1 + 1, From b6781642244d8b0838b3c6add93b4a9b216f0ca5 Mon Sep 17 00:00:00 2001 From: khai96_ Date: Thu, 2 Apr 2026 16:20:44 +0700 Subject: [PATCH 06/23] refactor: remove unnecessary `pub(crate)` --- src/hardlink/hardlink_list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index 39044063..02efe145 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -27,7 +27,7 @@ use std::path::Path; /// the same inode number. Both du-dust and dua-cli track `(device, inode)` pairs /// for the same reason. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub(crate) struct InodeKey { +struct InodeKey { /// Device number of the filesystem the inode belongs to. dev: u64, /// Inode number within the device. From f03530f9d9d851bb5c850adc0f9ba44e34de17f4 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 09:46:42 +0000 Subject: [PATCH 07/23] refactor(hardlink): introduce DeviceNumber newtype, reorder ino before dev - Add DeviceNumber in src/device_number.rs, mirroring InodeNumber - Reorder InodeKey fields to ino-first for faster derived PartialEq (ino has far more entropy than dev, so comparisons short-circuit earlier) - Use DeviceNumber instead of raw u64 in InodeKey, ReflectionEntry, and all public/internal APIs - Add dev to ConversionError::DuplicatedInode for precise error messages - Add ConversionError::duplicated_inode(InodeKey) constructor - Update sort keys to (ino, dev) order throughout https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/device_number.rs | 23 +++++++++++++ src/hardlink/aware.rs | 3 +- src/hardlink/hardlink_list.rs | 10 +++--- src/hardlink/hardlink_list/iter.rs | 14 ++++---- src/hardlink/hardlink_list/reflection.rs | 35 ++++++++++++------- src/hardlink/hardlink_list/test.rs | 44 ++++++++++++------------ src/lib.rs | 1 + tests/_utils.rs | 3 +- 8 files changed, 85 insertions(+), 48 deletions(-) create mode 100644 src/device_number.rs diff --git a/src/device_number.rs b/src/device_number.rs new file mode 100644 index 00000000..01966819 --- /dev/null +++ b/src/device_number.rs @@ -0,0 +1,23 @@ +use derive_more::{Display, From, Into, LowerHex, Octal, UpperHex}; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// The device number of a filesystem. +#[derive( + Debug, Display, LowerHex, UpperHex, Octal, Clone, Copy, PartialEq, Eq, Hash, From, Into, +)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +pub struct DeviceNumber(u64); + +/// POSIX-exclusive functions. +#[cfg(unix)] +impl DeviceNumber { + /// Get device number of a [`std::fs::Metadata`]. + #[inline] + pub fn get(stats: &std::fs::Metadata) -> Self { + use pipe_trait::Pipe; + use std::os::unix::fs::MetadataExt; + stats.dev().pipe(DeviceNumber) + } +} diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs index 688a85ec..f0b5a6b2 100644 --- a/src/hardlink/aware.rs +++ b/src/hardlink/aware.rs @@ -4,6 +4,7 @@ use super::{ }; use crate::{ data_tree::DataTree, + device_number::DeviceNumber, inode::InodeNumber, os_string_display::OsStringDisplay, reporter::{event::HardlinkDetection, Event, Reporter}, @@ -82,7 +83,7 @@ where })); let ino = InodeNumber::get(stats); - let dev = stats.dev(); + let dev = DeviceNumber::get(stats); self.record .add(ino, dev, size, links, path) .map_err(ReportHardlinksError::AddToRecord) diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index 02efe145..67e9e4a4 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -9,7 +9,7 @@ pub use summary::Summary; pub use Reflection as HardlinkListReflection; pub use Summary as SharedLinkSummary; -use crate::{hardlink::LinkPathList, inode::InodeNumber, size}; +use crate::{device_number::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber, size}; use dashmap::DashMap; use derive_more::{Display, Error}; use smart_default::SmartDefault; @@ -28,10 +28,10 @@ use std::path::Path; /// for the same reason. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] struct InodeKey { - /// Device number of the filesystem the inode belongs to. - dev: u64, /// Inode number within the device. ino: InodeNumber, + /// Device number of the filesystem the inode belongs to. + dev: DeviceNumber, } /// Map value in [`HardlinkList`]. @@ -126,12 +126,12 @@ where pub(crate) fn add( &self, ino: InodeNumber, - dev: u64, + dev: DeviceNumber, size: Size, links: u64, path: &Path, ) -> Result<(), AddError> { - let key = InodeKey { dev, ino }; + let key = InodeKey { ino, dev }; let mut assertions = Ok(()); self.0 .entry(key) diff --git a/src/hardlink/hardlink_list/iter.rs b/src/hardlink/hardlink_list/iter.rs index 04e1de77..156db894 100644 --- a/src/hardlink/hardlink_list/iter.rs +++ b/src/hardlink/hardlink_list/iter.rs @@ -1,5 +1,5 @@ use super::{HardlinkList, InodeKey, Value}; -use crate::{hardlink::LinkPathList, inode::InodeNumber}; +use crate::{device_number::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber}; use dashmap::{iter::Iter as DashIter, mapref::multiple::RefMulti}; use pipe_trait::Pipe; @@ -30,18 +30,18 @@ impl<'a, Size> Iterator for Iter<'a, Size> { } impl<'a, Size> Item<'a, Size> { - /// The device number of the filesystem the inode belongs to. - #[inline] - pub fn dev(&self) -> u64 { - self.0.key().dev - } - /// The inode number of the file. #[inline] pub fn ino(&self) -> InodeNumber { self.0.key().ino } + /// The device number of the filesystem the inode belongs to. + #[inline] + pub fn dev(&self) -> DeviceNumber { + self.0.key().dev + } + /// Size of the file. #[inline] pub fn size(&self) -> &Size { diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index e902c080..0b5189d1 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -1,5 +1,5 @@ use super::{HardlinkList, InodeKey, Value}; -use crate::{hardlink::LinkPathListReflection, inode::InodeNumber}; +use crate::{device_number::DeviceNumber, hardlink::LinkPathListReflection, inode::InodeNumber}; use dashmap::DashMap; use derive_more::{Display, Error, Into, IntoIterator}; use into_sorted::IntoSortedUnstable; @@ -48,10 +48,10 @@ impl Reflection { #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "json", derive(Deserialize, Serialize))] pub struct ReflectionEntry { - /// Device number of the filesystem the inode belongs to. - pub dev: u64, /// The inode number of the file. pub ino: InodeNumber, + /// Device number of the filesystem the inode belongs to. + pub dev: DeviceNumber, /// Size of the file. pub size: Size, /// Total number of links of the file, both listed (in [`Self::paths`]) and unlisted. @@ -63,11 +63,11 @@ pub struct ReflectionEntry { impl ReflectionEntry { /// Create a new entry. #[inline] - fn new(InodeKey { dev, ino }: InodeKey, Value { size, links, paths }: Value) -> Self { + fn new(InodeKey { ino, dev }: InodeKey, Value { size, links, paths }: Value) -> Self { let paths = paths.into(); ReflectionEntry { - dev, ino, + dev, size, links, paths, @@ -78,21 +78,21 @@ impl ReflectionEntry { #[inline] fn dissolve(self) -> (InodeKey, Value) { let ReflectionEntry { - dev, ino, + dev, size, links, paths, } = self; let paths = paths.into(); - (InodeKey { dev, ino }, Value { size, links, paths }) + (InodeKey { ino, dev }, Value { size, links, paths }) } } impl From>> for Reflection { /// Sort the list by `(inode, device)`, then create the reflection. fn from(list: Vec>) -> Self { - list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), entry.dev)) + list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), u64::from(entry.dev))) .pipe(Reflection) } } @@ -111,9 +111,20 @@ impl From> for Reflection { #[derive(Debug, Display, Error, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum ConversionError { - /// When the source has duplicated inode numbers. - #[display("Inode number {_0} is duplicated")] - DuplicatedInode(#[error(not(source))] InodeNumber), + /// When the source has a duplicated `(inode, device)` pair. + #[display("Inode {ino} on device {dev} is duplicated")] + DuplicatedInode { + #[error(not(source))] + ino: InodeNumber, + #[error(not(source))] + dev: DeviceNumber, + }, +} + +impl ConversionError { + fn duplicated_inode(InodeKey { ino, dev }: InodeKey) -> Self { + ConversionError::DuplicatedInode { ino, dev } + } } impl TryFrom> for HardlinkList { @@ -124,7 +135,7 @@ impl TryFrom> for HardlinkList { for entry in entries { let (key, value) = entry.dissolve(); if map.insert(key, value).is_some() { - return key.ino.pipe(ConversionError::DuplicatedInode).pipe(Err); + return key.pipe(ConversionError::duplicated_inode).pipe(Err); } } diff --git a/src/hardlink/hardlink_list/test.rs b/src/hardlink/hardlink_list/test.rs index e7cd064a..d51c81bf 100644 --- a/src/hardlink/hardlink_list/test.rs +++ b/src/hardlink/hardlink_list/test.rs @@ -4,21 +4,21 @@ use pipe_trait::Pipe; use pretty_assertions::{assert_eq, assert_ne}; const TABLE: &[(u64, u64, u64, u64, &str)] = &[ - // dev, ino, size, links, path - (0, 241, 3652, 1, "a"), - (0, 569, 2210, 1, "b"), - (0, 110, 2350, 3, "c"), - (0, 110, 2350, 3, "c1"), - (0, 778, 1110, 1, "d"), - (0, 274, 6060, 2, "e"), - (0, 274, 6060, 2, "e1"), - (0, 883, 4530, 1, "f"), + // ino, dev, size, links, path + (241, 0, 3652, 1, "a"), + (569, 0, 2210, 1, "b"), + (110, 0, 2350, 3, "c"), + (110, 0, 2350, 3, "c1"), + (778, 0, 1110, 1, "d"), + (274, 0, 6060, 2, "e"), + (274, 0, 6060, 2, "e1"), + (883, 0, 4530, 1, "f"), ]; fn add(list: HardlinkList) -> HardlinkList { let values = TABLE[ROW]; - let (dev, ino, size, links, path) = values; - if let Err(error) = list.add(ino.into(), dev, size.into(), links, path.as_ref()) { + let (ino, dev, size, links, path) = values; + if let Err(error) = list.add(ino.into(), dev.into(), size.into(), links, path.as_ref()) { panic!("Failed to add {values:?} (index: {ROW}) to the list: {error}"); } list @@ -120,10 +120,10 @@ fn insertion_difference_cause_inequality() { #[test] fn detect_size_change() { let list = HardlinkList::::new(); - list.add(123.into(), 0, 100.into(), 1, "a".as_ref()) + list.add(123.into(), 0.into(), 100.into(), 1, "a".as_ref()) .expect("add the first path"); let actual = list - .add(123.into(), 0, 110.into(), 1, "b".as_ref()) + .add(123.into(), 0.into(), 110.into(), 1, "b".as_ref()) .expect_err("add the second path"); let expected = AddError::SizeConflict(SizeConflictError { ino: 123.into(), @@ -136,10 +136,10 @@ fn detect_size_change() { #[test] fn detect_number_of_links_change() { let list = HardlinkList::::new(); - list.add(123.into(), 0, 100.into(), 1, "a".as_ref()) + list.add(123.into(), 0.into(), 100.into(), 1, "a".as_ref()) .expect("add the first path"); let actual = list - .add(123.into(), 0, 100.into(), 2, "b".as_ref()) + .add(123.into(), 0.into(), 100.into(), 2, "b".as_ref()) .expect_err("add the second path"); let expected = AddError::NumberOfLinksConflict(NumberOfLinksConflictError { ino: 123.into(), @@ -159,29 +159,29 @@ fn same_ino_on_different_devices_are_treated_separately() { let list = HardlinkList::::new(); // dev=1, ino=100 — first filesystem - list.add(100.into(), 1, 50.into(), 2, "dev1/file_a".as_ref()) + list.add(100.into(), 1.into(), 50.into(), 2, "dev1/file_a".as_ref()) .expect("add dev1/file_a"); - list.add(100.into(), 1, 50.into(), 2, "dev1/file_b".as_ref()) + list.add(100.into(), 1.into(), 50.into(), 2, "dev1/file_b".as_ref()) .expect("add dev1/file_b (same dev+ino → same inode group)"); // dev=2, ino=100 — second filesystem, coincidentally same inode number - list.add(100.into(), 2, 80.into(), 2, "dev2/file_c".as_ref()) + list.add(100.into(), 2.into(), 80.into(), 2, "dev2/file_c".as_ref()) .expect("add dev2/file_c (different dev → separate inode group)"); - list.add(100.into(), 2, 80.into(), 2, "dev2/file_d".as_ref()) + list.add(100.into(), 2.into(), 80.into(), 2, "dev2/file_d".as_ref()) .expect("add dev2/file_d (same dev+ino → same inode group as file_c)"); // Each device should produce its own entry, so the list should have 2 entries. - assert_eq!(list.len(), 2, "expected one entry per (dev, ino) pair"); + assert_eq!(list.len(), 2, "expected one entry per (ino, dev) pair"); let reflection = list.into_reflection(); assert_eq!(reflection.len(), 2); // Sorted by (ino, dev), so dev=1 comes first. let entries: Vec<_> = reflection.iter().collect(); - assert_eq!(entries[0].dev, 1); + assert_eq!(entries[0].dev, 1.into()); assert_eq!(entries[0].ino, 100.into()); assert_eq!(entries[0].paths.len(), 2); - assert_eq!(entries[1].dev, 2); + assert_eq!(entries[1].dev, 2.into()); assert_eq!(entries[1].ino, 100.into()); assert_eq!(entries[1].paths.len(), 2); } diff --git a/src/lib.rs b/src/lib.rs index f5a4d044..04d4046a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,7 @@ pub use clap_utilities; pub mod bytes_format; pub mod data_tree; pub mod device; +pub mod device_number; pub mod fs_tree_builder; pub mod get_size; pub mod hardlink; diff --git a/tests/_utils.rs b/tests/_utils.rs index ff361c93..8507f7ab 100644 --- a/tests/_utils.rs +++ b/tests/_utils.rs @@ -586,9 +586,10 @@ pub fn read_inode_number(path: &Path) -> u64 { /// Read [dev](std::os::unix::fs::MetadataExt::dev) of a path. #[cfg(unix)] -pub fn read_device_number(path: &Path) -> u64 { +pub fn read_device_number(path: &Path) -> parallel_disk_usage::device_number::DeviceNumber { use std::os::unix::fs::MetadataExt; path.pipe(symlink_metadata) .unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}")) .dev() + .into() } From 005864800ff6a2b2ee5655efb7e793b573665a71 Mon Sep 17 00:00:00 2001 From: khai96_ Date: Thu, 2 Apr 2026 17:09:22 +0700 Subject: [PATCH 08/23] docs: remove unnecessary documentation --- src/hardlink/hardlink_list/test.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/hardlink/hardlink_list/test.rs b/src/hardlink/hardlink_list/test.rs index d51c81bf..fa63b474 100644 --- a/src/hardlink/hardlink_list/test.rs +++ b/src/hardlink/hardlink_list/test.rs @@ -149,11 +149,6 @@ fn detect_number_of_links_change() { assert_eq!(actual, expected); } -/// Files on different devices may share the same inode number, but they are -/// unrelated — hardlinks cannot span filesystem boundaries. Verify that two -/// files with the same inode number but different device numbers produce -/// separate entries in the list (i.e. the device number is actually used in -/// the deduplication key). #[test] fn same_ino_on_different_devices_are_treated_separately() { let list = HardlinkList::::new(); From 205f1d913bba4876e92d58d440dc4760457ab5d6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 10:26:58 +0000 Subject: [PATCH 09/23] refactor(hardlink): move DeviceNumber into device module, add dev to error types - Move DeviceNumber from device_number.rs into device.rs alongside DeviceBoundary, mirroring the InodeNumber-in-inode.rs pattern - Add dev: DeviceNumber to SizeConflictError and NumberOfLinksConflictError so error messages identify the exact (inode, device) pair https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/device.rs | 24 ++++++++++++++++++++++++ src/device_number.rs | 23 ----------------------- src/hardlink/aware.rs | 2 +- src/hardlink/hardlink_list.rs | 24 +++++++++++------------- src/hardlink/hardlink_list/iter.rs | 2 +- src/hardlink/hardlink_list/reflection.rs | 2 +- src/hardlink/hardlink_list/test.rs | 2 ++ src/lib.rs | 1 - tests/_utils.rs | 2 +- 9 files changed, 41 insertions(+), 41 deletions(-) delete mode 100644 src/device_number.rs diff --git a/src/device.rs b/src/device.rs index 1ef5a12b..545cce12 100644 --- a/src/device.rs +++ b/src/device.rs @@ -1,3 +1,8 @@ +use derive_more::{Display, From, Into, LowerHex, Octal, UpperHex}; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + /// Whether to cross device boundary into a different filesystem. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DeviceBoundary { @@ -15,3 +20,22 @@ impl DeviceBoundary { } } } + +/// The device number of a filesystem. +#[derive( + Debug, Display, LowerHex, UpperHex, Octal, Clone, Copy, PartialEq, Eq, Hash, From, Into, +)] +#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] +pub struct DeviceNumber(u64); + +/// POSIX-exclusive functions. +#[cfg(unix)] +impl DeviceNumber { + /// Get device number of a [`std::fs::Metadata`]. + #[inline] + pub fn get(stats: &std::fs::Metadata) -> Self { + use pipe_trait::Pipe; + use std::os::unix::fs::MetadataExt; + stats.dev().pipe(DeviceNumber) + } +} diff --git a/src/device_number.rs b/src/device_number.rs deleted file mode 100644 index 01966819..00000000 --- a/src/device_number.rs +++ /dev/null @@ -1,23 +0,0 @@ -use derive_more::{Display, From, Into, LowerHex, Octal, UpperHex}; - -#[cfg(feature = "json")] -use serde::{Deserialize, Serialize}; - -/// The device number of a filesystem. -#[derive( - Debug, Display, LowerHex, UpperHex, Octal, Clone, Copy, PartialEq, Eq, Hash, From, Into, -)] -#[cfg_attr(feature = "json", derive(Deserialize, Serialize))] -pub struct DeviceNumber(u64); - -/// POSIX-exclusive functions. -#[cfg(unix)] -impl DeviceNumber { - /// Get device number of a [`std::fs::Metadata`]. - #[inline] - pub fn get(stats: &std::fs::Metadata) -> Self { - use pipe_trait::Pipe; - use std::os::unix::fs::MetadataExt; - stats.dev().pipe(DeviceNumber) - } -} diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs index f0b5a6b2..067b4334 100644 --- a/src/hardlink/aware.rs +++ b/src/hardlink/aware.rs @@ -4,7 +4,7 @@ use super::{ }; use crate::{ data_tree::DataTree, - device_number::DeviceNumber, + device::DeviceNumber, inode::InodeNumber, os_string_display::OsStringDisplay, reporter::{event::HardlinkDetection, Event, Reporter}, diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index 67e9e4a4..0e4f173e 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -9,7 +9,7 @@ pub use summary::Summary; pub use Reflection as HardlinkListReflection; pub use Summary as SharedLinkSummary; -use crate::{device_number::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber, size}; +use crate::{device::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber, size}; use dashmap::DashMap; use derive_more::{Display, Error}; use smart_default::SmartDefault; @@ -78,31 +78,27 @@ impl HardlinkList { } } -/// Error that occurs when a different size was detected for the same [`ino`][ino]. -/// -/// -/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// Error that occurs when a different size was detected for the same inode. #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] #[display(bound(Size: Debug))] -#[display("Size for inode {ino} changed from {recorded:?} to {detected:?}")] +#[display("Size for inode {ino} on device {dev} changed from {recorded:?} to {detected:?}")] pub struct SizeConflictError { pub ino: InodeNumber, + pub dev: DeviceNumber, pub recorded: Size, pub detected: Size, } -/// Error that occurs when a different [`nlink`][nlink] was detected for the same [`ino`][ino]. -/// -/// -/// [nlink]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.nlink -/// -/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// Error that occurs when a different number of links was detected for the same inode. #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] -#[display("Number of links of inode {ino} changed from {recorded:?} to {detected:?}")] +#[display( + "Number of links of inode {ino} on device {dev} changed from {recorded:?} to {detected:?}" +)] pub struct NumberOfLinksConflictError { pub ino: InodeNumber, + pub dev: DeviceNumber, pub recorded: u64, pub detected: u64, } @@ -139,6 +135,7 @@ where if size != recorded.size { assertions = Err(AddError::SizeConflict(SizeConflictError { ino, + dev, recorded: recorded.size, detected: size, })); @@ -149,6 +146,7 @@ where assertions = Err(AddError::NumberOfLinksConflict( NumberOfLinksConflictError { ino, + dev, recorded: recorded.links, detected: links, }, diff --git a/src/hardlink/hardlink_list/iter.rs b/src/hardlink/hardlink_list/iter.rs index 156db894..539ec5c7 100644 --- a/src/hardlink/hardlink_list/iter.rs +++ b/src/hardlink/hardlink_list/iter.rs @@ -1,5 +1,5 @@ use super::{HardlinkList, InodeKey, Value}; -use crate::{device_number::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber}; +use crate::{device::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber}; use dashmap::{iter::Iter as DashIter, mapref::multiple::RefMulti}; use pipe_trait::Pipe; diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index 0b5189d1..b6ca8d50 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -1,5 +1,5 @@ use super::{HardlinkList, InodeKey, Value}; -use crate::{device_number::DeviceNumber, hardlink::LinkPathListReflection, inode::InodeNumber}; +use crate::{device::DeviceNumber, hardlink::LinkPathListReflection, inode::InodeNumber}; use dashmap::DashMap; use derive_more::{Display, Error, Into, IntoIterator}; use into_sorted::IntoSortedUnstable; diff --git a/src/hardlink/hardlink_list/test.rs b/src/hardlink/hardlink_list/test.rs index fa63b474..04240f86 100644 --- a/src/hardlink/hardlink_list/test.rs +++ b/src/hardlink/hardlink_list/test.rs @@ -127,6 +127,7 @@ fn detect_size_change() { .expect_err("add the second path"); let expected = AddError::SizeConflict(SizeConflictError { ino: 123.into(), + dev: 0.into(), recorded: 100.into(), detected: 110.into(), }); @@ -143,6 +144,7 @@ fn detect_number_of_links_change() { .expect_err("add the second path"); let expected = AddError::NumberOfLinksConflict(NumberOfLinksConflictError { ino: 123.into(), + dev: 0.into(), recorded: 1, detected: 2, }); diff --git a/src/lib.rs b/src/lib.rs index 04d4046a..f5a4d044 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,7 +41,6 @@ pub use clap_utilities; pub mod bytes_format; pub mod data_tree; pub mod device; -pub mod device_number; pub mod fs_tree_builder; pub mod get_size; pub mod hardlink; diff --git a/tests/_utils.rs b/tests/_utils.rs index 8507f7ab..61314ef5 100644 --- a/tests/_utils.rs +++ b/tests/_utils.rs @@ -586,7 +586,7 @@ pub fn read_inode_number(path: &Path) -> u64 { /// Read [dev](std::os::unix::fs::MetadataExt::dev) of a path. #[cfg(unix)] -pub fn read_device_number(path: &Path) -> parallel_disk_usage::device_number::DeviceNumber { +pub fn read_device_number(path: &Path) -> parallel_disk_usage::device::DeviceNumber { use std::os::unix::fs::MetadataExt; path.pipe(symlink_metadata) .unwrap_or_else(|error| panic!("Can't read metadata at {path:?}: {error}")) From 9a16e848ed359f1bbe7fd2def3530dba0efba9ef Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 10:57:41 +0000 Subject: [PATCH 10/23] refactor(hardlink): use tuple variant for ConversionError::DuplicatedInode https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list/reflection.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index b6ca8d50..641651b7 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -112,18 +112,16 @@ impl From> for Reflection { #[non_exhaustive] pub enum ConversionError { /// When the source has a duplicated `(inode, device)` pair. - #[display("Inode {ino} on device {dev} is duplicated")] - DuplicatedInode { - #[error(not(source))] - ino: InodeNumber, - #[error(not(source))] - dev: DeviceNumber, - }, + #[display("Inode {_0} on device {_1} is duplicated")] + DuplicatedInode( + #[error(not(source))] InodeNumber, + #[error(not(source))] DeviceNumber, + ), } impl ConversionError { fn duplicated_inode(InodeKey { ino, dev }: InodeKey) -> Self { - ConversionError::DuplicatedInode { ino, dev } + ConversionError::DuplicatedInode(ino, dev) } } From b4477d5ebdd0774a8322ff50a28cef608146e072 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 11:12:01 +0000 Subject: [PATCH 11/23] docs(hardlink): address review feedback on doc comments and error attrs - Trim InodeKey doc to a single line (remove over-explanation) - Restore doc style for SizeConflictError and NumberOfLinksConflictError with `ino`, `dev`, and `nlink` links to MetadataExt methods - Rephrase Reflection guarantee as "pair of an inode number and a device number" - Remove unnecessary #[error(not(source))] on DuplicatedInode tuple variant (derive_more only auto-assumes source for single-field variants) https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list.rs | 16 +++++++++------- src/hardlink/hardlink_list/reflection.rs | 7 ++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index 0e4f173e..27f2a8d7 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -21,11 +21,6 @@ use pipe_trait::Pipe; use std::path::Path; /// Internal key used to uniquely identify an inode across all filesystems. -/// -/// Hardlinks cannot span filesystems, so including the device number prevents -/// false deduplication of files from different filesystems that happen to share -/// the same inode number. Both du-dust and dua-cli track `(device, inode)` pairs -/// for the same reason. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] struct InodeKey { /// Inode number within the device. @@ -78,7 +73,10 @@ impl HardlinkList { } } -/// Error that occurs when a different size was detected for the same inode. +/// Error that occurs when a different size was detected for the same [`ino`] and [`dev`]. +/// +/// [`ino`]: std::os::unix::fs::MetadataExt::ino +/// [`dev`]: std::os::unix::fs::MetadataExt::dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] #[display(bound(Size: Debug))] @@ -90,7 +88,11 @@ pub struct SizeConflictError { pub detected: Size, } -/// Error that occurs when a different number of links was detected for the same inode. +/// Error that occurs when a different [`nlink`] was detected for the same [`ino`] and [`dev`]. +/// +/// [`nlink`]: std::os::unix::fs::MetadataExt::nlink +/// [`ino`]: std::os::unix::fs::MetadataExt::ino +/// [`dev`]: std::os::unix::fs::MetadataExt::dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] #[display( diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index 641651b7..f43ece30 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; /// internal content. /// /// **Guarantees:** -/// * Every `(device, inode)` pair is unique. +/// * Every pair of an inode number and a device number is unique. /// * The internal list is always sorted by inode numbers (with device number as tie-breaker). /// /// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits. @@ -113,10 +113,7 @@ impl From> for Reflection { pub enum ConversionError { /// When the source has a duplicated `(inode, device)` pair. #[display("Inode {_0} on device {_1} is duplicated")] - DuplicatedInode( - #[error(not(source))] InodeNumber, - #[error(not(source))] DeviceNumber, - ), + DuplicatedInode(InodeNumber, DeviceNumber), } impl ConversionError { From 90cda159ad6a17242472f1ff0a07c4f699c05e9e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 11:26:50 +0000 Subject: [PATCH 12/23] fix(docs): use raw URLs for MetadataExt links to avoid Windows doc errors std::os::unix::fs::MetadataExt does not exist on Windows, so rustdoc path links to it cause compilation errors. Use raw doc.rust-lang.org URLs instead, matching the style used before this PR. https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index 27f2a8d7..28f1c17b 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -75,8 +75,9 @@ impl HardlinkList { /// Error that occurs when a different size was detected for the same [`ino`] and [`dev`]. /// -/// [`ino`]: std::os::unix::fs::MetadataExt::ino -/// [`dev`]: std::os::unix::fs::MetadataExt::dev +/// +/// [`ino`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// [`dev`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] #[display(bound(Size: Debug))] @@ -90,9 +91,10 @@ pub struct SizeConflictError { /// Error that occurs when a different [`nlink`] was detected for the same [`ino`] and [`dev`]. /// -/// [`nlink`]: std::os::unix::fs::MetadataExt::nlink -/// [`ino`]: std::os::unix::fs::MetadataExt::ino -/// [`dev`]: std::os::unix::fs::MetadataExt::dev +/// +/// [`nlink`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.nlink +/// [`ino`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// [`dev`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] #[display( From 840768264ee2754bc59f84f1fe78a7b7897f81f1 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 12:42:46 +0000 Subject: [PATCH 13/23] fix(docs): rephrase sort order line in Reflection doc The review comment was on line 16 (sort order), not line 15 (uniqueness). Rephrase to "sorted by pairs of an inode number and a device number". https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list/reflection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index f43ece30..6d4577a8 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; /// /// **Guarantees:** /// * Every pair of an inode number and a device number is unique. -/// * The internal list is always sorted by inode numbers (with device number as tie-breaker). +/// * The internal list is always sorted by pairs of an inode number and a device number. /// /// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits. /// From ceecb11b3a5209a208fb20294c559c1c59988b94 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 12:44:38 +0000 Subject: [PATCH 14/23] docs(hardlink): separate HTML comments per doc link Each HTML comment explaining the Windows workaround now sits directly above its corresponding link definition, rather than a single combined comment covering all links. https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index 28f1c17b..a529bf1d 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -75,8 +75,9 @@ impl HardlinkList { /// Error that occurs when a different size was detected for the same [`ino`] and [`dev`]. /// -/// +/// /// [`ino`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// /// [`dev`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] @@ -91,9 +92,11 @@ pub struct SizeConflictError { /// Error that occurs when a different [`nlink`] was detected for the same [`ino`] and [`dev`]. /// -/// +/// /// [`nlink`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.nlink +/// /// [`ino`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// /// [`dev`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] From 15d84e927e75e698e583e9b828c7cf37a938a00c Mon Sep 17 00:00:00 2001 From: khai96_ Date: Thu, 2 Apr 2026 19:50:21 +0700 Subject: [PATCH 15/23] docs: revert to prior style Anchor IDs shouldn't have weird syntax. Minimize diff. Why the FUCK was the AI so stupid? --- src/hardlink/hardlink_list.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hardlink/hardlink_list.rs b/src/hardlink/hardlink_list.rs index a529bf1d..8879567d 100644 --- a/src/hardlink/hardlink_list.rs +++ b/src/hardlink/hardlink_list.rs @@ -73,12 +73,12 @@ impl HardlinkList { } } -/// Error that occurs when a different size was detected for the same [`ino`] and [`dev`]. +/// Error that occurs when a different size was detected for the same [`ino`][ino] and [`dev`][dev]. /// /// -/// [`ino`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino /// -/// [`dev`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev +/// [dev]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] #[display(bound(Size: Debug))] @@ -90,14 +90,14 @@ pub struct SizeConflictError { pub detected: Size, } -/// Error that occurs when a different [`nlink`] was detected for the same [`ino`] and [`dev`]. +/// Error that occurs when a different [`nlink`][nlink] was detected for the same [`ino`][ino] and [`dev`][dev]. /// /// -/// [`nlink`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.nlink +/// [nlink]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.nlink /// -/// [`ino`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino +/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino /// -/// [`dev`]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev +/// [dev]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev #[derive(Debug, Display, Error)] #[cfg_attr(test, derive(PartialEq, Eq))] #[display( From 39cbd969ee4310a76cd049bc9bcb4815d86f41a1 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 13:27:26 +0000 Subject: [PATCH 16/23] docs(hardlink): rephrase sort doc in From> https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list/reflection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index 6d4577a8..fee47c4c 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -90,7 +90,7 @@ impl ReflectionEntry { } impl From>> for Reflection { - /// Sort the list by `(inode, device)`, then create the reflection. + /// Sort the list by inode numbers and device numbers, then create the reflection. fn from(list: Vec>) -> Self { list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), u64::from(entry.dev))) .pipe(Reflection) From 9bb7a7ea4ac7e8fa4cb2375b586a236631a437b1 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 13:36:06 +0000 Subject: [PATCH 17/23] refactor(hardlink): extract sorting_key method on ReflectionEntry https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/hardlink_list/reflection.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index fee47c4c..c5698d87 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -87,12 +87,23 @@ impl ReflectionEntry { let paths = paths.into(); (InodeKey { ino, dev }, Value { size, links, paths }) } + + /// Sorting key to be used in the "sort by key" family of functions. + /// + /// Sort by the inode number first, then by the device number. + /// + /// This function returns a pair of 2 `u64`s instead a pair of 2 wrapper + /// types because we prefer them not to have to implement `Ord`. + #[inline] + fn sorting_key(&self) -> (u64, u64) { + (u64::from(self.ino), u64::from(self.dev)) + } } impl From>> for Reflection { /// Sort the list by inode numbers and device numbers, then create the reflection. fn from(list: Vec>) -> Self { - list.into_sorted_unstable_by_key(|entry| (u64::from(entry.ino), u64::from(entry.dev))) + list.into_sorted_unstable_by_key(ReflectionEntry::sorting_key) .pipe(Reflection) } } From 79f73a777aa239cde6f7509996f147004aca7333 Mon Sep 17 00:00:00 2001 From: khai96_ Date: Thu, 2 Apr 2026 20:43:28 +0700 Subject: [PATCH 18/23] docs: explain the existence of `duplicated_inode` --- src/hardlink/hardlink_list/reflection.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index c5698d87..36328030 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -128,6 +128,10 @@ pub enum ConversionError { } impl ConversionError { + /// Convenient function to convert an [`InodeKey`] into a [`ConversionError::DuplicatedInode`]. + /// + /// We don't embed [`InodeKey`] directly into [`ConversionError::DuplicatedInode`] because of + /// their difference in visibility: One is private, the other public. fn duplicated_inode(InodeKey { ino, dev }: InodeKey) -> Self { ConversionError::DuplicatedInode(ino, dev) } From 205e97661f22b2bba64335818b556173a0e08a7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kh=E1=BA=A3i?= Date: Thu, 2 Apr 2026 21:49:46 +0700 Subject: [PATCH 19/23] docs: fix grammar Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/hardlink/hardlink_list/reflection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/hardlink_list/reflection.rs b/src/hardlink/hardlink_list/reflection.rs index 36328030..7a6659c2 100644 --- a/src/hardlink/hardlink_list/reflection.rs +++ b/src/hardlink/hardlink_list/reflection.rs @@ -92,7 +92,7 @@ impl ReflectionEntry { /// /// Sort by the inode number first, then by the device number. /// - /// This function returns a pair of 2 `u64`s instead a pair of 2 wrapper + /// This function returns a pair of 2 `u64`s instead of a pair of 2 wrapper /// types because we prefer them not to have to implement `Ord`. #[inline] fn sorting_key(&self) -> (u64, u64) { From ae449d3050095bd35013a36584ed2111daf2c3b9 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 14:52:42 +0000 Subject: [PATCH 20/23] docs(hardlink): update Aware::record doc to mention device number https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/aware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs index 067b4334..c9409eec 100644 --- a/src/hardlink/aware.rs +++ b/src/hardlink/aware.rs @@ -21,7 +21,7 @@ use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path /// accurately reflect the real size of their containers. #[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)] pub struct Aware { - /// Map an inode number to its size and detected paths. + /// Map an inode number and device number to its size and detected paths. record: HardlinkList, } From 4aa04bbf28e43fbc9fe5667d68ae660b21bcb649 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 14:54:37 +0000 Subject: [PATCH 21/23] docs(hardlink): fix pronoun in Aware::record doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "its" → "their" (two subjects: inode number and device number) https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/aware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs index c9409eec..c1a418cc 100644 --- a/src/hardlink/aware.rs +++ b/src/hardlink/aware.rs @@ -21,7 +21,7 @@ use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path /// accurately reflect the real size of their containers. #[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)] pub struct Aware { - /// Map an inode number and device number to its size and detected paths. + /// Map an inode number and device number to their size and detected paths. record: HardlinkList, } From 326b2a922fe622e3a4070e2de1623aefd25e5074 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 14:55:15 +0000 Subject: [PATCH 22/23] docs(hardlink): fix Aware::record doc wording The size and paths belong to the file identified by the (ino, dev) pair, not to the numbers themselves. https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/aware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs index c1a418cc..53734bb5 100644 --- a/src/hardlink/aware.rs +++ b/src/hardlink/aware.rs @@ -21,7 +21,7 @@ use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path /// accurately reflect the real size of their containers. #[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)] pub struct Aware { - /// Map an inode number and device number to their size and detected paths. + /// Map a pair of an inode number and a device number to its size and detected paths. record: HardlinkList, } From 21277264f61e26a2b2fccafd61c6af327cdcd09c Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Apr 2026 14:57:35 +0000 Subject: [PATCH 23/23] docs(hardlink): clarify Aware::record doc Make it clear that the size and paths belong to the file, not the numbers. https://claude.ai/code/session_01QP9wZyoZcGmJsEsA66ZRok --- src/hardlink/aware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardlink/aware.rs b/src/hardlink/aware.rs index 53734bb5..ad3b5977 100644 --- a/src/hardlink/aware.rs +++ b/src/hardlink/aware.rs @@ -21,7 +21,7 @@ use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path /// accurately reflect the real size of their containers. #[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)] pub struct Aware { - /// Map a pair of an inode number and a device number to its size and detected paths. + /// Map each file (identified by inode number and device number) to its size and detected paths. record: HardlinkList, }