Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e255766
Initial plan
Copilot Mar 10, 2026
ed6162e
fix(hardlink): use (device, inode) key for deduplication to prevent c…
Copilot Mar 10, 2026
38aacab
test(hardlink): add test proving device number is used in deduplicati…
Copilot Mar 10, 2026
eb92bdc
fix(reflection): address doc and comment accuracy for Reflection and …
Copilot Mar 10, 2026
65194d6
chore(git): merge from copilot/check-nlink-hardlink-deduplication
claude Apr 2, 2026
e7e5b5c
fix(hardlink)!: include dev in ReflectionEntry and JSON schema
claude Apr 2, 2026
b678164
refactor: remove unnecessary `pub(crate)`
KSXGitHub Apr 2, 2026
f03530f
refactor(hardlink): introduce DeviceNumber newtype, reorder ino befor…
claude Apr 2, 2026
0058648
docs: remove unnecessary documentation
KSXGitHub Apr 2, 2026
205f1d9
refactor(hardlink): move DeviceNumber into device module, add dev to …
claude Apr 2, 2026
9a16e84
refactor(hardlink): use tuple variant for ConversionError::Duplicated…
claude Apr 2, 2026
b4477d5
docs(hardlink): address review feedback on doc comments and error attrs
claude Apr 2, 2026
90cda15
fix(docs): use raw URLs for MetadataExt links to avoid Windows doc er…
claude Apr 2, 2026
8407682
fix(docs): rephrase sort order line in Reflection doc
claude Apr 2, 2026
ceecb11
docs(hardlink): separate HTML comments per doc link
claude Apr 2, 2026
15d84e9
docs: revert to prior style
KSXGitHub Apr 2, 2026
39cbd96
docs(hardlink): rephrase sort doc in From<Vec<ReflectionEntry>>
claude Apr 2, 2026
9bb7a7e
refactor(hardlink): extract sorting_key method on ReflectionEntry
claude Apr 2, 2026
79f73a7
docs: explain the existence of `duplicated_inode`
KSXGitHub Apr 2, 2026
205e976
docs: fix grammar
KSXGitHub Apr 2, 2026
ae449d3
docs(hardlink): update Aware::record doc to mention device number
claude Apr 2, 2026
4aa04bb
docs(hardlink): fix pronoun in Aware::record doc
claude Apr 2, 2026
326b2a9
docs(hardlink): fix Aware::record doc wording
claude Apr 2, 2026
2127726
docs(hardlink): clarify Aware::record doc
claude Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/device.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
use derive_more::{Display, From, Into, LowerHex, Octal, UpperHex};

#[cfg(feature = "json")]
use serde::{Deserialize, Serialize};

/// Whether to cross device boundary into a different filesystem.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeviceBoundary {
Expand All @@ -15,3 +20,22 @@ impl DeviceBoundary {
}
}
}

/// The device number of a filesystem.
#[derive(
Debug, Display, LowerHex, UpperHex, Octal, Clone, Copy, PartialEq, Eq, Hash, From, Into,
)]
#[cfg_attr(feature = "json", derive(Deserialize, Serialize))]
pub struct DeviceNumber(u64);

/// POSIX-exclusive functions.
#[cfg(unix)]
impl DeviceNumber {
/// Get device number of a [`std::fs::Metadata`].
#[inline]
pub fn get(stats: &std::fs::Metadata) -> Self {
use pipe_trait::Pipe;
use std::os::unix::fs::MetadataExt;
stats.dev().pipe(DeviceNumber)
}
}
6 changes: 4 additions & 2 deletions src/hardlink/aware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use super::{
};
use crate::{
data_tree::DataTree,
device::DeviceNumber,
inode::InodeNumber,
os_string_display::OsStringDisplay,
reporter::{event::HardlinkDetection, Event, Reporter},
Expand All @@ -20,7 +21,7 @@ use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path
/// accurately reflect the real size of their containers.
#[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)]
pub struct Aware<Size> {
/// Map an inode number to its size and detected paths.
/// Map an inode number and device number to its size and detected paths.
Comment thread
KSXGitHub marked this conversation as resolved.
Outdated
record: HardlinkList<Size>,
}

Expand Down Expand Up @@ -82,8 +83,9 @@ where
}));

let ino = InodeNumber::get(stats);
let dev = DeviceNumber::get(stats);
self.record
.add(ino, size, links, path)
.add(ino, dev, size, links, path)
.map_err(ReportHardlinksError::AddToRecord)
Comment thread
KSXGitHub marked this conversation as resolved.
}
}
Expand Down
37 changes: 29 additions & 8 deletions src/hardlink/hardlink_list.rs
Comment thread
KSXGitHub marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub use summary::Summary;
pub use Reflection as HardlinkListReflection;
pub use Summary as SharedLinkSummary;

use crate::{hardlink::LinkPathList, inode::InodeNumber, size};
use crate::{device::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber, size};
use dashmap::DashMap;
use derive_more::{Display, Error};
use smart_default::SmartDefault;
Expand All @@ -20,6 +20,15 @@ use pipe_trait::Pipe;
#[cfg(any(unix, test))]
use std::path::Path;

/// Internal key used to uniquely identify an inode across all filesystems.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct InodeKey {
/// Inode number within the device.
ino: InodeNumber,
/// Device number of the filesystem the inode belongs to.
dev: DeviceNumber,
}

/// Map value in [`HardlinkList`].
#[derive(Debug, Clone)]
struct Value<Size> {
Expand All @@ -38,8 +47,8 @@ struct Value<Size> {
/// [`Reflection`] which implement these traits.
#[derive(Debug, SmartDefault, Clone)]
pub struct HardlinkList<Size>(
/// Map an inode number to its size, number of links, and detected paths.
DashMap<InodeNumber, Value<Size>>,
/// Map an inode key (device + inode number) to its size, number of links, and detected paths.
DashMap<InodeKey, Value<Size>>,
);

impl<Size> HardlinkList<Size> {
Expand All @@ -64,31 +73,39 @@ impl<Size> HardlinkList<Size> {
}
}

/// Error that occurs when a different size was detected for the same [`ino`][ino].
/// Error that occurs when a different size was detected for the same [`ino`][ino] and [`dev`][dev].
///
/// <!-- Should have been `std::os::unix::fs::MetadataExt::ino` but it would error on Windows -->
/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino
/// <!-- Should have been `std::os::unix::fs::MetadataExt::dev` but it would error on Windows -->
/// [dev]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev
#[derive(Debug, Display, Error)]
#[cfg_attr(test, derive(PartialEq, Eq))]
#[display(bound(Size: Debug))]
#[display("Size for inode {ino} changed from {recorded:?} to {detected:?}")]
#[display("Size for inode {ino} on device {dev} changed from {recorded:?} to {detected:?}")]
pub struct SizeConflictError<Size> {
pub ino: InodeNumber,
pub dev: DeviceNumber,
pub recorded: Size,
pub detected: Size,
}

/// Error that occurs when a different [`nlink`][nlink] was detected for the same [`ino`][ino].
/// Error that occurs when a different [`nlink`][nlink] was detected for the same [`ino`][ino] and [`dev`][dev].
///
/// <!-- Should have been `std::os::unix::fs::MetadataExt::nlink` but it would error on Windows -->
/// [nlink]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.nlink
/// <!-- Should have been `std::os::unix::fs::MetadataExt::ino` but it would error on Windows -->
/// [ino]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.ino
/// <!-- Should have been `std::os::unix::fs::MetadataExt::dev` but it would error on Windows -->
/// [dev]: https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.dev
#[derive(Debug, Display, Error)]
#[cfg_attr(test, derive(PartialEq, Eq))]
#[display("Number of links of inode {ino} changed from {recorded:?} to {detected:?}")]
#[display(
"Number of links of inode {ino} on device {dev} changed from {recorded:?} to {detected:?}"
)]
pub struct NumberOfLinksConflictError {
pub ino: InodeNumber,
pub dev: DeviceNumber,
pub recorded: u64,
pub detected: u64,
}
Expand All @@ -112,17 +129,20 @@ where
pub(crate) fn add(
&self,
ino: InodeNumber,
dev: DeviceNumber,
size: Size,
links: u64,
path: &Path,
) -> Result<(), AddError<Size>> {
let key = InodeKey { ino, dev };
let mut assertions = Ok(());
self.0
.entry(ino)
.entry(key)
.and_modify(|recorded| {
if size != recorded.size {
assertions = Err(AddError::SizeConflict(SizeConflictError {
ino,
dev,
recorded: recorded.size,
detected: size,
}));
Expand All @@ -133,6 +153,7 @@ where
assertions = Err(AddError::NumberOfLinksConflict(
NumberOfLinksConflictError {
ino,
dev,
recorded: recorded.links,
detected: links,
},
Expand Down
16 changes: 11 additions & 5 deletions src/hardlink/hardlink_list/iter.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use super::{HardlinkList, Value};
use crate::{hardlink::LinkPathList, inode::InodeNumber};
use super::{HardlinkList, InodeKey, Value};
use crate::{device::DeviceNumber, hardlink::LinkPathList, inode::InodeNumber};
use dashmap::{iter::Iter as DashIter, mapref::multiple::RefMulti};
use pipe_trait::Pipe;

/// Iterator over entries in [`HardlinkList`].
#[derive(derive_more::Debug)]
#[debug(bound())]
#[debug("Iter(..)")]
pub struct Iter<'a, Size>(DashIter<'a, InodeNumber, Value<Size>>);
pub struct Iter<'a, Size>(DashIter<'a, InodeKey, Value<Size>>);

impl<Size> HardlinkList<Size> {
/// Iterate over the recorded entries.
Expand All @@ -20,7 +20,7 @@ impl<Size> HardlinkList<Size> {
#[derive(derive_more::Debug)]
#[debug(bound())]
#[debug("Item(..)")]
pub struct Item<'a, Size>(RefMulti<'a, InodeNumber, Value<Size>>);
pub struct Item<'a, Size>(RefMulti<'a, InodeKey, Value<Size>>);

impl<'a, Size> Iterator for Iter<'a, Size> {
type Item = Item<'a, Size>;
Expand All @@ -33,7 +33,13 @@ impl<'a, Size> Item<'a, Size> {
/// The inode number of the file.
#[inline]
pub fn ino(&self) -> InodeNumber {
*self.0.key()
self.0.key().ino
}

/// The device number of the filesystem the inode belongs to.
#[inline]
pub fn dev(&self) -> DeviceNumber {
self.0.key().dev
}

/// Size of the file.
Expand Down
59 changes: 42 additions & 17 deletions src/hardlink/hardlink_list/reflection.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::{HardlinkList, Value};
use crate::{hardlink::LinkPathListReflection, inode::InodeNumber};
use super::{HardlinkList, InodeKey, Value};
use crate::{device::DeviceNumber, hardlink::LinkPathListReflection, inode::InodeNumber};
use dashmap::DashMap;
use derive_more::{Display, Error, Into, IntoIterator};
use into_sorted::IntoSortedUnstable;
Expand All @@ -12,8 +12,8 @@ use serde::{Deserialize, Serialize};
/// internal content.
///
/// **Guarantees:**
/// * Every inode number is unique.
/// * The internal list is always sorted by inode numbers.
/// * Every pair of an inode number and a device number is unique.
/// * The internal list is always sorted by pairs of an inode number and a device number.
///
/// **Equality:** `Reflection` implements `PartialEq` and `Eq` traits.
///
Expand Down Expand Up @@ -50,6 +50,8 @@ impl<Size> Reflection<Size> {
pub struct ReflectionEntry<Size> {
/// The inode number of the file.
pub ino: InodeNumber,
/// Device number of the filesystem the inode belongs to.
pub dev: DeviceNumber,
/// Size of the file.
pub size: Size,
/// Total number of links of the file, both listed (in [`Self::paths`]) and unlisted.
Expand All @@ -61,42 +63,55 @@ pub struct ReflectionEntry<Size> {
impl<Size> ReflectionEntry<Size> {
/// Create a new entry.
#[inline]
fn new(ino: InodeNumber, Value { size, links, paths }: Value<Size>) -> Self {
fn new(InodeKey { ino, dev }: InodeKey, Value { size, links, paths }: Value<Size>) -> Self {
let paths = paths.into();
ReflectionEntry {
ino,
dev,
size,
links,
paths,
}
}

/// Dissolve [`ReflectionEntry`] into a pair of [`InodeNumber`] and [`Value`].
/// Dissolve [`ReflectionEntry`] into a pair of [`InodeKey`] and [`Value`].
#[inline]
fn dissolve(self) -> (InodeNumber, Value<Size>) {
fn dissolve(self) -> (InodeKey, Value<Size>) {
let ReflectionEntry {
ino,
dev,
size,
links,
paths,
} = self;
let paths = paths.into();
(ino, Value { size, links, paths })
(InodeKey { ino, dev }, Value { size, links, paths })
}

/// Sorting key to be used in the "sort by key" family of functions.
///
/// Sort by the inode number first, then by the device number.
///
/// This function returns a pair of 2 `u64`s instead of a pair of 2 wrapper
/// types because we prefer them not to have to implement `Ord`.
#[inline]
fn sorting_key(&self) -> (u64, u64) {
(u64::from(self.ino), u64::from(self.dev))
}
}

impl<Size> From<Vec<ReflectionEntry<Size>>> for Reflection<Size> {
/// Sort the list by inode numbers, then create the reflection.
/// Sort the list by inode numbers and device numbers, then create the reflection.
fn from(list: Vec<ReflectionEntry<Size>>) -> Self {
list.into_sorted_unstable_by_key(|entry| u64::from(entry.ino))
list.into_sorted_unstable_by_key(ReflectionEntry::sorting_key)
.pipe(Reflection)
}
}

impl<Size> From<HardlinkList<Size>> for Reflection<Size> {
fn from(HardlinkList(list): HardlinkList<Size>) -> Self {
list.into_iter()
.map(|(ino, value)| ReflectionEntry::new(ino, value))
.map(|(key, value)| ReflectionEntry::new(key, value))
.collect::<Vec<_>>()
.pipe(Reflection::from)
}
Expand All @@ -107,9 +122,19 @@ impl<Size> From<HardlinkList<Size>> for Reflection<Size> {
#[derive(Debug, Display, Error, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ConversionError {
/// When the source has duplicated inode numbers.
#[display("Inode number {_0} is duplicated")]
DuplicatedInode(#[error(not(source))] InodeNumber),
/// When the source has a duplicated `(inode, device)` pair.
#[display("Inode {_0} on device {_1} is duplicated")]
DuplicatedInode(InodeNumber, DeviceNumber),
Comment thread
KSXGitHub marked this conversation as resolved.
}

impl ConversionError {
/// Convenient function to convert an [`InodeKey`] into a [`ConversionError::DuplicatedInode`].
///
/// We don't embed [`InodeKey`] directly into [`ConversionError::DuplicatedInode`] because of
/// their difference in visibility: One is private, the other public.
fn duplicated_inode(InodeKey { ino, dev }: InodeKey) -> Self {
ConversionError::DuplicatedInode(ino, dev)
}
}

impl<Size> TryFrom<Reflection<Size>> for HardlinkList<Size> {
Expand All @@ -118,9 +143,9 @@ impl<Size> TryFrom<Reflection<Size>> for HardlinkList<Size> {
let map = DashMap::with_capacity(entries.len());

for entry in entries {
let (ino, value) = entry.dissolve();
if map.insert(ino, value).is_some() {
return ino.pipe(ConversionError::DuplicatedInode).pipe(Err);
let (key, value) = entry.dissolve();
if map.insert(key, value).is_some() {
return key.pipe(ConversionError::duplicated_inode).pipe(Err);
}
}

Expand Down
Loading
Loading