diff --git a/Cargo.lock b/Cargo.lock index c45c3cd..24a8508 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -267,6 +267,7 @@ dependencies = [ "clap", "ed2k", "indicatif", + "libc", "md-5", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 9d7fcf4..70884b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "l-s" -version = "0.5.1" +version = "0.5.2" authors = ["licsber "] edition = "2021" @@ -15,6 +15,7 @@ anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } ed2k = "1.0.1" indicatif = "0.18" +libc = "0.2" md-5 = "0.10" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/src/cli.rs b/src/cli.rs index 5966967..b2f6f05 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use std::{env, fs}; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Context, Result}; use clap::Parser; /// 负责解析命令行参数 @@ -20,7 +20,20 @@ impl Cli { None => env::current_dir().context("无法获取当前工作目录")?, }; - fs::canonicalize(&candidate) - .with_context(|| format!("无法解析路径: {}", candidate.display())) + let path = if candidate.is_absolute() { + candidate + } else { + env::current_dir() + .context("无法获取当前工作目录")? + .join(candidate) + }; + + let info = fs::symlink_metadata(&path) + .with_context(|| format!("无法读取路径信息: {}", path.display()))?; + if info.file_type().is_symlink() { + return Err(anyhow!("不支持扫描符号链接: {}", path.display())); + } + + Ok(path) } } diff --git a/src/main.rs b/src/main.rs index 0c1186a..26203e8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,13 +5,16 @@ mod meta; mod utils; use std::fs::{self, File, OpenOptions}; -use std::io::Write; +use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::time::{Instant, SystemTime, UNIX_EPOCH}; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Context, Result}; use clap::Parser; -use meta::{calc_xxh128_with_callback, scan_dir_xxh128, DirSnapshot, FileMeta, ProgressTracker}; +use meta::{ + calc_xxh128_from_file_with_callback, open_regular_file_nofollow, scan_dir_xxh128, DirSnapshot, + FileMeta, ProgressTracker, +}; fn main() -> Result<()> { let started = Instant::now(); @@ -19,10 +22,14 @@ fn main() -> Result<()> { let target = cli.resolve_path()?; println!("目标: {}", target.display()); - if target.is_dir() { + let target_info = fs::symlink_metadata(&target) + .with_context(|| format!("无法读取路径信息: {}", target.display()))?; + if target_info.is_dir() { process_dir(&target)?; - } else { + } else if target_info.is_file() { process_file(&target)?; + } else { + return Err(anyhow!("{} 不是文件或目录", target.display())); } println!("耗时: {:?}", started.elapsed()); @@ -35,8 +42,19 @@ fn process_file(path: &Path) -> Result<()> { .map(Path::to_path_buf) .unwrap_or_else(|| PathBuf::from(".")) .join("meta"); - fs::create_dir_all(&meta_dir) - .with_context(|| format!("无法创建目录: {}", meta_dir.display()))?; + match symlink_metadata_optional(&meta_dir)? { + Some(info) if info.file_type().is_symlink() => { + return Err(anyhow!("不支持符号链接目录: {}", meta_dir.display())); + } + Some(info) if !info.is_dir() => { + return Err(anyhow!("{} 不是目录", meta_dir.display())); + } + Some(_) => {} + None => { + fs::create_dir_all(&meta_dir) + .with_context(|| format!("无法创建目录: {}", meta_dir.display()))?; + } + } let basename = path .file_name() @@ -44,32 +62,45 @@ fn process_file(path: &Path) -> Result<()> { .unwrap_or_else(|| "unknown".to_string()); let save_path = meta_dir.join(format!("{basename}.json")); - // 获取文件大小 - let file_size = fs::metadata(path) - .with_context(|| format!("无法读取文件信息: {}", path.display()))? - .len(); - - if !save_path.exists() { + let existing_save = symlink_metadata_optional(&save_path)?; + if existing_save.is_none() { + let file = open_regular_file_nofollow(path)?; + let file_size = file + .metadata() + .with_context(|| format!("无法读取文件信息: {}", path.display()))? + .len(); let tracker = ProgressTracker::new_single_file(file_size, &basename); let on_bytes = tracker.bytes_callback(); let on_iop = tracker.iop_callback(); - let meta = FileMeta::from_path_with_callback(path, on_bytes, on_iop)?; + let meta = FileMeta::from_open_file_with_callback(path, file, on_bytes, on_iop)?; tracker.finish("处理完成"); let json = meta.to_pretty_json()?; println!("{}", json); write_atomic(&save_path, &json)?; return Ok(()); } + let save_info = existing_save.expect("checked as Some"); + if save_info.file_type().is_symlink() { + return Err(anyhow!("不支持符号链接元数据文件: {}", save_path.display())); + } + if !save_info.is_file() { + return Err(anyhow!("{} 不是文件", save_path.display())); + } - let existing = File::open(&save_path) + let existing = open_regular_file_nofollow(&save_path) .with_context(|| format!("无法读取历史元数据: {}", save_path.display()))?; let old_meta = FileMeta::from_reader(existing)?; // 使用进度条计算快速哈希 + let file = open_regular_file_nofollow(path)?; + let file_size = file + .metadata() + .with_context(|| format!("无法读取文件信息: {}", path.display()))? + .len(); let tracker = ProgressTracker::new_single_file(file_size, &basename); let on_bytes = tracker.bytes_callback(); let on_iop = tracker.iop_callback(); - let fast_hash = calc_xxh128_with_callback(path, on_bytes, on_iop)?; + let fast_hash = calc_xxh128_from_file_with_callback(path, file, on_bytes, on_iop)?; tracker.finish("校验完成"); if fast_hash == old_meta.xxh128 { @@ -79,10 +110,15 @@ fn process_file(path: &Path) -> Result<()> { println!("校验失败!"); println!("现校验文件:"); + let file = open_regular_file_nofollow(path)?; + let file_size = file + .metadata() + .with_context(|| format!("无法读取文件信息: {}", path.display()))? + .len(); let tracker = ProgressTracker::new_single_file(file_size, &basename); let on_bytes = tracker.bytes_callback(); let on_iop = tracker.iop_callback(); - let meta = FileMeta::from_path_with_callback(path, on_bytes, on_iop)?; + let meta = FileMeta::from_open_file_with_callback(path, file, on_bytes, on_iop)?; tracker.finish("处理完成"); println!("{}", meta.to_pretty_json()?); println!("原校验文件:"); @@ -95,23 +131,30 @@ fn process_dir(path: &Path) -> Result<()> { let meta_path = path.join("meta.json"); let backup_path = path.join("meta-old.json"); - if !meta_path.exists() { + let Some(meta_info) = symlink_metadata_optional(&meta_path)? else { let snapshot = DirSnapshot::build_root(path)?; let json = serde_json::to_string_pretty(&snapshot)?; write_atomic(&meta_path, &json)?; return Ok(()); + }; + if meta_info.file_type().is_symlink() { + return Err(anyhow!("不支持符号链接元数据文件: {}", meta_path.display())); + } + if !meta_info.is_file() { + return Err(anyhow!("{} 不是文件", meta_path.display())); } - if backup_path.exists() { + if symlink_metadata_optional(&backup_path)?.is_some() { fs::remove_file(&backup_path)?; } fs::rename(&meta_path, &backup_path) .with_context(|| format!("无法重命名旧meta: {}", meta_path.display()))?; + sync_parent_dir(&backup_path)?; println!("发现旧元数据,已暂存为 meta-old.json,开始校验..."); - let meta_file = - File::open(&backup_path).with_context(|| format!("无法读取: {}", backup_path.display()))?; + let meta_file = open_regular_file_nofollow(&backup_path) + .with_context(|| format!("无法读取: {}", backup_path.display()))?; let snapshot = DirSnapshot::from_reader(meta_file)?; let mut stored = snapshot.collect_file_map(path); let current = scan_dir_xxh128(path)?; @@ -145,6 +188,7 @@ fn process_dir(path: &Path) -> Result<()> { println!("校验通过."); fs::rename(&backup_path, &meta_path) .with_context(|| format!("无法恢复meta: {}", meta_path.display()))?; + sync_parent_dir(&meta_path)?; } Ok(()) @@ -180,6 +224,7 @@ fn write_atomic(path: &Path, contents: &str) -> Result<()> { path.display() ) })?; + sync_parent_dir(path)?; Ok(()) })(); @@ -189,3 +234,19 @@ fn write_atomic(path: &Path, contents: &str) -> Result<()> { result } + +fn symlink_metadata_optional(path: &Path) -> Result> { + match fs::symlink_metadata(path) { + Ok(info) => Ok(Some(info)), + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err).with_context(|| format!("无法读取路径信息: {}", path.display())), + } +} + +fn sync_parent_dir(path: &Path) -> Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let dir = File::open(parent) + .with_context(|| format!("无法打开父目录以同步: {}", parent.display()))?; + dir.sync_all() + .with_context(|| format!("无法同步父目录: {}", parent.display())) +} diff --git a/src/meta/file.rs b/src/meta/file.rs index 2ca19ed..6eb41f3 100644 --- a/src/meta/file.rs +++ b/src/meta/file.rs @@ -1,4 +1,4 @@ -use std::fs::{self, File}; +use std::fs::{File, OpenOptions}; use std::io::Read; use std::path::Path; use std::time::{SystemTime, UNIX_EPOCH}; @@ -16,6 +16,11 @@ use crate::constants::{DEFAULT_BUFFER_SIZE, HEAD_115_BYTES, HEAD_BAIDU_BYTES}; use crate::head_hash::{calc_head_115, calc_head_baidu, HeadChunk}; use crate::utils::{basename, friendly_size, hex_upper}; +#[cfg(not(unix))] +use std::fs; +#[cfg(unix)] +use std::os::unix::fs::OpenOptionsExt; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FileMeta { pub basename: String, @@ -32,8 +37,23 @@ pub struct FileMeta { } impl FileMeta { + #[cfg(not(unix))] pub fn from_path_with_callback( path: &Path, + on_bytes_read: F1, + on_iop: F2, + ) -> Result + where + F1: FnMut(u64), + F2: FnMut(), + { + let file = open_regular_file_nofollow(path)?; + Self::from_open_file_with_callback(path, file, on_bytes_read, on_iop) + } + + pub(crate) fn from_open_file_with_callback( + path: &Path, + mut file: File, mut on_bytes_read: F1, mut on_iop: F2, ) -> Result @@ -41,8 +61,9 @@ impl FileMeta { F1: FnMut(u64), F2: FnMut(), { - let info = - fs::metadata(path).with_context(|| format!("无法读取文件信息: {}", path.display()))?; + let info = file + .metadata() + .with_context(|| format!("无法读取文件信息: {}", path.display()))?; if !info.is_file() { return Err(anyhow!("{} 不是文件", path.display())); } @@ -60,9 +81,6 @@ impl FileMeta { .map(|d| d.as_secs() as i64) .unwrap_or(0); - let mut file = - File::open(path).with_context(|| format!("无法打开文件: {}", path.display()))?; - let mut buffer = vec![0u8; DEFAULT_BUFFER_SIZE]; let mut md5_hasher = Md5::new(); let mut sha1_hasher = Sha1::new(); @@ -124,8 +142,23 @@ impl FileMeta { } } +#[cfg(not(unix))] pub fn calc_xxh128_with_callback( path: &Path, + on_bytes_read: F1, + on_iop: F2, +) -> Result +where + F1: FnMut(u64), + F2: FnMut(), +{ + let file = open_regular_file_nofollow(path)?; + calc_xxh128_from_file_with_callback(path, file, on_bytes_read, on_iop) +} + +pub(crate) fn calc_xxh128_from_file_with_callback( + path: &Path, + mut file: File, mut on_bytes_read: F1, mut on_iop: F2, ) -> Result @@ -133,7 +166,13 @@ where F1: FnMut(u64), F2: FnMut(), { - let mut file = File::open(path).with_context(|| format!("无法打开文件: {}", path.display()))?; + let info = file + .metadata() + .with_context(|| format!("无法读取文件信息: {}", path.display()))?; + if !info.is_file() { + return Err(anyhow!("{} 不是文件", path.display())); + } + let mut buffer = vec![0u8; DEFAULT_BUFFER_SIZE]; let mut hasher = Xxh3::new(); @@ -149,3 +188,34 @@ where Ok(hex_upper(hasher.digest128().to_be_bytes())) } + +pub(crate) fn open_regular_file_nofollow(path: &Path) -> Result { + let file = open_file_nofollow(path)?; + let info = file + .metadata() + .with_context(|| format!("无法读取文件信息: {}", path.display()))?; + if !info.is_file() { + return Err(anyhow!("{} 不是文件", path.display())); + } + Ok(file) +} + +#[cfg(unix)] +fn open_file_nofollow(path: &Path) -> Result { + let mut options = OpenOptions::new(); + options.read(true); + options.custom_flags(libc::O_CLOEXEC | libc::O_NOFOLLOW); + options + .open(path) + .with_context(|| format!("无法打开文件: {}", path.display())) +} + +#[cfg(not(unix))] +fn open_file_nofollow(path: &Path) -> Result { + let info = fs::symlink_metadata(path) + .with_context(|| format!("无法读取文件信息: {}", path.display()))?; + if info.file_type().is_symlink() { + return Err(anyhow!("不支持扫描符号链接: {}", path.display())); + } + File::open(path).with_context(|| format!("无法打开文件: {}", path.display())) +} diff --git a/src/meta/mod.rs b/src/meta/mod.rs index 1fccf93..25629f3 100644 --- a/src/meta/mod.rs +++ b/src/meta/mod.rs @@ -2,6 +2,7 @@ mod file; mod progress; mod tree; -pub use file::{calc_xxh128_with_callback, FileMeta}; +pub use file::FileMeta; +pub(crate) use file::{calc_xxh128_from_file_with_callback, open_regular_file_nofollow}; pub use progress::ProgressTracker; pub use tree::{scan_dir_xxh128, DirSnapshot}; diff --git a/src/meta/tree.rs b/src/meta/tree.rs index a9afb68..350854f 100644 --- a/src/meta/tree.rs +++ b/src/meta/tree.rs @@ -1,16 +1,26 @@ use std::collections::BTreeMap; -use std::fs; -use std::fs::File; use std::path::{Path, PathBuf}; -use anyhow::{anyhow, Context, Result}; +use anyhow::Result; +#[cfg(not(unix))] +use anyhow::{anyhow, Context}; use serde::{Deserialize, Serialize}; -use super::file::{calc_xxh128_with_callback, FileMeta}; +#[cfg(unix)] +use super::file::calc_xxh128_from_file_with_callback; +#[cfg(not(unix))] +use super::file::calc_xxh128_with_callback; +use super::file::FileMeta; use super::progress::ProgressTracker; use crate::constants::META_VERSION; +#[cfg(not(unix))] use crate::utils::{basename, should_skip_dir, should_skip_file}; +#[cfg(not(unix))] +use std::fs; +#[cfg(not(unix))] +use std::fs::File; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DirSnapshot { pub dir_name: String, @@ -37,6 +47,12 @@ impl DirSnapshot { Ok(serde_json::from_reader(reader)?) } + #[cfg(unix)] + fn build_node(path: &Path, tracker: &ProgressTracker) -> Result { + unix_walk::build_node(path, tracker) + } + + #[cfg(not(unix))] fn build_node(path: &Path, tracker: &ProgressTracker) -> Result { let dir_name = path .file_name() @@ -120,6 +136,7 @@ impl DirSnapshot { /// 加载子目录的 meta.json 并通过 xxh128 快速校验。 /// 校验通过则返回已有的 DirSnapshot,否则返回 Err 终止流程。 + #[cfg(not(unix))] fn verify_and_load(path: &Path, tracker: &ProgressTracker) -> Result { let meta_path = path.join("meta.json"); let meta_file = @@ -181,11 +198,20 @@ pub fn scan_dir_xxh128(path: &Path) -> Result> { } fn count_files(path: &Path) -> Result { - let mut count = 0u64; - count_files_recursive(path, &mut count)?; - Ok(count) + #[cfg(unix)] + { + unix_walk::count_files(path) + } + + #[cfg(not(unix))] + { + let mut count = 0u64; + count_files_recursive(path, &mut count)?; + Ok(count) + } } +#[cfg(not(unix))] fn count_files_recursive(path: &Path, count: &mut u64) -> Result<()> { let entries = fs::read_dir(path) .with_context(|| format!("无法遍历目录: {}", path.display()))? @@ -209,10 +235,8 @@ fn count_files_recursive(path: &Path, count: &mut u64) -> Result<()> { continue; } count_files_recursive(&full_path, count)?; - } else { - if !should_skip_file(&name) { - *count += 1; - } + } else if !should_skip_file(&name) { + *count += 1; } } @@ -224,46 +248,488 @@ fn walk_dir_with_progress( map: &mut BTreeMap, tracker: &ProgressTracker, ) -> Result<()> { - let mut entries = fs::read_dir(path) - .with_context(|| format!("无法遍历目录: {}", path.display()))? - .collect::, _>>() - .with_context(|| format!("读取目录失败: {}", path.display()))?; - entries.sort_unstable_by_key(|e| e.file_name()); - - for entry in entries { - let file_name = entry.file_name(); - let name = file_name.to_string_lossy().to_string(); - let full_path = entry.path(); - let file_type = entry - .file_type() - .with_context(|| format!("无法读取类型: {}", full_path.display()))?; - - if file_type.is_symlink() { - continue; - } - - if file_type.is_dir() { - if should_skip_dir(&name) { - continue; - } - walk_dir_with_progress(&full_path, map, tracker)?; - continue; - } - - if should_skip_file(&name) { - continue; - } - - // 获取文件大小并开始跟踪 - let file_size = entry.metadata().map(|m| m.len()).unwrap_or(0); - tracker.start_file(file_size, &name); - - let on_bytes = tracker.bytes_callback(); - let on_iop = tracker.iop_callback(); - let hash = calc_xxh128_with_callback(&full_path, on_bytes, on_iop)?; - map.insert(full_path, hash); - tracker.finish_file(); + #[cfg(unix)] + { + unix_walk::walk_dir_with_progress(path, map, tracker) } - Ok(()) + #[cfg(not(unix))] + { + let mut entries = fs::read_dir(path) + .with_context(|| format!("无法遍历目录: {}", path.display()))? + .collect::, _>>() + .with_context(|| format!("读取目录失败: {}", path.display()))?; + entries.sort_unstable_by_key(|e| e.file_name()); + + for entry in entries { + let file_name = entry.file_name(); + let name = file_name.to_string_lossy().to_string(); + let full_path = entry.path(); + let file_type = entry + .file_type() + .with_context(|| format!("无法读取类型: {}", full_path.display()))?; + + if file_type.is_symlink() { + continue; + } + + if file_type.is_dir() { + if should_skip_dir(&name) { + continue; + } + walk_dir_with_progress(&full_path, map, tracker)?; + continue; + } + + if should_skip_file(&name) { + continue; + } + + // 获取文件大小并开始跟踪 + let file_size = entry.metadata().map(|m| m.len()).unwrap_or(0); + tracker.start_file(file_size, &name); + + let on_bytes = tracker.bytes_callback(); + let on_iop = tracker.iop_callback(); + let hash = calc_xxh128_with_callback(&full_path, on_bytes, on_iop)?; + map.insert(full_path, hash); + tracker.finish_file(); + } + + Ok(()) + } +} + +#[cfg(unix)] +mod unix_walk { + use std::collections::BTreeMap; + use std::ffi::{CStr, CString, OsStr, OsString}; + use std::fs::File; + use std::io; + use std::mem::MaybeUninit; + use std::os::fd::{AsRawFd, FromRawFd, RawFd}; + use std::os::unix::ffi::{OsStrExt, OsStringExt}; + use std::os::unix::fs::MetadataExt; + use std::path::{Path, PathBuf}; + + use anyhow::{anyhow, Context, Result}; + + use super::{calc_xxh128_from_file_with_callback, DirSnapshot, FileMeta, ProgressTracker}; + use crate::utils::{basename, should_skip_dir, should_skip_file}; + + struct DirHandle { + file: File, + } + + struct DirEntryInfo { + name: OsString, + stat: libc::stat, + } + + enum EntryKind { + Directory, + RegularFile, + Symlink, + Other, + } + + impl DirHandle { + fn open_path(path: &Path) -> Result { + let c_path = cstring_from_path(path)?; + let fd = unsafe { libc::open(c_path.as_ptr(), dir_open_flags()) }; + if fd == -1 { + return Err(io::Error::last_os_error()) + .with_context(|| format!("无法打开目录: {}", path.display())); + } + + Ok(Self { + file: unsafe { File::from_raw_fd(fd) }, + }) + } + + fn raw_fd(&self) -> RawFd { + self.file.as_raw_fd() + } + + fn entries(&self, path: &Path) -> Result> { + let dup_fd = unsafe { libc::dup(self.raw_fd()) }; + if dup_fd == -1 { + return Err(io::Error::last_os_error()) + .with_context(|| format!("无法遍历目录: {}", path.display())); + } + + let dir = unsafe { libc::fdopendir(dup_fd) }; + if dir.is_null() { + let err = io::Error::last_os_error(); + unsafe { + libc::close(dup_fd); + } + return Err(err).with_context(|| format!("无法遍历目录: {}", path.display())); + } + + let _stream = DirStream(dir); + let mut entries = Vec::new(); + loop { + let entry = unsafe { libc::readdir(dir) }; + if entry.is_null() { + break; + } + + let name_bytes = unsafe { CStr::from_ptr((*entry).d_name.as_ptr()) }.to_bytes(); + if name_bytes == b"." || name_bytes == b".." { + continue; + } + + let name = OsString::from_vec(name_bytes.to_vec()); + let full_path = path.join(&name); + if let Some(stat) = self.stat_child(&name, &full_path)? { + entries.push(DirEntryInfo { name, stat }); + } + } + + entries.sort_unstable_by(|left, right| left.name.cmp(&right.name)); + Ok(entries) + } + + fn stat_child(&self, name: &OsStr, path: &Path) -> Result> { + let c_name = cstring_from_os_str(name, path)?; + let mut stat = MaybeUninit::::uninit(); + let code = unsafe { + libc::fstatat( + self.raw_fd(), + c_name.as_ptr(), + stat.as_mut_ptr(), + libc::AT_SYMLINK_NOFOLLOW, + ) + }; + + if code == -1 { + let err = io::Error::last_os_error(); + if err.kind() == io::ErrorKind::NotFound { + return Ok(None); + } + return Err(err).with_context(|| format!("无法读取类型: {}", path.display())); + } + + Ok(Some(unsafe { stat.assume_init() })) + } + + fn has_regular_child(&self, name: &OsStr, path: &Path) -> Result { + Ok(matches!( + self.stat_child(name, path)? + .map(|stat| kind_from_mode(stat.st_mode)), + Some(EntryKind::RegularFile) + )) + } + + fn open_child_dir(&self, entry: &DirEntryInfo, path: &Path) -> Result { + let c_name = cstring_from_os_str(&entry.name, path)?; + let fd = unsafe { libc::openat(self.raw_fd(), c_name.as_ptr(), dir_open_flags()) }; + if fd == -1 { + return Err(io::Error::last_os_error()) + .with_context(|| format!("无法打开目录: {}", path.display())); + } + + let file = unsafe { File::from_raw_fd(fd) }; + let info = file + .metadata() + .with_context(|| format!("无法读取目录信息: {}", path.display()))?; + if !stat_matches(&info, &entry.stat) { + return Err(anyhow!("扫描期间目录被替换: {}", path.display())); + } + + Ok(Self { file }) + } + + fn open_child_file(&self, entry: &DirEntryInfo, path: &Path) -> Result { + let c_name = cstring_from_os_str(&entry.name, path)?; + let fd = unsafe { libc::openat(self.raw_fd(), c_name.as_ptr(), file_open_flags()) }; + if fd == -1 { + return Err(io::Error::last_os_error()) + .with_context(|| format!("无法打开文件: {}", path.display())); + } + + let file = unsafe { File::from_raw_fd(fd) }; + let info = file + .metadata() + .with_context(|| format!("无法读取文件信息: {}", path.display()))?; + if !info.is_file() { + return Err(anyhow!("{} 不是文件", path.display())); + } + if !stat_matches(&info, &entry.stat) { + return Err(anyhow!("扫描期间文件被替换: {}", path.display())); + } + + Ok(file) + } + } + + struct DirStream(*mut libc::DIR); + + impl Drop for DirStream { + fn drop(&mut self) { + unsafe { + libc::closedir(self.0); + } + } + } + + pub(super) fn build_node(path: &Path, tracker: &ProgressTracker) -> Result { + let dir = DirHandle::open_path(path)?; + build_node_at(path, &dir, tracker) + } + + pub(super) fn count_files(path: &Path) -> Result { + let dir = DirHandle::open_path(path)?; + let mut count = 0u64; + count_files_at(path, &dir, &mut count)?; + Ok(count) + } + + pub(super) fn walk_dir_with_progress( + path: &Path, + map: &mut BTreeMap, + tracker: &ProgressTracker, + ) -> Result<()> { + let dir = DirHandle::open_path(path)?; + walk_dir_with_progress_at(path, &dir, map, tracker) + } + + fn build_node_at( + path: &Path, + dir: &DirHandle, + tracker: &ProgressTracker, + ) -> Result { + let dir_name = path + .file_name() + .map(basename) + .unwrap_or_else(|| path.to_string_lossy().to_string()); + + let mut dirs = Vec::new(); + let mut files = Vec::new(); + + for entry in dir.entries(path)? { + let name = entry.name.to_string_lossy().to_string(); + let full_path = path.join(&entry.name); + + match kind_from_mode(entry.stat.st_mode) { + EntryKind::Symlink => continue, + EntryKind::Directory => { + if should_skip_dir(&name) { + continue; + } + + let child = dir.open_child_dir(&entry, &full_path)?; + let child_meta_path = full_path.join("meta.json"); + if child.has_regular_child(OsStr::new("meta.json"), &child_meta_path)? { + dirs.push(verify_and_load_at(&full_path, &child, tracker)?); + } else { + dirs.push(build_node_at(&full_path, &child, tracker)?); + } + } + EntryKind::RegularFile => { + if should_skip_file(&name) { + continue; + } + + let file_size = stat_size(&entry.stat); + tracker.start_file(file_size, &name); + + let file = dir.open_child_file(&entry, &full_path)?; + let on_bytes = tracker.bytes_callback(); + let on_iop = tracker.iop_callback(); + let meta = + FileMeta::from_open_file_with_callback(&full_path, file, on_bytes, on_iop)?; + files.push(meta); + tracker.finish_file(); + } + EntryKind::Other => { + if !should_skip_file(&name) { + return Err(anyhow!("{} 不是文件", full_path.display())); + } + } + } + } + + Ok(DirSnapshot { + dir_name, + dirs, + files, + v: None, + }) + } + + fn verify_and_load_at( + path: &Path, + dir: &DirHandle, + tracker: &ProgressTracker, + ) -> Result { + let meta_name = OsStr::new("meta.json"); + let meta_path = path.join(meta_name); + let Some(meta_stat) = dir.stat_child(meta_name, &meta_path)? else { + return build_node_at(path, dir, tracker); + }; + let meta_entry = DirEntryInfo { + name: meta_name.to_os_string(), + stat: meta_stat, + }; + let meta_file = dir.open_child_file(&meta_entry, &meta_path)?; + let mut snapshot: DirSnapshot = serde_json::from_reader(meta_file) + .with_context(|| format!("无法解析: {}", meta_path.display()))?; + + let mut stored = snapshot.collect_file_map(path); + let mut current = BTreeMap::new(); + walk_dir_with_progress_at(path, dir, &mut current, tracker)?; + + for (file_path, hash) in current { + if let Some(meta) = stored.remove(&file_path) { + if hash != meta.xxh128 { + return Err(anyhow!( + "校验失败: {}\n 期望: {}\n 当前: {}", + file_path.display(), + meta.xxh128, + hash + )); + } + } else { + return Err(anyhow!("文件新增: {}", file_path.display())); + } + } + + if let Some((missing_path, _)) = stored.into_iter().next() { + return Err(anyhow!("文件缺失: {}", missing_path.display())); + } + + let msg = format!("✓ 校验通过: {}", path.display()); + if let Some(multi) = tracker.multi() { + multi.suspend(|| { + eprintln!("{msg}"); + }); + } else { + eprintln!("{msg}"); + } + snapshot.dir_name = path + .file_name() + .map(basename) + .unwrap_or_else(|| path.to_string_lossy().to_string()); + snapshot.v = None; + Ok(snapshot) + } + + fn count_files_at(path: &Path, dir: &DirHandle, count: &mut u64) -> Result<()> { + for entry in dir.entries(path)? { + let name = entry.name.to_string_lossy().to_string(); + let full_path = path.join(&entry.name); + + match kind_from_mode(entry.stat.st_mode) { + EntryKind::Symlink => continue, + EntryKind::Directory => { + if should_skip_dir(&name) { + continue; + } + + let child = dir.open_child_dir(&entry, &full_path)?; + count_files_at(&full_path, &child, count)?; + } + EntryKind::RegularFile => { + if !should_skip_file(&name) { + *count += 1; + } + } + EntryKind::Other => { + if !should_skip_file(&name) { + return Err(anyhow!("{} 不是文件", full_path.display())); + } + } + } + } + + Ok(()) + } + + fn walk_dir_with_progress_at( + path: &Path, + dir: &DirHandle, + map: &mut BTreeMap, + tracker: &ProgressTracker, + ) -> Result<()> { + for entry in dir.entries(path)? { + let name = entry.name.to_string_lossy().to_string(); + let full_path = path.join(&entry.name); + + match kind_from_mode(entry.stat.st_mode) { + EntryKind::Symlink => continue, + EntryKind::Directory => { + if should_skip_dir(&name) { + continue; + } + + let child = dir.open_child_dir(&entry, &full_path)?; + walk_dir_with_progress_at(&full_path, &child, map, tracker)?; + } + EntryKind::RegularFile => { + if should_skip_file(&name) { + continue; + } + + let file_size = stat_size(&entry.stat); + tracker.start_file(file_size, &name); + + let file = dir.open_child_file(&entry, &full_path)?; + let on_bytes = tracker.bytes_callback(); + let on_iop = tracker.iop_callback(); + let hash = + calc_xxh128_from_file_with_callback(&full_path, file, on_bytes, on_iop)?; + map.insert(full_path, hash); + tracker.finish_file(); + } + EntryKind::Other => { + if !should_skip_file(&name) { + return Err(anyhow!("{} 不是文件", full_path.display())); + } + } + } + } + + Ok(()) + } + + fn kind_from_mode(mode: libc::mode_t) -> EntryKind { + match mode & libc::S_IFMT as libc::mode_t { + value if value == libc::S_IFDIR as libc::mode_t => EntryKind::Directory, + value if value == libc::S_IFREG as libc::mode_t => EntryKind::RegularFile, + value if value == libc::S_IFLNK as libc::mode_t => EntryKind::Symlink, + _ => EntryKind::Other, + } + } + + fn stat_size(stat: &libc::stat) -> u64 { + if stat.st_size >= 0 { + stat.st_size as u64 + } else { + 0 + } + } + + fn stat_matches(info: &std::fs::Metadata, stat: &libc::stat) -> bool { + info.dev() == stat.st_dev as u64 && info.ino() == stat.st_ino + } + + fn dir_open_flags() -> libc::c_int { + libc::O_RDONLY | libc::O_CLOEXEC | libc::O_DIRECTORY | libc::O_NOFOLLOW + } + + fn file_open_flags() -> libc::c_int { + libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOFOLLOW + } + + fn cstring_from_path(path: &Path) -> Result { + CString::new(path.as_os_str().as_bytes()) + .with_context(|| format!("路径包含 NUL 字节: {}", path.display())) + } + + fn cstring_from_os_str(value: &OsStr, path: &Path) -> Result { + CString::new(value.as_bytes()) + .with_context(|| format!("路径包含 NUL 字节: {}", path.display())) + } }