most done.
This commit is contained in:
26
src/cli.rs
Normal file
26
src/cli.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
use std::path::PathBuf;
|
||||
use std::{env, fs};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Parser;
|
||||
|
||||
/// Command-line interface: parses and holds the program's arguments.
#[derive(Debug, Parser)]
// Clap attribute supplying CLI metadata (author, version, description) for `--help` output.
#[command(author, version, about = "汇总文件或文件夹的元信息")]
pub struct Cli {
    /// Target path to scan; defaults to the current working directory when omitted.
    pub path: Option<PathBuf>,
}
|
||||
|
||||
impl Cli {
|
||||
pub fn resolve_path(&self) -> Result<PathBuf> {
|
||||
let candidate = match &self.path {
|
||||
Some(p) => p.clone(),
|
||||
None => env::current_dir().context("无法获取当前工作目录")?,
|
||||
};
|
||||
|
||||
fs::canonicalize(&candidate)
|
||||
.with_context(|| format!("无法解析路径: {}", candidate.display()))
|
||||
}
|
||||
}
|
||||
22
src/constants.rs
Normal file
22
src/constants.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
// Read-buffer size used when streaming file contents for hashing (4 MiB).
pub const DEFAULT_BUFFER_SIZE: usize = 4 * 1024 * 1024;
// Number of leading bytes fed into the 115-style head digest (128 KiB).
pub const HEAD_115_BYTES: usize = 128 * 1024;
// Number of leading bytes fed into the Baidu-style head digest (256 KiB).
pub const HEAD_BAIDU_BYTES: usize = 256 * 1024;
// Version tag stamped onto the root of generated metadata snapshots.
pub const META_VERSION: &str = "2025-11-21";

// Directory names excluded from traversal (compared case-insensitively).
pub const SKIP_DIR_NAMES: &[&str] = &[
    "@Recently-Snapshot",
    "@Recycle",
    ".@__thumb",
    "@Transcode",
    "meta",
    "$RECYCLE.BIN",
];

// File names excluded from traversal (compared case-insensitively).
pub const SKIP_FILE_NAMES: &[&str] = &[
    ".DS_Store",
    "licsber-bak.json",
    "meta.json",
    "meta-old.json",
    "Thumbs.db",
    "desktop.ini",
];
|
||||
43
src/head_hash.rs
Normal file
43
src/head_hash.rs
Normal file
@@ -0,0 +1,43 @@
|
||||
use md5::{digest::Digest, Md5};
|
||||
use sha1::Sha1;
|
||||
|
||||
use crate::utils::hex_upper;
|
||||
|
||||
/// Fixed-capacity prefix buffer: accumulates at most `size` leading bytes of
/// a byte stream and silently ignores everything past the capacity.
pub struct HeadChunk {
    buffer: Vec<u8>, // backing storage, allocated up front
    filled: usize,   // number of valid bytes written so far
}

impl HeadChunk {
    /// Create a chunk that captures at most `size` leading bytes.
    pub fn new(size: usize) -> Self {
        Self {
            buffer: vec![0u8; size],
            filled: 0,
        }
    }

    /// Append `data`, keeping only what still fits in the remaining capacity.
    pub fn feed(&mut self, data: &[u8]) {
        if self.filled >= self.buffer.len() {
            return;
        }
        let take = (self.buffer.len() - self.filled).min(data.len());
        self.buffer[self.filled..self.filled + take].copy_from_slice(&data[..take]);
        self.filled += take;
    }

    /// The bytes captured so far.
    ///
    /// BUG FIX: this previously returned the entire backing buffer, so a
    /// stream shorter than the capacity was exposed (and hashed) together
    /// with its trailing zero padding. Only the `filled` prefix is real data.
    pub fn as_slice(&self) -> &[u8] {
        &self.buffer[..self.filled]
    }
}
|
||||
|
||||
pub fn calc_head_115(chunk: &[u8]) -> String {
|
||||
let mut hasher = Sha1::default();
|
||||
hasher.update(chunk);
|
||||
hex_upper(hasher.finalize())
|
||||
}
|
||||
|
||||
pub fn calc_head_baidu(chunk: &[u8]) -> String {
|
||||
let mut hasher = Md5::default();
|
||||
hasher.update(chunk);
|
||||
hex_upper(hasher.finalize())
|
||||
}
|
||||
98
src/main.rs
98
src/main.rs
@@ -1,3 +1,97 @@
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
mod cli;
|
||||
mod constants;
|
||||
mod head_hash;
|
||||
mod meta;
|
||||
mod utils;
|
||||
|
||||
use std::fs::{self, File};
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Parser;
|
||||
use meta::{DirSnapshot, FileMeta};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let started = Instant::now();
|
||||
let cli = cli::Cli::parse();
|
||||
let target = cli.resolve_path()?;
|
||||
println!("目标: {}", target.display());
|
||||
|
||||
if target.is_dir() {
|
||||
process_dir(&target)?;
|
||||
} else {
|
||||
process_file(&target)?;
|
||||
}
|
||||
|
||||
println!("耗时: {:?}", started.elapsed());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn process_file(path: &Path) -> Result<()> {
|
||||
let meta = FileMeta::from_path(path)?;
|
||||
let meta_dir = path
|
||||
.parent()
|
||||
.map(Path::to_path_buf)
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
.join("meta");
|
||||
fs::create_dir_all(&meta_dir)
|
||||
.with_context(|| format!("无法创建目录: {}", meta_dir.display()))?;
|
||||
|
||||
let save_path = meta_dir.join(format!("{}.json", meta.basename));
|
||||
if !save_path.exists() {
|
||||
let json = meta.to_pretty_json()?;
|
||||
println!("{}", json);
|
||||
fs::write(&save_path, json)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let existing = File::open(&save_path)
|
||||
.with_context(|| format!("无法读取历史元数据: {}", save_path.display()))?;
|
||||
let old_meta = FileMeta::from_reader(existing)?;
|
||||
if meta.matches(&old_meta) {
|
||||
println!("校验通过.");
|
||||
} else {
|
||||
println!("校验失败!");
|
||||
println!("现校验文件:");
|
||||
println!("{}", meta.to_pretty_json()?);
|
||||
println!("原校验文件:");
|
||||
println!("{}", old_meta.to_pretty_json()?);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn process_dir(path: &Path) -> Result<()> {
|
||||
let save_path = path.join("meta.json");
|
||||
let old_path = path.join("meta-old.json");
|
||||
let has_old = save_path.exists();
|
||||
|
||||
if has_old {
|
||||
if old_path.exists() {
|
||||
fs::remove_file(&old_path)?;
|
||||
}
|
||||
fs::rename(&save_path, &old_path)
|
||||
.with_context(|| format!("无法备份旧文件: {}", save_path.display()))?;
|
||||
}
|
||||
|
||||
let snapshot = DirSnapshot::build_root(path)?;
|
||||
let json = serde_json::to_string_pretty(&snapshot)?;
|
||||
let mut file =
|
||||
File::create(&save_path).with_context(|| format!("无法写入: {}", save_path.display()))?;
|
||||
file.write_all(json.as_bytes())?;
|
||||
|
||||
if has_old {
|
||||
let old_meta = FileMeta::from_path(&old_path)?;
|
||||
let new_meta = FileMeta::from_path(&save_path)?;
|
||||
if old_meta.matches(&new_meta) {
|
||||
println!("校验通过.");
|
||||
fs::remove_file(&old_path)?;
|
||||
} else {
|
||||
println!("校验失败!");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
123
src/meta/file.rs
Normal file
123
src/meta/file.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use std::fs::{self, File};
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use ed2k::digest::Digest;
|
||||
use ed2k::Ed2k;
|
||||
use md5::Md5;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha1::Sha1;
|
||||
use sha2::Sha256;
|
||||
use xxhash_rust::xxh3::Xxh3;
|
||||
|
||||
use crate::constants::{DEFAULT_BUFFER_SIZE, HEAD_115_BYTES, HEAD_BAIDU_BYTES};
|
||||
use crate::head_hash::{calc_head_115, calc_head_baidu, HeadChunk};
|
||||
use crate::utils::{basename, friendly_size, hex_upper};
|
||||
|
||||
/// Content metadata for one file: identity, size, timestamp, plus a set of
/// whole-file and head-of-file digests (all rendered as upper-case hex).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMeta {
    pub basename: String,      // file name without directory components
    pub size: u64,             // file size in bytes
    pub friendly_size: String, // human-readable size, e.g. "1.00MB"
    pub mtime: i64,            // modification time as Unix seconds (0 if unavailable)
    pub head_115: String,      // SHA-1 over the leading HEAD_115_BYTES
    pub head_baidu: String,    // MD5 over the leading HEAD_BAIDU_BYTES
    pub ed2k: String,          // eD2k hash of the whole file
    pub md5: String,           // MD5 of the whole file
    pub sha1: String,          // SHA-1 of the whole file
    pub sha256: String,        // SHA-256 of the whole file
    pub xxh128: String,        // XXH3-128 of the whole file (big-endian bytes)
}
|
||||
|
||||
impl FileMeta {
    /// Compute full metadata for the file at `path`, streaming its contents
    /// exactly once through every digest algorithm.
    ///
    /// # Errors
    /// Fails when `path` is not a regular file, lacks a file name, or any
    /// filesystem read fails.
    pub fn from_path(path: &Path) -> Result<Self> {
        let info =
            fs::metadata(path).with_context(|| format!("无法读取文件信息: {}", path.display()))?;
        if !info.is_file() {
            return Err(anyhow!("{} 不是文件", path.display()));
        }

        let basename_str = basename(
            path.file_name()
                .ok_or_else(|| anyhow!("{} 缺少文件名", path.display()))?,
        );
        let size = info.len();
        let friendly = friendly_size(size);
        // Modification time as Unix seconds; falls back to 0 when the
        // platform cannot report it or it predates the epoch.
        let mtime = info
            .modified()
            .unwrap_or(SystemTime::UNIX_EPOCH)
            .duration_since(UNIX_EPOCH)
            .map(|d| d.as_secs() as i64)
            .unwrap_or(0);

        let mut file =
            File::open(path).with_context(|| format!("无法打开文件: {}", path.display()))?;

        // One shared read buffer feeds all hashers, so the file is read a
        // single time regardless of how many digests are produced.
        let mut buffer = vec![0u8; DEFAULT_BUFFER_SIZE];
        let mut md5_hasher = Md5::new();
        let mut sha1_hasher = Sha1::new();
        let mut sha256_hasher = Sha256::new();
        let mut xxh_hasher = Xxh3::new();
        let mut ed2k_hasher = Ed2k::new();
        // Head chunks retain only the leading bytes for the 115/Baidu hashes.
        let mut head115 = HeadChunk::new(HEAD_115_BYTES);
        let mut head_baidu = HeadChunk::new(HEAD_BAIDU_BYTES);

        loop {
            let read_len = file.read(&mut buffer)?;
            if read_len == 0 {
                break;
            }
            let chunk = &buffer[..read_len];
            md5_hasher.update(chunk);
            sha1_hasher.update(chunk);
            sha256_hasher.update(chunk);
            xxh_hasher.update(chunk);
            ed2k_hasher.update(chunk);

            head115.feed(chunk);
            head_baidu.feed(chunk);
        }

        let head_115 = calc_head_115(head115.as_slice());
        let head_baidu = calc_head_baidu(head_baidu.as_slice());

        let md5_hex = hex_upper(md5_hasher.finalize());
        let sha1_hex = hex_upper(sha1_hasher.finalize());
        let sha256_hex = hex_upper(sha256_hasher.finalize());
        // XXH3-128 yields a u128; big-endian bytes keep the hex rendering stable.
        let xxh_hex = hex_upper(xxh_hasher.digest128().to_be_bytes());
        let ed2k_hex = hex_upper(ed2k_hasher.finalize());

        Ok(Self {
            basename: basename_str,
            size,
            friendly_size: friendly,
            mtime,
            head_115,
            head_baidu,
            ed2k: ed2k_hex,
            md5: md5_hex,
            sha1: sha1_hex,
            sha256: sha256_hex,
            xxh128: xxh_hex,
        })
    }

    /// Deserialize a `FileMeta` from JSON supplied by `reader`.
    pub fn from_reader<R: Read>(reader: R) -> Result<Self> {
        Ok(serde_json::from_reader(reader)?)
    }

    /// Serialize this record as pretty-printed JSON.
    pub fn to_pretty_json(&self) -> Result<String> {
        Ok(serde_json::to_string_pretty(self)?)
    }

    /// Content equality: size plus every whole-file digest. Name, mtime and
    /// the head hashes are excluded from the comparison.
    pub fn matches(&self, other: &Self) -> bool {
        self.size == other.size
            && self.ed2k == other.ed2k
            && self.md5 == other.md5
            && self.sha1 == other.sha1
            && self.sha256 == other.sha256
            && self.xxh128 == other.xxh128
    }
}
|
||||
5
src/meta/mod.rs
Normal file
5
src/meta/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
// Per-file metadata (sizes, timestamps, digests).
mod file;
// Recursive directory snapshots.
mod tree;

pub use file::FileMeta;
pub use tree::DirSnapshot;
|
||||
76
src/meta/tree.rs
Normal file
76
src/meta/tree.rs
Normal file
@@ -0,0 +1,76 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::file::FileMeta;
|
||||
use crate::constants::META_VERSION;
|
||||
use crate::utils::{basename, should_skip_dir, should_skip_file};
|
||||
|
||||
/// Recursive snapshot of a directory: its name, child directories, and the
/// metadata of its files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirSnapshot {
    pub dir_name: String,        // directory name (not the full path)
    pub dirs: Vec<DirSnapshot>,  // child directories, sorted by name
    pub files: Vec<FileMeta>,    // files in this directory, sorted by name
    // Metadata format version; set only on the root node, omitted from JSON elsewhere.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub v: Option<String>,
}
|
||||
|
||||
impl DirSnapshot {
|
||||
pub fn build_root(path: &Path) -> Result<Self> {
|
||||
let mut node = Self::build_node(path)?;
|
||||
node.v = Some(META_VERSION.to_string());
|
||||
Ok(node)
|
||||
}
|
||||
|
||||
fn build_node(path: &Path) -> Result<Self> {
|
||||
let dir_name = path
|
||||
.file_name()
|
||||
.map(basename)
|
||||
.unwrap_or_else(|| path.to_string_lossy().to_string());
|
||||
|
||||
let mut dirs = Vec::new();
|
||||
let mut files = Vec::new();
|
||||
|
||||
let mut entries = fs::read_dir(path)
|
||||
.with_context(|| format!("无法遍历目录: {}", path.display()))?
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.with_context(|| format!("读取目录失败: {}", path.display()))?;
|
||||
|
||||
entries.sort_by(|a, b| a.file_name().cmp(&b.file_name()));
|
||||
|
||||
for entry in entries {
|
||||
let file_name = entry.file_name();
|
||||
let name = file_name.to_string_lossy().to_string();
|
||||
let full_path = entry.path();
|
||||
let file_type = entry
|
||||
.file_type()
|
||||
.with_context(|| format!("无法读取类型: {}", full_path.display()))?;
|
||||
|
||||
if file_type.is_dir() {
|
||||
if should_skip_dir(&name) {
|
||||
continue;
|
||||
}
|
||||
println!("目录: {}", full_path.display());
|
||||
dirs.push(Self::build_node(&full_path)?);
|
||||
continue;
|
||||
}
|
||||
|
||||
if should_skip_file(&name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let meta = FileMeta::from_path(&full_path)?;
|
||||
println!("文件: {} {}", meta.friendly_size, full_path.display());
|
||||
files.push(meta);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
dir_name,
|
||||
dirs,
|
||||
files,
|
||||
v: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
68
src/utils.rs
Normal file
68
src/utils.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
use std::ffi::OsStr;
|
||||
|
||||
use crate::constants::{SKIP_DIR_NAMES, SKIP_FILE_NAMES};
|
||||
|
||||
/// Format a byte count as a human-readable string with two decimal places,
/// e.g. `1536` -> `"1.50KB"`. Zero is rendered as the special case `"0B"`.
pub fn friendly_size(size: u64) -> String {
    const UNITS: [(&str, u64); 5] = [
        ("B", 1),
        ("KB", 1024),
        ("MB", 1024 * 1024),
        ("GB", 1024 * 1024 * 1024),
        ("TB", 1024 * 1024 * 1024 * 1024),
    ];

    if size == 0 {
        return "0B".to_string();
    }

    // Pick the largest unit whose threshold the size reaches; any non-zero
    // size reaches the "B" threshold, so the fallback is unreachable.
    let (unit, threshold) = UNITS
        .iter()
        .rev()
        .find(|(_, threshold)| size >= *threshold)
        .copied()
        .unwrap_or(("B", 1));

    format!("{:.2}{}", size as f64 / threshold as f64, unit)
}
|
||||
|
||||
/// Convert an OS-native name into a `String`, lossily replacing any
/// non-Unicode bytes.
pub fn basename(path: &OsStr) -> String {
    path.to_string_lossy().into_owned()
}
|
||||
|
||||
pub fn should_skip_dir(name: &str) -> bool {
|
||||
SKIP_DIR_NAMES
|
||||
.iter()
|
||||
.any(|item| item.eq_ignore_ascii_case(name))
|
||||
}
|
||||
|
||||
pub fn should_skip_file(name: &str) -> bool {
|
||||
SKIP_FILE_NAMES
|
||||
.iter()
|
||||
.any(|item| item.eq_ignore_ascii_case(name))
|
||||
|| name.starts_with("._")
|
||||
|| name.starts_with("Thumb_")
|
||||
}
|
||||
|
||||
/// Render a byte slice as one contiguous upper-case hexadecimal string.
pub fn hex_upper(bytes: impl AsRef<[u8]>) -> String {
    use std::fmt::Write;

    let data = bytes.as_ref();
    // Pre-size the output (two hex digits per byte) and append in place.
    // The previous version allocated a fresh String per byte via
    // format!().collect(), which is needlessly allocation-heavy.
    let mut out = String::with_capacity(data.len() * 2);
    for byte in data {
        // Writing into a String cannot fail.
        let _ = write!(out, "{:02X}", byte);
    }
    out
}
|
||||
|
||||
// Unit tests for the human-readable size formatter.
#[cfg(test)]
mod tests {
    use super::friendly_size;

    // Verifies the zero special case and the 2-decimal unit formatting.
    #[test]
    fn friendly_size_formats_units() {
        assert_eq!(friendly_size(0), "0B");
        assert_eq!(friendly_size(1), "1.00B");
        assert_eq!(friendly_size(1024), "1.00KB");
        assert_eq!(friendly_size(1024 * 1024), "1.00MB");
    }
}
|
||||
Reference in New Issue
Block a user