anonymiser/
main.rs

//! ## Anonymiser script
2//!
3//! This is a script to be used to anonymise files on the local disk.
4//!
5//! ## Install
6//! There is an [install.sh](./install.sh) script which will download the latest binary from GitHub and add it to `$HOME/.anonymiser/bin`.
7//! ```bash
8//! curl https://raw.githubusercontent.com/nationalarchives/dr2-court-document-package-anonymiser/main/install.sh | sh
9//! ```
//! You will need to add `$HOME/.anonymiser/bin` to your `$PATH`.
11//!
12//! ## Running
13//! ```bash
14//! anonymiser --input /path/to/input --output /path/to/output
15//! ```
16//!
17//! ## Running with docker
18//! ```bash
19//! docker run -v /path/to/input:/input -v /path/to/output:/output public.ecr.aws/u4s1g5v1/anonymiser
20//! ```
21//!
22//! The input path must only contain the tar.gz files you're converting.
23//!
24use anonymiser_lib::*;
25use clap::Parser;
26use log::{self, LevelFilter};
27use simple_logger::SimpleLogger;
28use std::{path::PathBuf, process::exit};
29
/// # Resolved inputs for a single run
///
/// Pairs the files that were found in the input directory with the directory the results are
/// written to.
struct Files {
    /// Paths of the files found in the input directory.
    files: Vec<PathBuf>,
    /// Directory that is handed to the package processor as the output location.
    dir_output: PathBuf,
}
35
36/// # Process the input arguments
37///
38/// Returns the `Files` struct with a list of files in the input directory and the output directory as a `PathBuf` struct.
39fn files_from_input_arguments(opt: Opt) -> Files {
40    let dir_input: PathBuf = PathBuf::from(shellexpand::full(&opt.input).unwrap().to_string());
41    let dir_output: PathBuf = PathBuf::from(shellexpand::full(&opt.output).unwrap().to_string());
42    let files = files_in_input_dir(&dir_input).unwrap();
43    Files { dir_output, files }
44}
45
46/// # The entrypoint for the anonymiser script
47fn main() {
48    SimpleLogger::new()
49        .with_level(LevelFilter::Info)
50        .init()
51        .unwrap();
52    let files_from_input = files_from_input_arguments(Opt::parse());
53    for file in files_from_input.files {
54        match process_package(&files_from_input.dir_output, &file) {
55            Ok(_) => {
56                log::info!(
57                    "Processed {}",
58                    file.file_name().and_then(|name| name.to_str()).unwrap()
59                )
60            }
61            Err(err) => {
62                log::error!("Error: {:?}", err);
63                exit(1);
64            }
65        };
66    }
67}
68
#[cfg(test)]
mod test {
    use crate::files_from_input_arguments;
    use anonymiser_lib::Opt;
    use assert_fs::TempDir;
    use std::fs::write;
    use std::path::PathBuf;

    /// Creates three empty files in a temporary input directory and checks that
    /// `files_from_input_arguments` returns exactly those files.
    #[test]
    fn test_files_can_be_retrieved_from_input_arguments() {
        let expected_names = ["file1", "file2", "file3"];

        let input_dir = TempDir::new().unwrap();
        for expected_name in expected_names {
            let target = input_dir.join(PathBuf::from(expected_name));
            write(&target, "").unwrap();
        }

        // Only the path string of the output directory is needed; the temporary directory
        // itself is dropped at the end of this statement.
        let output = TempDir::new().unwrap().to_str().unwrap().to_string();
        let input = input_dir.to_str().unwrap().to_string();

        let mut found = files_from_input_arguments(Opt { input, output }).files;
        // Sort so the comparison does not depend on the order the files were listed in.
        found.sort();

        let found_names: Vec<&str> = found
            .iter()
            .map(|path| path.file_name().and_then(|name| name.to_str()).unwrap())
            .collect();

        assert_eq!(found_names, expected_names);
    }
}