1use clap::Parser;
5use docx_rs::*;
6use flate2::{read::GzDecoder, write::GzEncoder, Compression};
7use serde_json::{json, Value};
8use sha256::try_digest;
9
10use std::fs::{remove_file, DirEntry};
11use std::io::ErrorKind;
12use std::{fs, fs::File, io, io::Error, io::Read, path::Path, path::PathBuf};
13use tar::{Archive, Builder};
14
// Command-line options for the `anonymiser` binary, parsed through clap's derive API.
//
// NOTE(review): these are deliberately plain `//` comments. clap's derive turns `///` doc
// comments into `--help` text, so using them here would change the program's output.
#[derive(Parser)]
#[clap(name = "anonymiser")]
pub struct Opt {
    // Exposed as `--input` / `-i`.
    // Presumably the location of the packages to anonymise — confirm against the binary's `main`.
    #[clap(long, short, value_parser)]
    pub input: String,

    // Exposed as `--output` / `-o`.
    // Presumably the directory the anonymised packages are written to — confirm against `main`.
    #[clap(long, short, value_parser)]
    pub output: String,
}
27
28pub fn process_package(dir_output: &PathBuf, file: &PathBuf) -> Result<PathBuf, Error> {
38 let tar_gz_file_name: String = file
39 .file_name()
40 .and_then(|name| name.to_os_string().into_string().ok())
41 .ok_or("Error getting the file name from the file")
42 .map_err(|e| Error::new(ErrorKind::InvalidInput, e))?;
43
44 let output_tar_gz_path: PathBuf =
45 Path::new(&dir_output).join(Path::new(&tar_gz_file_name.replace("TDR", "TST")));
46 let uncompressed_folder_input_path: &PathBuf = &file.with_extension("").with_extension("");
47 let input_batch_reference: String = uncompressed_folder_input_path
48 .file_name()
49 .and_then(|name| name.to_str().map(|name| name.replace("TRE-", "")))
50 .ok_or(Error::new(
51 ErrorKind::InvalidInput,
52 "Cannot get a batch reference from the file name",
53 ))?;
54 let output_batch_reference: &String = &input_batch_reference.replace("TDR", "TST");
55
56 let extracted_output_original_name: PathBuf =
57 dir_output.join(PathBuf::from(&input_batch_reference));
58 let extracted_output_path: PathBuf = dir_output.join(PathBuf::from(output_batch_reference));
59
60 let output_path_with_file = |file_name: &str| -> PathBuf {
61 let output_path = extracted_output_path.clone();
62 output_path.join(PathBuf::from(file_name))
63 };
64
65 fs::create_dir_all(extracted_output_path.clone())?;
66
67 decompress_file(file, dir_output)?;
68
69 let metadata_input_file_path: PathBuf =
70 output_path_with_file(format!("TRE-{input_batch_reference}-metadata.json").as_str());
71 let metadata_output_file_path: PathBuf =
72 output_path_with_file(format!("TRE-{output_batch_reference}-metadata.json").as_str());
73
74 if extracted_output_path.exists() {
75 fs::remove_dir_all(&extracted_output_path)?;
76 }
77 fs::rename(extracted_output_original_name, &extracted_output_path)?;
78 fs::rename(metadata_input_file_path, &metadata_output_file_path)?;
79
80 let mut metadata_json_value: Value = parse_metadata_json(&metadata_output_file_path)?;
81
82 let docx_checksum =
83 create_docx_with_checksum(&extracted_output_path, &mut metadata_json_value)?;
84
85 update_json_file(
86 &metadata_output_file_path,
87 docx_checksum,
88 &mut metadata_json_value,
89 )?;
90
91 if_present_delete(output_path_with_file(
92 format!("{input_batch_reference}.xml").as_str(),
93 ))?;
94 if_present_delete(output_path_with_file("parser.log"))?;
95
96 tar_folder(
97 &output_tar_gz_path,
98 &extracted_output_path,
99 output_batch_reference,
100 )?;
101
102 fs::remove_dir_all(&extracted_output_path)?;
103 Ok(output_tar_gz_path)
104}
105
106fn create_docx_with_checksum(
115 extracted_output_path: &Path,
116 metadata_json_value: &mut Value,
117) -> Result<String, Error> {
118 let docx_file_name: &str = metadata_json_value["parameters"]["TRE"]["payload"]["filename"]
119 .as_str()
120 .ok_or("'filename' is missing from the metadata json")
121 .map_err(|e| Error::new(ErrorKind::InvalidInput, e))?;
122
123 let judgment_name: &str = metadata_json_value["parameters"]["PARSER"]["name"]
124 .as_str()
125 .unwrap_or(docx_file_name);
126 let docx_path: PathBuf = extracted_output_path.join(PathBuf::from(docx_file_name));
127
128 let file: File = File::create(&docx_path)?;
129 Docx::new()
130 .add_paragraph(Paragraph::new().add_run(Run::new().add_text(judgment_name)))
131 .build()
132 .pack(file)?;
133
134 let docx_checksum: String = try_digest(&docx_path).unwrap();
135 Ok(docx_checksum)
136}
137
/// Removes the file at `path` when it exists; a missing path is not an error.
///
/// # Errors
/// Returns the error from [`remove_file`] when the path exists but cannot be removed as a file.
fn if_present_delete(path: PathBuf) -> io::Result<()> {
    // Nothing on disk means nothing to do.
    if !path.exists() {
        return Ok(());
    }
    remove_file(path)
}
145
/// Tells whether a directory entry is visible, i.e. its name does not begin with a dot.
///
/// Entries whose name is not valid UTF-8 are reported as hidden (`false`).
fn is_not_hidden(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        Some(name) => !name.starts_with('.'),
        None => false,
    }
}
154
/// Tells whether a directory entry should be treated as a file.
///
/// The check is "not a directory": any entry whose path is not a directory counts as a file.
fn is_file(entry: &DirEntry) -> bool {
    let entry_path = entry.path();
    !entry_path.is_dir()
}
159
160pub fn files_in_input_dir(directory_path: &PathBuf) -> Result<Vec<PathBuf>, Error> {
165 let path_list: Vec<PathBuf> = fs::read_dir(directory_path)
166 .unwrap()
167 .filter_map(|e| {
168 let entry: DirEntry = e.ok()?;
169 if is_file(&entry) && is_not_hidden(&entry) {
170 Some(entry.path())
171 } else {
172 None
173 }
174 })
175 .collect::<Vec<PathBuf>>();
176 Ok(path_list)
177}
178
179fn tar_folder(
183 tar_path: &PathBuf,
184 path_to_compress: &PathBuf,
185 folder_name: &String,
186) -> Result<(), Error> {
187 let tar_gz: File = File::create(tar_path)?;
188 let enc: GzEncoder<File> = GzEncoder::new(tar_gz, Compression::default());
189 let mut tar: Builder<GzEncoder<File>> = Builder::new(enc);
190 tar.append_dir_all(folder_name, path_to_compress)?;
191 Ok(())
192}
193
194fn update_json_file(
196 metadata_file_name: &PathBuf,
197 checksum: String,
198 json_value: &mut Value,
199) -> Result<(), Error> {
200 let tdr: &mut Value = &mut json_value["parameters"]["TDR"];
201 tdr["Contact-Email"] = json!("XXXXXXXXX");
202 tdr["Contact-Name"] = json!("XXXXXXXXX");
203 tdr["Document-Checksum-sha256"] = json!(checksum);
204 fs::write(metadata_file_name, json_value.to_string())
205}
206
207fn decompress_file(path_to_tar: &PathBuf, output_path: &PathBuf) -> Result<(), Error> {
209 let tar_gz: File = File::open(path_to_tar)?;
210 let tar: GzDecoder<File> = GzDecoder::new(tar_gz);
211 let mut archive: Archive<GzDecoder<File>> = Archive::new(tar);
212 archive.unpack(output_path)?;
213 Ok(())
214}
215
216fn parse_metadata_json(metadata_file_path: &PathBuf) -> Result<Value, Error> {
218 let mut metadata_file: File = File::open(metadata_file_path)?;
219 let mut metadata_json_as_string: String = String::new();
220 metadata_file.read_to_string(&mut metadata_json_as_string)?;
221 Ok(serde_json::from_str(&metadata_json_as_string)?)
222}
223
// Unit tests for the private helpers of this module. Each test works inside fresh
// `assert_fs::TempDir` directories.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::create_docx_with_checksum;
    use assert_fs::TempDir;
    use std::fs::{read_dir, read_to_string};
    use testlib::create_package;

    // A docx named after the metadata `filename` is created and its checksum is returned.
    #[test]
    fn test_create_docx_with_checksum() {
        let output_path = TempDir::new().unwrap();
        let mut json_value = json!({
            "parameters": {
                "PARSER": {
                    "name" : "test-name"
                },
                "TRE": {
                    "payload": {
                        "filename": "test-file-name.docx"
                    }
                }
            }
        });
        let docx_checksum =
            create_docx_with_checksum(&output_path.to_owned(), &mut json_value).unwrap();
        // The docx is the only file expected in the fresh directory, so `last()` picks it.
        let output_files = read_dir(&output_path.to_owned()).unwrap();
        let filename = &output_files.last().unwrap().unwrap().file_name();

        assert_eq!(
            filename.to_str().unwrap().to_string(),
            "test-file-name.docx"
        );
        // Pinned digest of the generated document; it changes if the generated bytes change.
        assert_eq!(
            docx_checksum,
            "a951e0d7f11d9d2fa8c9508ee4b25944bb5810364089fc33221b1ec038eefd37"
        )
    }

    // Without `parameters.TRE.payload.filename` the function reports an error instead of
    // creating a file.
    #[test]
    fn test_create_docx_with_checksum_missing_metadata_filename() {
        let output_path = TempDir::new().unwrap();
        let mut json_value = json!({
            "parameters": {
                "PARSER": {
                    "name" : "test-name"
                }
            }
        });
        let err = create_docx_with_checksum(&output_path.to_owned(), &mut json_value).unwrap_err();
        assert_eq!(
            err.to_string(),
            "'filename' is missing from the metadata json"
        )
    }

    // JSON written to disk is read back as a `Value` with the same content.
    #[test]
    fn test_parse_metadata_json_parses_data_into_value() {
        let output_dir = TempDir::new().unwrap();
        let metadata_path = &output_dir.join(PathBuf::from("metadata.json"));
        fs::write(&metadata_path, r#"{"a": "b"}"#.as_bytes()).unwrap();
        let json = parse_metadata_json(&metadata_path).unwrap();
        assert_eq!(&json["a"], "b")
    }

    // Unpacking the package built by `testlib::create_package` yields the expected files.
    // NOTE(review): the `TDR-2023` names below are presumably what `create_package` generates —
    // its implementation is not visible here.
    #[test]
    fn test_decompress_file() {
        let input_dir = TempDir::new().unwrap();
        let output_dir = TempDir::new().unwrap();
        let tar_path = create_package(&input_dir, "{}", None);
        decompress_file(&tar_path, &output_dir.to_owned()).unwrap();
        assert!(output_dir
            .join(PathBuf::from("TDR-2023/test.docx"))
            .exists());
        assert!(output_dir
            .join(PathBuf::from("TDR-2023/TRE-TDR-2023-metadata.json"))
            .exists());
    }

    // Only `Contact-Email`, `Contact-Name` and `Document-Checksum-sha256` are rewritten; the
    // similarly named `Contact-Email2` and `TDR-Contact-Name` keep their values.
    #[test]
    fn test_update_json_file() {
        let output_dir = TempDir::new().unwrap();
        let metadata_path = &output_dir.join(PathBuf::from("metadata.json"));
        let mut json_value = json!({
            "parameters": {
                "TDR": {
                    "Contact-Email" : "test-email",
                    "Contact-Email2": "test-email-2",
                    "TDR-Contact-Name": "tdr-contact-name",
                    "Contact-Name" : "test-name",
                    "Document-Checksum-sha256": "test-checksum"
                }
            }
        });
        update_json_file(&metadata_path, "abcde".to_owned(), &mut json_value).unwrap();
        let metadata_json_string = read_to_string(&metadata_path).unwrap();
        // The comparison is on the exact serialised text, so it also pins the key order
        // (alphabetical here) and the compact formatting.
        let expected_json = r#"{"parameters":{"TDR":{"Contact-Email":"XXXXXXXXX","Contact-Email2":"test-email-2","Contact-Name":"XXXXXXXXX","Document-Checksum-sha256":"abcde","TDR-Contact-Name":"tdr-contact-name"}}}"#;
        assert_eq!(metadata_json_string, expected_json);
    }

    // Packing an empty directory still produces the archive file.
    #[test]
    fn test_tar_folder_creates_a_new_tar() {
        let tar_dir = TempDir::new().unwrap();
        let output_dir = TempDir::new().unwrap();
        let folder_name: String = String::from("test_name");
        let tar_file_path = tar_dir.join("test.tar.gz");
        tar_folder(&tar_file_path, &output_dir.to_owned(), &folder_name).unwrap();

        assert!(tar_file_path.exists());
    }
}