From 30117dde4b93d0c9537619b3ef2163dc9ca17d0c Mon Sep 17 00:00:00 2001
From: askiiart
Date: Tue, 29 Oct 2024 22:34:44 -0500
Subject: [PATCH] it's doneeeeeee (aside from adding stuff like clap)

---
 README.md   | 18 +++++++++++
 prepare.sh  | 15 +++++++--
 src/main.rs | 91 +++++++++++++++++++++++++++++++++++------------------
 3 files changed, 92 insertions(+), 32 deletions(-)
 mode change 100644 => 100755 prepare.sh

diff --git a/README.md b/README.md
index c542c69..c980818 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,21 @@
 # Read-only benchmark
 
 This tests the latency, sequential read, and random read speeds of a variety of data.
+
+## Running
+
+The program will automatically generate all data used, except for the regular polygon data. Once the data is generated, stop the program with Ctrl+C, then run `prepare.sh` to archive and mount the data using [DwarFS](https://github.com/mhx/dwarfs), `tar`, and [`fuse-archive`](https://github.com/google/fuse-archive).
+
+It will output its data at `./data/benchmark-data.csv` and `./data/bulk.csv` in these formats:
+
+`benchmark-data.csv`:
+
+```txt
+filesystem dir,file path,sequential read time,sequential read latency,random read time,random read latency
+```
+
+`bulk.csv`:
+
+```txt
+filesystem dir,folder path,test type,time1,time2,time3,[...]
+```
diff --git a/prepare.sh b/prepare.sh
old mode 100644
new mode 100755
index dee5bcc..23e0511
--- a/prepare.sh
+++ b/prepare.sh
@@ -17,11 +17,11 @@ time dwarfs ./data/ext-workdir/dwarfs ./data/mountpoints/dwarfs/
 #mkdir ./data/mountpoints/fuse-archive-tar-gz/
 #time fuse-archive ./data/ext-workdir/fuse-archive.tar.gz ./data/mountpoints/fuse-archive-tar-gz/
 
-cd ./data/datasets/
 if [ ! -f ./data/ext-workdir/fuse-archive.tar ]; then
+    cd ./data/datasets/
     time tar -cf ../ext-workdir/fuse-archive.tar .
+    cd -
 fi
-cd -
 
 mkdir ./data/mountpoints/fuse-archive-tar/
 time fuse-archive ./data/ext-workdir/fuse-archive.tar ./data/mountpoints/fuse-archive-tar/
@@ -33,3 +33,14 @@ time fuse-archive ./data/ext-workdir/fuse-archive.tar ./data/mountpoints/fuse-ar
 #cd -
 #mkdir ./data/mountpoints/fuse-archive-tar-zst/
 #time fuse-archive ./data/ext-workdir/fuse-archive.tar.zst ./data/mountpoints/fuse-archive-tar-zst/
+
+# btrfs-fuse is broken - ERROR: failed to scan device /dev/nvme0n1p3: -13
+device=""
+#mkdir ./data/mountpoints/btrfs-fuse
+#sudo mount $device ./data/mountpoints/btrfs-fuse
+#sudo chmod -R 777 ./data/mountpoints/btrfs-fuse/
+#if [ ! -f ./data/mountpoints/btrfs-fuse/25G-null.bin ]; then
+#    cp -r ./data/datasets/* ./data/mountpoints/btrfs-fuse/
+#    sudo umount ./data/mountpoints/btrfs-fuse/
+#fi
+#btrfs-fuse $device ./data/mountpoints/btrfs-fuse
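The README hunk above documents the two CSV layouts. A side note for anyone consuming those files: `bulk.csv` rows are headerless and carry a variable number of time columns, so a reader has to be configured for that. A minimal sketch (not part of this patch), assuming the same `csv` crate the benchmark already uses for writing:

```rust
// Sketch only: read data/bulk.csv back for analysis.
// Rows look like: filesystem dir, folder path, test type, time1, time2, ...
use std::error::Error;

fn main() -> Result<(), Box<dyn Error>> {
    let mut reader = csv::ReaderBuilder::new()
        .has_headers(false) // the benchmark writes no header row
        .flexible(true)     // rows may differ in how many time columns they have
        .from_path("data/bulk.csv")?;

    for record in reader.records() {
        let record = record?;
        // Columns 0-2 are fixed: filesystem dir, folder path, test type.
        let (fs, folder, test) = (&record[0], &record[1], &record[2]);
        // The remaining columns are Debug-formatted Durations, e.g. "1.23456ms".
        let samples = record.iter().skip(3).count();
        println!("{fs} {folder} {test}: {samples} samples");
    }
    Ok(())
}
```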
diff --git a/src/main.rs b/src/main.rs
index 2508185..e7a13c5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -53,7 +53,6 @@ fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<File>>) -> Result<(), Error> {
 }
 
 fn prep_other_dirs() -> bool {
-    if !exists("data/ext-workdir").unwrap() {
-        create_dir_all("data/ext-workdir").unwrap();
-    };
-
-    if !exists("data/benchmark-workdir").unwrap() {
-        create_dir_all("data/benchmark-workdir").unwrap();
-    }
-
     if !exists("data/mountpoints").unwrap() {
         create_dir_all("data/mountpoints").unwrap();
     };
@@ -315,6 +306,21 @@ fn bulk_sequential_read_latency(path: String) -> Vec<Duration> {
     return times;
 }
 
+fn bulk_random_read_latency(path: String) -> Vec<Duration> {
+    let mut rng = XorShiftRng::seed_from_u64(9198675309);
+    let mut data: [u8; 1] = [0u8; 1];
+    let mut times: Vec<Duration> = Vec::new();
+    for i in 1..1025 {
+        let mut f: File = File::open(format!("{path}/{i}")).unwrap();
+        let offset = rng.gen_range(0..1023);
+        let now = Instant::now();
+        f.read_at(&mut data, offset).unwrap();
+        let elapsed = now.elapsed();
+        times.push(elapsed);
+    }
+
+    return times;
+}
 
 fn benchmark() {
     let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap();
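The new `bulk_random_read_latency()` returns one `Duration` per 1-byte read. A minimal sketch of collapsing those samples into summary statistics for a quick sanity check; the `summarize` helper is illustrative, not code from this repository:

```rust
// Sketch only: reduce the Vec<Duration> that bulk_random_read_latency()
// returns to mean/min/max.
use std::time::Duration;

fn summarize(times: &[Duration]) -> (Duration, Duration, Duration) {
    let total: Duration = times.iter().sum();
    let mean = total / times.len() as u32; // assumes a non-empty sample set
    let min = *times.iter().min().unwrap();
    let max = *times.iter().max().unwrap();
    (mean, min, max)
}

fn main() {
    let samples = vec![
        Duration::from_micros(12),
        Duration::from_micros(9),
        Duration::from_micros(41),
    ];
    let (mean, min, max) = summarize(&samples);
    println!("mean {mean:.5?}, min {min:.5?}, max {max:.5?}");
}
```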
@@ -337,15 +343,14 @@ fn benchmark() {
             "100M-polygon.txt".to_string(),
             "kernel/linux-6.6.58.tar.xz".to_string(),
         ];
-
-        let bulk_files = vec!["small-files/null", "small-files/random"];
+        let bulk_files: Vec<String> = vec![
+            "small-files/null".to_string(),
+            "small-files/random".to_string(),
+        ];
 
         for filename in single_files {
-            println!("=== {} ===", filename);
-
             let path = format!("{fs}/{filename}");
-            println!("{}", path);
-            //panic!("hi");
+            println!("=== {} ===", path.clone());
 
             let seq_read = format!("{:.5?}", sequential_read(path.clone()));
             println!("Sequential read (complete file read): {}", seq_read.clone());
@@ -356,7 +361,11 @@ fn benchmark() {
             let rand_read = format!("{:.5?}", random_read(path.clone()));
             println!("Random read (1024x 1 MiB): {}", rand_read);
 
-            let rand_latency = format!("{:.5?}", random_read_latency(path.clone()));
+            let mut rand_latency: String = "0s".to_string();
+            if fs != "data/mountpoints/fuse-archive-tar" {
+                rand_latency = format!("{:.5?}", random_read_latency(path.clone()));
+            }
+
             println!("Random latency (1024x 1 byte read): {}", rand_latency);
 
             let data: Vec<String> = vec![
@@ -368,18 +377,40 @@ fn benchmark() {
                 rand_latency,
             ];
             recorder.write_record(data).unwrap();
-
             println!();
         }
 
+        // bulk files
         for folder in bulk_files {
-            bulk_recorder.write_record(_vec_duration_to_string(bulk_sequential_read(folder.to_string()))).unwrap();
-            bulk_recorder.write_record(_vec_duration_to_string(bulk_sequential_read_latency(folder.to_string()))).unwrap();
-            //bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(folder.to_string()))).unwrap();
-            //bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read_latency(folder.to_string()))).unwrap();
-        }
+            let cloned = fs.clone();
+            let path = format!("{cloned}/{folder}");
+            println!("[bulk] Testing {}", path);
+            let dataset_info: Vec<String> = vec![fs.clone(), folder];
 
-        println!("=== === === === === === === === === === ===\n")
+            let mut times = _vec_duration_to_string(bulk_sequential_read(path.clone()));
+            let mut tmp = Vec::new();
+            dataset_info.clone_into(&mut tmp);
+            tmp.push("bulk_sequential_read".to_string());
+            tmp.append(&mut times);
+            bulk_recorder.write_record(tmp).unwrap();
+
+            times = _vec_duration_to_string(bulk_sequential_read_latency(path.clone()));
+            tmp = Vec::new();
+            dataset_info.clone_into(&mut tmp);
+            tmp.push("bulk_sequential_read_latency".to_string());
+            tmp.append(&mut times);
+            bulk_recorder.write_record(tmp).unwrap();
+
+            // not enough data in these files to warrant bulk_random_read()
+            //bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(path.clone()))).unwrap();
+            times = _vec_duration_to_string(bulk_random_read_latency(path.clone()));
+            tmp = Vec::new();
+            dataset_info.clone_into(&mut tmp);
+            tmp.push("bulk_random_read_latency".to_string());
+            tmp.append(&mut times);
+            bulk_recorder.write_record(tmp).unwrap();
+        }
+
+        println!("\n=== === === === === === === === === === ===\n")
     }
 }
@@ -389,11 +420,11 @@ fn main() {
     benchmark();
 }
 
-fn _vec_duration_to_string(vector_committing_crimes_with_both_direction_and_magnitude: Vec<Duration>) -> Vec<String> {
-    return vector_committing_crimes_with_both_direction_and_magnitude.iter()
-        .map(|item| {
-            format!("{:.5?}", item)
-        })
-        .collect::<Vec<String>>();
-
+fn _vec_duration_to_string(
+    vector_committing_crimes_with_both_direction_and_magnitude: Vec<Duration>,
+) -> Vec<String> {
+    return vector_committing_crimes_with_both_direction_and_magnitude
+        .iter()
+        .map(|item| format!("{:.5?}", item))
+        .collect::<Vec<String>>();
 }
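The subject line notes that clap integration is still to come. As a rough sketch only, a CLI wrapper using clap's derive API could look like the following; this assumes the `clap` crate with its `derive` feature, and both flags are hypothetical rather than anything from this patch:

```rust
// Sketch only: a possible clap front-end for the benchmark binary.
use clap::Parser;

/// Read-only filesystem benchmark
#[derive(Parser)]
struct Args {
    /// Only generate the datasets, then exit (hypothetical flag)
    #[arg(long)]
    prep_only: bool,

    /// Directory the benchmark CSVs are written under (hypothetical flag)
    #[arg(long, default_value = "data")]
    out_dir: String,
}

fn main() {
    let args = Args::parse();
    if args.prep_only {
        println!("would only generate datasets under {}", args.out_dir);
        return;
    }
    println!("would run the full benchmark, writing CSVs under {}", args.out_dir);
}
```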