diff --git a/Cargo.lock b/Cargo.lock index 3e5145d..65e9428 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "anstream" -version = "0.6.17" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -19,9 +19,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" @@ -59,9 +59,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cc" -version = "1.1.31" +version = "1.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +checksum = "0f57c4b4da2a9d619dd035f27316d7a426305b75be93d09e92f2b9229c34feaf" dependencies = [ "shlex", ] @@ -171,7 +171,7 @@ dependencies = [ [[package]] name = "disk-read-benchmark" -version = "0.1.0" +version = "0.2.0" dependencies = [ "clap", "csv", @@ -382,9 +382,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.85" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index d793011..ed4f949 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "disk-read-benchmark" -version = "0.1.0" +version = "0.2.0" edition = "2021" [dependencies] diff --git a/src/benchmarks.rs b/src/benchmarks.rs new file mode 100644 index 0000000..13ad5e5 --- /dev/null +++ b/src/benchmarks.rs @@ -0,0 +1,214 @@ +use rand::{self, Rng, SeedableRng}; +use rand_xorshift::XorShiftRng; +use std::io::Read; +use std::time::{Duration, Instant}; +use std::{fs::File, os::unix::fs::FileExt}; + + +/* + ================================================================= + ==== ==== + ==== ↓ BENCHMARKS ↓ ==== + ==== ==== + ================================================================= +*/ + +pub fn sequential_read(path: String) -> Duration { + let mut f: File = File::open(path).unwrap(); + let size = f.metadata().unwrap().len(); + + let mut data: [u8; 1310720] = [0u8; 1310720]; + // benchmarking/elapsed: https://stackoverflow.com/a/40953863 + let now = Instant::now(); + for _ in 0..(size / 1310720) { + f.read(&mut data).unwrap(); + } + let elapsed = now.elapsed(); + return elapsed; +} + +/// Reads 1 byte from the start of file +pub fn sequential_read_latency(path: String) -> Duration { + let mut f: File = File::open(path).unwrap(); + let mut data: [u8; 1] = [0u8; 1]; + let now = Instant::now(); + f.read(&mut data).unwrap(); + let elapsed = now.elapsed(); + return elapsed; +} + +/// Reads 1 GiB from the file at `path` in random 1 MiB chunks +pub fn random_read(path: String) -> Duration { + let mut rng = XorShiftRng::seed_from_u64(9198675309); + let f: File = File::open(path).unwrap(); + let size = f.metadata().unwrap().len(); + + let mut data: [u8; 1048576] = [0u8; 1048576]; + let now = Instant::now(); + for _ in 0..1024 { + let offset = rng.gen_range(0..((size - 1048576) / 1048576)); + f.read_at(&mut data, offset).unwrap(); + } + let elapsed = now.elapsed(); + return elapsed; +} + +/// Reads 1 random byte from the file at `path` 1024 times +pub fn random_read_latency(path: String) -> Duration { + let mut rng = XorShiftRng::seed_from_u64(9198675309); + let f: File = File::open(path).unwrap(); + let size = f.metadata().unwrap().len(); + let mut data: [u8; 1] = [0u8; 1]; + let now = Instant::now(); + for _ in 0..1024 { + let offset = rng.gen_range(0..(size - 1)); + f.read_at(&mut data, offset).unwrap(); + } + let elapsed = now.elapsed(); + return elapsed; +} + +pub fn bulk_sequential_read(path: String) -> Vec { + let mut data: [u8; 1024] = [0u8; 1024]; + let mut times: Vec = Vec::new(); + for i in 1..1025 { + let mut f: File = File::open(format!("{path}/{i}")).unwrap(); + let now = Instant::now(); + f.read(&mut data).unwrap(); + let elapsed = now.elapsed(); + times.push(elapsed); + } + + return times; +} + +pub fn bulk_sequential_read_latency(path: String) -> Vec { + let mut data: [u8; 1] = [0u8; 1]; + let mut times: Vec = Vec::new(); + for i in 1..1025 { + let now = Instant::now(); + let mut f: File = File::open(format!("{path}/{i}")).unwrap(); + f.read(&mut data).unwrap(); + let elapsed = now.elapsed(); + times.push(elapsed); + } + + return times; +} + +pub fn bulk_random_read_latency(path: String) -> Vec { + let mut rng = XorShiftRng::seed_from_u64(9198675309); + let mut data: [u8; 1] = [0u8; 1]; + let mut times: Vec = Vec::new(); + for i in 1..1025 { + let f: File = File::open(format!("{path}/{i}")).unwrap(); + let offset = rng.gen_range(0..1023); + let now = Instant::now(); + f.read_at(&mut data, offset).unwrap(); + let elapsed = now.elapsed(); + times.push(elapsed); + } + + return times; +} + +pub fn benchmark() { + let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap(); + let mut bulk_recorder = csv::Writer::from_path("data/bulk.csv").unwrap(); + let mountpoint_dir = "data/mountpoints"; + let mut filesystems = std::fs::read_dir(mountpoint_dir) + .unwrap() + .map(|item| { + let tmp = item.unwrap().file_name().into_string().unwrap(); + format!("{mountpoint_dir}/{tmp}") + }) + .collect::>(); + + filesystems.push("data/datasets".to_string()); + + for fs in filesystems { + let single_files = vec![ + "25G-null.bin".to_string(), + "25G-random.bin".to_string(), + "100M-polygon.txt".to_string(), + "kernel/linux-6.6.58.tar.xz".to_string(), + ]; + let bulk_files: Vec = vec![ + "small-files/null".to_string(), + "small-files/random".to_string(), + ]; + + for filename in single_files { + let path = format!("{fs}/{filename}"); + println!("=== {} ===", path.clone()); + + let seq_read = format!("{:.5?}", sequential_read(path.clone())); + println!("Sequential read (complete file read): {}", seq_read.clone()); + + let seq_latency = format!("{:.5?}", sequential_read_latency(path.clone())); + println!("Sequential latency (1 byte read): {}", seq_latency); + + let rand_read = format!("{:.5?}", random_read(path.clone())); + println!("Random read (1024x 1 MiB): {}", rand_read); + + let mut rand_latency: String = "0s".to_string(); + if fs != "data/mountpoints/fuse-archive-tar" { + rand_latency = format!("{:.5?}", random_read_latency(path.clone())); + } + + println!("Random latency (1024x 1 byte read): {}", rand_latency); + + let data: Vec = vec![ + fs.clone(), + filename, + seq_read, + seq_latency, + rand_read, + rand_latency, + ]; + recorder.write_record(data).unwrap(); + println!(); + } + + // bulk files + for folder in bulk_files { + let cloned = fs.clone(); + let path = format!("{cloned}/{folder}"); + println!("[bulk] Testing {}", path); + let dataset_info: Vec = vec![fs.clone(), folder]; + + let mut times = _vec_duration_to_string(bulk_sequential_read(path.clone())); + let mut tmp = Vec::new(); + dataset_info.clone_into(&mut tmp); + tmp.push("bulk_sequential_read".to_string()); + tmp.append(&mut times); + bulk_recorder.write_record(tmp).unwrap(); + + times = _vec_duration_to_string(bulk_sequential_read_latency(path.clone())); + tmp = Vec::new(); + dataset_info.clone_into(&mut tmp); + tmp.push("bulk_sequential_read_latency".to_string()); + tmp.append(&mut times); + bulk_recorder.write_record(tmp).unwrap(); + + // not enough data in these files to warrant bulk_random_read() + //bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(path.clone()))).unwrap(); + times = _vec_duration_to_string(bulk_random_read_latency(path.clone())); + tmp = Vec::new(); + dataset_info.clone_into(&mut tmp); + tmp.push("bulk_random_read_latency".to_string()); + tmp.append(&mut times); + bulk_recorder.write_record(tmp).unwrap(); + } + println!("\n=== === === === === === === === === === ===\n") + } +} + +pub fn _vec_duration_to_string( + vector_committing_crimes_with_both_direction_and_magnitude: Vec, +) -> Vec { + return vector_committing_crimes_with_both_direction_and_magnitude + .iter() + .map(|item| format!("{:.5?}", item)) + .collect::>(); +} diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..a259f92 --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,20 @@ +use clap::{Parser, Subcommand}; + +#[derive(Parser)] +#[command(version, about, long_about = None)] +pub struct Cli { + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand)] +pub enum Commands { + ///Grabs the datasets used for benchmarking + GrabData, + ///Runs the benchmark + Benchmark, + ///Prepares the directories so other programs can prepare their datasets + PrepDirs, + ///Runs it all + Run, +} diff --git a/src/dataset_gathering.rs b/src/dataset_gathering.rs new file mode 100644 index 0000000..9d250d6 --- /dev/null +++ b/src/dataset_gathering.rs @@ -0,0 +1,213 @@ +use curl::easy::Easy as easy_curl; +use rand::{self, RngCore, SeedableRng}; +use rand_xorshift::XorShiftRng; +use std::{ + env::current_dir, + fs::{create_dir_all, exists, remove_dir_all, remove_file, File}, + io::{Error, Write}, + os::unix::fs::FileExt, + process::Command, + sync::{Arc, Mutex}, + thread::{self, JoinHandle}, +}; + +/* +=================== + ==== ==== + ==== ↓ DATASET GATHERING ↓ ==== + ==== ==== + ================================================================= +*/ +pub fn large_random_file_generation(path: String) { + // https://stackoverflow.com/a/65235966 + let out = Arc::new(Mutex::new(File::create(path))); + // NOTE: update this both here and in the helper (_large_random_file_generation_helper()) + let num_threads: u64 = 12; + let mut threads: Vec> = Vec::new(); + for i in 0..num_threads { + let out = Arc::clone(&out); + + let thread = thread::spawn(move || { + _large_random_file_generation_helper(&i, out); + }); + + threads.push(thread); + } + + for t in threads { + t.join().unwrap(); + } +} + +pub fn _large_random_file_generation_helper(i: &u64, out: Arc>>) { + let mut rng = XorShiftRng::seed_from_u64(2484345508); + // NOTE: update this both here and in `large_random_file_generation()` + let num_threads = 12; + let mut data = [0u8; 1310720]; + let block_size = 1310720; + + // enter desired size in bytes, must be a multiple of 655360 + // this is not a typo, the extra zero after 65536is for the threads + // 26843545600 = 25 GiB + let blocks_per_thread: u64 = 26843545600 / (block_size * num_threads); + for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) { + rng.fill_bytes(&mut data); + + //let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u); + let offset: u64 = u * block_size; + let mut out = out.lock().unwrap(); + out.as_mut().unwrap().write_all_at(&data, offset).unwrap(); + } +} + +/* +pub fn single_threaded_large_random_file_generation(path: String) { + let mut out = File::create(path).unwrap(); + let mut rng = XorShiftRng::seed_from_u64(2484345508); + let mut data = [0u8; 65536]; + for _ in 0..409600 { + rng.fill_bytes(&mut data); + out.write_all(&data).unwrap(); + } +} +*/ + +pub fn small_random_files_generation(folder: String) { + let mut rng = XorShiftRng::seed_from_u64(2484345508); + let mut data: [u8; 1024] = [0u8; 1024]; + for i in 1..1025 { + let mut out = File::create(format!("{folder}/{i}")).unwrap(); + rng.fill_bytes(&mut data); + out.write_all(&data).unwrap(); + } +} + +pub fn random_file_generator(path: String, size_mib: u64) { + let mut out = File::create(path).unwrap(); + let mut rng = XorShiftRng::seed_from_u64(2484345508); + + let mut data = [0u8; 1310720]; + let block_size = 1310720; + let blocks: u64 = (size_mib * 1024 * 1024) / block_size; + + for _ in 0..blocks { + rng.fill_bytes(&mut data); + out.write_all(&data).unwrap(); + } +} + +pub fn create_null_file(path: String, size: u64) { + let out = File::create(path).unwrap(); + out.write_all_at(&[0], size - 1).unwrap(); +} + +// no reason for it not to be multithreaded, but there's not much point either, it hardly takes any time... if anything, the overhead from multithreading might be worse? +pub fn small_null_files_generation(folder: String) { + for i in 1..1025 { + create_null_file(format!("{folder}/{i}"), 1024); + } +} + +pub fn grab_kernel(folder: String, kernel_version: String) -> Result { + // maybe i should've just used reqwest, but that's no fun (also much more dependencies and stuff i'm sure) + // NOTE: requires openssl-devel to be installed for compilation (presumably requires openssl-libs for execution) + if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) { + let mut curl = easy_curl::new(); + curl.url(&format!( + "https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz" + )) + .unwrap(); + curl.follow_location(true).unwrap(); + let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap(); + match curl.write_function(move |data| { + out.write_all(data).unwrap(); + Ok(data.len()) + }) { + Ok(_) => (), + Err(e) => return Err(e.to_string()), + } + curl.perform().unwrap(); + } + + // i'm too lazy to do this in rust + if !(exists(format!("{folder}/linux-{kernel_version}")).unwrap()) { + let mut dir = current_dir().unwrap(); + dir.push(folder); + match Command::new("tar") + .current_dir(dir) + .arg("-xf") + .arg(&format!("linux-{kernel_version}.tar.xz")) + .arg("") + .output() + { + Ok(_) => (), + Err(e) => return Err(e.to_string()), + } + } + + return Ok(true); +} + +pub fn grab_datasets() -> Result { + let kernel_version = "6.6.58"; + + if !exists(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap() { + println!("Downloading kernel..."); + create_dir_all("data/datasets/kernel").unwrap(); + match grab_kernel( + "data/datasets/kernel".to_string(), + kernel_version.to_string(), + ) { + Ok(_) => (), + Err(e) => { + remove_dir_all(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap(); + remove_file(format!( + "data/datasets/kernel/linux-{kernel_version}.tar.xz" + )) + .unwrap(); + panic!("{}", e.to_string()); + } + } + println!("Kernel downloaded"); + } + + if !exists(format!("data/datasets/25G-random.bin")).unwrap() { + println!("Generating random 25 GiB file..."); + large_random_file_generation("data/datasets/25G-random.bin".to_string()); + println!("Random 25 GiB file generated"); + } + + if !exists(format!("data/datasets/small-files/random")).unwrap() { + println!("Generating random 1 KiB files..."); + create_dir_all("data/datasets/small-files/random").unwrap(); + small_random_files_generation("data/datasets/small-files/random".to_string()); + println!("Random 1 KiB files generated..."); + } + + if !exists(format!("data/datasets/25G-null.bin")).unwrap() { + println!("Generating null 25 GiB file..."); + create_null_file("data/datasets/25G-null.bin".to_string(), 26843545600); + println!("Null 25 GiB file generated..."); + } + + if !exists("data/datasets/small-files/null").unwrap() { + println!("Generating null 1 KiB files..."); + create_dir_all("data/datasets/small-files/null").unwrap(); + small_null_files_generation("data/datasets/small-files/null".to_string()); + println!("Null 1 KiB files generated..."); + } + + if !exists("data/datasets/100M-polygon.txt").unwrap() { + return Err("*** MANUAL: Get 100M-sided regular polygon data and put it at `./data/datasets/100M-polygon.txt` ***".to_string()); + }; + + return Ok(true); +} + +pub fn prep_other_dirs() -> bool { + if !exists("data/mountpoints").unwrap() { + create_dir_all("data/mountpoints").unwrap(); + }; + + return true; +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..98135b1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,3 @@ +pub mod dataset_gathering; +pub mod benchmarks; +pub mod cli; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index e7a13c5..1a3dd29 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,430 +1,26 @@ -use curl::easy::Easy as easy_curl; -use rand::{self, Rng, RngCore, SeedableRng}; -use rand_xorshift::XorShiftRng; -use std::io::Read; -use std::time::{Duration, Instant}; -use std::{ - env::current_dir, - fs::{create_dir_all, exists, remove_dir_all, remove_file, File}, - io::{Error, Write}, - os::unix::fs::FileExt, - process::Command, - sync::{Arc, Mutex}, - thread::{self, JoinHandle}, -}; - -/* - ================================================================= - ==== ==== - ==== ↓ DATASET GATHERING ↓ ==== - ==== ==== - ================================================================= -*/ - -fn large_random_file_generation(path: String) { - // https://stackoverflow.com/a/65235966 - let out = Arc::new(Mutex::new(File::create(path))); - // NOTE: update this both here and in the helper (_large_random_file_generation_helper()) - let num_threads: u64 = 12; - let mut threads: Vec> = Vec::new(); - for i in 0..num_threads { - let out = Arc::clone(&out); - - let thread = thread::spawn(move || { - _large_random_file_generation_helper(&i, out); - }); - - threads.push(thread); - } - - for t in threads { - t.join().unwrap(); - } -} - -fn _large_random_file_generation_helper(i: &u64, out: Arc>>) { - let mut rng = XorShiftRng::seed_from_u64(2484345508); - // NOTE: update this both here and in `large_random_file_generation()` - let num_threads = 12; - let mut data = [0u8; 1310720]; - let block_size = 1310720; - - // enter desired size in bytes, must be a multiple of 655360 - // this is not a typo, the extra zero after 65536is for the threads - // 26843545600 = 25 GiB - let blocks_per_thread: u64 = 26843545600 / (block_size * num_threads); - for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) { - rng.fill_bytes(&mut data); - - //let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u); - let offset: u64 = u * block_size; - let mut out = out.lock().unwrap(); - out.as_mut().unwrap().write_all_at(&data, offset).unwrap(); - } -} - -/* -fn single_threaded_large_random_file_generation(path: String) { - let mut out = File::create(path).unwrap(); - let mut rng = XorShiftRng::seed_from_u64(2484345508); - let mut data = [0u8; 65536]; - for _ in 0..409600 { - rng.fill_bytes(&mut data); - out.write_all(&data).unwrap(); - } -} -*/ - -fn small_random_files_generation(folder: String) { - let mut rng = XorShiftRng::seed_from_u64(2484345508); - let mut data: [u8; 1024] = [0u8; 1024]; - for i in 1..1025 { - let mut out = File::create(format!("{folder}/{i}")).unwrap(); - rng.fill_bytes(&mut data); - out.write_all(&data).unwrap(); - } -} - -fn random_file_generator(path: String, size_mib: u64) { - let mut out = File::create(path).unwrap(); - let mut rng = XorShiftRng::seed_from_u64(2484345508); - - let mut data = [0u8; 1310720]; - let block_size = 1310720; - let blocks: u64 = (size_mib * 1024 * 1024) / block_size; - - for _ in 0..blocks { - rng.fill_bytes(&mut data); - out.write_all(&data).unwrap(); - } -} - -fn create_null_file(path: String, size: u64) { - let out = File::create(path).unwrap(); - out.write_all_at(&[0], size - 1).unwrap(); -} - -// no reason for it not to be multithreaded, but there's not much point either, it hardly takes any time... if anything, the overhead from multithreading might be worse? -fn small_null_files_generation(folder: String) { - for i in 1..1025 { - create_null_file(format!("{folder}/{i}"), 1024); - } -} - -fn grab_kernel(folder: String, kernel_version: String) -> Result { - // maybe i should've just used reqwest, but that's no fun (also much more dependencies and stuff i'm sure) - // NOTE: requires openssl-devel to be installed for compilation (presumably requires openssl-libs for execution) - if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) { - let mut curl = easy_curl::new(); - curl.url(&format!( - "https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz" - )) - .unwrap(); - curl.follow_location(true).unwrap(); - let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap(); - match curl.write_function(move |data| { - out.write_all(data).unwrap(); - Ok(data.len()) - }) { - Ok(_) => (), - Err(e) => return Err(e.to_string()), - } - curl.perform().unwrap(); - } - - // i'm too lazy to do this in rust - if !(exists(format!("{folder}/linux-{kernel_version}")).unwrap()) { - let mut dir = current_dir().unwrap(); - dir.push(folder); - match Command::new("tar") - .current_dir(dir) - .arg("-xf") - .arg(&format!("linux-{kernel_version}.tar.xz")) - .arg("") - .output() - { - Ok(_) => (), - Err(e) => return Err(e.to_string()), - } - } - - return Ok(true); -} - -fn grab_datasets() -> Result { - let kernel_version = "6.6.58"; - - if !exists(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap() { - println!("Downloading kernel..."); - create_dir_all("data/datasets/kernel").unwrap(); - match grab_kernel( - "data/datasets/kernel".to_string(), - kernel_version.to_string(), - ) { - Ok(_) => (), - Err(e) => { - remove_dir_all(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap(); - remove_file(format!( - "data/datasets/kernel/linux-{kernel_version}.tar.xz" - )) - .unwrap(); - panic!("{}", e.to_string()); - } - } - println!("Kernel downloaded"); - } - - if !exists(format!("data/datasets/25G-random.bin")).unwrap() { - println!("Generating random 25 GiB file..."); - large_random_file_generation("data/datasets/25G-random.bin".to_string()); - println!("Random 25 GiB file generated"); - } - - if !exists(format!("data/datasets/small-files/random")).unwrap() { - println!("Generating random 1 KiB files..."); - create_dir_all("data/datasets/small-files/random").unwrap(); - small_random_files_generation("data/datasets/small-files/random".to_string()); - println!("Random 1 KiB files generated..."); - } - - if !exists(format!("data/datasets/25G-null.bin")).unwrap() { - println!("Generating null 25 GiB file..."); - create_null_file("data/datasets/25G-null.bin".to_string(), 26843545600); - println!("Null 25 GiB file generated..."); - } - - if !exists("data/datasets/small-files/null").unwrap() { - println!("Generating null 1 KiB files..."); - create_dir_all("data/datasets/small-files/null").unwrap(); - small_null_files_generation("data/datasets/small-files/null".to_string()); - println!("Null 1 KiB files generated..."); - } - - if !exists("data/datasets/100M-polygon.txt").unwrap() { - return Err("*** MANUAL: Get 100M-sided regular polygon data and put it at `./data/datasets/100M-polygon.txt` ***".to_string()); - }; - - return Ok(true); -} - -fn prep_other_dirs() -> bool { - if !exists("data/mountpoints").unwrap() { - create_dir_all("data/mountpoints").unwrap(); - }; - - return true; -} - -/* - ================================================================= - ==== ==== - ==== ↓ BENCHMARKS ↓ ==== - ==== ==== - ================================================================= -*/ - -fn sequential_read(path: String) -> Duration { - let mut f: File = File::open(path).unwrap(); - let size = f.metadata().unwrap().len(); - - let mut data: [u8; 1310720] = [0u8; 1310720]; - // benchmarking/elapsed: https://stackoverflow.com/a/40953863 - let now = Instant::now(); - for _ in 0..(size / 1310720) { - f.read(&mut data).unwrap(); - } - let elapsed = now.elapsed(); - return elapsed; -} - -/// Reads 1 byte from the start of file -fn sequential_read_latency(path: String) -> Duration { - let mut f: File = File::open(path).unwrap(); - let mut data: [u8; 1] = [0u8; 1]; - let now = Instant::now(); - f.read(&mut data).unwrap(); - let elapsed = now.elapsed(); - return elapsed; -} - -/// Reads 1 GiB from the file at `path` in random 1 MiB chunks -fn random_read(path: String) -> Duration { - let mut rng = XorShiftRng::seed_from_u64(9198675309); - let f: File = File::open(path).unwrap(); - let size = f.metadata().unwrap().len(); - - let mut data: [u8; 1048576] = [0u8; 1048576]; - let now = Instant::now(); - for _ in 0..1024 { - let offset = rng.gen_range(0..((size - 1048576) / 1048576)); - f.read_at(&mut data, offset).unwrap(); - } - let elapsed = now.elapsed(); - return elapsed; -} - -/// Reads 1 random byte from the file at `path` 1024 times -fn random_read_latency(path: String) -> Duration { - let mut rng = XorShiftRng::seed_from_u64(9198675309); - let f: File = File::open(path).unwrap(); - let size = f.metadata().unwrap().len(); - let mut data: [u8; 1] = [0u8; 1]; - let now = Instant::now(); - for _ in 0..1024 { - let offset = rng.gen_range(0..(size - 1)); - f.read_at(&mut data, offset).unwrap(); - } - let elapsed = now.elapsed(); - return elapsed; -} - -fn bulk_sequential_read(path: String) -> Vec { - let mut data: [u8; 1024] = [0u8; 1024]; - let mut times: Vec = Vec::new(); - for i in 1..1025 { - let mut f: File = File::open(format!("{path}/{i}")).unwrap(); - let now = Instant::now(); - f.read(&mut data).unwrap(); - let elapsed = now.elapsed(); - times.push(elapsed); - } - - return times; -} - -fn bulk_sequential_read_latency(path: String) -> Vec { - let mut data: [u8; 1] = [0u8; 1]; - let mut times: Vec = Vec::new(); - for i in 1..1025 { - let now = Instant::now(); - let mut f: File = File::open(format!("{path}/{i}")).unwrap(); - f.read(&mut data).unwrap(); - let elapsed = now.elapsed(); - times.push(elapsed); - } - - return times; -} - -fn bulk_random_read_latency(path: String) -> Vec { - let mut rng = XorShiftRng::seed_from_u64(9198675309); - let mut data: [u8; 1] = [0u8; 1]; - let mut times: Vec = Vec::new(); - for i in 1..1025 { - let mut f: File = File::open(format!("{path}/{i}")).unwrap(); - let offset = rng.gen_range(0..1023); - let now = Instant::now(); - f.read_at(&mut data, offset).unwrap(); - let elapsed = now.elapsed(); - times.push(elapsed); - } - - return times; -} - -fn benchmark() { - let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap(); - let mut bulk_recorder = csv::Writer::from_path("data/bulk.csv").unwrap(); - let mountpoint_dir = "data/mountpoints"; - let mut filesystems = std::fs::read_dir(mountpoint_dir) - .unwrap() - .map(|item| { - let tmp = item.unwrap().file_name().into_string().unwrap(); - format!("{mountpoint_dir}/{tmp}") - }) - .collect::>(); - - filesystems.push("data/datasets".to_string()); - - for fs in filesystems { - let single_files = vec![ - "25G-null.bin".to_string(), - "25G-random.bin".to_string(), - "100M-polygon.txt".to_string(), - "kernel/linux-6.6.58.tar.xz".to_string(), - ]; - let bulk_files: Vec = vec![ - "small-files/null".to_string(), - "small-files/random".to_string(), - ]; - - for filename in single_files { - let path = format!("{fs}/{filename}"); - println!("=== {} ===", path.clone()); - - let seq_read = format!("{:.5?}", sequential_read(path.clone())); - println!("Sequential read (complete file read): {}", seq_read.clone()); - - let seq_latency = format!("{:.5?}", sequential_read_latency(path.clone())); - println!("Sequential latency (1 byte read): {}", seq_latency); - - let rand_read = format!("{:.5?}", random_read(path.clone())); - println!("Random read (1024x 1 MiB): {}", rand_read); - - let mut rand_latency: String = "0s".to_string(); - if fs != "data/mountpoints/fuse-archive-tar" { - rand_latency = format!("{:.5?}", random_read_latency(path.clone())); - } - - println!("Random latency (1024x 1 byte read): {}", rand_latency); - - let data: Vec = vec![ - fs.clone(), - filename, - seq_read, - seq_latency, - rand_read, - rand_latency, - ]; - recorder.write_record(data).unwrap(); - println!(); - } - - // bulk files - for folder in bulk_files { - let cloned = fs.clone(); - let path = format!("{cloned}/{folder}"); - println!("[bulk] Testing {}", path); - let dataset_info: Vec = vec![fs.clone(), folder]; - - let mut times = _vec_duration_to_string(bulk_sequential_read(path.clone())); - let mut tmp = Vec::new(); - dataset_info.clone_into(&mut tmp); - tmp.push("bulk_sequential_read".to_string()); - tmp.append(&mut times); - bulk_recorder.write_record(tmp).unwrap(); - - times = _vec_duration_to_string(bulk_sequential_read_latency(path.clone())); - tmp = Vec::new(); - dataset_info.clone_into(&mut tmp); - tmp.push("bulk_sequential_read_latency".to_string()); - tmp.append(&mut times); - bulk_recorder.write_record(tmp).unwrap(); - - // not enough data in these files to warrant bulk_random_read() - //bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(path.clone()))).unwrap(); - times = _vec_duration_to_string(bulk_random_read_latency(path.clone())); - tmp = Vec::new(); - dataset_info.clone_into(&mut tmp); - tmp.push("bulk_random_read_latency".to_string()); - tmp.append(&mut times); - bulk_recorder.write_record(tmp).unwrap(); - } - println!("\n=== === === === === === === === === === ===\n") - } -} +use clap::Parser; +use disk_read_benchmark::benchmarks::benchmark; +use disk_read_benchmark::cli::*; +use disk_read_benchmark::dataset_gathering::*; fn main() { - grab_datasets().unwrap(); - prep_other_dirs(); - benchmark(); -} + let cli = Cli::parse(); -fn _vec_duration_to_string( - vector_committing_crimes_with_both_direction_and_magnitude: Vec, -) -> Vec { - return vector_committing_crimes_with_both_direction_and_magnitude - .iter() - .map(|item| format!("{:.5?}", item)) - .collect::>(); + match cli.command { + Commands::PrepDirs => { + // FIXME: okay i'm dumb, this only covers stuff that's not handled by grab_datasets(), and literally nothing creates ext-workdir + prep_other_dirs(); + } + Commands::GrabData => { + grab_datasets().unwrap(); // * should unwrap + } + Commands::Benchmark => { + benchmark(); + } + Commands::Run => { + prep_other_dirs(); + grab_datasets().unwrap(); // * should unwrap + benchmark(); + } + } }