me when actual cli:

2024-11-05 15:25:38 -06:00 · 2024-11-05 15:25:38 -06:00 · 39c2378e72
commit 39c2378e72
parent 30117dde4b
7 changed files with 482 additions and 436 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4,9 +4,9 @@ version = 3

 [[package]]
 name = "anstream"
-version = "0.6.17"
+version = "0.6.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338"
+checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
 dependencies = [
 "anstyle",
 "anstyle-parse",
@ -19,9 +19,9 @@ dependencies = [

 [[package]]
 name = "anstyle"
-version = "1.0.9"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56"
+checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"

 [[package]]
 name = "anstyle-parse"
@ -59,9 +59,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"

 [[package]]
 name = "cc"
-version = "1.1.31"
+version = "1.1.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f"
+checksum = "0f57c4b4da2a9d619dd035f27316d7a426305b75be93d09e92f2b9229c34feaf"
 dependencies = [
 "shlex",
 ]
@ -171,7 +171,7 @@ dependencies = [

 [[package]]
 name = "disk-read-benchmark"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
 "clap",
 "csv",
@ -382,9 +382,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"

 [[package]]
 name = "syn"
-version = "2.0.85"
+version = "2.0.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56"
+checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
 dependencies = [
 "proc-macro2",
 "quote",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "disk-read-benchmark"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2021"

 [dependencies]
--- a/src/benchmarks.rs
+++ b/src/benchmarks.rs
@ -0,0 +1,214 @@
+use rand::{self, Rng, SeedableRng};
+use rand_xorshift::XorShiftRng;
+use std::io::Read;
+use std::time::{Duration, Instant};
+use std::{fs::File, os::unix::fs::FileExt};
+
+
+/*
+    =================================================================
+    ====                                                         ====
+    ====                     ↓ BENCHMARKS ↓                      ====
+    ====                                                         ====
+    =================================================================
+*/
+
+/// Reads the whole file at `path` front to back in 1.25 MiB (1310720-byte)
+/// requests and returns how long that took.
+///
+/// Reading continues until `read` reports end-of-file, so the trailing partial
+/// chunk and any short reads are included in the measurement. Opening the file
+/// is not timed.
+///
+/// # Panics
+/// Panics if the file cannot be opened or a read fails.
+pub fn sequential_read(path: String) -> Duration {
+    let mut f: File = File::open(path).unwrap();
+
+    let mut data: [u8; 1310720] = [0u8; 1310720];
+    // benchmarking/elapsed: https://stackoverflow.com/a/40953863
+    let now = Instant::now();
+    // `read` may return fewer bytes than requested; only `Ok(0)` means EOF.
+    // Counting `size / 1310720` iterations would skip the tail of the file
+    // and under-read whenever a read comes back short.
+    while f.read(&mut data).unwrap() != 0 {}
+    now.elapsed()
+}
+
+/// Times a single 1-byte read issued at the start of the file at `path`.
+///
+/// Opening the file happens before the timer starts.
+pub fn sequential_read_latency(path: String) -> Duration {
+    let mut file = File::open(path).unwrap();
+    let mut byte = [0u8; 1];
+    let start = Instant::now();
+    file.read(&mut byte).unwrap();
+    start.elapsed()
+}
+
+/// Reads 1 GiB from the file at `path` as 1024 reads of 1 MiB each, issued at
+/// pseudo-random 1 MiB-aligned offsets, and returns the total elapsed time.
+///
+/// The RNG seed is fixed, so the sequence of offsets is the same on every run
+/// for a file of a given size. Files smaller than 1 MiB are read from offset 0.
+///
+/// # Panics
+/// Panics if the file cannot be opened, its metadata cannot be read, or a read
+/// fails.
+pub fn random_read(path: String) -> Duration {
+    const CHUNK: u64 = 1048576;
+
+    let mut rng = XorShiftRng::seed_from_u64(9198675309);
+    let f: File = File::open(path).unwrap();
+    let size = f.metadata().unwrap().len();
+    // Number of whole chunks in the file; clamped to 1 so a tiny file neither
+    // underflows nor produces an empty range for `gen_range`.
+    let chunks = (size / CHUNK).max(1);
+
+    let mut data: [u8; 1048576] = [0u8; 1048576];
+    let now = Instant::now();
+    for _ in 0..1024 {
+        // The random value is a chunk index; it has to be scaled to a byte
+        // offset, otherwise every read lands within the first few KiB.
+        let offset = rng.gen_range(0..chunks) * CHUNK;
+        f.read_at(&mut data, offset).unwrap();
+    }
+    now.elapsed()
+}
+
+/// Performs 1024 single-byte reads at pseudo-random offsets within the file at
+/// `path` and returns the total time they took.
+///
+/// Offsets are drawn from `0..size - 1` using a fixed seed.
+pub fn random_read_latency(path: String) -> Duration {
+    let mut rng = XorShiftRng::seed_from_u64(9198675309);
+    let file = File::open(path).unwrap();
+    let size = file.metadata().unwrap().len();
+    let mut byte = [0u8; 1];
+
+    let start = Instant::now();
+    (0..1024).for_each(|_| {
+        let offset = rng.gen_range(0..(size - 1));
+        file.read_at(&mut byte, offset).unwrap();
+    });
+    start.elapsed()
+}
+
+/// Opens `{path}/1` through `{path}/1024` in order and times one read of up to
+/// 1024 bytes from each file.
+///
+/// Only the read is timed; each file is opened before its timer starts.
+/// Returns one duration per file, in file order.
+pub fn bulk_sequential_read(path: String) -> Vec<Duration> {
+    let mut buffer = [0u8; 1024];
+    (1..=1024)
+        .map(|index| {
+            let mut file = File::open(format!("{path}/{index}")).unwrap();
+            let start = Instant::now();
+            file.read(&mut buffer).unwrap();
+            start.elapsed()
+        })
+        .collect()
+}
+
+/// Opens `{path}/1` through `{path}/1024` in order and times opening each file
+/// plus reading its first byte.
+///
+/// Unlike `bulk_sequential_read`, the timer starts before the file is opened.
+/// Returns one duration per file, in file order.
+pub fn bulk_sequential_read_latency(path: String) -> Vec<Duration> {
+    let mut byte = [0u8; 1];
+    (1..=1024)
+        .map(|index| {
+            let start = Instant::now();
+            let mut file = File::open(format!("{path}/{index}")).unwrap();
+            file.read(&mut byte).unwrap();
+            start.elapsed()
+        })
+        .collect()
+}
+
+/// Opens `{path}/1` through `{path}/1024` in order and times a single-byte read
+/// at a pseudo-random offset in `0..1023` of each file.
+///
+/// The seed is fixed, and neither the open nor the offset selection is timed.
+/// Returns one duration per file, in file order.
+pub fn bulk_random_read_latency(path: String) -> Vec<Duration> {
+    let mut rng = XorShiftRng::seed_from_u64(9198675309);
+    let mut byte = [0u8; 1];
+    (1..=1024)
+        .map(|index| {
+            let file = File::open(format!("{path}/{index}")).unwrap();
+            let offset = rng.gen_range(0..1023);
+            let start = Instant::now();
+            file.read_at(&mut byte, offset).unwrap();
+            start.elapsed()
+        })
+        .collect()
+}
+
+/// Runs the single-file and bulk benchmarks against every entry of
+/// `data/mountpoints` and then against `data/datasets`.
+///
+/// Single-file results are printed and written to `data/benchmark-data.csv`
+/// (one row per file); bulk results go to `data/bulk.csv` (one row per
+/// benchmark per folder, with one column per measured file).
+///
+/// # Panics
+/// Panics if a CSV file cannot be created, the mountpoint directory cannot be
+/// listed, a record cannot be written, or any benchmark itself panics.
+pub fn benchmark() {
+    let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap();
+    let mut bulk_recorder = csv::Writer::from_path("data/bulk.csv").unwrap();
+
+    let mountpoint_dir = "data/mountpoints";
+    let mut filesystems: Vec<String> = Vec::new();
+    for entry in std::fs::read_dir(mountpoint_dir).unwrap() {
+        let name = entry.unwrap().file_name().into_string().unwrap();
+        filesystems.push(format!("{mountpoint_dir}/{name}"));
+    }
+    // the plain dataset directory is always benchmarked last
+    filesystems.push("data/datasets".to_string());
+
+    let single_files = [
+        "25G-null.bin",
+        "25G-random.bin",
+        "100M-polygon.txt",
+        "kernel/linux-6.6.58.tar.xz",
+    ];
+    let bulk_folders = ["small-files/null", "small-files/random"];
+
+    // not enough data in the small files to warrant a bulk_random_read()
+    let bulk_benchmarks: [(&str, fn(String) -> Vec<Duration>); 3] = [
+        ("bulk_sequential_read", bulk_sequential_read),
+        ("bulk_sequential_read_latency", bulk_sequential_read_latency),
+        ("bulk_random_read_latency", bulk_random_read_latency),
+    ];
+
+    for fs in &filesystems {
+        for filename in single_files {
+            let path = format!("{fs}/{filename}");
+            println!("=== {} ===", path);
+
+            let seq_read = format!("{:.5?}", sequential_read(path.clone()));
+            println!("Sequential read (complete file read): {}", seq_read);
+
+            let seq_latency = format!("{:.5?}", sequential_read_latency(path.clone()));
+            println!("Sequential latency (1 byte read): {}", seq_latency);
+
+            let rand_read = format!("{:.5?}", random_read(path.clone()));
+            println!("Random read (1024x 1 MiB): {}", rand_read);
+
+            // the random latency test is skipped for this one mountpoint and
+            // recorded as "0s" instead
+            let rand_latency = if fs.as_str() != "data/mountpoints/fuse-archive-tar" {
+                format!("{:.5?}", random_read_latency(path.clone()))
+            } else {
+                "0s".to_string()
+            };
+            println!("Random latency (1024x 1 byte read): {}", rand_latency);
+
+            recorder
+                .write_record([
+                    fs.as_str(),
+                    filename,
+                    seq_read.as_str(),
+                    seq_latency.as_str(),
+                    rand_read.as_str(),
+                    rand_latency.as_str(),
+                ])
+                .unwrap();
+            println!();
+        }
+
+        // bulk files
+        for folder in bulk_folders {
+            let path = format!("{fs}/{folder}");
+            println!("[bulk] Testing {}", path);
+
+            for (label, run) in bulk_benchmarks {
+                // row layout: filesystem, folder, benchmark name, then one
+                // formatted duration per file
+                let mut record = vec![fs.clone(), folder.to_string(), label.to_string()];
+                record.extend(_vec_duration_to_string(run(path.clone())));
+                bulk_recorder.write_record(record).unwrap();
+            }
+        }
+        println!("\n=== === === === === === === === === === ===\n")
+    }
+}
+
+/// Renders every duration with `Duration`'s `Debug` formatting at five
+/// fractional digits (e.g. `1.00000s`), preserving the input order.
+pub fn _vec_duration_to_string(
+    vector_committing_crimes_with_both_direction_and_magnitude: Vec<Duration>,
+) -> Vec<String> {
+    let durations = vector_committing_crimes_with_both_direction_and_magnitude;
+    let mut rendered: Vec<String> = Vec::with_capacity(durations.len());
+    for duration in &durations {
+        rendered.push(format!("{duration:.5?}"));
+    }
+    rendered
+}
--- a/src/cli.rs
+++ b/src/cli.rs
@ -0,0 +1,20 @@
+use clap::{Parser, Subcommand};
+
+// Top-level command-line arguments; the only input is a required subcommand.
+// NOTE(review): plain `//` comments are used here on purpose — clap's derive
+// is understood to turn `///` doc comments into help text; confirm before
+// converting these.
+#[derive(Parser)]
+#[command(version, about, long_about = None)]
+pub struct Cli {
+    // The subcommand selected by the user (see `Commands`).
+    #[command(subcommand)]
+    pub command: Commands,
+}
+
+// The subcommands the binary accepts. The existing `///` lines on each
+// variant are left untouched because they are presumably rendered as the
+// per-subcommand help text by clap — verify before rewording them.
+#[derive(Subcommand)]
+pub enum Commands {
+    ///Grabs the datasets used for benchmarking
+    GrabData,
+    ///Runs the benchmark
+    Benchmark,
+    ///Prepares the directories so other programs can prepare their datasets
+    PrepDirs,
+    ///Runs it all
+    Run,
+}
--- a/src/dataset_gathering.rs
+++ b/src/dataset_gathering.rs
@ -0,0 +1,213 @@
+use curl::easy::Easy as easy_curl;
+use rand::{self, RngCore, SeedableRng};
+use rand_xorshift::XorShiftRng;
+use std::{
+    env::current_dir,
+    fs::{create_dir_all, exists, remove_dir_all, remove_file, File},
+    io::{Error, Write},
+    os::unix::fs::FileExt,
+    process::Command,
+    sync::{Arc, Mutex},
+    thread::{self, JoinHandle},
+};
+
+/*
+    =================================================================
+    ====                                                         ====
+    ====                 ↓ DATASET GATHERING ↓                   ====
+    ====                                                         ====
+    =================================================================
+*/
+/// Creates the file at `path` and fills it using 12 worker threads, each
+/// running `_large_random_file_generation_helper` with its own index.
+///
+/// Blocks until every worker has finished.
+///
+/// # Panics
+/// Panics if any worker thread panicked.
+pub fn large_random_file_generation(path: String) {
+    // NOTE: update this both here and in the helper (_large_random_file_generation_helper())
+    let num_threads: u64 = 12;
+
+    // https://stackoverflow.com/a/65235966
+    // The `Result` from `File::create` is shared as-is; the helper unwraps it.
+    let shared_file = Arc::new(Mutex::new(File::create(path)));
+
+    // Spawn every worker first, then wait for all of them.
+    let workers: Vec<JoinHandle<()>> = (0..num_threads)
+        .map(|i| {
+            let out = Arc::clone(&shared_file);
+            thread::spawn(move || _large_random_file_generation_helper(&i, out))
+        })
+        .collect();
+
+    for worker in workers {
+        worker.join().unwrap();
+    }
+}
+
+/// Worker for `large_random_file_generation()`: writes thread `i`'s share of
+/// the 25 GiB file as pseudo-random 1310720-byte blocks.
+///
+/// * `i` - zero-based index of this worker (`0..12`).
+/// * `out` - the shared output file; locked only for the duration of each
+///   block write.
+///
+/// Each worker seeds its generator with a distinct value derived from `i`.
+/// With one shared seed every worker produced the same byte stream, so the
+/// file was twelve copies of the same data. The block range is split
+/// proportionally so that the workers together cover every block even when
+/// the block count is not divisible by the number of threads.
+///
+/// # Panics
+/// Panics if the file could not be created, the mutex is poisoned, or a write
+/// fails.
+pub fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Error>>>) {
+    // NOTE: update this both here and in `large_random_file_generation()`
+    let num_threads: u64 = 12;
+    let block_size: u64 = 1310720;
+
+    // enter desired size in bytes, must be a multiple of `block_size`
+    // 26843545600 = 25 GiB
+    let total_blocks: u64 = 26843545600 / block_size;
+
+    let index = *i;
+    let mut rng = XorShiftRng::seed_from_u64(2484345508 + index);
+    // heap buffer: a 1.25 MiB array is uncomfortably close to the default
+    // stack size of a spawned thread
+    let mut data = vec![0u8; block_size as usize];
+
+    let first_block = index * total_blocks / num_threads;
+    let end_block = (index + 1) * total_blocks / num_threads;
+    for block in first_block..end_block {
+        // generate outside the lock so workers only serialize on the write
+        rng.fill_bytes(&mut data);
+
+        let offset: u64 = block * block_size;
+        let mut out = out.lock().unwrap();
+        out.as_mut().unwrap().write_all_at(&data, offset).unwrap();
+    }
+}
+
+/*
+pub fn single_threaded_large_random_file_generation(path: String) {
+    let mut out = File::create(path).unwrap();
+    let mut rng = XorShiftRng::seed_from_u64(2484345508);
+    let mut data = [0u8; 65536];
+    for _ in 0..409600 {
+        rng.fill_bytes(&mut data);
+        out.write_all(&data).unwrap();
+    }
+}
+*/
+
+/// Creates the files `{folder}/1` through `{folder}/1024`, each holding 1024
+/// pseudo-random bytes drawn from one fixed-seed generator.
+///
+/// # Panics
+/// Panics if a file cannot be created or written.
+pub fn small_random_files_generation(folder: String) {
+    let mut rng = XorShiftRng::seed_from_u64(2484345508);
+    let mut payload = [0u8; 1024];
+    (1..=1024).for_each(|index| {
+        let mut file = File::create(format!("{folder}/{index}")).unwrap();
+        rng.fill_bytes(&mut payload);
+        file.write_all(&payload).unwrap();
+    });
+}
+
+/// Creates the file at `path` and fills it with exactly `size_mib` MiB of
+/// pseudo-random data from a fixed-seed generator.
+///
+/// Data is written in blocks of up to 1310720 bytes; the final block is
+/// shortened as needed so sizes that are not a multiple of the block size are
+/// honoured instead of being rounded down.
+///
+/// # Panics
+/// Panics if the file cannot be created or a write fails.
+pub fn random_file_generator(path: String, size_mib: u64) {
+    let mut out = File::create(path).unwrap();
+    let mut rng = XorShiftRng::seed_from_u64(2484345508);
+
+    let block_size: u64 = 1310720;
+    let mut data = vec![0u8; block_size as usize];
+
+    let mut remaining: u64 = size_mib * 1024 * 1024;
+    while remaining > 0 {
+        // the last block may be shorter than `block_size`
+        let len = remaining.min(block_size) as usize;
+        rng.fill_bytes(&mut data[..len]);
+        out.write_all(&data[..len]).unwrap();
+        remaining -= len as u64;
+    }
+}
+
+/// Creates (or truncates) the file at `path` and sets its length to `size`
+/// bytes, all of which read back as zero.
+///
+/// The length is set directly rather than by writing a byte at `size - 1`,
+/// so a `size` of 0 yields an empty file instead of underflowing.
+///
+/// # Panics
+/// Panics if the file cannot be created or resized.
+pub fn create_null_file(path: String, size: u64) {
+    let out = File::create(path).unwrap();
+    out.set_len(size).unwrap();
+}
+
+// Deliberately single-threaded: this takes hardly any time, and the overhead
+// of multithreading might well outweigh any gain.
+/// Creates the files `{folder}/1` through `{folder}/1024` via
+/// `create_null_file`, each 1024 bytes long.
+pub fn small_null_files_generation(folder: String) {
+    (1..=1024).for_each(|index| create_null_file(format!("{folder}/{index}"), 1024));
+}
+
+/// Downloads `linux-{kernel_version}.tar.xz` from cdn.kernel.org into `folder`
+/// (unless the archive is already there) and extracts it with `tar` (unless
+/// the extracted directory is already there).
+///
+/// * `folder` - destination directory, relative to the current directory.
+/// * `kernel_version` - version string used in the archive name; the URL is
+///   hard-coded to the `v6.x` directory.
+///
+/// Returns `Ok(true)` on success.
+///
+/// # Errors
+/// Returns the error text if a filesystem check fails, the transfer cannot be
+/// set up or fails (including HTTP error statuses), `tar` cannot be started,
+/// or `tar` exits unsuccessfully.
+///
+/// # Panics
+/// Panics if writing a downloaded chunk to the archive file fails.
+pub fn grab_kernel(folder: String, kernel_version: String) -> Result<bool, String> {
+    let archive_name = format!("linux-{kernel_version}.tar.xz");
+    let archive_path = format!("{folder}/{archive_name}");
+
+    // maybe i should've just used reqwest, but that's no fun (also much more dependencies and stuff i'm sure)
+    // NOTE: requires openssl-devel to be installed for compilation (presumably requires openssl-libs for execution)
+    if !exists(&archive_path).map_err(|e| e.to_string())? {
+        let mut curl = easy_curl::new();
+        curl.url(&format!(
+            "https://cdn.kernel.org/pub/linux/kernel/v6.x/{archive_name}"
+        ))
+        .map_err(|e| e.to_string())?;
+        curl.follow_location(true).map_err(|e| e.to_string())?;
+        // without this, an HTTP error page (e.g. a 404) would be saved as the archive
+        curl.fail_on_error(true).map_err(|e| e.to_string())?;
+
+        let mut out = File::create(&archive_path).map_err(|e| e.to_string())?;
+        curl.write_function(move |data| {
+            out.write_all(data).unwrap();
+            Ok(data.len())
+        })
+        .map_err(|e| e.to_string())?;
+        // report a failed transfer to the caller so it can clean up
+        curl.perform().map_err(|e| e.to_string())?;
+    }
+
+    // i'm too lazy to do this in rust
+    if !exists(format!("{folder}/linux-{kernel_version}")).map_err(|e| e.to_string())? {
+        let mut dir = current_dir().map_err(|e| e.to_string())?;
+        dir.push(folder);
+        let output = Command::new("tar")
+            .current_dir(dir)
+            .arg("-xf")
+            .arg(&archive_name)
+            .output()
+            .map_err(|e| e.to_string())?;
+        // `output()` only fails when tar cannot be started; a failed
+        // extraction shows up in the exit status
+        if !output.status.success() {
+            return Err(format!(
+                "tar failed ({}): {}",
+                output.status,
+                String::from_utf8_lossy(&output.stderr).trim()
+            ));
+        }
+    }
+
+    Ok(true)
+}
+
+/// Makes sure every benchmark dataset exists under `data/datasets`, creating
+/// whichever ones are missing: the kernel archive and source tree, the 25 GiB
+/// random and null files, and the folders of 1 KiB random and null files.
+///
+/// Returns `Ok(true)` once everything is present.
+///
+/// # Errors
+/// * Returns the error from `grab_kernel` if the kernel download or extraction
+///   fails; partially created kernel files are removed on a best-effort basis
+///   first.
+/// * Returns an error asking for manual action if
+///   `data/datasets/100M-polygon.txt` is missing, since it cannot be generated
+///   here.
+///
+/// # Panics
+/// Panics if an existence check or a directory creation fails, or if one of
+/// the generators panics.
+pub fn grab_datasets() -> Result<bool, String> {
+    let kernel_version = "6.6.58";
+
+    if !exists(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap() {
+        println!("Downloading kernel...");
+        create_dir_all("data/datasets/kernel").unwrap();
+        if let Err(e) = grab_kernel(
+            "data/datasets/kernel".to_string(),
+            kernel_version.to_string(),
+        ) {
+            // Best-effort cleanup: either path may not exist yet (e.g. when the
+            // download itself failed), and a cleanup failure must not hide
+            // the original error.
+            let _ = remove_dir_all(format!("data/datasets/kernel/linux-{kernel_version}"));
+            let _ = remove_file(format!(
+                "data/datasets/kernel/linux-{kernel_version}.tar.xz"
+            ));
+            return Err(e);
+        }
+        println!("Kernel downloaded");
+    }
+
+    if !exists("data/datasets/25G-random.bin").unwrap() {
+        println!("Generating random 25 GiB file...");
+        large_random_file_generation("data/datasets/25G-random.bin".to_string());
+        println!("Random 25 GiB file generated");
+    }
+
+    if !exists("data/datasets/small-files/random").unwrap() {
+        println!("Generating random 1 KiB files...");
+        create_dir_all("data/datasets/small-files/random").unwrap();
+        small_random_files_generation("data/datasets/small-files/random".to_string());
+        println!("Random 1 KiB files generated...");
+    }
+
+    if !exists("data/datasets/25G-null.bin").unwrap() {
+        println!("Generating null 25 GiB file...");
+        create_null_file("data/datasets/25G-null.bin".to_string(), 26843545600);
+        println!("Null 25 GiB file generated...");
+    }
+
+    if !exists("data/datasets/small-files/null").unwrap() {
+        println!("Generating null 1 KiB files...");
+        create_dir_all("data/datasets/small-files/null").unwrap();
+        small_null_files_generation("data/datasets/small-files/null".to_string());
+        println!("Null 1 KiB files generated...");
+    }
+
+    // this dataset has to be supplied by hand
+    if !exists("data/datasets/100M-polygon.txt").unwrap() {
+        return Err("*** MANUAL: Get 100M-sided regular polygon data and put it at `./data/datasets/100M-polygon.txt` ***".to_string());
+    }
+
+    Ok(true)
+}
+
+/// Creates `data/mountpoints` (including missing parents) when it does not
+/// exist yet. Always returns `true`.
+///
+/// # Panics
+/// Panics if the existence check or the directory creation fails.
+pub fn prep_other_dirs() -> bool {
+    let mountpoints = "data/mountpoints";
+    let already_present = exists(mountpoints).unwrap();
+    if !already_present {
+        create_dir_all(mountpoints).unwrap();
+    }
+    true
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -0,0 +1,3 @@
+pub mod dataset_gathering;
+pub mod benchmarks;
+pub mod cli;
--- a/src/main.rs
+++ b/src/main.rs
@ -1,430 +1,26 @@
-use curl::easy::Easy as easy_curl;
-use rand::{self, Rng, RngCore, SeedableRng};
-use rand_xorshift::XorShiftRng;
-use std::io::Read;
-use std::time::{Duration, Instant};
-use std::{
-    env::current_dir,
-    fs::{create_dir_all, exists, remove_dir_all, remove_file, File},
-    io::{Error, Write},
-    os::unix::fs::FileExt,
-    process::Command,
-    sync::{Arc, Mutex},
-    thread::{self, JoinHandle},
-};
-
-/*
-    =================================================================
-    ====                                                         ====
-    ====                 ↓ DATASET GATHERING ↓                   ====
-    ====                                                         ====
-    =================================================================
-*/
-
-fn large_random_file_generation(path: String) {
-    // https://stackoverflow.com/a/65235966
-    let out = Arc::new(Mutex::new(File::create(path)));
-    // NOTE: update this both here and in the helper (_large_random_file_generation_helper())
-    let num_threads: u64 = 12;
-    let mut threads: Vec<JoinHandle<()>> = Vec::new();
-    for i in 0..num_threads {
-        let out = Arc::clone(&out);
-
-        let thread = thread::spawn(move || {
-            _large_random_file_generation_helper(&i, out);
-        });
-
-        threads.push(thread);
-    }
-
-    for t in threads {
-        t.join().unwrap();
-    }
-}
-
-fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Error>>>) {
-    let mut rng = XorShiftRng::seed_from_u64(2484345508);
-    // NOTE: update this both here and in `large_random_file_generation()`
-    let num_threads = 12;
-    let mut data = [0u8; 1310720];
-    let block_size = 1310720;
-
-    // enter desired size in bytes, must be a multiple of 655360
-    // this is not a typo, the extra zero after 65536is for the threads
-    // 26843545600 = 25 GiB
-    let blocks_per_thread: u64 = 26843545600 / (block_size * num_threads);
-    for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) {
-        rng.fill_bytes(&mut data);
-
-        //let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u);
-        let offset: u64 = u * block_size;
-        let mut out = out.lock().unwrap();
-        out.as_mut().unwrap().write_all_at(&data, offset).unwrap();
-    }
-}
-
-/*
-fn single_threaded_large_random_file_generation(path: String) {
-    let mut out = File::create(path).unwrap();
-    let mut rng = XorShiftRng::seed_from_u64(2484345508);
-    let mut data = [0u8; 65536];
-    for _ in 0..409600 {
-        rng.fill_bytes(&mut data);
-        out.write_all(&data).unwrap();
-    }
-}
-*/
-
-fn small_random_files_generation(folder: String) {
-    let mut rng = XorShiftRng::seed_from_u64(2484345508);
-    let mut data: [u8; 1024] = [0u8; 1024];
-    for i in 1..1025 {
-        let mut out = File::create(format!("{folder}/{i}")).unwrap();
-        rng.fill_bytes(&mut data);
-        out.write_all(&data).unwrap();
-    }
-}
-
-fn random_file_generator(path: String, size_mib: u64) {
-    let mut out = File::create(path).unwrap();
-    let mut rng = XorShiftRng::seed_from_u64(2484345508);
-
-    let mut data = [0u8; 1310720];
-    let block_size = 1310720;
-    let blocks: u64 = (size_mib * 1024 * 1024) / block_size;
-
-    for _ in 0..blocks {
-        rng.fill_bytes(&mut data);
-        out.write_all(&data).unwrap();
-    }
-}
-
-fn create_null_file(path: String, size: u64) {
-    let out = File::create(path).unwrap();
-    out.write_all_at(&[0], size - 1).unwrap();
-}
-
-// no reason for it not to be multithreaded, but there's not much point either, it hardly takes any time... if anything, the overhead from multithreading might be worse?
-fn small_null_files_generation(folder: String) {
-    for i in 1..1025 {
-        create_null_file(format!("{folder}/{i}"), 1024);
-    }
-}
-
-fn grab_kernel(folder: String, kernel_version: String) -> Result<bool, String> {
-    // maybe i should've just used reqwest, but that's no fun (also much more dependencies and stuff i'm sure)
-    // NOTE: requires openssl-devel to be installed for compilation (presumably requires openssl-libs for execution)
-    if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) {
-        let mut curl = easy_curl::new();
-        curl.url(&format!(
-            "https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz"
-        ))
-        .unwrap();
-        curl.follow_location(true).unwrap();
-        let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap();
-        match curl.write_function(move |data| {
-            out.write_all(data).unwrap();
-            Ok(data.len())
-        }) {
-            Ok(_) => (),
-            Err(e) => return Err(e.to_string()),
-        }
-        curl.perform().unwrap();
-    }
-
-    // i'm too lazy to do this in rust
-    if !(exists(format!("{folder}/linux-{kernel_version}")).unwrap()) {
-        let mut dir = current_dir().unwrap();
-        dir.push(folder);
-        match Command::new("tar")
-            .current_dir(dir)
-            .arg("-xf")
-            .arg(&format!("linux-{kernel_version}.tar.xz"))
-            .arg("")
-            .output()
-        {
-            Ok(_) => (),
-            Err(e) => return Err(e.to_string()),
-        }
-    }
-
-    return Ok(true);
-}
-
-fn grab_datasets() -> Result<bool, String> {
-    let kernel_version = "6.6.58";
-
-    if !exists(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap() {
-        println!("Downloading kernel...");
-        create_dir_all("data/datasets/kernel").unwrap();
-        match grab_kernel(
-            "data/datasets/kernel".to_string(),
-            kernel_version.to_string(),
-        ) {
-            Ok(_) => (),
-            Err(e) => {
-                remove_dir_all(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap();
-                remove_file(format!(
-                    "data/datasets/kernel/linux-{kernel_version}.tar.xz"
-                ))
-                .unwrap();
-                panic!("{}", e.to_string());
-            }
-        }
-        println!("Kernel downloaded");
-    }
-
-    if !exists(format!("data/datasets/25G-random.bin")).unwrap() {
-        println!("Generating random 25 GiB file...");
-        large_random_file_generation("data/datasets/25G-random.bin".to_string());
-        println!("Random 25 GiB file generated");
-    }
-
-    if !exists(format!("data/datasets/small-files/random")).unwrap() {
-        println!("Generating random 1 KiB files...");
-        create_dir_all("data/datasets/small-files/random").unwrap();
-        small_random_files_generation("data/datasets/small-files/random".to_string());
-        println!("Random 1 KiB files generated...");
-    }
-
-    if !exists(format!("data/datasets/25G-null.bin")).unwrap() {
-        println!("Generating null 25 GiB file...");
-        create_null_file("data/datasets/25G-null.bin".to_string(), 26843545600);
-        println!("Null 25 GiB file generated...");
-    }
-
-    if !exists("data/datasets/small-files/null").unwrap() {
-        println!("Generating null 1 KiB files...");
-        create_dir_all("data/datasets/small-files/null").unwrap();
-        small_null_files_generation("data/datasets/small-files/null".to_string());
-        println!("Null 1 KiB files generated...");
-    }
-
-    if !exists("data/datasets/100M-polygon.txt").unwrap() {
-        return Err("*** MANUAL: Get 100M-sided regular polygon data and put it at `./data/datasets/100M-polygon.txt` ***".to_string());
-    };
-
-    return Ok(true);
-}
-
-fn prep_other_dirs() -> bool {
-    if !exists("data/mountpoints").unwrap() {
-        create_dir_all("data/mountpoints").unwrap();
-    };
-
-    return true;
-}
-
-/*
-    =================================================================
-    ====                                                         ====
-    ====                     ↓ BENCHMARKS ↓                      ====
-    ====                                                         ====
-    =================================================================
-*/
-
-fn sequential_read(path: String) -> Duration {
-    let mut f: File = File::open(path).unwrap();
-    let size = f.metadata().unwrap().len();
-
-    let mut data: [u8; 1310720] = [0u8; 1310720];
-    // benchmarking/elapsed: https://stackoverflow.com/a/40953863
-    let now = Instant::now();
-    for _ in 0..(size / 1310720) {
-        f.read(&mut data).unwrap();
-    }
-    let elapsed = now.elapsed();
-    return elapsed;
-}
-
-/// Reads 1 byte from the start of file
-fn sequential_read_latency(path: String) -> Duration {
-    let mut f: File = File::open(path).unwrap();
-    let mut data: [u8; 1] = [0u8; 1];
-    let now = Instant::now();
-    f.read(&mut data).unwrap();
-    let elapsed = now.elapsed();
-    return elapsed;
-}
-
-/// Reads 1 GiB from the file at `path` in random 1 MiB chunks
-fn random_read(path: String) -> Duration {
-    let mut rng = XorShiftRng::seed_from_u64(9198675309);
-    let f: File = File::open(path).unwrap();
-    let size = f.metadata().unwrap().len();
-
-    let mut data: [u8; 1048576] = [0u8; 1048576];
-    let now = Instant::now();
-    for _ in 0..1024 {
-        let offset = rng.gen_range(0..((size - 1048576) / 1048576));
-        f.read_at(&mut data, offset).unwrap();
-    }
-    let elapsed = now.elapsed();
-    return elapsed;
-}
-
-/// Reads 1 random byte from the file at `path` 1024 times
-fn random_read_latency(path: String) -> Duration {
-    let mut rng = XorShiftRng::seed_from_u64(9198675309);
-    let f: File = File::open(path).unwrap();
-    let size = f.metadata().unwrap().len();
-    let mut data: [u8; 1] = [0u8; 1];
-    let now = Instant::now();
-    for _ in 0..1024 {
-        let offset = rng.gen_range(0..(size - 1));
-        f.read_at(&mut data, offset).unwrap();
-    }
-    let elapsed = now.elapsed();
-    return elapsed;
-}
-
-fn bulk_sequential_read(path: String) -> Vec<Duration> {
-    let mut data: [u8; 1024] = [0u8; 1024];
-    let mut times: Vec<Duration> = Vec::new();
-    for i in 1..1025 {
-        let mut f: File = File::open(format!("{path}/{i}")).unwrap();
-        let now = Instant::now();
-        f.read(&mut data).unwrap();
-        let elapsed = now.elapsed();
-        times.push(elapsed);
-    }
-
-    return times;
-}
-
-fn bulk_sequential_read_latency(path: String) -> Vec<Duration> {
-    let mut data: [u8; 1] = [0u8; 1];
-    let mut times: Vec<Duration> = Vec::new();
-    for i in 1..1025 {
-        let now = Instant::now();
-        let mut f: File = File::open(format!("{path}/{i}")).unwrap();
-        f.read(&mut data).unwrap();
-        let elapsed = now.elapsed();
-        times.push(elapsed);
-    }
-
-    return times;
-}
-
-fn bulk_random_read_latency(path: String) -> Vec<Duration> {
-    let mut rng = XorShiftRng::seed_from_u64(9198675309);
-    let mut data: [u8; 1] = [0u8; 1];
-    let mut times: Vec<Duration> = Vec::new();
-    for i in 1..1025 {
-        let mut f: File = File::open(format!("{path}/{i}")).unwrap();
-        let offset = rng.gen_range(0..1023);
-        let now = Instant::now();
-        f.read_at(&mut data, offset).unwrap();
-        let elapsed = now.elapsed();
-        times.push(elapsed);
-    }
-
-    return times;
-}
-
-fn benchmark() {
-    let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap();
-    let mut bulk_recorder = csv::Writer::from_path("data/bulk.csv").unwrap();
-    let mountpoint_dir = "data/mountpoints";
-    let mut filesystems = std::fs::read_dir(mountpoint_dir)
-        .unwrap()
-        .map(|item| {
-            let tmp = item.unwrap().file_name().into_string().unwrap();
-            format!("{mountpoint_dir}/{tmp}")
-        })
-        .collect::<Vec<String>>();
-
-    filesystems.push("data/datasets".to_string());
-
-    for fs in filesystems {
-        let single_files = vec![
-            "25G-null.bin".to_string(),
-            "25G-random.bin".to_string(),
-            "100M-polygon.txt".to_string(),
-            "kernel/linux-6.6.58.tar.xz".to_string(),
-        ];
-        let bulk_files: Vec<String> = vec![
-            "small-files/null".to_string(),
-            "small-files/random".to_string(),
-        ];
-
-        for filename in single_files {
-            let path = format!("{fs}/{filename}");
-            println!("=== {} ===", path.clone());
-
-            let seq_read = format!("{:.5?}", sequential_read(path.clone()));
-            println!("Sequential read (complete file read): {}", seq_read.clone());
-
-            let seq_latency = format!("{:.5?}", sequential_read_latency(path.clone()));
-            println!("Sequential latency (1 byte read): {}", seq_latency);
-
-            let rand_read = format!("{:.5?}", random_read(path.clone()));
-            println!("Random read (1024x 1 MiB): {}", rand_read);
-
-            let mut rand_latency: String = "0s".to_string();
-            if fs != "data/mountpoints/fuse-archive-tar" {
-                rand_latency = format!("{:.5?}", random_read_latency(path.clone()));
-            }
-
-            println!("Random latency (1024x 1 byte read): {}", rand_latency);
-
-            let data: Vec<String> = vec![
-                fs.clone(),
-                filename,
-                seq_read,
-                seq_latency,
-                rand_read,
-                rand_latency,
-            ];
-            recorder.write_record(data).unwrap();
-            println!();
-        }
-
-        // bulk files
-        for folder in bulk_files {
-            let cloned = fs.clone();
-            let path = format!("{cloned}/{folder}");
-            println!("[bulk] Testing {}", path);
-            let dataset_info: Vec<String> = vec![fs.clone(), folder];
-
-            let mut times = _vec_duration_to_string(bulk_sequential_read(path.clone()));
-            let mut tmp = Vec::new();
-            dataset_info.clone_into(&mut tmp);
-            tmp.push("bulk_sequential_read".to_string());
-            tmp.append(&mut times);
-            bulk_recorder.write_record(tmp).unwrap();
-
-            times = _vec_duration_to_string(bulk_sequential_read_latency(path.clone()));
-            tmp = Vec::new();
-            dataset_info.clone_into(&mut tmp);
-            tmp.push("bulk_sequential_read_latency".to_string());
-            tmp.append(&mut times);
-            bulk_recorder.write_record(tmp).unwrap();
-
-            // not enough data in these files to warrant bulk_random_read()
-            //bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(path.clone()))).unwrap();
-            times = _vec_duration_to_string(bulk_random_read_latency(path.clone()));
-            tmp = Vec::new();
-            dataset_info.clone_into(&mut tmp);
-            tmp.push("bulk_random_read_latency".to_string());
-            tmp.append(&mut times);
-            bulk_recorder.write_record(tmp).unwrap();
-        }
-        println!("\n=== === === === === === === === === === ===\n")
-    }
-}
+use clap::Parser;
+use disk_read_benchmark::benchmarks::benchmark;
+use disk_read_benchmark::cli::*;
+use disk_read_benchmark::dataset_gathering::*;

 fn main() {
-    grab_datasets().unwrap();
-    prep_other_dirs();
-    benchmark();
-}
+    let cli = Cli::parse();

-fn _vec_duration_to_string(
-    vector_committing_crimes_with_both_direction_and_magnitude: Vec<Duration>,
-) -> Vec<String> {
-    return vector_committing_crimes_with_both_direction_and_magnitude
-        .iter()
-        .map(|item| format!("{:.5?}", item))
-        .collect::<Vec<String>>();
+    match cli.command {
+        Commands::PrepDirs => {
+            // FIXME: okay i'm dumb, this only covers stuff that's not handled by grab_datasets(), and literally nothing creates ext-workdir
+            prep_other_dirs();
+        }
+        Commands::GrabData => {
+            grab_datasets().unwrap(); // * should unwrap
+        }
+        Commands::Benchmark => {
+            benchmark();
+        }
+        Commands::Run => {
+            prep_other_dirs();
+            grab_datasets().unwrap(); // * should unwrap
+            benchmark();
+        }
+    }
 }