me when actual cli:

askiiart 2024-11-05 15:25:38 -06:00
parent 30117dde4b
commit 39c2378e72
Signed by untrusted user who does not match committer: askiiart
GPG key ID: EA85979611654C30
7 changed files with 482 additions and 436 deletions

Cargo.lock (generated) · 18 lines changed

@@ -4,9 +4,9 @@ version = 3
 [[package]]
 name = "anstream"
-version = "0.6.17"
+version = "0.6.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338"
+checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -19,9 +19,9 @@ dependencies = [
 [[package]]
 name = "anstyle"
-version = "1.0.9"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56"
+checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
 [[package]]
 name = "anstyle-parse"
@@ -59,9 +59,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 [[package]]
 name = "cc"
-version = "1.1.31"
+version = "1.1.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f"
+checksum = "0f57c4b4da2a9d619dd035f27316d7a426305b75be93d09e92f2b9229c34feaf"
 dependencies = [
  "shlex",
 ]
@@ -171,7 +171,7 @@ dependencies = [
 [[package]]
 name = "disk-read-benchmark"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
  "clap",
  "csv",
@@ -382,9 +382,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 [[package]]
 name = "syn"
-version = "2.0.85"
+version = "2.0.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56"
+checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
 dependencies = [
  "proc-macro2",
  "quote",

Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "disk-read-benchmark"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2021"
 [dependencies]

src/benchmarks.rs (new file, 214 lines)

@@ -0,0 +1,214 @@
use rand::{self, Rng, SeedableRng};
use rand_xorshift::XorShiftRng;
use std::io::Read;
use std::time::{Duration, Instant};
use std::{fs::File, os::unix::fs::FileExt};
/*
=================================================================
====                                                         ====
====                        BENCHMARKS                       ====
====                                                         ====
=================================================================
*/
pub fn sequential_read(path: String) -> Duration {
let mut f: File = File::open(path).unwrap();
let size = f.metadata().unwrap().len();
let mut data: [u8; 1310720] = [0u8; 1310720];
// benchmarking/elapsed: https://stackoverflow.com/a/40953863
let now = Instant::now();
for _ in 0..(size / 1310720) {
f.read(&mut data).unwrap();
}
let elapsed = now.elapsed();
return elapsed;
}
/// Reads 1 byte from the start of the file
pub fn sequential_read_latency(path: String) -> Duration {
let mut f: File = File::open(path).unwrap();
let mut data: [u8; 1] = [0u8; 1];
let now = Instant::now();
f.read(&mut data).unwrap();
let elapsed = now.elapsed();
return elapsed;
}
/// Reads 1 GiB from the file at `path` in random 1 MiB chunks
pub fn random_read(path: String) -> Duration {
let mut rng = XorShiftRng::seed_from_u64(9198675309);
let f: File = File::open(path).unwrap();
let size = f.metadata().unwrap().len();
let mut data: [u8; 1048576] = [0u8; 1048576];
let now = Instant::now();
for _ in 0..1024 {
let offset = rng.gen_range(0..((size - 1048576) / 1048576));
f.read_at(&mut data, offset).unwrap();
}
let elapsed = now.elapsed();
return elapsed;
}
/// Reads 1 random byte from the file at `path` 1024 times
pub fn random_read_latency(path: String) -> Duration {
let mut rng = XorShiftRng::seed_from_u64(9198675309);
let f: File = File::open(path).unwrap();
let size = f.metadata().unwrap().len();
let mut data: [u8; 1] = [0u8; 1];
let now = Instant::now();
for _ in 0..1024 {
let offset = rng.gen_range(0..(size - 1));
f.read_at(&mut data, offset).unwrap();
}
let elapsed = now.elapsed();
return elapsed;
}
pub fn bulk_sequential_read(path: String) -> Vec<Duration> {
let mut data: [u8; 1024] = [0u8; 1024];
let mut times: Vec<Duration> = Vec::new();
for i in 1..1025 {
let mut f: File = File::open(format!("{path}/{i}")).unwrap();
let now = Instant::now();
f.read(&mut data).unwrap();
let elapsed = now.elapsed();
times.push(elapsed);
}
return times;
}
pub fn bulk_sequential_read_latency(path: String) -> Vec<Duration> {
let mut data: [u8; 1] = [0u8; 1];
let mut times: Vec<Duration> = Vec::new();
for i in 1..1025 {
let now = Instant::now();
let mut f: File = File::open(format!("{path}/{i}")).unwrap();
f.read(&mut data).unwrap();
let elapsed = now.elapsed();
times.push(elapsed);
}
return times;
}
pub fn bulk_random_read_latency(path: String) -> Vec<Duration> {
let mut rng = XorShiftRng::seed_from_u64(9198675309);
let mut data: [u8; 1] = [0u8; 1];
let mut times: Vec<Duration> = Vec::new();
for i in 1..1025 {
let f: File = File::open(format!("{path}/{i}")).unwrap();
let offset = rng.gen_range(0..1023);
let now = Instant::now();
f.read_at(&mut data, offset).unwrap();
let elapsed = now.elapsed();
times.push(elapsed);
}
return times;
}
pub fn benchmark() {
let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap();
let mut bulk_recorder = csv::Writer::from_path("data/bulk.csv").unwrap();
let mountpoint_dir = "data/mountpoints";
let mut filesystems = std::fs::read_dir(mountpoint_dir)
.unwrap()
.map(|item| {
let tmp = item.unwrap().file_name().into_string().unwrap();
format!("{mountpoint_dir}/{tmp}")
})
.collect::<Vec<String>>();
filesystems.push("data/datasets".to_string());
for fs in filesystems {
let single_files = vec![
"25G-null.bin".to_string(),
"25G-random.bin".to_string(),
"100M-polygon.txt".to_string(),
"kernel/linux-6.6.58.tar.xz".to_string(),
];
let bulk_files: Vec<String> = vec![
"small-files/null".to_string(),
"small-files/random".to_string(),
];
for filename in single_files {
let path = format!("{fs}/{filename}");
println!("=== {} ===", path.clone());
let seq_read = format!("{:.5?}", sequential_read(path.clone()));
println!("Sequential read (complete file read): {}", seq_read.clone());
let seq_latency = format!("{:.5?}", sequential_read_latency(path.clone()));
println!("Sequential latency (1 byte read): {}", seq_latency);
let rand_read = format!("{:.5?}", random_read(path.clone()));
println!("Random read (1024x 1 MiB): {}", rand_read);
let mut rand_latency: String = "0s".to_string();
if fs != "data/mountpoints/fuse-archive-tar" {
rand_latency = format!("{:.5?}", random_read_latency(path.clone()));
}
println!("Random latency (1024x 1 byte read): {}", rand_latency);
let data: Vec<String> = vec![
fs.clone(),
filename,
seq_read,
seq_latency,
rand_read,
rand_latency,
];
recorder.write_record(data).unwrap();
println!();
}
// bulk files
for folder in bulk_files {
let cloned = fs.clone();
let path = format!("{cloned}/{folder}");
println!("[bulk] Testing {}", path);
let dataset_info: Vec<String> = vec![fs.clone(), folder];
let mut times = _vec_duration_to_string(bulk_sequential_read(path.clone()));
let mut tmp = Vec::new();
dataset_info.clone_into(&mut tmp);
tmp.push("bulk_sequential_read".to_string());
tmp.append(&mut times);
bulk_recorder.write_record(tmp).unwrap();
times = _vec_duration_to_string(bulk_sequential_read_latency(path.clone()));
tmp = Vec::new();
dataset_info.clone_into(&mut tmp);
tmp.push("bulk_sequential_read_latency".to_string());
tmp.append(&mut times);
bulk_recorder.write_record(tmp).unwrap();
// not enough data in these files to warrant bulk_random_read()
//bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(path.clone()))).unwrap();
times = _vec_duration_to_string(bulk_random_read_latency(path.clone()));
tmp = Vec::new();
dataset_info.clone_into(&mut tmp);
tmp.push("bulk_random_read_latency".to_string());
tmp.append(&mut times);
bulk_recorder.write_record(tmp).unwrap();
}
println!("\n=== === === === === === === === === === ===\n")
}
}
pub fn _vec_duration_to_string(
vector_committing_crimes_with_both_direction_and_magnitude: Vec<Duration>,
) -> Vec<String> {
return vector_committing_crimes_with_both_direction_and_magnitude
.iter()
.map(|item| format!("{:.5?}", item))
.collect::<Vec<String>>();
}
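
For reference, a minimal, self-contained sketch (not part of the commit) of the timing idiom every benchmark above uses: take an Instant before the read loop, then format the elapsed Duration with "{:.5?}", which is also how benchmark() serializes the times into the CSV rows. The file path here is only illustrative.

use std::fs::File;
use std::io::Read;
use std::time::Instant;

fn main() -> std::io::Result<()> {
    // any small readable file works here; the path is only illustrative
    let mut f = File::open("/etc/hostname")?;
    let mut buf = [0u8; 4096];

    let now = Instant::now();
    let n = f.read(&mut buf)?;
    let elapsed = now.elapsed();

    // "{:.5?}" prints a Duration with 5 fractional digits, e.g. "12.34567µs"
    println!("read {} bytes in {:.5?}", n, elapsed);
    Ok(())
}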

src/cli.rs (new file, 20 lines)

@@ -0,0 +1,20 @@
use clap::{Parser, Subcommand};
#[derive(Parser)]
#[command(version, about, long_about = None)]
pub struct Cli {
#[command(subcommand)]
pub command: Commands,
}
#[derive(Subcommand)]
pub enum Commands {
/// Grabs the datasets used for benchmarking
GrabData,
/// Runs the benchmark
Benchmark,
/// Prepares the directories so other programs can prepare their datasets
PrepDirs,
/// Runs it all
Run,
}
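
A minimal sketch (not part of the commit) of how these subcommands map to command-line invocations: clap's derive API lowercases variant names to kebab-case, so GrabData becomes `grab-data`, PrepDirs becomes `prep-dirs`, and so on. Cli::parse_from lets this be checked without touching real process arguments.

use clap::Parser;
use disk_read_benchmark::cli::{Cli, Commands};

fn main() {
    // equivalent to running: disk-read-benchmark grab-data
    let cli = Cli::parse_from(["disk-read-benchmark", "grab-data"]);
    assert!(matches!(cli.command, Commands::GrabData));
}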

src/dataset_gathering.rs (new file, 213 lines)

@@ -0,0 +1,213 @@
use curl::easy::Easy as easy_curl;
use rand::{self, RngCore, SeedableRng};
use rand_xorshift::XorShiftRng;
use std::{
env::current_dir,
fs::{create_dir_all, exists, remove_dir_all, remove_file, File},
io::{Error, Write},
os::unix::fs::FileExt,
process::Command,
sync::{Arc, Mutex},
thread::{self, JoinHandle},
};
/*
=================================================================
====                                                         ====
====                    DATASET GATHERING                    ====
====                                                         ====
=================================================================
*/
pub fn large_random_file_generation(path: String) {
// https://stackoverflow.com/a/65235966
let out = Arc::new(Mutex::new(File::create(path)));
// NOTE: update this both here and in the helper (_large_random_file_generation_helper())
let num_threads: u64 = 12;
let mut threads: Vec<JoinHandle<()>> = Vec::new();
for i in 0..num_threads {
let out = Arc::clone(&out);
let thread = thread::spawn(move || {
_large_random_file_generation_helper(&i, out);
});
threads.push(thread);
}
for t in threads {
t.join().unwrap();
}
}
pub fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Error>>>) {
let mut rng = XorShiftRng::seed_from_u64(2484345508);
// NOTE: update this both here and in `large_random_file_generation()`
let num_threads = 12;
let mut data = [0u8; 1310720];
let block_size = 1310720;
// enter desired size in bytes, must be a multiple of 655360
// this is not a typo, the extra zero after 65536 is for the threads
// 26843545600 = 25 GiB
let blocks_per_thread: u64 = 26843545600 / (block_size * num_threads);
for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) {
rng.fill_bytes(&mut data);
//let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u);
let offset: u64 = u * block_size;
let mut out = out.lock().unwrap();
out.as_mut().unwrap().write_all_at(&data, offset).unwrap();
}
}
/*
pub fn single_threaded_large_random_file_generation(path: String) {
let mut out = File::create(path).unwrap();
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 65536];
for _ in 0..409600 {
rng.fill_bytes(&mut data);
out.write_all(&data).unwrap();
}
}
*/
pub fn small_random_files_generation(folder: String) {
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data: [u8; 1024] = [0u8; 1024];
for i in 1..1025 {
let mut out = File::create(format!("{folder}/{i}")).unwrap();
rng.fill_bytes(&mut data);
out.write_all(&data).unwrap();
}
}
pub fn random_file_generator(path: String, size_mib: u64) {
let mut out = File::create(path).unwrap();
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 1310720];
let block_size = 1310720;
let blocks: u64 = (size_mib * 1024 * 1024) / block_size;
for _ in 0..blocks {
rng.fill_bytes(&mut data);
out.write_all(&data).unwrap();
}
}
pub fn create_null_file(path: String, size: u64) {
let out = File::create(path).unwrap();
out.write_all_at(&[0], size - 1).unwrap();
}
// no reason for it not to be multithreaded, but there's not much point either, it hardly takes any time... if anything, the overhead from multithreading might be worse?
pub fn small_null_files_generation(folder: String) {
for i in 1..1025 {
create_null_file(format!("{folder}/{i}"), 1024);
}
}
pub fn grab_kernel(folder: String, kernel_version: String) -> Result<bool, String> {
// maybe i should've just used reqwest, but that's no fun (also much more dependencies and stuff i'm sure)
// NOTE: requires openssl-devel to be installed for compilation (presumably requires openssl-libs for execution)
if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) {
let mut curl = easy_curl::new();
curl.url(&format!(
"https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz"
))
.unwrap();
curl.follow_location(true).unwrap();
let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap();
match curl.write_function(move |data| {
out.write_all(data).unwrap();
Ok(data.len())
}) {
Ok(_) => (),
Err(e) => return Err(e.to_string()),
}
curl.perform().unwrap();
}
// i'm too lazy to do this in rust
if !(exists(format!("{folder}/linux-{kernel_version}")).unwrap()) {
let mut dir = current_dir().unwrap();
dir.push(folder);
match Command::new("tar")
.current_dir(dir)
.arg("-xf")
.arg(&format!("linux-{kernel_version}.tar.xz"))
.arg("")
.output()
{
Ok(_) => (),
Err(e) => return Err(e.to_string()),
}
}
return Ok(true);
}
pub fn grab_datasets() -> Result<bool, String> {
let kernel_version = "6.6.58";
if !exists(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap() {
println!("Downloading kernel...");
create_dir_all("data/datasets/kernel").unwrap();
match grab_kernel(
"data/datasets/kernel".to_string(),
kernel_version.to_string(),
) {
Ok(_) => (),
Err(e) => {
remove_dir_all(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap();
remove_file(format!(
"data/datasets/kernel/linux-{kernel_version}.tar.xz"
))
.unwrap();
panic!("{}", e.to_string());
}
}
println!("Kernel downloaded");
}
if !exists(format!("data/datasets/25G-random.bin")).unwrap() {
println!("Generating random 25 GiB file...");
large_random_file_generation("data/datasets/25G-random.bin".to_string());
println!("Random 25 GiB file generated");
}
if !exists(format!("data/datasets/small-files/random")).unwrap() {
println!("Generating random 1 KiB files...");
create_dir_all("data/datasets/small-files/random").unwrap();
small_random_files_generation("data/datasets/small-files/random".to_string());
println!("Random 1 KiB files generated...");
}
if !exists(format!("data/datasets/25G-null.bin")).unwrap() {
println!("Generating null 25 GiB file...");
create_null_file("data/datasets/25G-null.bin".to_string(), 26843545600);
println!("Null 25 GiB file generated...");
}
if !exists("data/datasets/small-files/null").unwrap() {
println!("Generating null 1 KiB files...");
create_dir_all("data/datasets/small-files/null").unwrap();
small_null_files_generation("data/datasets/small-files/null".to_string());
println!("Null 1 KiB files generated...");
}
if !exists("data/datasets/100M-polygon.txt").unwrap() {
return Err("*** MANUAL: Get 100M-sided regular polygon data and put it at `./data/datasets/100M-polygon.txt` ***".to_string());
};
return Ok(true);
}
pub fn prep_other_dirs() -> bool {
if !exists("data/mountpoints").unwrap() {
create_dir_all("data/mountpoints").unwrap();
};
return true;
}
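
The null datasets rely on a sparse-file trick: create_null_file() writes a single zero byte at offset size - 1, so the file reports the full length while most filesystems allocate almost no blocks. A minimal sketch of the same idea (not part of the commit; the demo path is hypothetical):

use std::fs::File;
use std::os::unix::fs::FileExt;

fn main() -> std::io::Result<()> {
    let out = File::create("/tmp/sparse-demo.bin")?; // hypothetical demo path
    out.write_all_at(&[0], 1024 * 1024 - 1)?;        // one byte at offset 1 MiB - 1
    println!("apparent size: {} bytes", out.metadata()?.len()); // prints 1048576
    Ok(())
}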

src/lib.rs (new file, 3 lines)

@@ -0,0 +1,3 @@
pub mod dataset_gathering;
pub mod benchmarks;
pub mod cli;

src/main.rs

@@ -1,430 +1,26 @@

The old single-file implementation is removed: its imports, the inline dataset-gathering helpers, the benchmark functions, and the old main() (which simply called grab_datasets(), prep_other_dirs(), and benchmark() in sequence) — roughly 400 lines that now live, essentially unchanged, in src/dataset_gathering.rs and src/benchmarks.rs above. The new main.rs is:

use clap::Parser;
use disk_read_benchmark::benchmarks::benchmark;
use disk_read_benchmark::cli::*;
use disk_read_benchmark::dataset_gathering::*;
fn main() {
    let cli = Cli::parse();
    match cli.command {
        Commands::PrepDirs => {
            // FIXME: okay i'm dumb, this only covers stuff that's not handled by grab_datasets(), and literally nothing creates ext-workdir
            prep_other_dirs();
        }
        Commands::GrabData => {
            grab_datasets().unwrap(); // * should unwrap
        }
        Commands::Benchmark => {
            benchmark();
        }
        Commands::Run => {
            prep_other_dirs();
            grab_datasets().unwrap(); // * should unwrap
            benchmark();
        }
    }
}