me when actual cli:
This commit is contained in:
parent
30117dde4b
commit
39c2378e72
7 changed files with 482 additions and 436 deletions
18
Cargo.lock
generated
18
Cargo.lock
generated
|
@ -4,9 +4,9 @@ version = 3
|
|||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.17"
|
||||
version = "0.6.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338"
|
||||
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
|
@ -19,9 +19,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.9"
|
||||
version = "1.0.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56"
|
||||
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
|
@ -59,9 +59,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
|||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.1.31"
|
||||
version = "1.1.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f"
|
||||
checksum = "0f57c4b4da2a9d619dd035f27316d7a426305b75be93d09e92f2b9229c34feaf"
|
||||
dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
@ -171,7 +171,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "disk-read-benchmark"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"csv",
|
||||
|
@ -382,9 +382,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.85"
|
||||
version = "2.0.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56"
|
||||
checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "disk-read-benchmark"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
|
214
src/benchmarks.rs
Normal file
214
src/benchmarks.rs
Normal file
|
@ -0,0 +1,214 @@
|
|||
use rand::{self, Rng, SeedableRng};
|
||||
use rand_xorshift::XorShiftRng;
|
||||
use std::io::Read;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{fs::File, os::unix::fs::FileExt};
|
||||
|
||||
|
||||
/*
|
||||
=================================================================
|
||||
==== ====
|
||||
==== ↓ BENCHMARKS ↓ ====
|
||||
==== ====
|
||||
=================================================================
|
||||
*/
|
||||
|
||||
/// Times a sequential read of the complete file at `path`.
///
/// The file is read front to back in requests of up to 1310720 bytes
/// (1.25 MiB) until end-of-file is reached. Opening the file and allocating
/// the buffer are not part of the measured time.
///
/// # Panics
///
/// Panics if the file cannot be opened or a read fails.
pub fn sequential_read(path: String) -> Duration {
    const CHUNK_SIZE: usize = 1310720;

    let mut f: File = File::open(path).unwrap();
    // Heap-allocated so the 1.25 MiB buffer cannot overflow a small thread stack.
    let mut data = vec![0u8; CHUNK_SIZE];

    // benchmarking/elapsed: https://stackoverflow.com/a/40953863
    let now = Instant::now();
    // `read` may return fewer bytes than requested, and the file size is
    // rarely an exact multiple of the chunk size, so keep reading until the
    // OS reports end-of-file (0 bytes) instead of counting iterations.
    while f.read(&mut data).unwrap() != 0 {}
    now.elapsed()
}
|
||||
|
||||
/// Measures how long a single 1-byte read from the start of `path` takes.
///
/// The file is opened before the timer starts, so only the read itself is
/// measured.
///
/// # Panics
///
/// Panics if the file cannot be opened or the read fails.
pub fn sequential_read_latency(path: String) -> Duration {
    let mut file = File::open(path).unwrap();
    let mut byte = [0u8; 1];

    let start = Instant::now();
    file.read(&mut byte).unwrap();
    start.elapsed()
}
|
||||
|
||||
/// Reads 1 GiB from the file at `path` in random 1 MiB chunks
|
||||
pub fn random_read(path: String) -> Duration {
|
||||
let mut rng = XorShiftRng::seed_from_u64(9198675309);
|
||||
let f: File = File::open(path).unwrap();
|
||||
let size = f.metadata().unwrap().len();
|
||||
|
||||
let mut data: [u8; 1048576] = [0u8; 1048576];
|
||||
let now = Instant::now();
|
||||
for _ in 0..1024 {
|
||||
let offset = rng.gen_range(0..((size - 1048576) / 1048576));
|
||||
f.read_at(&mut data, offset).unwrap();
|
||||
}
|
||||
let elapsed = now.elapsed();
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
/// Reads 1 random byte from the file at `path` 1024 times
|
||||
pub fn random_read_latency(path: String) -> Duration {
|
||||
let mut rng = XorShiftRng::seed_from_u64(9198675309);
|
||||
let f: File = File::open(path).unwrap();
|
||||
let size = f.metadata().unwrap().len();
|
||||
let mut data: [u8; 1] = [0u8; 1];
|
||||
let now = Instant::now();
|
||||
for _ in 0..1024 {
|
||||
let offset = rng.gen_range(0..(size - 1));
|
||||
f.read_at(&mut data, offset).unwrap();
|
||||
}
|
||||
let elapsed = now.elapsed();
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
/// Times one read of up to 1024 bytes from each of the files `path/1`
/// through `path/1024`.
///
/// Each file is opened before its timer starts, so every returned duration
/// covers only the `read` call. The result holds one entry per file, in
/// file-number order.
///
/// # Panics
///
/// Panics if any file cannot be opened or read.
pub fn bulk_sequential_read(path: String) -> Vec<Duration> {
    let mut buffer = [0u8; 1024];
    (1..1025)
        .map(|index| {
            let mut file = File::open(format!("{path}/{index}")).unwrap();
            let start = Instant::now();
            file.read(&mut buffer).unwrap();
            start.elapsed()
        })
        .collect()
}
|
||||
|
||||
/// Times opening and reading the first byte of each of the files `path/1`
/// through `path/1024`.
///
/// Unlike `bulk_sequential_read`, the timer starts before the file is
/// opened, so every returned duration covers the open plus the 1-byte read.
/// The result holds one entry per file, in file-number order.
///
/// # Panics
///
/// Panics if any file cannot be opened or read.
pub fn bulk_sequential_read_latency(path: String) -> Vec<Duration> {
    let mut byte = [0u8; 1];
    let mut durations: Vec<Duration> = Vec::new();

    for index in 1..1025 {
        let start = Instant::now();
        let mut file = File::open(format!("{path}/{index}")).unwrap();
        file.read(&mut byte).unwrap();
        durations.push(start.elapsed());
    }

    durations
}
|
||||
|
||||
pub fn bulk_random_read_latency(path: String) -> Vec<Duration> {
|
||||
let mut rng = XorShiftRng::seed_from_u64(9198675309);
|
||||
let mut data: [u8; 1] = [0u8; 1];
|
||||
let mut times: Vec<Duration> = Vec::new();
|
||||
for i in 1..1025 {
|
||||
let f: File = File::open(format!("{path}/{i}")).unwrap();
|
||||
let offset = rng.gen_range(0..1023);
|
||||
let now = Instant::now();
|
||||
f.read_at(&mut data, offset).unwrap();
|
||||
let elapsed = now.elapsed();
|
||||
times.push(elapsed);
|
||||
}
|
||||
|
||||
return times;
|
||||
}
|
||||
|
||||
pub fn benchmark() {
|
||||
let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap();
|
||||
let mut bulk_recorder = csv::Writer::from_path("data/bulk.csv").unwrap();
|
||||
let mountpoint_dir = "data/mountpoints";
|
||||
let mut filesystems = std::fs::read_dir(mountpoint_dir)
|
||||
.unwrap()
|
||||
.map(|item| {
|
||||
let tmp = item.unwrap().file_name().into_string().unwrap();
|
||||
format!("{mountpoint_dir}/{tmp}")
|
||||
})
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
filesystems.push("data/datasets".to_string());
|
||||
|
||||
for fs in filesystems {
|
||||
let single_files = vec![
|
||||
"25G-null.bin".to_string(),
|
||||
"25G-random.bin".to_string(),
|
||||
"100M-polygon.txt".to_string(),
|
||||
"kernel/linux-6.6.58.tar.xz".to_string(),
|
||||
];
|
||||
let bulk_files: Vec<String> = vec![
|
||||
"small-files/null".to_string(),
|
||||
"small-files/random".to_string(),
|
||||
];
|
||||
|
||||
for filename in single_files {
|
||||
let path = format!("{fs}/{filename}");
|
||||
println!("=== {} ===", path.clone());
|
||||
|
||||
let seq_read = format!("{:.5?}", sequential_read(path.clone()));
|
||||
println!("Sequential read (complete file read): {}", seq_read.clone());
|
||||
|
||||
let seq_latency = format!("{:.5?}", sequential_read_latency(path.clone()));
|
||||
println!("Sequential latency (1 byte read): {}", seq_latency);
|
||||
|
||||
let rand_read = format!("{:.5?}", random_read(path.clone()));
|
||||
println!("Random read (1024x 1 MiB): {}", rand_read);
|
||||
|
||||
let mut rand_latency: String = "0s".to_string();
|
||||
if fs != "data/mountpoints/fuse-archive-tar" {
|
||||
rand_latency = format!("{:.5?}", random_read_latency(path.clone()));
|
||||
}
|
||||
|
||||
println!("Random latency (1024x 1 byte read): {}", rand_latency);
|
||||
|
||||
let data: Vec<String> = vec![
|
||||
fs.clone(),
|
||||
filename,
|
||||
seq_read,
|
||||
seq_latency,
|
||||
rand_read,
|
||||
rand_latency,
|
||||
];
|
||||
recorder.write_record(data).unwrap();
|
||||
println!();
|
||||
}
|
||||
|
||||
// bulk files
|
||||
for folder in bulk_files {
|
||||
let cloned = fs.clone();
|
||||
let path = format!("{cloned}/{folder}");
|
||||
println!("[bulk] Testing {}", path);
|
||||
let dataset_info: Vec<String> = vec![fs.clone(), folder];
|
||||
|
||||
let mut times = _vec_duration_to_string(bulk_sequential_read(path.clone()));
|
||||
let mut tmp = Vec::new();
|
||||
dataset_info.clone_into(&mut tmp);
|
||||
tmp.push("bulk_sequential_read".to_string());
|
||||
tmp.append(&mut times);
|
||||
bulk_recorder.write_record(tmp).unwrap();
|
||||
|
||||
times = _vec_duration_to_string(bulk_sequential_read_latency(path.clone()));
|
||||
tmp = Vec::new();
|
||||
dataset_info.clone_into(&mut tmp);
|
||||
tmp.push("bulk_sequential_read_latency".to_string());
|
||||
tmp.append(&mut times);
|
||||
bulk_recorder.write_record(tmp).unwrap();
|
||||
|
||||
// not enough data in these files to warrant bulk_random_read()
|
||||
//bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(path.clone()))).unwrap();
|
||||
times = _vec_duration_to_string(bulk_random_read_latency(path.clone()));
|
||||
tmp = Vec::new();
|
||||
dataset_info.clone_into(&mut tmp);
|
||||
tmp.push("bulk_random_read_latency".to_string());
|
||||
tmp.append(&mut times);
|
||||
bulk_recorder.write_record(tmp).unwrap();
|
||||
}
|
||||
println!("\n=== === === === === === === === === === ===\n")
|
||||
}
|
||||
}
|
||||
|
||||
/// Formats every duration with five decimal places (`{:.5?}`), preserving
/// order, e.g. 1.5 seconds becomes `"1.50000s"`.
pub fn _vec_duration_to_string(
    vector_committing_crimes_with_both_direction_and_magnitude: Vec<Duration>,
) -> Vec<String> {
    let mut formatted: Vec<String> =
        Vec::with_capacity(vector_committing_crimes_with_both_direction_and_magnitude.len());
    for duration in &vector_committing_crimes_with_both_direction_and_magnitude {
        formatted.push(format!("{duration:.5?}"));
    }
    formatted
}
|
20
src/cli.rs
Normal file
20
src/cli.rs
Normal file
|
@ -0,0 +1,20 @@
|
|||
use clap::{Parser, Subcommand};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(version, about, long_about = None)]
|
||||
pub struct Cli {
|
||||
#[command(subcommand)]
|
||||
pub command: Commands,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
pub enum Commands {
|
||||
///Grabs the datasets used for benchmarking
|
||||
GrabData,
|
||||
///Runs the benchmark
|
||||
Benchmark,
|
||||
///Prepares the directories so other programs can prepare their datasets
|
||||
PrepDirs,
|
||||
///Runs it all
|
||||
Run,
|
||||
}
|
213
src/dataset_gathering.rs
Normal file
213
src/dataset_gathering.rs
Normal file
|
@ -0,0 +1,213 @@
|
|||
use curl::easy::Easy as easy_curl;
|
||||
use rand::{self, RngCore, SeedableRng};
|
||||
use rand_xorshift::XorShiftRng;
|
||||
use std::{
|
||||
env::current_dir,
|
||||
fs::{create_dir_all, exists, remove_dir_all, remove_file, File},
|
||||
io::{Error, Write},
|
||||
os::unix::fs::FileExt,
|
||||
process::Command,
|
||||
sync::{Arc, Mutex},
|
||||
thread::{self, JoinHandle},
|
||||
};
|
||||
|
||||
/*
|
||||
=================================================================
|
||||
==== ====
|
||||
==== ↓ DATASET GATHERING ↓ ====
|
||||
==== ====
|
||||
=================================================================
|
||||
*/
|
||||
pub fn large_random_file_generation(path: String) {
|
||||
// https://stackoverflow.com/a/65235966
|
||||
let out = Arc::new(Mutex::new(File::create(path)));
|
||||
// NOTE: update this both here and in the helper (_large_random_file_generation_helper())
|
||||
let num_threads: u64 = 12;
|
||||
let mut threads: Vec<JoinHandle<()>> = Vec::new();
|
||||
for i in 0..num_threads {
|
||||
let out = Arc::clone(&out);
|
||||
|
||||
let thread = thread::spawn(move || {
|
||||
_large_random_file_generation_helper(&i, out);
|
||||
});
|
||||
|
||||
threads.push(thread);
|
||||
}
|
||||
|
||||
for t in threads {
|
||||
t.join().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Error>>>) {
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
// NOTE: update this both here and in `large_random_file_generation()`
|
||||
let num_threads = 12;
|
||||
let mut data = [0u8; 1310720];
|
||||
let block_size = 1310720;
|
||||
|
||||
// enter desired size in bytes, must be a multiple of 655360
|
||||
// this is not a typo, the extra zero after 65536is for the threads
|
||||
// 26843545600 = 25 GiB
|
||||
let blocks_per_thread: u64 = 26843545600 / (block_size * num_threads);
|
||||
for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) {
|
||||
rng.fill_bytes(&mut data);
|
||||
|
||||
//let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u);
|
||||
let offset: u64 = u * block_size;
|
||||
let mut out = out.lock().unwrap();
|
||||
out.as_mut().unwrap().write_all_at(&data, offset).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
pub fn single_threaded_large_random_file_generation(path: String) {
|
||||
let mut out = File::create(path).unwrap();
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
let mut data = [0u8; 65536];
|
||||
for _ in 0..409600 {
|
||||
rng.fill_bytes(&mut data);
|
||||
out.write_all(&data).unwrap();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
pub fn small_random_files_generation(folder: String) {
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
let mut data: [u8; 1024] = [0u8; 1024];
|
||||
for i in 1..1025 {
|
||||
let mut out = File::create(format!("{folder}/{i}")).unwrap();
|
||||
rng.fill_bytes(&mut data);
|
||||
out.write_all(&data).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn random_file_generator(path: String, size_mib: u64) {
|
||||
let mut out = File::create(path).unwrap();
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
|
||||
let mut data = [0u8; 1310720];
|
||||
let block_size = 1310720;
|
||||
let blocks: u64 = (size_mib * 1024 * 1024) / block_size;
|
||||
|
||||
for _ in 0..blocks {
|
||||
rng.fill_bytes(&mut data);
|
||||
out.write_all(&data).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates (or truncates) the file at `path` and makes it exactly `size`
/// bytes long, all of them zero.
///
/// The length is set directly rather than by writing data, which also makes
/// `size == 0` valid (it yields an empty file).
///
/// # Panics
///
/// Panics if the file cannot be created or resized.
pub fn create_null_file(path: String, size: u64) {
    let out = File::create(path).unwrap();
    // Extending a file with `set_len` fills the new range with zeros, the
    // same contents as writing a single zero byte at `size - 1`, but without
    // the underflow when `size` is 0.
    out.set_len(size).unwrap();
}
|
||||
|
||||
// no reason for it not to be multithreaded, but there's not much point either, it hardly takes any time... if anything, the overhead from multithreading might be worse?
|
||||
pub fn small_null_files_generation(folder: String) {
|
||||
for i in 1..1025 {
|
||||
create_null_file(format!("{folder}/{i}"), 1024);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn grab_kernel(folder: String, kernel_version: String) -> Result<bool, String> {
|
||||
// maybe i should've just used reqwest, but that's no fun (also much more dependencies and stuff i'm sure)
|
||||
// NOTE: requires openssl-devel to be installed for compilation (presumably requires openssl-libs for execution)
|
||||
if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) {
|
||||
let mut curl = easy_curl::new();
|
||||
curl.url(&format!(
|
||||
"https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz"
|
||||
))
|
||||
.unwrap();
|
||||
curl.follow_location(true).unwrap();
|
||||
let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap();
|
||||
match curl.write_function(move |data| {
|
||||
out.write_all(data).unwrap();
|
||||
Ok(data.len())
|
||||
}) {
|
||||
Ok(_) => (),
|
||||
Err(e) => return Err(e.to_string()),
|
||||
}
|
||||
curl.perform().unwrap();
|
||||
}
|
||||
|
||||
// i'm too lazy to do this in rust
|
||||
if !(exists(format!("{folder}/linux-{kernel_version}")).unwrap()) {
|
||||
let mut dir = current_dir().unwrap();
|
||||
dir.push(folder);
|
||||
match Command::new("tar")
|
||||
.current_dir(dir)
|
||||
.arg("-xf")
|
||||
.arg(&format!("linux-{kernel_version}.tar.xz"))
|
||||
.arg("")
|
||||
.output()
|
||||
{
|
||||
Ok(_) => (),
|
||||
Err(e) => return Err(e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
pub fn grab_datasets() -> Result<bool, String> {
|
||||
let kernel_version = "6.6.58";
|
||||
|
||||
if !exists(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap() {
|
||||
println!("Downloading kernel...");
|
||||
create_dir_all("data/datasets/kernel").unwrap();
|
||||
match grab_kernel(
|
||||
"data/datasets/kernel".to_string(),
|
||||
kernel_version.to_string(),
|
||||
) {
|
||||
Ok(_) => (),
|
||||
Err(e) => {
|
||||
remove_dir_all(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap();
|
||||
remove_file(format!(
|
||||
"data/datasets/kernel/linux-{kernel_version}.tar.xz"
|
||||
))
|
||||
.unwrap();
|
||||
panic!("{}", e.to_string());
|
||||
}
|
||||
}
|
||||
println!("Kernel downloaded");
|
||||
}
|
||||
|
||||
if !exists(format!("data/datasets/25G-random.bin")).unwrap() {
|
||||
println!("Generating random 25 GiB file...");
|
||||
large_random_file_generation("data/datasets/25G-random.bin".to_string());
|
||||
println!("Random 25 GiB file generated");
|
||||
}
|
||||
|
||||
if !exists(format!("data/datasets/small-files/random")).unwrap() {
|
||||
println!("Generating random 1 KiB files...");
|
||||
create_dir_all("data/datasets/small-files/random").unwrap();
|
||||
small_random_files_generation("data/datasets/small-files/random".to_string());
|
||||
println!("Random 1 KiB files generated...");
|
||||
}
|
||||
|
||||
if !exists(format!("data/datasets/25G-null.bin")).unwrap() {
|
||||
println!("Generating null 25 GiB file...");
|
||||
create_null_file("data/datasets/25G-null.bin".to_string(), 26843545600);
|
||||
println!("Null 25 GiB file generated...");
|
||||
}
|
||||
|
||||
if !exists("data/datasets/small-files/null").unwrap() {
|
||||
println!("Generating null 1 KiB files...");
|
||||
create_dir_all("data/datasets/small-files/null").unwrap();
|
||||
small_null_files_generation("data/datasets/small-files/null".to_string());
|
||||
println!("Null 1 KiB files generated...");
|
||||
}
|
||||
|
||||
if !exists("data/datasets/100M-polygon.txt").unwrap() {
|
||||
return Err("*** MANUAL: Get 100M-sided regular polygon data and put it at `./data/datasets/100M-polygon.txt` ***".to_string());
|
||||
};
|
||||
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
/// Creates the `data/mountpoints` directory (and any missing parents) unless
/// it already exists. Always returns `true`.
///
/// # Panics
///
/// Panics if the existence check or the directory creation fails.
pub fn prep_other_dirs() -> bool {
    let mountpoints = "data/mountpoints";

    let already_present = exists(mountpoints).unwrap();
    if !already_present {
        create_dir_all(mountpoints).unwrap();
    }

    true
}
|
3
src/lib.rs
Normal file
3
src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
pub mod dataset_gathering;
|
||||
pub mod benchmarks;
|
||||
pub mod cli;
|
448
src/main.rs
448
src/main.rs
|
@ -1,430 +1,26 @@
|
|||
use curl::easy::Easy as easy_curl;
|
||||
use rand::{self, Rng, RngCore, SeedableRng};
|
||||
use rand_xorshift::XorShiftRng;
|
||||
use std::io::Read;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{
|
||||
env::current_dir,
|
||||
fs::{create_dir_all, exists, remove_dir_all, remove_file, File},
|
||||
io::{Error, Write},
|
||||
os::unix::fs::FileExt,
|
||||
process::Command,
|
||||
sync::{Arc, Mutex},
|
||||
thread::{self, JoinHandle},
|
||||
};
|
||||
|
||||
/*
|
||||
=================================================================
|
||||
==== ====
|
||||
==== ↓ DATASET GATHERING ↓ ====
|
||||
==== ====
|
||||
=================================================================
|
||||
*/
|
||||
|
||||
fn large_random_file_generation(path: String) {
|
||||
// https://stackoverflow.com/a/65235966
|
||||
let out = Arc::new(Mutex::new(File::create(path)));
|
||||
// NOTE: update this both here and in the helper (_large_random_file_generation_helper())
|
||||
let num_threads: u64 = 12;
|
||||
let mut threads: Vec<JoinHandle<()>> = Vec::new();
|
||||
for i in 0..num_threads {
|
||||
let out = Arc::clone(&out);
|
||||
|
||||
let thread = thread::spawn(move || {
|
||||
_large_random_file_generation_helper(&i, out);
|
||||
});
|
||||
|
||||
threads.push(thread);
|
||||
}
|
||||
|
||||
for t in threads {
|
||||
t.join().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Error>>>) {
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
// NOTE: update this both here and in `large_random_file_generation()`
|
||||
let num_threads = 12;
|
||||
let mut data = [0u8; 1310720];
|
||||
let block_size = 1310720;
|
||||
|
||||
// enter desired size in bytes, must be a multiple of 655360
|
||||
// this is not a typo, the extra zero after 65536is for the threads
|
||||
// 26843545600 = 25 GiB
|
||||
let blocks_per_thread: u64 = 26843545600 / (block_size * num_threads);
|
||||
for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) {
|
||||
rng.fill_bytes(&mut data);
|
||||
|
||||
//let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u);
|
||||
let offset: u64 = u * block_size;
|
||||
let mut out = out.lock().unwrap();
|
||||
out.as_mut().unwrap().write_all_at(&data, offset).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
fn single_threaded_large_random_file_generation(path: String) {
|
||||
let mut out = File::create(path).unwrap();
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
let mut data = [0u8; 65536];
|
||||
for _ in 0..409600 {
|
||||
rng.fill_bytes(&mut data);
|
||||
out.write_all(&data).unwrap();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
fn small_random_files_generation(folder: String) {
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
let mut data: [u8; 1024] = [0u8; 1024];
|
||||
for i in 1..1025 {
|
||||
let mut out = File::create(format!("{folder}/{i}")).unwrap();
|
||||
rng.fill_bytes(&mut data);
|
||||
out.write_all(&data).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn random_file_generator(path: String, size_mib: u64) {
|
||||
let mut out = File::create(path).unwrap();
|
||||
let mut rng = XorShiftRng::seed_from_u64(2484345508);
|
||||
|
||||
let mut data = [0u8; 1310720];
|
||||
let block_size = 1310720;
|
||||
let blocks: u64 = (size_mib * 1024 * 1024) / block_size;
|
||||
|
||||
for _ in 0..blocks {
|
||||
rng.fill_bytes(&mut data);
|
||||
out.write_all(&data).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn create_null_file(path: String, size: u64) {
|
||||
let out = File::create(path).unwrap();
|
||||
out.write_all_at(&[0], size - 1).unwrap();
|
||||
}
|
||||
|
||||
// no reason for it not to be multithreaded, but there's not much point either, it hardly takes any time... if anything, the overhead from multithreading might be worse?
|
||||
fn small_null_files_generation(folder: String) {
|
||||
for i in 1..1025 {
|
||||
create_null_file(format!("{folder}/{i}"), 1024);
|
||||
}
|
||||
}
|
||||
|
||||
fn grab_kernel(folder: String, kernel_version: String) -> Result<bool, String> {
|
||||
// maybe i should've just used reqwest, but that's no fun (also much more dependencies and stuff i'm sure)
|
||||
// NOTE: requires openssl-devel to be installed for compilation (presumably requires openssl-libs for execution)
|
||||
if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) {
|
||||
let mut curl = easy_curl::new();
|
||||
curl.url(&format!(
|
||||
"https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz"
|
||||
))
|
||||
.unwrap();
|
||||
curl.follow_location(true).unwrap();
|
||||
let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap();
|
||||
match curl.write_function(move |data| {
|
||||
out.write_all(data).unwrap();
|
||||
Ok(data.len())
|
||||
}) {
|
||||
Ok(_) => (),
|
||||
Err(e) => return Err(e.to_string()),
|
||||
}
|
||||
curl.perform().unwrap();
|
||||
}
|
||||
|
||||
// i'm too lazy to do this in rust
|
||||
if !(exists(format!("{folder}/linux-{kernel_version}")).unwrap()) {
|
||||
let mut dir = current_dir().unwrap();
|
||||
dir.push(folder);
|
||||
match Command::new("tar")
|
||||
.current_dir(dir)
|
||||
.arg("-xf")
|
||||
.arg(&format!("linux-{kernel_version}.tar.xz"))
|
||||
.arg("")
|
||||
.output()
|
||||
{
|
||||
Ok(_) => (),
|
||||
Err(e) => return Err(e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
fn grab_datasets() -> Result<bool, String> {
|
||||
let kernel_version = "6.6.58";
|
||||
|
||||
if !exists(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap() {
|
||||
println!("Downloading kernel...");
|
||||
create_dir_all("data/datasets/kernel").unwrap();
|
||||
match grab_kernel(
|
||||
"data/datasets/kernel".to_string(),
|
||||
kernel_version.to_string(),
|
||||
) {
|
||||
Ok(_) => (),
|
||||
Err(e) => {
|
||||
remove_dir_all(format!("data/datasets/kernel/linux-{kernel_version}")).unwrap();
|
||||
remove_file(format!(
|
||||
"data/datasets/kernel/linux-{kernel_version}.tar.xz"
|
||||
))
|
||||
.unwrap();
|
||||
panic!("{}", e.to_string());
|
||||
}
|
||||
}
|
||||
println!("Kernel downloaded");
|
||||
}
|
||||
|
||||
if !exists(format!("data/datasets/25G-random.bin")).unwrap() {
|
||||
println!("Generating random 25 GiB file...");
|
||||
large_random_file_generation("data/datasets/25G-random.bin".to_string());
|
||||
println!("Random 25 GiB file generated");
|
||||
}
|
||||
|
||||
if !exists(format!("data/datasets/small-files/random")).unwrap() {
|
||||
println!("Generating random 1 KiB files...");
|
||||
create_dir_all("data/datasets/small-files/random").unwrap();
|
||||
small_random_files_generation("data/datasets/small-files/random".to_string());
|
||||
println!("Random 1 KiB files generated...");
|
||||
}
|
||||
|
||||
if !exists(format!("data/datasets/25G-null.bin")).unwrap() {
|
||||
println!("Generating null 25 GiB file...");
|
||||
create_null_file("data/datasets/25G-null.bin".to_string(), 26843545600);
|
||||
println!("Null 25 GiB file generated...");
|
||||
}
|
||||
|
||||
if !exists("data/datasets/small-files/null").unwrap() {
|
||||
println!("Generating null 1 KiB files...");
|
||||
create_dir_all("data/datasets/small-files/null").unwrap();
|
||||
small_null_files_generation("data/datasets/small-files/null".to_string());
|
||||
println!("Null 1 KiB files generated...");
|
||||
}
|
||||
|
||||
if !exists("data/datasets/100M-polygon.txt").unwrap() {
|
||||
return Err("*** MANUAL: Get 100M-sided regular polygon data and put it at `./data/datasets/100M-polygon.txt` ***".to_string());
|
||||
};
|
||||
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
fn prep_other_dirs() -> bool {
|
||||
if !exists("data/mountpoints").unwrap() {
|
||||
create_dir_all("data/mountpoints").unwrap();
|
||||
};
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
=================================================================
|
||||
==== ====
|
||||
==== ↓ BENCHMARKS ↓ ====
|
||||
==== ====
|
||||
=================================================================
|
||||
*/
|
||||
|
||||
fn sequential_read(path: String) -> Duration {
|
||||
let mut f: File = File::open(path).unwrap();
|
||||
let size = f.metadata().unwrap().len();
|
||||
|
||||
let mut data: [u8; 1310720] = [0u8; 1310720];
|
||||
// benchmarking/elapsed: https://stackoverflow.com/a/40953863
|
||||
let now = Instant::now();
|
||||
for _ in 0..(size / 1310720) {
|
||||
f.read(&mut data).unwrap();
|
||||
}
|
||||
let elapsed = now.elapsed();
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
/// Reads 1 byte from the start of file
|
||||
fn sequential_read_latency(path: String) -> Duration {
|
||||
let mut f: File = File::open(path).unwrap();
|
||||
let mut data: [u8; 1] = [0u8; 1];
|
||||
let now = Instant::now();
|
||||
f.read(&mut data).unwrap();
|
||||
let elapsed = now.elapsed();
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
/// Reads 1 GiB from the file at `path` in random 1 MiB chunks
|
||||
fn random_read(path: String) -> Duration {
|
||||
let mut rng = XorShiftRng::seed_from_u64(9198675309);
|
||||
let f: File = File::open(path).unwrap();
|
||||
let size = f.metadata().unwrap().len();
|
||||
|
||||
let mut data: [u8; 1048576] = [0u8; 1048576];
|
||||
let now = Instant::now();
|
||||
for _ in 0..1024 {
|
||||
let offset = rng.gen_range(0..((size - 1048576) / 1048576));
|
||||
f.read_at(&mut data, offset).unwrap();
|
||||
}
|
||||
let elapsed = now.elapsed();
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
/// Reads 1 random byte from the file at `path` 1024 times
|
||||
fn random_read_latency(path: String) -> Duration {
|
||||
let mut rng = XorShiftRng::seed_from_u64(9198675309);
|
||||
let f: File = File::open(path).unwrap();
|
||||
let size = f.metadata().unwrap().len();
|
||||
let mut data: [u8; 1] = [0u8; 1];
|
||||
let now = Instant::now();
|
||||
for _ in 0..1024 {
|
||||
let offset = rng.gen_range(0..(size - 1));
|
||||
f.read_at(&mut data, offset).unwrap();
|
||||
}
|
||||
let elapsed = now.elapsed();
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
fn bulk_sequential_read(path: String) -> Vec<Duration> {
|
||||
let mut data: [u8; 1024] = [0u8; 1024];
|
||||
let mut times: Vec<Duration> = Vec::new();
|
||||
for i in 1..1025 {
|
||||
let mut f: File = File::open(format!("{path}/{i}")).unwrap();
|
||||
let now = Instant::now();
|
||||
f.read(&mut data).unwrap();
|
||||
let elapsed = now.elapsed();
|
||||
times.push(elapsed);
|
||||
}
|
||||
|
||||
return times;
|
||||
}
|
||||
|
||||
fn bulk_sequential_read_latency(path: String) -> Vec<Duration> {
|
||||
let mut data: [u8; 1] = [0u8; 1];
|
||||
let mut times: Vec<Duration> = Vec::new();
|
||||
for i in 1..1025 {
|
||||
let now = Instant::now();
|
||||
let mut f: File = File::open(format!("{path}/{i}")).unwrap();
|
||||
f.read(&mut data).unwrap();
|
||||
let elapsed = now.elapsed();
|
||||
times.push(elapsed);
|
||||
}
|
||||
|
||||
return times;
|
||||
}
|
||||
|
||||
fn bulk_random_read_latency(path: String) -> Vec<Duration> {
|
||||
let mut rng = XorShiftRng::seed_from_u64(9198675309);
|
||||
let mut data: [u8; 1] = [0u8; 1];
|
||||
let mut times: Vec<Duration> = Vec::new();
|
||||
for i in 1..1025 {
|
||||
let mut f: File = File::open(format!("{path}/{i}")).unwrap();
|
||||
let offset = rng.gen_range(0..1023);
|
||||
let now = Instant::now();
|
||||
f.read_at(&mut data, offset).unwrap();
|
||||
let elapsed = now.elapsed();
|
||||
times.push(elapsed);
|
||||
}
|
||||
|
||||
return times;
|
||||
}
|
||||
|
||||
fn benchmark() {
|
||||
let mut recorder = csv::Writer::from_path("data/benchmark-data.csv").unwrap();
|
||||
let mut bulk_recorder = csv::Writer::from_path("data/bulk.csv").unwrap();
|
||||
let mountpoint_dir = "data/mountpoints";
|
||||
let mut filesystems = std::fs::read_dir(mountpoint_dir)
|
||||
.unwrap()
|
||||
.map(|item| {
|
||||
let tmp = item.unwrap().file_name().into_string().unwrap();
|
||||
format!("{mountpoint_dir}/{tmp}")
|
||||
})
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
filesystems.push("data/datasets".to_string());
|
||||
|
||||
for fs in filesystems {
|
||||
let single_files = vec![
|
||||
"25G-null.bin".to_string(),
|
||||
"25G-random.bin".to_string(),
|
||||
"100M-polygon.txt".to_string(),
|
||||
"kernel/linux-6.6.58.tar.xz".to_string(),
|
||||
];
|
||||
let bulk_files: Vec<String> = vec![
|
||||
"small-files/null".to_string(),
|
||||
"small-files/random".to_string(),
|
||||
];
|
||||
|
||||
for filename in single_files {
|
||||
let path = format!("{fs}/{filename}");
|
||||
println!("=== {} ===", path.clone());
|
||||
|
||||
let seq_read = format!("{:.5?}", sequential_read(path.clone()));
|
||||
println!("Sequential read (complete file read): {}", seq_read.clone());
|
||||
|
||||
let seq_latency = format!("{:.5?}", sequential_read_latency(path.clone()));
|
||||
println!("Sequential latency (1 byte read): {}", seq_latency);
|
||||
|
||||
let rand_read = format!("{:.5?}", random_read(path.clone()));
|
||||
println!("Random read (1024x 1 MiB): {}", rand_read);
|
||||
|
||||
let mut rand_latency: String = "0s".to_string();
|
||||
if fs != "data/mountpoints/fuse-archive-tar" {
|
||||
rand_latency = format!("{:.5?}", random_read_latency(path.clone()));
|
||||
}
|
||||
|
||||
println!("Random latency (1024x 1 byte read): {}", rand_latency);
|
||||
|
||||
let data: Vec<String> = vec![
|
||||
fs.clone(),
|
||||
filename,
|
||||
seq_read,
|
||||
seq_latency,
|
||||
rand_read,
|
||||
rand_latency,
|
||||
];
|
||||
recorder.write_record(data).unwrap();
|
||||
println!();
|
||||
}
|
||||
|
||||
// bulk files
|
||||
for folder in bulk_files {
|
||||
let cloned = fs.clone();
|
||||
let path = format!("{cloned}/{folder}");
|
||||
println!("[bulk] Testing {}", path);
|
||||
let dataset_info: Vec<String> = vec![fs.clone(), folder];
|
||||
|
||||
let mut times = _vec_duration_to_string(bulk_sequential_read(path.clone()));
|
||||
let mut tmp = Vec::new();
|
||||
dataset_info.clone_into(&mut tmp);
|
||||
tmp.push("bulk_sequential_read".to_string());
|
||||
tmp.append(&mut times);
|
||||
bulk_recorder.write_record(tmp).unwrap();
|
||||
|
||||
times = _vec_duration_to_string(bulk_sequential_read_latency(path.clone()));
|
||||
tmp = Vec::new();
|
||||
dataset_info.clone_into(&mut tmp);
|
||||
tmp.push("bulk_sequential_read_latency".to_string());
|
||||
tmp.append(&mut times);
|
||||
bulk_recorder.write_record(tmp).unwrap();
|
||||
|
||||
// not enough data in these files to warrant bulk_random_read()
|
||||
//bulk_recorder.write_record(_vec_duration_to_string(bulk_random_read(path.clone()))).unwrap();
|
||||
times = _vec_duration_to_string(bulk_random_read_latency(path.clone()));
|
||||
tmp = Vec::new();
|
||||
dataset_info.clone_into(&mut tmp);
|
||||
tmp.push("bulk_random_read_latency".to_string());
|
||||
tmp.append(&mut times);
|
||||
bulk_recorder.write_record(tmp).unwrap();
|
||||
}
|
||||
println!("\n=== === === === === === === === === === ===\n")
|
||||
}
|
||||
}
|
||||
use clap::Parser;
|
||||
use disk_read_benchmark::benchmarks::benchmark;
|
||||
use disk_read_benchmark::cli::*;
|
||||
use disk_read_benchmark::dataset_gathering::*;
|
||||
|
||||
fn main() {
|
||||
grab_datasets().unwrap();
|
||||
prep_other_dirs();
|
||||
benchmark();
|
||||
}
|
||||
let cli = Cli::parse();
|
||||
|
||||
fn _vec_duration_to_string(
|
||||
vector_committing_crimes_with_both_direction_and_magnitude: Vec<Duration>,
|
||||
) -> Vec<String> {
|
||||
return vector_committing_crimes_with_both_direction_and_magnitude
|
||||
.iter()
|
||||
.map(|item| format!("{:.5?}", item))
|
||||
.collect::<Vec<String>>();
|
||||
match cli.command {
|
||||
Commands::PrepDirs => {
|
||||
// FIXME: okay i'm dumb, this only covers stuff that's not handled by grab_datasets(), and literally nothing creates ext-workdir
|
||||
prep_other_dirs();
|
||||
}
|
||||
Commands::GrabData => {
|
||||
grab_datasets().unwrap(); // * should unwrap
|
||||
}
|
||||
Commands::Benchmark => {
|
||||
benchmark();
|
||||
}
|
||||
Commands::Run => {
|
||||
prep_other_dirs();
|
||||
grab_datasets().unwrap(); // * should unwrap
|
||||
benchmark();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue