Add the rest of the datasets

This commit is contained in:
askiiart 2024-10-25 21:53:37 -05:00
parent 42fbe81ecb
commit 0dcaa0ba53
Signed by untrusted user who does not match committer: askiiart
GPG key ID: EA85979611654C30
2 changed files with 97 additions and 30 deletions

3
README.md Normal file
View file

@ -0,0 +1,3 @@
# Read-only benchmark
This benchmark measures read latency, sequential read speed, and random read speed across a variety of datasets.

View file

@ -1,8 +1,9 @@
use curl::easy::{self, Easy as easy_curl}; use curl::easy::Easy as easy_curl;
use rand::{self, RngCore, SeedableRng}; use rand::{self, RngCore, SeedableRng};
use rand_xorshift::XorShiftRng; use rand_xorshift::XorShiftRng;
use std::{ use std::{
fs::{exists, File}, env::current_dir,
fs::{create_dir_all, exists, File},
io::{Error, Write}, io::{Error, Write},
os::unix::fs::FileExt, os::unix::fs::FileExt,
process::Command, process::Command,
@ -12,7 +13,7 @@ use std::{
fn large_random_file_generation(path: String) { fn large_random_file_generation(path: String) {
// https://stackoverflow.com/a/65235966 // https://stackoverflow.com/a/65235966
let mut out = Arc::new(Mutex::new(File::create(path))); let out = Arc::new(Mutex::new(File::create(path)));
// NOTE: update this both here and in the helper (_large_random_file_generation_helper()) // NOTE: update this both here and in the helper (_large_random_file_generation_helper())
let num_threads: u64 = 12; let num_threads: u64 = 12;
let mut threads: Vec<JoinHandle<()>> = Vec::new(); let mut threads: Vec<JoinHandle<()>> = Vec::new();
@ -49,61 +50,124 @@ fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Err
//let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u); //let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u);
let offset: u64 = u * block_size; let offset: u64 = u * block_size;
let mut out = out.lock().unwrap(); let mut out = out.lock().unwrap();
out.as_mut().unwrap().write_all_at(&data, offset); out.as_mut().unwrap().write_all_at(&data, offset).unwrap();
} }
} }
/*
fn single_threaded_large_random_file_generation(path: String) { fn single_threaded_large_random_file_generation(path: String) {
let mut out = File::create(path).unwrap(); let mut out = File::create(path).unwrap();
let mut rng = XorShiftRng::seed_from_u64(2484345508); let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 65536]; let mut data = [0u8; 65536];
for i in 0..409600 { for _ in 0..409600 {
rng.fill_bytes(&mut data); rng.fill_bytes(&mut data);
out.write_all(&data); out.write_all(&data).unwrap();
} }
} }
*/
fn small_random_files_generation(folder: String) { fn small_random_files_generation(folder: String) {
let mut rng = XorShiftRng::seed_from_u64(2484345508); let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 1024]; let mut data: [u8; 1024] = [0u8; 1024];
for i in 1..1001 { for i in 1..1001 {
let mut out = File::create(format!("{folder}/{i}")).unwrap(); let mut out = File::create(format!("{folder}/{i}")).unwrap();
rng.fill_bytes(&mut data); rng.fill_bytes(&mut data);
out.write(&data); out.write_all(&data).unwrap();
} }
} }
/// Creates (or truncates) the file at `path` and sizes it to exactly `size` zero bytes.
///
/// The length is set with [`File::set_len`] instead of writing a single byte at
/// `size - 1`, so `size == 0` yields an empty file rather than underflowing the offset.
/// No data is written explicitly; the extended range reads back as zeros.
///
/// # Panics
///
/// Panics if the file cannot be created or resized.
fn create_empty_file(path: String, size: u64) {
    let out = File::create(&path).unwrap_or_else(|e| panic!("failed to create {path}: {e}"));
    out.set_len(size)
        .unwrap_or_else(|e| panic!("failed to resize {path} to {size} bytes: {e}"));
}
/// Populates `folder` with 1000 zero-filled files named `1` through `1000`, 1 KiB each.
///
/// Only the last byte of each file is written; writing past the end extends the file and
/// the bytes before that offset read back as zeros. `folder` must already exist.
///
/// # Panics
///
/// Panics if any file cannot be created or written.
fn small_empty_files_generation(folder: String) {
    // Offset of the final byte in a 1024-byte file.
    const LAST_BYTE_OFFSET: u64 = 1023;

    (1..=1000).for_each(|index| {
        let target = format!("{folder}/{index}");
        let file = File::create(target).unwrap();
        file.write_all_at(&[0], LAST_BYTE_OFFSET).unwrap();
    });
}
fn grab_kernel(folder: String, kernel_version: String) {
// https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz // https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz
if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) {
let mut curl = easy_curl::new(); let mut curl = easy_curl::new();
curl.url("https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz"); curl.url(&format!(
curl.follow_location(true); "https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz"
if !(exists(format!("{folder}/kernel.tar.xz")).unwrap()) { ))
let mut out = File::create(format!("{folder}/kernel.tar.xz")).unwrap(); .unwrap();
curl.follow_location(true).unwrap();
let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap();
curl.write_function(move |data| { curl.write_function(move |data| {
out.write_all(data).unwrap(); out.write_all(data).unwrap();
Ok(data.len()) Ok(data.len())
}); })
.unwrap();
curl.perform().unwrap(); curl.perform().unwrap();
} }
// i'm too lazy to do this in rust // i'm too lazy to do this in rust
println!( let mut dir = current_dir().unwrap();
"{:?}", dir.push(folder);
Command::new("tar") Command::new("tar")
.arg("-xvf") .current_dir(dir)
.arg("data/kernel/kernel.tar.xz") .arg("-xf")
.arg("--one-top-level") .arg(&format!("linux-{kernel_version}.tar.xz"))
.current_dir("data/kernel/") .arg("");
.output() }
.unwrap()
); fn grab_datasets() {
let kernel_version = "6.6.58";
create_dir_all("data/kernel").unwrap();
if !(exists(format!("data/kernel/linux-{kernel_version}")).unwrap()) {
println!("Downloading kernel...");
grab_kernel("data/kernel".to_string(), kernel_version.to_string());
println!("Kernel downloaded");
} else {
println!("Kernel already downloaded");
}
if !(exists(format!("data/25G-random.bin")).unwrap()) {
println!("Generating random 25 GiB file...");
large_random_file_generation("data/25G-random.bin".to_string());
println!("Random 25 GiB file generated");
} else {
println!("Random 25 GiB file already generated");
}
if !(exists(format!("data/small-files/random")).unwrap()) {
println!("Generating random 1 KiB files...");
create_dir_all("data/small-files/random").unwrap();
small_random_files_generation("data/small-files/random".to_string());
println!("Random 1 KiB files generated...");
} else {
println!("Random 1 KiB files already generated")
}
if !(exists(format!("data/25G-null.bin")).unwrap()) {
println!("Generating empty 25 GiB file...");
create_empty_file("data/25G-null.bin".to_string(), 26843545600);
println!("Empty 25 GiB file generated...");
} else {
println!("Empty 25 GiB file already generated");
}
if !(exists("data/small-files/null").unwrap()) {
println!("Generating empty 1 KiB files...");
create_dir_all("data/small-files/null").unwrap();
small_empty_files_generation("data/small-files/null".to_string());
println!("Empty 1 KiB files generated...");
} else {
println!("Empty 1 KiB files already generated")
}
if !(exists("data/small-files/100M-polygon.txt").unwrap()) {
println!("*** Get 100M-sided regular polygon data and put it at `./data/small-files/100M-polygon.txt` ***");
}
} }
fn main() { fn main() {
large_random_file_generation("data/25G-random.bin".to_string()); grab_datasets();
//single_threaded_large_random_file_generation("data/output".to_string())
//small_random_files_generation("data/small-files/random".to_string());
//grab_kernel("data/kernel".to_string());
} }