Add the rest of the datasets

askiiart 2024-10-25 21:53:37 -05:00
parent 42fbe81ecb
commit 0dcaa0ba53
Signed by untrusted user who does not match committer: askiiart
GPG key ID: EA85979611654C30
2 changed files with 97 additions and 30 deletions

README.md Normal file

@@ -0,0 +1,3 @@
# Read-only benchmark
This tests the latency, sequential read speed, and random read speed of a variety of data.
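To make those three measurements concrete, here is a minimal sketch of how sequential and random reads of a file might be timed in Rust. It is not the benchmark's actual code; the path, block size, and read count are illustrative assumptions, and it assumes the test file already exists.

```rust
use rand::{RngCore, SeedableRng};
use rand_xorshift::XorShiftRng;
use std::fs::File;
use std::io::Read;
use std::os::unix::fs::FileExt;
use std::time::Instant;

fn main() -> std::io::Result<()> {
    // Assumed test file and sizes; adjust to whatever dataset is being measured.
    let path = "data/25G-random.bin";
    let block_size: usize = 65536;
    let reads: u64 = 1024;
    let mut buf = vec![0u8; block_size];

    // Sequential read: read the first `reads` blocks back to back.
    let mut file = File::open(path)?;
    let start = Instant::now();
    for _ in 0..reads {
        file.read_exact(&mut buf)?;
    }
    println!("sequential: {:?}", start.elapsed());

    // Random read: read the same number of blocks at pseudo-random offsets.
    let file = File::open(path)?;
    let len = file.metadata()?.len();
    let mut rng = XorShiftRng::seed_from_u64(2484345508);
    let start = Instant::now();
    for _ in 0..reads {
        let offset = rng.next_u64() % (len - block_size as u64);
        file.read_exact_at(&mut buf, offset)?;
    }
    println!("random: {:?}", start.elapsed());
    Ok(())
}
```

Latency could be approximated the same way by timing individual small reads at random offsets rather than whole-block loops.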


@@ -1,8 +1,9 @@
use curl::easy::{self, Easy as easy_curl};
use curl::easy::Easy as easy_curl;
use rand::{self, RngCore, SeedableRng};
use rand_xorshift::XorShiftRng;
use std::{
fs::{exists, File},
env::current_dir,
fs::{create_dir_all, exists, File},
io::{Error, Write},
os::unix::fs::FileExt,
process::Command,
@@ -12,7 +13,7 @@ use std::{
fn large_random_file_generation(path: String) {
// https://stackoverflow.com/a/65235966
let mut out = Arc::new(Mutex::new(File::create(path)));
let out = Arc::new(Mutex::new(File::create(path)));
// NOTE: update this both here and in the helper (_large_random_file_generation_helper())
let num_threads: u64 = 12;
let mut threads: Vec<JoinHandle<()>> = Vec::new();
@@ -49,61 +50,124 @@ fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Err
//let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u);
let offset: u64 = u * block_size;
let mut out = out.lock().unwrap();
out.as_mut().unwrap().write_all_at(&data, offset);
out.as_mut().unwrap().write_all_at(&data, offset).unwrap();
}
}
/*
fn single_threaded_large_random_file_generation(path: String) {
let mut out = File::create(path).unwrap();
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 65536];
for i in 0..409600 {
for _ in 0..409600 {
rng.fill_bytes(&mut data);
out.write_all(&data);
out.write_all(&data).unwrap();
}
}
*/
fn small_random_files_generation(folder: String) {
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 1024];
let mut data: [u8; 1024] = [0u8; 1024];
for i in 1..1001 {
let mut out = File::create(format!("{folder}/{i}")).unwrap();
rng.fill_bytes(&mut data);
out.write(&data);
out.write_all(&data).unwrap();
}
}
fn grab_kernel(folder: String) {
fn create_empty_file(path: String, size: u64) {
let out = File::create(path).unwrap();
out.write_all_at(&[0], size - 1).unwrap();
}
fn small_empty_files_generation(folder: String) {
for i in 1..1001 {
let out = File::create(format!("{folder}/{i}")).unwrap();
out.write_all_at(&[0], 1023).unwrap();
}
}
fn grab_kernel(folder: String, kernel_version: String) {
// https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz
let mut curl = easy_curl::new();
curl.url("https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz");
curl.follow_location(true);
if !(exists(format!("{folder}/kernel.tar.xz")).unwrap()) {
let mut out = File::create(format!("{folder}/kernel.tar.xz")).unwrap();
if !(exists(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap()) {
let mut curl = easy_curl::new();
curl.url(&format!(
"https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{kernel_version}.tar.xz"
))
.unwrap();
curl.follow_location(true).unwrap();
let mut out = File::create(format!("{folder}/linux-{kernel_version}.tar.xz")).unwrap();
curl.write_function(move |data| {
out.write_all(data).unwrap();
Ok(data.len())
});
})
.unwrap();
curl.perform().unwrap();
}
// i'm too lazy to do this in rust
println!(
"{:?}",
Command::new("tar")
.arg("-xvf")
.arg("data/kernel/kernel.tar.xz")
.arg("--one-top-level")
.current_dir("data/kernel/")
.output()
.unwrap()
);
let mut dir = current_dir().unwrap();
dir.push(folder);
Command::new("tar")
.current_dir(dir)
.arg("-xf")
.arg(&format!("linux-{kernel_version}.tar.xz"))
.arg("");
}
fn grab_datasets() {
let kernel_version = "6.6.58";
create_dir_all("data/kernel").unwrap();
if !(exists(format!("data/kernel/linux-{kernel_version}")).unwrap()) {
println!("Downloading kernel...");
grab_kernel("data/kernel".to_string(), kernel_version.to_string());
println!("Kernel downloaded");
} else {
println!("Kernel already downloaded");
}
if !(exists(format!("data/25G-random.bin")).unwrap()) {
println!("Generating random 25 GiB file...");
large_random_file_generation("data/25G-random.bin".to_string());
println!("Random 25 GiB file generated");
} else {
println!("Random 25 GiB file already generated");
}
if !(exists(format!("data/small-files/random")).unwrap()) {
println!("Generating random 1 KiB files...");
create_dir_all("data/small-files/random").unwrap();
small_random_files_generation("data/small-files/random".to_string());
println!("Random 1 KiB files generated...");
} else {
println!("Random 1 KiB files already generated")
}
if !(exists(format!("data/25G-null.bin")).unwrap()) {
println!("Generating empty 25 GiB file...");
create_empty_file("data/25G-null.bin".to_string(), 26843545600);
println!("Empty 25 GiB file generated...");
} else {
println!("Empty 25 GiB file already generated");
}
if !(exists("data/small-files/null").unwrap()) {
println!("Generating empty 1 KiB files...");
create_dir_all("data/small-files/null").unwrap();
small_empty_files_generation("data/small-files/null".to_string());
println!("Empty 1 KiB files generated...");
} else {
println!("Empty 1 KiB files already generated")
}
if !(exists("data/small-files/100M-polygon.txt").unwrap()) {
println!("*** Get 100M-sided regular polygon data and put it at `./data/small-files/100M-polygon.txt` ***");
}
}
fn main() {
large_random_file_generation("data/25G-random.bin".to_string());
//single_threaded_large_random_file_generation("data/output".to_string())
//small_random_files_generation("data/small-files/random".to_string());
//grab_kernel("data/kernel".to_string());
grab_datasets();
}
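The subtlest part of the new code is the offset arithmetic in `_large_random_file_generation_helper`, where each thread fills its own contiguous slice of the output file. Below is a small self-contained sketch of the same pattern, with a deliberately tiny file (4 threads × 8 blocks of 4 KiB) so it can be run and inspected quickly; the path and sizes are illustrative and are not the commit's values.

```rust
use rand::{RngCore, SeedableRng};
use rand_xorshift::XorShiftRng;
use std::fs::File;
use std::os::unix::fs::FileExt;
use std::sync::Arc;
use std::thread;

fn main() -> std::io::Result<()> {
    // Illustrative sizes: 4 threads x 8 blocks x 4 KiB = a 128 KiB file.
    let block_size: u64 = 4096;
    let blocks_per_thread: u64 = 8;
    let num_threads: u64 = 4;

    let out = Arc::new(File::create("/tmp/parallel-demo.bin")?);
    let mut threads = Vec::new();

    for i in 0..num_threads {
        let out = Arc::clone(&out);
        threads.push(thread::spawn(move || {
            // Each thread gets its own RNG and its own contiguous range of blocks.
            let mut rng = XorShiftRng::seed_from_u64(2484345508 + i);
            let mut data = vec![0u8; block_size as usize];
            for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) {
                rng.fill_bytes(&mut data);
                // u is a global block index, so offsets never overlap across threads.
                let offset = u * block_size;
                out.write_all_at(&data, offset).unwrap();
            }
        }));
    }
    for t in threads {
        t.join().unwrap();
    }
    Ok(())
}
```

Because `FileExt::write_all_at` takes `&self` and the threads never write overlapping ranges, a plain `Arc<File>` is enough in this sketch; the commit's `Arc<Mutex<Result<File, Error>>>` also works, but the lock serializes the writes.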