random file generation works and is somewhat fast

the drive doesn't like 25 GB of random writes, though. should add another level of blocks or use mmap or something
This commit is contained in:
askiiart 2024-10-25 00:12:10 -05:00
parent 19db836ec0
commit 6da961fc84
Signed by untrusted user who does not match committer: askiiart
GPG key ID: EA85979611654C30
4 changed files with 432 additions and 24 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
/target /target
data/

335
Cargo.lock generated
View file

@ -3,14 +3,337 @@
version = 3 version = 3
[[package]] [[package]]
name = "disk-read-benchmark" name = "byteorder"
version = "0.1.0" version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f"
dependencies = [ dependencies = [
"xxhash-rust", "shlex",
] ]
[[package]] [[package]]
name = "xxhash-rust" name = "cfg-if"
version = "0.8.12" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "curl"
version = "0.4.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9fb4d13a1be2b58f14d60adba57c9834b78c62fd86c3e76a148f732686e9265"
dependencies = [
"curl-sys",
"libc",
"openssl-probe",
"openssl-sys",
"schannel",
"socket2",
"windows-sys 0.52.0",
]
[[package]]
name = "curl-sys"
version = "0.4.77+curl-8.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f469e8a5991f277a208224f6c7ad72ecb5f986e36d09ae1f2c1bb9259478a480"
dependencies = [
"cc",
"libc",
"libz-sys",
"openssl-sys",
"pkg-config",
"vcpkg",
"windows-sys 0.52.0",
]
[[package]]
name = "disk-read-benchmark"
version = "0.1.0"
dependencies = [
"curl",
"rand",
"rand_xorshift",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.161"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1"
[[package]]
name = "libz-sys"
version = "1.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "openssl-probe"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-sys"
version = "0.9.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "pkg-config"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
[[package]]
name = "ppv-lite86"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_xorshift"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
dependencies = [
"rand_core",
]
[[package]]
name = "schannel"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "socket2"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "syn"
version = "2.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View file

@ -4,4 +4,9 @@ version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
xxhash-rust = {version = "*", features = ["xxh3"]} curl = "0.4.47"
rand = "0.8.5"
rand_xorshift = "0.3.0"
[profile.release]
opt-level = 3

View file

@ -1,24 +1,103 @@
use curl::easy::{self, Easy as easy_curl};
use rand::{self, RngCore, SeedableRng};
use rand_xorshift::XorShiftRng;
use std::{ use std::{
fs, fs::{exists, File},
io::{BufReader, Read, Write}, io::{Error, Write},
os::unix::fs::FileExt,
process::Command,
sync::{Arc, Mutex},
thread::{self, JoinHandle},
}; };
use xxhash_rust::xxh3::xxh3_128;
fn main() { fn large_random_file_generation(path: String) {
let filename = "/home/askiiart/whyy/testing/stuff-100M"; // https://stackoverflow.com/a/65235966
let mut stuff = fs::File::open(filename).unwrap(); let mut out = Arc::new(Mutex::new(File::create(path)));
let file_size: u64 = stuff.metadata().unwrap().len(); let num_threads: u64 = 10;
let mut threads: Vec<JoinHandle<()>> = Vec::new();
for i in 0..num_threads {
let out = Arc::clone(&out);
let mut buf = BufReader::new(stuff); let thread = thread::spawn(move || {
let mut out = fs::File::create("output").unwrap(); _large_random_file_generation_helper(&i, out);
});
let mut data = [0; 16]; threads.push(thread);
buf.read_exact(&mut data); }
out.write(&xxh3_128(&data).to_be_bytes());
let mut location: u64 = 0; for t in threads {
while location <= file_size / 16 { t.join().unwrap();
buf.read_exact(&mut data);
location += 1;
out.write(&xxh3_128(&data).to_be_bytes());
} }
} }
fn _large_random_file_generation_helper(i: &u64, out: Arc<Mutex<Result<File, Error>>>) {
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 1310720];
// enter desired size in bytes, must be a multiple of 655360
// this is not a typo, the extra zero after 65536is for the threads
// 26843545600 = 25 GiB
let blocks_per_thread: u64 = 26843545600 / 13107200;
for u in (i * blocks_per_thread)..((i + 1) * blocks_per_thread) {
rng.fill_bytes(&mut data);
println!("{} {}", i, u);
let offset: u64 = (i * blocks_per_thread * 1310720) + (1310720 * u);
let mut out = out.lock().unwrap();
out.as_mut().unwrap().write_all_at(&data, offset);
}
}
fn single_threaded_large_random_file_generation(path: String) {
let mut out = File::create(path).unwrap();
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 65536];
for i in 0..409600 {
rng.fill_bytes(&mut data);
out.write_all(&data);
}
}
fn small_random_files_generation(folder: String) {
let mut rng = XorShiftRng::seed_from_u64(2484345508);
let mut data = [0u8; 1000];
for i in 1..1001 {
let mut out = File::create(format!("{folder}/{i}")).unwrap();
rng.fill_bytes(&mut data);
out.write(&data);
}
}
fn grab_kernel(folder: String) {
// https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz
let mut curl = easy_curl::new();
curl.url("https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz");
curl.follow_location(true);
if !(exists(format!("{folder}/kernel.tar.xz")).unwrap()) {
let mut out = File::create(format!("{folder}/kernel.tar.xz")).unwrap();
curl.write_function(move |data| {
out.write_all(data).unwrap();
Ok(data.len())
});
curl.perform().unwrap();
}
// i'm too lazy to do this in rust
println!("{:?}", Command::new("tar")
.arg("-xvf")
.arg("data/kernel/kernel.tar.xz")
.arg("--one-top-level")
.current_dir("data/kernel/")
.output()
.unwrap());
}
fn main() {
large_random_file_generation("data/output".to_string());
//single_threaded_large_random_file_generation("data/output".to_string())
//small_random_files_generation("data/small-files/random".to_string());
//grab_kernel("data/kernel".to_string());
}