Add graphs and stuff to benchmarking-dwarfs - not done, but progress

This commit is contained in:
askiiart 2024-11-17 00:47:41 -06:00
parent e6e393d1bc
commit c2c5c0a677
Signed by untrusted user who does not match committer: askiiart
GPG key ID: EA85979611654C30
9 changed files with 583 additions and 30 deletions

View file

@ -0,0 +1,12 @@
data/mountpoints/dwarfs,25G-null.bin,12.80490s,371.14600µs,96.32895ms,351.30788ms
data/mountpoints/dwarfs,25G-random.bin,40.71916s,14.15143ms,109.78266ms,3.51396s
data/mountpoints/dwarfs,100M-polygon.txt,19.11096s,2.95083ms,96.39260ms,480.97789ms
data/mountpoints/dwarfs,kernel/linux-6.6.58.tar.xz,160.75466ms,1.52300µs,94.55468ms,882.57600µs
data/mountpoints/fuse-archive-tar,25G-null.bin,24.88932s,393.56800µs,98.66828ms,0ms
data/mountpoints/fuse-archive-tar,25G-random.bin,24.84052s,397.62600µs,94.52984ms,0ms
data/mountpoints/fuse-archive-tar,100M-polygon.txt,26.63768s,77.50500µs,96.61561ms,0ms
data/mountpoints/fuse-archive-tar,kernel/linux-6.6.58.tar.xz,121.50200ms,1.22300µs,93.25915ms,0ms
data/datasets,25G-null.bin,25.54820s,27.92200µs,96.79632ms,5.51523ms
data/datasets,25G-random.bin,16.91976s,290.90600µs,97.64200ms,91.13626ms
data/datasets,100M-polygon.txt,17.98264s,140.88400µs,98.92292ms,94.05722ms
data/datasets,kernel/linux-6.6.58.tar.xz,88.59571ms,1.39300µs,91.41823ms,949.77100µs
1 data/mountpoints/dwarfs 25G-null.bin 12.80490s 371.14600µs 96.32895ms 351.30788ms
2 data/mountpoints/dwarfs 25G-random.bin 40.71916s 14.15143ms 109.78266ms 3.51396s
3 data/mountpoints/dwarfs 100M-polygon.txt 19.11096s 2.95083ms 96.39260ms 480.97789ms
4 data/mountpoints/dwarfs kernel/linux-6.6.58.tar.xz 160.75466ms 1.52300µs 94.55468ms 882.57600µs
5 data/mountpoints/fuse-archive-tar 25G-null.bin 24.88932s 393.56800µs 98.66828ms 0ms
6 data/mountpoints/fuse-archive-tar 25G-random.bin 24.84052s 397.62600µs 94.52984ms 0ms
7 data/mountpoints/fuse-archive-tar 100M-polygon.txt 26.63768s 77.50500µs 96.61561ms 0ms
8 data/mountpoints/fuse-archive-tar kernel/linux-6.6.58.tar.xz 121.50200ms 1.22300µs 93.25915ms 0ms
9 data/datasets 25G-null.bin 25.54820s 27.92200µs 96.79632ms 5.51523ms
10 data/datasets 25G-random.bin 16.91976s 290.90600µs 97.64200ms 91.13626ms
11 data/datasets 100M-polygon.txt 17.98264s 140.88400µs 98.92292ms 94.05722ms
12 data/datasets kernel/linux-6.6.58.tar.xz 88.59571ms 1.39300µs 91.41823ms 949.77100µs

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,86 @@
=== data/mountpoints/dwarfs/25G-null.bin ===
Sequential read (complete file read): 12.80490s
Sequential latency (1 byte read): 371.14600µs
Random read (1024x 1 MiB): 96.32895ms
Random latency (1024x 1 byte read): 351.30788ms
=== data/mountpoints/dwarfs/25G-random.bin ===
Sequential read (complete file read): 40.71916s
Sequential latency (1 byte read): 14.15143ms
Random read (1024x 1 MiB): 109.78266ms
Random latency (1024x 1 byte read): 3.51396s
=== data/mountpoints/dwarfs/100M-polygon.txt ===
Sequential read (complete file read): 19.11096s
Sequential latency (1 byte read): 2.95083ms
Random read (1024x 1 MiB): 96.39260ms
Random latency (1024x 1 byte read): 480.97789ms
=== data/mountpoints/dwarfs/kernel/linux-6.6.58.tar.xz ===
Sequential read (complete file read): 160.75466ms
Sequential latency (1 byte read): 1.52300µs
Random read (1024x 1 MiB): 94.55468ms
Random latency (1024x 1 byte read): 882.57600µs
[bulk] Testing data/mountpoints/dwarfs/small-files/null
[bulk] Testing data/mountpoints/dwarfs/small-files/random
=== === === === === === === === === === ===
=== data/mountpoints/fuse-archive-tar/25G-null.bin ===
Sequential read (complete file read): 24.88932s
Sequential latency (1 byte read): 393.56800µs
Random read (1024x 1 MiB): 98.66828ms
Random latency (1024x 1 byte read): 0ms
=== data/mountpoints/fuse-archive-tar/25G-random.bin ===
Sequential read (complete file read): 24.84052s
Sequential latency (1 byte read): 397.62600µs
Random read (1024x 1 MiB): 94.52984ms
Random latency (1024x 1 byte read): 0ms
=== data/mountpoints/fuse-archive-tar/100M-polygon.txt ===
Sequential read (complete file read): 26.63768s
Sequential latency (1 byte read): 77.50500µs
Random read (1024x 1 MiB): 96.61561ms
Random latency (1024x 1 byte read): 0ms
=== data/mountpoints/fuse-archive-tar/kernel/linux-6.6.58.tar.xz ===
Sequential read (complete file read): 121.50200ms
Sequential latency (1 byte read): 1.22300µs
Random read (1024x 1 MiB): 93.25915ms
Random latency (1024x 1 byte read): 0ms
[bulk] Testing data/mountpoints/fuse-archive-tar/small-files/null
[bulk] Testing data/mountpoints/fuse-archive-tar/small-files/random
=== === === === === === === === === === ===
=== data/datasets/25G-null.bin ===
Sequential read (complete file read): 25.54820s
Sequential latency (1 byte read): 27.92200µs
Random read (1024x 1 MiB): 96.79632ms
Random latency (1024x 1 byte read): 5.51523ms
=== data/datasets/25G-random.bin ===
Sequential read (complete file read): 16.91976s
Sequential latency (1 byte read): 290.90600µs
Random read (1024x 1 MiB): 97.64200ms
Random latency (1024x 1 byte read): 91.13626ms
=== data/datasets/100M-polygon.txt ===
Sequential read (complete file read): 17.98264s
Sequential latency (1 byte read): 140.88400µs
Random read (1024x 1 MiB): 98.92292ms
Random latency (1024x 1 byte read): 94.05722ms
=== data/datasets/kernel/linux-6.6.58.tar.xz ===
Sequential read (complete file read): 88.59571ms
Sequential latency (1 byte read): 1.39300µs
Random read (1024x 1 MiB): 91.41823ms
Random latency (1024x 1 byte read): 949.77100µs
[bulk] Testing data/datasets/small-files/null
[bulk] Testing data/datasets/small-files/random
=== === === === === === === === === === ===

View file

@ -0,0 +1,144 @@
#!/usr/bin/env python3
import csv
import re
class HelperFunctions:
    """Stateless helpers for turning raw benchmark CSV fields into chart data.

    Every helper is a static method, so it can be called either on the class
    (``HelperFunctions.get_fs(...)``) or on an instance.
    """

    @staticmethod
    def get_fs(dir):
        """Return the display name of the filesystem benchmarked under *dir*.

        Paths ending in ``dwarfs`` map to ``'DwarFS'``, paths ending in
        ``fuse-archive-tar`` map to ``'fuse-archive (tar)'``; anything else is
        reported as ``'Btrfs'``.
        """
        if dir.endswith('dwarfs'):
            return 'DwarFS'
        if dir.endswith('fuse-archive-tar'):
            return 'fuse-archive (tar)'
        return 'Btrfs'

    @staticmethod
    def get_label(filename):
        """Return the human-readable chart label for a benchmarked file.

        Unknown files fall back to *filename* itself, so that every file keeps
        a distinct, printable label instead of ``None``.
        """
        known_labels = {
            '25G-null.bin': 'Null 25 GiB file',
            '25G-random.bin': 'Random 25 GiB file',
            '100M-polygon.txt': '100 million-sided polygon data',
        }
        if filename in known_labels:
            return known_labels[filename]
        if filename.startswith('kernel'):
            return 'Linux LTS kernel'
        return filename

    @staticmethod
    def convert_time(time: str, unit: str) -> float:
        """Convert a duration string such as ``'371.146µs'`` into *unit*.

        Args:
            time: A number followed by ``ns``, ``µs``, ``ms`` or ``s``. A value
                without one of the sub-second suffixes is treated as seconds.
            unit: Target unit; one of ``'ns'``, ``'µs'``, ``'ms'``, ``'s'``.

        Returns:
            The duration expressed in *unit*, as a float.

        Raises:
            ValueError: If *unit* is not a known unit, or *time* holds no
                parsable number.
        """
        # Ordered smallest to largest; neighbours differ by a factor of 1000.
        unit_exponents = ['ns', 'µs', 'ms', 's']
        # Plain 's' is the fallback because the other suffixes end in 's' too.
        current_unit = 's'
        for suffix in ('ms', 'µs', 'ns'):
            if time.endswith(suffix):
                current_unit = suffix
                break
        steps = unit_exponents.index(current_unit) - unit_exponents.index(unit)
        value = HelperFunctions.time_int(time)
        if steps >= 0:
            return value * (1000 ** steps)
        # Divide rather than multiply by 1000 ** -n (a float such as 0.001):
        # division by an exact integer keeps rounding noise out of the result.
        return value / (1000 ** -steps)

    @staticmethod
    def time_int(time: str) -> float:
        """Return the numeric part of a duration string as a float.

        Every character other than a digit or ``.`` is discarded first.
        """
        return float(re.sub(r"[^0-9.]", "", time))
def sequential_latency(path='assets/benchmarking-dwarfs/original-data/benchmark-data.csv'):
    """Collect the sequential-latency column of the benchmark CSV per filesystem.

    Each CSV row is read as ``directory, filename, <col 2>, <col 3>, ...``;
    column 3 is the value collected here.

    Args:
        path: CSV file to read. Defaults to the benchmark data shipped with
            the site.

    Returns:
        A dict with a ``'labels'`` key holding the distinct file labels in
        first-seen order, plus one key per filesystem display name holding
        that filesystem's raw duration strings (e.g. ``'371.14600µs'``) in
        row order.
    """
    datasets = {'labels': []}
    # The durations contain the non-ASCII 'µ', so the encoding is pinned
    # instead of relying on the locale default; newline='' is what the csv
    # module asks for when it is handed a file object.
    with open(path, 'rt', encoding='utf-8', newline='') as f:
        for line in csv.reader(f):
            if not line:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            fs = HelperFunctions.get_fs(line[0])
            label = HelperFunctions.get_label(line[1])
            if label not in datasets['labels']:
                datasets['labels'].append(label)
            datasets.setdefault(fs, []).append(line[3])
    return datasets
def singles():
    """Placeholder with no behavior yet; always returns None."""
    return None
def bulk():
    """Placeholder with no behavior yet; always returns None."""
    return None
def pick_time_unit(times):
    """Return the largest time unit used by any duration string in *times*.

    Units rank ``'ns' < 'µs' < 'ms' < 's'``. A string that ends in none of the
    sub-second suffixes counts as seconds. An empty iterable yields ``'ns'``.
    """
    units = ('ns', 'µs', 'ms', 's')
    sub_second = units[:3]
    largest = 0
    for time in times:
        # Plain seconds unless a sub-second suffix matches; the bare 's' cannot
        # be tested directly because every other suffix ends in 's' as well.
        rank = len(sub_second)
        for index, suffix in enumerate(sub_second):
            if time.endswith(suffix):
                rank = index
                break
        largest = max(largest, rank)
    return units[largest]


if __name__ == '__main__':
    # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS)
    # modified so similar colors aren't adjacent
    chart_colors = [
        "'rgb(255, 99, 132)'",  # red
        "'rgb(75, 192, 192)'",  # green
        "'rgb(54, 162, 235)'",  # blue
        "'rgb(255, 159, 64)'",  # orange
        "'rgb(153, 102, 255)'",  # purple
        "'rgb(255, 205, 86)'",  # yellow
        "'rgb(201, 203, 207)'",  # grey
    ]
    print('Sequential latency:')
    # JavaScript templates; the $name$ placeholders are filled in below.
    labels_code = 'const labels = $labels$'
    dataset_code = '''
{
label: '$label$',
data: $data$,
backgroundColor: $color$,
},'''
    config_code = '''
let config = {
type: 'bar',
data: {
datasets: data,
labels
},
options: {
plugins: {
title: {
display: true,
text: '$title$ - in $timeunit$'
},
},
responsive: true,
interaction: {
intersect: false,
},
}
};
'''
    data = sequential_latency()
    labels_code = labels_code.replace('$labels$', format(data['labels']))
    print(labels_code)
    data.pop('labels')
    print('let data = [', end='')
    # Choose ONE unit from every value of every filesystem before converting
    # anything, so that all datasets on the chart share the same scale.
    largest_time_unit = pick_time_unit(
        item for items in data.values() for item in items
    )
    for index, fs in enumerate(data):
        data[fs] = [
            HelperFunctions.convert_time(item, largest_time_unit)
            for item in data[fs]
        ]
        # Wrap around the palette instead of failing once there are more
        # filesystems than colors.
        color = chart_colors[index % len(chart_colors)]
        print(
            dataset_code.replace('$label$', fs)
            .replace('$data$', format(data[fs]))
            .replace('$color$', color),
            end=''
        )
    print('\n]\n')
    title = 'Sequential Read Latency'
    print(config_code.replace('$title$', title).replace('$timeunit$', largest_time_unit))