From 4e20db145e2c1d5c7fdb336e95cae2e5c512e3de Mon Sep 17 00:00:00 2001
From: askiiart
Date: Sun, 17 Nov 2024 23:40:06 -0600
Subject: [PATCH] FINALLY add the rest of the graphs with this hellish code
I will be repenting to the DRY gods for the rest of eternity.
---
assets/benchmarking-dwarfs/js/declare_vars.js | 4 +
assets/benchmarking-dwarfs/js/rand_latency.js | 46 ++
assets/benchmarking-dwarfs/js/rand_read.js | 46 ++
assets/benchmarking-dwarfs/js/seq_latency.js | 14 +-
assets/benchmarking-dwarfs/js/seq_read.js | 46 ++
assets/benchmarking-dwarfs/process-data.py | 495 +++++++++++++++++-
blog/benchmarking-dwarfs.html | 19 +-
blog/benchmarking-dwarfs.md | 19 +
8 files changed, 663 insertions(+), 26 deletions(-)
create mode 100644 assets/benchmarking-dwarfs/js/declare_vars.js
create mode 100644 assets/benchmarking-dwarfs/js/rand_latency.js
create mode 100644 assets/benchmarking-dwarfs/js/rand_read.js
create mode 100644 assets/benchmarking-dwarfs/js/seq_read.js
diff --git a/assets/benchmarking-dwarfs/js/declare_vars.js b/assets/benchmarking-dwarfs/js/declare_vars.js
new file mode 100644
index 0000000..d898d5b
--- /dev/null
+++ b/assets/benchmarking-dwarfs/js/declare_vars.js
@@ -0,0 +1,4 @@
+let labels;
+let config;
+let data;
+let ctx;
\ No newline at end of file
diff --git a/assets/benchmarking-dwarfs/js/rand_latency.js b/assets/benchmarking-dwarfs/js/rand_latency.js
new file mode 100644
index 0000000..e91c68d
--- /dev/null
+++ b/assets/benchmarking-dwarfs/js/rand_latency.js
@@ -0,0 +1,46 @@
+labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files (avg)', '1024 random files (avg)']
+data = [
+ {
+ label: 'DwarFS',
+ data: [351.30788, 3513.96, 480.97789, 0.882576, 0.000811, 0.000661],
+ backgroundColor: 'rgb(255, 99, 132)',
+ },
+
+ {
+ label: 'fuse-archive (tar)',
+ data: [0.0, 0.0, 0.0, 0.0, 0.000652, 0.000772],
+ backgroundColor: 'rgb(75, 192, 192)',
+ },
+
+ {
+ label: 'Btrfs',
+ data: [5.51523, 91.13626, 94.05722, 0.949771, 0.000741, 0.0007509999999999999],
+ backgroundColor: 'rgb(54, 162, 235)',
+ },
+
+]
+
+ config = {
+ type: 'bar',
+ data: {
+ datasets: data,
+ labels
+ },
+ options: {
+ plugins: {
+ title: {
+ display: true,
+ text: 'Random Read Latency - in ms'
+ },
+ },
+ responsive: true,
+ interaction: {
+ intersect: false,
+ },
+ }
+ };
+
+Chart.defaults.borderColor = "#eee"
+Chart.defaults.color = "#eee";
+ctx = document.getElementById("rand_read_latency_chart");
+new Chart(ctx, config);
diff --git a/assets/benchmarking-dwarfs/js/rand_read.js b/assets/benchmarking-dwarfs/js/rand_read.js
new file mode 100644
index 0000000..e12b9d9
--- /dev/null
+++ b/assets/benchmarking-dwarfs/js/rand_read.js
@@ -0,0 +1,46 @@
+labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel']
+data = [
+ {
+ label: 'DwarFS',
+ data: [0.37114600000000003, 14.15143, 2.95083, 0.001523],
+ backgroundColor: 'rgb(255, 99, 132)',
+ },
+
+ {
+ label: 'fuse-archive (tar)',
+ data: [0.393568, 0.397626, 0.07750499999999999, 0.0012230000000000001],
+ backgroundColor: 'rgb(75, 192, 192)',
+ },
+
+ {
+ label: 'Btrfs',
+ data: [0.027922000000000002, 0.290906, 0.14088399999999998, 0.0013930000000000001],
+ backgroundColor: 'rgb(54, 162, 235)',
+ },
+
+]
+
+ config = {
+ type: 'bar',
+ data: {
+ datasets: data,
+ labels
+ },
+ options: {
+ plugins: {
+ title: {
+ display: true,
+ text: 'Random Read Times - in ms'
+ },
+ },
+ responsive: true,
+ interaction: {
+ intersect: false,
+ },
+ }
+ };
+
+Chart.defaults.borderColor = "#eee"
+Chart.defaults.color = "#eee";
+ctx = document.getElementById("rand_read_chart");
+new Chart(ctx, config);
diff --git a/assets/benchmarking-dwarfs/js/seq_latency.js b/assets/benchmarking-dwarfs/js/seq_latency.js
index bc965a6..f8f51b5 100644
--- a/assets/benchmarking-dwarfs/js/seq_latency.js
+++ b/assets/benchmarking-dwarfs/js/seq_latency.js
@@ -1,26 +1,26 @@
-const labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files', '1024 random files']
-let data = [
+labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files (avg)', '1024 random files (avg)']
+data = [
{
label: 'DwarFS',
- data: [0.37114600000000003, 14.15143, 2.95083, 0.001523, 0.014287000000000001, 0.013595000000000001],
+ data: [96.32895, 109.78266, 96.3926, 94.55468, 0.014287000000000001, 0.013595000000000001],
backgroundColor: 'rgb(255, 99, 132)',
},
{
label: 'fuse-archive (tar)',
- data: [0.393568, 0.397626, 0.07750499999999999, 0.0012230000000000001, 0.013405, 0.013465],
+ data: [98.66828, 94.52984, 96.61561, 93.25915, 0.013405, 0.013465],
backgroundColor: 'rgb(75, 192, 192)',
},
{
label: 'Btrfs',
- data: [0.027922000000000002, 0.290906, 0.14088399999999998, 0.0013930000000000001, 0.0032860000000000003, 0.003326],
+ data: [96.79632, 97.642, 98.92292, 91.41823, 0.0032860000000000003, 0.003326],
backgroundColor: 'rgb(54, 162, 235)',
},
]
- let config = {
+ config = {
type: 'bar',
data: {
datasets: data,
@@ -42,5 +42,5 @@ let data = [
Chart.defaults.borderColor = "#eee"
Chart.defaults.color = "#eee";
-let ctx = document.getElementById("seq_read_latency_chart");
+ctx = document.getElementById("seq_read_latency_chart");
new Chart(ctx, config);
diff --git a/assets/benchmarking-dwarfs/js/seq_read.js b/assets/benchmarking-dwarfs/js/seq_read.js
new file mode 100644
index 0000000..1338f1e
--- /dev/null
+++ b/assets/benchmarking-dwarfs/js/seq_read.js
@@ -0,0 +1,46 @@
+labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files (avg)', '1024 random files (avg)']
+data = [
+ {
+ label: 'DwarFS',
+ data: [12.8049, 40.71916, 19.11096, 0.16075466, 0.000945113, 3.6729e-05],
+ backgroundColor: 'rgb(255, 99, 132)',
+ },
+
+ {
+ label: 'fuse-archive (tar)',
+ data: [24.88932, 24.84052, 26.63768, 0.121502, 4.131799999999999e-05, 1.6571e-05],
+ backgroundColor: 'rgb(75, 192, 192)',
+ },
+
+ {
+ label: 'Btrfs',
+ data: [25.5482, 16.91976, 17.98264, 0.08859571, 6.873e-06, 6.432e-06],
+ backgroundColor: 'rgb(54, 162, 235)',
+ },
+
+]
+
+ config = {
+ type: 'bar',
+ data: {
+ datasets: data,
+ labels
+ },
+ options: {
+ plugins: {
+ title: {
+ display: true,
+ text: 'Sequential Read Times - in s'
+ },
+ },
+ responsive: true,
+ interaction: {
+ intersect: false,
+ },
+ }
+ };
+
+Chart.defaults.borderColor = "#eee"
+Chart.defaults.color = "#eee";
+ctx = document.getElementById("seq_read_chart");
+new Chart(ctx, config);
diff --git a/assets/benchmarking-dwarfs/process-data.py b/assets/benchmarking-dwarfs/process-data.py
index ec4029e..d766af7 100644
--- a/assets/benchmarking-dwarfs/process-data.py
+++ b/assets/benchmarking-dwarfs/process-data.py
@@ -25,9 +25,9 @@ class HelperFunctions:
elif filename.startswith('kernel'):
return 'Linux LTS kernel'
elif filename == 'small-files/random':
- return '1024 random files'
+ return '1024 random files (avg)'
elif filename == 'small-files/null':
- return '1024 null files'
+ return '1024 null files (avg)'
def convert_time(time: str, unit: str) -> int:
unit_exponents = ['ns', 'µs', 'ms', 's']
@@ -62,10 +62,10 @@ def get_seq_latency_data() -> tuple:
'labels'
] else False
try:
- datasets[fs].append(line[3])
+ datasets[fs].append(line[4])
except KeyError:
datasets[fs] = []
- datasets[fs].append(line[3])
+ datasets[fs].append(line[4])
# NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it
largest_time_unit = 'ns'
@@ -73,6 +73,8 @@ def get_seq_latency_data() -> tuple:
if key == 'labels':
continue
for item in datasets[key]:
+ if largest_time_unit == 's':
+ break
if item.endswith('ms'):
largest_time_unit = 'ms'
elif item.endswith('µs') and largest_time_unit != 'ms':
@@ -83,7 +85,7 @@ def get_seq_latency_data() -> tuple:
and largest_time_unit != 'µs'
):
largest_time_unit = 'ns'
- elif re.sub('[0-9]', '', item) == 's':
+ elif re.sub('[0-9]\\.', '', item) == 's':
largest_time_unit = 's'
break
@@ -106,6 +108,8 @@ def get_seq_latency_data() -> tuple:
] else False
for item in line[3:]:
+ if largest_time_unit == 's':
+ break
if item.endswith('ms'):
largest_time_unit = 'ms'
elif item.endswith('µs') and largest_time_unit != 'ms':
@@ -116,7 +120,7 @@ def get_seq_latency_data() -> tuple:
and largest_time_unit != 'µs'
):
largest_time_unit = 'ns'
- elif re.sub('[0-9]', '', item) == 's':
+ elif re.sub('[0-9]\\.', '', item) == 's':
largest_time_unit = 's'
break
@@ -127,6 +131,7 @@ def get_seq_latency_data() -> tuple:
return (datasets, largest_time_unit)
+
def seq_latency():
with open('assets/benchmarking-dwarfs/js/seq_latency.js', 'wt') as f:
# from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS)
@@ -141,8 +146,7 @@ def seq_latency():
"'rgb(201, 203, 207)'", # grey
]
- #print('Sequential latency:')
- labels_code = 'const labels = $labels$'
+ labels_code = 'labels = $labels$'
dataset_code = '''
{
label: '$label$',
@@ -152,7 +156,7 @@ def seq_latency():
'''
config_code = '''
- let config = {
+ config = {
type: 'bar',
data: {
datasets: data,
@@ -177,7 +181,7 @@ def seq_latency():
labels_code = labels_code.replace('$labels$', format(data['labels']))
f.write(labels_code)
data.pop('labels')
- f.write('\nlet data = [')
+ f.write('\ndata = [')
for fs in data.keys():
f.write(
dataset_code.replace('$label$', fs)
@@ -188,22 +192,479 @@ def seq_latency():
title = 'Sequential Read Latency'
f.write(
- config_code.replace('$title$', title).replace('$timeunit$', largest_time_unit)
+ config_code.replace('$title$', title).replace(
+ '$timeunit$', largest_time_unit
+ )
)
f.write('\nChart.defaults.borderColor = "#eee"\n')
f.write('Chart.defaults.color = "#eee";\n')
- f.write('let ctx = document.getElementById("seq_read_latency_chart");\n')
+ f.write('ctx = document.getElementById("seq_read_latency_chart");\n')
f.write('new Chart(ctx, config);\n')
-def singles():
- pass
+def get_rand_latency_data() -> tuple:
+ # format: { 'labels': ['btrfs'], 'btrfs': [9, 8, 4, 6]}
+ datasets = {'labels': []}
+ with open('assets/benchmarking-dwarfs/data/benchmark-data.csv', 'rt') as f:
+ for line in csv.reader(f):
+ fs = HelperFunctions.get_fs(line[0])
+ label = HelperFunctions.get_label(line[1])
+ datasets['labels'].append(label) if label not in datasets[
+ 'labels'
+ ] else False
+ try:
+ datasets[fs].append(line[5])
+ except KeyError:
+ datasets[fs] = []
+ datasets[fs].append(line[5])
+
+ # NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it
+ largest_time_unit = 'ns'
+ for key in datasets.keys():
+ if key == 'labels':
+ continue
+ for item in datasets[key]:
+ if largest_time_unit == 's':
+ break
+ if item.endswith('ms'):
+ largest_time_unit = 'ms'
+ elif item.endswith('µs') and largest_time_unit != 'ms':
+ largest_time_unit = 'µs'
+ elif (
+ item.endswith('ns')
+ and largest_time_unit != 'ms'
+ and largest_time_unit != 'µs'
+ ):
+ largest_time_unit = 'ns'
+ elif re.sub('[0-9]\\.', '', item) == 's':
+ largest_time_unit = 's'
+ break
+
+ for key in datasets.keys():
+ if key == 'labels':
+ continue
+ for i in range(len(datasets[key])):
+ datasets[key][i] = HelperFunctions.convert_time(
+ datasets[key][i], largest_time_unit
+ )
+
+ with open('assets/benchmarking-dwarfs/data/bulk.csv', 'rt') as f:
+ for line in csv.reader(f):
+ if line[2] != 'bulk_random_read_latency':
+ continue
+ fs = HelperFunctions.get_fs(line[0])
+ label = HelperFunctions.get_label(line[1])
+ datasets['labels'].append(label) if label not in datasets[
+ 'labels'
+ ] else False
+
+ for item in line[3:]:
+ if largest_time_unit == 's':
+ break
+ if item.endswith('ms'):
+ largest_time_unit = 'ms'
+ elif item.endswith('µs') and largest_time_unit != 'ms':
+ largest_time_unit = 'µs'
+ elif (
+ item.endswith('ns')
+ and largest_time_unit != 'ms'
+ and largest_time_unit != 'µs'
+ ):
+ largest_time_unit = 'ns'
+ elif re.sub('[0-9]\\.', '', item) == 's':
+ largest_time_unit = 's'
+ break
+
+ for i in range(len(line[3:])):
+ line[i + 3] = HelperFunctions.convert_time(item, largest_time_unit)
+
+ datasets[fs].append(sum(line[3:]) / len(line[3:]))
+
+ return (datasets, largest_time_unit)
-def bulk():
- pass
+def rand_latency():
+ with open('assets/benchmarking-dwarfs/js/rand_latency.js', 'wt') as f:
+ # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS)
+ # modified so similar color aren't adjacent
+ chart_colors = [
+ "'rgb(255, 99, 132)'", # red
+ "'rgb(75, 192, 192)'", # green
+ "'rgb(54, 162, 235)'", # blue
+ "'rgb(255, 159, 64)'", # orange
+ "'rgb(153, 102, 255)'", # purple
+ "'rgb(255, 205, 86)'", # yellow
+ "'rgb(201, 203, 207)'", # grey
+ ]
+
+ labels_code = 'labels = $labels$'
+ dataset_code = '''
+ {
+ label: '$label$',
+ data: $data$,
+ backgroundColor: $color$,
+ },
+ '''
+
+ config_code = '''
+ config = {
+ type: 'bar',
+ data: {
+ datasets: data,
+ labels
+ },
+ options: {
+ plugins: {
+ title: {
+ display: true,
+ text: '$title$ - in $timeunit$'
+ },
+ },
+ responsive: true,
+ interaction: {
+ intersect: false,
+ },
+ }
+ };
+ '''
+
+ data, largest_time_unit = get_rand_latency_data()
+ labels_code = labels_code.replace('$labels$', format(data['labels']))
+ f.write(labels_code)
+ data.pop('labels')
+ f.write('\ndata = [')
+ for fs in data.keys():
+ f.write(
+ dataset_code.replace('$label$', fs)
+ .replace('$data$', format(data[fs]))
+ .replace('$color$', format(chart_colors[list(data.keys()).index(fs)]))
+ )
+ f.write('\n]\n')
+
+ title = 'Random Read Latency'
+ f.write(
+ config_code.replace('$title$', title).replace(
+ '$timeunit$', largest_time_unit
+ )
+ )
+
+ f.write('\nChart.defaults.borderColor = "#eee"\n')
+ f.write('Chart.defaults.color = "#eee";\n')
+ f.write('ctx = document.getElementById("rand_read_latency_chart");\n')
+ f.write('new Chart(ctx, config);\n')
+
+
+def get_seq_read_data() -> tuple:
+ # format: { 'labels': ['btrfs'], 'btrfs': [9, 8, 4, 6]}
+ datasets = {'labels': []}
+ with open('assets/benchmarking-dwarfs/data/benchmark-data.csv', 'rt') as f:
+ for line in csv.reader(f):
+ fs = HelperFunctions.get_fs(line[0])
+ label = HelperFunctions.get_label(line[1])
+ datasets['labels'].append(label) if label not in datasets[
+ 'labels'
+ ] else False
+ try:
+ datasets[fs].append(line[2])
+ except KeyError:
+ datasets[fs] = []
+ datasets[fs].append(line[2])
+
+ # NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it
+ # and it's a bit broken regardless but whatever
+ largest_time_unit = 'ns'
+ for key in datasets.keys():
+ if key == 'labels':
+ continue
+ for item in datasets[key]:
+ if largest_time_unit == 's':
+ break
+ if item.endswith('ms'):
+ largest_time_unit = 'ms'
+ elif item.endswith('µs') and largest_time_unit != 'ms':
+ largest_time_unit = 'µs'
+ elif (
+ item.endswith('ns')
+ and largest_time_unit != 'ms'
+ and largest_time_unit != 'µs'
+ ):
+ largest_time_unit = 'ns'
+ elif re.sub('[0-9\\.]', '', item) == 's':
+ largest_time_unit = 's'
+ break
+
+ for key in datasets.keys():
+ if key == 'labels':
+ continue
+ for i in range(len(datasets[key])):
+ datasets[key][i] = HelperFunctions.convert_time(
+ datasets[key][i], largest_time_unit
+ )
+
+ with open('assets/benchmarking-dwarfs/data/bulk.csv', 'rt') as f:
+ for line in csv.reader(f):
+ if line[2] != 'bulk_sequential_read':
+ continue
+ fs = HelperFunctions.get_fs(line[0])
+ label = HelperFunctions.get_label(line[1])
+ datasets['labels'].append(label) if label not in datasets[
+ 'labels'
+ ] else False
+
+ for item in line[3:]:
+ if largest_time_unit == 's':
+ break
+ if item.endswith('ms'):
+ largest_time_unit = 'ms'
+ elif item.endswith('µs') and largest_time_unit != 'ms':
+ largest_time_unit = 'µs'
+ elif (
+ item.endswith('ns')
+ and largest_time_unit != 'ms'
+ and largest_time_unit != 'µs'
+ ):
+ largest_time_unit = 'ns'
+ elif re.sub('[0-9]\\.', '', item) == 's':
+ largest_time_unit = 's'
+ break
+
+ for i in range(len(line[3:])):
+ line[i + 3] = HelperFunctions.convert_time(item, largest_time_unit)
+
+ datasets[fs].append(sum(line[3:]) / len(line[3:]))
+
+ return (datasets, largest_time_unit)
+
+
+def seq_read():
+ with open('assets/benchmarking-dwarfs/js/seq_read.js', 'wt') as f:
+ # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS)
+ # modified so similar color aren't adjacent
+ chart_colors = [
+ "'rgb(255, 99, 132)'", # red
+ "'rgb(75, 192, 192)'", # green
+ "'rgb(54, 162, 235)'", # blue
+ "'rgb(255, 159, 64)'", # orange
+ "'rgb(153, 102, 255)'", # purple
+ "'rgb(255, 205, 86)'", # yellow
+ "'rgb(201, 203, 207)'", # grey
+ ]
+
+ labels_code = 'labels = $labels$'
+ dataset_code = '''
+ {
+ label: '$label$',
+ data: $data$,
+ backgroundColor: $color$,
+ },
+ '''
+
+ config_code = '''
+ config = {
+ type: 'bar',
+ data: {
+ datasets: data,
+ labels
+ },
+ options: {
+ plugins: {
+ title: {
+ display: true,
+ text: '$title$ - in $timeunit$'
+ },
+ },
+ responsive: true,
+ interaction: {
+ intersect: false,
+ },
+ }
+ };
+ '''
+
+ data, largest_time_unit = get_seq_read_data()
+ labels_code = labels_code.replace('$labels$', format(data['labels']))
+ f.write(labels_code)
+ data.pop('labels')
+ f.write('\ndata = [')
+ for fs in data.keys():
+ f.write(
+ dataset_code.replace('$label$', fs)
+ .replace('$data$', format(data[fs]))
+ .replace('$color$', format(chart_colors[list(data.keys()).index(fs)]))
+ )
+ f.write('\n]\n')
+
+ title = 'Sequential Read Times'
+ f.write(
+ config_code.replace('$title$', title).replace(
+ '$timeunit$', largest_time_unit
+ )
+ )
+
+ f.write('\nChart.defaults.borderColor = "#eee"\n')
+ f.write('Chart.defaults.color = "#eee";\n')
+ f.write('ctx = document.getElementById("seq_read_chart");\n')
+ f.write('new Chart(ctx, config);\n')
+
+
+def get_rand_read_data() -> tuple:
+ # format: { 'labels': ['btrfs'], 'btrfs': [9, 8, 4, 6]}
+ datasets = {'labels': []}
+ with open('assets/benchmarking-dwarfs/data/benchmark-data.csv', 'rt') as f:
+ for line in csv.reader(f):
+ fs = HelperFunctions.get_fs(line[0])
+ label = HelperFunctions.get_label(line[1])
+ datasets['labels'].append(label) if label not in datasets[
+ 'labels'
+ ] else False
+ try:
+ datasets[fs].append(line[3])
+ except KeyError:
+ datasets[fs] = []
+ datasets[fs].append(line[3])
+
+ # NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it
+ # and it's a bit broken regardless but whatever
+ largest_time_unit = 'ns'
+ for key in datasets.keys():
+ if key == 'labels':
+ continue
+ for item in datasets[key]:
+ if largest_time_unit == 's':
+ break
+ if item.endswith('ms'):
+ largest_time_unit = 'ms'
+ elif item.endswith('µs') and largest_time_unit != 'ms':
+ largest_time_unit = 'µs'
+ elif (
+ item.endswith('ns')
+ and largest_time_unit != 'ms'
+ and largest_time_unit != 'µs'
+ ):
+ largest_time_unit = 'ns'
+ elif re.sub('[0-9\\.]', '', item) == 's':
+ largest_time_unit = 's'
+ break
+
+ for key in datasets.keys():
+ if key == 'labels':
+ continue
+ for i in range(len(datasets[key])):
+ datasets[key][i] = HelperFunctions.convert_time(
+ datasets[key][i], largest_time_unit
+ )
+
+ with open('assets/benchmarking-dwarfs/data/bulk.csv', 'rt') as f:
+ for line in csv.reader(f):
+ if line[2] != 'bulk_random_read':
+ continue
+ fs = HelperFunctions.get_fs(line[0])
+ label = HelperFunctions.get_label(line[1])
+ datasets['labels'].append(label) if label not in datasets[
+ 'labels'
+ ] else False
+
+ for item in line[3:]:
+ if largest_time_unit == 's':
+ break
+ if item.endswith('ms'):
+ largest_time_unit = 'ms'
+ elif item.endswith('µs') and largest_time_unit != 'ms':
+ largest_time_unit = 'µs'
+ elif (
+ item.endswith('ns')
+ and largest_time_unit != 'ms'
+ and largest_time_unit != 'µs'
+ ):
+ largest_time_unit = 'ns'
+ elif re.sub('[0-9]\\.', '', item) == 's':
+ largest_time_unit = 's'
+ break
+
+ for i in range(len(line[3:])):
+ line[i + 3] = HelperFunctions.convert_time(item, largest_time_unit)
+
+ datasets[fs].append(sum(line[3:]) / len(line[3:]))
+
+ return (datasets, largest_time_unit)
+
+
+def rand_read():
+ with open('assets/benchmarking-dwarfs/js/rand_read.js', 'wt') as f:
+ # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS)
+ # modified so similar color aren't adjacent
+ chart_colors = [
+ "'rgb(255, 99, 132)'", # red
+ "'rgb(75, 192, 192)'", # green
+ "'rgb(54, 162, 235)'", # blue
+ "'rgb(255, 159, 64)'", # orange
+ "'rgb(153, 102, 255)'", # purple
+ "'rgb(255, 205, 86)'", # yellow
+ "'rgb(201, 203, 207)'", # grey
+ ]
+
+ labels_code = 'labels = $labels$'
+ dataset_code = '''
+ {
+ label: '$label$',
+ data: $data$,
+ backgroundColor: $color$,
+ },
+ '''
+
+ config_code = '''
+ config = {
+ type: 'bar',
+ data: {
+ datasets: data,
+ labels
+ },
+ options: {
+ plugins: {
+ title: {
+ display: true,
+ text: '$title$ - in $timeunit$'
+ },
+ },
+ responsive: true,
+ interaction: {
+ intersect: false,
+ },
+ }
+ };
+ '''
+
+ data, largest_time_unit = get_rand_read_data()
+ labels_code = labels_code.replace('$labels$', format(data['labels']))
+ f.write(labels_code)
+ data.pop('labels')
+ f.write('\ndata = [')
+ for fs in data.keys():
+ f.write(
+ dataset_code.replace('$label$', fs)
+ .replace('$data$', format(data[fs]))
+ .replace('$color$', format(chart_colors[list(data.keys()).index(fs)]))
+ )
+ f.write('\n]\n')
+
+ title = 'Random Read Times'
+ f.write(
+ config_code.replace('$title$', title).replace(
+ '$timeunit$', largest_time_unit
+ )
+ )
+
+ f.write('\nChart.defaults.borderColor = "#eee"\n')
+ f.write('Chart.defaults.color = "#eee";\n')
+ f.write('ctx = document.getElementById("rand_read_chart");\n')
+ f.write('new Chart(ctx, config);\n')
if __name__ == '__main__':
- seq_latency()
\ No newline at end of file
+ # NOTE: this code is absolutely horrible and all these functions (except declare_vars) should be one function that just takes the title, chart canvas id, filename, test name in bulk, and index in singles
+ # i will repent to the DRY gods someday
+ seq_read()
+ rand_read()
+ seq_latency()
+ rand_latency()
diff --git a/blog/benchmarking-dwarfs.html b/blog/benchmarking-dwarfs.html
index ab8fc9c..e5089c4 100644
--- a/blog/benchmarking-dwarfs.html
+++ b/blog/benchmarking-dwarfs.html
@@ -93,13 +93,25 @@
script to make it a bit easier, I put the resulting graphs
in here ↓
Sequential read
+
+
+
Random read
+
+
+
Sequential read latency
Random read latency
+
+
+
The FUSE-based filesystems run into a bit of trouble here -
with incompressible data, DwarFS has a hard time keeping up for
some reason, despite keeping up just fine with larger random
@@ -127,7 +139,11 @@
+
+
+
+