Add graphs and stuff to benchmarking-dwarfs - not done, but progress
This commit is contained in:
parent e6e393d1bc
commit c2c5c0a677
9 changed files with 583 additions and 30 deletions
assets/benchmarking-dwarfs/data/benchmark-data.csv (new file, +12)

@@ -0,0 +1,12 @@
data/mountpoints/dwarfs,25G-null.bin,12.80490s,371.14600µs,96.32895ms,351.30788ms
data/mountpoints/dwarfs,25G-random.bin,40.71916s,14.15143ms,109.78266ms,3.51396s
data/mountpoints/dwarfs,100M-polygon.txt,19.11096s,2.95083ms,96.39260ms,480.97789ms
data/mountpoints/dwarfs,kernel/linux-6.6.58.tar.xz,160.75466ms,1.52300µs,94.55468ms,882.57600µs
data/mountpoints/fuse-archive-tar,25G-null.bin,24.88932s,393.56800µs,98.66828ms,0ms
data/mountpoints/fuse-archive-tar,25G-random.bin,24.84052s,397.62600µs,94.52984ms,0ms
data/mountpoints/fuse-archive-tar,100M-polygon.txt,26.63768s,77.50500µs,96.61561ms,0ms
data/mountpoints/fuse-archive-tar,kernel/linux-6.6.58.tar.xz,121.50200ms,1.22300µs,93.25915ms,0ms
data/datasets,25G-null.bin,25.54820s,27.92200µs,96.79632ms,5.51523ms
data/datasets,25G-random.bin,16.91976s,290.90600µs,97.64200ms,91.13626ms
data/datasets,100M-polygon.txt,17.98264s,140.88400µs,98.92292ms,94.05722ms
data/datasets,kernel/linux-6.6.58.tar.xz,88.59571ms,1.39300µs,91.41823ms,949.77100µs
assets/benchmarking-dwarfs/data/bulk.csv (new file, +18)

File diff suppressed because one or more lines are too long
assets/benchmarking-dwarfs/data/printed.txt (new file, +86)

@@ -0,0 +1,86 @@
=== data/mountpoints/dwarfs/25G-null.bin ===
Sequential read (complete file read): 12.80490s
Sequential latency (1 byte read): 371.14600µs
Random read (1024x 1 MiB): 96.32895ms
Random latency (1024x 1 byte read): 351.30788ms

=== data/mountpoints/dwarfs/25G-random.bin ===
Sequential read (complete file read): 40.71916s
Sequential latency (1 byte read): 14.15143ms
Random read (1024x 1 MiB): 109.78266ms
Random latency (1024x 1 byte read): 3.51396s

=== data/mountpoints/dwarfs/100M-polygon.txt ===
Sequential read (complete file read): 19.11096s
Sequential latency (1 byte read): 2.95083ms
Random read (1024x 1 MiB): 96.39260ms
Random latency (1024x 1 byte read): 480.97789ms

=== data/mountpoints/dwarfs/kernel/linux-6.6.58.tar.xz ===
Sequential read (complete file read): 160.75466ms
Sequential latency (1 byte read): 1.52300µs
Random read (1024x 1 MiB): 94.55468ms
Random latency (1024x 1 byte read): 882.57600µs

[bulk] Testing data/mountpoints/dwarfs/small-files/null
[bulk] Testing data/mountpoints/dwarfs/small-files/random

=== === === === === === === === === === ===

=== data/mountpoints/fuse-archive-tar/25G-null.bin ===
Sequential read (complete file read): 24.88932s
Sequential latency (1 byte read): 393.56800µs
Random read (1024x 1 MiB): 98.66828ms
Random latency (1024x 1 byte read): 0ms

=== data/mountpoints/fuse-archive-tar/25G-random.bin ===
Sequential read (complete file read): 24.84052s
Sequential latency (1 byte read): 397.62600µs
Random read (1024x 1 MiB): 94.52984ms
Random latency (1024x 1 byte read): 0ms

=== data/mountpoints/fuse-archive-tar/100M-polygon.txt ===
Sequential read (complete file read): 26.63768s
Sequential latency (1 byte read): 77.50500µs
Random read (1024x 1 MiB): 96.61561ms
Random latency (1024x 1 byte read): 0ms

=== data/mountpoints/fuse-archive-tar/kernel/linux-6.6.58.tar.xz ===
Sequential read (complete file read): 121.50200ms
Sequential latency (1 byte read): 1.22300µs
Random read (1024x 1 MiB): 93.25915ms
Random latency (1024x 1 byte read): 0ms

[bulk] Testing data/mountpoints/fuse-archive-tar/small-files/null
[bulk] Testing data/mountpoints/fuse-archive-tar/small-files/random

=== === === === === === === === === === ===

=== data/datasets/25G-null.bin ===
Sequential read (complete file read): 25.54820s
Sequential latency (1 byte read): 27.92200µs
Random read (1024x 1 MiB): 96.79632ms
Random latency (1024x 1 byte read): 5.51523ms

=== data/datasets/25G-random.bin ===
Sequential read (complete file read): 16.91976s
Sequential latency (1 byte read): 290.90600µs
Random read (1024x 1 MiB): 97.64200ms
Random latency (1024x 1 byte read): 91.13626ms

=== data/datasets/100M-polygon.txt ===
Sequential read (complete file read): 17.98264s
Sequential latency (1 byte read): 140.88400µs
Random read (1024x 1 MiB): 98.92292ms
Random latency (1024x 1 byte read): 94.05722ms

=== data/datasets/kernel/linux-6.6.58.tar.xz ===
Sequential read (complete file read): 88.59571ms
Sequential latency (1 byte read): 1.39300µs
Random read (1024x 1 MiB): 91.41823ms
Random latency (1024x 1 byte read): 949.77100µs

[bulk] Testing data/datasets/small-files/null
[bulk] Testing data/datasets/small-files/random

=== === === === === === === === === === ===
assets/benchmarking-dwarfs/process-data.py (new file, +144)

@@ -0,0 +1,144 @@
#!/usr/bin/env python3
import csv
import re


class HelperFunctions:
    @staticmethod
    def get_fs(dir):
        # each benchmark run is identified by the directory it read from
        if dir.endswith('dwarfs'):
            return 'DwarFS'
        elif dir.endswith('fuse-archive-tar'):
            return 'fuse-archive (tar)'

        return 'Btrfs'

    @staticmethod
    def get_label(filename):
        if filename == '25G-null.bin':
            return 'Null 25 GiB file'
        elif filename == '25G-random.bin':
            return 'Random 25 GiB file'
        elif filename == '100M-polygon.txt':
            return '100 million-sided polygon data'
        elif filename.startswith('kernel'):
            return 'Linux LTS kernel'

    @staticmethod
    def convert_time(time: str, unit: str) -> float:
        # adjacent units differ by a factor of 1000, so e.g. converting
        # '371.14600µs' to ms is 371.146 * 1000**(1 - 2) = 0.371146
        unit_exponents = ['ns', 'µs', 'ms', 's']

        if time.endswith('ms'):
            current_unit = 'ms'
        elif time.endswith('µs'):
            current_unit = 'µs'
        elif time.endswith('ns'):
            current_unit = 'ns'
        else:
            current_unit = 's'

        unit_multiplier = unit_exponents.index(current_unit) - unit_exponents.index(unit)
        return HelperFunctions.time_int(time) * (1000 ** unit_multiplier)

    @staticmethod
    def time_int(time: str) -> float:
        # strip everything but the digits and the decimal point
        time = re.sub(r'[^0-9.]', '', time)
        return float(time)


def sequential_latency():
    # returns {'labels': [...], <fs name>: [sequential latency per file, ...]}
    datasets = {'labels': []}
    with open('assets/benchmarking-dwarfs/original-data/benchmark-data.csv', 'rt') as f:
        for line in csv.reader(f):
            fs = HelperFunctions.get_fs(line[0])
            label = HelperFunctions.get_label(line[1])
            if label not in datasets['labels']:
                datasets['labels'].append(label)
            datasets.setdefault(fs, []).append(line[3])

    return datasets


def singles():
    pass


def bulk():
    pass


if __name__ == '__main__':
    # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS)
    # modified so similar colors aren't adjacent
    chart_colors = [
        "'rgb(255, 99, 132)'",  # red
        "'rgb(75, 192, 192)'",  # green
        "'rgb(54, 162, 235)'",  # blue
        "'rgb(255, 159, 64)'",  # orange
        "'rgb(153, 102, 255)'",  # purple
        "'rgb(255, 205, 86)'",  # yellow
        "'rgb(201, 203, 207)'",  # grey
    ]

    print('Sequential latency:')
    labels_code = 'const labels = $labels$'
    dataset_code = '''
    {
        label: '$label$',
        data: $data$,
        backgroundColor: $color$,
    },'''

    config_code = '''
let config = {
    type: 'bar',
    data: {
        datasets: data,
        labels
    },
    options: {
        plugins: {
            title: {
                display: true,
                text: '$title$ - in $timeunit$'
            },
        },
        responsive: true,
        interaction: {
            intersect: false,
        },
    }
};
'''

    data = sequential_latency()
    labels_code = labels_code.replace('$labels$', format(data['labels']))
    print(labels_code)
    data.pop('labels')
    print('let data = [', end='')

    # find the largest time unit anywhere in the data first, so every
    # dataset gets printed in one consistent unit
    largest_time_unit = 'ns'
    for fs in data.keys():
        for item in data[fs]:
            if re.sub(r'[0-9.]', '', item) == 's':
                largest_time_unit = 's'
                break
            elif item.endswith('ms') and largest_time_unit != 's':
                largest_time_unit = 'ms'
            elif item.endswith('µs') and largest_time_unit not in ('s', 'ms'):
                largest_time_unit = 'µs'
        if largest_time_unit == 's':
            # seconds is the largest unit handled, so stop looking
            break

    for fs in data.keys():
        for i in range(len(data[fs])):
            data[fs][i] = HelperFunctions.convert_time(data[fs][i], largest_time_unit)

        print(
            dataset_code.replace('$label$', fs)
            .replace('$data$', format(data[fs]))
            .replace('$color$', format(chart_colors[list(data.keys()).index(fs)])),
            end=''
        )
    print('\n]\n')

    title = 'Sequential Read Latency'
    print(config_code.replace('$title$', title).replace('$timeunit$', largest_time_unit))

blog/benchmarking-dwarfs.html
@@ -20,11 +20,11 @@
 <p>The datasets being used for this test will be the
 following:</p>
 <ul>
-<li>25 GB of null data (just <code>00000000</code> in
+<li>25 GiB of null data (just <code>00000000</code> in
 binary)</li>
-<li>25 GB of random data<a href="#fn1" class="footnote-ref"
+<li>25 GiB of random data<a href="#fn1" class="footnote-ref"
 id="fnref1" role="doc-noteref"><sup>1</sup></a></li>
-<li>Data for a 100 million-sided regular polygon; ~29 GB<a
+<li>Data for a 100 million-sided regular polygon; ~26.5 GiB<a
 href="#fn2" class="footnote-ref" id="fnref2"
 role="doc-noteref"><sup>2</sup></a></li>
 <li>The current Linux longterm release source (<a
@@ -32,30 +32,161 @@
 [2]); ~1.5 GB</li>
 <li>For some rough latency testing:
 <ul>
-<li>1000 4 kilobyte files filled with null data (again, just
+<li>1024 4 KiB files filled with null data (again, just
 <code>00000000</code> in binary)</li>
-<li>1000 4 kilobyte files filled with random data</li>
+<li>1024 4 KiB files filled with random data</li>
 </ul></li>
 </ul>
 <p>All this data should cover both latency and read speed
 testing for data that compresses differently - extremely
 compressible files with null data, decently compressible files,
 and random data which can't be compressed well.</p>
+<h3 id="what-filesystems">What filesystems?</h3>
+<p>I'll be benchmarking DwarFS, fuse-archive (with tar files),
+and btrfs. In some early, basic testing, I found that mounting
+any <em>compressed</em> archives with <code>fuse-archive</code>,
+a tool for mounting archive file formats as read-only
+filesystems, took far too long. Additionally, being FUSE-based,
+these would have slightly worse performance than kernel
+filesystems, so I tried to use a FUSE driver as well for btrfs.
+Unfortunately, I ran into a bug, so I won't be able to quite do
+an equivalent test; btrfs will only be running in the
+kernel.</p>
+<p>During said early testing, I also ran into the fact that most
+compressed archives, like Gzip-compressed tar archives, also
+took far too long to <em>create</em>, because Gzip is
+single-threaded. So all the options with no chance of being used
+have been marked off, and I'll only be looking into these
+three.</p>
+<p>DwarFS also took far too long to create on its default
+setting, but on compression level 1, it's much faster -
+11m2.738s for the ~80 GiB total, and considering</p>
+<h2 id="running-the-benchmark">Running the benchmark</h2>
+<p>First, I installed it by cloning the repository, installing
+it using Cargo, then added its completions to fish (just for
+this session):</p>
+<div class="sourceCode" id="cb2"><pre
+class="language-sh"><code class="language-bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">git</span> clone https://git.askiiart.net/askiiart/disk-read-benchmark</span>
+<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="bu">cd</span> ./disk-read-benchmark</span>
+<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="ex">cargo</span> install <span class="at">--path</span> .</span>
+<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="ex">disk-read-benchmark</span> generate-fish-completions <span class="kw">|</span> <span class="bu">source</span></span></code></pre></div>
+<p>Then I prepared all the data:</p>
+<div class="sourceCode" id="cb3"><pre
+class="language-sh"><code class="language-bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">disk-read-benchmark</span> prep-dirs</span>
+<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="ex">disk-read-benchmark</span> grab-data</span>
+<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="ex">./prepare.sh</span></span></code></pre></div>
+<p><code>disk-read-benchmark</code> prepares all the
+directories, generates the data to be used for testing, then
+<code>./prepare.sh</code> uses the data to generate the DwarFS
+and tar archives.</p>
+<p>To run it, I just ran this:</p>
+<div class="sourceCode" id="cb4"><pre
+class="language-sh"><code class="language-bash"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="ex">disk-read-benchmark</span> benchmark</span></code></pre></div>
+<p>This outputs the data at
+<code>data/benchmark-data.csv</code> and
+<code>data/bulk.csv</code> for the single and bulk files,
+respectively.</p>
+<h2 id="results">Results</h2>
+<p>After processing the data with <a
+href="/assets/benchmarking-dwarfs/process-data.py">this
+script</a> to make it a bit easier, I put the resulting graphs
+in here ↓</p>
+<h3 id="sequential-read">Sequential read</h3>
+<h3 id="random-read">Random read</h3>
+<h3 id="sequential-read-latency">Sequential read latency</h3>
+<div>
+<canvas id="seq_read_latency_chart" class="chart">
+</canvas>
+</div>
+<h3 id="random-read-latency">Random read latency</h3>
+<p>The FUSE-based filesystems run into a bit of trouble here -
+with incompressible data, DwarFS has a hard time keeping up for
+some reason, despite keeping up just fine with larger random
+reads on the same data, and so it takes 3 to 4 seconds to run
+random read latency testing on the 25 GiB random file.
+Meanwhile, when testing random read latency,
+<code>fuse-archive</code> pretty much just dies, becoming
+ridiculously slow (even compared to DwarFS), so I didn't test
+its random read latency at all and just put its results down as
+0 milliseconds.</p>
+<h3 id="summary-and-notes">Summary and notes</h3>
 <h2 id="sources">Sources</h2>
 <ol type="1">
 <li><a href="https://github.com/mhx/dwarfs"
 class="uri">https://github.com/mhx/dwarfs</a></li>
 <li><a href="https://www.kernel.org/"
 class="uri">https://www.kernel.org/</a></li>
+<li><a
+href="https://git.askiiart.net/askiiart/disk-read-benchmark"
+class="uri">https://git.askiiart.net/askiiart/disk-read-benchmark</a></li>
+<li><a
+href="https://git.askiiart.net/confused_ace_noises/maths-demos/src/branch/headless-deterministic"
+class="uri">https://git.askiiart.net/confused_ace_noises/maths-demos/src/branch/headless-deterministic</a></li>
 </ol>
 <h2 id="footnotes">Footnotes</h2>
+<!-- JavaScript for graphs goes hereeeeeee -->
+<!-- EXAMPLE HERE -->
+<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+<script>
+let ctx = document.getElementById('seq_read_latency_chart');
+const labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel']
+let data = [
+    {
+        label: 'DwarFS',
+        data: [0.37114600000000003, 14.15143, 2.95083, 0.001523],
+        backgroundColor: 'rgb(255, 99, 132)',
+    },
+    {
+        label: 'fuse-archive (tar)',
+        data: [0.393568, 0.397626, 0.07750499999999999, 0.0012230000000000001],
+        backgroundColor: 'rgb(75, 192, 192)',
+    },
+    {
+        label: 'Btrfs',
+        data: [0.027922000000000002, 0.290906, 0.14088399999999998, 0.0013930000000000001],
+        backgroundColor: 'rgb(54, 162, 235)',
+    },
+]
+
+let config = {
+    type: 'bar',
+    data: {
+        datasets: data,
+        labels
+    },
+    options: {
+        plugins: {
+            title: {
+                display: true,
+                text: 'Sequential Read Latency - in ms'
+            },
+        },
+        responsive: true,
+        interaction: {
+            intersect: false,
+        },
+    }
+};
+
+new Chart(ctx, config);
+</script>
 <section id="footnotes"
 class="footnotes footnotes-end-of-document" role="doc-endnotes">
 <hr />
 <ol>
-<li id="fn1">This data is from a very early version of a math
-demonstration program made by a friend. The example below shows
-what the data looks like for a 3-sided regular polygon.
+<li id="fn1"><p>My code can generate up to 25 GB/s. However, it
+does random writes to my drive, which is <em>much</em> slower.
+So on one hand, you could say my code is so amazingly fast that
+current day technologies simply can't keep up. Or you could say
+that I have no idea how to code for real world scenarios.<a
+href="#fnref1" class="footnote-back"
+role="doc-backlink">↩︎</a></p></li>
+<li id="fn2">This data is from a modified version of an
+abandoned math demonstration program [4] made by a friend; it
+generates regular polygons and writes their data to a file. I
+chose this because it was an artificial and reproducible yet
+fairly compressible dataset (without being extremely
+compressible like null data).
 <details open>
 <summary>
 3-sided regular polygon data
@@ -67,15 +198,8 @@
 <pre><code>[Vertex { position: Pos([0.5, 0.0, 0.0]), color: Col([0.5310667, 0.7112941, 0.7138775]) }, Vertex { position: Pos([-0.25000003, 0.4330127, 0.0]), color: Col([0.7492257, 0.3142163, 0.49905664]) }, Vertex { position: Pos([0.0, 0.0, 0.0]), color: Col([0.2046682, 0.25598457, 0.72071356]) }, Vertex { position: Pos([-0.25000003, 0.4330127, 0.0]), color: Col([0.6389981, 0.5204368, 0.077735074]) }, Vertex { position: Pos([-0.24999996, -0.43301272, 0.0]), color: Col([0.8869035, 0.30709425, 0.8658899]) }, Vertex { position: Pos([0.0, 0.0, 0.0]), color: Col([0.2046682, 0.25598457, 0.72071356]) }, Vertex { position: Pos([-0.24999996, -0.43301272, 0.0]), color: Col([0.6236294, 0.03584433, 0.7590722]) }, Vertex { position: Pos([0.5, 8.742278e-8, 0.0]), color: Col([0.6105084, 0.3593351, 0.85544324]) }, Vertex { position: Pos([0.0, 0.0, 0.0]), color: Col([0.2046682, 0.25598457, 0.72071356]) }]</code></pre>
 </div>
 </details>
-<a href="#fnref1" class="footnote-back"
+<a href="#fnref2" class="footnote-back"
 role="doc-backlink">↩︎</a></li>
-<li id="fn2"><p>My code can generate up to 25 GB/s. However, it
-does random writes to my drive, which is <em>much</em> slower.
-So on one hand, you could say my code is so amazingly fast that
-current day technologies simply can't keep up. Or you could say
-that I have no idea how to code for real world scenarios.<a
-href="#fnref2" class="footnote-back"
-role="doc-backlink">↩︎</a></p></li>
 </ol>
 </section>
 <iframe src="https://john.citrons.xyz/embed?ref=askiiart.net" style="margin-left:auto;display:block;margin-right:auto;max-width:732px;width:100%;height:94px;border:none;"></iframe>

@@ -6,24 +6,84 @@ DwarFS is a filesystem developed by the user mhx on GitHub [1], which is self-de
 
 The datasets being used for this test will be the following:
 
-- 25 GB of null data (just `00000000` in binary)
-- 25 GB of random data[^1]
-- Data for a 100 million-sided regular polygon; ~29 GB[^2]
+- 25 GiB of null data (just `00000000` in binary)
+- 25 GiB of random data[^1]
+- Data for a 100 million-sided regular polygon; ~26.5 GiB[^2]
 - The current Linux longterm release source ([6.6.58](https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.6.58.tar.xz) [2]); ~1.5 GB
 - For some rough latency testing:
-  - 1000 4 kilobyte files filled with null data (again, just `00000000` in binary)
-  - 1000 4 kilobyte files filled with random data
+  - 1024 4 KiB files filled with null data (again, just `00000000` in binary)
+  - 1024 4 KiB files filled with random data
 
 All this data should cover both latency and read speed testing for data that compresses differently - extremely compressible files with null data, decently compressible files, and random data which can't be compressed well.
 
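+For reference, generating equivalent data by hand might look something like this - a sketch with assumed commands and paths (the real data comes from `disk-read-benchmark grab-data`, covered below):
+
+```sh
+# 25 GiB each of null and random data (25 * 1024 = 25600 1 MiB blocks)
+dd if=/dev/zero of=data/datasets/25G-null.bin bs=1M count=25600
+dd if=/dev/urandom of=data/datasets/25G-random.bin bs=1M count=25600
+
+# 1024 4 KiB files each of null and random data
+mkdir -p data/datasets/small-files/null data/datasets/small-files/random
+for i in $(seq 1 1024); do
+    dd if=/dev/zero of=data/datasets/small-files/null/$i bs=4K count=1 status=none
+    dd if=/dev/urandom of=data/datasets/small-files/random/$i bs=4K count=1 status=none
+done
+```
+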
+### What filesystems?
+
+I'll be benchmarking DwarFS, fuse-archive (with tar files), and btrfs. In some early, basic testing, I found that mounting any *compressed* archives with `fuse-archive`, a tool for mounting archive file formats as read-only filesystems, took far too long. Additionally, being FUSE-based, these would have slightly worse performance than kernel filesystems, so I tried to use a FUSE driver as well for btrfs. Unfortunately, I ran into a bug, so I won't be able to quite do an equivalent test; btrfs will only be running in the kernel.
+
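+As an illustration, mounting the two archives looks something like this (invocations based on each tool's usual `<archive> <mountpoint>` usage; the archive names are assumed, the mountpoints match the benchmark data):
+
+```sh
+fuse-archive data.tar data/mountpoints/fuse-archive-tar
+dwarfs data.dwarfs data/mountpoints/dwarfs
+```
+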
+During said early testing, I also ran into the fact that most compressed archives, like Gzip-compressed tar archives, also took far too long to *create*, because Gzip is single-threaded. So all the options with no chance of being used have been marked off, and I'll only be looking into these three.
+
+DwarFS also took far too long to create on its default setting, but on compression level 1, it's much faster - 11m2.738s for the ~80 GiB total, and considering
+
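+Creating the image at that level looks something like this (`-l` is mkdwarfs' compression level option; the paths here are assumed):
+
+```sh
+mkdwarfs -i data/datasets -o data.dwarfs -l 1
+```
+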
+## Running the benchmark
+
+First, I installed it by cloning the repository, installing it using Cargo, then added its completions to fish (just for this session):
+
+```sh
+git clone https://git.askiiart.net/askiiart/disk-read-benchmark
+cd ./disk-read-benchmark
+cargo install --path .
+disk-read-benchmark generate-fish-completions | source
+```
+
+Then I prepared all the data:
+
+```sh
+disk-read-benchmark prep-dirs
+disk-read-benchmark grab-data
+./prepare.sh
+```
+
+`disk-read-benchmark` prepares all the directories, generates the data to be used for testing, then `./prepare.sh` uses the data to generate the DwarFS and tar archives.
+
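+`prepare.sh` itself isn't shown here, but packing the datasets into a tar archive for mounting is presumably just something like (assumed, not the actual script; the DwarFS image is created as shown earlier):
+
+```sh
+tar -cf data.tar -C data/datasets .
+```
+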
+To run it, I just ran this:
+
+```sh
+disk-read-benchmark benchmark
+```
+
+This outputs the data at `data/benchmark-data.csv` and `data/bulk.csv` for the single and bulk files, respectively.
+
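+Each CSV row covers one directory/file pair; going by the order the benchmark prints its results in, the columns are: directory, file, sequential read, sequential latency, random read, and random latency. For example:
+
+```sh
+head -n 1 data/benchmark-data.csv
+# data/mountpoints/dwarfs,25G-null.bin,12.80490s,371.14600µs,96.32895ms,351.30788ms
+```
+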
+## Results
+
+After processing [the data](/assets/benchmarking-dwarfs/data/) with [this script](/assets/benchmarking-dwarfs/process-data.py) to make it a bit easier, I put the resulting graphs in here ↓
+
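+The script prints the Chart.js labels, datasets, and config as text to paste in below; since it reads the CSV by a relative path, it's presumably run from the site root:
+
+```sh
+python3 assets/benchmarking-dwarfs/process-data.py
+```
+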
+### Sequential read
+
+### Random read
+
+### Sequential read latency
+
+<div>
+<canvas id="seq_read_latency_chart" class="chart"></canvas>
+</div>
+
+### Random read latency
+
+The FUSE-based filesystems run into a bit of trouble here - with incompressible data, DwarFS has a hard time keeping up for some reason, despite keeping up just fine with larger random reads on the same data, and so it takes 3 to 4 seconds to run random read latency testing on the 25 GiB random file. Meanwhile, when testing random read latency, `fuse-archive` pretty much just dies, becoming ridiculously slow (even compared to DwarFS), so I didn't test its random read latency at all and just put its results down as 0 milliseconds.
+
+### Summary and notes
+
 ## Sources
 
 1. <https://github.com/mhx/dwarfs>
 2. <https://www.kernel.org/>
+3. <https://git.askiiart.net/askiiart/disk-read-benchmark>
+4. <https://git.askiiart.net/confused_ace_noises/maths-demos/src/branch/headless-deterministic>
 
 ## Footnotes
 
-[^1]: This data is from a very early version of a math demonstration program made by a friend. The example below shows what the data looks like for a 3-sided regular polygon.
+[^1]: My code can generate up to 25 GB/s. However, it does random writes to my drive, which is *much* slower. So on one hand, you could say my code is so amazingly fast that current day technologies simply can't keep up. Or you could say that I have no idea how to code for real world scenarios.
+[^2]: This data is from a modified version of an abandoned math demonstration program [4] made by a friend; it generates regular polygons and writes their data to a file. I chose this because it was an artificial and reproducible yet fairly compressible dataset (without being extremely compressible like null data).
 <details open>
 <summary>3-sided regular polygon data</summary>
 <br>
@@ -35,4 +95,50 @@ All this data should cover both latency and read speed testing for data that com
 ```
 </div>
 </details>
-[^2]: My code can generate up to 25 GB/s. However, it does random writes to my drive, which is *much* slower. So on one hand, you could say my code is so amazingly fast that current day technologies simply can't keep up. Or you could say that I have no idea how to code for real world scenarios.
+
+<!-- JavaScript for graphs goes hereeeeeee -->
+<!-- EXAMPLE HERE -->
+<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+<script>
+let ctx = document.getElementById('seq_read_latency_chart');
+const labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel']
+let data = [
+    {
+        label: 'DwarFS',
+        data: [0.37114600000000003, 14.15143, 2.95083, 0.001523],
+        backgroundColor: 'rgb(255, 99, 132)',
+    },
+    {
+        label: 'fuse-archive (tar)',
+        data: [0.393568, 0.397626, 0.07750499999999999, 0.0012230000000000001],
+        backgroundColor: 'rgb(75, 192, 192)',
+    },
+    {
+        label: 'Btrfs',
+        data: [0.027922000000000002, 0.290906, 0.14088399999999998, 0.0013930000000000001],
+        backgroundColor: 'rgb(54, 162, 235)',
+    },
+]
+
+let config = {
+    type: 'bar',
+    data: {
+        datasets: data,
+        labels
+    },
+    options: {
+        plugins: {
+            title: {
+                display: true,
+                text: 'Sequential Read Latency - in ms'
+            },
+        },
+        responsive: true,
+        interaction: {
+            intersect: false,
+        },
+    }
+};
+
+new Chart(ctx, config);
+</script>
blog/minimum.html (new file, +50)

@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="en">
<body>
<div>
<canvas id="myChart" style="max-height: 600px; max-width: 900px">
</canvas>
</div>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script>
let ctx = document.getElementById('myChart');
const labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel']
let data = [
    {
        label: 'DwarFS',
        data: [1.0, 1.0, 1.0, 1.0],
        backgroundColor: 'rgb(255, 99, 132)',
    },
    {
        label: 'fuse-archive (tar)',
        data: [2.0, 2.0, 2.0, 2.0],
        backgroundColor: 'rgb(75, 192, 192)',
    },
    {
        label: 'Btrfs',
        data: [3.0, 3.0, 3.0, 3.0],
        backgroundColor: 'rgb(54, 162, 235)',
    },
]

let config = {
    type: 'bar',
    data: {
        datasets: data,
        labels
    },
    options: {
        plugins: {
            title: {
                display: true,
                text: 'Sequential Read Latency - in ms'
            },
        },
        responsive: true,
    }
};

new Chart(ctx, config);
</script>
</body>
</html>
feed.xml

@@ -5,7 +5,19 @@
     <title>eng.askiiart.net</title>
     <description>This is the feed for engl.askiiart.net, I guess</description>
     <link>https://askiiart.net</link>
-    <lastBuildDate>Fri, 15 Nov 2024 16:14:25 +0000</lastBuildDate>
+    <lastBuildDate>Sun, 17 Nov 2024 06:45:39 +0000</lastBuildDate>
+    <item>
+      <title></title>
+      <link>https://engl.askiiart.net/blog/minimum.html</link>
+    </item>
+    <item>
+      <title>Using `clap`</title>
+      <link>https://engl.askiiart.net/blog/using-clap.html</link>
+    </item>
+    <item>
+      <title>Checking out blendOS</title>
+      <link>https://engl.askiiart.net/blog/blendos.html</link>
+    </item>
     <item>
       <title>Building blendOS (and its packages)</title>
       <link>https://engl.askiiart.net/blog/building-blendos.html</link>
@@ -15,12 +27,8 @@
       <link>https://engl.askiiart.net/blog/vanilla-os.html</link>
     </item>
     <item>
-      <title>Checking out blendOS</title>
-      <link>https://engl.askiiart.net/blog/blendos.html</link>
+      <title>Benchmarking and comparing DwarFS</title>
+      <link>https://engl.askiiart.net/blog/benchmarking-dwarfs.html</link>
-    </item>
-    <item>
-      <title>Using `clap`</title>
-      <link>https://engl.askiiart.net/blog/using-clap.html</link>
     </item>
     <item>
       <title>Glossary</title>

@@ -118,3 +118,8 @@
 img {
     max-width: 90vw;
 }
+
+.chart {
+    max-width: 50vw;
+    max-height: 50vh;
+}