From 4e20db145e2c1d5c7fdb336e95cae2e5c512e3de Mon Sep 17 00:00:00 2001 From: askiiart Date: Sun, 17 Nov 2024 23:40:06 -0600 Subject: [PATCH] FINALLY add the rest of the graphs with this hellish code i will be repenting to the DRY gods for the rest of eternity --- assets/benchmarking-dwarfs/js/declare_vars.js | 4 + assets/benchmarking-dwarfs/js/rand_latency.js | 46 ++ assets/benchmarking-dwarfs/js/rand_read.js | 46 ++ assets/benchmarking-dwarfs/js/seq_latency.js | 14 +- assets/benchmarking-dwarfs/js/seq_read.js | 46 ++ assets/benchmarking-dwarfs/process-data.py | 495 +++++++++++++++++- blog/benchmarking-dwarfs.html | 19 +- blog/benchmarking-dwarfs.md | 19 + 8 files changed, 663 insertions(+), 26 deletions(-) create mode 100644 assets/benchmarking-dwarfs/js/declare_vars.js create mode 100644 assets/benchmarking-dwarfs/js/rand_latency.js create mode 100644 assets/benchmarking-dwarfs/js/rand_read.js create mode 100644 assets/benchmarking-dwarfs/js/seq_read.js diff --git a/assets/benchmarking-dwarfs/js/declare_vars.js b/assets/benchmarking-dwarfs/js/declare_vars.js new file mode 100644 index 0000000..d898d5b --- /dev/null +++ b/assets/benchmarking-dwarfs/js/declare_vars.js @@ -0,0 +1,4 @@ +let labels; +let config; +let data; +let ctx; \ No newline at end of file diff --git a/assets/benchmarking-dwarfs/js/rand_latency.js b/assets/benchmarking-dwarfs/js/rand_latency.js new file mode 100644 index 0000000..e91c68d --- /dev/null +++ b/assets/benchmarking-dwarfs/js/rand_latency.js @@ -0,0 +1,46 @@ +labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files (avg)', '1024 random files (avg)'] +data = [ + { + label: 'DwarFS', + data: [351.30788, 3513.96, 480.97789, 0.882576, 0.000811, 0.000661], + backgroundColor: 'rgb(255, 99, 132)', + }, + + { + label: 'fuse-archive (tar)', + data: [0.0, 0.0, 0.0, 0.0, 0.000652, 0.000772], + backgroundColor: 'rgb(75, 192, 192)', + }, + + { + label: 'Btrfs', + data: [5.51523, 
91.13626, 94.05722, 0.949771, 0.000741, 0.0007509999999999999], + backgroundColor: 'rgb(54, 162, 235)', + }, + +] + + config = { + type: 'bar', + data: { + datasets: data, + labels + }, + options: { + plugins: { + title: { + display: true, + text: 'Random Read Latency - in ms' + }, + }, + responsive: true, + interaction: { + intersect: false, + }, + } + }; + +Chart.defaults.borderColor = "#eee" +Chart.defaults.color = "#eee"; +ctx = document.getElementById("rand_read_latency_chart"); +new Chart(ctx, config); diff --git a/assets/benchmarking-dwarfs/js/rand_read.js b/assets/benchmarking-dwarfs/js/rand_read.js new file mode 100644 index 0000000..e12b9d9 --- /dev/null +++ b/assets/benchmarking-dwarfs/js/rand_read.js @@ -0,0 +1,46 @@ +labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel'] +data = [ + { + label: 'DwarFS', + data: [0.37114600000000003, 14.15143, 2.95083, 0.001523], + backgroundColor: 'rgb(255, 99, 132)', + }, + + { + label: 'fuse-archive (tar)', + data: [0.393568, 0.397626, 0.07750499999999999, 0.0012230000000000001], + backgroundColor: 'rgb(75, 192, 192)', + }, + + { + label: 'Btrfs', + data: [0.027922000000000002, 0.290906, 0.14088399999999998, 0.0013930000000000001], + backgroundColor: 'rgb(54, 162, 235)', + }, + +] + + config = { + type: 'bar', + data: { + datasets: data, + labels + }, + options: { + plugins: { + title: { + display: true, + text: 'Random Read Times - in ms' + }, + }, + responsive: true, + interaction: { + intersect: false, + }, + } + }; + +Chart.defaults.borderColor = "#eee" +Chart.defaults.color = "#eee"; +ctx = document.getElementById("rand_read_chart"); +new Chart(ctx, config); diff --git a/assets/benchmarking-dwarfs/js/seq_latency.js b/assets/benchmarking-dwarfs/js/seq_latency.js index bc965a6..f8f51b5 100644 --- a/assets/benchmarking-dwarfs/js/seq_latency.js +++ b/assets/benchmarking-dwarfs/js/seq_latency.js @@ -1,26 +1,26 @@ -const labels = ['Null 25 GiB file', 'Random 25 GiB 
file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files', '1024 random files'] -let data = [ +labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files (avg)', '1024 random files (avg)'] +data = [ { label: 'DwarFS', - data: [0.37114600000000003, 14.15143, 2.95083, 0.001523, 0.014287000000000001, 0.013595000000000001], + data: [96.32895, 109.78266, 96.3926, 94.55468, 0.014287000000000001, 0.013595000000000001], backgroundColor: 'rgb(255, 99, 132)', }, { label: 'fuse-archive (tar)', - data: [0.393568, 0.397626, 0.07750499999999999, 0.0012230000000000001, 0.013405, 0.013465], + data: [98.66828, 94.52984, 96.61561, 93.25915, 0.013405, 0.013465], backgroundColor: 'rgb(75, 192, 192)', }, { label: 'Btrfs', - data: [0.027922000000000002, 0.290906, 0.14088399999999998, 0.0013930000000000001, 0.0032860000000000003, 0.003326], + data: [96.79632, 97.642, 98.92292, 91.41823, 0.0032860000000000003, 0.003326], backgroundColor: 'rgb(54, 162, 235)', }, ] - let config = { + config = { type: 'bar', data: { datasets: data, @@ -42,5 +42,5 @@ let data = [ Chart.defaults.borderColor = "#eee" Chart.defaults.color = "#eee"; -let ctx = document.getElementById("seq_read_latency_chart"); +ctx = document.getElementById("seq_read_latency_chart"); new Chart(ctx, config); diff --git a/assets/benchmarking-dwarfs/js/seq_read.js b/assets/benchmarking-dwarfs/js/seq_read.js new file mode 100644 index 0000000..1338f1e --- /dev/null +++ b/assets/benchmarking-dwarfs/js/seq_read.js @@ -0,0 +1,46 @@ +labels = ['Null 25 GiB file', 'Random 25 GiB file', '100 million-sided polygon data', 'Linux LTS kernel', '1024 null files (avg)', '1024 random files (avg)'] +data = [ + { + label: 'DwarFS', + data: [12.8049, 40.71916, 19.11096, 0.16075466, 0.000945113, 3.6729e-05], + backgroundColor: 'rgb(255, 99, 132)', + }, + + { + label: 'fuse-archive (tar)', + data: [24.88932, 24.84052, 26.63768, 0.121502, 4.131799999999999e-05, 
1.6571e-05], + backgroundColor: 'rgb(75, 192, 192)', + }, + + { + label: 'Btrfs', + data: [25.5482, 16.91976, 17.98264, 0.08859571, 6.873e-06, 6.432e-06], + backgroundColor: 'rgb(54, 162, 235)', + }, + +] + + config = { + type: 'bar', + data: { + datasets: data, + labels + }, + options: { + plugins: { + title: { + display: true, + text: 'Sequential Read Times - in s' + }, + }, + responsive: true, + interaction: { + intersect: false, + }, + } + }; + +Chart.defaults.borderColor = "#eee" +Chart.defaults.color = "#eee"; +ctx = document.getElementById("seq_read_chart"); +new Chart(ctx, config); diff --git a/assets/benchmarking-dwarfs/process-data.py b/assets/benchmarking-dwarfs/process-data.py index ec4029e..d766af7 100644 --- a/assets/benchmarking-dwarfs/process-data.py +++ b/assets/benchmarking-dwarfs/process-data.py @@ -25,9 +25,9 @@ class HelperFunctions: elif filename.startswith('kernel'): return 'Linux LTS kernel' elif filename == 'small-files/random': - return '1024 random files' + return '1024 random files (avg)' elif filename == 'small-files/null': - return '1024 null files' + return '1024 null files (avg)' def convert_time(time: str, unit: str) -> int: unit_exponents = ['ns', 'µs', 'ms', 's'] @@ -62,10 +62,10 @@ def get_seq_latency_data() -> tuple: 'labels' ] else False try: - datasets[fs].append(line[3]) + datasets[fs].append(line[4]) except KeyError: datasets[fs] = [] - datasets[fs].append(line[3]) + datasets[fs].append(line[4]) # NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it largest_time_unit = 'ns' @@ -73,6 +73,8 @@ def get_seq_latency_data() -> tuple: if key == 'labels': continue for item in datasets[key]: + if largest_time_unit == 's': + break if item.endswith('ms'): largest_time_unit = 'ms' elif item.endswith('µs') and largest_time_unit != 'ms': @@ -83,7 +85,7 @@ def get_seq_latency_data() -> tuple: and largest_time_unit != 'µs' ): largest_time_unit = 
'ns' - elif re.sub('[0-9]', '', item) == 's': + elif re.sub('[0-9]\\.', '', item) == 's': largest_time_unit = 's' break @@ -106,6 +108,8 @@ def get_seq_latency_data() -> tuple: ] else False for item in line[3:]: + if largest_time_unit == 's': + break if item.endswith('ms'): largest_time_unit = 'ms' elif item.endswith('µs') and largest_time_unit != 'ms': @@ -116,7 +120,7 @@ def get_seq_latency_data() -> tuple: and largest_time_unit != 'µs' ): largest_time_unit = 'ns' - elif re.sub('[0-9]', '', item) == 's': + elif re.sub('[0-9]\\.', '', item) == 's': largest_time_unit = 's' break @@ -127,6 +131,7 @@ def get_seq_latency_data() -> tuple: return (datasets, largest_time_unit) + def seq_latency(): with open('assets/benchmarking-dwarfs/js/seq_latency.js', 'wt') as f: # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS) @@ -141,8 +146,7 @@ def seq_latency(): "'rgb(201, 203, 207)'", # grey ] - #print('Sequential latency:') - labels_code = 'const labels = $labels$' + labels_code = 'labels = $labels$' dataset_code = ''' { label: '$label$', @@ -152,7 +156,7 @@ def seq_latency(): ''' config_code = ''' - let config = { + config = { type: 'bar', data: { datasets: data, @@ -177,7 +181,7 @@ def seq_latency(): labels_code = labels_code.replace('$labels$', format(data['labels'])) f.write(labels_code) data.pop('labels') - f.write('\nlet data = [') + f.write('\ndata = [') for fs in data.keys(): f.write( dataset_code.replace('$label$', fs) @@ -188,22 +192,479 @@ def seq_latency(): title = 'Sequential Read Latency' f.write( - config_code.replace('$title$', title).replace('$timeunit$', largest_time_unit) + config_code.replace('$title$', title).replace( + '$timeunit$', largest_time_unit + ) ) f.write('\nChart.defaults.borderColor = "#eee"\n') f.write('Chart.defaults.color = "#eee";\n') - f.write('let ctx = document.getElementById("seq_read_latency_chart");\n') + f.write('ctx = document.getElementById("seq_read_latency_chart");\n') f.write('new 
Chart(ctx, config);\n') -def singles(): - pass +def get_rand_latency_data() -> tuple: + # format: { 'labels': ['btrfs'], 'btrfs': [9, 8, 4, 6]} + datasets = {'labels': []} + with open('assets/benchmarking-dwarfs/data/benchmark-data.csv', 'rt') as f: + for line in csv.reader(f): + fs = HelperFunctions.get_fs(line[0]) + label = HelperFunctions.get_label(line[1]) + datasets['labels'].append(label) if label not in datasets[ + 'labels' + ] else False + try: + datasets[fs].append(line[5]) + except KeyError: + datasets[fs] = [] + datasets[fs].append(line[5]) + + # NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it + largest_time_unit = 'ns' + for key in datasets.keys(): + if key == 'labels': + continue + for item in datasets[key]: + if largest_time_unit == 's': + break + if item.endswith('ms'): + largest_time_unit = 'ms' + elif item.endswith('µs') and largest_time_unit != 'ms': + largest_time_unit = 'µs' + elif ( + item.endswith('ns') + and largest_time_unit != 'ms' + and largest_time_unit != 'µs' + ): + largest_time_unit = 'ns' + elif re.sub('[0-9]\\.', '', item) == 's': + largest_time_unit = 's' + break + + for key in datasets.keys(): + if key == 'labels': + continue + for i in range(len(datasets[key])): + datasets[key][i] = HelperFunctions.convert_time( + datasets[key][i], largest_time_unit + ) + + with open('assets/benchmarking-dwarfs/data/bulk.csv', 'rt') as f: + for line in csv.reader(f): + if line[2] != 'bulk_random_read_latency': + continue + fs = HelperFunctions.get_fs(line[0]) + label = HelperFunctions.get_label(line[1]) + datasets['labels'].append(label) if label not in datasets[ + 'labels' + ] else False + + for item in line[3:]: + if largest_time_unit == 's': + break + if item.endswith('ms'): + largest_time_unit = 'ms' + elif item.endswith('µs') and largest_time_unit != 'ms': + largest_time_unit = 'µs' + elif ( + item.endswith('ns') + and largest_time_unit != 
'ms' + and largest_time_unit != 'µs' + ): + largest_time_unit = 'ns' + elif re.sub('[0-9]\\.', '', item) == 's': + largest_time_unit = 's' + break + + for i in range(len(line[3:])): + line[i + 3] = HelperFunctions.convert_time(line[i + 3], largest_time_unit) # convert each sample itself, not the stale loop variable 'item' + + datasets[fs].append(sum(line[3:]) / len(line[3:])) + + return (datasets, largest_time_unit) -def bulk(): - pass +def rand_latency(): + with open('assets/benchmarking-dwarfs/js/rand_latency.js', 'wt') as f: + # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS) + # modified so similar color aren't adjacent + chart_colors = [ + "'rgb(255, 99, 132)'", # red + "'rgb(75, 192, 192)'", # green + "'rgb(54, 162, 235)'", # blue + "'rgb(255, 159, 64)'", # orange + "'rgb(153, 102, 255)'", # purple + "'rgb(255, 205, 86)'", # yellow + "'rgb(201, 203, 207)'", # grey + ] + + labels_code = 'labels = $labels$' + dataset_code = ''' + { + label: '$label$', + data: $data$, + backgroundColor: $color$, + }, + ''' + + config_code = ''' + config = { + type: 'bar', + data: { + datasets: data, + labels + }, + options: { + plugins: { + title: { + display: true, + text: '$title$ - in $timeunit$' + }, + }, + responsive: true, + interaction: { + intersect: false, + }, + } + }; + ''' + + data, largest_time_unit = get_rand_latency_data() + labels_code = labels_code.replace('$labels$', format(data['labels'])) + f.write(labels_code) + data.pop('labels') + f.write('\ndata = [') + for fs in data.keys(): + f.write( + dataset_code.replace('$label$', fs) + .replace('$data$', format(data[fs])) + .replace('$color$', format(chart_colors[list(data.keys()).index(fs)])) + ) + f.write('\n]\n') + + title = 'Random Read Latency' + f.write( + config_code.replace('$title$', title).replace( + '$timeunit$', largest_time_unit + ) + ) + + f.write('\nChart.defaults.borderColor = "#eee"\n') + f.write('Chart.defaults.color = "#eee";\n') + f.write('ctx = document.getElementById("rand_read_latency_chart");\n') + f.write('new 
Chart(ctx, config);\n') + + +def get_seq_read_data() -> tuple: + # format: { 'labels': ['btrfs'], 'btrfs': [9, 8, 4, 6]} + datasets = {'labels': []} + with open('assets/benchmarking-dwarfs/data/benchmark-data.csv', 'rt') as f: + for line in csv.reader(f): + fs = HelperFunctions.get_fs(line[0]) + label = HelperFunctions.get_label(line[1]) + datasets['labels'].append(label) if label not in datasets[ + 'labels' + ] else False + try: + datasets[fs].append(line[2]) + except KeyError: + datasets[fs] = [] + datasets[fs].append(line[2]) + + # NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it + # and it's a bit broken regardless but whatever + largest_time_unit = 'ns' + for key in datasets.keys(): + if key == 'labels': + continue + for item in datasets[key]: + if largest_time_unit == 's': + break + if item.endswith('ms'): + largest_time_unit = 'ms' + elif item.endswith('µs') and largest_time_unit != 'ms': + largest_time_unit = 'µs' + elif ( + item.endswith('ns') + and largest_time_unit != 'ms' + and largest_time_unit != 'µs' + ): + largest_time_unit = 'ns' + elif re.sub('[0-9\\.]', '', item) == 's': + largest_time_unit = 's' + break + + for key in datasets.keys(): + if key == 'labels': + continue + for i in range(len(datasets[key])): + datasets[key][i] = HelperFunctions.convert_time( + datasets[key][i], largest_time_unit + ) + + with open('assets/benchmarking-dwarfs/data/bulk.csv', 'rt') as f: + for line in csv.reader(f): + if line[2] != 'bulk_sequential_read': + continue + fs = HelperFunctions.get_fs(line[0]) + label = HelperFunctions.get_label(line[1]) + datasets['labels'].append(label) if label not in datasets[ + 'labels' + ] else False + + for item in line[3:]: + if largest_time_unit == 's': + break + if item.endswith('ms'): + largest_time_unit = 'ms' + elif item.endswith('µs') and largest_time_unit != 'ms': + largest_time_unit = 'µs' + elif ( + item.endswith('ns') + and 
largest_time_unit != 'ms' + and largest_time_unit != 'µs' + ): + largest_time_unit = 'ns' + elif re.sub('[0-9]\\.', '', item) == 's': + largest_time_unit = 's' + break + + for i in range(len(line[3:])): + line[i + 3] = HelperFunctions.convert_time(line[i + 3], largest_time_unit) # convert each sample itself, not the stale loop variable 'item' + + datasets[fs].append(sum(line[3:]) / len(line[3:])) + + return (datasets, largest_time_unit) + + +def seq_read(): + with open('assets/benchmarking-dwarfs/js/seq_read.js', 'wt') as f: + # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS) + # modified so similar color aren't adjacent + chart_colors = [ + "'rgb(255, 99, 132)'", # red + "'rgb(75, 192, 192)'", # green + "'rgb(54, 162, 235)'", # blue + "'rgb(255, 159, 64)'", # orange + "'rgb(153, 102, 255)'", # purple + "'rgb(255, 205, 86)'", # yellow + "'rgb(201, 203, 207)'", # grey + ] + + labels_code = 'labels = $labels$' + dataset_code = ''' + { + label: '$label$', + data: $data$, + backgroundColor: $color$, + }, + ''' + + config_code = ''' + config = { + type: 'bar', + data: { + datasets: data, + labels + }, + options: { + plugins: { + title: { + display: true, + text: '$title$ - in $timeunit$' + }, + }, + responsive: true, + interaction: { + intersect: false, + }, + } + }; + ''' + + data, largest_time_unit = get_seq_read_data() + labels_code = labels_code.replace('$labels$', format(data['labels'])) + f.write(labels_code) + data.pop('labels') + f.write('\ndata = [') + for fs in data.keys(): + f.write( + dataset_code.replace('$label$', fs) + .replace('$data$', format(data[fs])) + .replace('$color$', format(chart_colors[list(data.keys()).index(fs)])) + ) + f.write('\n]\n') + + title = 'Sequential Read Times' + f.write( + config_code.replace('$title$', title).replace( + '$timeunit$', largest_time_unit + ) + ) + + f.write('\nChart.defaults.borderColor = "#eee"\n') + f.write('Chart.defaults.color = "#eee";\n') + f.write('ctx = document.getElementById("seq_read_chart");\n') + f.write('new Chart(ctx, config);\n') 
+ + +def get_rand_read_data() -> tuple: + # format: { 'labels': ['btrfs'], 'btrfs': [9, 8, 4, 6]} + datasets = {'labels': []} + with open('assets/benchmarking-dwarfs/data/benchmark-data.csv', 'rt') as f: + for line in csv.reader(f): + fs = HelperFunctions.get_fs(line[0]) + label = HelperFunctions.get_label(line[1]) + datasets['labels'].append(label) if label not in datasets[ + 'labels' + ] else False + try: + datasets[fs].append(line[3]) + except KeyError: + datasets[fs] = [] + datasets[fs].append(line[3]) + + # NOTE: this will break if the bulk data contains a larger unit than the single file data, but that's unlikely to happen so I'm not gonna deal with it + # and it's a bit broken regardless but whatever + largest_time_unit = 'ns' + for key in datasets.keys(): + if key == 'labels': + continue + for item in datasets[key]: + if largest_time_unit == 's': + break + if item.endswith('ms'): + largest_time_unit = 'ms' + elif item.endswith('µs') and largest_time_unit != 'ms': + largest_time_unit = 'µs' + elif ( + item.endswith('ns') + and largest_time_unit != 'ms' + and largest_time_unit != 'µs' + ): + largest_time_unit = 'ns' + elif re.sub('[0-9\\.]', '', item) == 's': + largest_time_unit = 's' + break + + for key in datasets.keys(): + if key == 'labels': + continue + for i in range(len(datasets[key])): + datasets[key][i] = HelperFunctions.convert_time( + datasets[key][i], largest_time_unit + ) + + with open('assets/benchmarking-dwarfs/data/bulk.csv', 'rt') as f: + for line in csv.reader(f): + if line[2] != 'bulk_random_read': + continue + fs = HelperFunctions.get_fs(line[0]) + label = HelperFunctions.get_label(line[1]) + datasets['labels'].append(label) if label not in datasets[ + 'labels' + ] else False + + for item in line[3:]: + if largest_time_unit == 's': + break + if item.endswith('ms'): + largest_time_unit = 'ms' + elif item.endswith('µs') and largest_time_unit != 'ms': + largest_time_unit = 'µs' + elif ( + item.endswith('ns') + and largest_time_unit != 'ms' + 
and largest_time_unit != 'µs' + ): + largest_time_unit = 'ns' + elif re.sub('[0-9]\\.', '', item) == 's': + largest_time_unit = 's' + break + + for i in range(len(line[3:])): + line[i + 3] = HelperFunctions.convert_time(line[i + 3], largest_time_unit) # convert each sample itself, not the stale loop variable 'item' + + datasets[fs].append(sum(line[3:]) / len(line[3:])) + + return (datasets, largest_time_unit) + + +def rand_read(): + with open('assets/benchmarking-dwarfs/js/rand_read.js', 'wt') as f: + # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS) + # modified so similar color aren't adjacent + chart_colors = [ + "'rgb(255, 99, 132)'", # red + "'rgb(75, 192, 192)'", # green + "'rgb(54, 162, 235)'", # blue + "'rgb(255, 159, 64)'", # orange + "'rgb(153, 102, 255)'", # purple + "'rgb(255, 205, 86)'", # yellow + "'rgb(201, 203, 207)'", # grey + ] + + labels_code = 'labels = $labels$' + dataset_code = ''' + { + label: '$label$', + data: $data$, + backgroundColor: $color$, + }, + ''' + + config_code = ''' + config = { + type: 'bar', + data: { + datasets: data, + labels + }, + options: { + plugins: { + title: { + display: true, + text: '$title$ - in $timeunit$' + }, + }, + responsive: true, + interaction: { + intersect: false, + }, + } + }; + ''' + + data, largest_time_unit = get_rand_read_data() + labels_code = labels_code.replace('$labels$', format(data['labels'])) + f.write(labels_code) + data.pop('labels') + f.write('\ndata = [') + for fs in data.keys(): + f.write( + dataset_code.replace('$label$', fs) + .replace('$data$', format(data[fs])) + .replace('$color$', format(chart_colors[list(data.keys()).index(fs)])) + ) + f.write('\n]\n') + + title = 'Random Read Times' + f.write( + config_code.replace('$title$', title).replace( + '$timeunit$', largest_time_unit + ) + ) + + f.write('\nChart.defaults.borderColor = "#eee"\n') + f.write('Chart.defaults.color = "#eee";\n') + f.write('ctx = document.getElementById("rand_read_chart");\n') + f.write('new Chart(ctx, config);\n') if __name__ == '__main__': 
- seq_latency() \ No newline at end of file + # NOTE: this code is absolutely horrible and all these functions (except declare_vars) should be one function that just takes the title, chart canvas id, filename, test name in bulk, and index in singles + # i will repent to the DRY gods someday + seq_read() + rand_read() + seq_latency() + rand_latency() diff --git a/blog/benchmarking-dwarfs.html b/blog/benchmarking-dwarfs.html index ab8fc9c..e5089c4 100644 --- a/blog/benchmarking-dwarfs.html +++ b/blog/benchmarking-dwarfs.html @@ -93,13 +93,25 @@ script to make it a bit easier, I put the resulting graphs in here ↓

Sequential read

+
+ + +

Random read

+
+ + +

Sequential read latency

Random read latency

+
+ + +

The FUSE-based filesystems run into a bit of trouble here - with incompressible data, DwarFS has a hard time keeping up for some reason, despite keeping up just fine with larger random @@ -127,7 +139,11 @@

Footnotes

+ + + +

@@ -144,8 +160,7 @@ generates regular polygons and writes their data to a file. I chose this because it was an artificial and reproducible yet fairly compressible dataset (without being extremely - compressible like null data).
- + compressible like null data).
3-sided regular polygon data diff --git a/blog/benchmarking-dwarfs.md b/blog/benchmarking-dwarfs.md index 1c6be48..fbef7df 100644 --- a/blog/benchmarking-dwarfs.md +++ b/blog/benchmarking-dwarfs.md @@ -59,8 +59,18 @@ After processing [the data](/assets/benchmarking-dwarfs/data/) with [this script ### Sequential read +
+ +
+ + ### Random read +
+ +
+ + ### Sequential read latency
@@ -69,6 +79,11 @@ After processing [the data](/assets/benchmarking-dwarfs/data/) with [this script ### Random read latency +
+ +
+ + The FUSE-based filesystems run into a bit of trouble here - with incompressible data, DwarFS has a hard time keeping up for some reason, despite keeping up just fine with larger random reads on the same data, and so it takes 3 to 4 seconds to run random read latency testing on the 25 GiB random file. Meanwhile, when testing random read latency, `fuse-archive` pretty much just dies, becoming ridiculously slow (even compared to DwarFS), so I didn't test its random read latency at all and just had its results put as 0 milliseconds. ### Summary and notes @@ -98,4 +113,8 @@ The FUSE-based filesystems run into a bit of trouble here - with incompressible + + + +