2024-11-17 00:47:41 -06:00
#!/usr/bin/env python3
import csv
import re
2024-11-17 15:42:47 -06:00
# a bunch of horrible code to make the chart.js code
2024-11-17 00:47:41 -06:00
class HelperFunctions:
    """Stateless helpers that turn raw benchmark CSV fields into chart values.

    Every helper is a static method, so it works both as
    ``HelperFunctions.name(...)`` and when called on an instance.
    """

    # Supported time units, smallest first; neighbouring units differ by 1000x.
    _UNITS = ('ns', 'µs', 'ms', 's')

    # Exact file names and the chart label each one is shown under.
    _LABELS = {
        '25G-null.bin': 'Null 25 GiB file',
        '25G-random.bin': 'Random 25 GiB file',
        '100M-polygon.txt': '100 million-sided polygon data',
        'small-files/random': '1024 random files (avg)',
        'small-files/null': '1024 null files (avg)',
    }

    @staticmethod
    def get_fs(dir):
        """Return the display name of the filesystem for a benchmark directory.

        Directories ending in ``dwarfs`` or ``fuse-archive-tar`` map to their
        own names; every other directory is reported as Btrfs.
        """
        if dir.endswith('dwarfs'):
            return 'DwarFS'
        if dir.endswith('fuse-archive-tar'):
            return 'fuse-archive (tar)'
        return 'Btrfs'

    @staticmethod
    def get_label(filename):
        """Return the human-readable chart label for a benchmarked file.

        Unknown file names are returned unchanged instead of ``None`` so the
        generated chart always gets a usable label.
        """
        if filename in HelperFunctions._LABELS:
            return HelperFunctions._LABELS[filename]
        if filename.startswith('kernel'):
            return 'Linux LTS kernel'
        return filename

    @staticmethod
    def convert_time(time: str, unit: str) -> float:
        """Convert a time string such as ``'12.5ms'`` into ``unit``.

        Args:
            time: A number followed by ``ns``, ``µs``, ``ms`` or ``s``; a
                value without a recognised suffix is treated as seconds.
            unit: Target unit, one of ``ns``, ``µs``, ``ms``, ``s``.

        Returns:
            The numeric value expressed in ``unit``.

        Raises:
            ValueError: If ``unit`` is not a supported unit or ``time``
                contains no parsable number.
        """
        units = HelperFunctions._UNITS
        for suffix in ('ms', 'µs', 'ns'):
            if time.endswith(suffix):
                current_unit = suffix
                break
        else:
            current_unit = 's'
        # Each step between neighbouring units is a factor of 1000.
        exponent = units.index(current_unit) - units.index(unit)
        return HelperFunctions.time_num(time) * (1000 ** exponent)

    @staticmethod
    def time_num(time: str) -> float:
        """Return the numeric part of a time string, dropping the unit suffix."""
        # Keep only digits and dots; everything else is the unit.
        return float(re.sub(r'[^0-9\.]', '', time))
2024-11-18 09:35:14 -06:00
# Time units recognised in the CSV data, smallest first.
_TIME_UNITS = ('ns', 'µs', 'ms', 's')


def _detect_time_unit(value):
    """Return the unit suffix of a raw time string, or None if unrecognised."""
    for suffix in ('ms', 'µs', 'ns'):
        if value.endswith(suffix):
            return suffix
    # Seconds: what remains after removing digits and dots is exactly 's'.
    if re.sub(r'[0-9\.]', '', value) == 's':
        return 's'
    return None


def _largest_time_unit(values):
    """Return the largest unit used by any raw time string ('ns' if none)."""
    largest = 0
    for value in values:
        unit = _detect_time_unit(value)
        if unit is not None:
            largest = max(largest, _TIME_UNITS.index(unit))
        if largest == len(_TIME_UNITS) - 1:
            break  # nothing is larger than seconds
    return _TIME_UNITS[largest]


def get_data(single_files_index: int, bulk_test_name: str):
    """Collect the chart data for one benchmark from the two result CSVs.

    Single-file results are column ``single_files_index`` of every row of
    ``benchmark-data.csv``. Bulk results are the rows of ``bulk.csv`` whose
    third column equals ``bulk_test_name``; the samples in columns 3 onward
    are averaged into one value per row.

    All raw values are read before anything is converted, so the largest
    time unit is chosen across both files and every returned number is
    expressed in that same unit.

    Args:
        single_files_index: Column of ``benchmark-data.csv`` holding the
            measurement to chart.
        bulk_test_name: Test name to select in ``bulk.csv``.

    Returns:
        A ``(data, largest_time_unit)`` tuple. ``data`` maps ``'labels'`` to
        the distinct labels in first-seen order, and each filesystem name to
        its list of times (single-file values first, then bulk averages).

    Raises:
        ValueError: If a selected bulk row contains no samples.
    """
    labels = []
    single_raw = {}  # filesystem -> raw time strings
    bulk_raw = {}  # filesystem -> one list of raw samples per bulk row
    raw_values = []  # every raw time string, used to pick the unit

    def remember_label(label):
        if label not in labels:
            labels.append(label)

    # utf-8 is explicit because unit detection relies on the 'µs' suffix.
    with open(
        'assets/benchmarking-dwarfs/data/benchmark-data.csv', 'rt', encoding='utf-8'
    ) as f:
        for row in csv.reader(f):
            if not row:
                continue  # csv.reader yields [] for blank lines
            fs = HelperFunctions.get_fs(row[0])
            remember_label(HelperFunctions.get_label(row[1]))
            value = row[single_files_index]
            single_raw.setdefault(fs, []).append(value)
            raw_values.append(value)

    with open('assets/benchmarking-dwarfs/data/bulk.csv', 'rt', encoding='utf-8') as f:
        for row in csv.reader(f):
            if not row or row[2] != bulk_test_name:
                continue
            samples = row[3:]
            if not samples:
                raise ValueError(
                    f'bulk row for {row[1]!r} ({bulk_test_name}) has no samples'
                )
            fs = HelperFunctions.get_fs(row[0])
            remember_label(HelperFunctions.get_label(row[1]))
            bulk_raw.setdefault(fs, []).append(samples)
            raw_values.extend(samples)

    largest_time_unit = _largest_time_unit(raw_values)

    data = {'labels': labels}
    for fs, values in single_raw.items():
        data[fs] = [
            HelperFunctions.convert_time(value, largest_time_unit) for value in values
        ]
    for fs, rows in bulk_raw.items():
        # A filesystem that only appears in bulk.csv still gets a series.
        series = data.setdefault(fs, [])
        for samples in rows:
            converted = [
                HelperFunctions.convert_time(sample, largest_time_unit)
                for sample in samples
            ]
            series.append(sum(converted) / len(converted))

    return (data, largest_time_unit)
2024-11-17 23:40:06 -06:00
2024-11-18 09:35:14 -06:00
def run(single_files_index: int, bulk_test_name: str, filename: str, title: str, chart_canvas_id: str):
    """Generate the Chart.js script for one benchmark chart.

    Loads the data with ``get_data`` and writes a JavaScript file that
    assigns ``labels``, ``data`` and ``config`` and instantiates a bar chart
    on the given canvas.

    Args:
        single_files_index: Passed to ``get_data``.
        bulk_test_name: Passed to ``get_data``.
        filename: Name of the file to write under
            ``assets/benchmarking-dwarfs/js/``.
        title: Chart title; the time unit is appended to it.
        chart_canvas_id: ``id`` of the canvas element the chart is drawn on.
    """
    # from https://github.com/chartjs/Chart.js/blob/master/docs/scripts/utils.js (CHART_COLORS)
    # reordered so similar colours aren't adjacent
    chart_colors = [
        "'rgb(255, 99, 132)'",  # red
        "'rgb(75, 192, 192)'",  # green
        "'rgb(54, 162, 235)'",  # blue
        "'rgb(255, 159, 64)'",  # orange
        "'rgb(153, 102, 255)'",  # purple
        "'rgb(255, 205, 86)'",  # yellow
        "'rgb(201, 203, 207)'",  # grey
    ]
    labels_code = 'labels = $labels$'
    dataset_code = '''
{
    label: '$label$',
    data: $data$,
    backgroundColor: $color$,
},
'''
    config_code = '''
config = {
    type: 'bar',
    data: {
        datasets: data,
        labels
    },
    options: {
        plugins: {
            title: {
                display: true,
                text: '$title$ - in $timeunit$'
            },
        },
        responsive: true,
        interaction: {
            intersect: false,
        },
    }
};
'''

    # Load the data before opening the output so that a failure while
    # reading the CSVs does not leave a truncated script behind.
    data, largest_time_unit = get_data(single_files_index, bulk_test_name)
    labels = data.pop('labels')

    # utf-8 is explicit because the title can contain the 'µs' unit.
    with open(f'assets/benchmarking-dwarfs/js/{filename}', 'wt', encoding='utf-8') as f:
        f.write(labels_code.replace('$labels$', format(labels)))
        f.write('\ndata = [')
        for index, (fs, values) in enumerate(data.items()):
            # Cycle through the palette so extra series reuse colours
            # instead of running off the end of the list.
            color = chart_colors[index % len(chart_colors)]
            f.write(
                dataset_code.replace('$label$', fs)
                .replace('$data$', format(values))
                .replace('$color$', color)
            )
        f.write('\n]\n')
        f.write(
            config_code.replace('$title$', title).replace(
                '$timeunit$', largest_time_unit
            )
        )
        f.write('\nChart.defaults.borderColor = "#eee"\n')
        f.write('Chart.defaults.color = "#eee";\n')
        f.write(f'ctx = document.getElementById("{chart_canvas_id}");\n')
        f.write('new Chart(ctx, config);\n')
2024-11-17 15:42:47 -06:00
2024-11-18 09:35:14 -06:00
def declare_vars():
    """Write declare_vars.js, which declares the JavaScript variables
    (labels, config, data, ctx) with ``let``."""
    declarations = (
        'let labels;\n',
        'let config;\n',
        'let data;\n',
        'let ctx;\n',
    )
    with open('assets/benchmarking-dwarfs/js/declare_vars.js', 'wt') as out:
        out.writelines(declarations)
2024-11-17 15:42:47 -06:00
if __name__ == '__main__':
    # One row per chart, in run()'s argument order:
    # (single-file CSV column, bulk test name, output file, title, canvas id)
    charts = (
        (2, 'bulk_sequential_read', 'seq_read.js', 'Sequential Read Times', 'seq_read_chart'),
        (3, 'bulk_random_read', 'rand_read.js', 'Random Read Times', 'rand_read_chart'),
        (4, 'bulk_sequential_read_latency', 'seq_latency.js', 'Sequential Read Latency', 'seq_read_latency_chart'),
        (5, 'bulk_random_read_latency', 'rand_latency.js', 'Random Read Latency', 'rand_read_latency_chart'),
    )

    # The shared variable declarations are written first, then each chart.
    declare_vars()
    for chart in charts:
        run(*chart)