BibTeX

@comment{
  Bachelor's thesis by Sven Schmidt, 2017.
  Entered as mastersthesis with an explicit type override: standard BibTeX
  styles have no bachelor's-thesis entry type, and they ignore the school and
  type fields on misc entries.
  The link is stored raw in the url field so the style decides how to print it.
  advisors is a non-standard field; standard styles are expected to skip it.
}
@mastersthesis{EIBLAZFCS17,
	author   = {Schmidt, Sven},
	title    = {Efficient interaction between {Lustre} and {ZFS} for compression},
	type     = {Bachelor's Thesis},
	school   = {Universität Hamburg},
	year     = {2017},
	month    = aug,
	url      = {https://wr.informatik.uni-hamburg.de/_media/research:theses:sven_schmidt_efficient_interaction_between_lustre_and_zfs_for_compression.pdf},
	advisors = {Anna Fuchs and Michael Kuhn},
	abstract = {As predicted by Moore’s law, computational power was increasing rapidly within the last years, roughly doubling every 14.5 months throughout the history of the TOP500 [KKL16, p. 75], whilst storage capacity and speed showed far less significant growing factors. This results in an increasing gap, and, especially for High Performance Computing, Input and Output became a performance bottleneck. Furthermore, for the storage of data with up to multiple petabytes using distributed file systems like Lustre, another bottleneck evolves from the need to transfer the data over the network. For the compensation of this bottlenecks, investigating compression techniques is more urgent than ever, basically aiming to exploit computational power to reduce the amount of data transferred and stored. One approach for the Lustre file system was presented by Anna Fuchs in her thesis “Client-side Data Transformation in Lustre” [Fuc16]. For the efficient storage within the underlying ZFS file system, Niklas Behrmann extended ZFS to allow storing externally compressed data as if it was compressed by ZFS itself, which allows to make use of the already existing infrastructure [Beh17]. This thesis interconnects both works. First, modifications to the read and write path are made to handle compressed data, that is, receiving it from Lustre and handing it to ZFS and vice versa. Moreover, metadata regarding the compression (such as the used algorithm) is stored together with the data as a header and given back to the client for decompression on the read path. The ultimate goal is to tailor new functionality as tightly as possible to the existing structures for best performance. First benchmarks showed, that the amount of data transferred over the network could be reduced by a fair amount, while the new functionality did not introduce performance regressions. Rather, reading compressed data turns out to be indeed faster.},
}