The performance of parallel distributed file systems suffers from many clients executing a large number of operations in parallel, because the I/O subsystem can be easily overwhelmed by the sheer amount of incoming I/O operations. This, in turn, can slow down the whole distributed system. Many optimizations exist that try to alleviate this problem. Client-side optimizations perform preprocessing to minimize the amount of work the file servers have to do. Server-side optimizations use server-internal knowledge to improve performance. This paper provides an overview of existing client-side optimizations and presents new modifications of the Two-Phase protocol. Interleaved Two-Phase is a modification of ROMIO's Two-Phase protocol, which iterates over the file differently to reduce the number of seek operations on disk. Pipelined Two-Phase uses a pipelined scheme which overlaps I/O and communication phases to utilize the network and I/O subsystems concurrently.
@comment{Entry OFTCIKKTML12: names are written in "Last, First" form so that the compound surname "De Bosschere" is not split by the name parser; the doi field holds the bare identifier without a resolver URL; only case-sensitive words in the title are brace-protected.}
@inproceedings{OFTCIKKTML12,
  author       = {Kuhn, Michael and Kunkel, Julian and Tsujita, Yuichi and Muguruma, Hidetaka and Ludwig, Thomas},
  title        = {Optimizations for {Two-Phase} Collective {I/O}},
  year         = {2012},
  booktitle    = {Applications, Tools and Techniques on the Road to Exascale Computing},
  editor       = {De Bosschere, Koen and D'Hollander, Erik H. and Joubert, Gerhard R. and Padua, David and Peters, Frans},
  publisher    = {IOS Press},
  address      = {Amsterdam, Berlin, Tokyo, Washington DC},
  series       = {Advances in Parallel Computing},
  number       = {22},
  pages        = {455--462},
  conference   = {ParCo 2011},
  organization = {University of Ghent, ELIS Department},
  location     = {Ghent, Belgium},
  isbn         = {978-1-61499-040-6},
  issn         = {0927-5452},
  doi          = {10.3233/978-1-61499-041-3-455},
  abstract     = {The performance of parallel distributed file systems suffers from many clients executing a large number of operations in parallel, because the I/O subsystem can be easily overwhelmed by the sheer amount of incoming I/O operations. This, in turn, can slow down the whole distributed system. Many optimizations exist that try to alleviate this problem. Client-side optimizations perform preprocessing to minimize the amount of work the file servers have to do. Server-side optimizations use server-internal knowledge to improve performance. This paper provides an overview of existing client-side optimizations and presents new modifications of the Two-Phase protocol. Interleaved Two-Phase is a modification of ROMIO's Two-Phase protocol, which iterates over the file differently to reduce the number of seek operations on disk. Pipelined Two-Phase uses a pipelined scheme which overlaps I/O and communication phases to utilize the network and I/O subsystems concurrently.},
}