The Concept
MPI IO can be used to write sequentially to multiple files.
The Code
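The program takes the base name of the output files (-f) and the number of blocks to write (-l) as input arguments. Each process attempts to write that many blocks of random integers (1048576 integers per block) to its own file, named after the base name followed by a dot and the rank of the process. If any process fails to open or write its file, the processes that did create files delete them.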
$ cd mpi_io/03_writing_sequential/ $ cat src/mkranfiles.c /****************************************************************************** * * * MPI IO Example - Writing Sequential Files * * * * Each process attempts to write a specified number of blocks to its * * own output file. * * * ****************************************************************************** * * * The original code was written by Gustav at University of Indiana in 2003. * * * * The current version has been tested/updated by the HPC department at * * the Norwegian University of Science and Technology in 2011. * * * ******************************************************************************/ #include/* all IO stuff lives here */ #include /* exit lives here */ #include /* getopt lives here */ #include /* UNIX error handling lives here */ #include /* strcpy lives here */ #include /* MPI and MPI-IO live here */ #define MASTER_RANK 0 #define TRUE 1 #define FALSE 0 #define BOOLEAN int #define BLOCK_SIZE 1048576 #define MBYTE 1048576 #define SYNOPSIS printf ("synopsis: %s -f -l \n", argv[0]) int main(argc, argv) int argc; char *argv[]; { /* my variables */ int my_rank, pool_size, number_of_blocks = 0, i; int number_of_integers, number_of_bytes; long long total_number_of_integers, total_number_of_bytes; BOOLEAN i_am_the_master = FALSE, input_error = FALSE, my_file_open_error = FALSE, file_open_error = FALSE, my_write_error = FALSE, write_error = FALSE; char *basename = NULL, file_name[BUFSIZ], message[BUFSIZ]; int basename_length, *junk; MPI_File fh; double start, finish, io_time, longest_io_time; char error_string[BUFSIZ]; int length_of_error_string, error_class; MPI_Status status; /* getopt variables */ extern char *optarg; int c; /* error handling variables */ extern int errno; /* ACTION */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &pool_size); if (my_rank == MASTER_RANK) i_am_the_master = TRUE; if (i_am_the_master) { /* read the command line 
*/ while ((c = getopt(argc, argv, "f:l:h")) != EOF) switch(c) { case 'f': basename = optarg; break; case 'l': if ((sscanf (optarg, "%d", &number_of_blocks) != 1) || (number_of_blocks < 1)) input_error = TRUE; break; case 'h': input_error = TRUE; break; case '?': input_error = TRUE; break; } /* Check if the command line has initialized both the basename and * the number_of_blocks. */ if ((basename == NULL) || (number_of_blocks == 0)) input_error = TRUE; if (input_error) SYNOPSIS; else { basename_length = strlen(basename) + 1; #ifdef DEBUG printf("basename = %s\n", basename); printf("basename_length = %d\n", basename_length); printf("number_of_blocks = %d\n", number_of_blocks); #endif } } /* end of if(i_am_the_master) { } */ /* Transmit the effect of reading the command line to other processes. */ MPI_Bcast(&input_error, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD); if (! input_error) { /* If we managed to get here, data read from the command line is probably OK. */ MPI_Bcast(&number_of_blocks, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD); MPI_Bcast(&basename_length, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD); if (! i_am_the_master) basename = (char*) malloc(basename_length); MPI_Bcast(basename, basename_length, MPI_CHAR, MASTER_RANK, MPI_COMM_WORLD); #ifdef DEBUG printf("%3d: basename = %s, number_of_blocks = %d\n", my_rank, basename, number_of_blocks); #endif /* Allocate space needed to generate the integers */ number_of_integers = number_of_blocks * BLOCK_SIZE; number_of_bytes = sizeof(int) * number_of_integers; total_number_of_integers = (long long) number_of_integers * (long long) pool_size; total_number_of_bytes = (long long) number_of_bytes * (long long) pool_size; junk = (int*) malloc(number_of_bytes); /* Now every process creates its own file name and attempts to open the file. 
*/ sprintf(file_name, "%s.%d", basename, my_rank); #ifdef DEBUG printf("%3d: opening file %s\n", my_rank, file_name); #endif my_file_open_error = MPI_File_open(MPI_COMM_SELF, file_name, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (my_file_open_error != MPI_SUCCESS) { MPI_Error_class(my_file_open_error, &error_class); MPI_Error_string(error_class, error_string, &length_of_error_string); printf("%3d: %s\n", my_rank, error_string); MPI_Error_string(my_file_open_error, error_string, &length_of_error_string); printf("%3d: %s\n", my_rank, error_string); my_file_open_error = TRUE; } /* Now we must ALL check that NOBODY had problems with opening the file. */ MPI_Allreduce (&my_file_open_error, &file_open_error, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD); #ifdef DEBUG if (i_am_the_master) if (file_open_error) fprintf(stderr, "problem opening output files\n"); #endif /* If all files are open for writing, write to them */ if (! file_open_error) { srand(28 + my_rank); for (i = 0; i < number_of_integers; i++) *(junk + i) = rand(); start = MPI_Wtime(); my_write_error = MPI_File_write(fh, junk, number_of_integers, MPI_INT, &status); if (my_write_error != MPI_SUCCESS) { MPI_Error_class(my_write_error, &error_class); MPI_Error_string(error_class, error_string, &length_of_error_string); printf("%3d: %s\n", my_rank, error_string); MPI_Error_string(my_write_error, error_string, &length_of_error_string); printf("%3d: %s\n", my_rank, error_string); my_write_error = TRUE; } else { finish = MPI_Wtime(); io_time = finish - start; printf("%3d: io_time = %f\n", my_rank, io_time); } /* Check if anybody had problems writing on the file */ MPI_Allreduce (&my_write_error, &write_error, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD); #ifdef DEBUG if (i_am_the_master) if (write_error) fprintf(stderr, "problem writing on files\n"); #endif } /* of if(! 
file_open_error) { } */ /* Only processes that were successful opening the files need do close them here */ if (!my_file_open_error) { MPI_File_close(&fh); #ifdef DEBUG printf ("%3d: closed %s\n", my_rank, file_name); #endif } /* If we have either write errors or file open errors, then processes that managed to open their files are requested to throw them away */ if (write_error || file_open_error) { if (! my_file_open_error) { MPI_File_delete(file_name, MPI_INFO_NULL); #ifdef DEBUG printf("%3d: deleted %s\n", my_rank, file_name); #endif } } else { MPI_Reduce(&io_time, &longest_io_time, 1, MPI_DOUBLE, MPI_MAX, MASTER_RANK, MPI_COMM_WORLD); if (i_am_the_master) { printf("longest_io_time = %f seconds\n", longest_io_time); printf("total_number_of_bytes = %lld\n", total_number_of_bytes); printf("transfer rate = %f MB/s\n", total_number_of_bytes / longest_io_time / MBYTE); } } /* end of if (write_error || file_open_error) { } */ } /* end of if(! input_error) { } */ MPI_Finalize(); exit(0); }
Previous Instructions
MPI_Init() and MPI_Finalize(); Used to initialize and finalize the MPI program.
MPI_Comm_rank() and MPI_Comm_size(); Used to find the rank of a process and the total number of processes.
MPI_Bcast(); Used to broadcast the input from the master to the other processes.
MPI_File_open(); Open MPI file.
MPI_Error_class(); Get MPI error class from MPI error.
MPI_Error_string(); Get the error string associated with an error code or error class.
MPI_Allreduce(); Used with MPI_LOR to find if any of the processes had errors opening or writing to their file.
New Instructions
MPI_File_delete( file_name, MPI_INFO_NULL ); Used to delete the files that were created if not all the processes completed successfully.
Compile & Run
If you have not already done so, obtain all the example code here.
Switch to the Intel compiler (optional, only necessary once in each terminal session)
$ module load intel
Compile the program using
$ make
Submit the job to the queue
$ make submit
The output files from the program execution are placed in the output folder
$ ls output/
131846.vilje.hpc.ntnu.no.ER  file.0  file.10  file.12  file.14  file.2  file.4  file.6  file.8
131846.vilje.hpc.ntnu.no.OU  file.1  file.11  file.13  file.15  file.3  file.5  file.7  file.9
The standard out is placed in the .OU file
$ cat output/*OU
  6: io_time = 1.070781
 14: io_time = 1.100705
  0: io_time = 1.222521
 10: io_time = 1.239067
 11: io_time = 1.319720
  1: io_time = 1.682421
  5: io_time = 1.736790
  3: io_time = 1.797548
  2: io_time = 1.811755
 12: io_time = 1.827409
  9: io_time = 1.878157
 13: io_time = 1.907817
  8: io_time = 1.974429
  7: io_time = 2.016452
 15: io_time = 2.021286
  4: io_time = 2.033217
longest_io_time       = 2.033217 seconds
total_number_of_bytes = 469762048
transfer rate         = 220.340480 MB/s