How to run R on an HPC system:
(More info here: https://www.rdocumentation.org/packages/foreach/versions/1.4.7/topics/foreach)
Modules:
Find the available R module versions on a login node with: module spider R
Then load one of them, e.g.:
module load R/4.2.2-foss-2022b
Sequential Code: (Example code)
testseq.R
# testseq.R - sequential reference version of the benchmark.
# Adds sqrt(i + j) + cos(i + j) + sin(i + j) over every pair (i, j) with
# i in 1..n and j in 1..m, then prints the total.
n <- 20000
m <- 100000

# Named `total` rather than `sum` so that base::sum() is not masked.
total <- 0.0

# NOTE(review): kept as explicit scalar loops, presumably so the run time
# stays comparable with the parallel variants of this example - confirm
# before vectorizing.
for (i in seq_len(n)) {
  for (j in seq_len(m)) {
    total <- total + sqrt(i + j) + cos(i + j) + sin(i + j)
  }
}

sprintf("Sum %f", total)
Job script (Idun) (job.sh)
#!/bin/bash
# Slurm batch script that runs the sequential example testseq.R.
#SBATCH -J job          # sensible name for the job
#SBATCH -N 1            # Allocate 1 node for the job
#SBATCH -t 00:10:00     # Upper time limit for the job (d-HH:MM:SS)
#SBATCH -p CPUQ         # Partition to submit the job to

# Make the R installation available in the job environment.
module load R/4.2.2-foss-2022b
export LANG=C

# --vanilla: do not read or write any user/site startup or workspace files.
R --vanilla -f testseq.R
Parallel Code (doParallel for 1 compute node and all cores on the node (20))
Example: testdopar.R
# testdopar.R - single-node parallel version of the benchmark (doParallel).
# Computes the same double sum as the sequential example, but distributes
# the outer loop over the registered workers.
library(doParallel)

# One worker per core; keep in sync with "#SBATCH -c" in the job script.
registerDoParallel(cores = 20)

n <- 20000
m <- 100000

# Each foreach iteration handles one value of i and returns its partial sum;
# .combine = "+" adds the partial sums up into the grand total.
# The range is 1..n: this script defines no istart/iend.
gsum <- foreach(i = seq_len(n), .combine = "+") %dopar% {
  # Per-iteration accumulator, reset for every i (not named `ls`, so that
  # base::ls() is not masked).
  partial <- 0.0
  for (j in seq_len(m)) {
    partial <- partial + sqrt(i + j) + cos(i + j) + sin(i + j)
  }
  partial
}

sprintf("Sum %f", gsum)
Job script (Idun) (job.sh)
#!/bin/bash
# Slurm batch script that runs the doParallel example testdopar.R on one node.
#SBATCH -J job          # sensible name for the job
#SBATCH -N 1            # Allocate 1 node for the job
#SBATCH -c 20           # CPUs per task; matches cores=20 in testdopar.R
#SBATCH -t 00:10:00     # Upper time limit for the job
#SBATCH -p CPUQ         # Partition to submit the job to

# Make the R installation available in the job environment.
module load R/4.2.2-foss-2022b
export LANG=C

# --vanilla: do not read or write any user/site startup or workspace files.
R --vanilla -f testdopar.R
Hybrid Code ( MPI and doParallel for several compute nodes)
Example: testhybr.R for 2 compute nodes
# testhybr.R - hybrid version of the benchmark.
# MPI (Rmpi) splits the outer loop between ranks (one rank per compute node);
# doParallel spreads each rank's share over the cores of its node.
library(doParallel)
library("Rmpi")

options(echo = FALSE)
registerDoParallel(cores = 20)

myrank <- mpi.comm.rank(0)
ranks <- mpi.comm.size(0)

# Integer literals, so the values match the integer type (type = 1) that is
# requested from mpi.bcast below.
n <- 0L
m <- 0L
if (myrank == 0) {
  # Master rank sets the number of iterations
  n <- 20000L
  m <- 100000L
}

# y = mpi.bcast(x, type, rank, comm), type = 1 integer, type = 2 double
n <- mpi.bcast(n, 1, 0, 0)
m <- mpi.bcast(m, 1, 0, 0)

# Give every rank one contiguous slice of 1..n. The last rank also takes the
# remainder when n is not a multiple of the number of ranks. For 2 ranks this
# is the split 1..n/2 and n/2+1..n.
chunk <- n %/% ranks
istart <- myrank * chunk + 1L
iend <- if (myrank == ranks - 1) n else (myrank + 1L) * chunk

# A rank whose slice is empty (more ranks than iterations) contributes 0
# instead of iterating over the descending sequence istart:iend.
lsum <- 0.0
if (iend >= istart) {
  lsum <- foreach(i = istart:iend, .combine = "+") %dopar% {
    # Per-iteration accumulator, reset for every i (not named `ls`, so that
    # base::ls() is not masked).
    partial <- 0.0
    for (j in seq_len(m)) {
      partial <- partial + sqrt(i + j) + cos(i + j) + sin(i + j)
    }
    partial
  }
}

# y = mpi.allreduce(x, type, op = "sum", comm), type = 2 double
gsum <- mpi.allreduce(lsum, type = 2, op = "sum", comm = 0)

sprintf("Sum %f", gsum)
mpi.quit()
Job script
#!/bin/bash
# Slurm batch script for the hybrid example testhybr.R:
# two nodes, one MPI task per node, 20 CPUs for each task.
#SBATCH --job-name=job          # sensible name for the job
#SBATCH --nodes=2               # number of nodes to allocate
#SBATCH --ntasks-per-node=1     # one MPI task on each node
#SBATCH --cpus-per-task=20      # CPUs available to each task
#SBATCH --time=00:10:00         # wall-clock limit for the job
#SBATCH --partition=CPUQ        # partition to submit the job to

# Make the R installation available in the job environment.
module load R/4.2.2-foss-2022b
export LANG=C

# Launch the R script under MPI and report how long the run took.
time mpirun R --vanilla -f testhybr.R
Performance (Idun):
Sequential (1 core): 7 min 30 sec
Hybrid (40 cores): 16 sec