Technical Note - Grid Computing: ECS Grid

+64 4 463 5341 office@ecs.vuw.ac.nz · able to start them

need sgegrid
critical error: Please set the environment variable SGE_ROOT.
need sgegrid
# Now we are in the job-specific directory so now can do something useful
% diff tech_note_template.sh my_job_submission_script.sh
9c9
< #$ -wd /vol/grid-solar/sgeusers/fred 
---
> #$ -wd /vol/grid-solar/sgeusers/myusername 
18,19c18,19
< if [ -d /local/tmp/fred/$JOB_ID ]; then
<         cd /local/tmp/fred/$JOB_ID
---
> if [ -d /local/tmp/myusername/$JOB_ID ]; then
>         cd /local/tmp/myusername/$JOB_ID
26,27c26,27
<         echo "AND LOCAL TMP FRED "
<         ls -la /local/tmp/fred
---
>         echo "AND LOCAL TMP myusername "
>         ls -la /local/tmp/myusername
...
dos2unix < my_windows_file.txt  > my_unix_file.sh
Ctrl-x RET f undecided-unix RET
#!/bin/sh
# Basic job submission script. The "#$" lines below are the setup directives for the job.
# Force Bourne Shell if not Sun Grid Engine default shell (you never know!)
#
#$ -S /bin/sh
#
# I know I have a directory here so I'll use it as my initial working directory
#  (replace "fred" with your own username everywhere in this script)
#$ -wd /vol/grid-solar/sgeusers/fred 
#
# End of the setup directives
#
# Now let's do something useful, but first change into the job-specific directory that should
#  have been created for us
#  ($JOB_ID is not set by this script; it is expected to come from the job's environment)
# Check we have somewhere to work now and if we don't, exit nicely.
#  On failure we list /local/tmp and /local/tmp/fred for diagnosis, then exit with status 1.
if [ -d /local/tmp/fred/$JOB_ID ]; then
        cd /local/tmp/fred/$JOB_ID
else
        echo "Uh oh ! There's no job directory to change into "
        echo "Something is broken. I should inform the programmers"
        echo "Save some information that may be of use to them"
        echo "Here's LOCAL TMP "
        ls -la /local/tmp
        echo "AND LOCAL TMP FRED "
        ls -la /local/tmp/fred
        echo "Exiting"
        exit 1
fi
#
# Now we are in the job-specific directory so now can do something useful
#
# Everything written to stdout (program output and shell echos) ends up in
#    scriptname.o$JOB_ID
#  so begin by recording a snapshot of where and how we are running.
#
# Which machine is this, and which user and groups are we running as?
#
printf '%s\n' '==UNAME=='
uname -n
printf '%s\n' '==WHO AM I and GROUPS=='
id
groups
#
# The SGE_O_WORKDIR value, then listings of the local and the shared areas.
#  (SGE_O_WORKDIR is deliberately expanded exactly as the plain echo would print it)
#
printf '%s\n' '==SGE_O_WORKDIR=='
echo $SGE_O_WORKDIR
printf '%s\n' '==/LOCAL/TMP=='
ls -ltr /local/tmp/
printf '%s\n' '==/VOL/GRID-SOLAR=='
ls -l /vol/grid-solar/sgeusers/
#
# The directory we start in and its contents, followed by the environment (env)
#  and the shell's own variables (set)
#
printf '%s\n' '==RUN HOME=='
pwd
ls
printf '%s\n' '==ENV=='
env
printf '%s\n' '==SET=='
set
#
# The tail end of /local/tmp on this machine, sorted by time
#
printf '%s\n' '== WHATS IN LOCAL/TMP ON THE MACHINE WE ARE RUNNING ON =='
ls -ltra /local/tmp | tail
#
# The current (job-specific) directory before any input has been copied in
#
printf '%s\n' '== WHATS IN LOCAL TMP FRED JOB_ID AT THE START=='
ls -la
#
# Copy the input file to the local directory
#
# Each step below is checked. Without the checks, a failed copy or conversion
#  would still fall through to "Ran through OK" and the job would finish with
#  exit status 0, hiding the failure.
#
cp /vol/grid-solar/sgeusers/fred/krb_tkt_flow.JPG . || {
        echo "Could not copy the input file into the job directory"
        echo "Exiting"
        exit 1
}
echo ==WHATS THERE HAVING COPIED STUFF OVER AS INPUT==
ls -la
#
# Note that we need the full path to this utility, as it is not on the PATH
#
/usr/pkg/bin/convert krb_tkt_flow.JPG krb_tkt_flow.png || {
        echo "Could not convert the input file"
        echo "Exiting"
        exit 1
}
#
echo ==AND NOW, HAVING DONE SOMTHING USEFUL AND CREATED SOME OUTPUT==
ls -la
#
# Now we copy the output to a place to pick it up from later.
#  mkdir -p creates the per-job results directory if it is not already there,
#  and the path is quoted so an unusual $JOB_ID cannot split it into several words.
#
mkdir -p "/vol/grid-solar/sgeusers/fred/$JOB_ID" || {
        echo "Could not create the results directory"
        echo "Exiting"
        exit 1
}
cp krb_tkt_flow.png "/vol/grid-solar/sgeusers/fred/$JOB_ID" || {
        echo "Could not copy the output to the results directory"
        echo "Exiting"
        exit 1
}
#
echo "Ran through OK"
qstat                    shows you the state of your jobs

qstat -u \*              shows you the state of all jobs

qsub script_name         submits the job defined in the script into the queuing system

qdel job_number          deletes the job with the job_number from the queuing system
#
# Mail me at the b(eginning) and e(nd) of the job:
#  -M gives the address to send to, -m lists the events (here b and e) to mail on
#
#$ -M Fred.Bloggs@ecs.vuw.ac.nz
#$ -m be
#
Subject:      Job 341642 (freds_test.sh) Started

Job 341642 (freds_test.sh) Started
 User       = fred
 Queue      = GX755
 Host       = lumiere.ecs.vuw.ac.nz
 Start Time = 03/18/2009 16:20:54
Subject:      Job 341642 (freds_test.sh) Complete

Job 341642 (freds_test.sh) Complete
 User             = fred
 Queue            = GX755@lumiere.ecs.vuw.ac.nz
 Host             = lumiere.ecs.vuw.ac.nz
 Start Time       = 03/18/2009 16:20:54
 End Time         = 03/18/2009 16:20:55
 User Time        = 00:00:00
 System Time      = 00:00:00
 Wallclock Time   = 00:00:01
 CPU              = NA
 Max vmem         = NA
 Exit Status      = 0
/local/tmp/fred/1234.1
/local/tmp/fred/1234.3
/local/tmp/fred/1234.5
/local/tmp/fred/1234.7
/local/tmp/fred/1234.9
 -l resource=value
ecs_df_local 
ecs_model
ecs_netgroup
ecs_room
 -l ecs_model=GX745
 qsub -l arch=lx-x86 your_script.sh
SGE_ARCH=lx-x86 
 /usr/pkg/sge/util/arch
  /vol/grid-solar/username/mycodes/bin/lx-x86/prog1
  /vol/grid-solar/username/mycodes/bin/prog1.lx-x86
# Bourne shell check of SGE_ARCH: complain when it is empty or unset, and
# announce when it names the Linux x86 architecture. Any other value is silent.
case "$SGE_ARCH" in
    "")
        echo "Can't determine SGE ARCH"
        ;;
    lx-x86)
        echo "I could run a Linux x86 binary"
        ;;
esac
# C shell version of the SGE_ARCH check.
# $?SGE_ARCH is 0 when the variable is not set at all.
if ( $?SGE_ARCH == 0 ) then
    echo "Can't determine SGE ARCH"
else
    # Quoted: a set-but-empty or multi-word value would otherwise make the
    # expression itself a syntax error instead of simply not matching.
    if ( "$SGE_ARCH" == "lx-x86" ) then
        echo "I could run a Linux x86 binary"
    endif
endif
#!/bin/sh
#
# Job submission script that runs a Java program.
#  Replace "fred" with your own username everywhere in this script.
#
# Force Bourne Shell if not Sun Grid Engine default shell (you never know!)
#
#$ -S /bin/sh
#
# I know I have a directory here so I'll use it as my initial working directory
#
#$ -wd /vol/grid-solar/sgeusers/fred 
#
# Now let's do something useful, but first change into the job-specific directory that should
#  have been created for us
#
# The cd itself is what gets tested, not just the directory's existence, so the job
#  can never carry on in the wrong directory if the change fails for any reason.
#  The path is quoted so an unusual $JOB_ID cannot split it into several words.
#
cd "/local/tmp/fred/$JOB_ID" || {
        echo "There's no job directory to change into "
        echo "Something is broken. I should inform the programmers"
        echo "Save some information that may be of use to them"
        echo "Here's LOCAL TMP "
        ls -la /local/tmp
        echo "AND LOCAL TMP FRED "
        ls -la /local/tmp/fred
        echo "Exiting"
        exit 1
}
#
#
# Pick the Java installation that matches this machine's architecture.
#  SGE_ARCH is not set by this script; it is expected to come from the job's environment.
#
if [ -z "$SGE_ARCH" ]; then
   echo "Can't determine SGE ARCH"
elif [ "$SGE_ARCH" = "lx-amd64" ]; then
   JAVA_HOME="/usr/pkg/java/sun-8"
fi

#
# Without a JAVA_HOME there is nothing we can run, so fail the job here rather
#  than letting it finish with exit status 0 as if it had worked.
#
if [ -z "$JAVA_HOME" ]; then
   echo "Can't define a JAVA_HOME"
   echo "Exiting"
   exit 1
fi

export JAVA_HOME
PATH="/usr/pkg/java/bin:${JAVA_HOME}/bin:${PATH}"; export PATH

#
# java is the last command, so its exit status becomes the job's exit status
#
java Hello
"ERROR: configuration error -- this is the wrong version of need
        You should never see this -- please report to bugs@ecs.vuw.ac.nz" 
% gcc -c -I/usr/pkg/sge/include/ ListingOne.c
% gcc -o  ListingOne \
      -L/usr/pkg/sge/lib/lx-x86/ \
      -Wl,-R/usr/pkg/sge/lib/lx-x86/ -ldrmaa ListingOne.o
% ./ListingOne 
Successfully started the DRMAA library
% gcc -c -I/usr/pkg/sge/include/ drdobbs-shell.c
% gcc -o drdobbs-shell \
      -L/usr/pkg/sge/lib/lx-amd64/ \
      -Wl,-R/usr/pkg/sge/lib/lx-amd64/ -ldrmaa \
 drdobbs-shell.o
% mv i_am_alive.sh i_am_alive.sh.orig
% sed -e "s/fred/yourusername/g" i_am_alive.sh.orig > i_am_alive.sh
% chmod u+x i_am_alive.sh
% ~/DRMAA/drdobbs-shell ~/DRMAA/i_am_alive.sh
Your job "/u/students/fred/DRMAA/i_am_alive.sh"has been submitted with id 000000
%
% ls -ltr ~
...
drwx------  2 fred  students    512 Oct  6 12:02 DRMAA
-rw-r--r--  1 fred  students      0 Oct  6 12:20 i_am_alive.sh.e000000
-rw-r--r--  1 fred  students  29753 Oct  6 12:20 i_am_alive.sh.o000000
%
% cat ~/i_am_alive.sh.o000000
==UNAME==
breaker.msor.vuw.ac.nz
==WHO AM I and GROUPS==
uid=0000(fred) gid=25(students) groups=25(students),1500(c302t1)
students c302t1
==SGE_O_WORKDIR==
/home/rialto1/fred/DRMAA
...
% cp sleeper.sh ~
% chmod u+x ~/sleeper.sh 
% java -Djava.library.path=/vol/grid-solar/sgeusers/admin/DRMAA/lx-amd64 \
   -cp /vol/grid-solar/sgeusers/admin/DRMAA/lx-amd64/drmaa.jar:. Howto2
Your job has been submitted with id 000000
%
% ls -ltr ~
...
-rwx------  1 fred    1746 Sep 30 14:50 sleeper.sh
drwx------  2 fred     512 Sep 30 15:25 DRMAA
-rw-r--r--  1 fred       0 Sep 30 15:28 Sleeper.e000000
-rw-r--r--  1 fred      99 Sep 30 15:28 Sleeper.o000000
% cat ~/Sleeper.o000000
Here I am. Sleeping now at: Tue Sep 30 15:28:06 NZDT 2014
Now it is: Tue Sep 30 15:28:11 NZDT 2014
%

I	Attachment	Action	Size	Date	Who	Comment
pdf	SGE-User-Guide-820-0699.pdf	manage	2 MB	16 Sep 2013 - 11:00	Main.kevin	Sun N1 Grid Engine 6.1 User's Guide
sh	submission_script-basic.sh	manage	2 K	29 Aug 2016 - 12:59	Main.kevin	The basic submission script
sh	submission_script-task_array.sh	manage	2 K	29 Aug 2016 - 13:05	Main.kevin	The task array submission script

Te Kura Mātai Pūkaha, Pūrorohiko

School of Engineering and Computer Science

Technical Note - Grid Computing: ECS Grid

IMPORTANT

FOR EXISTING/RETURNING USERS

Summary

Details

General

Setting up the environment

Do I have a home on the Grid?

Note for Windows users

Where will the input and output files be?

Preserving results after execution

Where do `stdin`, `stdout` and `stderr` appear

Job submission script example

A basic job submission script

Basic job-related commands

Emailed output

Array Jobs (Task Array Jobs)

Managing Array Jobs (Task Array Jobs)

Specialised job submission

Targeting machine architectures

Compilation for the ArchLinux machines

Running Java programs on the ECS/SGE Grid

You can't use `need` inside a basic job submission script

Using DRMAA with the ECS/SGE Grid

Background

C Bindings

Java Bindings

Simple, proof of concept example: C Binding

Spawning an actual job into the SGE: C Binding

Spawning an actual job into the SGE: Java Binding

Caveats

Environmental variables now have an `SGE_` prefix not `GE_`

Jobs in Error states: unable to chdir

Technical Note - Grid Computing: ECS Grid

IMPORTANT

FOR EXISTING/RETURNING USERS

Summary

Details

General

Setting up the environment

Do I have a home on the Grid?

Note for Windows users

Where will the input and output files be?

Preserving results after execution

Where do stdin, stdout and stderr appear

Job submission script example

A basic job submission script

Basic job-related commands

Emailed output

Array Jobs (Task Array Jobs)

Managing Array Jobs (Task Array Jobs)

Specialised job submission

Targeting machine architectures

Compilation for the ArchLinux machines

Running Java programs on the ECS/SGE Grid

You can't use need inside a basic job submission script

Using DRMAA with the ECS/SGE Grid

Background

C Bindings

Java Bindings

Simple, proof of concept example: C Binding

Spawning an actual job into the SGE: C Binding

Spawning an actual job into the SGE: Java Binding

Caveats

Environmental variables now have an SGE_ prefix not GE_

Jobs in Error states: unable to chdir

Where do `stdin`, `stdout` and `stderr` appear

You can't use `need` inside a basic job submission script

Environmental variables now have an `SGE_` prefix not `GE_`