Stefano Bridi stefano.bridi at gmail.com
Thu May 5 08:50:48 UTC 2011

Hi all
I'm trying to configure Open MPI with tight integration in an old SGE
installation (6.1u3).
In the past I had configured all the rsh/rlogin/qlogin "daemon" and
"command" settings to "/usr/sbin/sshd -i" and "/usr/bin/ssh" respectively.
Now I'm trying to go back to the builtin configuration, but I get the
strange error in the subject:

error: 1: rsh_daemon "builtin" is not an absolute path

If I test with an interactive job
"qrsh -pe orte 4"
I get almost the same error:
error: 1: rlogin_daemon "builtin" is not an absolute path

plus a queue in error state.

Can anyone tell me what is going on?


Here are some details of the setup:

There are two queues for this test: "n0000.q" and "n0001.q" configured
in the same way.
There is one dedicated parallel environment "orte"

# qconf -sp orte
pe_name           orte
slots             8
user_lists        NONE
xuser_lists       NONE
start_proc_args   /bin/true
stop_proc_args    /bin/true
allocation_rule   $round_robin
control_slaves    TRUE
job_is_first_task FALSE
urgency_slots     min

# qconf -sconf
execd_spool_dir              /sge/default/spool
mailer                       /bin/mail
xterm                        /usr/bin/X11/xterm
load_sensor                  none
prolog                       none
epilog                       none
shell_start_mode             posix_compliant
login_shells                 sh,ksh,csh,tcsh
min_uid                      0
min_gid                      0
user_lists                   none
xuser_lists                  none
projects                     none
xprojects                    none
enforce_project              false
enforce_user                 auto
load_report_time             00:00:40
max_unheard                  00:05:00
reschedule_unknown           00:00:00
loglevel                     log_warning
administrator_mail           myaddress at mydomain.com
set_token_cmd                none
pag_cmd                      none
token_extend_time            none
shepherd_cmd                 none
qmaster_params               none
execd_params                 enable_windomacc=true
reporting_params             accounting=true reporting=false \
                             flush_time=00:00:15 joblog=false sharelog=00:00:00
finished_jobs                100000
gid_range                    20000-20100
qlogin_command               builtin
qlogin_daemon                builtin
rlogin_daemon                builtin
rlogin_command               builtin
max_aj_instances             2000
max_aj_tasks                 75000
max_u_jobs                   0
max_jobs                     0
auto_user_oticket            0
auto_user_fshare             0
auto_user_default_project    none
auto_user_delete_time        86400
delegated_file_staging       false
reprioritize                 false
rsh_daemon                   builtin
rsh_command                  builtin

# qconf -sconf n0000
configuration n0000 not defined

# qconf -sconf n0001
configuration n0001 not defined

# qconf -sq n0000.q
qname                 n0000.q
hostlist              n0000
seq_no                0
load_thresholds       np_load_avg=1.75
suspend_thresholds    NONE
nsuspend              1
suspend_interval      00:05:00
priority              0
min_cpu_interval      00:05:00
processors            UNDEFINED
qtype                 BATCH INTERACTIVE
ckpt_list             NONE
pe_list               test2-smp test2-smp test3 fds test4 test1-smp test1-mmp \
rerun                 FALSE
slots                 4
tmpdir                /tmp
shell                 /bin/bash
prolog                NONE
epilog                NONE
shell_start_mode      posix_compliant
starter_method        NONE
suspend_method        NONE
resume_method         NONE
terminate_method      NONE
notify                00:00:60
owner_list            NONE
user_lists            NONE
xuser_lists           NONE
subordinate_list      NONE
complex_values        NONE
projects              NONE
xprojects             NONE
calendar              NONE
initial_state         default
s_rt                  INFINITY
h_rt                  INFINITY
s_cpu                 INFINITY
h_cpu                 INFINITY
s_fsize               INFINITY
h_fsize               INFINITY
s_data                INFINITY
h_data                INFINITY
s_stack               INFINITY
h_stack               INFINITY
s_core                INFINITY
h_core                INFINITY
s_rss                 INFINITY
h_rss                 INFINITY
s_vmem                INFINITY
h_vmem                INFINITY

The test job I'm using is

#$ -N prova
#$ -pe orte 4
#$ -cwd

/sw/openmpi/141/bin/mpirun -v -np 4 -mca btl openib,self -mca ras gridengine mppexe-openmpi-141


the stderr of this job is:

error: 1: rsh_daemon "builtin" is not an absolute path

A daemon (pid 8206) died unexpectedly with status 1 while attempting
to launch so we are aborting.

There may be more information reported by the environment (see above).

This may be because the daemon was unable to find all the needed shared
libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
location of the shared libraries on the remote nodes and this will
automatically be forwarded to the remote nodes.
mpirun noticed that the job aborted, but has no info as to the process
that caused that situation.
mpirun: clean termination accomplished

