[gridengine users] gridengine-master(sge_qmaster) does not start

Kerim Gueney kgueney at uni-koeln.de
Mon Aug 11 14:16:01 UTC 2014


Hello guys,

Absolute gridengine newbie here. I'm trying to fix a problem that we 
have with our gridengine while our main admin is on vacation. We 
are using Debian stable running gridengine-master (6.2u5-7.1).
The problem is that gridengine-master won't start at all. If I call it 
manually, it returns nothing. Setting the SGE_ND environment variable 
results in the following output (a blank error):

# /etc/init.d/gridengine-master start
error:

#


qstat shows

# qstat
error: unable to read qmaster name: qmaster hostname in 
"/var/lib/gridengine/default/common/act_qmaster" has zero length

Adding the host's name to the file manually doesn't help. It results in

# qstat
error: commlib error: got select error (Connection refused)
error: unable to send message to qmaster using port 6444 on host 
"queuemaster": got send error

Attached is the strace output of

# strace /etc/init.d/gridengine-master start

I'm grateful for every kind of help I can get. Thank you in advance.

Best regards,
Kerim Gueney
-------------- next part --------------
execve("/etc/init.d/gridengine-master", ["/etc/init.d/gridengine-master"], [/* 34 vars */]) = 0
brk(0)                                  = 0x992000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb64a15b000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("tls/x86_64/libc.so.6", O_RDONLY)  = -1 ENOENT (No such file or directory)
open("tls/libc.so.6", O_RDONLY)         = -1 ENOENT (No such file or directory)
open("x86_64/libc.so.6", O_RDONLY)      = -1 ENOENT (No such file or directory)
open("libc.so.6", O_RDONLY)             = -1 ENOENT (No such file or directory)
open("/usr/local/lib/tls/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib/tls/x86_64", 0x7fff357f5e90) = -1 ENOENT (No such file or directory)
open("/usr/local/lib/tls/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib/tls", 0x7fff357f5e90) = -1 ENOENT (No such file or directory)
open("/usr/local/lib/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib/x86_64", 0x7fff357f5e90) = -1 ENOENT (No such file or directory)
open("/usr/local/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/usr/local/lib", {st_mode=S_IFDIR|S_ISGID|0775, st_size=4096, ...}) = 0
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=101679, ...}) = 0
mmap(NULL, 101679, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fb64a142000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\360\1\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1599536, ...}) = 0
mmap(NULL, 3713144, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb649bb3000
mprotect(0x7fb649d35000, 2093056, PROT_NONE) = 0
mmap(0x7fb649f34000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x181000) = 0x7fb649f34000
mmap(0x7fb649f39000, 18552, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb649f39000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb64a141000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb64a140000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb64a13f000
arch_prctl(ARCH_SET_FS, 0x7fb64a140700) = 0
mprotect(0x7fb649f34000, 16384, PROT_READ) = 0
mprotect(0x7fb64a15d000, 4096, PROT_READ) = 0
munmap(0x7fb64a142000, 101679)          = 0
getpid()                                = 2043
rt_sigaction(SIGCHLD, {0x40f270, ~[RTMIN RT_1], SA_RESTORER, 0x7fb649be55c0}, NULL, 8) = 0
geteuid()                               = 0
brk(0)                                  = 0x992000
brk(0x9b3000)                           = 0x9b3000
getppid()                               = 2042
stat("/home/gueney", {st_mode=S_IFDIR|0750, st_size=4096, ...}) = 0
stat(".", {st_mode=S_IFDIR|0750, st_size=4096, ...}) = 0
open("/etc/init.d/gridengine-master", O_RDONLY) = 3
fcntl(3, F_DUPFD, 10)                   = 10
close(3)                                = 0
fcntl(10, F_SETFD, FD_CLOEXEC)          = 0
rt_sigaction(SIGINT, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigaction(SIGINT, {0x40f270, ~[RTMIN RT_1], SA_RESTORER, 0x7fb649be55c0}, NULL, 8) = 0
rt_sigaction(SIGQUIT, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigaction(SIGQUIT, {SIG_DFL, ~[RTMIN RT_1], SA_RESTORER, 0x7fb649be55c0}, NULL, 8) = 0
rt_sigaction(SIGTERM, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigaction(SIGTERM, {SIG_DFL, ~[RTMIN RT_1], SA_RESTORER, 0x7fb649be55c0}, NULL, 8) = 0
read(10, "#! /bin/sh\n### BEGIN INIT INFO\n#"..., 8192) = 3353
geteuid()                               = 0
stat("/usr/sbin/sge_qmaster", {st_mode=S_IFREG|0755, st_size=1401, ...}) = 0
faccessat(AT_FDCWD, "/usr/sbin/sge_qmaster", X_OK) = 0
faccessat(AT_FDCWD, "/etc/default/gridengine", R_OK) = 0
open("/etc/default/gridengine", O_RDONLY) = 3
fcntl(3, F_DUPFD, 10)                   = 11
close(3)                                = 0
fcntl(11, F_SETFD, FD_CLOEXEC)          = 0
read(11, "# Sun Grid Engine configuration\n"..., 8192) = 357
read(11, "", 8192)                      = 0
close(11)                               = 0
open("/lib/init/vars.sh", O_RDONLY)     = 3
fcntl(3, F_DUPFD, 10)                   = 11
close(3)                                = 0
fcntl(11, F_SETFD, FD_CLOEXEC)          = 0
read(11, "#\n# Set rcS vars\n#\n\n# Because /e"..., 8192) = 1228
stat("/etc/default/rcS", {st_mode=S_IFREG|0644, st_size=620, ...}) = 0
open("/etc/default/rcS", O_RDONLY)      = 3
fcntl(3, F_DUPFD, 10)                   = 12
close(3)                                = 0
fcntl(12, F_SETFD, FD_CLOEXEC)          = 0
read(12, "#\n# /etc/default/rcS\n#\n# Default"..., 8192) = 620
read(12, "", 8192)                      = 0
close(12)                               = 0
faccessat(AT_FDCWD, "/proc/cmdline", R_OK) = 0
pipe([3, 4])                            = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fb64a1409d0) = 2044
close(4)                                = 0
read(3, "BOOT_IMAGE=/boot/vmlinuz-3.2.0-4"..., 128) = 95
read(3, "", 128)                        = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigreturn(0x11)                      = 0
close(3)                                = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 2044
read(11, "", 8192)                      = 0
close(11)                               = 0
open("/lib/lsb/init-functions", O_RDONLY) = 3
fcntl(3, F_DUPFD, 10)                   = 11
close(3)                                = 0
fcntl(11, F_SETFD, FD_CLOEXEC)          = 0
read(11, "# /lib/lsb/init-functions for De"..., 8192) = 8192
read(11, " \" * Starting remote filesystem "..., 8192) = 3253
pipe([3, 4])                            = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fb64a1409d0) = 2045
close(4)                                = 0
read(3, "/lib/lsb/init-functions.d/20-lef"..., 128) = 46
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigreturn(0x11)                      = 46
read(3, "", 128)                        = 0
close(3)                                = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 2045
faccessat(AT_FDCWD, "/lib/lsb/init-functions.d/20-left-info-blocks", R_OK) = 0
open("/lib/lsb/init-functions.d/20-left-info-blocks", O_RDONLY) = 3
fcntl(3, F_DUPFD, 10)                   = 12
close(3)                                = 0
fcntl(12, F_SETFD, FD_CLOEXEC)          = 0
read(12, "# Default info blocks put to the"..., 8192) = 1093
read(12, "", 8192)                      = 0
close(12)                               = 0
stat("/etc/lsb-base-logging.sh", 0x7fff357f5fb0) = -1 ENOENT (No such file or directory)
read(11, "", 8192)                      = 0
close(11)                               = 0
stat("/var/run/gridengine", {st_mode=S_IFDIR|0755, st_size=60, ...}) = 0
fcntl(1, F_DUPFD, 10)                   = 11
close(1)                                = 0
fcntl(11, F_SETFD, FD_CLOEXEC)          = 0
dup2(2, 1)                              = 1
write(1, "Usage: /etc/init.d/gridengine-ma"..., 71Usage: /etc/init.d/gridengine-master {start|stop|restart|force-reload}
) = 71
dup2(11, 1)                             = 1
close(11)                               = 0
exit_group(3)                           = ?


More information about the users mailing list