[gridengine dev] [DRAFT PATCH] Enhancement: exempt certain programs from execd control
Mark Dixon
m.c.dixon at leeds.ac.uk
Thu Nov 10 12:44:24 UTC 2011
In an attempt to lighten people's mood today...
Please find a draft patch attached, allowing a GE admin to specify a list
of programs that are not counted against a job's resource limits. The
patch has been prepared against a vanilla ge-6.2u5 and only gives this
feature to Linux execd's.
This is to address a problem where tightly-integrated parallel jobs can be
killed due to all the instances of qrsh exceeding h_vmem on the MASTER, as
documented here:
https://arc.liv.ac.uk/trac/SGE/ticket/694
Is this something the major forks would be interested in?
This is my first GE patch and I'm very rusty (it's been over decade since
I last worked on a big C code), so any and all
help/comments/insults/rotten fruit would be welcomed.
How to use once applied and built:
"qconf -mconf" - add EXEMPT_PROGRAMS=<filename> to the execd_params line
(use ":" as a delimiter, if more than one filename is desired).
Setting this to contain whatever filenames `ls $SGE_ROOT/bin/*/qsh`
expands to on your system should fix the mpirun-qrsh problem (qrsh is
symlinked to qsh).
Major concerns yet to resolve:
1) I'm not happy with how I'm configuring this feature. Using execd_params
doesn't seem to fit nicely, so am still looking for the best way to do
this (a line in "qconf -se", allowing per-execd config, perhaps? Or a
separate line in "qconf -sconf"?)
2) Using execd_params also means that simply removing the variable doesn't
actually turn off this feature - you need to set "EXEMPT_PROGRAMS=" and
wait a bit for the config to propogate to the execds, before removing the
reference completely. Ugh.
3) I think it will break running a standalone "pdc" binary, as pt_open now
refers to the cluster configuration. Don't know how big an issue this is.
4) I don't know what "touch_time_stamp" is supposed to achieve, but other
(similar) bits use it. Commented-out for now.
5) The name of the featire. I don't like the string "exempt_programs" I
keep using. "resource_exempt" instead?
6) I assume this needs to be submitted under the SISSL 1.2?
7) I'm not happy about the amount of memory allocation error checking
going on... but seems to be on par with similar routines.
Any guidance would be wonderful :)
Thanks,
Mark
--
-----------------------------------------------------------------
Mark Dixon Email : m.c.dixon at leeds.ac.uk
HPC/Grid Systems Support Tel (int): 35429
Information Systems Services Tel (ext): +44(0)113 343 5429
University of Leeds, LS2 9JT, UK
-----------------------------------------------------------------
-------------- next part --------------
--- gridengine/source/daemons/common/procfs.c 2011-11-10 11:29:37.654547000 +0000
+++ gridengine/source/daemons/common/procfs.c 2011-11-10 11:28:31.901153000 +0000
@@ -80,7 +80,8 @@
#include "sge_log.h"
#include "msg_sge.h"
#include "sgermon.h"
-#include "basis_types.h"
+#include "sge_conf.h"
+#include "sge_all_listsL.h"
#include "sgedefs.h"
#include "exec_ifm.h"
#include "pdc.h"
@@ -104,6 +105,7 @@
#endif
+static lList* exempt_programs = NULL;
/*-----------------------------------------------------------------------*/
#if defined(LINUX) || defined(ALPHA) || defined(SOLARIS)
@@ -388,10 +390,21 @@
int pt_open(void)
{
cwd = opendir(PROC_DIR);
+
+ if (cwd) {
+ /* storage free'd in pt_close */
+ exempt_programs = mconf_get_exempt_programs();
+ }
+
return !cwd;
}
void pt_close(void)
{
+ /* free storage assigned in pt_open */
+ if (exempt_programs) {
+ lFreeList(&exempt_programs);
+ }
+
closedir(cwd);
}
@@ -515,6 +528,58 @@
continue;
}
+ /**
+ ** get program filename to decide if this
+ ** process should be ignored.
+ **/
+# if defined(LINUX)
+ /*
+ * Read proc symlink containing exe filename
+ */
+ sprintf(procnam, PROC_DIR "/%s/exe", dent->d_name);
+ if ((ret = readlink(procnam, buffer, BIGLINE-1))<=0) {
+ close(fd);
+ if (ret == -1 && errno != ENOENT) {
+#ifdef MONITOR_PDC
+ INFO((SGE_EVENT, "could not read %s: %s\n", procnam, strerror(errno)));
+#endif
+ touch_time_stamp(dent->d_name, time_stamp, job_list);
+ }
+ continue;
+ } else {
+ /* readlink doesn't NULL-terminate */
+ buffer[ret] = '\0';
+ }
+ buffer[BIGLINE-1] = '\0';
+
+ /*
+ * Compare against list of exe filenames to ignore
+ */
+ lListElem *exempt_ep = lFirst(exempt_programs);
+ {
+ int found_it = 0;
+ do {
+ const char *exempt_program = NULL;
+
+ if (exempt_ep != NULL) {
+ exempt_program = lGetString(exempt_ep, ST_name);
+ }
+
+ if (exempt_program && ! strcmp(exempt_program, buffer)) {
+// touch_time_stamp(dent->d_name, time_stamp, job_list); // Do we need to do this? Not sure what it does...
+ found_it = 1;
+ break;
+ }
+ } while((exempt_ep = lNext(exempt_ep)));
+
+ if (found_it == 1) {
+ continue;
+ }
+ }
+# else
+ /* Not supported yet - to do */
+# endif
+
/**
** get a list of supplementary group ids to decide
** whether this process will be needed;
--- gridengine/source/libs/sgeobj/sge_conf.c 2009-10-20 08:19:19.000000000 +0100
+++ gridengine/source/libs/sgeobj/sge_conf.c 2011-11-10 10:44:28.000000000 +0000
@@ -63,7 +63,7 @@
#include "sgeobj/msg_sgeobjlib.h"
#include "sge.h"
-#include "basis_types.h"
+#include "sge_all_listsL.h"
#include "sge_conf.h"
#include "sge_feature.h"
#include "sge_usage.h"
@@ -174,6 +174,7 @@
static bool enable_windomacc = false;
static bool enable_binding = false;
static bool enable_addgrp_kill = false;
+static char* exempt_programs = NULL;
static u_long32 pdc_interval = 1;
static char s_descriptors[100];
static char h_descriptors[100];
@@ -989,6 +990,13 @@
sge_strlcpy(h_locks, s+sizeof("H_LOCKS"), 100);
continue;
}
+ if (!strncasecmp(s, "EXEMPT_PROGRAMS", sizeof("EXEMPT_PROGRAMS")-1)) {
+ if (exempt_programs) {
+ free(exempt_programs);
+ }
+ exempt_programs = sge_strdup(NULL, &(s[sizeof("EXEMPT_PROGRAMS")]));
+ continue;
+ }
}
SGE_UNLOCK(LOCK_MASTER_CONF, LOCK_WRITE);
sge_free_saved_vars(conf_context);
@@ -2048,6 +2056,28 @@
DRETURN(ret);
}
+/* returned pointer needs to be freed */
+lList* mconf_get_exempt_programs(void) {
+ lList* exempt_list = NULL;
+
+ DENTER(BASIS_LAYER, "mconf_get_exempt_programs");
+ SGE_LOCK(LOCK_MASTER_CONF, LOCK_READ);
+
+ exempt_list = lCreateList("exempt_programs", ST_Type);
+
+ struct saved_vars_s *conf_context = NULL;
+ char *s;
+ for(s=sge_strtok_r(exempt_programs, ":", &conf_context); s; s=sge_strtok_r(NULL, ":", &conf_context)) {
+ lListElem *element;
+ element = lCreateElem(ST_Type);
+ lSetString(element, ST_name, s);
+ lAppendElem(exempt_list, element);
+ }
+
+ SGE_UNLOCK(LOCK_MASTER_CONF, LOCK_READ);
+ DRETURN(exempt_list);
+}
+
u_long32 mconf_get_pdc_interval(void) {
u_long32 ret;
--- gridengine/source/libs/sgeobj/sge_conf.h 2009-10-20 08:19:20.000000000 +0100
+++ gridengine/source/libs/sgeobj/sge_conf.h 2011-11-08 16:42:46.000000000 +0000
@@ -146,6 +146,7 @@
bool mconf_get_enable_test_sleep_after_request(void);
int mconf_get_max_job_deletion_time(void);
bool mconf_get_enable_addgrp_kill(void);
+lList* mconf_get_exempt_programs(void);
u_long32 mconf_get_pdc_interval(void);
bool mconf_get_enable_reschedule_kill(void);
bool mconf_get_enable_reschedule_slave(void);
More information about the dev
mailing list