Import upstream version 1.8.3+git20210302.1.a1875cb
Debian Janitor
3 years ago
7 | 7 | [xcat] |
8 | 8 | |
9 | 9 | # list the nodes in the specified node group |
10 | map: lsdef -s -t node $GROUP | cut -d' ' -f1 | |
10 | map: lsdef -s -t node "$GROUP" | cut -d' ' -f1 | |
11 | 11 | |
12 | 12 | # list all the nodes defined in the xCAT tables |
13 | 13 | all: lsdef -s -t node | cut -d' ' -f1 |
38 | 38 | # and yes, ranges work for groups too! |
39 | 39 | old: '@rack[1,3]' |
40 | 40 | new: '@rack[2,4]' |
41 | # YAML lists | |
42 | rack5: | |
43 | - 'example[200-205]' # some comment about example[200-205] | |
44 | - 'example245' | |
45 | - 'example300,example[401-406]' | |
41 | 46 | |
42 | 47 | # Group source cpu: |
43 | 48 | # define groups @cpu:ivy, @cpu:hsw and @cpu:all |
280 | 280 | limit time for command to run on the node |
281 | 281 | .TP |
282 | 282 | .BI \-R \ WORKER\fP,\fB \ \-\-worker\fB= WORKER |
283 | worker name to use for connection (\fBexec\fP, \fBssh\fP, \fBrsh\fP, \fBpdsh\fP), default is \fBssh\fP | |
283 | worker name to use for connection (\fBexec\fP, \fBssh\fP, \fBrsh\fP, \fBpdsh\fP, or the name of a Python worker module), default is \fBssh\fP | |
284 | 284 | .TP |
285 | 285 | .BI \-\-remote\fB= REMOTE |
286 | 286 | whether to enable remote execution: in tree mode, \(aqyes\(aq forces connections to the leaf nodes for execution, \(aqno\(aq establishes connections up to the leaf parent nodes for execution (default is \(aqyes\(aq) |
235 | 235 | compute: 'node[0001-0288]' |
236 | 236 | gpu: 'node[0001-0008]' |
237 | 237 | |
238 | servers: # example of yaml list syntax for nodes | |
239 | - 'server001' # in a group | |
240 | - 'server002,server101' | |
241 | - 'server[003-006]' | |
242 | ||
238 | 243 | cpu_only: '@compute!@gpu' # example of inline set operation |
239 | 244 | # define group @cpu_only with node[0009-0288] |
240 | 245 |
632 | 632 | installed; doesn't provide write support (eg. you cannot ``cat file | clush |
633 | 633 | --worker pdsh``); it is primarily an 1-to-n worker example. |
634 | 634 | |
635 | Worker modules distributed outside of ClusterShell are also supported by | |
636 | specifying the case-sensitive full Python module name of a worker module. | |
635 | 637 | |
636 | 638 | .. [#] LLNL parallel remote shell utility |
637 | 639 | (https://computing.llnl.gov/linux/pdsh.html) |
81 | 81 | self.master_worker.write(msg) |
82 | 82 | |
83 | 83 | class OutputHandler(EventHandler): |
84 | """Base class for clush output handlers.""" | |
85 | ||
86 | def __init__(self): | |
84 | """Base class for generic output handlers.""" | |
85 | ||
86 | def __init__(self, prog=None): | |
87 | 87 | EventHandler.__init__(self) |
88 | 88 | self._runtimer = None |
89 | self._prog = prog if prog else os.path.basename(sys.argv[0]) | |
89 | 90 | |
90 | 91 | def runtimer_init(self, task, ntotal=0): |
91 | 92 | """Init timer for live command-completed progressmeter.""" |
92 | thandler = RunTimer(task, ntotal) | |
93 | thandler = RunTimer(task, ntotal, prog=self._prog) | |
93 | 94 | self._runtimer = task.timer(1.33, thandler, interval=1./3., |
94 | 95 | autoclose=True) |
95 | 96 | |
133 | 134 | class DirectOutputHandler(OutputHandler): |
134 | 135 | """Direct output event handler class.""" |
135 | 136 | |
136 | def __init__(self, display): | |
137 | OutputHandler.__init__(self) | |
137 | def __init__(self, display, prog=None): | |
138 | OutputHandler.__init__(self, prog=prog) | |
138 | 139 | self._display = display |
139 | 140 | |
140 | 141 | def ev_read(self, worker, node, sname, msg): |
148 | 149 | verb = VERB_QUIET |
149 | 150 | if self._display.maxrc: |
150 | 151 | verb = VERB_STD |
151 | self._display.vprint_err(verb, "clush: %s: " | |
152 | "exited with exit code %d" % (node, rc)) | |
152 | self._display.vprint_err(verb, "%s: %s: exited with exit code %d" % | |
153 | (self._prog, node, rc)) | |
153 | 154 | |
154 | 155 | def ev_close(self, worker, timedout): |
155 | 156 | if timedout: |
156 | 157 | nodeset = NodeSet._fromlist1(worker.iter_keys_timeout()) |
157 | 158 | self._display.vprint_err(VERB_QUIET, |
158 | "clush: %s: command timeout" % nodeset) | |
159 | "%s: %s: command timeout" % | |
160 | (self._prog, nodeset)) | |
159 | 161 | self.update_prompt(worker) |
160 | 162 | |
161 | 163 | class DirectProgressOutputHandler(DirectOutputHandler): |
179 | 181 | |
180 | 182 | class CopyOutputHandler(DirectProgressOutputHandler): |
181 | 183 | """Copy output event handler.""" |
182 | def __init__(self, display, reverse=False): | |
183 | DirectOutputHandler.__init__(self, display) | |
184 | def __init__(self, display, reverse=False, prog=None): | |
185 | DirectOutputHandler.__init__(self, display, prog=prog) | |
184 | 186 | self.reverse = reverse |
185 | 187 | |
186 | 188 | def ev_close(self, worker, timedout): |
203 | 205 | DirectOutputHandler.ev_close(self, worker, timedout) |
204 | 206 | |
205 | 207 | class GatherOutputHandler(OutputHandler): |
206 | """Gathered output event handler class (clush -b).""" | |
207 | ||
208 | def __init__(self, display): | |
209 | OutputHandler.__init__(self) | |
208 | """Gathered output event handler class (e.g. clush -b).""" | |
209 | ||
210 | def __init__(self, display, prog=None): | |
211 | OutputHandler.__init__(self, prog=prog) | |
210 | 212 | self._display = display |
211 | 213 | |
212 | 214 | def ev_read(self, worker, node, sname, msg): |
255 | 257 | nsdisp = ns = NodeSet._fromlist1(nodelist) |
256 | 258 | if self._display.verbosity > VERB_QUIET and len(ns) > 1: |
257 | 259 | nsdisp = "%s (%d)" % (ns, len(ns)) |
258 | msgrc = "clush: %s: exited with exit code %d" % (nsdisp, rc) | |
260 | msgrc = "%s: %s: exited with exit code %d" % (self._prog, nsdisp, rc) | |
259 | 261 | self._display.vprint_err(verbexit, msgrc) |
260 | 262 | |
261 | 263 | # Display nodes that didn't answer within command timeout delay |
262 | 264 | if worker.num_timeout() > 0: |
263 | self._display.vprint_err(verbexit, "clush: %s: command timeout" % \ | |
264 | NodeSet._fromlist1(worker.iter_keys_timeout())) | |
265 | self._display.vprint_err(verbexit, "%s: %s: command timeout" % \ | |
266 | (self._prog, NodeSet._fromlist1(worker.iter_keys_timeout()))) | |
265 | 267 | |
266 | 268 | class SortedOutputHandler(GatherOutputHandler): |
267 | """Sorted by node output event handler class (clush -L).""" | |
269 | """Sorted by node output event handler class (e.g. clush -L).""" | |
268 | 270 | |
269 | 271 | def ev_close(self, worker, timedout): |
270 | 272 | # Overrides GatherOutputHandler.ev_close() |
289 | 291 | class LiveGatherOutputHandler(GatherOutputHandler): |
290 | 292 | """Live line-gathered output event handler class (-bL).""" |
291 | 293 | |
292 | def __init__(self, display, nodes): | |
294 | def __init__(self, display, nodes, prog=None): | |
293 | 295 | assert nodes is not None, "cannot gather local command" |
294 | GatherOutputHandler.__init__(self, display) | |
296 | GatherOutputHandler.__init__(self, display, prog=prog) | |
295 | 297 | self._nodes = NodeSet(nodes) |
296 | 298 | self._nodecnt = dict.fromkeys(self._nodes, 0) |
297 | 299 | self._mtreeq = [] |
345 | 347 | |
346 | 348 | class RunTimer(EventHandler): |
347 | 349 | """Running progress timer event handler""" |
348 | def __init__(self, task, total): | |
350 | def __init__(self, task, total, prog=None): | |
349 | 351 | EventHandler.__init__(self) |
350 | 352 | self.task = task |
351 | 353 | self.total = total |
356 | 358 | # updated by worker handler for progress |
357 | 359 | self.start_time = 0 |
358 | 360 | self.bytes_written = 0 |
361 | self._prog = prog if prog else os.path.basename(sys.argv[0]) | |
359 | 362 | |
360 | 363 | def ev_timer(self, timer): |
361 | 364 | self.update() |
389 | 392 | if self.bytes_written > 0 or cnt != self.cnt_last: |
390 | 393 | self.cnt_last = cnt |
391 | 394 | # display completed/total clients |
392 | towrite = 'clush: %*d/%*d%s%s\r' % (self.tslen, self.total - cnt, | |
393 | self.tslen, self.total, gwinfo, | |
394 | wrbwinfo) | |
395 | towrite = '%s: %*d/%*d%s%s\r' % (self._prog, self.tslen, | |
396 | self.total - cnt, self.tslen, | |
397 | self.total, gwinfo, wrbwinfo) | |
395 | 398 | self.wholelen = len(towrite) |
396 | 399 | sys.stderr.write(towrite) |
397 | 400 | self.started = True |
402 | 405 | return |
403 | 406 | self.erase_line() |
404 | 407 | # display completed/total clients |
405 | fmt = 'clush: %*d/%*d' | |
408 | fmt = '%s: %*d/%*d' | |
406 | 409 | if force_cr: |
407 | 410 | fmt += '\n' |
408 | 411 | else: |
409 | 412 | fmt += '\r' |
410 | sys.stderr.write(fmt % (self.tslen, self.total, self.tslen, self.total)) | |
413 | sys.stderr.write(fmt % (self._prog, self.tslen, self.total, self.tslen, | |
414 | self.total)) | |
411 | 415 | |
412 | 416 | |
413 | 417 | def signal_handler(signum, frame): |
54 | 54 | """ |
55 | 55 | Return the class pointer matching `workername`. |
56 | 56 | |
57 | This can be the 'short' name (such as `ssh`) or a fully-qualified | |
58 | module path (such as ClusterShell.Worker.Ssh). | |
59 | ||
57 | 60 | The module is loaded if not done yet. |
58 | 61 | """ |
59 | modname = "ClusterShell.Worker.%s" % workername.capitalize() | |
60 | ||
62 | ||
63 | # First try the worker name as a module under ClusterShell.Worker, | |
64 | # but if that fails, try the worker name directly | |
65 | try: | |
66 | modname = "ClusterShell.Worker.%s" % workername.capitalize() | |
67 | _import_module(modname) | |
68 | except ImportError: | |
69 | modname = workername | |
70 | _import_module(modname) | |
71 | ||
72 | # Get the class pointer | |
73 | return sys.modules[modname].WORKER_CLASS | |
74 | ||
75 | def _import_module(modname): | |
76 | """Import a python module if not done yet.""" | |
61 | 77 | # Iterate over a copy of sys.modules' keys to avoid RuntimeError |
62 | 78 | if modname.lower() not in [mod.lower() for mod in list(sys.modules)]: |
63 | 79 | # Import module if not yet loaded |
64 | 80 | __import__(modname) |
65 | ||
66 | # Get the class pointer | |
67 | return sys.modules[modname].WORKER_CLASS | |
68 | 81 | |
69 | 82 | def _local_workerclass(defaults): |
70 | 83 | """Return default local worker class.""" |
253 | 253 | task = task_self() |
254 | 254 | task._info.update(taskinfo) |
255 | 255 | task.set_info('print_debug', _gw_print_debug) |
256 | ||
257 | for infokey in taskinfo: | |
258 | if infokey.startswith('tree_default:'): | |
259 | self.logger.debug('Setting default %s to %s', infokey[13:], taskinfo[infokey]) | |
260 | task.set_default(infokey[13:], taskinfo[infokey]) | |
256 | 261 | |
257 | 262 | if task.info('debug'): |
258 | 263 | self.logger.setLevel(logging.DEBUG) |
444 | 444 | result = [] |
445 | 445 | assert source |
446 | 446 | raw = getattr(source, 'resolv_%s' % what)(*args) |
447 | if isinstance(raw, list): | |
448 | raw = ','.join(raw) | |
447 | 449 | for line in raw.splitlines(): |
448 | 450 | [result.append(x) for x in line.strip().split()] |
449 | 451 | return result |
955 | 955 | for rgvec in self._veclist: |
956 | 956 | iveclist += product(*rgvec) |
957 | 957 | assert(len(iveclist) == len(self)) |
958 | rnd = RangeSetND(iveclist[index], | |
959 | pads=[rg.padding for rg in self._veclist[0]], | |
958 | rnd = RangeSetND(iveclist[index], pads=self.pads(), | |
960 | 959 | autostep=self.autostep) |
961 | 960 | return rnd |
962 | 961 |
57 | 57 | basestring = str |
58 | 58 | |
59 | 59 | from ClusterShell.Defaults import config_paths, DEFAULTS |
60 | from ClusterShell.Defaults import _local_workerclass, _distant_workerclass | |
60 | from ClusterShell.Defaults import _local_workerclass, _distant_workerclass, _load_workerclass | |
61 | 61 | from ClusterShell.Engine.Engine import EngineAbortException |
62 | 62 | from ClusterShell.Engine.Engine import EngineTimeoutException |
63 | 63 | from ClusterShell.Engine.Engine import EngineAlreadyRunningError |
469 | 469 | self._default_lock.acquire() |
470 | 470 | try: |
471 | 471 | self._default[default_key] = value |
472 | if default_key == 'local_workername': | |
473 | self._default['local_worker'] = _load_workerclass(value) | |
474 | elif default_key == 'distant_workername': | |
475 | self._default['distant_worker'] = _load_workerclass(value) | |
472 | 476 | finally: |
473 | 477 | self._default_lock.release() |
474 | 478 | |
509 | 513 | - "command_timeout": Time in seconds to wait for a command to |
510 | 514 | complete before aborting (default: 0, which means |
511 | 515 | unlimited). |
516 | - "tree_default:<key>": In tree mode, overrides the key <key> | |
517 | in Defaults (settings normally set in defaults.conf) | |
512 | 518 | |
513 | 519 | Threading considerations |
514 | 520 | ======================== |
25 | 25 | |
26 | 26 | import os |
27 | 27 | import shlex |
28 | import re | |
28 | 29 | |
29 | 30 | from ClusterShell.Worker.Exec import ExecClient, CopyClient, ExecWorker |
30 | 31 | |
33 | 34 | """ |
34 | 35 | Rsh EngineClient. |
35 | 36 | """ |
37 | ||
38 | def __init__(self, node, command, worker, stderr, timeout, autoclose=False, | |
39 | rank=None): | |
40 | ExecClient.__init__(self, node, command, worker, stderr, timeout, | |
41 | autoclose, rank) | |
42 | self.rsh_rc = None | |
36 | 43 | |
37 | 44 | def _build_cmd(self): |
38 | 45 | """ |
58 | 65 | cmd_l.append("%s" % self.key) # key is the node |
59 | 66 | cmd_l.append("%s" % self.command) |
60 | 67 | |
68 | # rsh does not properly return exit status | |
69 | # force the exit status to be printed out | |
70 | cmd_l.append("; echo XXRETCODE: $?") | |
71 | ||
61 | 72 | return (cmd_l, None) |
73 | ||
74 | def _on_nodeset_msgline(self, nodes, msg, sname): | |
75 | """Override _on_nodeset_msgline to parse magic return code""" | |
76 | match = re.search("^XXRETCODE: (\d+)$", msg.decode()) | |
77 | if match: | |
78 | self.rsh_rc = int(match.group(1)) | |
79 | else: | |
80 | ExecClient._on_nodeset_msgline(self, nodes, msg, sname) | |
81 | ||
82 | def _on_nodeset_close(self, nodes, rc): | |
83 | """Override _on_nodeset_close to return rsh_rc""" | |
84 | if (rc == 0 or rc == 1) and self.rsh_rc is not None: | |
85 | rc = self.rsh_rc | |
86 | ExecClient._on_nodeset_close(self, nodes, rc) | |
62 | 87 | |
63 | 88 | |
64 | 89 | class RcpClient(CopyClient): |
124 | 124 | |
125 | 125 | if self.reverse: |
126 | 126 | if user: |
127 | cmd_l.append("%s@%s:%s" % (user, self.key, self.source)) | |
127 | cmd_l.append("%s@[%s]:%s" % (user, self.key, self.source)) | |
128 | 128 | else: |
129 | cmd_l.append("%s:%s" % (self.key, self.source)) | |
129 | cmd_l.append("[%s]:%s" % (self.key, self.source)) | |
130 | 130 | |
131 | 131 | cmd_l.append(os.path.join(self.dest, "%s.%s" % \ |
132 | 132 | (os.path.basename(self.source), self.key))) |
133 | 133 | else: |
134 | 134 | cmd_l.append(self.source) |
135 | 135 | if user: |
136 | cmd_l.append("%s@%s:%s" % (user, self.key, self.dest)) | |
136 | cmd_l.append("%s@[%s]:%s" % (user, self.key, self.dest)) | |
137 | 137 | else: |
138 | cmd_l.append("%s:%s" % (self.key, self.dest)) | |
138 | cmd_l.append("[%s]:%s" % (self.key, self.dest)) | |
139 | 139 | |
140 | 140 | return (cmd_l, None) |
141 | 141 |
279 | 279 | tree=False) |
280 | 280 | else: |
281 | 281 | assert self.source is None |
282 | worker = ExecWorker(nodes=targets, | |
283 | command=self.command, | |
284 | handler=self.metahandler, | |
285 | timeout=self.timeout, | |
286 | stderr=self.stderr) | |
282 | workerclass = self.task.default('local_worker') | |
283 | worker = workerclass(nodes=targets, | |
284 | command=self.command, | |
285 | handler=self.metahandler, | |
286 | timeout=self.timeout, | |
287 | stderr=self.stderr) | |
287 | 288 | self.task.schedule(worker) |
288 | 289 | |
289 | 290 | self.workers.append(worker) |
27 | 27 | doc/man/man5/clush.conf.5 |
28 | 28 | doc/man/man5/groups.conf.5 |
29 | 29 | doc/sphinx/Makefile |
30 | doc/sphinx/clustershell-nautilus-logo200.png | |
31 | 30 | doc/sphinx/conf.py |
32 | 31 | doc/sphinx/config.rst |
33 | 32 | doc/sphinx/further.rst |
2 | 2 | |
3 | 3 | """Unit test for ClusterShell.Defaults""" |
4 | 4 | |
5 | import os | |
6 | import sys | |
7 | import shutil | |
8 | ||
5 | 9 | from textwrap import dedent |
6 | 10 | import unittest |
7 | 11 | |
8 | from TLib import make_temp_file | |
12 | from TLib import make_temp_file, make_temp_dir | |
9 | 13 | |
10 | 14 | from ClusterShell.Defaults import Defaults, _task_print_debug |
11 | 15 | |
97 | 101 | self.assertTrue(task.default("distant_worker") is WorkerSsh) |
98 | 102 | task_terminate() |
99 | 103 | |
104 | dname = make_temp_dir() | |
105 | modfile = open(os.path.join(dname, 'OutOfTree.py'), 'w') | |
106 | modfile.write(dedent(""" | |
107 | class OutOfTreeWorker(object): | |
108 | pass | |
109 | WORKER_CLASS = OutOfTreeWorker""")) | |
110 | modfile.flush() | |
111 | modfile.close() | |
112 | sys.path.append(dname) | |
113 | self.defaults.distant_workername = 'OutOfTree' | |
114 | task = task_self(self.defaults) | |
115 | self.assertTrue(task.default("distant_worker").__name__ is 'OutOfTreeWorker') | |
116 | task_terminate() | |
117 | shutil.rmtree(dname, ignore_errors=True) | |
118 | ||
100 | 119 | def test_005_misc_value_errors(self): |
101 | 120 | """test Defaults misc value errors""" |
102 | 121 | task_terminate() |
1588 | 1588 | self.assertRaises(GroupResolverConfigError, YAMLGroupLoader, f.name) |
1589 | 1589 | |
1590 | 1590 | |
1591 | def test_list_group(self): | |
1592 | f = make_temp_file(dedent(""" | |
1593 | rednecks: | |
1594 | bubba: | |
1595 | - pickup-1 | |
1596 | - pickup-2 | |
1597 | - tractor-[1-2]""").encode('ascii')) | |
1598 | loader = YAMLGroupLoader(f.name) | |
1599 | sources = list(loader) | |
1600 | resolver = GroupResolver(sources[0]) | |
1601 | self.assertEqual(resolver.group_nodes('bubba'), | |
1602 | [ 'pickup-1,pickup-2,tractor-[1-2]' ]) | |
1603 | ||
1591 | 1604 | class GroupResolverYAMLTest(unittest.TestCase): |
1592 | 1605 | |
1593 | 1606 | def setUp(self): |
431 | 431 | # steps |
432 | 432 | self.assertEqual(str(rn1[0:12:2]), "0-3; 1\n10; 10,12\n") |
433 | 433 | self.assertEqual(str(rn1[1:12:2]), "0-3; 2\n10; 11,13\n") |
434 | # GitHub #429 | |
435 | rn1 = RangeSetND([["110", "15-16"], ["107", "06"]]) | |
436 | self.assertEqual(str(rn1[0:3:2]), "107; 06\n110; 15\n") | |
434 | 437 | |
435 | 438 | def test_contiguous(self): |
436 | 439 | rn0 = RangeSetND() |
189 | 189 | self.assertEqual(teh.ev_timedout_cnt, 0) |
190 | 190 | self.assertEqual(teh.ev_close_cnt, 1) |
191 | 191 | self.assertEqual(teh.last_read, NODE_DISTANT.encode('ascii')) |
192 | ||
193 | def test_tree_run_noremote_alt_localworker(self): | |
194 | """test tree run with remote=False and a non-exec localworker""" | |
195 | teh = TEventHandler() | |
196 | self.task.set_info('tree_default:local_workername', 'ssh') | |
197 | self.task.run('echo %h', nodes=NODE_DISTANT, handler=teh, remote=False) | |
198 | self.assertEqual(teh.ev_start_cnt, 1) | |
199 | self.assertEqual(teh.ev_pickup_cnt, 1) | |
200 | self.assertEqual(teh.ev_read_cnt, 1) | |
201 | self.assertEqual(teh.ev_written_cnt, 0) | |
202 | self.assertEqual(teh.ev_hup_cnt, 1) | |
203 | self.assertEqual(teh.ev_timedout_cnt, 0) | |
204 | self.assertEqual(teh.ev_close_cnt, 1) | |
205 | # The exec worker will expand %h to the host, but ssh will just echo '%h' | |
206 | self.assertEqual(teh.last_read, '%h'.encode('ascii')) | |
207 | del self.task._info['tree_default:local_workername'] | |
192 | 208 | |
193 | 209 | def test_tree_run_direct(self): |
194 | 210 | """test tree run with direct target, in topology""" |