Skip to content

Commit 2eb1c79

Browse files
committed
🐛 bug fixes. Arrays should now show up correctly
1 parent 2b735e7 commit 2eb1c79

File tree

2 files changed

+95
-35
lines changed

2 files changed

+95
-35
lines changed

stui/backend.py

+65-24
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,13 @@ def _thread_fn(self):
102102

103103
sleep(1)
104104

105-
except (
106-
EOFError,
107-
OSError,
108-
socket.error,
109-
):
105+
except:
106+
# except (
107+
# EOFError,
108+
# OSError,
109+
# socket.error,
110+
# ):
111+
# TODO implement a proper reconnect state with timeout?
110112
# TODO: Where's the best place to do error handling?
111113
with self.lock:
112114
self.fabric_connection = fabric.Connection(self.remote)
@@ -150,13 +152,23 @@ def _get_partition_info(self):
150152
return my_p, all_p
151153

152154
def _get_jobs(self):
    """Run ``squeue`` and return one ``Job`` per row of its output.

    The first output line is treated as the header; its ``|``-separated
    column names become the keys of the dict handed to ``Job``. The raw row
    is also stored under ``"whole_line"``.

    Returns:
        list: ``Job`` objects in the order squeue printed them. Empty when
        the command produced no output or only a header.
    """
    # %A : Job id. This will have a unique value for each element of job arrays. (Valid for jobs only)
    # %F : Job array's job ID. This is the base job ID. For non-array jobs, this is the job ID. (Valid for jobs only)
    # %i : Job or job step id. In the case of job arrays, the job ID format will be of the form "<base_job_id>_<index>"
    # %K : Job array index
    cmd = r'squeue --all --format="%A|%i|%K|%F|%C|%b|%j|%P|%r|%u|%y|%T|%M|%b|%N"'
    o = self._run_command(cmd)

    # No output at all (presumably a failed or empty command -- verify
    # against _run_command): there is no header to index, so report no jobs.
    if not o:
        return []

    fields = o[0].split("|")

    # squeue gives the same column name (JOBID) to %A and %i so we'd have to
    # manually differentiate them here
    fields[1] = "JOB_ID_COMBINED"

    jobs = []
    for line in o[1:]:
        # A blank row would yield a dict without the columns Job requires.
        if not line.strip():
            continue
        job = dict(zip(fields, line.split("|")))
        job["whole_line"] = line
        jobs.append(Job(job))

    return jobs
@@ -194,36 +206,65 @@ def cancel_my_oldest_job(self):
194206

195207

196208
class Job(object):
    """A single squeue row exposed as attributes.

    Array-related attributes:

    * ``is_array_job``: ``True`` unless ``ARRAY_TASK_ID`` is ``"N/A"``.
    * ``array_total_jobs``: for a pending array job whose task id ends in a
      ``-<n>`` range, the string ``<n>``; otherwise ``None``.
    * ``array_throttle``: for a pending array job, the digits after ``%`` as
      a string, or ``0`` when no ``%`` is present; otherwise ``None``.

    Both array attributes are always defined, so callers can read them for
    any job without risking ``AttributeError``.
    """

    def __init__(self, d: dict):
        """Populate the job from a mapping of squeue column names to values.

        Args:
            d: Row dict. Every key read below is required except ``GRES``,
                which defaults to ``None`` when absent.

        Raises:
            KeyError: If a required column is missing from ``d``.
        """
        super().__init__()

        self.job_id = d["JOBID"]
        self.job_id_combined = d["JOB_ID_COMBINED"]
        self.nodes = d["NODELIST"].split(",")
        self.partition = d["PARTITION"]
        self.name = d["NAME"]
        self.user = d["USER"]
        self.state = d["STATE"]
        self.time = d["TIME"]
        self.nice = d["NICE"]
        self.cpus = d["CPUS"]
        self.gres = d.get("GRES")

        self.whole_line = d["whole_line"]

        self.array_base_id = d["ARRAY_JOB_ID"]
        self.array_task_id = d["ARRAY_TASK_ID"]

        self.is_array_job = self.array_task_id != "N/A"

        # Define both attributes on every path; only pending array jobs
        # carry a range/throttle spec worth parsing.
        self.array_total_jobs = None
        self.array_throttle = None
        if self.is_array_job and self.is_pending():
            self.array_total_jobs, self.array_throttle = self._parse_array_spec(
                self.array_task_id
            )

    @staticmethod
    def _parse_array_spec(task_id):
        """Split an array task id such as ``"0-15%4"`` into (range end, throttle).

        Returns:
            tuple: ``(total, throttle)``. ``total`` is the digits after the
            last ``-`` of the index spec, or ``None`` when the spec does not
            end in a range. ``throttle`` is the digits after ``%``, ``None``
            when the text after ``%`` is not numeric, or ``0`` when there is
            no ``%`` at all.
        """
        spec, sep, limit = task_id.partition("%")

        # Take the range end from the index spec only, never from the
        # throttle suffix.
        range_end = re.search(r"-(\d+)$", spec)
        total = range_end.group(1) if range_end else None

        if sep:
            throttle = limit if re.fullmatch(r"\d+", limit) else None
        else:
            throttle = 0  # TODO: 0 means unlimited. Is this good?

        return total, throttle

    def __repr__(self):
        return f"Job {self.job_id} - State{self.state}"

    def is_running(self):
        """Return True when the squeue state is exactly ``RUNNING``."""
        return self.state == "RUNNING"

    def is_pending(self):
        """Return True when the squeue state is exactly ``PENDING``."""
        return self.state == "PENDING"

    def uses_gpu(self):
        """Return True when the GRES column mentions ``gpu``.

        A job without a GRES column reports False instead of raising.
        """
        return self.gres is not None and "gpu" in self.gres

    def is_array_job_f(self):
        """Method form of the ``is_array_job`` attribute."""
        return self.is_array_job  # TODO: use property?

    def array_str(self):
        """Return the array task id for display, or ``""`` for non-array jobs."""
        if not self.is_array_job:
            return ""
        return self.array_task_id

stui/stui.py

+30-11
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class JobWidget(urwid.WidgetWrap):
1919
"CANCELLED": {None: ""},
2020
"COMPLETED": {None: ""},
2121
"CONFIGURING": {None: ""},
22-
"COMPLETING": {None: ""},
22+
"COMPLETING": {None: "job_state_completeing"},
2323
"DEADLINE": {None: ""},
2424
"FAILED": {None: ""},
2525
"NODE FAIL": {None: ""},
@@ -46,6 +46,7 @@ def __init__(self, job):
4646
self.columns = OrderedDict()
4747
self.columns["selected"] = urwid.Text("", wrap="ellipsis")
4848
self.columns["job_id"] = urwid.Text("", wrap="ellipsis")
49+
self.columns["array"] = urwid.Text("", wrap="ellipsis")
4950
self.columns["user"] = urwid.Text("", wrap="ellipsis")
5051
self.columns["name"] = urwid.Text("", wrap="ellipsis")
5152
self.columns["state"] = urwid.AttrMap(urwid.Text("", wrap="ellipsis"), None)
@@ -73,6 +74,7 @@ def __init__(self, job):
7374

7475
def update_values(self, job):
7576
self.columns["job_id"].set_text(job.job_id)
77+
self.columns["array"].set_text(job.array_str())
7678
self.columns["user"].set_text(job.user)
7779
self.columns["name"].set_text(job.name)
7880
self.columns["state"]._original_widget.set_text(job.state.title())
@@ -91,7 +93,12 @@ def set_selected_attr(self, in_focus):
9193
attr = "highlight_out_of_focus"
9294

9395
self._w.set_focus_map(
94-
{None: attr, "job_state_running": attr, "job_state_pending": attr}
96+
{
97+
None: attr,
98+
"job_state_running": attr,
99+
"job_state_pending": attr,
100+
"job_state_completeing": attr,
101+
}
95102
)
96103

97104
def keypress(self, size, key):
@@ -118,6 +125,7 @@ class JobQueueWidget(urwid.WidgetWrap):
118125
column_widths = [
119126
(2,),
120127
(10,),
128+
(10,),
121129
("weight", 1),
122130
("weight", 2),
123131
(14,),
@@ -133,6 +141,7 @@ def __init__(self):
133141
column_labels = [
134142
"",
135143
"Job ID",
144+
"Job Array",
136145
"User",
137146
"Name",
138147
"State",
@@ -298,9 +307,15 @@ def __init__(self):
298307

299308
super().__init__(w)
300309

301-
def set_nice(self, value):
310+
def set_nice_value(self, value):
    """Display ``value`` in the nice spin button."""
    spinbutton = self.nice_spinbutton
    spinbutton.set_value(value)
303312

313+
def enable_nice(self):
    """Enable the nice spin button."""
    spinbutton = self.nice_spinbutton
    spinbutton.enable()
315+
316+
def disable_nice(self):
    """Disable the nice spin button."""
    spinbutton = self.nice_spinbutton
    spinbutton.disable()
318+
304319
def enable_throttle(self):
    """Enable the throttle spin button."""
    spinbutton = self.throttle_spinbutton
    spinbutton.enable()
306321

@@ -384,13 +399,16 @@ def on_jobs_focus_changed(self):
384399
if job is None:
385400
return
386401

387-
self.apanel.set_nice(job.nice)
402+
self.apanel.set_nice_value(job.nice)
388403

389-
if job.array_throttle is not None:
390-
self.apanel.enable_throttle()
391-
self.apanel.set_throttle_value(job.array_throttle)
392-
else:
404+
if job.is_running():
405+
self.apanel.disable_nice()
393406
self.apanel.disable_throttle()
407+
else:
408+
self.apanel.enable_nice()
409+
if job.is_array_job_f():
410+
self.apanel.enable_throttle()
411+
self.apanel.set_throttle_value(job.array_throttle)
394412

395413
def filter_jobs(self, jobs):
396414

@@ -706,6 +724,7 @@ def __init__(self, args):
706724
self.palette = [
707725
("job_state_running", "light cyan", ""),
708726
("job_state_pending", "yellow", ""),
727+
("job_state_completeing", "light magenta", ""),
709728
("active_tab_label", "yellow", ""),
710729
("focus_and_active_tab_label", "yellow,underline", ""),
711730
("focus_and_inactive_tab_label", "underline", ""),
@@ -748,13 +767,13 @@ def exit(*args, **kwargs):
748767

749768
elif message == b"connection established":
750769
self.fd = None # TODO: delattr?
751-
self.register_refresh()
752-
self.w.cluster_connected_callback()
770+
self.register_refresh()
771+
self.w.cluster_connected_callback()
753772

754773
# Return False so that the watch is removed from main loop and its read-end
755774
# of the pipe is closed. The write-end of the pipe will be closed on the
756775
# backend.
757-
return False
776+
return False
758777

759778
def run(self):
760779
# self.loop.screen.set_terminal_properties(bright_is_bold=False)

0 commit comments

Comments
 (0)