Page History
...
Code Block |
---|
# Compare job runtimes between nodes with and without 'cson' in their log
# header, using the timing logs found under iter2/.
#
# Each log is scanned for two kinds of lines:
#   * a line containing '***': its second whitespace-separated field is
#     taken as the node name, and the presence of 'cson' on that line marks
#     the job as an "on" job;
#   * a line containing 'real': its second field is parsed as
#     <minutes>m<seconds><one trailing character>, e.g. "12m34.5s"
#     (presumably the output of `time` -- confirm against the job script).
import glob

import numpy as np
import matplotlib.pyplot as plt

logs = glob.glob('iter2/*.log')
logs.sort()  # put them in time order

nodes = []
ontimes = []
offtimes = []
onnodes = []
offnodes = []


def nodecount(nodelist):
    """Print every distinct node in *nodelist* with its number of jobs."""
    uniquenodes = set(nodelist)
    for n in uniquenodes:
        print(n, nodelist.count(n))


for log in logs:
    on = False
    # 'with' guarantees the log is closed once it has been read.
    with open(log, 'r') as f:
        for line in f:
            if '***' in line:
                if 'cson' in line:
                    on = True
                node = line.split()[1]
            if 'real' in line:
                timestr = line.split()[1]
                minutes_seconds = timestr.split('m')
                minutes = float(minutes_seconds[0])
                # [:-1] drops the trailing unit character after the seconds.
                seconds = float(minutes_seconds[1][:-1])
                time = minutes * 60 + seconds
                # Repeated nodes are not skipped: every job contributes one
                # sample, even when several jobs ran on the same node.
                nodes.append(node)
                if on:
                    ontimes.append(time)
                    onnodes.append(node)
                else:
                    offtimes.append(time)
                    offnodes.append(node)

# Index 0 holds the "off" sample, index 1 the "on" sample.
mean = []
err_on_mean = []
for times in [offtimes, ontimes]:
    mean.append(np.mean(times))
    # NOTE: np.std defaults to the population standard deviation (ddof=0).
    err_on_mean.append(np.std(times) / np.sqrt(len(times)))

# The errors on the two means are combined in quadrature.
diff_err = np.sqrt(err_on_mean[0]**2 + err_on_mean[1]**2)
diff = mean[1] - mean[0]

print('*** offnodes job count:')
nodecount(offnodes)
print('*** onnodes job count:')
nodecount(onnodes)
# Change of the "on" mean runtime relative to the "off" mean runtime.
print('Fractional change:', diff / mean[0], '+-', diff_err / mean[0])

plt.hist([ontimes, offtimes])
plt.show()
...
*** offnodes job count:
sdfrome042 48
sdfrome043 14
sdfrome111 1
sdfrome039 27
sdfrome086 10
*** onnodes job count:
sdfrome016 100
Fractional change: 0.2359417044882193 +- 0.015870310667490246
Update 2024-09-15
We repeated the test on the roma partition: 105 iterations for each setting, alternating between the Crowdstrike_on and Crowdstrike_off constraints. This test was performed during a period of low utilization of the roma partition, with no competing network or storage contention.
We measured the runtime of a psana analysis of mfxl1028222 run=29:smd on an exclusive node with 120 cores.
Note: the previous measurements were done with run=90:smd. We chose run=29:smd, because it has more events and therefore takes longer, minimizing effects related to job startup.
Fractional change: 0.24461288024797354 +- 0.001079561972505891