Page History
...
where <parameter> selects the number of CPUs used in the test, e.g. <parameter> = −1, −2, −8, −13, −16, −17, −18 stands for tests on a single CPU and on 8, 16, 32, 56, 64, and 128 CPUs, respectively.
Code Block | ||||
---|---|---|---|---|
| ||||
import numpy as np
from time import time, sleep
def random_standard(shape=(40,60), mu=200, sigma=25, dtype=np.float64):
    """Return an array of normally distributed random numbers.

    Values are drawn with ``np.random.standard_normal``, scaled by *sigma*
    and shifted by *mu*, then passed through ``np.require`` to obtain the
    requested dtype.

    Parameters
    ----------
    shape : tuple of int
        Shape of the returned array.
    mu : float
        Offset added to every sample (mean of the distribution).
    sigma : float
        Scale factor applied to every sample (standard deviation).
    dtype : data-type
        dtype of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of the given shape and dtype.
    """
    a = mu + sigma*np.random.standard_normal(shape)
    return np.require(a, dtype)
def random_arrays(sh2d = (8*512,1024), dtype=np.float64):
    """Return a pair of random arrays: one 2-d and one 3-d.

    Parameters
    ----------
    sh2d : sequence of int
        Shape of the first array. The second array has shape
        ``(3,) + sh2d``, i.e. three planes of the same shape.
    dtype : data-type
        dtype of both returned arrays.

    Returns
    -------
    tuple of numpy.ndarray
        ``(a, b)`` where ``a`` has shape ``sh2d`` (mu=10, sigma=2) and
        ``b`` has shape ``(3,) + sh2d`` (mu=20, sigma=3).
    """
    # tuple() lets callers pass the shape as a list without breaking the
    # concatenation below.
    sh2d = tuple(sh2d)
    sh3d = (3,) + sh2d
    return random_standard(shape=sh2d, mu=10, sigma=2, dtype=dtype),\
           random_standard(shape=sh3d, mu=20, sigma=3, dtype=dtype)
def time_consuming_algorithm():
    """Run a fixed numpy workload and return the timestamps of its stages.

    The workload generates two random arrays with ``random_arrays()``,
    builds three boolean masks over the first array (``a>=11``,
    ``9<a<11``, ``a<=9``), and for each mask subtracts in place the
    matching elements of one plane of the second array.

    Returns
    -------
    tuple of float
        ``(t01, t02, t03, t04)`` as returned by ``time()``:
        before array generation, after array generation, after the masks
        are built, and after the masked subtractions.
    """
    t01 = time()
    a, b = random_arrays()
    t02 = time()
    gr1 = a>=11
    gr2 = (a>9) & (a<11)
    gr3 = a<=9
    t03 = time()
    # The masks are computed before any subtraction, so each element of
    # ``a`` is modified by exactly one of the three statements below.
    a[gr1] -= b[0, gr1]
    a[gr2] -= b[1, gr2]
    a[gr3] -= b[2, gr3]
    t04 = time()
    return (t01, t02, t03, t04)
Code Block | ||||
---|---|---|---|---|
| ||||
def do_algo(cpu=0, cmt='v0'):
hostname = get_hostname()
#cpu_num = psutil.Process().cpu_num()
print('requested cpu:%03d' % cpu)
SAVE_FIGS = True
SHOW_FIGS = False
nevents = 100
ntpoints = 6
arrts = np.zeros((nevents,ntpoints), dtype=np.float64)
t05_old = time()
for nevt in range(nevents):
t00 = time()
times = time_consuming_algorithm()
cpu_num = psutil.Process().cpu_num()
#if cpu_num >=16 and cpu_num <=23:
# print('cpu_num:%03d nevt:%03d time:%.6f CPU_NUM IN WEKA RANGE [16,23]' % (cpu_num, nevt, dt_sec))
t05 = time()
times = (t00,) + times + (t05,)
arrts[nevt,:] = times
dt_evt = t05 - t05_old
t05_old = t05
if nevt%10>0: continue
dt_alg = times[4] - times[3]
dt_in = times[4] - times[1]
print('cpu_num:%03d nevt:%03d times (sec)' % (cpu_num, nevt), \
' random arrs: %.6f' % (times[2] - times[1]), \
' indeces: %.6f' % (times[3] - times[2]), \
' alg: %.6f' % (times[4] - times[3]), \
' inside algo: %.6f' % (times[4] - times[1]), \
' per event: %.6f' % dt_evt)
...
further code is for saving results and graphics |
Results
Code Block | ||||
---|---|---|---|---|
| ||||
ana-4.0.59-py3 [dubrovin@sdfmilan216:~/LCLS/con-py3]$ 1 CPU ====== Performance counter stats for 'python test-scaling-subproc.py -1': 4,522,410,200 cache-references:u (62.49%) 112,207,635 cache-misses:u # 2.481 % of all cache refs (62.51%) 224,402,878,245 cycles:u (62.51%) 428,582,543,872 instructions:u # 1.91 insn per cycle (62.51%) 59,430,436,824 branches:u (62.50%) 2,353,206,592 branch-misses:u # 3.96% of all branches (62.50%) 657,277 faults:u 0 migrations:u 657,277 page-faults:u 2,169,783,808 L1-dcache-load-misses:u (62.50%) 7,173,374 L1-icache-load-misses:u (62.50%) 70.762930452 seconds time elapsed 66.918003000 seconds user 2.380196000 seconds sys 8 CPU ====== Performance counter stats for 'python test-scaling-subproc.py -2': 35,293,654,947 cache-references:u (62.50%) 675,772,563 cache-misses:u # 1.915 % of all cache refs (62.50%) 1,863,835,416,629 cycles:u (62.50%) 3,408,694,078,315 instructions:u # 1.83 insn per cycle (62.50%) 470,729,321,611 branches:u (62.50%) 18,710,029,709 branch-misses:u # 3.97% of all branches (62.50%) 4,759,204 faults:u 0 migrations:u 4,759,204 page-faults:u 17,164,781,068 L1-dcache-load-misses:u (62.50%) 42,407,266 L1-icache-load-misses:u (62.50%) 82.107165073 seconds time elapsed 600.726489000 seconds user 28.169314000 seconds sys 16 CPU ====== Performance counter stats for 'python test-scaling-subproc.py -8': 71,125,012,043 cache-references:u (62.50%) 2,509,743,885 cache-misses:u # 3.529 % of all cache refs (62.50%) 4,256,512,072,612 cycles:u (62.50%) 6,815,210,853,848 instructions:u # 1.60 insn per cycle (62.50%) 940,797,592,651 branches:u (62.50%) 37,401,077,277 branch-misses:u # 3.98% of all branches (62.50%) 9,874,603 faults:u 0 migrations:u 9,874,603 page-faults:u 34,764,585,133 L1-dcache-load-misses:u (62.50%) 82,908,203 L1-icache-load-misses:u (62.50%) 98.180409648 seconds time elapsed 1370.175346000 seconds user 121.864448000 seconds sys 32 CPU ====== Performance counter stats for 'python test-scaling-subproc.py -13': 
140,229,421,945 cache-references:u (62.50%) 5,022,345,750 cache-misses:u # 3.582 % of all cache refs (62.50%) 8,558,410,936,114 cycles:u (62.50%) 13,628,360,184,584 instructions:u # 1.59 insn per cycle (62.50%) 1,881,291,550,548 branches:u (62.50%) 74,783,808,615 branch-misses:u # 3.98% of all branches (62.50%) 19,579,143 faults:u 0 migrations:u 19,579,143 page-faults:u 68,615,480,748 L1-dcache-load-misses:u (62.50%) 163,094,161 L1-icache-load-misses:u (62.50%) 99.279801084 seconds time elapsed 2763.979749000 seconds user 246.852789000 seconds sys 56 CPU ====== Performance counter stats for 'python test-scaling-subproc.py -16': 245,664,589,385 cache-references:u (62.50%) 5,986,128,102 cache-misses:u # 2.437 % of all cache refs (62.50%) 13,462,198,820,573 cycles:u (62.50%) 23,847,765,747,744 instructions:u # 1.77 insn per cycle (62.50%) 3,290,927,488,525 branches:u (62.50%) 130,897,170,304 branch-misses:u # 3.98% of all branches (62.50%) 35,494,247 faults:u 0 migrations:u 35,494,247 page-faults:u 119,933,873,577 L1-dcache-load-misses:u (62.50%) 288,403,921 L1-icache-load-misses:u (62.50%) 108.453630713 seconds time elapsed 5381.177612000 seconds user 333.903330000 seconds sys 64 CPU ====== Performance counter stats for 'python test-scaling-subproc.py -17': 281,639,175,978 cache-references:u (62.50%) 8,968,404,974 cache-misses:u # 3.184 % of all cache refs (62.50%) 16,140,364,752,053 cycles:u (62.50%) 27,256,133,511,829 instructions:u # 1.69 insn per cycle (62.50%) 3,761,710,111,186 branches:u (62.50%) 149,569,155,086 branch-misses:u # 3.98% of all branches (62.50%) 39,148,442 faults:u 0 migrations:u 39,148,442 page-faults:u 137,584,278,754 L1-dcache-load-misses:u (62.50%) 330,750,296 L1-icache-load-misses:u (62.50%) 120.688547006 seconds time elapsed 6274.688233000 seconds user 484.406164000 seconds sys 120 CPU ======= Performance counter stats for 'python test-scaling-subproc.py -18': 532,229,037,371 cache-references:u (62.50%) 14,227,944,434 cache-misses:u # 2.673 
% of all cache refs (62.50%) 29,404,359,241,173 cycles:u (62.50%) 51,095,884,028,391 instructions:u # 1.74 insn per cycle (62.50%) 7,053,547,766,317 branches:u (62.50%) 280,479,284,507 branch-misses:u # 3.98% of all branches (62.50%) 73,250,012 faults:u 0 migrations:u 73,250,012 page-faults:u 260,078,672,869 L1-dcache-load-misses:u (62.50%) 618,858,635 L1-icache-load-misses:u (62.50%) 119.736692035 seconds time elapsed 11628.275939000 seconds user 843.423292000 seconds sys |
...
Overview
Content Tools