Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

where <parameter> selects the number of CPUs used in the test, e.g. <parameter> = -1, -2, -8, -13, -16, -17, -18 stand for tests on 1, 8, 16, 32, 56, 64, 128 CPUs, respectively.

Code Block
titleCode of time_consuming_algorithm
collapsetrue
import numpy as np
from time import time, sleep

def random_standard(shape=(40,60), mu=200, sigma=25, dtype=np.float64):
    """Return an array of normally distributed random numbers.

    Parameters
    ----------
    shape : shape of the generated array
    mu    : mean of the distribution
    sigma : standard deviation of the distribution
    dtype : data type of the returned array
    """
    # Draw from the standard normal distribution, then scale and shift.
    noise = np.random.standard_normal(size=shape)
    samples = mu + sigma * noise
    return np.require(samples, dtype=dtype)

def random_arrays(sh2d = (8*512,1024), dtype=np.float64):
    """Return a pair of random arrays: one of shape sh2d and one of shape (3,) + sh2d.

    The first array is drawn with mu=10, sigma=2, the second with
    mu=20, sigma=3; both are returned with the requested dtype.
    """
    stacked_shape = (3,) + sh2d
    # Generation order (2-d first, then 3-d) is kept so that the sequence of
    # random draws is unchanged.
    arr2d = random_standard(shape=sh2d, mu=10, sigma=2, dtype=dtype)
    arr3d = random_standard(shape=stacked_shape, mu=20, sigma=3, dtype=dtype)
    return arr2d, arr3d

def time_consuming_algorithm():
    """Run one pass of the benchmark workload and return its checkpoint times.

    The workload generates the random arrays, builds three boolean masks
    splitting the 2-d array by value (>=11, between 9 and 11, <=9), and
    subtracts the matching plane of the 3-d array from each masked region.

    Returns a tuple of four time() stamps taken:
    at entry, after array generation, after mask evaluation, after subtraction.
    """
    t_start = time()
    data, planes = random_arrays()
    t_arrays = time()

    # All masks are evaluated before data is modified.
    high = data >= 11
    middle = (data > 9) & (data < 11)
    low = data <= 9
    t_masks = time()

    # Plane 0 is applied to the high region, plane 1 to the middle, plane 2 to the low.
    for iplane, mask in enumerate((high, middle, low)):
        data[mask] -= planes[iplane, mask]
    t_done = time()

    return (t_start, t_arrays, t_masks, t_done)
Code Block
titleCode of the event loop
collapsetrue
def do_algo(cpu=0, cmt='v0'):
    """Run time_consuming_algorithm in an event loop and record per-event timestamps.

    For each of nevents iterations six timestamps are stored in one row of arrts:
    (t00, t01, t02, t03, t04, t05), where t00 is taken just before the algorithm
    call, t01..t04 are returned by time_consuming_algorithm, and t05 is taken
    after the call. A summary line is printed for every 10th event.

    Parameters
    ----------
    cpu : requested cpu number; in the code shown here it is only printed.
    cmt : not used in the code shown here - NOTE(review): presumably a comment/version
          tag for the saved results; confirm against the rest of the function.

    NOTE(review): the listing is truncated after the print statement; the
    remaining code (saving results and graphics) is not shown, so variables that
    look unused here (hostname, SAVE_FIGS, SHOW_FIGS, dt_alg, dt_in) may be used there.
    """

    hostname = get_hostname()
    #cpu_num = psutil.Process().cpu_num()
    print('requested cpu:%03d' % cpu)

    SAVE_FIGS = True
    SHOW_FIGS = False
    nevents = 100
    ntpoints = 6  # number of timestamps per event: t00, t01, t02, t03, t04, t05
    arrts = np.zeros((nevents,ntpoints), dtype=np.float64)
    t05_old = time()  # end time of the previous event, used for the per-event period

    for nevt in range(nevents):
        t00 = time()
        times = time_consuming_algorithm()
        # cpu number is queried after the algorithm call, once per event
        cpu_num = psutil.Process().cpu_num()
        #if cpu_num >=16 and cpu_num <=23:
        #    print('cpu_num:%03d nevt:%03d time:%.6f CPU_NUM IN WEKA RANGE [16,23]' % (cpu_num, nevt, dt_sec))
        t05 = time()
        # prepend t00 and append t05 to the four algorithm timestamps -> 6 values
        times = (t00,) + times + (t05,)
        arrts[nevt,:] = times
        # time between the ends of consecutive events (includes loop overhead and printing)
        dt_evt = t05 - t05_old
        t05_old = t05
        # print only for every 10th event
        if nevt%10>0: continue
        dt_alg = times[4] - times[3]  # t04 - t03: masked subtraction step
        dt_in  = times[4] - times[1]  # t04 - t01: total time inside the algorithm
        print('cpu_num:%03d nevt:%03d  times (sec)' % (cpu_num, nevt), \
             ' random arrs: %.6f' % (times[2] - times[1]), \
             ' indeces: %.6f'     % (times[3] - times[2]), \
             ' alg: %.6f'         % (times[4] - times[3]), \
             ' inside algo: %.6f' % (times[4] - times[1]), \
             ' per event: %.6f'   % dt_evt)
...
further code is for saving results and graphics


Results

Code Block
titleResults of the test for 1,8,16,32,56,64,128 CPU
collapsetrue
ana-4.0.59-py3 [dubrovin@sdfmilan216:~/LCLS/con-py3]$


1 CPU
======
 Performance counter stats for 'python test-scaling-subproc.py -1':

     4,522,410,200      cache-references:u                                            (62.49%)
       112,207,635      cache-misses:u            #    2.481 % of all cache refs      (62.51%)
   224,402,878,245      cycles:u                                                      (62.51%)
   428,582,543,872      instructions:u            #    1.91  insn per cycle           (62.51%)
    59,430,436,824      branches:u                                                    (62.50%)
     2,353,206,592      branch-misses:u           #    3.96% of all branches          (62.50%)
           657,277      faults:u                                                    
                 0      migrations:u                                                
           657,277      page-faults:u                                               
     2,169,783,808      L1-dcache-load-misses:u                                       (62.50%)
         7,173,374      L1-icache-load-misses:u                                       (62.50%)

      70.762930452 seconds time elapsed

      66.918003000 seconds user
       2.380196000 seconds sys

8 CPU
======
 Performance counter stats for 'python test-scaling-subproc.py -2':

    35,293,654,947      cache-references:u                                            (62.50%)
       675,772,563      cache-misses:u            #    1.915 % of all cache refs      (62.50%)
 1,863,835,416,629      cycles:u                                                      (62.50%)
 3,408,694,078,315      instructions:u            #    1.83  insn per cycle           (62.50%)
   470,729,321,611      branches:u                                                    (62.50%)
    18,710,029,709      branch-misses:u           #    3.97% of all branches          (62.50%)
         4,759,204      faults:u                                                    
                 0      migrations:u                                                
         4,759,204      page-faults:u                                               
    17,164,781,068      L1-dcache-load-misses:u                                       (62.50%)
        42,407,266      L1-icache-load-misses:u                                       (62.50%)

      82.107165073 seconds time elapsed

     600.726489000 seconds user
      28.169314000 seconds sys

16 CPU
======
Performance counter stats for 'python test-scaling-subproc.py -8':

    71,125,012,043      cache-references:u                                            (62.50%)
     2,509,743,885      cache-misses:u            #    3.529 % of all cache refs      (62.50%)
 4,256,512,072,612      cycles:u                                                      (62.50%)
 6,815,210,853,848      instructions:u            #    1.60  insn per cycle           (62.50%)
   940,797,592,651      branches:u                                                    (62.50%)
    37,401,077,277      branch-misses:u           #    3.98% of all branches          (62.50%)
         9,874,603      faults:u                                                    
                 0      migrations:u                                                
         9,874,603      page-faults:u                                               
    34,764,585,133      L1-dcache-load-misses:u                                       (62.50%)
        82,908,203      L1-icache-load-misses:u                                       (62.50%)

      98.180409648 seconds time elapsed

    1370.175346000 seconds user
     121.864448000 seconds sys

32 CPU
======
 Performance counter stats for 'python test-scaling-subproc.py -13':

   140,229,421,945      cache-references:u                                            (62.50%)
     5,022,345,750      cache-misses:u            #    3.582 % of all cache refs      (62.50%)
 8,558,410,936,114      cycles:u                                                      (62.50%)
13,628,360,184,584      instructions:u            #    1.59  insn per cycle           (62.50%)
 1,881,291,550,548      branches:u                                                    (62.50%)
    74,783,808,615      branch-misses:u           #    3.98% of all branches          (62.50%)
        19,579,143      faults:u                                                    
                 0      migrations:u                                                
        19,579,143      page-faults:u                                               
    68,615,480,748      L1-dcache-load-misses:u                                       (62.50%)
       163,094,161      L1-icache-load-misses:u                                       (62.50%)

      99.279801084 seconds time elapsed

    2763.979749000 seconds user
     246.852789000 seconds sys

56 CPU
======
 Performance counter stats for 'python test-scaling-subproc.py -16':

   245,664,589,385      cache-references:u                                            (62.50%)
     5,986,128,102      cache-misses:u            #    2.437 % of all cache refs      (62.50%)
13,462,198,820,573      cycles:u                                                      (62.50%)
23,847,765,747,744      instructions:u            #    1.77  insn per cycle           (62.50%)
 3,290,927,488,525      branches:u                                                    (62.50%)
   130,897,170,304      branch-misses:u           #    3.98% of all branches          (62.50%)
        35,494,247      faults:u                                                    
                 0      migrations:u                                                
        35,494,247      page-faults:u                                               
   119,933,873,577      L1-dcache-load-misses:u                                       (62.50%)
       288,403,921      L1-icache-load-misses:u                                       (62.50%)

     108.453630713 seconds time elapsed

    5381.177612000 seconds user
     333.903330000 seconds sys

64 CPU
======
Performance counter stats for 'python test-scaling-subproc.py -17':

   281,639,175,978      cache-references:u                                            (62.50%)
     8,968,404,974      cache-misses:u            #    3.184 % of all cache refs      (62.50%)
16,140,364,752,053      cycles:u                                                      (62.50%)
27,256,133,511,829      instructions:u            #    1.69  insn per cycle           (62.50%)
 3,761,710,111,186      branches:u                                                    (62.50%)
   149,569,155,086      branch-misses:u           #    3.98% of all branches          (62.50%)
        39,148,442      faults:u                                                    
                 0      migrations:u                                                
        39,148,442      page-faults:u                                               
   137,584,278,754      L1-dcache-load-misses:u                                       (62.50%)
       330,750,296      L1-icache-load-misses:u                                       (62.50%)

     120.688547006 seconds time elapsed

    6274.688233000 seconds user
     484.406164000 seconds sys

120 CPU
=======
 Performance counter stats for 'python test-scaling-subproc.py -18':

   532,229,037,371      cache-references:u                                            (62.50%)
    14,227,944,434      cache-misses:u            #    2.673 % of all cache refs      (62.50%)
29,404,359,241,173      cycles:u                                                      (62.50%)
51,095,884,028,391      instructions:u            #    1.74  insn per cycle           (62.50%)
 7,053,547,766,317      branches:u                                                    (62.50%)
   280,479,284,507      branch-misses:u           #    3.98% of all branches          (62.50%)
        73,250,012      faults:u                                                    
                 0      migrations:u                                                
        73,250,012      page-faults:u                                               
   260,078,672,869      L1-dcache-load-misses:u                                       (62.50%)
       618,858,635      L1-icache-load-misses:u                                       (62.50%)

     119.736692035 seconds time elapsed

   11628.275939000 seconds user
     843.423292000 seconds sys

...