Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

We use 396 cores on 9 srcf nodes (32 eventbuilder cores) to keep up with all eight xtc2 files. The Python script and the bash job script for Slurm are shown below.


Code Block
language: py
title: test_live.py
import time
import os,sys
from psana import DataSource
import numpy as np
import vals
from mpi4py import MPI
# World communicator plus the process count and this process's rank;
# the code below uses them to size the gather buffer and to tag log output.
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

def test_standard():
    """Iterate over every event of a live run and return the job-wide count.

    The experiment name and the run number are taken from ``sys.argv[1]``
    and ``sys.argv[2]``.  Each rank counts the events it iterates over and
    prints its own rate after every 1000 events.  The per-rank counts are
    gathered on rank 0, summed there, and the sum is broadcast back.

    Returns:
        The summed event count, as broadcast from rank 0 to every rank.
    """
    hutch = 'tst'
    exp = sys.argv[1]
    runno = int(sys.argv[2])

    # max_events=0 presumably means "no limit" -- confirm against psana docs.
    ds = DataSource(
        exp=exp,
        run=runno,
        batch_size=1000,
        max_events=0,
        dir=f'/cds/data/drpsrcf/{hutch}/{exp}/xtc/',
        live=True,
    )

    # One-element arrays so the count can go through the buffer-based
    # Gather; only the root needs a receive buffer (one row per rank).
    my_count = np.zeros(1, dtype='i')
    all_counts = np.empty([size, 1], dtype='i') if rank == 0 else None

    st = time.time()
    for run in ds.runs():
        for nevt, _evt in enumerate(run.events()):
            if nevt > 0 and nevt % 1000 == 0:
                # Report the rate over the last 1000 events, then restart
                # the stopwatch for the next window.
                en = time.time()
                print(f'RANK: {rank:4d} EVENTS: {nevt:10d} RATE: {(1000/(en-st))*1e-3:.2f}kHz', flush=True)
                st = time.time()
            my_count += 1

    # Collect the per-rank counts on rank 0, total them there, and hand
    # the total to everybody.
    comm.Gather(my_count, all_counts, root=0)
    n_events = np.sum(all_counts) if rank == 0 else None
    return comm.bcast(n_events, root=0)


if __name__ == "__main__":
    # Synchronise all ranks so the timed interval starts together.
    comm.Barrier()
    t0 = MPI.Wtime()

    n_events = test_standard()

    # Wait for every rank to finish before stopping the clock.
    comm.Barrier()
    t1 = MPI.Wtime()
    if rank == 0:
        # PS_EB_NODES is only reported here; falls back to 1 when unset.
        n_eb_nodes = int(os.environ.get('PS_EB_NODES', '1'))
        # Overall rate: total events divided by wall time, scaled to MHz.
        print(f'TOTAL TIME:{t1-t0:.2f}s #EB: {n_eb_nodes:3d} EVENTS:{n_events:10d} RATE:{(n_events/(t1-t0))*1e-6:.2f}MHz', flush=True)


Code Block
language: bash
title: submit_slac.sh
#!/bin/bash
#SBATCH --partition=anaq
#SBATCH --job-name=psana2
#SBATCH --nodes=9
#SBATCH --ntasks=396
##SBATCH --ntasks-per-node=50
#SBATCH --output=%j.log
#SBATCH --exclusive

# Slurm batch job: launches run_slac.sh through srun and reports the
# elapsed wall time.  The "##SBATCH" line above is a disabled directive.

job_start=$(date +%s)

# Sourced (not executed) so that whatever it sets stays visible here;
# presumably this is where SLURM_HOSTFILE comes from -- confirm.
source setup_hosts.sh
echo SLURM_HOSTFILE ${SLURM_HOSTFILE} SLURM_NTASKS ${SLURM_NTASKS}

# Exported so the launched processes can see it; test_live.py reads it
# for its summary line.
export PS_EB_NODES=32
MAX_EVENTS=0
EXP="tstx00817"
RUNNO=55
srun ./run_slac.sh "${MAX_EVENTS}" "${EXP}" "${RUNNO}"

job_end=$(date +%s)
echo PSJobCompleted TotalElapsed $((job_end - job_start))
Code Block
language: bash
*** /cds/home/m/monarin/lcls2/install/include/xtcdata/xtc/ShapesData.hh:355: incorrect TypeId 0
[drp-srcf-cmp048:209412] *** Process received signal ***
[drp-srcf-cmp048:209412] Signal: Aborted (6)
[drp-srcf-cmp048:209412] Signal code:  (-6)