Page History

...
Code Block
language	bash
title	psana-translate default_psana.cfg file - all options
collapse	true
######################################################################
[psana]

# MODULES: any modules that produce data to be translated need be loaded 
# **BEFORE** Translator.H5Output (such as calibrated data or ndarray's)
# event data added by modules listed after Translator.H5Output is not translated.
modules = Translator.H5Output

files = **TODO: SPECIFY INPUT FILES OR DATA SOURCE HERE**

######################################################################
[Translator.H5Output]

# The only option you need to set for the Translator.H5Output module is
# output_file. All other options have default values (explained below).

# TODO: enter the full h5 output file name, including the output directory
output_file = output_directory/h5output.h5

# By default, the Translator will not overwrite the h5 file if it already exists
overwrite = false

# # # # # # # # # # # # # # # # # # # # #
# EPICS FILTERING
# The Translator can store epics pv's in one of three ways, or not at all.
# set store_epics below, to one of the following:
#
# updates_only   stores an EPICS pv when it has been updated in the psana epics store.
#                For xtc input this happens whenever EPICS data is present in a datagram.
#                Note - many EPICS pvs are not present in every shot. A dataset
#                for an EPIC pv is often shorter than the total number of events.
#                Experiments with many short calib cycles may have some calib cycles where
#                no EPICS pv's show up. Users would then have to look back through several 
#                calib cycle's to find the latest value of a pv.
#
# calib_repeat   This is the same as updates_only except that each calib cycle starts with
#                the most recent value of each pv. This makes it easier to find pv's in a 
#                calib cycle. For experiments with many short calib cycles, it can produce
#                more datasets than neccessary.
#
# always         For each event, store the most recent value of the EPICs pv. Produces 
#                longer datasets than neccessary, but makes it easier to find the latest
#                pv for an event.
#
# no             epics pv's will not be stored. You may also want to set Epics=exclude
#                (see below) if you do not want the epics configuration data stored

# The default is 'calib_repeat'

store_epics = calib_repeat

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# FILTERING
# 
# By default, all xtc data is Translated and many ndarrays that user modules (if any) 
# add are translated. The FilteringTranslator can occurbe inconfigured eitherto thefilter codedata of user modules, orbased on
# througha options in the psana.cfg file. Here in the config file, different groups of 
# data can be filterednumber of criteria. There are fourfive options for filtering data: 
#
#    type filtering   -  for example, exclude all cspad, regardless of the detector source
#    source filtering -  for example, exclude any data from a given detector source
#    key filtering    -  for example, include only ndarrays with a given key string
#    calibration      -  do not translate original xtc if a calibrated version is found
#    eventkey filtering -very fine control over filtering - specify type,src and key
#
# Type filtering is based on sets of Psana data types. If you know what detectors or 
# devices to filter, leave type filtering alone and go to src_filter. 
#
# Type filtering has the highest precedence, then key filtering, then source 
# filtering, then "full" event key filtering, and lastly calibration filtering. When the 
# Translator sees new data, 
# it first checks the type filter. If it is a filtered type 
# (or unknown type) no further 
# translation occurs with the data - regardless of src or key. 
# For data that gets 
# past the type filter, the Translator looks at the src and key. If the key The src
# stringfilter is empty,only it checks the source filter. Data with non emptyapplied to data that has an empty key string. To filter data with key strings are ,
# handleduse via the key filter., Ifor thefull srceventkey isfilter filtered,(if butfine thecontrol key is not, then the
# data will be translatedneeded). Data with the special 
# calibration key string are handled 
# via the calibration filtering.  
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# TYPE FILTERING 
#
# One can include or exclude a class of Psana types with the following 
# options. Only the strings include or exclude are valid for these 
# type filtering options. 
# 
# Note - Epics in the list below refers only to the epicsConfig data
# which is the epics alias list, not the epics pv's. To filter the epics pv's
# see the 'store_epics' option above.

AcqTdc = include               # Psana::Acqiris::TdcConfigV1, Psana::Acqiris::TdcDataV1
AcqWaveform = include          # Psana::Acqiris::ConfigV1, Psana::Acqiris::DataDescV1
Alias = include                # Psana::Alias::ConfigV1
AnalogInput = include          # Psana::Bld::BldDataAnalogInputV1
Andor = include                # Psana::Andor::ConfigV1, Psana::Andor::FrameV1
Andor3d = include              # Psana::Andor3d::ConfigV1, Psana::Andor3d::FrameV1
Arraychar = include            # Psana::Arraychar::DataV1
Control = include              # Psana::ControlData::ConfigV1, Psana::ControlData::ConfigV2, Psana::ControlData::ConfigV3
Cspad = include                # Psana::CsPad::ConfigV1, Psana::CsPad::ConfigV2, Psana::CsPad::ConfigV3, Psana::CsPad::ConfigV4, Psana::CsPad::ConfigV5, Psana::CsPad::DataV1, Psana::CsPad::DataV2
Cspad2x2 = include             # Psana::CsPad2x2::ConfigV1, Psana::CsPad2x2::ConfigV2, Psana::CsPad2x2::ElementV1
DiodeFex = include             # Psana::Lusi::DiodeFexConfigV1, Psana::Lusi::DiodeFexConfigV2, Psana::Lusi::DiodeFexV1
EBeam = include                # Psana::Bld::BldDataEBeamV0, Psana::Bld::BldDataEBeamV1, Psana::Bld::BldDataEBeamV2, Psana::Bld::BldDataEBeamV3, Psana::Bld::BldDataEBeamV4, Psana::Bld::BldDataEBeamV5, Psana::Bld::BldDataEBeamV6, Psana::Bld::BldDataEBeamV7
Encoder = include              # Psana::Encoder::ConfigV1, Psana::Encoder::ConfigV2, Psana::Encoder::DataV1, Psana::Encoder::DataV2
Epics = include                # Psana::Epics::ConfigV1
Epix = include                 # Psana::Epix::ConfigV1, Psana::Epix::ElementV1, Psana::Epix::ElementV2, Psana::Epix::ElementV3
Epix100a = include             # Psana::Epix::Config100aV1
Epix10k , Psana::Epix::Config100aV2
Epix10k = include              # Psana::Epix::Config10KV1
EpixSampler = include          # Psana::EpixSampler::ConfigV1, Psana::EpixSampler::ElementV1
Evr = include                  # Psana::EvrData::ConfigV1, Psana::EvrData::ConfigV2, Psana::EvrData::ConfigV3, Psana::EvrData::ConfigV4, Psana::EvrData::ConfigV5, Psana::EvrData::ConfigV6, Psana::EvrData::ConfigV7, Psana::EvrData::DataV3, Psana::EvrData::DataV4
EvrIO = include                # Psana::EvrData::IOConfigV1, Psana::EvrData::IOConfigV2
Evs = include                  # Psana::EvrData::SrcConfigV1
FEEGasDetEnergy = include      # Psana::Bld::BldDataFEEGasDetEnergy, Psana::Bld::BldDataFEEGasDetEnergyV1
Fccd = include                 # Psana::FCCD::FccdConfigV1, Psana::FCCD::FccdConfigV2
Fli = include                  # Psana::Fli::ConfigV1, Psana::Fli::FrameV1
Frame = include                # Psana::Camera::FrameV1
FrameFccd = include            # Psana::Camera::FrameFccdConfigV1
FrameFex = include             # Psana::Camera::FrameFexConfigV1
GMD = include                  # Psana::Bld::BldDataGMDV0, Psana::Bld::BldDataGMDV1, Psana::Bld::BldDataGMDV2
GenericPgp = include           # Psana::GenericPgp::ConfigV1
Gsc16ai = include              # Psana::Gsc16ai::ConfigV1, Psana::Gsc16ai::DataV1
Imp = include                  # Psana::Imp::ConfigV1, Psana::Imp::ElementV1
Ipimb = include                # Psana::Ipimb::ConfigV1, Psana::Ipimb::ConfigV2, Psana::Ipimb::DataV1, Psana::Ipimb::DataV2
IpmFex = include               # Psana::Lusi::IpmFexConfigV1, Psana::Lusi::IpmFexConfigV2, Psana::Lusi::IpmFexV1
L3T = include                  # Psana::L3T::ConfigV1, Psana::L3T::DataV1, Psana::L3T::DataV2
OceanOptics = include          # Psana::OceanOptics::ConfigV1, Psana::OceanOptics::ConfigV2, Psana::OceanOptics::DataV1, Psana::OceanOptics::DataV2, Psana::OceanOptics::DataV3
Opal1k = include               # Psana::Opal1k::ConfigV1
Orca = include                 # Psana::Orca::ConfigV1
Partition = include            # Psana::Partition::ConfigV1
PhaseCavity = include          # Psana::Bld::BldDataPhaseCavity
PimImage = include             # Psana::Lusi::PimImageConfigV1
Pimax = include                # Psana::Pimax::ConfigV1, Psana::Pimax::FrameV1
Princeton = include            # Psana::Princeton::ConfigV1, Psana::Princeton::ConfigV2, Psana::Princeton::ConfigV3, Psana::Princeton::ConfigV4, Psana::Princeton::ConfigV5, Psana::Princeton::FrameV1, Psana::Princeton::FrameV2
PrincetonInfo = include        # Psana::Princeton::InfoV1
Quartz = include               # Psana::Quartz::ConfigV1, Psana::Quartz::ConfigV2
Rayonix = include              # Psana::Rayonix::ConfigV1, Psana::Rayonix::ConfigV2
SharedAcqADC = include         # Psana::Bld::BldDataAcqADCV1
SharedIpimb = include          # Psana::Bld::BldDataIpimbV0, Psana::Bld::BldDataIpimbV1
SharedPim = include            # Psana::Bld::BldDataPimV1
Spectrometer = include         # Psana::Bld::BldDataSpectrometerV0, Psana::Bld::BldDataSpectrometerV1
TM6740 = include               # Psana::Pulnix::TM6740ConfigV1, Psana::Pulnix::TM6740ConfigV2
TimeTool = include             # Psana::TimeTool::ConfigV1, Psana::TimeTool::ConfigV2, Psana::TimeTool::DataV1, Psana::TimeTool::DataV2
Timepix = include              # Psana::Timepix::ConfigV1, Psana::Timepix::ConfigV2, Psana::Timepix::ConfigV3, Psana::Timepix::DataV1, Psana::Timepix::DataV2
TwoDGaussian = include         # Psana::Camera::TwoDGaussianV1
UsdUsb = include               # Psana::UsdUsb::ConfigV1, Psana::UsdUsb::DataV1
pnCCD = include                # Psana::PNCCD::ConfigV1, Psana::PNCCD::ConfigV2, Psana::PNCCD::FramesV1

# user types to translate from the event store
ndarray_types = include        # ndarray<int8_t,1>, ndarray<int8_t,2>, ndarray<int8_t,3>, ndarray<int8_t,4>, ndarray<int16_t,1>, ndarray<int16_t,2>, ndarray<int16_t,3>, ndarray<int16_t,4>, ndarray<int32_t,1>, ndarray<int32_t,2>, ndarray<int32_t,3>, ndarray<int32_t,4>, ndarray<int64_t,1>, ndarray<int64_t,2>, ndarray<int64_t,3>, ndarray<int64_t,4>, ndarray<uint8_t,1>, ndarray<uint8_t,2>, ndarray<uint8_t,3>, ndarray<uint8_t,4>, ndarray<uint16_t,1>, ndarray<uint16_t,2>, ndarray<uint16_t,3>, ndarray<uint16_t,4>, ndarray<uint32_t,1>, ndarray<uint32_t,2>, ndarray<uint32_t,3>, ndarray<uint32_t,4>, ndarray<uint64_t,1>, ndarray<uint64_t,2>, ndarray<uint64_t,3>, ndarray<uint64_t,4>, ndarray<float,1>, ndarray<float,2>, ndarray<float,3>, ndarray<float,4>, ndarray<double,1>, ndarray<double,2>, ndarray<double,3>, ndarray<double,4>, ndarray<const int8_t,1>, ndarray<const int8_t,2>, ndarray<const int8_t,3>, ndarray<const int8_t,4>, ndarray<const int16_t,1>, ndarray<const int16_t,2>, ndarray<const int16_t,3>, ndarray<const int16_t,4>, ndarray<const int32_t,1>, ndarray<const int32_t,2>, ndarray<const int32_t,3>, ndarray<const int32_t,4>, ndarray<const int64_t,1>, ndarray<const int64_t,2>, ndarray<const int64_t,3>, ndarray<const int64_t,4>, ndarray<const uint8_t,1>, ndarray<const uint8_t,2>, ndarray<const uint8_t,3>, ndarray<const uint8_t,4>, ndarray<const uint16_t,1>, ndarray<const uint16_t,2>, ndarray<const uint16_t,3>, ndarray<const uint16_t,4>, ndarray<const uint32_t,1>, ndarray<const uint32_t,2>, ndarray<const uint32_t,3>, ndarray<const uint32_t,4>, ndarray<const uint64_t,1>, ndarray<const uint64_t,2>, ndarray<const uint64_t,3>, ndarray<const uint64_t,4>, ndarray<const float,1>, ndarray<const float,2>, ndarray<const float,3>, ndarray<const float,4>, ndarray<const double,1>, ndarray<const double,2>, ndarray<const double,3>, ndarray<const double,4>
std_string = include           # std::string


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# TYPE FILTER SHORTCUT
#
# In addition to filtering Psana types by the options above, one can use
# the type_filter option below. For example:
#
# type_filter = include cspad       # will only translate cspad types. Will not translate
#                                 # ndarrays or strings
# type_filter = exclude Andor evr   # translate all except the Andor or Evr types
# 
# If you do not want to translate what is in the xtc file, use the psana shortcut:
#
# type_filter = exclude psana       # This will only translate ndarray's and strings 
#
# Likewise doing:
#
# type_filter = include psana       # will translate all xtc data, but skip any ndarray's or strings
#
# The default is to include all

type_filter = include all

# note - if type_filter is anything other than 'include all' it takes precedence
# over the classes of type filter options above, like Cspad=include.

# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# SOURCE FILTERING
#
# The default for the src_filter option is "include all"
# If you want to include a subset of the sources, do
#
# src_filter include srcname1 srcname2  
#
#  or if you want to exclude a subset of sources, do
#
# src_filter exclude srcname1 srcname2
#
# The syntax for specifying a srcname follows that of the Psana Source (discussed in 
# the Psana Users Guide). The Psana Source recognizes DAQ alias names (if present
# in the xtc files), several styles for specifying a Pds Src, as well as detector matches 
# where the detector number, or device number is not known. 
# 
# Specifically, format ofUnknown sources generate exceptions that by default stop the matchTranslator. stringThis can be: 
#
# inconvenient for users that reuse one  DetInfo(det.detId:dev.devId) - fully or partially specified DetInfoconfiguration across many runs in an experiment, 
# where some runs includes certain  det.detId:dev.devId - same as abovesources and some runs don't. You can tell the Translator
# to ignore unknown sources by  DetInfo(det-detId|dev.devId) - same as above
#       det-setting the option
#
# unknown_src_ok=0   # to 1, by default it is False, which means stop.
# 
# Specifically, format of the match string can be:
#
#       DetInfo(det.detId:dev.devId) - fully or partially specified DetInfo
#       det.detId:dev.devId - same as above
#       DetInfo(det-detId|dev.devId) - same as above
#       det-detId|dev.devId - same as above
#       BldInfo(type) - fully or partially specified BldInfo
#       type - same as above
#       ProcInfo(ipAddr) - fully or partially specified ProcInfo
#
# For example
#        DetInfo(AmoETOF.0.Acqiris.0)  
#        DetInfo(AmoETOF.0.Acqiris)  
#        DetInfo(AmoETOF:Acqiris)
#        AmoETOF:Acqiris
#        AmoETOF|Acqiris
#
# will all match the same data, AmoETOF.0.Acqiris.0. The later ones will match
# additional data (such as detector 1, 2, etc.) if it is present.
#
# A simple way to set up src filtering is to take a look at the sources in the 
# xtc input using the psana EventKeys module.  For example
#
# psana -n 5 -m EventKeys exp=cxitut13:run=22 
#
# Will print the EventKeys in the first 5 events.  If the output includes
#
#   EventKey(type=Psana::EvrData::DataV3, src=DetInfo(NoDetector.0:Evr.2))
#   EventKey(type=Psana::CsPad::DataV2, src=DetInfo(CxiDs1.0:Cspad.0))
#   EventKey(type=Psana::CsPad2x2::ElementV1, src=DetInfo(CxiSc2.0:Cspad2x2.1))
#   EventKey(type=Psana::Bld::BldDataEBeamV3, src=BldInfo(EBeam))
#   EventKey(type=Psana::Bld::BldDataFEEGasDetEnergy, src=BldInfo(FEEGasDetEnergy))
#   EventKey(type=Psana::Camera::FrameV1, src=BldInfo(CxiDg2_Pim))
#
# Then one can filter on these six srcname's:
#
#  NoDetector.0:Evr.2  CxiDs1.0:Cspad.0  CxiSc2.0:Cspad2x2.1  EBeam  FEEGasDetEnergy  CxiDg2_Pim
#

src_filter = include all

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# CALIBRATION FILTERING
#
# Psana calibration modules can produce calibrated versions of different 
# data types. Depending on the module used, you may get an NDArray, an 
# image, or the same data type as was in the xtc but with calibrated data.
#
# If you are doing the latter, the module output will be data of the same type 
# and src as the uncalibrated data, with an additional key, such as 'calibrated'.
# If these modules are configured to use a different key, set calibration_key
# below accordingly:

calibration_key = calibrated

# The Translator defaults to writing calibrated data in place of uncalibrated
# data. If you do not want the calibrated data and would prefer to have the
# original uncalibrated data from the xtc, then set skip_calibrated to true.

skip_calibrated = false

# note, setting skip_calibrated to true will force sets exclude_calibstore 
# (below) to be true as well.

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# CALIBSTORE FILTERING
#
# Calibration modules may publish the data they used to produce the calibrated
# event objects. Examples of data would be pedestal values, pixel status (what
# pixels are hot) and common mode algorithm parameters. This data will be published
# in what is called the Psana calibStore. When the Translator sees calibrated 
# event data, it will look for the corresponsinding calibStore data as well.
# If you do not want it to translate calibStore data, set the following to true.

exclude_calibstore = false

# otherwise, the Translator will create a group CalibStore that holds the
# calibstore data. Note, the Translator looks for all calibStore data associated 
# with the calibration modules. If a calibration module was configured to not do 
# certain calibrations (such as gain) but the module still put gain values
# in the config store (even though it did not use them) the Translator 
# would still translate those gain values.

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# KEY FILTERING
#
# Psana modules loaded before the translator may put a variety of objects in the event 
# store. Be default, the Translator will translate any new data that it knows about.
# In addition to the psana types, it knows about NDArrays, C++ strings, and has a C++ interface 
# for registering new simple types. NDarray's up to 4 dimensions of 10 basic types 
# (8, 16, 32 and 64 bit signed and unsigned int, float and double) as well as the const 
# versions of these types are translated.
#
# Generally Psana modules will attach keys to these objects (the keys are simply strings).
# To filter the set of keys that are translated, modify the parameter below:

key_filter = include all

# The default is to not look at the key but rather translate all data that the translator
# knows about. An example of including only data with the key finalanswer would be
#
# key_filter = include finalanswer
#
# To exclude a few keys, one can do
#
# key_filter = exclude arrayA arrayB
#
# Note, key filtering does not affect translation of data without keys. For instance
# setting key_filter = include keyA does not turn off translation of data without keys.
# Of all the data with keys, only those where the key is keyA will be translated.
#

# ---------------------------------------
# SPLIT INTO SEPARTE HDF5 FILES BASED ON CALIB CYCLES
#
# There are two reasons to split the Translator output into separate files based on
# calib cycles. The first is to reduce the size of the hdf5 files, and the second is
# to speedup translation by translating separate calib cycles in parallel# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# EVENTKEY FILTERING
#
# One can also filter data based on the event key. The default is
#
# iseventkey_filter to= notinclude split:all
#
split = NoSplit
# but one could do
#
# howevereventkey_filter the= Translator also supports SplitScan mode. The best way to invoke this is to
# run the separate driver program
#
#  h5-mpi-translate
#
# which requires MPI to be available in the environment, but see below for the non-MPI 
# based split scan mode. In SplitScan mode, in addition to the output File, separate files will 
# be made for the calib cycles. The output file (the master file) will include external links 
# to the other files. Several mpi jobs are run simultaneously to divide the work of creating the 
# calib cycle files. For example, running six jobs to produce out.h5 might look like:
#
#   mpirun -n 6 h5-mpi-translate -m Translator.H5Output -o Translator.H5Output.output_file=out.h5 exp=xppd9714:run=16
#
# The driver program, h5-mpi-translate, takes all arguments that psana takes.
# Presently with our implementation, one must give the full path to mpirun, and run
# a driver program that sets up the analysis environment. For example, 
#
#   /reg/g/psdm/sw/releases/ana-current/arch/x86_64-rhel6-gcc44-opt/bin/mpirun -n 6 mpilaunch h5-mpi-translate -m Translator.H5Output -o Translator.H5Output.output_file=out.h5 exp=xppd9714:run=16
#
# where mpilaunch is
#
#  #!/bin/bash
#  . /reg/g/psdm/bin/sit_setup.sh ana-current
#  $@
#
# If six jobs were used, one becomes the master process and the other five are the workers.
# The master process does two things. First it writes the file out.h5 with the external links 
# to the calib files. Second it reads through all the data and finds the calib cycles. When it
# finds a calib cycle, it tells the next available worker where this is. When a worker is done,
# it tells the master process. The master process than adds all neccessary external links from
# out.h5 to the translated calib file produced by the worker.
#
# Generally, there will be one calib cycle file for each calib cycle. However to prevent to many 
# calib cycle files from being produced for experiments that have only a few events per calib cycle, 
# an option controls the minumum number of events per external calib cycle file. The default is

# min_events_per_calib_file = 100

# For example, if there are only 10 events per calib cycle, and assuming the master file is called 
# out.h5, the file output_cc0000.h5 will contain the groups 
#
# /CalibCycle:0000
# /CalibCycle:0001
# ...
# /CalibCycle:0009
#
# and the file output_cc0010.h5 will start with group /CalibCycle:0010
#
# As mentioned above, when workers finish a calib cycle file, they send a message to the master. 
# How frequently the master stops reading through the data to check for these messages is controlled 
# by the following parameter

# num_events_check_done_calib_file = 120

# that is, it defaults to check for a 'done' message from a worker every 120 events.

# by default, the calib cycle files are written to the same directory as the master file. Optionally,
# they can be placed into a subdirectory based on the master filename. The subdirectory name is the
# master file basename, without the extension, with _ccfiles appended to it. This subdirectory will be
# created if need be. To do this, set
#
# split_cc_in_subdir = True
#
# then if one does something like
#
# output_file = mydir/xpptut13-r0179.h5
# split_cc_in_subdir = True
#
# one will get
# 
# mydir/xpptut13-r0179.h5
# mydir/xpptut13-r0179_ccfiles/xpptut13-r0179_cc0000.h5
# mydir/xpptut13-r0179_ccfiles/xpptut13-r0179_cc0001.h5
# ...
#
# rather than 
# mydir/xpptut13-r0179.h5
# mydir/xpptut13-r0179_cc0000.h5
# mydir/xpptut13-r0179_cc0001.h5
##
# When running the h5-mpi-translate and specifying user psana modules (perhaps to add ndarrays
# into the translation or dynamically filter events) it is important to note that these modules
# are restarted for each calib cycle file. That is these modules will have their beginJob/endJob
# and beginRun/endRun routines called for each calib file that a worker produces.
#
#### FAST INDEX ####
#
# For online analysis with live data, one of the impediments to keeping up with the data is the time 
# it takes h5-mpi-translate's to read through the data to find the calib cycles. Typically the data is 
# recorded in 6 or more separate files and each must be read through to identify the start of calib cycles. 
# Unfortunately this read speed can be 30-40hz for a typical experiment - far short of the 120hz we'd like 
# to obtain. A recent feature added to h5-mpi-translate takes advantage of the unique signature of each 
# new calib cycle, combined with the regular structure of the separate data files in order to limit the 
# reading to just one of the files. In this way, the h5-mpi-translate master rank need only get through the 
# data it reads/searches at 20hz to keep up with the data. We have had good success with this feature 
# recently, but it is not guaranteed to work. It is a temporary solution until a more robust way to do 
# fast/live indexing is put in place. However in the meantime, the translator supports the following options 
# to turn on fast indexing and controlling how much time is spent searching the other files
#
# fast_index=1                 # do the fast indexing, be default it is off
# fi_mb_half_block=12          # when fast indexing is on, use 12MB on each side, or 24MB for each block that is searched
# fi_num_blocks=3              # this it half the number of 'other' blocks to try. The translator will try 1 + 2*3 = 7 blocks if this is 3 
#
# Some details, If the Translator finds a calib cycle at offset N in DAQ stream 0, then the Translator 
# will by default look in a 24MB block around offset N in stream 1, i.e., N -+ 12MB. It is looking for a 
# match on about 52 bits, spread out among 20 bytes. If the Translator fails to find the calib cycle in 
# those 24MB, then it tries the next 24MB below, then the next 24MB above, then below again, then above 
# again, etc. In the end, the Translator will cover 7 of these blocks, or 7*24MB=168MB in stream 1. 
# After it finds the calib cycle in stream 1, it repeats this process for stream 2,3,4 and 5. 
# If the Translator fails with any of these streams, it throws an exception.
#
# If MPI is not available, the Translator supports an additional  split scan mode by setting the 
# split option as follows:
#
# split=SplitScan
#
# In this mode, there is no communication between jobs, and each Translator job reads through all 
# the input, so launching too many jobs will significantly increase the amount of input processing. 
# Dividing the work of this SplitScan mode is done with the parameters

# jobTotal = 1
# jobNumber = 0

# which default to 1 job that is numbered 0. However if jobTotal=3 and jobNumber=1, this 
# Translator will process calibCycle 1, 4, 7, etc. If jobTotal is 3, the user MUST
# make sure to launch 3 Translator jobs with jobNumber being 0,1 and 2 to get all the calib cycle
# files written. jobNumber=0 will write the master file with the external links to the calib
# cycle files. 
#
# For example, the following two command lines:
#
# psana -m Translator.H5Output -o Translator.H5Output.output_file=mydir/split.h5 -o Translator.H5Output.split=SplitScan -o Translator.H5Output.jobNumber=0 -o Translator.H5Output.jobTotal=2 exp=xpp123:run=10
# psana -m Translator.H5Output -o Translator.H5Output.output_file=mydir/split.h5 -o Translator.H5Output.split=SplitScan -o Translator.H5Output.jobNumber=1 -o Translator.H5Output.jobTotal=2 exp=xpp123:run=10
#
# will divide the work into two translator jobs. When they finish, the output will be
# 
# mydir/split.h5
# mydir/split_cc0000.h5
# mydir/split_cc0001.h5
# ...
#
# note, this split scan mode, unlike the MPI version, does not put multiple calib cycles in
# one file. One can get a large number of files if calib cycles only contain a few events.

# The remaining values for split are
#
# split=MPIWorker
# split=MPIMaster
#
# These values are meant to be set by h5-mpi-translate. Generally users should not set
# these values. The h5-mpi-translate driver will set additional options that we 
# document here (again, users should not need to set these options). These options include the
# two discussed above:
#
#   num_events_check_done_calib_file 
#   min_events_per_calib_file
#
# as well as the optionexclude Frame__xtcav Frame__yag3
#
# that is, one can provide a list of event keys, which are (type, src, keystring) triplets, and
# the final key string is optional. The three items, type, src, keystring, must be separated by
# double underscores, i.e: __
# # # # # # # # # # # # # # # # # # # # #
# EXCLUDE CONFIGURE EVENT DATA
#
# this option was put in place to address a problem with a few runs in a particular
# experiment. Generally you do not need to turn this on. The explanation is as follows. 
# Usually regular event data does not appear during the configure transition. The 
# exception is Bld data. Each bld type will get an entry during the configure - more as 
# a signal that the data is present - the values are not reliable. For this experiment,
# the BldDataSpectrometerV1 during the Configure transition had a junk number of peaks
# that caused psana to crash. To address this, one can set the following:
exclude_config_eventdata = false
# to true
# ---------------------------------------
# SPLIT INTO SEPARTE HDF5 FILES BASED ON CALIB CYCLES
#
# There are two reasons to split the Translator output into separate files based on
# calib cycles. The first is to reduce the size of the hdf5 files, and the second is
# to speedup translation by translating separate calib cycles in parallel. The default 
# is to not split:
split = NoSplit
# however the Translator also supports SplitScan mode. This can only be invoked by running
# the separate driver program
#
#  h5-mpi-translate
#
# which requires MPI to be available in the environment. Under the hood, it will use the two other
# values for the split parameter - MPIWorker and MPIMaster - but users should not set these directly.
# In SplitScan mode, in addition to the output File, separate files will be made for the calib cycles. 
# The output file (the master file) will include external links to the other files. Several mpi jobs are 
# run simultaneously to divide the work of creating the calib cycle files. For example, running six jobs 
# to produce out.h5 might look like:
#
#   mpirun -n 6 h5-mpi-translate -m Translator.H5Output -o Translator.H5Output.output_file=out.h5 exp=xppd9714:run=16
#
# The driver program, h5-mpi-translate, takes all arguments that psana takes.
# If six jobs were used, one becomes the master process and the other five are the workers.
# The master process does two things. First it writes the file out.h5 with the external links 
# to the calib files. Second it reads through all the data and finds the calib cycles. When it
# finds a calib cycle, it tells the next available worker where this is. When a worker is done,
# it tells the master process. The master process than adds all neccessary external links from
# out.h5 to the translated calib file produced by the worker.
#
# Generally, there will be one calib cycle file for each calib cycle. However to prevent to many 
# calib cycle files from being produced for experiments that have only a few events per calib cycle, 
# an option controls the minumum number of events per external calib cycle file. The default is
# min_events_per_calib_file = 100
# For example, if there are only 10 events per calib cycle, and assuming the master file is called 
# out.h5, the file output_cc0000.h5 will contain the groups 
#
# /CalibCycle:0000
# /CalibCycle:0001
# ...
# /CalibCycle:0009
#
# and the file output_cc0010.h5 will start with group /CalibCycle:0010
#
# As mentioned above, when workers finish a calib cycle file, they send a message to the master. 
# How frequently the master stops reading through the data to check for these messages is controlled 
# by the following parameter
# num_events_check_done_calib_file = 120
# that is, it defaults to check for a 'done' message from a worker every 120 events.
# by default, the calib cycle files are written to the same directory as the master file. Optionally,
# they can be placed into a subdirectory based on the master filename. The subdirectory name is the
# master file basename, without the extension, with _ccfiles appended to it. This subdirectory will be
# created if need be. To do this, set
#
# split_cc_in_subdir = True
#
# then if one does something like
#
# output_file = mydir/xpptut13-r0179.h5
# split_cc_in_subdir = True
#
# one will get
# 
# mydir/xpptut13-r0179.h5
# mydir/xpptut13-r0179_ccfiles/xpptut13-r0179_cc0000.h5
# mydir/xpptut13-r0179_ccfiles/xpptut13-r0179_cc0001.h5
# ...
#
# rather than 
# mydir/xpptut13-r0179.h5
# mydir/xpptut13-r0179_cc0000.h5
# mydir/xpptut13-r0179_cc0001.h5
##
# When running the h5-mpi-translate and specifying user psana modules (perhaps to add ndarrays
# into the translation or dynamically filter events) it is important to note that these modules
# are restarted for each calib cycle file. That is these modules will have their beginJob/endJob
# and beginRun/endRun routines called for each calib file that a worker produces.
#
#### FAST INDEX ####
#
# For online analysis with live data, one of the impediments to keeping up with the data is the time 
# it takes h5-mpi-translate's to read through the data to find the calib cycles. As long as users
# read the small data (adding :smd to the dataset specification) h5-mpi-translate should have no
# trouble indexing the calib cycles in real time. 
#
# If for some reason there is a problem with the small data, users can fall back on the 
# fast_index feature against the large xtc files, however be aware that the web portal 
# adds :smd to the datasource, so you will have to coordinate with data management to
# switch to the large xtc. Below we document fast_index.
#
# fast_index takes advantage of the unique signature of each 
# new calib cycle, combined with the regular structure of the separate xtc data files in order to 
# limit the reading to just one of the file. In this way, the h5-mpi-translate master rank 
# need only get through the data it reads/searches at 20hz to keep up with the data. Part of why
# it is deprecated is because it is not guaranteed to work, whereas small data will.
#
# The translator supports the following options to turn on fast indexing and controlling how much 
# time is spent searching the other files
#
# fast_index_force=0                 # set to 1 to turn fast indexing on
# fi_mb_half_block=12          # when fast indexing is on, use 12MB on each side, or 24MB for each block that is searched
# fi_num_blocks=50             # this it half the number of 'other' blocks to try. The translator will try 1 + 2*50 = 101 blocks if this is 50
#
# Some details, If the Translator finds a calib cycle at offset N in DAQ stream 0, then the Translator 
# will by default look in a 24MB block around offset N in stream 1, i.e., N -+ 12MB. It is looking for a 
# match on about 52 bits, spread out among 20 bytes. If the Translator fails to find the calib cycle in 
# those 24MB, then it tries the next 24MB below, then the next 24MB above, then below again, then above 
# again, etc. In the end, the Translator will cover 5` of these blocks, or 51*24MB=1.2GB in stream 1. 
# After it finds the calib cycle in stream 1, it repeats this process for stream 2,3,4 and 5. 
# If the Translator fails with any of these streams, it throws an exception.
#
# Another option related to split scan is
#
# first_calib_cycle_number
#
# which is a 0-up counter for the first calib cycle that the MPIWorker will see. However users should not set
# this option - it is set by the Translator.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# COMPRESSION 
#
# The following options control compression for most all datasets.
# Shuffling improves compression for certain datasets. Valid values for
# deflate (gzip compression level) are 0-9. Setting deflate = -1 turns off
# compression.

shuffle = true
deflate = 1

# if deflate is set to -1, set shuffle to false, as it performs no function without compression.

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# TECHNICAL, ADVANCED CONFIGURATION
# 
# ---------------------------------------
# CHUNKING
# The commented options below give the default chunking options.
# Objects per chunk are selected from the target chunk size (16 MB) and 
# adjusted based on min/max objects per chunk, and the max bytes per chunk.
# It is important that the chunkCache (created on a per dataset basis) be 
# large enough to hold at least one chunk, ideally all chunks we need to have
# open at one time when writing to the dataset (usually one, unless we repair
# split events):
 
# chunkSizeTargetInBytes = 1703936 (16MB)
# chunkSizeTargetObjects = 0 (0 means select objects per chunk from chunkSizeInBytes)
# maxChunkSizeInBytes = 10649600  (100MB)
# minObjectsPerChunk = 50              
# maxObjectsPerChunk = 2048
# chunkCacheSizeTargetInChunks = 3
# maxChunkCacheSizeInBytes = 10649600  (100MB)
# By default, the Translator looks for control data to see if the number of events is known.
# If so, this overrides options above. To control chunking, one should also set useControlData
# below to 0 (or False)
# useControlData = 1  
#
# ---------------------------------------
# REFINED DATASET CONTROL
#
# There are six classes of datasets for which individual options for shuffle,
# deflate, chunkSizeTargetInBytes and chunkSizeTargetObjects can be specified:
#
# regular (most everything, all psana types)
# epics (all the epics pv's)
# damage (accompanies all regular data from event store)
# ndarrays (new data from other modules)
# string's (new data from other modules)
# eventId (the time dataset that also accompanies all regular data, epics pvs, ndarrays and strings)
#
# The options for regular datasets have been discussed above. The other five datasets 
# get their default values for shuffle, deflate, chunkSizeInBytes and chunkSizeInObjects
# from the regular dataset options except in the cases below:
 
# damageShuffle = false
# stringShuffle = false
# epicsPvShuffle = false
# stringDeflate = -1
# eventIdChunkSizeTargetInBytes = 16384
# epicsPvChunkSizeTargetInBytes = 16384

# The rest of the shuffle, deflate and chunk size options for the other five datasets are:
#
# eventIdShuffle = true
# eventIdDeflate = 1
# damageDeflate = 1
# epicsPvDeflate = 1
# ndarrayShuffle = true
# ndarrayDeflate = 1
# eventIdChunkSizeTargetObjects = 0
# damageChunkSizeTargetInBytes = 1703936
# damageChunkSizeTargetObjects = 0
# stringChunkSizeTargetInBytes = 1703936
# stringChunkSizeTargetObjects = 0
# ndarrayChunkSizeTargetInBytes = 1703936
# ndarrayChunkSizeTargetObjects = 0
# epicsPvChunkSizeTargetObjects = 0

# ---------------------------------------
# SPLIT EVENTS
# When the Translator encounters a split event, it checks a cache to see
# if it has already seen it.  If it has, it fills in any blanks that it can.
# To prevent this cache from growing to large, set the maximum number of
# split events to look back through here (default is 3000):

max_saved_split_events = 3000
...
Page tree

Versions Compared

Old Version 79

New Version 80

Key