Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Below is a Python example that gathers some data from an experiment and saves it to a hdf5 file for future use. The gathered data will be used to select which shots to use for future analysis. It demonstrates the use of h5py to write and read compound data types to hdf5 files.

The data used is a CXI tutorial run from the Tutorial test data/reg/d/psdm/cxi/cxitut13/.

Code Block
themeConfluence
languagepython
titlegatherSave.py
linenumberstrue
collapsetrue
import sys
import numpy as np
import h5py
import psana

def gatherData(maxEventsToProcess=None):
    '''Goes through a specific experiment and run, 
    gathers specific values from the data,
    returns a two numpy arrays:
       dataArray: the gathered data
       timeArray: the event times for each row of the gathered data
                  has two fields - 'seconds' 'nanoseconds', both integers
    '''
    dataSource = psana.DataSource("exp=cxitut13:run=0022")
    srcDiode = psana.Source('DetInfo(CxiDg4.0:Ipimb.0)')
    srcGasDet = psana.Source('BldInfo(FEEGasDetEnergy)')
    epicsStore = dataSource.env().epicsStore()

    # define data type for data array and time array
    dataDtype = np.dtype([('aG',np.float), ('aD',np.float), ('aH',np.float),  
                          ('aX',np.float), ('aY',np.float), ('aZ',np.float)])

    timeDtype = np.dtype([('seconds',np.uint32),('nanoseconds',np.uint32)])

    # to minimize realloc of arrays, set the number of elements in a  block size
    blockSize = 100000
    dataArray = np.zeros(blockSize, dtype=dataDtype)
    timeArray = np.zeros(blockSize, dtype=timeDtype)

    # motor calibration
    EncoderScale = 0.005 
    Xoffset = 17050.
    Yoffset = 1056510.
    Zoffset = 1830400.
    
    nextArrayRow = 0
    for eventNumber, evt in enumerate(dataSource.events()):
        if maxEventsToProcess is not None and nextArrayRow >= maxEventsToProcess:
            break
        diode = evt.get(psana.Ipimb.DataV2,srcDiode)
        gasDet = evt.get(psana.Bld.BldDataFEEGasDetEnergy,srcGasDet)
        # check that all data is present in event before adding to dataArray
        if diode is None or gasDet is None:
            continue
        # grow arrays if they are to small
        if nextArrayRow >= timeArray.size:
            timeArray.resize(timeArray.size + blockSize)
            dataArray.resize(dataArray.size + blockSize)
        timeArray[nextArrayRow] = evt.get(psana.EventId).time()
        diodeSum = diode.channel0Volts() + diode.channel1Volts() + \
                   diode.channel2Volts() + diode.channel3Volts()
        dataArray[nextArrayRow]['aD'] = diodeSum
        dataArray[nextArrayRow]['aG'] = gasDet.f_11_ENRC()
        sampleX = epicsStore.getPV('CXI:SC1:MZM:08:ENCPOSITIONGET').value(0)
        sampleY = epicsStore.getPV('CXI:SC1:MZM:09:ENCPOSITIONGET').value(0)
        sampleZ = epicsStore.getPV('CXI:SC1:MZM:10:ENCPOSITIONGET').value(0)
        dataArray[nextArrayRow]['aX'] = sampleX * EncoderScale - Xoffset
        dataArray[nextArrayRow]['aY'] = sampleY * EncoderScale - Yoffset
        dataArray[nextArrayRow]['aZ'] = sampleZ * EncoderScale - Zoffset
        dataArray[nextArrayRow]['aH'] = \
                             epicsStore.getPV('CXI:KB1:MMS:07.RBV').value(0)
        if nextArrayRow % 1000 == 0:
            print "event %8d diode.channel0Volts=%f gasDet.f_11_ENCR=%f" % \
                (eventNumber, diode.channel0Volts(),gasDet.f_11_ENRC())
        nextArrayRow += 1

    # shrink arrays to number of events we stored data from
    timeArray.resize(nextArrayRow)
    dataArray.resize(nextArrayRow)

    return dataArray, timeArray

def H5WriteDataAndTime(dataArray, timeArray, h5filename):
    f = h5py.File(h5filename,'w')
    dataDtype = dataArray.dtype
    dataDset = f.create_dataset('data', dataArray.shape, dataDtype)
    dataDset[:] = dataArray
    timeDtype = timeArray.dtype
    timeDset = f.create_dataset('time',timeArray.shape, timeDtype)
    timeDset[:] = timeArray
    f.close()

def H5ReadDataAndTime(h5filename):
    f = h5py.File(h5filename)
    dataDset = f['data']
    dataArray = dataDset[:]
    timeDset = f['time']
    timeArray = timeDset[:]
    f.close()
    return dataArray, timeArray

if __name__ == '__main__':
    maxArraySize = None
    if len(sys.argv) > 1:
        maxArraySize = int(sys.argv[1])
    dataArray,timeArray = gatherData(maxArraySize)
    H5WriteDataAndTime(dataArray, timeArray, "saved_output.h5")
    dataArray,timeArray = H5ReadDataAndTime("saved_output.h5")

...

Code Block
languagepython
import h5py   

f = h5py.File('myfile.h5'h5filename,'w')
data_comp_typedataDtype = compoundDatadataArray.dtype
data_dsetdataDset = f.create_dataset('data', compoundDatadataArray.shape, data_comp_type)
data_dsetdataDtype)
dataDset[:] = dataArray
timeDtype = timeArray.dtype
timeDset = f.create_dataset('time',timeArray.shape, timeDtype)
timeDset[:] = compoundDatatimeArray
f.close()

It is important to call the close() method of the h5py.File object when done.