Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  • Flexibility: an HDF5 file has an indexed structure, which means that your code can directly access any data in any file.
  • Python is a high-level scripting language that allows you to write transparent and compact code based on well-developed standard libraries.
  • In general, Python code runs slowly compared to C++, but there are libraries such as NumPy, written in C/C++, which solve this problem when manipulating large arrays.

...

Code Block
#!/usr/bin/env python

import h5py
import numpy as np

# Index (along the first axis of the dataset) of the event to read.
eventNumber = 5

# Open the file read-only. The handle is not called `file`, so the
# Python 2 built-in of that name is not shadowed.
h5file = h5py.File('/reg/d/psdm/XPP/xppcom10/hdf5/xppcom10-r0546.h5', 'r')
try:
    dataset = h5file['/Configure:0000/Run:0000/CalibCycle:0000/Camera::FrameV1/XppSb4Pim.1:Tm6740.1/image']
    # Read the record stored at position eventNumber of the dataset.
    arr1ev = dataset[eventNumber]
finally:
    # Close the file even when the dataset path or the event index is invalid.
    h5file.close()

# print(...) with a single pre-formatted string emits the same text under
# Python 2 and Python 3.
print('arr1ev.shape = %s' % (arr1ev.shape,))
print('arr1ev =\n%s' % (arr1ev,))

Example 2:

...

Extract and print the time variables:

Code Block
#!/usr/bin/env python

import h5py
import time

#-----------------------------------------------------

def print_time(t_sec, t_nsec):
    """Print a time stamp both as raw numbers and as formatted local time.

    Parameters:
        t_sec  -- seconds part of the time stamp; passed to time.localtime(),
                  i.e. interpreted as seconds since the epoch.
        t_nsec -- nanoseconds part of the time stamp; only echoed, it does
                  not affect the formatted local time.

    Two lines are written to standard output; nothing is returned.
    """

    tloc = time.localtime(t_sec)  # seconds -> struct_time in the local time zone
    # A single pre-formatted argument keeps the output identical under
    # Python 2 and Python 3 (the former print statements were Python 2 only).
    print('Input time : %s sec, %s nsec, ' % (t_sec, t_nsec))
    print('Local time : %s' % time.strftime('%Y-%m-%d %H:%M:%S', tloc))

#-----------------------------------------------------
file_name = '/reg/d/psdm/xpp/xpp22510/hdf5/xpp22510-r0100.h5'
h5file = h5py.File(file_name, 'r') # open read-only

try:
    print("EXAMPLE: Get time from the group attributes:")

    group = h5file["/Configure:0000"]
    # list(...) makes the attribute values indexable on Python 3 as well.
    # NOTE(review): this relies on the first two attributes of the group
    # being seconds and nanoseconds, in that order -- confirm for your file.
    attr_values = list(group.attrs.values())
    t_sec  = attr_values[0]
    t_nsec = attr_values[1]
    print_time(t_sec, t_nsec)

    print("EXAMPLE: Get time from the data record 'time':")

    dataset = h5file['/Configure:0000/Run:0000/CalibCycle:0002/Acqiris::DataDescV1/XppLas.0:Acqiris.0/time']
    index = 0
    # The record must not be bound to the name `time`: that would replace
    # the imported `time` module and break print_time() below.
    time_record = dataset[index]
    t_sec  = time_record[0]
    t_nsec = time_record[1]
    print_time(t_sec, t_nsec)
finally:
    # Close the file even if one of the lookups above fails.
    h5file.close()
#----------------------------------------------------

Example 3: Print entire file/group structure using recursive method

Code Block
#!/usr/bin/env python
import h5py

def print_hdf5_file_structure(file_name):
    """Print the structure of the HDF5 file `file_name`.

    The file is opened read-only, its content is printed with
    print_hdf5_item_structure(), and the file is closed again even if the
    traversal raises. A final '=== EOF ===' line marks successful completion.
    """
    h5file = h5py.File(file_name, 'r') # open read-only
    try:
        # Start from the file root; pass a sub-item instead, e.g.
        # h5file["/Configure:0000/EvrData::ConfigV4"], to print only a sub-tree.
        print_hdf5_item_structure(h5file)
    finally:
        h5file.close()
    print('=== EOF ===')

def print_hdf5_item_structure(g,offset='    '):
    """Print the kind, owning file and name of `g`, then its content.

    Parameters:
        g      -- an h5py File, Group or Dataset object.
        offset -- indentation string printed in front of each content line.

    For a Dataset a single line with its shape is printed; anything else is
    handed to print_group_content() for recursive listing.
    """
    # h5py.File is tested first because a File is also a Group.
    if   isinstance(g,h5py.File):    kind = " 'File'"
    elif isinstance(g,h5py.Group):   kind = " 'Group' from file"
    elif isinstance(g,h5py.Dataset): kind = " 'Dataset' from file"
    else:                            kind = ""

    # The header is assembled into one string so that print(...) yields the
    # same text under Python 2 and Python 3 (the former trailing-comma print
    # statements were Python 2 only).
    print("Structure of the%s %s \n%s" % (kind, g.file, g.name))

    if isinstance(g,h5py.Dataset):
        print("%s (Dateset)   len = %s" % (offset, g.shape))
    else:
        print_group_content(g,offset)

def print_group_content(g,offset='    '):
    """Recursively print the members of the file/group `g`.

    Parameters:
        g      -- an h5py File or Group whose members are listed.
        offset -- indentation string for this level; each nested level is
                  indented by four more spaces.

    One line is printed per member: datasets show their shape, groups show
    their number of members and are then descended into. Members of any
    other kind are listed by name only.
    """
    # dict(g).items() iterates the same (key, object) pairs as the former
    # dict(g).iteritems(), but also exists on Python 3.
    for key,subg in dict(g).items():
        if isinstance(subg, h5py.Dataset):
            print("%s %s  (Dateset)   len = %s" % (offset, key, subg.shape))
        elif isinstance(subg, h5py.Group):
            print("%s %s  (Group)   len = %s" % (offset, key, len(subg)))
            print_group_content(subg,offset + '    ')
        else:
            # Previously such a member left its output line unterminated, so
            # the next entry was appended to the same line.
            print("%s %s" % (offset, key))

if __name__ == "__main__":
    # Script entry point: print the structure of the example XPP run file.
    print_hdf5_file_structure(
        '/reg/d/psdm/XPP/xppcom10/hdf5/xppcom10-r0546.h5'
    )

Example 4: Time-based synchronization of two datasets

Code Block

#!/usr/bin/env python
import os
import sys
import h5py
import numpy as np

class TwoDatasetSynchronization ( object ) :
    """Match the records of two datasets using their time stamps.

    For each of the two datasets a companion 'time' dataset, located in the
    same group, supplies one time stamp per record. Records whose time stamps
    are equal are collected pairwise; records without a partner are skipped
    and reported on standard output.

    After runSynchronization() the matched records are available as the
    NumPy arrays `Xarr` and `Yarr`.
    """
    def __init__ ( self, file, Xdsname, Ydsname ) :
        """Look up the datasets and their time arrays and reset the state.

        Parameters:
            file    -- open HDF5 file (or other mapping) indexed by item path.
            Xdsname -- full path of the X dataset inside `file`.
            Ydsname -- full path of the Y dataset inside `file`.
        """

        self.dsX        = file[Xdsname]
        self.dsY        = file[Ydsname]
        # The time stamps live in a dataset named 'time' next to each dataset.
        XTimedsname     = get_item_path_to_last_name(Xdsname) + '/time'
        YTimedsname     = get_item_path_to_last_name(Ydsname) + '/time'
        self.dsXT       = file[XTimedsname]
        self.dsYT       = file[YTimedsname]
        # Combine the 'seconds' and 'nanoseconds' fields into one value in
        # seconds per record.
        self.XTarr      = 0.000000001 * self.dsXT['nanoseconds'] + self.dsXT['seconds']
        self.YTarr      = 0.000000001 * self.dsYT['nanoseconds'] + self.dsYT['seconds']
        self._nXpoints  = self.dsX.shape[0]
        self._nYpoints  = self.dsY.shape[0]
        self._indX      = 0     # current position in X
        self._indY      = 0     # current position in Y
        self._tmapXlist = []    # matched X records
        self._tmapYlist = []    # matched Y records
        # Single-argument print(...) gives the same text on Python 2 and 3.
        print('Xdsname     = %s' % (Xdsname,))
        print('Ydsname     = %s' % (Ydsname,))
        print('XTimedsname = %s' % (XTimedsname,))
        print('YTimedsname = %s' % (YTimedsname,))
        print('Initialization: datasets X and Y have length = %s %s'
              % (self._nXpoints, self._nYpoints))

    def twoDatasetSynchronizationIterations( self ) :
        """Walk both time arrays in step and collect the matching records.

        NOTE(review): the walk only ever advances, so it presumably expects
        both time arrays to be in ascending order -- confirm for your data.
        """

        while self._indX < self._nXpoints and self._indY < self._nYpoints :
            timeX = self.XTarr[self._indX]
            timeY = self.YTarr[self._indY]

            if timeX == timeY :                    # Time is the same
                self._tmapXlist.append(self.dsX[self._indX])
                self._tmapYlist.append(self.dsY[self._indY])
                self._indX += 1
                self._indY += 1

            elif timeX > timeY :                   # Time X > Time Y: skip the Y record
                self._indY += 1
                self.printMissingSynchronization()

            else :                                 # Time X < Time Y: skip the X record
                self._indX += 1
                self.printMissingSynchronization()

    def printMissingSynchronization( self ) :
        """Report the current X and Y indexes after a record was skipped."""
        print('Missing of syncronization for X,Y indexes  %s %s'
              % (self._indX, self._indY))

    def runSynchronization( self ) :
        """Run the matching and store the results as `Xarr` and `Yarr`."""
        self.twoDatasetSynchronizationIterations()
        self.Xarr = np.array(self._tmapXlist)
        self.Yarr = np.array(self._tmapYlist)
        print('Number of synchronized in time X and Y array elements = %s %s'
              % (self.Xarr.shape, self.Yarr.shape))

def get_item_path_to_last_name(dsname):
    """Return `dsname` without its last path component.

    The argument is converted to a string first; the result is the part
    that precedes the final '/'-separated name.
    """
    return os.path.dirname(str(dsname))

def main() :
    """EXAMPLE: Time synchronization of two datasets.

    In this example we open the file, which contains a normal dataset (Y)
    and a dataset with lost records (X).
    We access these arrays and the associated time arrays
    through the class TwoDatasetSynchronization.
    Then we iterate over the indexes of these arrays and append the matching
    records to the lists of synchronized arrays. The program prints a message
    in case of missing synchronization.

    The file is closed when the synchronization has finished or failed.
    """

    h5file   = h5py.File('/reg/d/psdm/CXI/cxi80410/hdf5/cxi80410-r0730.h5', 'r')
    Xdsname  = '/Configure:0000/Run:0000/CalibCycle:0000/Bld::BldDataFEEGasDetEnergy/NoDetector.0:NoDevice.2/data'
    Ydsname  = '/Configure:0000/Run:0000/CalibCycle:0000/Ipimb::DataV1/CxiDg1.0:Ipimb.0/data'

    try :
        synchro  = TwoDatasetSynchronization (h5file, Xdsname, Ydsname)
        synchro.runSynchronization()
    finally :
        # Previously the file was never closed.
        h5file.close()

#--------------------------------
if __name__ == "__main__" :
    main()
    # sys.exit() with a string argument prints the string to stderr and ends
    # the process with exit status 1, i.e. reports a failure. Write the same
    # message to stderr explicitly and exit with status 0 instead.
    sys.stderr.write("That's it!\n")
    sys.exit ( 0 )
#--------------------------------