Source code for pynbody.test_utils.pyread_gadget_hdf5

"""Routine for reading Gadget3 HDF5 output files."""

import fnmatch
import os
import warnings

import h5py
import numpy

with warnings.catch_warnings():
    # pandas raises really annoying warning about its own dependencies
    # that seems to be impossible to disable
    warnings.simplefilter("ignore", DeprecationWarning)
    import pandas as pd


__author__ = 'Alan Duffy'
__email__ = 'mail@alanrduffy.com'
__version__ = '0.1.0'


[docs] def pyread_gadget_hdf5(filename, ptype, var_name, sub_dir=None,\ smooth=None,cgsunits=None,physunits=None,floatunits=None,\ silent=None,nopanda=None,noloop=None,leaveh=None): """ + NAME: PYREAD_GADGET_HDF5 PURPOSE: This function reads in HDF5 outputs (snapshots, FOF and SubFind files) of the OWLS Gadget3 code CATEGORY: I/O, HDF5, OWLS / PAISTE :) REQUIREMENTS: import pandas as pd import h5py import numpy import fnmatch import os CALLING SEQUENCE: from pyread_gadget_hdf5 import * Result = pyread_gadget_hdf5(filename, ptype, var_name [, sub_dir=sub_dir, smooth=True, cgsunits=True, physunits=True, floatunits=True, silent=True, nopanda=True, noloop=True, leaveh=True] ) INPUTS: filename: Name of the file to read in. In case of a multiple files output, any sub-file name can be given. The routine will take care of reading data from each sub-file. ptype: Defines either the particle type or the sub-set for which the data has to be read in (see table below). sub_dir: Define the group (/FOF or /SUBFIND) from which particle and/or subhaloes properties are read-in from (see table below). ptype | snapshot | Old FOF ------+------------------------------+------------- 0 | gas particles | FOF/ 1 | dark matter particles | FOF/NSF 2 | extra collisionless parts | FOF/SF 3 | extra collisionless parts | FOF/Stars 4 | star particles | - 5 | extra collisionless parts | - 6 | - | - 7 | virtual particles (TRAPHIC) | - 8 | - | - 9 | - | - 10 | - | - ptype | SubFind / sub_dir='FOF' | SubFind / sub_dir='SUBFIND' ------+------------------------------+----------------------------- 0 | gas particles | gas particles 1 | dark matter particles | dark matter particles 2 | extra collisionless parts | extra collisionless parts 3 | extra collisionless parts | extra collisionless parts 4 | star particles | star particles 5 | extra collisionless parts | extra collisionless parts 6 | - | - 7 | - | - 8 | - | - 9 | - | - 10 | FOF | SUBFIND 11 | FOF/NSF | SUBFIND/NSF 12 | FOF/SF | SUBFIND/SF 13 | FOF/Stars | SUBFIND/Stars var_name: Dataset to read in. OPTIONAL INPUTS: sub_dir: Kind of data group to read: 'FOF' - friends-of-friends group file 'SUBFIND' - subfind group file KEYWORD PARAMETERS (set to True) floatunits: returns quantities in single precision cgsunits: Converts from Gadget code units to CGS physunits: Converts from Gadget code units to Proper (with h-> h_73) and a small change with 10^10 Msol to Msol, and ENTROPY! silent: does not print any message on the screen smooth: reads SPH smoothed quantities noloop: limit the read in to only the file selected nopanda: convert from PANDAS dataframe to Numpy array (you still need Pandas to run!) leaveh: set to true to leave 'litte h' values as code 'h' dependency, will have no effect unless cgsunits or physunits selected in which case it overrides them regarding h OUTPUTS: An array (either 1D or 2D depending on variable) in PANDAS dataframe format or Numpy style, if nopanda=True set... but you shouldn't as PANDAS is fast. RESTRICTIONS: PROCEDURE: EXAMPLE: Reading star positions from a snapshot file in CGS units: pos = pyread_gadget_hdf5('snap_035.0.hdf5', 4, 'Coordinates', cgsunits=True) and in phys units (proper h_73^-1 Mpc): pos = pyread_gadget_hdf5('snap_035.0.hdf5', 4, 'Coordinates', physunits=True) and in code units: pos = pyread_gadget_hdf5('snap_035.0.hdf5', 4, 'Coordinates') Reading of FOF data from a SubFind output: pos = pyread_gadget_hdf5('subhalo_035.0.hdf5', 10, 'CenterOfMass', sub_dir='fof') Reading of FOF gas particle data from a SubFind output: FeIa= pyread_gadget_hdf5('subhalo_035.0.hdf5', 0, 'IronFromSNIa', sub_dir='fof') Reading of SPH Smoothed FOF gas particle data from a SubFind output: SmoothFeIa = pyread_gadget_hdf5('subhalo_035.0.hdf5', 0, 'IronFromSNIa', sub_dir='fof', smooth=True) Reading of SUBFIND non-star-forming gas data from a SubFind output: pos = pyread_gadget_hdf5('subhalo_035.0.hdf5', 11, 'Temperature', sub_dir='subfind') MODIFICATION HISTORY (by Alan Duffy): 1/09/13 Converted Claudio Dalla Vecchia's superlative IDL script nread_gadget_hdf5.pro 5/09/13 Removed ' input ' as an option in the script 5/09/13 Removed ' noconversion ', default is to leave in Code units, i.e. don't convert 5/09/13 Added ' cgsunits ' as the option to convert variables entirely to CGS units (the default switch in the IDL version) 5/09/13 Added ' physunits ' as an option to convert variables to proper units with h-> h_73 as well as masses to Msol instead of 10^10, and MaximumEntropy in particular changes 6/09/13 Added ' noloop ' as an option to force a read in from one file alone 6/09/13 Changed header information to match flake8 standard 25/09/13 Added ' leaveh ' as an option to make units remain in code 'h' dependency 08/01/14 Fixed bug on looping over subfiles 08/01/14 Force C-Contiguous array Any issues please contact Alan Duffy on mail@alanrduffy.com or (preferred) twitter @astroduff """ ######################################################### ## ## Check user choices... ## ######################################################### if cgsunits is not None and physunits is not None: print("[ERROR] Can't convert units to both CGS and PHYS, as CGS includes PHYS, assuming CGS ") physunits is None if silent is None: print("User gave "+filename) ## Determine the file type, i.e. SubFind, Snapshot or old FOF. if filename.rfind('/subhalo') >= 0: inputtype = 'SubFind' ## Define File type inputname = 'subhalo' elif filename.rfind('/group') >= 0: inputtype = 'FoF' ## Define File type inputname = 'group' elif filename.rfind('/snap') >= 0: inputtype = 'Snapshot' ## Define File type inputname = 'snap' if silent is None: print("This is a "+inputtype+" output filetype") ## Check that the file given and the ptype used makes sense if ptype < 0 or \ inputtype == 'Snapshot' and ptype > 7 or \ inputtype == 'FoF' and ptype > 3 or \ inputtype == 'SubFind' and ptype > 13: print('[ERROR] ptype = '+str(ptype)+' is not allowed with this input file type ('+input+')') return -1 if ptype < 6: if inputtype == 'SubFind' and sub_dir is None: print('[ERROR] ptype = '+str(ptype)+' is not allowed without specifying sub_dir flag') return -2 ######################################################### ## ## Determine if, and how many, subfiles we must loop over ## ######################################################### folder_index = filename.rfind(inputname) snapnum_index = filename[:folder_index-1].rfind('_') snapnum = int(filename[snapnum_index+1:folder_index-1]) numfile = len(fnmatch.filter(os.listdir(filename[:folder_index]), '*.hdf5')) if noloop is not None: numfile = 1 ## Force the code to only consider the input file if silent is None: print("Considering "+str(numfile)+" subfiles") ######################################################### ## ## Read in common header parameters ## ######################################################### with h5py.File(filename, "r") as fin: hubble = fin['Header'].attrs['HubbleParam'] aexp = fin['Header'].attrs['ExpansionFactor'] if numfile == 0: ## Stupid bug with one file subfind outputs, the total array isn't created. numpart_total = fin['Header'].attrs['NumPart_Total'] else: ## instead must read the case for the single file 'ThisFile' numpart_total = fin['Header'].attrs['NumPart_ThisFile'] if ptype < 6: if numpart_total[ptype] == 0: print("There are no particles of type "+str(ptype)+" quitting") return -3 masstable = fin['Header'].attrs['MassTable'] ######################################################### ## ## Read in unit conversion ## ######################################################### if cgsunits is not None or physunits is not None: gamma = fin['Constants'].attrs['GAMMA'] solar_mass = fin['Constants'].attrs['SOLAR_MASS'] ## [g] boltzmann = fin['Constants'].attrs['BOLTZMANN'] ## [erg/K] protonmass = fin['Constants'].attrs['PROTONMASS'] ## [g] sec_per_year = fin['Constants'].attrs['SEC_PER_YEAR'] if silent is None: print("Note h="+("%.3f" % hubble)+" in this simulation"+\ " and snapshot is at a="+("%.3f" % aexp)+\ " (z="+("%.3f" % (1./aexp-1.))+")") ######################################################### ## ## Read in Chemical Parameters, i.e. Element Names, if present ## ######################################################### e = 'Parameters/ChemicalElements' in fin if e is True: nelements = fin['Parameters/ChemicalElements'].attrs['BG_NELEMENTS'] elementnames = fin['Parameters/ChemicalElements'].attrs['ElementNames'] if silent is None: print("Number of Elements Tracked "+str(nelements)+", they are") for elementtype in elementnames: print(elementtype) ######################################################### ## ## Assuming the user has given us the correct information ## let's work out what array to access ## If particle type is less than 6 (more than 10) we want ## the actual particle values, otherwise 10 is the group variables ## and ## ## Also checks the case for smooth_element variables! ## ######################################################### ## For the case of a Baryon run, check if the user is asking for an element ## AND if that's a smoothed quantity then change their request slightly... if numpart_total[0] > 0 or numpart_total[4] > 0 or numpart_total[5] > 0: if var_name in list(elementnames): var_name = 'ElementAbundance/'+var_name if smooth is not None: var_name = 'Smoothed'+var_name if ptype < 6: global_var_name = '/'+'PartType'+str(ptype)+'/'+var_name+'/' elif ptype == 10: global_var_name = '/'+var_name+'/' elif ptype == 11: global_var_name = '/NSF/'+var_name+'/' elif ptype == 12: global_var_name = '/SF/'+var_name+'/' elif ptype == 13: global_var_name = '/Stars/'+var_name+'/' if sub_dir is not None: global_var_name = sub_dir.upper()+global_var_name if silent is None: print("Now select the requested variable "+global_var_name) ######################################################### ## ## Now loop over the subfiles and concatenate arrays ## ######################################################### prefix_index = filename.rfind('.hdf5') ## First HDF5 for ifile in range(0,numfile): if noloop is not None: newinfile = filename else: newinfile = filename[0:folder_index] + inputname +'_'+str(snapnum).zfill(3)+'.'+str(ifile)+filename[prefix_index:] if silent is None: print("Reading from "+newinfile) with h5py.File(newinfile, "r") as fin: ## If the dataset is present in the file then read it into tmpset, and if dataset exists concatenate them e = global_var_name in fin if e is True: tmpset = pd.DataFrame(fin[global_var_name][()]) ## Let HDF5 metadata dictate array format try: dataset except NameError: dataset = tmpset ## Grab the conversion factors while we find this dataset for the first time if cgsunits is not None or physunits is not None: cgsconvfactor = fin[global_var_name].attrs['CGSConversionFactor'] aexpscaleexponent = fin[global_var_name].attrs['aexp-scale-exponent'] hscaleexponent = fin[global_var_name].attrs['h-scale-exponent'] else: dataset = pd.concat([dataset,tmpset],ignore_index=True) try: dataset except NameError: if var_name == 'Mass' and inputtype == 'Snapshot': if ptype == 1 or ptype == 2 or ptype == 3: ## Have to hardwire in the case for collisionless particles, which all have the same mass, ## as given by the masstable, and a number of particles given by Header numppart_total ## Assume that the standard Gadget units are used. if cgsunits is not None or physunits is not None: cgsconvfactor=1.989e43 aexpscaleexponent=0.0 hscaleexponent=-1.0 dataset = pd.DataFrame(numpy.ones(numpart_total[ptype])*masstable[ptype], dtype='f8') else: print("Are you reading in a strange particle type mass? ") return -4 else: print("There is no dataset... something went wrong reading this! ") return -5 ######################################################### ## ## Horribe Bug in the Code which has the FOF CoM wrong scaling ## ######################################################### if cgsunits is not None or physunits is not None: if global_var_name == 'FOF/CenterOfMassVelocity/' and abs(aexpscaleexponent-0.5) < 0.01: print("OWLS SubFind had a bug which stated FoF CenterOfMassVelocity had aexp of 0.5, in reality it was -1.0 ") aexpscaleexponent = -1.0 ######################################################### ## ## Convert the array to cgs units, including the factors ## for 'a' and 'h' dependencies, else output in code units ## ######################################################### if cgsunits is not None: if leaveh is not None: convfactor = aexp**aexpscaleexponent * cgsconvfactor else: convfactor = aexp**aexpscaleexponent * hubble**hscaleexponent * cgsconvfactor if physunits is not None: if leaveh is not None: convfactor = aexp**aexpscaleexponent else: convfactor = aexp**aexpscaleexponent * hubble**hscaleexponent if var_name == 'Mass' or var_name == 'MassType' or var_name == 'InitialMass' or var_name == 'Halo_M_TopHat200' or \ var_name == 'Halo_M_Crit200' or var_name == 'Halo_M_Crit500' or var_name == 'Halo_M_Crit2500' or \ var_name == 'Halo_M_Mean200' or var_name == 'Halo_M_Mean500' or var_name == 'Halo_M_Mean2500': convfactor *= cgsconvfactor/solar_mass if var_name == 'StarFormationRate': convfactor *= cgsconvfactor/(solar_mass/sec_per_year) if var_name == 'StellarAge': convfactor *= cgsconvfactor/sec_per_year if var_name == 'MaximumEntropy': convfactor *= cgsconvfactor*protonmass**gamma / boltzmann if cgsunits is not None or physunits is not None: dataset *= convfactor if silent is None: if cgsunits is not None: print("Converting units to CGS") if physunits is not None: print("Converting units to Proper Normalised Units ") print("Scale Factor dependency "+str(aexpscaleexponent)) if leaveh is not None: print("User asked to leave little h unchanged! ") else: print("little h dependency "+str(hscaleexponent)) print("Overall conversion factor "+str(convfactor)) else: if silent is None: print("No conversion - still in Code units") ######################################################### ## ## Convert the 1D array (nadded*3) to 2D matrix (nadded,3) ## Except for particle type arrays which are (nadded,6) ## ######################################################### ncols = 1 if var_name == 'Coordinates' or var_name == 'Velocity' or var_name == 'CenterOfMass' \ or var_name == 'CenterOfMassVelocity' or var_name == 'Position' or var_name == 'SubSpin' \ or var_name == 'GasSpin' or var_name == 'StarSpin' or var_name == 'SFSpin' \ or var_name == 'NSFSpin': if inputtype != 'Snapshot': ncols = 3 if var_name == 'LengthType' or var_name == 'MassType' or var_name == 'OffsetType' \ or var_name == 'SubHalfMassProj' or var_name == 'SubHalfMass': if inputtype != 'Snapshot': ncols = 6 if ncols > 1: nadded = len(dataset.index) // ncols dataset = pd.DataFrame(dataset.values.reshape((nadded,ncols))) if floatunits is not None: dataset = dataset.astype('f4')## Make the units float32 if silent is None: print("Done ") ## User wants to change from Dataframe to numpy format if nopanda is not None: dataset = numpy.ascontiguousarray(dataset) return dataset