"""Routine for reading Gadget3 HDF5 output files."""
import fnmatch
import os
import warnings
import h5py
import numpy
with warnings.catch_warnings():
# pandas raises really annoying warning about its own dependencies
# that seems to be impossible to disable
warnings.simplefilter("ignore", DeprecationWarning)
import pandas as pd
__author__ = 'Alan Duffy'
__email__ = 'mail@alanrduffy.com'
__version__ = '0.1.0'
[docs]
def pyread_gadget_hdf5(filename, ptype, var_name, sub_dir=None,\
smooth=None,cgsunits=None,physunits=None,floatunits=None,\
silent=None,nopanda=None,noloop=None,leaveh=None):
"""
+
NAME:
PYREAD_GADGET_HDF5
PURPOSE:
This function reads in HDF5 outputs (snapshots, FOF and SubFind files) of the OWLS Gadget3 code
CATEGORY:
I/O, HDF5, OWLS / PAISTE :)
REQUIREMENTS:
import pandas as pd
import h5py
import numpy
import fnmatch
import os
CALLING SEQUENCE:
from pyread_gadget_hdf5 import *
Result = pyread_gadget_hdf5(filename, ptype, var_name
[, sub_dir=sub_dir, smooth=True, cgsunits=True, physunits=True,
floatunits=True, silent=True, nopanda=True, noloop=True,
leaveh=True] )
INPUTS:
filename: Name of the file to read in. In case of a multiple files
output, any sub-file name can be given. The routine will
take care of reading data from each sub-file.
ptype: Defines either the particle type or the sub-set for
which the data has to be read in (see table below).
sub_dir: Define the group (/FOF or /SUBFIND) from which
particle and/or subhaloes properties are read-in
from (see table below).
ptype | snapshot | Old FOF
------+------------------------------+-------------
0 | gas particles | FOF/
1 | dark matter particles | FOF/NSF
2 | extra collisionless parts | FOF/SF
3 | extra collisionless parts | FOF/Stars
4 | star particles | -
5 | extra collisionless parts | -
6 | - | -
7 | virtual particles (TRAPHIC) | -
8 | - | -
9 | - | -
10 | - | -
ptype | SubFind / sub_dir='FOF' | SubFind / sub_dir='SUBFIND'
------+------------------------------+-----------------------------
0 | gas particles | gas particles
1 | dark matter particles | dark matter particles
2 | extra collisionless parts | extra collisionless parts
3 | extra collisionless parts | extra collisionless parts
4 | star particles | star particles
5 | extra collisionless parts | extra collisionless parts
6 | - | -
7 | - | -
8 | - | -
9 | - | -
10 | FOF | SUBFIND
11 | FOF/NSF | SUBFIND/NSF
12 | FOF/SF | SUBFIND/SF
13 | FOF/Stars | SUBFIND/Stars
var_name: Dataset to read in.
OPTIONAL INPUTS:
sub_dir: Kind of data group to read:
'FOF' - friends-of-friends group file
'SUBFIND' - subfind group file
KEYWORD PARAMETERS (set to True)
floatunits: returns quantities in single precision
cgsunits: Converts from Gadget code units to CGS
physunits: Converts from Gadget code units to Proper (with h-> h_73)
and a small change with 10^10 Msol to Msol, and ENTROPY!
silent: does not print any message on the screen
smooth: reads SPH smoothed quantities
noloop: limit the read in to only the file selected
nopanda: convert from PANDAS dataframe to Numpy array (you still need Pandas to run!)
leaveh: set to true to leave 'litte h' values as code 'h' dependency, will have no effect
unless cgsunits or physunits selected in which case it overrides them regarding h
OUTPUTS:
An array (either 1D or 2D depending on variable) in PANDAS dataframe format
or Numpy style, if nopanda=True set... but you shouldn't as PANDAS is fast.
RESTRICTIONS:
PROCEDURE:
EXAMPLE:
Reading star positions from a snapshot file in CGS units:
pos = pyread_gadget_hdf5('snap_035.0.hdf5', 4, 'Coordinates', cgsunits=True)
and in phys units (proper h_73^-1 Mpc):
pos = pyread_gadget_hdf5('snap_035.0.hdf5', 4, 'Coordinates', physunits=True)
and in code units:
pos = pyread_gadget_hdf5('snap_035.0.hdf5', 4, 'Coordinates')
Reading of FOF data from a SubFind output:
pos = pyread_gadget_hdf5('subhalo_035.0.hdf5', 10, 'CenterOfMass', sub_dir='fof')
Reading of FOF gas particle data from a SubFind output:
FeIa= pyread_gadget_hdf5('subhalo_035.0.hdf5', 0, 'IronFromSNIa', sub_dir='fof')
Reading of SPH Smoothed FOF gas particle data from a SubFind output:
SmoothFeIa = pyread_gadget_hdf5('subhalo_035.0.hdf5', 0, 'IronFromSNIa', sub_dir='fof', smooth=True)
Reading of SUBFIND non-star-forming gas data from a SubFind output:
pos = pyread_gadget_hdf5('subhalo_035.0.hdf5', 11, 'Temperature', sub_dir='subfind')
MODIFICATION HISTORY (by Alan Duffy):
1/09/13 Converted Claudio Dalla Vecchia's superlative IDL script nread_gadget_hdf5.pro
5/09/13 Removed ' input ' as an option in the script
5/09/13 Removed ' noconversion ', default is to leave in Code units, i.e. don't convert
5/09/13 Added ' cgsunits ' as the option to convert variables entirely to CGS units
(the default switch in the IDL version)
5/09/13 Added ' physunits ' as an option to convert variables to proper units with h-> h_73
as well as masses to Msol instead of 10^10, and MaximumEntropy in particular changes
6/09/13 Added ' noloop ' as an option to force a read in from one file alone
6/09/13 Changed header information to match flake8 standard
25/09/13 Added ' leaveh ' as an option to make units remain in code 'h' dependency
08/01/14 Fixed bug on looping over subfiles
08/01/14 Force C-Contiguous array
Any issues please contact Alan Duffy on mail@alanrduffy.com or (preferred) twitter @astroduff
"""
#########################################################
##
## Check user choices...
##
#########################################################
if cgsunits is not None and physunits is not None:
print("[ERROR] Can't convert units to both CGS and PHYS, as CGS includes PHYS, assuming CGS ")
physunits is None
if silent is None:
print("User gave "+filename)
## Determine the file type, i.e. SubFind, Snapshot or old FOF.
if filename.rfind('/subhalo') >= 0:
inputtype = 'SubFind' ## Define File type
inputname = 'subhalo'
elif filename.rfind('/group') >= 0:
inputtype = 'FoF' ## Define File type
inputname = 'group'
elif filename.rfind('/snap') >= 0:
inputtype = 'Snapshot' ## Define File type
inputname = 'snap'
if silent is None:
print("This is a "+inputtype+" output filetype")
## Check that the file given and the ptype used makes sense
if ptype < 0 or \
inputtype == 'Snapshot' and ptype > 7 or \
inputtype == 'FoF' and ptype > 3 or \
inputtype == 'SubFind' and ptype > 13:
print('[ERROR] ptype = '+str(ptype)+' is not allowed with this input file type ('+input+')')
return -1
if ptype < 6:
if inputtype == 'SubFind' and sub_dir is None:
print('[ERROR] ptype = '+str(ptype)+' is not allowed without specifying sub_dir flag')
return -2
#########################################################
##
## Determine if, and how many, subfiles we must loop over
##
#########################################################
folder_index = filename.rfind(inputname)
snapnum_index = filename[:folder_index-1].rfind('_')
snapnum = int(filename[snapnum_index+1:folder_index-1])
numfile = len(fnmatch.filter(os.listdir(filename[:folder_index]), '*.hdf5'))
if noloop is not None:
numfile = 1 ## Force the code to only consider the input file
if silent is None:
print("Considering "+str(numfile)+" subfiles")
#########################################################
##
## Read in common header parameters
##
#########################################################
with h5py.File(filename, "r") as fin:
hubble = fin['Header'].attrs['HubbleParam']
aexp = fin['Header'].attrs['ExpansionFactor']
if numfile == 0: ## Stupid bug with one file subfind outputs, the total array isn't created.
numpart_total = fin['Header'].attrs['NumPart_Total']
else: ## instead must read the case for the single file 'ThisFile'
numpart_total = fin['Header'].attrs['NumPart_ThisFile']
if ptype < 6:
if numpart_total[ptype] == 0:
print("There are no particles of type "+str(ptype)+" quitting")
return -3
masstable = fin['Header'].attrs['MassTable']
#########################################################
##
## Read in unit conversion
##
#########################################################
if cgsunits is not None or physunits is not None:
gamma = fin['Constants'].attrs['GAMMA']
solar_mass = fin['Constants'].attrs['SOLAR_MASS'] ## [g]
boltzmann = fin['Constants'].attrs['BOLTZMANN'] ## [erg/K]
protonmass = fin['Constants'].attrs['PROTONMASS'] ## [g]
sec_per_year = fin['Constants'].attrs['SEC_PER_YEAR']
if silent is None:
print("Note h="+("%.3f" % hubble)+" in this simulation"+\
" and snapshot is at a="+("%.3f" % aexp)+\
" (z="+("%.3f" % (1./aexp-1.))+")")
#########################################################
##
## Read in Chemical Parameters, i.e. Element Names, if present
##
#########################################################
e = 'Parameters/ChemicalElements' in fin
if e is True:
nelements = fin['Parameters/ChemicalElements'].attrs['BG_NELEMENTS']
elementnames = fin['Parameters/ChemicalElements'].attrs['ElementNames']
if silent is None:
print("Number of Elements Tracked "+str(nelements)+", they are")
for elementtype in elementnames:
print(elementtype)
#########################################################
##
## Assuming the user has given us the correct information
## let's work out what array to access
## If particle type is less than 6 (more than 10) we want
## the actual particle values, otherwise 10 is the group variables
## and
##
## Also checks the case for smooth_element variables!
##
#########################################################
## For the case of a Baryon run, check if the user is asking for an element
## AND if that's a smoothed quantity then change their request slightly...
if numpart_total[0] > 0 or numpart_total[4] > 0 or numpart_total[5] > 0:
if var_name in list(elementnames):
var_name = 'ElementAbundance/'+var_name
if smooth is not None:
var_name = 'Smoothed'+var_name
if ptype < 6:
global_var_name = '/'+'PartType'+str(ptype)+'/'+var_name+'/'
elif ptype == 10:
global_var_name = '/'+var_name+'/'
elif ptype == 11:
global_var_name = '/NSF/'+var_name+'/'
elif ptype == 12:
global_var_name = '/SF/'+var_name+'/'
elif ptype == 13:
global_var_name = '/Stars/'+var_name+'/'
if sub_dir is not None:
global_var_name = sub_dir.upper()+global_var_name
if silent is None:
print("Now select the requested variable "+global_var_name)
#########################################################
##
## Now loop over the subfiles and concatenate arrays
##
#########################################################
prefix_index = filename.rfind('.hdf5') ## First HDF5
for ifile in range(0,numfile):
if noloop is not None:
newinfile = filename
else:
newinfile = filename[0:folder_index] + inputname +'_'+str(snapnum).zfill(3)+'.'+str(ifile)+filename[prefix_index:]
if silent is None:
print("Reading from "+newinfile)
with h5py.File(newinfile, "r") as fin:
## If the dataset is present in the file then read it into tmpset, and if dataset exists concatenate them
e = global_var_name in fin
if e is True:
tmpset = pd.DataFrame(fin[global_var_name][()]) ## Let HDF5 metadata dictate array format
try:
dataset
except NameError:
dataset = tmpset
## Grab the conversion factors while we find this dataset for the first time
if cgsunits is not None or physunits is not None:
cgsconvfactor = fin[global_var_name].attrs['CGSConversionFactor']
aexpscaleexponent = fin[global_var_name].attrs['aexp-scale-exponent']
hscaleexponent = fin[global_var_name].attrs['h-scale-exponent']
else:
dataset = pd.concat([dataset,tmpset],ignore_index=True)
try:
dataset
except NameError:
if var_name == 'Mass' and inputtype == 'Snapshot':
if ptype == 1 or ptype == 2 or ptype == 3:
## Have to hardwire in the case for collisionless particles, which all have the same mass,
## as given by the masstable, and a number of particles given by Header numppart_total
## Assume that the standard Gadget units are used.
if cgsunits is not None or physunits is not None:
cgsconvfactor=1.989e43
aexpscaleexponent=0.0
hscaleexponent=-1.0
dataset = pd.DataFrame(numpy.ones(numpart_total[ptype])*masstable[ptype], dtype='f8')
else:
print("Are you reading in a strange particle type mass? ")
return -4
else:
print("There is no dataset... something went wrong reading this! ")
return -5
#########################################################
##
## Horribe Bug in the Code which has the FOF CoM wrong scaling
##
#########################################################
if cgsunits is not None or physunits is not None:
if global_var_name == 'FOF/CenterOfMassVelocity/' and abs(aexpscaleexponent-0.5) < 0.01:
print("OWLS SubFind had a bug which stated FoF CenterOfMassVelocity had aexp of 0.5, in reality it was -1.0 ")
aexpscaleexponent = -1.0
#########################################################
##
## Convert the array to cgs units, including the factors
## for 'a' and 'h' dependencies, else output in code units
##
#########################################################
if cgsunits is not None:
if leaveh is not None:
convfactor = aexp**aexpscaleexponent * cgsconvfactor
else:
convfactor = aexp**aexpscaleexponent * hubble**hscaleexponent * cgsconvfactor
if physunits is not None:
if leaveh is not None:
convfactor = aexp**aexpscaleexponent
else:
convfactor = aexp**aexpscaleexponent * hubble**hscaleexponent
if var_name == 'Mass' or var_name == 'MassType' or var_name == 'InitialMass' or var_name == 'Halo_M_TopHat200' or \
var_name == 'Halo_M_Crit200' or var_name == 'Halo_M_Crit500' or var_name == 'Halo_M_Crit2500' or \
var_name == 'Halo_M_Mean200' or var_name == 'Halo_M_Mean500' or var_name == 'Halo_M_Mean2500':
convfactor *= cgsconvfactor/solar_mass
if var_name == 'StarFormationRate':
convfactor *= cgsconvfactor/(solar_mass/sec_per_year)
if var_name == 'StellarAge':
convfactor *= cgsconvfactor/sec_per_year
if var_name == 'MaximumEntropy':
convfactor *= cgsconvfactor*protonmass**gamma / boltzmann
if cgsunits is not None or physunits is not None:
dataset *= convfactor
if silent is None:
if cgsunits is not None:
print("Converting units to CGS")
if physunits is not None:
print("Converting units to Proper Normalised Units ")
print("Scale Factor dependency "+str(aexpscaleexponent))
if leaveh is not None:
print("User asked to leave little h unchanged! ")
else:
print("little h dependency "+str(hscaleexponent))
print("Overall conversion factor "+str(convfactor))
else:
if silent is None:
print("No conversion - still in Code units")
#########################################################
##
## Convert the 1D array (nadded*3) to 2D matrix (nadded,3)
## Except for particle type arrays which are (nadded,6)
##
#########################################################
ncols = 1
if var_name == 'Coordinates' or var_name == 'Velocity' or var_name == 'CenterOfMass' \
or var_name == 'CenterOfMassVelocity' or var_name == 'Position' or var_name == 'SubSpin' \
or var_name == 'GasSpin' or var_name == 'StarSpin' or var_name == 'SFSpin' \
or var_name == 'NSFSpin':
if inputtype != 'Snapshot':
ncols = 3
if var_name == 'LengthType' or var_name == 'MassType' or var_name == 'OffsetType' \
or var_name == 'SubHalfMassProj' or var_name == 'SubHalfMass':
if inputtype != 'Snapshot':
ncols = 6
if ncols > 1:
nadded = len(dataset.index) // ncols
dataset = pd.DataFrame(dataset.values.reshape((nadded,ncols)))
if floatunits is not None:
dataset = dataset.astype('f4')## Make the units float32
if silent is None:
print("Done ")
## User wants to change from Dataframe to numpy format
if nopanda is not None:
dataset = numpy.ascontiguousarray(dataset)
return dataset