"""Support for the SubFind halo finder"""
from __future__ import annotations
import os.path
import warnings
import numpy as np
from .. import units
from ..array import SimArray
from . import HaloCatalogue
from .details import number_mapping, particle_indices
from .subhalo_catalogue import SubhaloCatalogue
[docs]
class SubfindCatalogue(HaloCatalogue):
"""Handles catalogues produced by the SubFind halo finder (old versions that do not use HDF5 outputs)."""
[docs]
def __init__(self, sim, filename=None, subs=None, subhalos=False, ordered=None, _inherit_data_from=None):
"""Initialise a SubFind catalogue
By default, the FoF groups are imported, and subhalos are available via the 'subhalos' attribute of each
halo object, e.g.
>>> snap = pynbody.load('path/to/snapshot')
>>> halos = snap.halos()
>>> halos[1].subhalos[2] # returns the third subhalo of FoF group 1
However by setting ``subs=True``, the FoF groups are ignored and the catalogue is of all subhalos.
Parameters
----------
sim : ~pynbody.snapshot.simsnap.SimSnap
The simulation snapshot to which this catalogue applies.
filename : str, optional
The path to the SubFind output(s). This is expected to be a folder named ``groups_XXX`` where XXX is the
snapshot number. The code extracts the snapshot number from the folder name and uses it to construct
the filename of the catalogue files, for example ``groups_XXX/subhalo_tab_XXX.0``. If no filename is
provided, the code will attempt to find a suitable catalogue starting from the simulation's directory.
subhalos : bool, optional
If False (default), catalogue represents the FoF groups and subhalos are available through the
:meth:`~pynbody.halo.Halo.subhalos` attribute of each group (see note above). If True, the catalogue
represents the subhalos directly and FoF groups are not available.
ordered : bool, optional
If True, the snapshot must have sequential iords. If False, the iords might be out-of-sequence, and
therefore reordering will take place. If None (default), the iords are examined to check whether
re-ordering is required or not. Note that re-ordering can be undesirable because it also destroys
subfind's order-by-binding-energy
_inherit_data_from : SubfindCatalogue, optional
For internal use only; allows subhalo catalogue to share data with its parent FOF catalogue
"""
if subs is not None:
warnings.warn("The 'subs' argument to SubfindCatalogue is deprecated; use 'subhalos' instead",
DeprecationWarning)
subhalos = subs
self._subs=subhalos
if ordered is not None:
self._ordered = ordered
else:
self._ordered = bool((sim['iord']==np.arange(len(sim))).all())
self._halos = {}
self.dtype_int = sim['iord'].dtype
self.dtype_flt = 'float32' #SUBFIND data is apparently always single precision???
self._subfind_dir = filename or self._name_of_catalogue(sim)
self.header = self._read_header()
self._tasks = self.header[4]
if _inherit_data_from:
self._inherit_data(_inherit_data_from)
else:
self._read_data(sim)
if subhalos:
if self.header[6]==0:
raise ValueError("This file does not contain subhalos")
length = len(self._subhalodat['sub_off'])
else:
length = len(self._halodat['group_off'])
super().__init__(sim, number_mapping.SimpleHaloNumberMapper(0, length))
if not subhalos:
self._subhalo_catalogue = SubfindCatalogue(sim, subhalos=True, filename=filename,
ordered=self._ordered, _inherit_data_from=self)
def _inherit_data(self, from_):
inherit = ['_halodat', '_subhalodat', '_keys_halo', '_keys_subhalo']
for k in inherit:
setattr(self, k, getattr(from_, k))
def _get_all_particle_indices(self):
ids = self._read_ids()
boundaries = np.empty((len(self), 2), dtype=self.dtype_int)
if self._subs:
boundaries[:, 0] = self._subhalodat['sub_off']
boundaries[:, 1] = self._subhalodat['sub_off'] + self._subhalodat['sub_len']
else:
boundaries[:, 0] = self._halodat['group_off']
boundaries[:, 1] = self._halodat['group_off'] + self._halodat['group_len']
if not self._ordered:
self._init_iord_to_fpos()
for a, b in boundaries:
ids[a:b] = self._iord_to_fpos.map_ignoring_order(ids[a:b])
# must be done segmented in case iord_to_fpos doesn't preserve input order
return particle_indices.HaloParticleIndices(ids, boundaries)
[docs]
def get_properties_one_halo(self, i):
extract = units.get_item_with_unit
properties = {}
if self._subs is False:
for key in self._keys_halo:
properties[key] = extract(self._halodat[key], i)
properties['children'] = self._get_children_of_group(i)
else:
for key in self._keys_subhalo:
properties[key] = extract(self._subhalodat[key], i)
properties['children'] = self._get_children_of_sub(i)
return properties
def _get_children_of_group(self, group_nr):
if self.header[6] > 0:
return np.where(self._subhalodat['sub_groupNr'] == group_nr)[0]
else:
return []
def _get_children_of_sub(self, subhalo_nr):
# this goes down one level in the hierarchy, i.e. a subhalo will have all its sub-subhalos listed,
# but not its sub-sub-subhalos (those will be listed in each sub-subhalo)
halo_number_within_group = (self._subhalodat['sub_groupNr'][:subhalo_nr]
== self._subhalodat['sub_groupNr'][subhalo_nr]).sum()
return np.where((self._subhalodat['sub_parent'] == halo_number_within_group) &
(self._subhalodat['sub_groupNr'] == self._subhalodat['sub_groupNr'][subhalo_nr]))[0]
[docs]
def get_properties_all_halos(self, with_units=True) -> dict:
properties = {}
if self._subs:
data = self._subhalodat
properties['children'] = [self._get_children_of_sub(subhalo_nr)
for subhalo_nr in self.number_mapper.all_numbers]
else:
data = self._halodat
if self.header[6] > 0:
properties['children'] = [self._get_children_of_group(group_nr)
for group_nr in self.number_mapper.all_numbers]
else:
properties['children'] = []
keys = self._keys_halo if not self._subs else self._keys_subhalo
for key in keys:
if with_units:
properties[key] = data[key]
else:
properties[key] = data[key].view(np.ndarray)
return properties
def _read_header(self):
iout = self._subfind_dir.split("_")[-1]
filename = os.path.join(
self._subfind_dir,
f"subhalo_tab_{iout}.0"
)
with open(filename, "rb") as fd:
# read header: this is strange but it works: there is an extra value in
# header which we delete in the next step
header = np.fromfile(fd, dtype='int32', sep="", count=8)
header = np.delete(header, 4)
return header
def _get_subhalo_catalogue(self, parent_halo_number):
if self._subs:
return SubhaloCatalogue(self, self._get_children_of_sub(parent_halo_number))
else:
return SubhaloCatalogue(self._subhalo_catalogue,
self._get_children_of_group(parent_halo_number))
def _read_ids(self):
data_ids = np.array([], dtype=self.dtype_int)
iout = self._subfind_dir.split("_")[-1]
for n in range(0, self._tasks):
filename = os.path.join(
self._subfind_dir,
f"subhalo_ids_{iout}.{n}"
)
fd = open(filename, "rb")
# for some reason there is an extra value in header which we delete
# in the next step
header1 = np.fromfile(fd, dtype='int32', sep="", count=7)
header = np.delete(header1, 4)
# TODO: include a check if both headers agree (they better)
ids = np.fromfile(fd, dtype=self.dtype_int, sep="", count=-1)
fd.close()
data_ids = np.append(data_ids, ids)
return data_ids
def _read_data(self, sim):
halodat={}
keys_flt=['mass', 'pos', 'mmean_200', 'rmean_200', 'mcrit_200', 'rcrit_200', 'mtop_200', 'rtop_200', 'contmass']
keys_int=['group_len', 'group_off', 'first_sub', 'Nsubs', 'cont_count', 'mostboundID']
for key in keys_flt:
halodat[key]=np.array([], dtype=self.dtype_flt)
for key in keys_int:
halodat[key]=np.array([], dtype='int32')
subhalodat={}
subkeys_int=['sub_len', 'sub_off', 'sub_parent', 'sub_mostboundID', 'sub_groupNr']
subkeys_flt=['sub_pos', 'sub_vel', 'sub_CM', 'sub_mass', 'sub_spin', 'sub_veldisp', 'sub_VMax', 'sub_VMaxRad', 'sub_HalfMassRad', ]
for key in subkeys_int:
subhalodat[key]=np.array([], dtype='int32')
subhalodat['sub_mostboundID']=np.array([], dtype=self.dtype_int)
#subhalodat['sub_groupNr']=np.array([], dtype=self.dtype_int) #these are special
for key in subkeys_flt:
subhalodat[key]=np.array([], dtype=self.dtype_flt)
self._keys_subhalo = subkeys_flt+subkeys_int
self._keys_halo = keys_flt + keys_int
for n in range(0,self._tasks):
iout = self._subfind_dir.split("_")[-1]
filename = os.path.join(
self._subfind_dir,
f"subhalo_tab_{iout}.{n}"
)
fd=open(filename, "rb")
header1=np.fromfile(fd, dtype='int32', sep="", count=8)
header=np.delete(header1,4)
#read groups
if header[0]>0:
halodat['group_len']=np.append(halodat['group_len'], np.fromfile(fd, dtype='int32', sep="", count=header[0]))
halodat['group_off']=np.append(halodat['group_off'], np.fromfile(fd, dtype='int32', sep="", count=header[0]))
halodat['mass']=np.append(halodat['mass'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['pos']=np.append(halodat['pos'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=3*header[0]) )
halodat['mmean_200']=np.append(halodat['mmean_200'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['rmean_200']=np.append(halodat['rmean_200'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['mcrit_200']=np.append(halodat['mcrit_200'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['rcrit_200']=np.append(halodat['rcrit_200'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['mtop_200']=np.append(halodat['mtop_200'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['rtop_200']=np.append(halodat['rtop_200'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['cont_count']=np.append(halodat['cont_count'], np.fromfile(fd, dtype='int32', sep="", count=header[0]))
halodat['contmass']=np.append(halodat['contmass'],np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[0]))
halodat['Nsubs']=np.append(halodat['Nsubs'],np.fromfile(fd, dtype='int32', sep="", count=header[0]))
halodat['first_sub']=np.append(halodat['first_sub'],np.fromfile(fd, dtype='int32', sep="", count=header[0]))
#read subhalos only if expected to exist from header
if header[5]>0:
subhalodat['sub_len']=np.append(subhalodat['sub_len'], np.fromfile(fd, dtype='int32', sep="", count=header[5]))
subhalodat['sub_off']=np.append(subhalodat['sub_off'], np.fromfile(fd, dtype='int32', sep="", count=header[5]))
subhalodat['sub_parent']=np.append(subhalodat['sub_parent'], np.fromfile(fd, dtype='int32', sep="", count=header[5]))
subhalodat['sub_mass']=np.append(subhalodat['sub_mass'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[5]))
subhalodat['sub_pos']=np.append(subhalodat['sub_pos'],np.fromfile(fd, dtype=self.dtype_flt, sep="", count=3*header[5]))
subhalodat['sub_vel']=np.append(subhalodat['sub_vel'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=3*header[5]))
subhalodat['sub_CM']=np.append(subhalodat['sub_CM'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=3*header[5]))
subhalodat['sub_spin']=np.append(subhalodat['sub_spin'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=3*header[5]))
subhalodat['sub_veldisp']=np.append(subhalodat['sub_veldisp'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[5]))
subhalodat['sub_VMax']=np.append(subhalodat['sub_VMax'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[5]))
subhalodat['sub_VMaxRad']=np.append(subhalodat['sub_VMaxRad'],np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[5]))
subhalodat['sub_HalfMassRad']=np.append(subhalodat['sub_HalfMassRad'], np.fromfile(fd, dtype=self.dtype_flt, sep="", count=header[5]))
subhalodat['sub_mostboundID']=np.append(subhalodat['sub_mostboundID'], np.fromfile(fd, dtype=self.dtype_int, sep="", count=header[5]))
subhalodat['sub_groupNr']=np.append(subhalodat['sub_groupNr'], np.fromfile(fd, dtype='int32', sep="", count=header[5]))
fd.close()
halodat['pos']=np.reshape(halodat['pos'], (header[1],3))
if header[6]>0:
#some voodoo because some SubFind files may have (at least?) one extra entry which is not really a subhalo
real_ones=np.where(halodat['first_sub']<header[6])[0]
fake_ones=np.where(halodat['first_sub']>=header[6])[0]
halodat['mostboundID']=np.zeros(len(halodat['Nsubs']),dtype=self.dtype_int)-1
halodat['mostboundID'][real_ones]=subhalodat['sub_mostboundID'][halodat['first_sub'][real_ones]] #useful for the case of unordered snapshot IDs
subhalodat['sub_pos']=np.reshape(subhalodat['sub_pos'], (header[6],3))
subhalodat['sub_vel']=np.reshape(subhalodat['sub_vel'], (header[6],3))
subhalodat['sub_CM']=np.reshape(subhalodat['sub_CM'], (header[6],3))
subhalodat['sub_spin']=np.reshape(subhalodat['sub_spin'], (header[6],3))
ar_names = 'mass', 'pos', 'mmean_200', 'rmean_200', 'mcrit_200', 'rcrit_200', 'mtop_200', 'rtop_200', \
'sub_mass', 'sub_pos', 'sub_vel', 'sub_CM', 'sub_veldisp', 'sub_VMax', 'sub_VMaxRad', 'sub_HalfMassRad'
ar_dimensions = 'kg', 'm', 'kg', 'm', 'kg', 'm', 'kg', 'm', \
'kg', 'm', 'm s^-1', 'm', 'm s^-1', 'm s^-1', 'm', 'm'
for name, dimension in zip(ar_names, ar_dimensions):
if name in halodat:
halodat[name] = SimArray(halodat[name], sim.infer_original_units(dimension))
halodat[name].sim = sim
if name in subhalodat:
subhalodat[name] = SimArray(subhalodat[name], sim.infer_original_units(dimension))
subhalodat[name].sim = sim
self._halodat, self._subhalodat = halodat, subhalodat
@staticmethod
def _name_of_catalogue(sim):
# standard path for multiple snapshot files
snapnum = os.path.basename(
os.path.dirname(sim.filename)).split("_")[-1]
parent_dir = os.path.dirname(os.path.dirname(sim.filename))
dir_path=os.path.join(parent_dir,"groups_" + snapnum)
if os.path.exists(dir_path):
return dir_path
# alternative path if snapshot is single file
else:
snapnum = os.path.basename(sim.filename).split("_")[-1]
parent_dir = os.path.dirname(sim.filename)
return os.path.join(parent_dir,"groups_" + snapnum)
@staticmethod
def _can_load(sim, filename=None, **kwargs):
if filename is not None:
if str(filename)[:-3].endswith("groups_"):
return True
file = SubfindCatalogue._name_of_catalogue(sim)
if os.path.exists(file):
if os.path.exists(file):
if file.endswith(".hdf5"):
return False
elif os.path.isdir(file) and os.listdir(file)[0].endswith(".hdf5"):
return False
else:
return True
else:
return False