"""Support for the Rockstar halo finder"""
from __future__ import annotations
import glob
import os.path
import sys
import numpy as np
from .. import util
from . import HaloCatalogue
from .details import number_mapping
class RockstarFormatRevisionError(RuntimeError):
pass
[docs]
class RockstarCatalogue(HaloCatalogue):
[docs]
def __init__(self, sim, filename=None, format_revision=None):
"""Initialize a RockstarCatalogue.
Parameters
----------
sim : :class:`pynbody.snapshot.simsnap.SimSnap`
The simulation snapshot to which this catalogue applies.
filename : str, optional
The path to the Rockstar outputs. This can either be the first binary file in the series (e.g.
``path/to/halos_15.0.bin``) or the stem for the series (e.g. ``path/to/halos_15``). If not specified,
the code will attempt to find the Rockstar outputs in the same directory as the simulation snapshot.
format_revision : int, str, optional
Override the header's format revision information. Specify 1, 2, 'caterpillar', 'galaxies' for Rockstar
prior to 2014, post 2014, customized for the caterpillar project and for rockstar galaxies respectively.
If not specified, the code will attempt to detect the format revision automatically.
"""
if filename is not None:
filestem = str(filename)
if filestem.endswith(".0.bin"):
filestem = filestem[:-6]
self._files = glob.glob(filestem + "*.bin") + glob.glob(filestem + "*.boundbin")
else:
pathname = os.path.dirname(sim.filename)
self._files = glob.glob(os.path.join(pathname, 'halos*.bin'))
if len(self._files)==0 :
self._files = glob.glob(os.path.join(pathname, 'halos*.boundbin'))
self._files.sort()
if len(self._files)==0:
raise OSError("Could not find any Rockstar output. Try specifying pathname='/path/to/rockstar/outputfolder'")
self._cpus = [_RockstarCatalogueOneCpu(file_i, format_revision=format_revision) for file_i in self._files]
self._prune_files_from_wrong_scalefactor(sim)
halomins = [cpu.halo_min_inclusive for cpu in self._cpus]
# order self._cpus and self._files by halomins
# this isn't technically necessary but it means that the internal halo indexing will be
# in the same order as the halo numbering in the files. In fact, it probably means that
# the internal halo indexing will be identical to the halo numbering in the files, but we
# won't take that for granted anywhere.
self._cpus = [x for _, x in sorted(zip(halomins, self._cpus))]
self._files = [x for _, x in sorted(zip(halomins, self._files))]
halo_numbers = np.empty(sum(len(x) for x in self._cpus), dtype=int)
self._cpu_per_halo = np.empty(len(halo_numbers), dtype=int)
i = 0
for j, cat in enumerate(self._cpus):
halo_numbers[i:i+len(cat)] = cat.get_halo_numbers()
self._cpu_per_halo[i:i+len(cat)] = j
i+=len(cat)
assert i == len(halo_numbers)
super().__init__(sim, number_mapping.create_halo_number_mapper(halo_numbers))
def _prune_files_from_wrong_scalefactor(self, sim):
new_cpus = []
new_files = []
for file,cpu in zip(self._files,self._cpus):
if abs(sim.properties['a']-cpu._head['scale'][0])<1e-6:
new_cpus.append(cpu)
new_files.append(file)
self._cpus = new_cpus
self._files = new_files
def _get_particle_indices_one_halo(self, halo_number):
halo_index = self.number_mapper.number_to_index(halo_number)
cpu = self._cpu_per_halo[halo_index]
iords = self._cpus[cpu].read_iords_for_halo(halo_number)
self._init_iord_to_fpos()
return self._iord_to_fpos.map_ignoring_order(iords)
def _get_all_particle_indices(self):
iords = np.empty(0, dtype=int)
boundaries = np.empty((0, 2), dtype=int)
for cpu in self._cpus:
iords_this_cpu, boundaries_this_cpu = cpu.read_iords_for_all_halos()
boundaries = np.append(boundaries, boundaries_this_cpu + len(iords), axis=0)
iords = np.append(iords, iords_this_cpu)
self._init_iord_to_fpos()
fpos = iords # nb this doesn't copy! but we won't use the iords again after this, so it's ok
for a, b in boundaries:
# iord_to_fpos may not retain ordering, so have to do this per halo
fpos[a:b] = self._iord_to_fpos.map_ignoring_order(iords[a:b])
return fpos, boundaries
[docs]
def get_properties_one_halo(self, halo_number):
halo_index = self.number_mapper.number_to_index(halo_number)
cpu = self._cpu_per_halo[halo_index]
return self._cpus[cpu].read_properties_for_halo(halo_number)
[docs]
def get_properties_all_halos(self, with_units=True) -> dict:
props = {}
for cpu in self._cpus:
props_this_cpu = cpu.read_properties_all_halos()
# concatenate all the properties
for k, v in props_this_cpu.items():
if k in props:
props[k] = np.concatenate((props[k], v))
else:
props[k] = v
return props
@classmethod
def _can_load(cls, sim, filename=None, format_revision=None):
if filename is None:
return len(
glob.glob(os.path.join(os.path.dirname(sim.filename), 'halos*.bin'))
) > 0
else:
if str(filename).endswith(".0.bin") and os.path.exists(filename):
return True
if os.path.exists(str(filename)+".0.bin"):
return True
return False
class _RockstarCatalogueOneCpu:
"""
Low-level reader for single CPU output from Rockstar. Users should normally not use this class,
rather using RockstarCatalogue which collates the multiple sub-files that Rockstar produces.
"""
head_type = np.dtype([('magic',np.uint64),('snap',np.int64),
('chunk',np.int64),('scale','f'),
('Om','f'),('Ol','f'),('h0','f'),
('bounds','f',6),('num_halos',np.int64),
('num_particles',np.int64),('box_size','f'),
('particle_mass','f'),('particle_type',np.int64),
('format_revision',np.int32),
('rockstar_version',np.str_,12)])
halo_types = {
1: np.dtype([('id', np.int64), ('pos', 'f', 3), ('vel', 'f', 3),
('corevel', 'f', 3), ('bulkvel', 'f', 3), ('m', 'f'),
('r', 'f'),
('child_r', 'f'), ('vmax_r', 'f'), ('mgrav', 'f'),
('vmax', 'f'), ('rvmax', 'f'), ('rs', 'f'),
('klypin_rs', 'f'), ('vrms', 'f'), ('J', 'f', 3),
('energy', 'f'), ('spin', 'f'), ('alt_m', 'f', 4),
('Xoff', 'f'), ('Voff', 'f'), ('b_to_a', 'f'),
('c_to_a', 'f'), ('A', 'f', 3), ('b_to_a2', 'f'),
('c_to_a2', 'f'), ('A2', 'f', 3), ('bullock_spin', 'f'),
('kin_to_pot', 'f'), ('m_pe_b', 'f'), ('m_pe_d', 'f'),
('num_p', np.int64), ('num_child_particles', np.int64),
('p_start', np.int64), ('desc', np.int64),
('flags', np.int64), ('n_core', np.int64),
('min_pos_err', 'f'), ('min_vel_err', 'f'),
('min_bulkvel_err', 'f')], align=True) # Rockstar format v1
,
2: np.dtype([('id',np.int64),('pos','f',3),('vel','f',3),
('corevel','f',3),('bulkvel','f',3),('m','f'),
('r','f'),
('child_r','f'),('vmax_r','f'),('mgrav','f'),
('vmax','f'),('rvmax','f'),('rs','f'),
('klypin_rs','f'),('vrms','f'),('J','f',3),
('energy','f'),('spin','f'),('alt_m','f',4),
('Xoff','f'),('Voff','f'),('b_to_a','f'),
('c_to_a','f'),('A','f',3),('b_to_a2','f'),
('c_to_a2','f'),('A2','f',3),('bullock_spin','f'),
('kin_to_pot','f'),('m_pe_b','f'),('m_pe_d','f'),
('halfmass_radius','f'),
('num_p',np.int64),('num_child_particles',np.int64),
('p_start',np.int64),('desc',np.int64),
('flags',np.int64),('n_core',np.int64),
('min_pos_err','f'),('min_vel_err','f'),
('min_bulkvel_err','f')], align=True), # Rockstar format v2, includes halfmass_radius
'caterpillar': np.dtype([('id',np.int64),
('pos','f',3),('vel','f',3),
('corevel','f',3),('bulkvel','f',3),('m','f'),
('r','f'),
('child_r','f'),('vmax_r','f'),('mgrav','f'),
('vmax','f'),('rvmax','f'),('rs','f'),
('klypin_rs','f'),('vrms','f'),('J','f',3),
('energy','f'),('spin','f'),('alt_m','f',4),
('Xoff','f'),('Voff','f'),('b_to_a','f'),
('c_to_a','f'),('A','f',3),('b_to_a2','f'),
('c_to_a2','f'),('A2','f',3),('bullock_spin','f'),
('kin_to_pot','f'),('m_pe_b','f'),('m_pe_d','f'),
('halfmass_radius','f'),
('num_p',np.int64),('num_child_particles',np.int64),
('p_start',np.int64),('desc',np.int64),
('flags',np.int64),('n_core',np.int64),
('min_pos_err','f'),('min_vel_err','f'),
('min_bulkvel_err','f'),
('num_bound', 'i8'), ('num_iter', 'i8')]
, align=True), # Hacked rockstar from caterpillar project
'galaxies': np.dtype(
[
("id", np.int64),
("pos", np.float32, 3),
("vel", np.float32, 3),
("corevel", np.float32, 3),
("bulkvel", np.float32, 3),
("m", np.float32),
("r", np.float32),
("child_r", np.float32),
("vmax_r", np.float32),
("mgrav", np.float32),
("vmax", np.float32),
("rvmax", np.float32),
("rs", np.float32),
("klypin_rs", np.float32),
("vrms", np.float32),
("J", np.float32, 3),
("energy", np.float32),
("spin", np.float32),
("alt_m", np.float32, 4),
("Xoff", np.float32),
("Voff", np.float32),
("b_to_a", np.float32),
("c_to_a", np.float32),
("A", np.float32, 3),
("b_to_a2", np.float32),
("c_to_a2", np.float32),
("A2", np.float32, 3),
("bullock_spin", np.float32),
("kin_to_pot", np.float32),
("m_pe_b", np.float32),
("m_pe_d", np.float32),
("num_p", np.int64),
("num_child_particles", np.int64),
("p_start", np.int64),
("desc", np.int64),
("flags", np.int64),
("n_core", np.int64),
("min_pos_err", np.float32),
("min_vel_err", np.float32),
("min_bulkvel_err", np.float32),
("type", np.int32),
("sm", np.float32),
("gas", np.float32),
("bh", np.float32),
("peak_density", np.float32),
("av_density", np.float32),
],
align=True
), # Galaxy format from Rockstar
}
def __init__(self, filename=None, format_revision=None):
self._rsFilename = filename
if not os.path.exists(self._rsFilename):
raise OSError(
"Halo catalogue not found -- check the file name of catalogue data"
" or try specifying a catalogue using the filename keyword"
)
with util.open_(self._rsFilename, 'rb') as f:
self._head = np.frombuffer(f.read(self.head_type.itemsize),
dtype=self.head_type)
# Seek to absolute position
f.seek(256)
self._nhalos = self._head['num_halos'][0]
self._load_rs_halos_with_format_detection(f, format_revision)
def _load_rs_halos_with_format_detection(self, f, format_revision):
if format_revision is None:
# The 'galaxies' format can be either 1 or 2, so we need to try it
# in both cases.
format_revision_to_try = [self._head['format_revision'][0], "galaxies"]
else:
format_revision_to_try = [format_revision]
current_pos = f.tell()
for format_revision in format_revision_to_try:
f.seek(current_pos)
try:
self.halo_type = self.halo_types[format_revision]
self._load_rs_halos(f)
return
except RockstarFormatRevisionError:
pass
raise RockstarFormatRevisionError(
"Could not detect the format revision of the Rockstar catalogue."
)
def __len__(self):
return len(self._halo_lens)
def read_properties_for_halo(self, n):
if n<self.halo_min_inclusive or n>=self.halo_max_exclusive:
raise KeyError("No such halo")
with util.open_(self._rsFilename, 'rb') as f:
f.seek(self._haloprops_offset + (n - self.halo_min_inclusive) * self.halo_type.itemsize)
halo_data = np.fromfile(f, dtype=self.halo_type, count=1)
# TODO: properties are in Msun / h, Mpc / h
return dict(list(zip(halo_data.dtype.names,halo_data[0])))
def read_properties_all_halos(self):
with util.open_(self._rsFilename, 'rb') as f:
f.seek(self._haloprops_offset)
data = np.fromfile(f, dtype=self.halo_type, count=self.halo_max_exclusive - self.halo_min_inclusive)
data_dict = {name: data[name] for name in data.dtype.names}
return data_dict
def _load_rs_halos(self, f):
self._haloprops_offset = f.tell()
self._halo_offsets = np.empty(self._head['num_halos'][0],dtype=np.int64)
self._halo_lens = np.empty(self._head['num_halos'][0],dtype=np.int64)
offset = self._haloprops_offset+self.halo_type.itemsize*self._head['num_halos'][0]
self.halo_min_inclusive = int(np.fromfile(f, dtype=self.halo_type, count=1)['id'][0])
self.halo_max_exclusive = int(self.halo_min_inclusive + self._head['num_halos'][0])
f.seek(self._haloprops_offset)
for this_id in range(self.halo_min_inclusive, self.halo_max_exclusive):
halo_data = np.fromfile(f, dtype=self.halo_type, count=1)
if halo_data['id'] != this_id:
raise RockstarFormatRevisionError(
"Error while reading halo catalogue. Expected "
"halo ID %d, but got %d" % (this_id, halo_data['id'])
)
self._halo_offsets[this_id - self.halo_min_inclusive] = offset
if 'num_bound' in self.halo_type.names:
num_ptcls = int(halo_data['num_bound'][0])
else:
num_ptcls = int(halo_data['num_p'][0])
self._halo_lens[this_id - self.halo_min_inclusive] = num_ptcls
offset+=num_ptcls*np.dtype('int64').itemsize
def get_halo_numbers(self):
return np.arange(self.halo_min_inclusive, self.halo_max_exclusive)
def read_iords_for_halo(self, num):
if num<self.halo_min_inclusive or num>=self.halo_max_exclusive:
raise KeyError("No such halo")
with util.open_(self._rsFilename, 'rb') as f:
f.seek(self._halo_offsets[num - self.halo_min_inclusive])
return np.fromfile(f, dtype=np.int64, count=self._halo_lens[num - self.halo_min_inclusive])
def read_iords_for_all_halos(self):
"""Returns an array with all halo iords, and the boundaries of each halo in the array."""
# check the halo ids are contiguous as expected
assert (np.diff(self._halo_offsets) == 8 * self._halo_lens[:-1]).all()
with util.open_(self._rsFilename, 'rb') as f:
f.seek(self._halo_offsets[0])
iords = np.fromfile(f, dtype=np.int64, count=self._halo_lens.sum())
boundaries = np.empty((len(self._halo_lens), 2), dtype=np.int64)
boundaries[:,1] = np.cumsum(self._halo_lens)
boundaries[0,0] = 0
boundaries[1:,0] = boundaries[:-1,1]
return iords, boundaries