Source code for pynbody.halo

"""
Support for halo and group catalogues.

Halo catalogues act like a dictionary, mapping from halo numbers to a Halo objects. The halo *number* is typically
determined by the halo finder, and is often (but not always) the same as the halo *index* which is the zero-based
offset within the catalogue.

If you have a supported halo catalogue on disk or a halo finder installed and correctly configured, you can access a
halo catalogue through ``f.halos()`` where ``f`` is a SimSnap.

See the :ref:`halo catalogue tutorial <halo_tutorial>` for introductory information and guidance.


.. _v2_0_halo_changes:

.. versionchanged:: 2.0

  Backwards-incompatible changes to the halo catalogue system

  For version 2.0, the halo catalogue loading system was substantially rewritten. The new system is more robust and
  more consistent across different halo finders. However, this means that some defaults have changed, most significantly
  in the AHF halo numbering. Backward-compatibility can be achieved by passing ``halo_numbers='v1'`` to the
  :class:`~pynbody.halo.ahf.AHFCatalogue` constructor. For more information, read the documentation for that class.

  Furthermore, older versions of pynbody (i.e. v1.x) could be configured to create a halo catalogue if one was not
  found, using AHF. This is no longer the case, as creating a halo catalogue requires choosing a halo finder and its
  parameters carefully for the task in hand and it was not possible to provide a one-size-fits-all solution.

  Finally, options to write ``.stat`` files and ``.grp`` files have been removed. However it is still possible to
  generate a ``.grp`` file by  calling :meth:`~HaloCatalogue.get_group_array` and writing out the resulting
  array of integers using a tool like ``numpy.savetxt``.

  By paring back the less-used functionality of the halo catalogue system, the remaining functionality is more
  consistent, robust, and extensible to new halo finders.


.. _supported_halo_finders:

Supported halo-finder formats
-----------------------------

The currently-supported formats are:

- Adaptahop (:class:`~pynbody.halo.adaptahop.AdaptaHOPCatalogue`);
- AHF (:class:`~pynbody.halo.ahf.AHFCatalogue`);
- HBT+ (:class:`~pynbody.halo.hbtplus.HBTPlusCatalogue`);
- HOP (:class:`~pynbody.halo.hop.HOPCatalogue`);
- Rockstar (:class:`~pynbody.halo.rockstar.RockstarCatalogue`);
- Subfind (old format :class:`~pynbody.halo.subfind.SubfindCatalogue`, or various HDF5 variants
  as :class:`~pynbody.halo.subfindhdf.SubfindHDFCatalogue`);
- VELOCIraptor (:class:`~pynbody.halo.velociraptor.VelociraptorCatalogue`).

In addition, generic halo finders which output a list of halo numbers for each particle are supported via
:class:`~pynbody.halo.number_array.HaloNumberCatalogue`.



.. note::

    The principal development of ``pynbody`` took place in the UK, and the spelling of "catalogue" is British English.
    However, since much code is written in American English, v2.0.0 introduced aliases such that all
    classes can be accessed with the American spelling ``HaloCatalog``, ``AdaptaHOPCatalog`` etc.


"""
from __future__ import annotations

import copy
import logging
import warnings
import weakref
from typing import TYPE_CHECKING, Iterable

import numpy as np
from numpy.typing import NDArray

from .. import array, snapshot, units, util
from ..util import iter_subclasses
from .details.iord_mapping import make_iord_to_offset_mapper
from .details.number_mapping import (
    HaloNumberMapper,
    MonotonicHaloNumberMapper,
    create_halo_number_mapper,
)
from .details.particle_indices import HaloParticleIndices

if TYPE_CHECKING:
    from .subhalo_catalogue import SubhaloCatalogue

logger = logging.getLogger("pynbody.halo")

[docs] class DummyHalo(snapshot.util.ContainerWithPhysicalUnitsOption):
[docs] def __init__(self): self.properties = {}
[docs] def physical_units(self, *args, **kwargs): pass
[docs] class Halo(snapshot.subsnap.IndexedSubSnap): """ Represents a single halo from a halo catalogue. Note that pynbody refers to groups, halos and subhalos interchangably, with the term "halo" being used to cover all of these. """
[docs] def __init__(self, halo_number, properties, halo_catalogue, *args, **kwa): super().__init__(*args, **kwa) self._halo_catalogue = halo_catalogue self._halo_number = halo_number self._descriptor = "halo_" + str(halo_number) self.properties = copy.copy(self.properties) self.properties['halo_number'] = halo_number self.properties.update(properties) # Inherit autoconversion from parent self._autoconvert_properties()
@property @util.deprecated("The sub property has been renamed to subhalos") def sub(self): """Deprecated alias for :property:`subhalos`.""" return self.subhalos @property def subhalos(self) -> SubhaloCatalogue: """A HaloCatalogue object containing only the subhalos of this halo.""" return self._halo_catalogue._get_subhalo_catalogue(self._halo_number)
[docs] def physical_units(self, distance='kpc', velocity='km s^-1', mass='Msol', persistent=True, convert_parent=True): if convert_parent: self._halo_catalogue.physical_units( distance=distance, velocity=velocity, mass=mass, persistent=persistent ) else: # Convert own properties self._autoconvert_properties()
[docs] class HaloCatalogue(snapshot.util.ContainerWithPhysicalUnitsOption, iter_subclasses.IterableSubclasses): """Generic halo catalogue object. To the user, this presents a simple interface where calling ``h[i]`` returns halo ``i``. Properties of halos can be retrieved without loading the halo via :meth:`get_properties_one_halo` or :meth:`get_properties_all_halos`. More information for users can be found in the :ref:`halo catalogue tutorial <halo_tutorial>`; see also the :ref:`supported halo finders <supported_halo_finders>`. Implementing a new format ^^^^^^^^^^^^^^^^^^^^^^^^^ To support a new format, subclass :class:`HaloCatalogue` and implement the following methods: * :meth:`__init__` * :meth:`_can_load` * :meth:`_get_all_particle_indices` * :meth:`_get_particle_indices_one_halo` [only if it's possible to do this more efficiently than :meth:`_get_all_particle_indices` for users accessing only a few halos] * :meth:`get_properties_all_halos` [only if you have halo finder-provided properties to expose] * :meth:`get_properties_one_halo` [only if you have halo finder-provided properties to expose, and it's efficient to expose them one halo at a time; the default implementation will call get_properties_all_halos and extract] * :meth:`get_group_array` [only if it's possible to do this more efficiently than the default implementation] Nomenclature/conventions are worth being aware of if you are implementing a new format: * The halo number is the user-exposed identifier for a halo. It is typically assigned by the halo finder, although subclasses are free to assign their own (e.g. some have a `halo_number` option that can be passed to the constructor to override the halo finder's numbering). The halo numbers are used to access individual halos via the [] operator. * The halo *index* is the zero-based offset within the catalogue, which may be different from the halo number. Internally, *pynbody* converts between these using a :class:`details.number_mapping.HaloNumberMapper` object, which is set up in the :meth:`__init__` method. * Particle indices should be returned from methods like :meth:`_get_particle_indices_one_halo` as zero-relative offsets within the snapshot, not particle IDs or 'iord's. Many halo finders output particle IDs which must therefore be mapped. To aid this, call :meth:`_init_iord_to_fpos` in your :meth:`__init__` method, which creates a mapper as :attr:`_iord_to_fpos`. See :mod:`details.iord_mapping` for more information. """
[docs] def __init__(self, sim, number_mapper): self._base: weakref[snapshot.SimSnap] = weakref.ref(sim) self.number_mapper: HaloNumberMapper = number_mapper self._index_lists: HaloParticleIndices | None = None self._properties: dict | None = None self._cached_halos: dict[int, Halo] = {} self._persistent_units = None
[docs] def load_all(self): """Loads all halos, which is normally more efficient if a large fraction of them will be accessed.""" if not self._index_lists: index_lists = self._get_all_particle_indices() properties = self.get_properties_all_halos(with_units=True) if isinstance(index_lists, tuple): index_lists = HaloParticleIndices(*index_lists) self._index_lists = index_lists if len(properties)>0: self._properties = properties if self._persistent_units is not None: self._cached_properties_to_physical_units(self._persistent_units)
[docs] @util.deprecated("precalculate has been renamed to load_all") def precalculate(self): """Deprecated alias for :meth:`load_all`""" self.load_all()
def _get_num_halos(self): return len(self.number_mapper) def _get_all_particle_indices_cached(self): """Get the index information for all halos, using a cached version if available""" self.load_all() return self._index_lists def _get_all_particle_indices(self) -> HaloParticleIndices | tuple[np.ndarray, np.ndarray]: """Returns information about the index list for all halos. Returns an HaloParticleIndices object, which is a container for the following information: - particle_ids: particle IDs contained in halos, sorted by halo ID - boundaries: the indices in particle_ids where each halo starts and ends """ raise NotImplementedError("This halo catalogue does not support loading all halos at once")
[docs] def get_properties_one_halo(self, halo_number) -> dict: """Returns a dictionary of properties for a single halo, given a halo_number """ # Default implementation: extract from all halos. Subclasses may override this if they can load properties # for a single halo more efficiently. self._properties = self.get_properties_all_halos(with_units=True) return self._get_properties_one_halo_using_cache_if_available(halo_number, self.number_mapper.number_to_index(halo_number))
[docs] def get_properties_all_halos(self, with_units=True) -> dict: """Returns a dictionary of properties for all halos. If with_units is True, the properties are returned as SimArrays with units if possible. Otherwise, numpy arrays are returned. Note that the returned properties are in contiguous arrays, and as a result may be in a different order to the halo numbers which are used to access individual halos. To map between halo numbers and properties, use the .number_mapper object; or access individual property dictionaries by halo number using get_properties_one_halo.""" return {}
def _get_properties_one_halo_using_cache_if_available(self, halo_number, halo_index): if self._properties is None: return self.get_properties_one_halo(halo_number) else: return {k: units.get_item_with_unit(self._properties[k],halo_index) for k in self._properties} def _get_particle_indices_one_halo(self, halo_number) -> NDArray[int]: """Get the index list for a single halo, given a halo_number. A generic implementation is provided that fetches index lists for all halos and then extracts the one""" self.load_all() return self._index_lists.get_particle_index_list_for_halo( self.number_mapper.number_to_index(halo_number) ) def _get_particle_indices_one_halo_using_list_if_available(self, halo_number, halo_index) -> NDArray[int]: if self._index_lists: return self._index_lists.get_particle_index_list_for_halo(halo_index) else: if len(self._cached_halos) == 5: warnings.warn("Accessing multiple halos may be more efficient if you call load_all() on the " "halo catalogue", RuntimeWarning) return self._get_particle_indices_one_halo(halo_number) # NB subclasses may implement loading one halo direct from disk in the above # if not, the default implementation will populate _cached_index_lists def _get_halo_cached(self, halo_number) -> Halo: if halo_number not in self._cached_halos: self._cached_halos[halo_number] = self._get_halo(halo_number) return self._cached_halos[halo_number] def _get_halo(self, halo_number) -> Halo: halo_index = self.number_mapper.number_to_index(halo_number) return Halo(halo_number, self._get_properties_one_halo_using_cache_if_available(halo_number, halo_index), self, self.base, self._get_particle_indices_one_halo_using_list_if_available(halo_number, halo_index))
[docs] def get_dummy_halo(self, halo_number) -> DummyHalo: """Return a DummyHalo object containing only the halo properties, no particle information""" h = DummyHalo() h.properties.update(self.get_properties_one_halo(halo_number)) return h
def __len__(self) -> int: return self._get_num_halos() def __iter__(self) -> Iterable[Halo]: self.load_all() for i in self.number_mapper: yield self[i] def __repr__(self): return f"<{type(self).__name__}, length {len(self)}>"
[docs] def keys(self): """Return an iterable of all halo numbers in the catalogue.""" return self.number_mapper.all_numbers
def __getitem__(self, item) -> Halo | SubhaloCatalogue: from .subhalo_catalogue import SubhaloCatalogue if isinstance(item, slice): return SubhaloCatalogue(self, np.arange(*item.indices(len(self)))) elif hasattr(item, "__len__"): return SubhaloCatalogue(self, item) else: return self._get_halo_cached(item) @property def base(self) -> snapshot.SimSnap: """The snapshot object that this halo catalogue is based on.""" return self._base() def _init_iord_to_fpos(self): """Create a member array, _iord_to_fpos, that maps particle IDs to file positions. This is a convenience function for subclasses to use.""" if not hasattr(self, "_iord_to_fpos"): if 'iord' in self.base.loadable_keys() or 'iord' in self.base.keys(): self._iord_to_fpos = make_iord_to_offset_mapper(self.base['iord']) else: warnings.warn("No iord array available; assuming halo catalogue is using sequential particle IDs", RuntimeWarning) class OneToOneIndex: def __getitem__(self, i): return i self._iord_to_fpos = OneToOneIndex() def _get_subhalo_catalogue(self, parent_halo_number: int) -> SubhaloCatalogue: from .subhalo_catalogue import SubhaloCatalogue props = self.get_properties_one_halo(parent_halo_number) if 'children' in props: return SubhaloCatalogue(self, props['children']) else: raise ValueError(f"This halo catalogue does not support subhalos")
[docs] @util.deprecated("This method is deprecated and will be removed in a future release. Use python `in` syntax instead.") def contains(self, halo_number: int) -> bool: """Deprecated alias; instead of ``h.contains(number)`` use ``number in h``.""" return halo_number in self
def __contains__(self, halo_number) -> bool: """Returns True if the halo catalogue contains the specified halo number.""" return halo_number in self.number_mapper
[docs] def get_group_array(self, family=None, use_index=False, fill_value=-1): """Return an array with an integer for each particle in the simulation, indicating the halo of that particle. If there are multiple levels (i.e. subhalos), the number returned corresponds to the lowest level, i.e. the smallest subhalo. Parameters ---------- family : str, optional If specified, return only the group array for the specified family. use_index: bool, optional If True, return the halo index rather than the halo number. (See the class documentation for the distinction between halo numbers and indices.) fill_value : int, optional The value to fill for particles not in any halo. """ self.load_all() number_per_particle = self._index_lists.get_halo_number_per_particle(len(self.base), None if use_index else self.number_mapper, fill_value = fill_value) if family is not None: return number_per_particle[self.base._get_family_slice(family)] else: return number_per_particle
[docs] def load_copy(self, halo_number): """Load a fresh SimSnap with only the particles in specified halo This relies on the underlying SimSnap being capable of partial loading.""" from .. import load halo_index = self.number_mapper.number_to_index(halo_number) return load(self.base.filename, take=self._get_particle_indices_one_halo_using_list_if_available(halo_number, halo_index))
[docs] def physical_units(self, distance='kpc', velocity='km s^-1', mass='Msol', persistent=True, convert_parent=False): self.base.physical_units(distance=distance, velocity=velocity, mass=mass, persistent=persistent) # Convert all instantiated subhalos for halo in self._cached_halos.values(): halo.physical_units( distance, velocity, mass, persistent=persistent, convert_parent=False ) all_units = [units.Unit(x) for x in (distance, velocity, mass, 'a', 'h', 'K')] if persistent: self._persistent_units = all_units self._cached_properties_to_physical_units(all_units)
def _cached_properties_to_physical_units(self, all_units): if self._properties is not None: for k in self._properties: if isinstance(self._properties[k], array.SimArray) and units.has_unit(self._properties[k]): self.base._autoconvert_array_unit(self._properties[k], all_units) @classmethod def _can_load(cls, sim): return False
from . import ( adaptahop, ahf, hbtplus, hop, number_array, rockstar, subfind, subfindhdf, velociraptor, ) def _fix_american_spelling(p): """Map American to British spelling (used by SimSnap.halos to allow flexible spelling)""" if isinstance(p, str) and p.endswith('Catalog'): return p.replace('Catalog', 'Catalogue') else: return p def _alias_american_spelling(): """Create American spelling aliases for all HaloCatalogue subclasses.""" for c in HaloCatalogue.iter_subclasses(): american_name = c.__name__.replace("Catalogue", "Catalog") # put american_name into the same module as c (not this module) if c.__module__.startswith('pynbody.halo.'): module = eval(c.__module__.replace('pynbody.halo.', '')) setattr(module, american_name, c) globals()['HaloCatalog'] = HaloCatalogue _alias_american_spelling()