Source code for pynbody.snapshot.gadget

"""
Implements reading old-style gadget binary files (format 1 and 2, but not HDF5).

The gadget array names are mapped into pynbody array names according to the mappings given by the `config.ini`
section `[gadget-name-mapping]`.

Very old gadget-1 style files have a fixed block order, which is specified in the config.ini section
`[gadget-1-blocks]`.



"""


import configparser
import copy
import errno
import itertools
import os
import pathlib
import struct
import sys
import warnings

import numpy as np

from .. import array, config_parser, family, units
from . import SimSnap, namemapper

# This is set here and not in a config file because too many things break
# if it is not 6


N_TYPE = 6

_type_map = {}

for name, gtypes in config_parser.items('gadget-type-mapping'):
    try:
        gtypes = np.array([int(q) for q in gtypes.split(",")])
        if (gtypes >= N_TYPE).any() or (gtypes < 0).any():
            raise ValueError(
                "Type specified for family " + name + " is out of bounds (" + gtypes + ").")
        _type_map[family.get_family(name)] = gtypes
    except configparser.NoOptionError:
        pass

_name_map, _rev_name_map = namemapper.setup_name_maps(
    'gadget-name-mapping', gadget_blocks=True)
_translate_array_name = namemapper.name_map_function(_name_map, _rev_name_map)


def _to_raw(s):
    if isinstance(s, str) and sys.version_info[0] > 2:
        return s.encode('utf-8')
    else:
        return s



[docs]
def gadget_type(fam):
    if isinstance(fam, list):
        l = []
        for sf in fam:
            l.extend(gadget_type(sf))
        return l
    elif fam is None:
        return list(np.arange(0, N_TYPE))
    else:
        return _type_map[fam]



class _GadgetBlock:

    """Class to describe each block.
    Each block has a start, a length, and a length-per-particle"""

    def __init__(self, start=0, length=0, partlen=0, dtype=np.float32, p_types=np.zeros(N_TYPE, bool)):
        # Start of block in file
        self.start = start
        # Length of block in file
        self.length = length
        # Bytes per particle in file
        self.partlen = partlen
        # Data type of block
        self.data_type = dtype
        # Types of particle this block contains
        self.p_types = p_types


def _output_order_gadget(all_keys):

    out = []
    out_dregs = copy.copy(all_keys)
    for X in map(str.strip, config_parser.get('gadget-default-output', 'field-ordering').split(',')):
        if X in out_dregs:
            del out_dregs[out_dregs.index(X)]
            out.append(X)

    return out + out_dregs


def _construct_gadget_header(data, endian='='):
    """Create a GadgetHeader from a byte range read from a file."""
    npart = np.zeros(N_TYPE, dtype=np.uint32)
    mass = np.zeros(N_TYPE)
    time = 0.
    redshift = 0.
    npartTotal = np.zeros(N_TYPE, dtype=np.int32)
    num_files = 0
    BoxSize = 0.
    Omega0 = 0.
    OmegaLambda = 0.
    HubbleParam = 0.
    NallHW = np.zeros(N_TYPE, dtype=np.int32)
    if data == '':
        return
    fmt = endian + "IIIIIIddddddddiiIIIIIIiiddddiiIIIIIIiiif48s"
    if struct.calcsize(fmt) != 256:
        raise Exception(
            "There is a bug in gadget.py; the header format string is not 256 bytes")
    (npart[0], npart[1], npart[2], npart[3], npart[4], npart[5],
     mass[0], mass[1], mass[2], mass[3], mass[4], mass[5],
     time, redshift,  flag_sfr, flag_feedback,
     npartTotal[0], npartTotal[1], npartTotal[
     2], npartTotal[3], npartTotal[4], npartTotal[5],
     flag_cooling, num_files, BoxSize, Omega0, OmegaLambda, HubbleParam, flag_stellarage, flag_metals,
     NallHW[0], NallHW[1], NallHW[2], NallHW[3], NallHW[4], NallHW[5],
     flag_entropy_instead_u, flag_doubleprecision, flag_ic_info, lpt_scalingfactor, fill) = struct.unpack(fmt, data)

    header = _GadgetHeader(npart, mass, time, redshift,
                           BoxSize, Omega0, OmegaLambda, HubbleParam, num_files)
    header.flag_sfr = flag_sfr
    header.flag_feedback = flag_feedback
    header.npartTotal = npartTotal
    header.flag_cooling = flag_cooling
    header.flag_stellarage = flag_stellarage
    header.flag_metals = flag_metals
    header.NallHW = NallHW
    header.flag_entropy_instead_u = flag_entropy_instead_u
    header.flag_doubleprecision = flag_doubleprecision
    header.flag_ic_info = flag_ic_info
    header.lpt_scalingfactor = lpt_scalingfactor
    header.endian = endian

    return header


class _GadgetHeader:

    """Describes the header of gadget class files; this is all our metadata, so we are going to store it inline"""

    def __init__(self, npart, mass, time, redshift, BoxSize, Omega0, OmegaLambda, HubbleParam, num_files=1):
        "Construct a header from values, instead of a datastring."""
        assert(len(mass) == 6)
        assert(len(npart) == 6)
        # Mass of each particle type in this file. If zero,
        # particle mass stored in snapshot.
        self.mass = mass
        # Time of snapshot
        self.time = time
        # Redshift of snapshot
        self.redshift = redshift
        # Boolean to test the presence of star formation
        self.flag_sfr = False
        # Boolean to test the presence of feedback
        self.flag_feedback = False
        # Boolean to test the presence of cooling
        self.flag_cooling = False
        # Number of files expected in this snapshot
        self.num_files = num_files
        # Box size of the simulation
        self.BoxSize = BoxSize
        # Omega_Matter. Note this is Omega_DM + Omega_Baryons
        self.Omega0 = Omega0
        # Dark energy density
        self.OmegaLambda = OmegaLambda
        # Hubble parameter, in units where it is around 70.
        self.HubbleParam = HubbleParam
        # Boolean to test whether stars have an age
        self.flag_stellarage = False
        # Boolean to test the presence of metals
        self.flag_metals = False
        # flags that IC-file contains entropy instead of u
        self.flag_entropy_instead_u = False
        # flags that snapshot contains double-precision instead of single
        # precision
        self.flag_doubleprecision = False
        self.flag_ic_info = False
        # flag to inform whether IC files are generated with Zeldovich approximation,
        # or whether they contain 2nd order lagrangian perturbation theory ICs.
        #    FLAG_ZELDOVICH_ICS     (1)   - IC file based on Zeldovich
        #    FLAG_SECOND_ORDER_ICS  (2)   - Special IC-file containing 2lpt masses
        #    FLAG_EVOLVED_ZELDOVICH (3)   - snapshot evolved from Zeldovich ICs
        #    FLAG_EVOLVED_2LPT      (4)   - snapshot evolved from 2lpt ICs
        #    FLAG_NORMALICS_2LPT    (5)   - standard gadget file format with 2lpt ICs
        # All other values, including 0 are interpreted as "don't know" for
        # backwards compatability.
        self.lpt_scalingfactor = 0.    # scaling factor for 2lpt initial conditions
        self.endian = ""
        # Number of particles
        self.npart = np.array(npart, dtype=np.uint32)
        if (npart < 2 ** 31).all():
            # First 32-bits of total number of particles in the simulation
            self.npartTotal = np.array(npart, dtype=np.int32)
            # Long word of the total number of particles in the simulation.
            # At least one version of N-GenICs sets this to something entirely
            # different.
            self.NallHW = np.zeros(N_TYPE, dtype=np.int32)
        else:
            self.header.NallHW = np.array(npart // 2 ** 32, dtype=np.int32)
            self.header.npartTotal = np.array(
                npart - 2 ** 32 * self.header.NallHW, dtype=np.int32)

    def serialize(self):
        """This takes the header structure and returns it as a packed string"""
        fmt = self.endian + "IIIIIIddddddddiiIIIIIIiiddddiiIIIIIIiiif"
        # Do not attempt to include padding in the serialised data; the most common use of serialise
        # is to write to a file and we don't want to overwrite extra data that
        # might be present
        if struct.calcsize(fmt) != 256 - 48:
            raise Exception(
                "There is a bug in gadget.py; the header format string is not 256 bytes")
        # WARNING: On at least python 2.6.3 and numpy 1.3.0 on windows, castless code fails with:
        # SystemError: ..\Objects\longobject.c:336: bad argument to internal function
        # This is because self.npart, etc, has type np.uint32 and not int.
        # This is I think a problem with python/numpy, but cast things to ints
        # until I can determine how widespread it is.
        data = struct.pack(
            fmt, int(self.npart[0]), int(self.npart[1]), int(self.npart[
                                                             2]), int(
                                                                 self.npart[3]), int(
                                                                     self.npart[4]), int(
                                                                         self.npart[
                                                                             5]),
            self.mass[0], self.mass[1], self.mass[
                2], self.mass[3], self.mass[4], self.mass[5],
            self.time, self.redshift,  self.flag_sfr, self.flag_feedback,
            int(self.npartTotal[0]), int(self.npartTotal[1]), int(self.npartTotal[
                                                                  2]), int(
                                                                      self.npartTotal[3]), int(
                                                                          self.npartTotal[4]), int(
                                                                              self.npartTotal[
                                                                                  5]),
            self.flag_cooling, self.num_files, self.BoxSize, self.Omega0, self.OmegaLambda, self.HubbleParam, self.flag_stellarage, self.flag_metals,
            int(self.NallHW[0]), int(self.NallHW[1]), int(self.NallHW[
                                                          2]), int(
                                                              self.NallHW[3]), int(
                                                                  self.NallHW[4]), int(
                                                                      self.NallHW[
                                                                          5]),
            self.flag_entropy_instead_u, self.flag_doubleprecision, self.flag_ic_info, self.lpt_scalingfactor)
        return data


class _GadgetFile:

    """Gadget file management class. Users should access gadget files through
    :class:`~pynbody.snapshot.gadget.GadgetSnap`."""

    def __init__(self, filename):
        self._filename = filename
        self.blocks = {}
        self.endian = ''
        self.format2 = True
        t_part = 0
        with open(filename, "rb") as fd:
            self.check_format(fd)
            # If format 1, load the block definitions.
            if not self.format2:
                self.block_names = config_parser.get(
                    'gadget-1-blocks', "blocks").split(",")
                self.block_names = [q.upper().ljust(4) for q in self.block_names]
                if sys.version_info[0] > 2:
                    self.block_names = [str.encode(x, 'utf-8') for x in self.block_names]
                # This is a counter for the fallback
                self.extra = 0
            while True:
                block = _GadgetBlock()
                (name, block.length) = self.read_block_head(fd)
                if block.length == 0:
                    break
                # Do special things for the HEAD block
                if name[0:4] == b"HEAD":
                    if block.length != 256:
                        raise OSError("Mis-sized HEAD block in " + filename)
                    self.header = fd.read(256)
                    if len(self.header) != 256:
                        raise OSError("Could not read HEAD block in " + filename)
                    self.header = _construct_gadget_header(
                        self.header, self.endian)
                    record_size = self.read_block_foot(fd)
                    if record_size != 256:
                        raise OSError("Bad record size for HEAD in " + filename)
                    t_part = self.header.npart.sum()
                    if  ((not self.format2) and
                        ((self.header.npart != 0) * (self.header.mass == 0)).sum()==0):
                        # The "Spec" says that if all the existing particle masses
                        # are in the header, we shouldn't have a MASS block
                        self.block_names.remove(b"MASS")
                    continue
                # Set the partlen, using our amazing heuristics
                success = False
                if name[0:4] == b"POS " or name[0:4] == b"VEL ":
                    if block.length == t_part * 24:
                        block.partlen = 24
                        block.data_type = np.float64
                    else:
                        block.partlen = 12
                        block.data_type = np.float32
                    block.p_types = self.header.npart != 0
                    success = True
                elif name[0:4] == b"ID  ":
                    # Heuristic for long (64-bit) IDs
                    if block.length == t_part * 4:
                        block.partlen = 4
                        block.data_type = np.int32
                    else:
                        block.partlen = 8
                        block.data_type = np.int64
                    block.p_types = self.header.npart != 0
                    success = True

                block.start = fd.tell()
                # Check for the case where the record size overflows an int.
                # If this is true, we can't get record size from the length and we just have to guess
                # At least the record sizes at either end should be consistently wrong.
                # Better hope this only happens for blocks where all particles are
                # present.
                extra_len = int(t_part) * block.partlen
                if extra_len >= 2 ** 32:
                    fd.seek(extra_len, 1)
                else:
                    fd.seek(block.length, 1)
                record_size = self.read_block_foot(fd)
                if record_size != block.length:
                    raise OSError("Corrupt record in " +
                                filename + " footer for block " + name + "dtype" + str(block.data_type))
                if extra_len >= 2 ** 32:
                    block.length = extra_len

                if not success:
                    # Figure out what particles are here and what types
                    # they have. This also is a heuristic, which assumes
                    # that blocks are either fully present or not for a
                    # given particle. It also has to try all
                    # possibilities of dimensions of array and data type.
                    for dim, tp in (1, np.float32), (1, np.float64), (3, np.float32), (3, np.float64), (11, np.float32):
                        try:
                            block.data_type = tp
                            block.partlen = np.dtype(tp).itemsize * dim
                            block.p_types = self.get_block_types(
                                block, self.header.npart)
                            success = True
                            break
                        except ValueError:
                            continue

                if not success:
                    warnings.warn("Encountered a gadget block %r which could not be interpreted - is it a strange length or data type (length=%d)?" %
                                (name, block.length), RuntimeWarning)
                else:
                    self.blocks[name[0:4]] = block

        # Make a mass block if one isn't found.
        if b'MASS' not in self.blocks:
            block = _GadgetBlock()
            block.length = 0
            block.start = 0

            # Mass should be the same type as POS (see issue #321)
            block.data_type = self.blocks[b'POS '].data_type
            block.partlen = np.dtype(block.data_type).itemsize
            self.blocks[b'MASS'] = block

    def get_block_types(self, block, npart):
        """ Set up the particle types in the block, with a heuristic,
        which assumes that blocks are either fully present or not for a given particle type"""

        totalnpart = npart.astype(np.int64).sum()
        if block.length == totalnpart * block.partlen:
            p_types = np.ones(N_TYPE, bool)
            return p_types
        p_types = np.zeros(N_TYPE, bool)
        for blocknpart in [1, 2, 3, 4, 5]:
            # iterate of differeent possible combinations of particles in the bloc
            # we stop when we can we match the length of the block
            for perm in itertools.permutations(list(range(0, N_TYPE)), blocknpart):
                # the 64-bit calculation is important here
                if block.length == (npart[list(perm)]).astype(np.int64).sum() * block.partlen:
                    p_types[list(perm)] = True
                    return p_types
        raise ValueError("Could not determine particle types for block")

    def check_format(self, fd):
        """This function reads the first character of a file and, depending on its value, determines
        whether we have a format 1 or 2 file, and whether the endianness is swapped. For the endianness,
        it then determines the correct byteorder string to pass to struct.unpack. There is not string
        for 'not native', so this is more complex than it needs to be"""
        fd.seek(0, 0)
        (r,) = struct.unpack('=I', fd.read(4))
        if r == 8:
            self.endian = '='
            self.format2 = True
        elif r == 134217728:
            if sys.byteorder == 'little':
                self.endian = '>'
            else:
                self.endian = '<'
            self.format2 = True
        elif r == 65536:
            if sys.byteorder == 'little':
                self.endian = '>'
            else:
                self.endian = '<'
            self.format2 = False
        elif r == 256:
            self.endian = '='
            self.format2 = False
        else:
            raise OSError("File corrupt. First integer is: " + str(r))
        fd.seek(0, 0)
        return

    def read_block_foot(self, fd):
        """Unpacks the block footer, into a single integer"""
        record_size = fd.read(4)
        if len(record_size) != 4:
            raise OSError("Could not read block footer")
        (record_size,) = struct.unpack(self.endian + 'I', record_size)
        return record_size

    def read_block_head(self, fd):
        """Read the Gadget 2 "block header" record, ie, 8 name, length, 8.
           Takes an open file and returns a (name, length) tuple """
        if self.format2:
            head = fd.read(5 * 4)
            # If we have run out of file, we don't want an exception,
            # we just want a zero length empty block
            if len(head) != 5 * 4:
                return ("    ", 0)
            head = struct.unpack(self.endian + 'I4sIII', head)
            if head[0] != 8 or head[3] != 8 or head[4] != head[2] - 8:
                raise OSError(
                    "Corrupt header record. Possibly incorrect file format")
            # Don't include the two "record_size" indicators in the total
            # length count
            return (head[1], head[2] - 8)
        else:
            record_size = fd.read(4)
            if len(record_size) != 4:
                return ("    ", 0)
            (record_size,) = struct.unpack(self.endian + 'I', record_size)
            try:
                name = self.block_names[0]
                self.block_names = self.block_names[1:]
            except IndexError:
                if self.extra == 0:
                    warnings.warn(
                        "Run out of block names in the config file. Using fallbacks: UNK*", RuntimeWarning)
                name = _to_raw("UNK" + str(self.extra))
                self.extra += 1
            return (name, record_size)

    def get_block(self, name, p_type, p_toread):
        """Get a particle range from this file, starting at p_start,
        and reading a maximum of p_toread particles"""
        name = _to_raw(name)

        p_read = 0
        cur_block = self.blocks[name]
        parts = self.get_block_parts(name, p_type)
        p_start = self.get_start_part(name, p_type)
        if p_toread > parts:
            p_toread = parts
        with open(self._filename, 'rb') as fd:
            fd.seek(cur_block.start + int(cur_block.partlen * p_start), 0)
            # This is just so that we can get a size for the type
            dt = np.dtype(cur_block.data_type)
            n_type = p_toread * cur_block.partlen // dt.itemsize
            data = np.fromfile(
                fd, dtype=cur_block.data_type, count=n_type, sep='')

        if self.endian != '=':
            data = data.byteswap(True)
        return (p_toread, data)

    def get_block_parts(self, name, p_type):
        """Get the number of particles present in a block in this file"""
        if name not in self.blocks:
            return 0
        cur_block = self.blocks[name]
        if p_type == -1:
            return cur_block.length // cur_block.partlen
        else:
            return self.header.npart[p_type] * cur_block.p_types[p_type]

    def get_start_part(self, name, p_type):
        """Find particle to skip to before starting, if reading particular type"""
        if p_type == -1:
            return 0
        else:
            if name not in self.blocks:
                return 0
            cur_block = self.blocks[name]
            return (cur_block.p_types * self.header.npart)[0:p_type].sum().astype(int)

    def get_block_dims(self, name):
        """Get the dimensionality of the block, eg, 3 for POS, 1 for most other things"""
        if name not in self.blocks:
            return 0
        cur_block = self.blocks[name]
        dt = np.dtype(cur_block.data_type)
        return cur_block.partlen // dt.itemsize

    # The following functions are for writing blocks back to the file
    def write_block(self, name, p_type, big_data, filename=None):
        """Write a full block of data in this file. Any particle type can be written. If the particle type is not present in this file,
        an exception KeyError is thrown. If there are too many particles, ValueError is thrown.
        big_data contains a reference to the data to be written. Type -1 is all types"""
        try:
            cur_block = self.blocks[name]
        except KeyError:
            raise KeyError("Block " + name + " not in file " + self._filename)

        if not cur_block.p_types[p_type]:
            return

        parts = self.get_block_parts(name, p_type)
        p_start = self.get_start_part(name, p_type)
        MinType = np.ravel(np.where(cur_block.p_types * self.header.npart))[0]
        MaxType = np.ravel(np.where(cur_block.p_types * self.header.npart))[-1]
        # Have we been given the right number of particles?
        if np.size(big_data) > parts * self.get_block_dims(name):
            raise ValueError("Space for " + str(parts) + " particles of type " + str(
                p_type) + " in file " + self._filename + ", " + str(np.shape(big_data)[0]) + " requested.")
        # Do we have the right type?
        dt = np.dtype(cur_block.data_type)
        bt = big_data.dtype
        if bt.kind != dt.kind:
            raise ValueError("Data of incorrect type passed to write_block")
        # Open the file
        fn = self._filename if filename is None else filename

        with open(fn, 'r+b') as fd:
            # Seek to the start of the block
            fd.seek(cur_block.start + cur_block.partlen * p_start, 0)
            # Add the block header if we are at the start of a block
            if p_type == MinType or p_type < 0:
                data = self.write_block_header(name, cur_block.length)
                # Better seek back a bit first.
                fd.seek(-len(data), 1)
                fd.write(data)

            if self.endian != '=':
                big_data = big_data.byteswap(False)

            # Actually write the data
            # Make sure to ravel it, otherwise the wrong amount will be written,
            # because it will also write nulls every time the first array dimension
            # changes.
            d = np.ravel(big_data.astype(dt)).tobytes()
            fd.write(d)
            if p_type == MaxType or p_type < 0:
                data = self.write_block_footer(name, cur_block.length)
                fd.write(data)

    def add_file_block(self, name, blocksize, partlen=4, dtype=np.float32, p_types=None):
        """Add a block to the block table at the end of the file. Do not actually write anything"""
        name = _to_raw(name)

        if name in self.blocks:
            raise KeyError(
                "Block " + name + " already present in file. Not adding")

        # Get last block
        lb = max(list(self.blocks.values()), key=lambda val: val.start)

        if np.issubdtype(dtype, np.floating):
            dtype = np.float32  # coerce to single precision

        # Make new block
        block = _GadgetBlock(length=blocksize, partlen=partlen, dtype=dtype)
        block.start = lb.start + lb.length + 6 * \
            4  # For the block header, and footer of the previous block
        if p_types is None:
            block.p_types = np.ones(N_TYPE, bool)
        else:
            block.p_types = p_types
        self.blocks[name] = block

    def write_block_header(self, name, blocksize):
        """Create a string for a Gadget-style block header, but do not actually write it, for atomicity."""
        if self.format2:
            # This is the block header record, which we want for format two
            # files only
            blkheadsize = 4 + 4 * 1
            # 1 int and 4 chars
            nextblock = blocksize + 2 * 4
            # Relative location of next block; the extra 2 uints are for storing the headers.
            # Write format 2 header header
            head = struct.pack(
                self.endian + 'I4sII', blkheadsize, name, nextblock, blkheadsize)
        # Also write the record size, which we want for all files*/
        head += self.write_block_footer(name, blocksize)
        return head

    def write_block_footer(self, name, blocksize):
        """(Re) write a Gadget-style block footer."""
        return struct.pack(self.endian + 'I', blocksize)

    def write_header(self, head_in, filename=None):
        """Write a file header. Overwrites npart in the argument with the npart of the file, so a consistent file is always written."""
        # Construct new header with the passed header and overwrite npart with the file header.
        # This has ref. semantics so use copy
        head = copy.deepcopy(head_in)
        head.npart = np.array(self.header.npart)
        data = self.write_block_header(b"HEAD", 256)
        data += head.serialize()
        if filename is None:
            filename = self._filename

        try:
            fd = open(filename, "r+b")
        except OSError as error:
            # If we couldn't open it because it doesn't exist open it for
            # writing.
            (err, strerror) = error.args
            # If we couldn't open it because it doesn't exist open it for
            # writing.
            if err == errno.ENOENT:
                fd = open(filename, "w+b")
            # If we couldn't open it for any other reason, reraise exception
            else:
                raise OSError(err, strerror)

        try:
            fd.seek(0)  # Header always at start of file
            # Write header
            fd.write(data)
            # Seek 48 bytes forward, to skip the padding (which may contain extra
            # data)
            fd.seek(48, 1)
            data = self.write_block_footer(b"HEAD", 256)
            fd.write(data)
        finally:
            fd.close()


class _GadgetWriteFile(_GadgetFile):

    """Class for write-only snapshots, as when we are creating a new set of files from, eg, a TipsySnap.
        Should not be used directly. block_names is a list so we can specify an on-disc ordering."""

    def __init__(self, filename, npart, block_names, header, format2=True):
        self.header = header
        self._filename = filename
        self.endian = '='  # write with default endian of this system
        self.format2 = format2
        self.blocks = {}
        self.header.npart = np.array(npart)
        # Set up the positions
        header_size = 4
        if format2:
            header_size += 3 * 4 + 4
        footer_size = 4
        # First block is just past the header.
        cur_pos = 256 + header_size + footer_size
        for block in block_names:
            # Add block if present for some types
            if block.types.sum():
                b_part = npart * block.types
                b = _GadgetBlock(
                    start=cur_pos + header_size, partlen=block.partlen,
                    length=block.partlen * b_part.sum(), dtype=block.dtype, p_types=block.types)
                cur_pos += b.length + header_size + footer_size
                self.blocks[_to_raw(block.name)] = b


class _WriteBlock:

    """Internal structure for passing data around between file and snapshot"""

    def __init__(self, partlen=4, dtype=np.float32, types=np.zeros(N_TYPE, bool), name="    "):

        if np.issubdtype(dtype, np.floating):
            dtype = np.float32
        if np.issubdtype(dtype, np.signedinteger):
            dtype = np.int32

        self.partlen = partlen * np.dtype(dtype).itemsize
        self.dtype = dtype

        self.types = types
        self.name = name



[docs]
class GadgetSnap(SimSnap):
    """Class for reading Gadget-1 and Gadget-2 old-style (i.e. pre-HDF5) snapshots."""


[docs]
    def __init__(self, filename: pathlib.Path, only_header=False, must_have_paramfile=False, ignore_cosmo=False):

        filename = str(filename)

        global config
        super().__init__()
        self._files = []
        self._filename = filename
        self._ignore_cosmo = ignore_cosmo

        # Check whether the file exists, and get the ".0" right
        if os.path.exists(filename):
            files = [filename]
        elif os.path.exists(filename + ".0"):
            filename = filename + ".0"
            files = None

        if filename[-2:] == ".0":
            self._filename = filename[:-2]
        # Read the first file and use it to get an idea of how many files we
        # are expecting.
        first_file = _GadgetFile(filename)
        self._files.append(first_file)
        files_expected = self._files[0].header.num_files
        npart = np.array(self._files[0].header.npart,dtype=np.uint64) # 64-bit necessary in numpy 2.0 because of changes to data type promotion rules in the 2 * 32 calc below

        if files is None:
            # we want to load all files
            base_filename = filename[:-2]
            files = [base_filename + "." + str(i)
                     for i in range(files_expected)]

        for filename in files[1:]:
            tmp_file = _GadgetFile(filename)
            if not self.check_headers(tmp_file.header, self._files[0].header):
                warnings.warn("file " + str(
                    i) + " is not part of this snapshot set!", RuntimeWarning)
                continue
            self._files.append(tmp_file)
            npart = npart + tmp_file.header.npart
        # Set up things from the parent class
        self._num_particles = npart.sum()
        # Set up global header
        self.header = copy.deepcopy(self._files[0].header)
        self.header.npart = npart
        # Check and fix npartTotal and NallHW if they are wrong.
        if npart is not self.header.npartTotal.astype(np.uint64) + 2 ** 32 * self.header.NallHW.astype(np.uint64):
            self.header.NallHW = npart // 2 ** 32
            self.header.npartTotal = npart - 2 ** 32 * self.header.NallHW
            for f in self._files:
                f.header.npartTotal = self.header.npartTotal
                f.header.NallHW = self.header.NallHW

        self._family_slice = {}

        self._loadable_keys = set()
        self._family_keys = set()
        self._family_arrays = {}
        self._arrays = {}
        #self.properties = {}

        # Set up _family_slice
        current = 0
        for fam in _type_map:
            g_types = _type_map[fam]
            length = 0
            for f in self._files:
                length += sum(f.header.npart[x] for x in g_types)
            self._family_slice[fam] = slice(current, current + length)
            current += length

        # Set up _loadable_keys
        for f in self._files:
            self._loadable_keys = self._loadable_keys.union(
                set(f.blocks.keys()))

        # Add default mapping to unpadded lower case if not in config file.
        for nn in self._loadable_keys:
            if sys.version_info[0] == 2:
                mm = nn.lower().strip()
            else:
                mm = nn.lower().strip().decode('utf-8')
            if nn not in _rev_name_map:
                _rev_name_map[nn] = mm
            if mm not in _name_map:
                _name_map[mm] = nn

        # Use translated keys only
        self._loadable_keys = [_translate_array_name(
            x, reverse=True) for x in self._loadable_keys]
        # Set up block list, with attached families, as a caching mechanism
        self._block_list = self.get_block_list()

        self._decorate()



[docs]
    def loadable_family_keys(self, fam=None):
        """Return list of arrays which are loadable for specific families,
        but not for all families."""
        warnings.warn(
            "loadable_family_keys functionality has now been incorporated into loadable_keys", warnings.DeprecationWarning)
        return self.loadable_keys(fam)



[docs]
    def loadable_keys(self, fam=None):
        if fam is not None:
            return [x for x in self._loadable_keys if self._family_has_loadable_array(fam, x)]
        else:
            return [x for x in self._loadable_keys if self._family_has_loadable_array(None, x)]


    def _family_has_loadable_array(self, fam, name):
        """Returns True if the array can be loaded for the specified family.
        If fam is None, returns True if the array can be loaded for all families."""
        if name in self._block_list:
            if fam is not None:
                return fam in self._block_list[name]
            else:
                return set(self.families()) <= set(self._block_list[name])
        else:
            return False


[docs]
    def get_block_list(self):
        """Get list of unique blocks in snapshot, with the types they refer to"""
        b_list = {}
        for f in self._files:
            for (n, b) in f.blocks.items():
                if n in b_list:
                    b_list[n] += b.p_types
                else:
                    b_list[n] = np.array(b.p_types, dtype=bool)
        # Special case mass. Note b_list has reference semantics.
        if b"MASS" in b_list:
            b_list[b"MASS"] += np.array(self.header.mass, dtype=bool)
        # Translate this array into families and external names
        out_list = {}
        for k, b in b_list.items():
            b_name = _translate_array_name(k, reverse=True)
            # Make this be only if there are actually particles of that type in
            # the snap
            b_types = [f for f in self.families() if b[np.intersect1d(
                gadget_type(f), np.ravel(np.where(self.header.npart != 0)))].all()]
            out_list[b_name] = b_types
        return out_list



[docs]
    def get_block_parts(self, name, family):
        """Get the number of particles present in a block, of a given type"""
        total = 0
        for f in self._files:
            total += sum(f.get_block_parts(
                name, gfam) for gfam in gadget_type(family))
        # Special-case MASS
        if name == b"MASS":
            total += sum(self.header.npart[p] * np.array(self.header.mass[
                         p], dtype=bool) for p in gadget_type(family))
        return total



[docs]
    def check_headers(self, head1, head2):
        """Check two headers for consistency"""
        if (head1.time != head2.time or head1.redshift != head2.redshift or
                head1.flag_sfr != head2.flag_sfr or
                head1.flag_feedback != head2.flag_feedback or
                head1.num_files != head2.num_files or
                head1.BoxSize != head2.BoxSize or
                head1.Omega0 != head2.Omega0 or
                head1.OmegaLambda != head2.OmegaLambda or
                head1.HubbleParam != head2.HubbleParam or
                head1.flag_stellarage != head2.flag_stellarage or
                head1.flag_metals != head2.flag_metals):
            return False
        # Check array quantities
        if (((head1.mass - head2.mass) > 1e-5 * head1.mass).any() or
                (head1.npartTotal != head2.npartTotal).any()):
            return False
        #  At least one version of N-GenICs writes a header file which
        #  ignores everything past flag_metals (!), leaving it uninitialised.
        #  Therefore, we can't check them.
        return True


    def _get_array_type(self, name):
        """Get the type for the array given in name"""
        g_name = _translate_array_name(name)
        return self._get_array_type_g(g_name)

    def _get_array_type_g(self, name):
        """Get the type for the array given in name"""
        return self._files[0].blocks[name].data_type

    def _get_array_dims(self, name):
        """Get the dimensions of an array; ie, is it 3d or 1d"""
        g_name = _translate_array_name(name)
        return self._files[0].get_block_dims(g_name)

    def _load_array(self, name, fam=None):
        """Read in data from a Gadget file.
        If fam is not None, loads only data for that particle family"""
        # g_name is the internal name
        g_name = _translate_array_name(name)

        if not self._family_has_loadable_array(fam, name):
            if fam is None and name in self._block_list:
                raise KeyError(
                    "Block " + name + " is not available for all families")
            else:
                raise OSError("No such array on disk")

        ndim = self._get_array_dims(name)

        if ndim == 1:
            dims = [int(self.get_block_parts(g_name, fam)), ]
        else:
            dims = [int(self.get_block_parts(g_name, fam)), ndim]

        if fam is not None:
            p_types = gadget_type(fam)
        else:
            p_types = gadget_type(self.families())

        # Get the data. Get one type at a time and then concatenate.
        # A possible optimisation is to special-case loading all particles.
        data = np.array([], dtype=self._get_array_type(name))
        for p in p_types:
            # Special-case mass
            if g_name == b"MASS" and self.header.mass[p] != 0.:
                mass_as_correct_type = self.header.mass[p].astype(self._get_array_type(name))
                data = np.append(data, np.repeat(mass_as_correct_type, self.header.npart[p]))
            else:
                data = np.append(data, self.__load_array(g_name, p))

        if fam is None:
            self[name] = data.reshape(dims, order='C').view(array.SimArray)
            self[name].set_default_units(quiet=True)
        else:
            self[fam][name] = data.reshape(
                dims, order='C').view(array.SimArray)
            self[fam][name].set_default_units(quiet=True)

    def __load_array(self, g_name, p_type):
       """Internal helper function for _load_array that takes a g_name and a gadget type,
       gets the data from each file and returns it as one long array."""
       # int cast necessary because numpy makes int * uint64 a float!
       data = np.zeros(int(self._get_array_dims(g_name) * self.header.npart[p_type]), dtype=self._get_array_type_g(g_name))
       # Get a type from each file
       ipos = 0
       for f in self._files:
           f_parts = f.get_block_parts(g_name, p_type)
           if f_parts == 0:
               continue
           (f_read, f_data) = f.get_block(g_name, p_type, f_parts)
           if f_read != f_parts:
               raise OSError("Read of " + f._filename + " asked for " + str(
                   f_parts) + " particles but got " + str(f_read))
           iread = self._get_array_dims(g_name)*f.header.npart[p_type]
           data[ipos:ipos + iread] = f_data
           ipos += iread
       return data

    @classmethod
    def _can_load(cls, f: pathlib.Path):
        """Check whether we can load the file as Gadget format by reading
        the first 4 bytes"""
        fname = f
        if not f.exists():
            fname = f.parent / (f.name + ".0")
            if not fname.exists():
                return False

        with open(fname, "br") as fd:
            r, = struct.unpack('=I', fd.read(4))

        # First int32 is 8 for a Gadget 2 file, or 256 for Gadget 1, or the
        # byte swapped equivalent.
        if r in (8, 134217728, 65536, 256):
            return True
        else:
            return False

    @staticmethod
    def _write(self, filename=None):
        """Write an entire Gadget file (actually an entire set of snapshots)."""

        with self.lazy_derive_off:
            # If caller is not a GadgetSnap, construct the GadgetFiles,
            # so that format conversion works.
            all_keys = set(self.loadable_keys()).union(
                list(self.keys())).union(self.family_keys())
            all_keys = [
                k for k in all_keys if k not in ["x", "y", "z", "vx", "vy", "vz"]]
            # This code supports (limited) format conversions
            if self.__class__ is not GadgetSnap:
                # We need a filename if we are writing to a new type
                if filename is None:
                    raise Exception(
                        "Please specify a filename to write a new file.")
            # Splitting the files correctly is hard; the particles need to be reordered, and
                # we need to know which families correspond to which gadget types.
                # So don't do it.

                # Make sure the data fits into one files. The magic numbers are:
                # 12 - the largest block is likely to  be POS with 12 bytes per particle.
                # 2**31 is the largest size a gadget block can safely have
                if self.__len__() * 12. > 2 ** 31 - 1:
                    raise OSError(
                        "Data too large to fit into a single gadget file, and splitting not implemented. Cannot write.")
                # Make npart
                npart = np.zeros(N_TYPE, int)
                arr_name = (list(self.keys()) + self.loadable_keys())[0]
                for f in self.families():
                    # Note that if we have more than one type per family, we cannot
                    # determine which type each individual particle is, so
                    # assume they are all the first.
                    npart[np.min(gadget_type(f))] = len(self[f][arr_name])
                # Construct a header
                # npart, mass, time, redshift, BoxSize,Omega0, OmegaLambda,
                # HubbleParam, num_files=1
                gheader = _GadgetHeader(
                    npart, np.zeros(N_TYPE, float), self.properties[
                        "a"], self.properties["z"],
                    self.properties["boxsize"].in_units(self[
                                                        'pos'].units, **self.conversion_context()),
                    self.properties["omegaM0"], self.properties["omegaL0"], self.properties["h"], 1)
                # Construct the block_names; each block_name needs partlen, data_type, and p_types,
                # as well as a name. Blocks will hit the disc in the order they are in all_keys.
                # First, make pos the first block and vel the second.

                # all_keys[all_keys.index("pos")]=all_keys[0]
                # all_keys[0] = "pos"
                # all_keys[all_keys.index("vel")]=all_keys[1]
                # all_keys[1] = "vel"
                all_keys = _output_order_gadget(all_keys)

                # No writing format 1 files.
                block_names = []
                for k in all_keys:
                    types = np.zeros(N_TYPE, bool)
                    for f in self.families():
                        try:
                            dtype = self[f][k].dtype
                            types[np.min(gadget_type(f))] += True
                            try:
                                partlen = np.shape(self[
                                                   f][k])[1]  # *dtype.itemsize
                            except IndexError:
                                partlen = 1  # dtype.itemsize
                        except KeyError:
                            pass
                    bb = _WriteBlock(partlen, dtype=dtype, types=types, name=_translate_array_name(
                        k).upper().ljust(4)[0:4])
                    block_names.append(bb)
                # Create an output file
                out_file = _GadgetWriteFile(
                    filename, npart, block_names, gheader)
                # Write the header
                out_file.write_header(gheader, filename)
                # Write all the arrays
                for x in all_keys:
                    g_name = _to_raw(
                        _translate_array_name(x).upper().ljust(4)[0:4])

                    for fam in self.families():
                        try:
                            data = self[fam][x]
                            gfam = np.min(gadget_type(fam))
                            out_file.write_block(
                                g_name, gfam, data, filename=filename)
                        except KeyError:
                            pass
                return

            # Write headers
            if filename is not None:
                if np.size(self._files) > 1:
                    for i in np.arange(0, np.size(self._files)):
                        ffile = filename + "." + str(i)
                        self._files[i].write_header(self.header, ffile)
                else:
                    self._files[0].write_header(self.header, filename)
            else:
                # Call write_header for every file.
                [f.write_header(self.header) for f in self._files]
            # Call _write_array for every array.
            for x in all_keys:
                GadgetSnap._write_array(self, x, filename=filename)

    @staticmethod
    def _write_array(self, array_name, fam=None, filename=None):
        """Write a data array back to a Gadget snapshot, splitting it across files."""
        write_fam = fam or self.families()

        # Make the name a four-character upper case name, possibly with
        # trailing spaces
        g_name = _to_raw(
            _translate_array_name(array_name).upper().ljust(4)[0:4])
        nfiles = np.size(self._files)
        # Find where each particle goes
        f_parts = [f.get_block_parts(g_name, -1) for f in self._files]
        # If there is no block corresponding to this name in the file,
        # add it (so we can write derived arrays).
        if np.sum(f_parts) == 0:
            # Get p_type
            p_types = np.zeros(N_TYPE, bool)
            npart = 0
            for fam in self.families():
                gfam = np.min(gadget_type(fam))
                # We get the particle types we want by trying to load all
                # particle types (from memory) and seeing which ones work
                p_types[gfam] = array_name in self[fam]
                if p_types[gfam]:
                    ashape = np.shape(self[fam][array_name])
                    # If the partlen is 1, append so the shape array has the
                    # right shape.
                    if np.size(ashape) < 2:
                        ashape = (ashape[0], 1)
                    npart += ashape[0]
            if p_types.sum():
                per_file = npart // nfiles
                for f in self._files[:-2]:
                    f.add_file_block(array_name, per_file, ashape[
                                     1], dtype=self[array_name].dtype, p_types=p_types)
                self._files[-1].add_file_block(
                    array_name, npart - (nfiles - 1) * per_file, ashape[1])

        # Write blocks on a family level, so that we don't have to worry about
        # the file-level re-ordering.
        for fam in write_fam:
            if self._family_has_loadable_array(fam, array_name):
                data = self[fam][array_name]
                s = 0
                for gfam in gadget_type(fam):
                    # Find where each particle goes
                    f_parts = [f.get_block_parts(
                        g_name, gfam) for f in self._files]
                    for i in np.arange(0, nfiles):
                        if f_parts[i]==0:
                            continue
                        # Set up filename
                        if filename is not None:
                            ffile = filename + "." + str(i)
                            if nfiles == 1:
                                ffile = filename
                        else:
                            ffile = None
                        # Special-case MASS.
                        if g_name == b"MASS" and self.header.mass[gfam] != 0.:
                            nmass = np.min(
                                data[s:(s + self.header.npart[gfam])])
                            # Warn if there are now different masses for this particle type,
                            # as this information cannot be represented in this
                            # snapshot.
                            if nmass != np.max(data[s:(s + self.header.npart[gfam])]):
                                warnings.warn("Cannot write variable masses for type " + str(
                                    gfam) + ", as masses are stored in the header.", RuntimeWarning)
                            elif self.header.mass[gfam] != nmass:
                                self.header.mass[gfam] = nmass
                                self._files[i].write_header(
                                    self.header, filename=ffile)
                        else:
                            # Write data
                            if np.issubdtype(data.dtype, np.floating):
                                data = np.asanyarray(data, dtype=np.float32)
                            self._files[i].write_block(g_name, gfam, data[
                                                       s:(s + f_parts[i])], filename=ffile)
                        s += f_parts[i]



def _header_suggests_cosmological(gadget_header):
    return (gadget_header.HubbleParam != 0.) and (gadget_header.Omega0 != 0.) and (gadget_header.BoxSize != 0.)

@GadgetSnap.decorator
def _do_units(sim):
    vel_unit = config_parser.get('gadget-units', 'vel')
    dist_unit = config_parser.get('gadget-units', 'pos')
    mass_unit = config_parser.get('gadget-units', 'mass')

    vel_unit, dist_unit, mass_unit = (
        units.Unit(x) for x in (vel_unit, dist_unit, mass_unit))

    if sim._ignore_cosmo or not _header_suggests_cosmological(sim.header):
        # remove a and h dependences
        vel_unit = units.Unit(
            "km s^-1") * vel_unit.in_units("km s^-1", a=1, h=1)
        mass_unit = units.Unit("Msol") * mass_unit.in_units("Msol", a=1, h=1)
        dist_unit = units.Unit("kpc") * dist_unit.in_units("kpc", a=1, h=1)

    sim._file_units_system = [units.Unit("K"), vel_unit, dist_unit, mass_unit]
    sim._override_units_system()


@GadgetSnap.decorator
def _do_properties(sim):
    h = sim.header

    if not(sim._ignore_cosmo) and _header_suggests_cosmological(h):
        from .. import analysis
        sim.properties['omegaM0'] = h.Omega0
        # sim.properties['omegaB0'] = ... This one is non-trivial to calculate
        sim.properties['omegaL0'] = h.OmegaLambda
        sim.properties['boxsize'] = sim.infer_original_units("kpc")*h.BoxSize
        sim.properties['z'] = h.redshift
        sim.properties['h'] = h.HubbleParam
        time_units = sim.infer_original_units("s")
        sim.properties['time'] = analysis.cosmology.age(sim,unit=time_units)*time_units
    else:
        sim.properties['time'] = sim.infer_original_units("s") * h.time