Source code for pynbody.snapshot.gadget

"""
Implements reading old-style gadget binary files (format 1 and 2, but not HDF5).

The gadget array names are mapped into pynbody array names according to the mappings given by the `config.ini`
section `[gadget-name-mapping]`.

Very old gadget-1 style files have a fixed block order, which is specified in the config.ini section
`[gadget-1-blocks]`.



"""


import configparser
import copy
import errno
import itertools
import os
import pathlib
import struct
import sys
import warnings

import numpy as np

from .. import array, config_parser, family, units
from . import SimSnap, namemapper

# This is set here and not in a config file because too many things break
# if it is not 6


N_TYPE = 6

_type_map = {}

for name, gtypes in config_parser.items('gadget-type-mapping'):
    try:
        gtypes = np.array([int(q) for q in gtypes.split(",")])
        if (gtypes >= N_TYPE).any() or (gtypes < 0).any():
            raise ValueError(
                "Type specified for family " + name + " is out of bounds (" + gtypes + ").")
        _type_map[family.get_family(name)] = gtypes
    except configparser.NoOptionError:
        pass

_name_map, _rev_name_map = namemapper.setup_name_maps(
    'gadget-name-mapping', gadget_blocks=True)
_translate_array_name = namemapper.name_map_function(_name_map, _rev_name_map)


def _to_raw(s):
    if isinstance(s, str) and sys.version_info[0] > 2:
        return s.encode('utf-8')
    else:
        return s


[docs] def gadget_type(fam): if isinstance(fam, list): l = [] for sf in fam: l.extend(gadget_type(sf)) return l elif fam is None: return list(np.arange(0, N_TYPE)) else: return _type_map[fam]
class _GadgetBlock: """Class to describe each block. Each block has a start, a length, and a length-per-particle""" def __init__(self, start=0, length=0, partlen=0, dtype=np.float32, p_types=np.zeros(N_TYPE, bool)): # Start of block in file self.start = start # Length of block in file self.length = length # Bytes per particle in file self.partlen = partlen # Data type of block self.data_type = dtype # Types of particle this block contains self.p_types = p_types def _output_order_gadget(all_keys): out = [] out_dregs = copy.copy(all_keys) for X in map(str.strip, config_parser.get('gadget-default-output', 'field-ordering').split(',')): if X in out_dregs: del out_dregs[out_dregs.index(X)] out.append(X) return out + out_dregs def _construct_gadget_header(data, endian='='): """Create a GadgetHeader from a byte range read from a file.""" npart = np.zeros(N_TYPE, dtype=np.uint32) mass = np.zeros(N_TYPE) time = 0. redshift = 0. npartTotal = np.zeros(N_TYPE, dtype=np.int32) num_files = 0 BoxSize = 0. Omega0 = 0. OmegaLambda = 0. HubbleParam = 0. NallHW = np.zeros(N_TYPE, dtype=np.int32) if data == '': return fmt = endian + "IIIIIIddddddddiiIIIIIIiiddddiiIIIIIIiiif48s" if struct.calcsize(fmt) != 256: raise Exception( "There is a bug in gadget.py; the header format string is not 256 bytes") (npart[0], npart[1], npart[2], npart[3], npart[4], npart[5], mass[0], mass[1], mass[2], mass[3], mass[4], mass[5], time, redshift, flag_sfr, flag_feedback, npartTotal[0], npartTotal[1], npartTotal[ 2], npartTotal[3], npartTotal[4], npartTotal[5], flag_cooling, num_files, BoxSize, Omega0, OmegaLambda, HubbleParam, flag_stellarage, flag_metals, NallHW[0], NallHW[1], NallHW[2], NallHW[3], NallHW[4], NallHW[5], flag_entropy_instead_u, flag_doubleprecision, flag_ic_info, lpt_scalingfactor, fill) = struct.unpack(fmt, data) header = _GadgetHeader(npart, mass, time, redshift, BoxSize, Omega0, OmegaLambda, HubbleParam, num_files) header.flag_sfr = flag_sfr header.flag_feedback = flag_feedback header.npartTotal = npartTotal header.flag_cooling = flag_cooling header.flag_stellarage = flag_stellarage header.flag_metals = flag_metals header.NallHW = NallHW header.flag_entropy_instead_u = flag_entropy_instead_u header.flag_doubleprecision = flag_doubleprecision header.flag_ic_info = flag_ic_info header.lpt_scalingfactor = lpt_scalingfactor header.endian = endian return header class _GadgetHeader: """Describes the header of gadget class files; this is all our metadata, so we are going to store it inline""" def __init__(self, npart, mass, time, redshift, BoxSize, Omega0, OmegaLambda, HubbleParam, num_files=1): "Construct a header from values, instead of a datastring.""" assert(len(mass) == 6) assert(len(npart) == 6) # Mass of each particle type in this file. If zero, # particle mass stored in snapshot. self.mass = mass # Time of snapshot self.time = time # Redshift of snapshot self.redshift = redshift # Boolean to test the presence of star formation self.flag_sfr = False # Boolean to test the presence of feedback self.flag_feedback = False # Boolean to test the presence of cooling self.flag_cooling = False # Number of files expected in this snapshot self.num_files = num_files # Box size of the simulation self.BoxSize = BoxSize # Omega_Matter. Note this is Omega_DM + Omega_Baryons self.Omega0 = Omega0 # Dark energy density self.OmegaLambda = OmegaLambda # Hubble parameter, in units where it is around 70. self.HubbleParam = HubbleParam # Boolean to test whether stars have an age self.flag_stellarage = False # Boolean to test the presence of metals self.flag_metals = False # flags that IC-file contains entropy instead of u self.flag_entropy_instead_u = False # flags that snapshot contains double-precision instead of single # precision self.flag_doubleprecision = False self.flag_ic_info = False # flag to inform whether IC files are generated with Zeldovich approximation, # or whether they contain 2nd order lagrangian perturbation theory ICs. # FLAG_ZELDOVICH_ICS (1) - IC file based on Zeldovich # FLAG_SECOND_ORDER_ICS (2) - Special IC-file containing 2lpt masses # FLAG_EVOLVED_ZELDOVICH (3) - snapshot evolved from Zeldovich ICs # FLAG_EVOLVED_2LPT (4) - snapshot evolved from 2lpt ICs # FLAG_NORMALICS_2LPT (5) - standard gadget file format with 2lpt ICs # All other values, including 0 are interpreted as "don't know" for # backwards compatability. self.lpt_scalingfactor = 0. # scaling factor for 2lpt initial conditions self.endian = "" # Number of particles self.npart = np.array(npart, dtype=np.uint32) if (npart < 2 ** 31).all(): # First 32-bits of total number of particles in the simulation self.npartTotal = np.array(npart, dtype=np.int32) # Long word of the total number of particles in the simulation. # At least one version of N-GenICs sets this to something entirely # different. self.NallHW = np.zeros(N_TYPE, dtype=np.int32) else: self.header.NallHW = np.array(npart // 2 ** 32, dtype=np.int32) self.header.npartTotal = np.array( npart - 2 ** 32 * self.header.NallHW, dtype=np.int32) def serialize(self): """This takes the header structure and returns it as a packed string""" fmt = self.endian + "IIIIIIddddddddiiIIIIIIiiddddiiIIIIIIiiif" # Do not attempt to include padding in the serialised data; the most common use of serialise # is to write to a file and we don't want to overwrite extra data that # might be present if struct.calcsize(fmt) != 256 - 48: raise Exception( "There is a bug in gadget.py; the header format string is not 256 bytes") # WARNING: On at least python 2.6.3 and numpy 1.3.0 on windows, castless code fails with: # SystemError: ..\Objects\longobject.c:336: bad argument to internal function # This is because self.npart, etc, has type np.uint32 and not int. # This is I think a problem with python/numpy, but cast things to ints # until I can determine how widespread it is. data = struct.pack( fmt, int(self.npart[0]), int(self.npart[1]), int(self.npart[ 2]), int( self.npart[3]), int( self.npart[4]), int( self.npart[ 5]), self.mass[0], self.mass[1], self.mass[ 2], self.mass[3], self.mass[4], self.mass[5], self.time, self.redshift, self.flag_sfr, self.flag_feedback, int(self.npartTotal[0]), int(self.npartTotal[1]), int(self.npartTotal[ 2]), int( self.npartTotal[3]), int( self.npartTotal[4]), int( self.npartTotal[ 5]), self.flag_cooling, self.num_files, self.BoxSize, self.Omega0, self.OmegaLambda, self.HubbleParam, self.flag_stellarage, self.flag_metals, int(self.NallHW[0]), int(self.NallHW[1]), int(self.NallHW[ 2]), int( self.NallHW[3]), int( self.NallHW[4]), int( self.NallHW[ 5]), self.flag_entropy_instead_u, self.flag_doubleprecision, self.flag_ic_info, self.lpt_scalingfactor) return data class _GadgetFile: """Gadget file management class. Users should access gadget files through :class:`~pynbody.snapshot.gadget.GadgetSnap`.""" def __init__(self, filename): self._filename = filename self.blocks = {} self.endian = '' self.format2 = True t_part = 0 with open(filename, "rb") as fd: self.check_format(fd) # If format 1, load the block definitions. if not self.format2: self.block_names = config_parser.get( 'gadget-1-blocks', "blocks").split(",") self.block_names = [q.upper().ljust(4) for q in self.block_names] if sys.version_info[0] > 2: self.block_names = [str.encode(x, 'utf-8') for x in self.block_names] # This is a counter for the fallback self.extra = 0 while True: block = _GadgetBlock() (name, block.length) = self.read_block_head(fd) if block.length == 0: break # Do special things for the HEAD block if name[0:4] == b"HEAD": if block.length != 256: raise OSError("Mis-sized HEAD block in " + filename) self.header = fd.read(256) if len(self.header) != 256: raise OSError("Could not read HEAD block in " + filename) self.header = _construct_gadget_header( self.header, self.endian) record_size = self.read_block_foot(fd) if record_size != 256: raise OSError("Bad record size for HEAD in " + filename) t_part = self.header.npart.sum() if ((not self.format2) and ((self.header.npart != 0) * (self.header.mass == 0)).sum()==0): # The "Spec" says that if all the existing particle masses # are in the header, we shouldn't have a MASS block self.block_names.remove(b"MASS") continue # Set the partlen, using our amazing heuristics success = False if name[0:4] == b"POS " or name[0:4] == b"VEL ": if block.length == t_part * 24: block.partlen = 24 block.data_type = np.float64 else: block.partlen = 12 block.data_type = np.float32 block.p_types = self.header.npart != 0 success = True elif name[0:4] == b"ID ": # Heuristic for long (64-bit) IDs if block.length == t_part * 4: block.partlen = 4 block.data_type = np.int32 else: block.partlen = 8 block.data_type = np.int64 block.p_types = self.header.npart != 0 success = True block.start = fd.tell() # Check for the case where the record size overflows an int. # If this is true, we can't get record size from the length and we just have to guess # At least the record sizes at either end should be consistently wrong. # Better hope this only happens for blocks where all particles are # present. extra_len = int(t_part) * block.partlen if extra_len >= 2 ** 32: fd.seek(extra_len, 1) else: fd.seek(block.length, 1) record_size = self.read_block_foot(fd) if record_size != block.length: raise OSError("Corrupt record in " + filename + " footer for block " + name + "dtype" + str(block.data_type)) if extra_len >= 2 ** 32: block.length = extra_len if not success: # Figure out what particles are here and what types # they have. This also is a heuristic, which assumes # that blocks are either fully present or not for a # given particle. It also has to try all # possibilities of dimensions of array and data type. for dim, tp in (1, np.float32), (1, np.float64), (3, np.float32), (3, np.float64), (11, np.float32): try: block.data_type = tp block.partlen = np.dtype(tp).itemsize * dim block.p_types = self.get_block_types( block, self.header.npart) success = True break except ValueError: continue if not success: warnings.warn("Encountered a gadget block %r which could not be interpreted - is it a strange length or data type (length=%d)?" % (name, block.length), RuntimeWarning) else: self.blocks[name[0:4]] = block # Make a mass block if one isn't found. if b'MASS' not in self.blocks: block = _GadgetBlock() block.length = 0 block.start = 0 # Mass should be the same type as POS (see issue #321) block.data_type = self.blocks[b'POS '].data_type block.partlen = np.dtype(block.data_type).itemsize self.blocks[b'MASS'] = block def get_block_types(self, block, npart): """ Set up the particle types in the block, with a heuristic, which assumes that blocks are either fully present or not for a given particle type""" totalnpart = npart.astype(np.int64).sum() if block.length == totalnpart * block.partlen: p_types = np.ones(N_TYPE, bool) return p_types p_types = np.zeros(N_TYPE, bool) for blocknpart in [1, 2, 3, 4, 5]: # iterate of differeent possible combinations of particles in the bloc # we stop when we can we match the length of the block for perm in itertools.permutations(list(range(0, N_TYPE)), blocknpart): # the 64-bit calculation is important here if block.length == (npart[list(perm)]).astype(np.int64).sum() * block.partlen: p_types[list(perm)] = True return p_types raise ValueError("Could not determine particle types for block") def check_format(self, fd): """This function reads the first character of a file and, depending on its value, determines whether we have a format 1 or 2 file, and whether the endianness is swapped. For the endianness, it then determines the correct byteorder string to pass to struct.unpack. There is not string for 'not native', so this is more complex than it needs to be""" fd.seek(0, 0) (r,) = struct.unpack('=I', fd.read(4)) if r == 8: self.endian = '=' self.format2 = True elif r == 134217728: if sys.byteorder == 'little': self.endian = '>' else: self.endian = '<' self.format2 = True elif r == 65536: if sys.byteorder == 'little': self.endian = '>' else: self.endian = '<' self.format2 = False elif r == 256: self.endian = '=' self.format2 = False else: raise OSError("File corrupt. First integer is: " + str(r)) fd.seek(0, 0) return def read_block_foot(self, fd): """Unpacks the block footer, into a single integer""" record_size = fd.read(4) if len(record_size) != 4: raise OSError("Could not read block footer") (record_size,) = struct.unpack(self.endian + 'I', record_size) return record_size def read_block_head(self, fd): """Read the Gadget 2 "block header" record, ie, 8 name, length, 8. Takes an open file and returns a (name, length) tuple """ if self.format2: head = fd.read(5 * 4) # If we have run out of file, we don't want an exception, # we just want a zero length empty block if len(head) != 5 * 4: return (" ", 0) head = struct.unpack(self.endian + 'I4sIII', head) if head[0] != 8 or head[3] != 8 or head[4] != head[2] - 8: raise OSError( "Corrupt header record. Possibly incorrect file format") # Don't include the two "record_size" indicators in the total # length count return (head[1], head[2] - 8) else: record_size = fd.read(4) if len(record_size) != 4: return (" ", 0) (record_size,) = struct.unpack(self.endian + 'I', record_size) try: name = self.block_names[0] self.block_names = self.block_names[1:] except IndexError: if self.extra == 0: warnings.warn( "Run out of block names in the config file. Using fallbacks: UNK*", RuntimeWarning) name = _to_raw("UNK" + str(self.extra)) self.extra += 1 return (name, record_size) def get_block(self, name, p_type, p_toread): """Get a particle range from this file, starting at p_start, and reading a maximum of p_toread particles""" name = _to_raw(name) p_read = 0 cur_block = self.blocks[name] parts = self.get_block_parts(name, p_type) p_start = self.get_start_part(name, p_type) if p_toread > parts: p_toread = parts with open(self._filename, 'rb') as fd: fd.seek(cur_block.start + int(cur_block.partlen * p_start), 0) # This is just so that we can get a size for the type dt = np.dtype(cur_block.data_type) n_type = p_toread * cur_block.partlen // dt.itemsize data = np.fromfile( fd, dtype=cur_block.data_type, count=n_type, sep='') if self.endian != '=': data = data.byteswap(True) return (p_toread, data) def get_block_parts(self, name, p_type): """Get the number of particles present in a block in this file""" if name not in self.blocks: return 0 cur_block = self.blocks[name] if p_type == -1: return cur_block.length // cur_block.partlen else: return self.header.npart[p_type] * cur_block.p_types[p_type] def get_start_part(self, name, p_type): """Find particle to skip to before starting, if reading particular type""" if p_type == -1: return 0 else: if name not in self.blocks: return 0 cur_block = self.blocks[name] return (cur_block.p_types * self.header.npart)[0:p_type].sum().astype(int) def get_block_dims(self, name): """Get the dimensionality of the block, eg, 3 for POS, 1 for most other things""" if name not in self.blocks: return 0 cur_block = self.blocks[name] dt = np.dtype(cur_block.data_type) return cur_block.partlen // dt.itemsize # The following functions are for writing blocks back to the file def write_block(self, name, p_type, big_data, filename=None): """Write a full block of data in this file. Any particle type can be written. If the particle type is not present in this file, an exception KeyError is thrown. If there are too many particles, ValueError is thrown. big_data contains a reference to the data to be written. Type -1 is all types""" try: cur_block = self.blocks[name] except KeyError: raise KeyError("Block " + name + " not in file " + self._filename) if not cur_block.p_types[p_type]: return parts = self.get_block_parts(name, p_type) p_start = self.get_start_part(name, p_type) MinType = np.ravel(np.where(cur_block.p_types * self.header.npart))[0] MaxType = np.ravel(np.where(cur_block.p_types * self.header.npart))[-1] # Have we been given the right number of particles? if np.size(big_data) > parts * self.get_block_dims(name): raise ValueError("Space for " + str(parts) + " particles of type " + str( p_type) + " in file " + self._filename + ", " + str(np.shape(big_data)[0]) + " requested.") # Do we have the right type? dt = np.dtype(cur_block.data_type) bt = big_data.dtype if bt.kind != dt.kind: raise ValueError("Data of incorrect type passed to write_block") # Open the file fn = self._filename if filename is None else filename with open(fn, 'r+b') as fd: # Seek to the start of the block fd.seek(cur_block.start + cur_block.partlen * p_start, 0) # Add the block header if we are at the start of a block if p_type == MinType or p_type < 0: data = self.write_block_header(name, cur_block.length) # Better seek back a bit first. fd.seek(-len(data), 1) fd.write(data) if self.endian != '=': big_data = big_data.byteswap(False) # Actually write the data # Make sure to ravel it, otherwise the wrong amount will be written, # because it will also write nulls every time the first array dimension # changes. d = np.ravel(big_data.astype(dt)).tobytes() fd.write(d) if p_type == MaxType or p_type < 0: data = self.write_block_footer(name, cur_block.length) fd.write(data) def add_file_block(self, name, blocksize, partlen=4, dtype=np.float32, p_types=None): """Add a block to the block table at the end of the file. Do not actually write anything""" name = _to_raw(name) if name in self.blocks: raise KeyError( "Block " + name + " already present in file. Not adding") # Get last block lb = max(list(self.blocks.values()), key=lambda val: val.start) if np.issubdtype(dtype, np.floating): dtype = np.float32 # coerce to single precision # Make new block block = _GadgetBlock(length=blocksize, partlen=partlen, dtype=dtype) block.start = lb.start + lb.length + 6 * \ 4 # For the block header, and footer of the previous block if p_types is None: block.p_types = np.ones(N_TYPE, bool) else: block.p_types = p_types self.blocks[name] = block def write_block_header(self, name, blocksize): """Create a string for a Gadget-style block header, but do not actually write it, for atomicity.""" if self.format2: # This is the block header record, which we want for format two # files only blkheadsize = 4 + 4 * 1 # 1 int and 4 chars nextblock = blocksize + 2 * 4 # Relative location of next block; the extra 2 uints are for storing the headers. # Write format 2 header header head = struct.pack( self.endian + 'I4sII', blkheadsize, name, nextblock, blkheadsize) # Also write the record size, which we want for all files*/ head += self.write_block_footer(name, blocksize) return head def write_block_footer(self, name, blocksize): """(Re) write a Gadget-style block footer.""" return struct.pack(self.endian + 'I', blocksize) def write_header(self, head_in, filename=None): """Write a file header. Overwrites npart in the argument with the npart of the file, so a consistent file is always written.""" # Construct new header with the passed header and overwrite npart with the file header. # This has ref. semantics so use copy head = copy.deepcopy(head_in) head.npart = np.array(self.header.npart) data = self.write_block_header(b"HEAD", 256) data += head.serialize() if filename is None: filename = self._filename try: fd = open(filename, "r+b") except OSError as error: # If we couldn't open it because it doesn't exist open it for # writing. (err, strerror) = error.args # If we couldn't open it because it doesn't exist open it for # writing. if err == errno.ENOENT: fd = open(filename, "w+b") # If we couldn't open it for any other reason, reraise exception else: raise OSError(err, strerror) try: fd.seek(0) # Header always at start of file # Write header fd.write(data) # Seek 48 bytes forward, to skip the padding (which may contain extra # data) fd.seek(48, 1) data = self.write_block_footer(b"HEAD", 256) fd.write(data) finally: fd.close() class _GadgetWriteFile(_GadgetFile): """Class for write-only snapshots, as when we are creating a new set of files from, eg, a TipsySnap. Should not be used directly. block_names is a list so we can specify an on-disc ordering.""" def __init__(self, filename, npart, block_names, header, format2=True): self.header = header self._filename = filename self.endian = '=' # write with default endian of this system self.format2 = format2 self.blocks = {} self.header.npart = np.array(npart) # Set up the positions header_size = 4 if format2: header_size += 3 * 4 + 4 footer_size = 4 # First block is just past the header. cur_pos = 256 + header_size + footer_size for block in block_names: # Add block if present for some types if block.types.sum(): b_part = npart * block.types b = _GadgetBlock( start=cur_pos + header_size, partlen=block.partlen, length=block.partlen * b_part.sum(), dtype=block.dtype, p_types=block.types) cur_pos += b.length + header_size + footer_size self.blocks[_to_raw(block.name)] = b class _WriteBlock: """Internal structure for passing data around between file and snapshot""" def __init__(self, partlen=4, dtype=np.float32, types=np.zeros(N_TYPE, bool), name=" "): if np.issubdtype(dtype, np.floating): dtype = np.float32 if np.issubdtype(dtype, np.signedinteger): dtype = np.int32 self.partlen = partlen * np.dtype(dtype).itemsize self.dtype = dtype self.types = types self.name = name
[docs] class GadgetSnap(SimSnap): """Class for reading Gadget-1 and Gadget-2 old-style (i.e. pre-HDF5) snapshots."""
[docs] def __init__(self, filename: pathlib.Path, only_header=False, must_have_paramfile=False, ignore_cosmo=False): filename = str(filename) global config super().__init__() self._files = [] self._filename = filename self._ignore_cosmo = ignore_cosmo # Check whether the file exists, and get the ".0" right if os.path.exists(filename): files = [filename] elif os.path.exists(filename + ".0"): filename = filename + ".0" files = None if filename[-2:] == ".0": self._filename = filename[:-2] # Read the first file and use it to get an idea of how many files we # are expecting. first_file = _GadgetFile(filename) self._files.append(first_file) files_expected = self._files[0].header.num_files npart = np.array(self._files[0].header.npart,dtype=np.uint64) # 64-bit necessary in numpy 2.0 because of changes to data type promotion rules in the 2 * 32 calc below if files is None: # we want to load all files base_filename = filename[:-2] files = [base_filename + "." + str(i) for i in range(files_expected)] for filename in files[1:]: tmp_file = _GadgetFile(filename) if not self.check_headers(tmp_file.header, self._files[0].header): warnings.warn("file " + str( i) + " is not part of this snapshot set!", RuntimeWarning) continue self._files.append(tmp_file) npart = npart + tmp_file.header.npart # Set up things from the parent class self._num_particles = npart.sum() # Set up global header self.header = copy.deepcopy(self._files[0].header) self.header.npart = npart # Check and fix npartTotal and NallHW if they are wrong. if npart is not self.header.npartTotal.astype(np.uint64) + 2 ** 32 * self.header.NallHW.astype(np.uint64): self.header.NallHW = npart // 2 ** 32 self.header.npartTotal = npart - 2 ** 32 * self.header.NallHW for f in self._files: f.header.npartTotal = self.header.npartTotal f.header.NallHW = self.header.NallHW self._family_slice = {} self._loadable_keys = set() self._family_keys = set() self._family_arrays = {} self._arrays = {} #self.properties = {} # Set up _family_slice current = 0 for fam in _type_map: g_types = _type_map[fam] length = 0 for f in self._files: length += sum(f.header.npart[x] for x in g_types) self._family_slice[fam] = slice(current, current + length) current += length # Set up _loadable_keys for f in self._files: self._loadable_keys = self._loadable_keys.union( set(f.blocks.keys())) # Add default mapping to unpadded lower case if not in config file. for nn in self._loadable_keys: if sys.version_info[0] == 2: mm = nn.lower().strip() else: mm = nn.lower().strip().decode('utf-8') if nn not in _rev_name_map: _rev_name_map[nn] = mm if mm not in _name_map: _name_map[mm] = nn # Use translated keys only self._loadable_keys = [_translate_array_name( x, reverse=True) for x in self._loadable_keys] # Set up block list, with attached families, as a caching mechanism self._block_list = self.get_block_list() self._decorate()
[docs] def loadable_family_keys(self, fam=None): """Return list of arrays which are loadable for specific families, but not for all families.""" warnings.warn( "loadable_family_keys functionality has now been incorporated into loadable_keys", warnings.DeprecationWarning) return self.loadable_keys(fam)
[docs] def loadable_keys(self, fam=None): if fam is not None: return [x for x in self._loadable_keys if self._family_has_loadable_array(fam, x)] else: return [x for x in self._loadable_keys if self._family_has_loadable_array(None, x)]
def _family_has_loadable_array(self, fam, name): """Returns True if the array can be loaded for the specified family. If fam is None, returns True if the array can be loaded for all families.""" if name in self._block_list: if fam is not None: return fam in self._block_list[name] else: return set(self.families()) <= set(self._block_list[name]) else: return False
[docs] def get_block_list(self): """Get list of unique blocks in snapshot, with the types they refer to""" b_list = {} for f in self._files: for (n, b) in f.blocks.items(): if n in b_list: b_list[n] += b.p_types else: b_list[n] = np.array(b.p_types, dtype=bool) # Special case mass. Note b_list has reference semantics. if b"MASS" in b_list: b_list[b"MASS"] += np.array(self.header.mass, dtype=bool) # Translate this array into families and external names out_list = {} for k, b in b_list.items(): b_name = _translate_array_name(k, reverse=True) # Make this be only if there are actually particles of that type in # the snap b_types = [f for f in self.families() if b[np.intersect1d( gadget_type(f), np.ravel(np.where(self.header.npart != 0)))].all()] out_list[b_name] = b_types return out_list
[docs] def get_block_parts(self, name, family): """Get the number of particles present in a block, of a given type""" total = 0 for f in self._files: total += sum(f.get_block_parts( name, gfam) for gfam in gadget_type(family)) # Special-case MASS if name == b"MASS": total += sum(self.header.npart[p] * np.array(self.header.mass[ p], dtype=bool) for p in gadget_type(family)) return total
[docs] def check_headers(self, head1, head2): """Check two headers for consistency""" if (head1.time != head2.time or head1.redshift != head2.redshift or head1.flag_sfr != head2.flag_sfr or head1.flag_feedback != head2.flag_feedback or head1.num_files != head2.num_files or head1.BoxSize != head2.BoxSize or head1.Omega0 != head2.Omega0 or head1.OmegaLambda != head2.OmegaLambda or head1.HubbleParam != head2.HubbleParam or head1.flag_stellarage != head2.flag_stellarage or head1.flag_metals != head2.flag_metals): return False # Check array quantities if (((head1.mass - head2.mass) > 1e-5 * head1.mass).any() or (head1.npartTotal != head2.npartTotal).any()): return False # At least one version of N-GenICs writes a header file which # ignores everything past flag_metals (!), leaving it uninitialised. # Therefore, we can't check them. return True
def _get_array_type(self, name): """Get the type for the array given in name""" g_name = _translate_array_name(name) return self._get_array_type_g(g_name) def _get_array_type_g(self, name): """Get the type for the array given in name""" return self._files[0].blocks[name].data_type def _get_array_dims(self, name): """Get the dimensions of an array; ie, is it 3d or 1d""" g_name = _translate_array_name(name) return self._files[0].get_block_dims(g_name) def _load_array(self, name, fam=None): """Read in data from a Gadget file. If fam is not None, loads only data for that particle family""" # g_name is the internal name g_name = _translate_array_name(name) if not self._family_has_loadable_array(fam, name): if fam is None and name in self._block_list: raise KeyError( "Block " + name + " is not available for all families") else: raise OSError("No such array on disk") ndim = self._get_array_dims(name) if ndim == 1: dims = [int(self.get_block_parts(g_name, fam)), ] else: dims = [int(self.get_block_parts(g_name, fam)), ndim] if fam is not None: p_types = gadget_type(fam) else: p_types = gadget_type(self.families()) # Get the data. Get one type at a time and then concatenate. # A possible optimisation is to special-case loading all particles. data = np.array([], dtype=self._get_array_type(name)) for p in p_types: # Special-case mass if g_name == b"MASS" and self.header.mass[p] != 0.: mass_as_correct_type = self.header.mass[p].astype(self._get_array_type(name)) data = np.append(data, np.repeat(mass_as_correct_type, self.header.npart[p])) else: data = np.append(data, self.__load_array(g_name, p)) if fam is None: self[name] = data.reshape(dims, order='C').view(array.SimArray) self[name].set_default_units(quiet=True) else: self[fam][name] = data.reshape( dims, order='C').view(array.SimArray) self[fam][name].set_default_units(quiet=True) def __load_array(self, g_name, p_type): """Internal helper function for _load_array that takes a g_name and a gadget type, gets the data from each file and returns it as one long array.""" # int cast necessary because numpy makes int * uint64 a float! data = np.zeros(int(self._get_array_dims(g_name) * self.header.npart[p_type]), dtype=self._get_array_type_g(g_name)) # Get a type from each file ipos = 0 for f in self._files: f_parts = f.get_block_parts(g_name, p_type) if f_parts == 0: continue (f_read, f_data) = f.get_block(g_name, p_type, f_parts) if f_read != f_parts: raise OSError("Read of " + f._filename + " asked for " + str( f_parts) + " particles but got " + str(f_read)) iread = self._get_array_dims(g_name)*f.header.npart[p_type] data[ipos:ipos + iread] = f_data ipos += iread return data @classmethod def _can_load(cls, f: pathlib.Path): """Check whether we can load the file as Gadget format by reading the first 4 bytes""" fname = f if not f.exists(): fname = f.parent / (f.name + ".0") if not fname.exists(): return False with open(fname, "br") as fd: r, = struct.unpack('=I', fd.read(4)) # First int32 is 8 for a Gadget 2 file, or 256 for Gadget 1, or the # byte swapped equivalent. if r in (8, 134217728, 65536, 256): return True else: return False @staticmethod def _write(self, filename=None): """Write an entire Gadget file (actually an entire set of snapshots).""" with self.lazy_derive_off: # If caller is not a GadgetSnap, construct the GadgetFiles, # so that format conversion works. all_keys = set(self.loadable_keys()).union( list(self.keys())).union(self.family_keys()) all_keys = [ k for k in all_keys if k not in ["x", "y", "z", "vx", "vy", "vz"]] # This code supports (limited) format conversions if self.__class__ is not GadgetSnap: # We need a filename if we are writing to a new type if filename is None: raise Exception( "Please specify a filename to write a new file.") # Splitting the files correctly is hard; the particles need to be reordered, and # we need to know which families correspond to which gadget types. # So don't do it. # Make sure the data fits into one files. The magic numbers are: # 12 - the largest block is likely to be POS with 12 bytes per particle. # 2**31 is the largest size a gadget block can safely have if self.__len__() * 12. > 2 ** 31 - 1: raise OSError( "Data too large to fit into a single gadget file, and splitting not implemented. Cannot write.") # Make npart npart = np.zeros(N_TYPE, int) arr_name = (list(self.keys()) + self.loadable_keys())[0] for f in self.families(): # Note that if we have more than one type per family, we cannot # determine which type each individual particle is, so # assume they are all the first. npart[np.min(gadget_type(f))] = len(self[f][arr_name]) # Construct a header # npart, mass, time, redshift, BoxSize,Omega0, OmegaLambda, # HubbleParam, num_files=1 gheader = _GadgetHeader( npart, np.zeros(N_TYPE, float), self.properties[ "a"], self.properties["z"], self.properties["boxsize"].in_units(self[ 'pos'].units, **self.conversion_context()), self.properties["omegaM0"], self.properties["omegaL0"], self.properties["h"], 1) # Construct the block_names; each block_name needs partlen, data_type, and p_types, # as well as a name. Blocks will hit the disc in the order they are in all_keys. # First, make pos the first block and vel the second. # all_keys[all_keys.index("pos")]=all_keys[0] # all_keys[0] = "pos" # all_keys[all_keys.index("vel")]=all_keys[1] # all_keys[1] = "vel" all_keys = _output_order_gadget(all_keys) # No writing format 1 files. block_names = [] for k in all_keys: types = np.zeros(N_TYPE, bool) for f in self.families(): try: dtype = self[f][k].dtype types[np.min(gadget_type(f))] += True try: partlen = np.shape(self[ f][k])[1] # *dtype.itemsize except IndexError: partlen = 1 # dtype.itemsize except KeyError: pass bb = _WriteBlock(partlen, dtype=dtype, types=types, name=_translate_array_name( k).upper().ljust(4)[0:4]) block_names.append(bb) # Create an output file out_file = _GadgetWriteFile( filename, npart, block_names, gheader) # Write the header out_file.write_header(gheader, filename) # Write all the arrays for x in all_keys: g_name = _to_raw( _translate_array_name(x).upper().ljust(4)[0:4]) for fam in self.families(): try: data = self[fam][x] gfam = np.min(gadget_type(fam)) out_file.write_block( g_name, gfam, data, filename=filename) except KeyError: pass return # Write headers if filename is not None: if np.size(self._files) > 1: for i in np.arange(0, np.size(self._files)): ffile = filename + "." + str(i) self._files[i].write_header(self.header, ffile) else: self._files[0].write_header(self.header, filename) else: # Call write_header for every file. [f.write_header(self.header) for f in self._files] # Call _write_array for every array. for x in all_keys: GadgetSnap._write_array(self, x, filename=filename) @staticmethod def _write_array(self, array_name, fam=None, filename=None): """Write a data array back to a Gadget snapshot, splitting it across files.""" write_fam = fam or self.families() # Make the name a four-character upper case name, possibly with # trailing spaces g_name = _to_raw( _translate_array_name(array_name).upper().ljust(4)[0:4]) nfiles = np.size(self._files) # Find where each particle goes f_parts = [f.get_block_parts(g_name, -1) for f in self._files] # If there is no block corresponding to this name in the file, # add it (so we can write derived arrays). if np.sum(f_parts) == 0: # Get p_type p_types = np.zeros(N_TYPE, bool) npart = 0 for fam in self.families(): gfam = np.min(gadget_type(fam)) # We get the particle types we want by trying to load all # particle types (from memory) and seeing which ones work p_types[gfam] = array_name in self[fam] if p_types[gfam]: ashape = np.shape(self[fam][array_name]) # If the partlen is 1, append so the shape array has the # right shape. if np.size(ashape) < 2: ashape = (ashape[0], 1) npart += ashape[0] if p_types.sum(): per_file = npart // nfiles for f in self._files[:-2]: f.add_file_block(array_name, per_file, ashape[ 1], dtype=self[array_name].dtype, p_types=p_types) self._files[-1].add_file_block( array_name, npart - (nfiles - 1) * per_file, ashape[1]) # Write blocks on a family level, so that we don't have to worry about # the file-level re-ordering. for fam in write_fam: if self._family_has_loadable_array(fam, array_name): data = self[fam][array_name] s = 0 for gfam in gadget_type(fam): # Find where each particle goes f_parts = [f.get_block_parts( g_name, gfam) for f in self._files] for i in np.arange(0, nfiles): if f_parts[i]==0: continue # Set up filename if filename is not None: ffile = filename + "." + str(i) if nfiles == 1: ffile = filename else: ffile = None # Special-case MASS. if g_name == b"MASS" and self.header.mass[gfam] != 0.: nmass = np.min( data[s:(s + self.header.npart[gfam])]) # Warn if there are now different masses for this particle type, # as this information cannot be represented in this # snapshot. if nmass != np.max(data[s:(s + self.header.npart[gfam])]): warnings.warn("Cannot write variable masses for type " + str( gfam) + ", as masses are stored in the header.", RuntimeWarning) elif self.header.mass[gfam] != nmass: self.header.mass[gfam] = nmass self._files[i].write_header( self.header, filename=ffile) else: # Write data if np.issubdtype(data.dtype, np.floating): data = np.asanyarray(data, dtype=np.float32) self._files[i].write_block(g_name, gfam, data[ s:(s + f_parts[i])], filename=ffile) s += f_parts[i]
def _header_suggests_cosmological(gadget_header): return (gadget_header.HubbleParam != 0.) and (gadget_header.Omega0 != 0.) and (gadget_header.BoxSize != 0.) @GadgetSnap.decorator def _do_units(sim): vel_unit = config_parser.get('gadget-units', 'vel') dist_unit = config_parser.get('gadget-units', 'pos') mass_unit = config_parser.get('gadget-units', 'mass') vel_unit, dist_unit, mass_unit = ( units.Unit(x) for x in (vel_unit, dist_unit, mass_unit)) if sim._ignore_cosmo or not _header_suggests_cosmological(sim.header): # remove a and h dependences vel_unit = units.Unit( "km s^-1") * vel_unit.in_units("km s^-1", a=1, h=1) mass_unit = units.Unit("Msol") * mass_unit.in_units("Msol", a=1, h=1) dist_unit = units.Unit("kpc") * dist_unit.in_units("kpc", a=1, h=1) sim._file_units_system = [units.Unit("K"), vel_unit, dist_unit, mass_unit] sim._override_units_system() @GadgetSnap.decorator def _do_properties(sim): h = sim.header if not(sim._ignore_cosmo) and _header_suggests_cosmological(h): from .. import analysis sim.properties['omegaM0'] = h.Omega0 # sim.properties['omegaB0'] = ... This one is non-trivial to calculate sim.properties['omegaL0'] = h.OmegaLambda sim.properties['boxsize'] = sim.infer_original_units("kpc")*h.BoxSize sim.properties['z'] = h.redshift sim.properties['h'] = h.HubbleParam time_units = sim.infer_original_units("s") sim.properties['time'] = analysis.cosmology.age(sim,unit=time_units)*time_units else: sim.properties['time'] = sim.infer_original_units("s") * h.time