# Source code for geoh5py.data.reference_value_map

#  Copyright (c) 2024 Mira Geoscience Ltd.
#
#  This file is part of geoh5py.
#
#  geoh5py is free software: you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  geoh5py is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with geoh5py.  If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

from abc import ABC

import numpy as np
from h5py import special_dtype


class ReferenceValueMap(ABC):
    """
    Maps from reference index to reference value of ReferencedData.

    The mapping is held as a structured numpy array of dtype
    :attr:`MAP_DTYPE`, with an unsigned 32-bit integer ``Key`` field and a
    variable-length string ``Value`` field.

    :param value_map: Key, value pairs given as a dictionary, a tuple of
        pairs or a numpy array. See :meth:`validate_value_map`.
    :param name: Name given to the value map.
    """

    MAP_DTYPE = np.dtype([("Key", "<u4"), ("Value", special_dtype(vlen=str))])

    def __init__(
        self,
        value_map: dict[int, str] | np.ndarray | tuple,
        name: str = "Value map",
    ):
        self._map: np.ndarray = self.validate_value_map(value_map)
        self.name = name

    def __getitem__(self, item: int) -> str:
        """
        Return the value stored for a key.

        :param item: Key to look up.

        :raises KeyError: If the key is not in the map.
        """
        return dict(self._map)[item]

    def __len__(self) -> int:
        """Return the number of entries in the map."""
        return len(self._map)

    def __call__(self) -> dict:
        """
        Return the map as a dictionary of key, value pairs.

        The values are cast to fixed-width unicode ("U25"), i.e. at most 25
        characters are kept. If that cast raises a UnicodeDecodeError, the
        stored values are returned as they are.
        """
        string_dtype = np.dtype([("Key", "<u4"), ("Value", "U25")])
        try:
            map_string = self._map.astype(string_dtype)
        except UnicodeDecodeError:
            map_string = self._map

        return dict(map_string)

    @classmethod
    def validate_value_map(
        cls, value_map: np.ndarray | dict | tuple
    ) -> np.ndarray:
        """
        Verify that the key and value are valid.
        It raises errors if there is an issue

        Accepted inputs:

        - a tuple of (key, value) pairs, converted with ``dict``;
        - a dictionary of key: value;
        - a 1-D unstructured array, whose unique entries other than 0 are
          numbered from 1 and used as values;
        - any other unstructured array, converted with ``dict``;
        - a structured array, which must already be of dtype ``MAP_DTYPE``.

        Values coming from a dictionary are cast to "U25" (at most 25
        characters kept) and stored utf-8 encoded.

        :param value_map: Array of key, value pairs.

        :return: Structured array of dtype ``MAP_DTYPE``.

        :raises KeyError: If a dictionary key is negative.
        :raises TypeError: If the input cannot be converted to an array.
        :raises ValueError: If the array is not of dtype ``MAP_DTYPE``.
        """
        if isinstance(value_map, tuple):
            value_map = dict(value_map)

        if isinstance(value_map, np.ndarray) and value_map.dtype.names is None:
            if value_map.ndim == 1:
                # NOTE(review): keys follow the iteration order of a set,
                # which Python does not guarantee; for string entries the
                # key given to each value may differ between runs.
                unique_set = set(value_map)
                unique_set.discard(0)
                value_map = {i + 1: str(val) for i, val in enumerate(unique_set)}
            value_map = dict(value_map)

        if isinstance(value_map, dict):
            if not np.all(np.asarray(list(value_map)) >= 0):
                raise KeyError("Key must be an positive integer")
            # Build from the class attribute so that the dtype check below
            # and this conversion can never drift apart.
            value_map = np.array(list(value_map.items()), dtype=cls.MAP_DTYPE)
            value_map["Value"] = np.char.encode(
                value_map["Value"].astype("U25"), "utf-8"
            )

        if not isinstance(value_map, np.ndarray):
            raise TypeError("Value map must be a numpy array or dict.")

        if value_map.dtype != cls.MAP_DTYPE:
            raise ValueError(f"Array of 'value_map' must be of dtype = {cls.MAP_DTYPE}")

        return value_map

    @property
    def map(self) -> np.ndarray:
        """
        A reference array mapping values to strings.
        The keys are positive integers, and the values description.

        NOTE(review): the key '0' is documented as always being 'Unknown',
        but nothing in this class enforces it - confirm against the caller.
        """
        return self._map

    def map_values(self, values: np.ndarray) -> np.ndarray:
        """
        Map the values to the reference values.

        :param values: The values to map.

        :return: The mapped values.

        :raises KeyError: If some of the values are not keys of the map.
        """
        mapper = np.sort(self.map, order="Key")
        keys = mapper["Key"]
        indices = np.searchsorted(keys, values)

        # searchsorted returns insertion points, not matches: a value absent
        # from the keys would otherwise pick up the value of the next larger
        # key, or index past the end of the map. Compare each insertion
        # point (clipped to the last key) against the requested value.
        if keys.size > 0:
            unmatched = keys.take(indices, mode="clip") != values
        else:
            unmatched = np.ones(np.shape(values), dtype=bool)

        if np.any(unmatched):
            missing = np.unique(np.asarray(values)[unmatched]).tolist()
            raise KeyError(f"Values {missing} have no matching key in the value map.")

        return mapper["Value"][indices]
# Value map for boolean data: key 0 is b"False" and key 1 is b"True",
# stored with the same dtype as any other reference value map.
BOOLEAN_VALUE_MAP = np.array(
    [
        (0, b"False"),
        (1, b"True"),
    ],
    dtype=ReferenceValueMap.MAP_DTYPE,
)