# Copyright (c) 2024 Mira Geoscience Ltd.
#
# This file is part of geoh5py.
#
# geoh5py is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# geoh5py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with geoh5py. If not, see <https://www.gnu.org/licenses/>.
# pylint: disable=too-many-arguments, too-many-instance-attributes
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Literal, get_args
from uuid import UUID
import numpy as np
from ..shared import EntityType, utils
from .color_map import ColorMap
from .primitive_type_enum import DataTypeEnum, PrimitiveTypeEnum
from .reference_value_map import BOOLEAN_VALUE_MAP, ReferenceValueMap
if TYPE_CHECKING: # pragma: no cover
from ..objects import ObjectBase
from ..workspace import Workspace
from .data import Data
from .referenced_data import ReferencedData
# Closed set of names accepted by ``DataType.mapping``. The order is kept as
# is because ``get_args(ColorMapping)`` is interpolated in an error message.
ColorMapping = Literal[
    "linear", "equal_area", "logarithmic",
    "cdf", "cumulative_distribution_function", "missing",
]
[docs]
class DataType(EntityType):
    """
    DataType class.

    Controls all the attributes of the data type for displays in Geoscience ANALYST.

    Every public attribute is a validating property; most setters notify
    the workspace through ``workspace.update_attribute`` once the new value
    has been stored.

    :param workspace: An active Workspace.
    :param primitive_type: The primitive type of the data.
    :param color_map: The colormap used for plotting.
    :param duplicate_on_copy: Force a copy on copy of the data entity.
    :param duplicate_type_on_copy: If the data type should be duplicated on copy.
    :param hidden: If the data are hidden or not.
    :param mapping: The type of color stretching to plot the colormap.
    :param number_of_bins: The number of bins used by the histogram.
    :param precision: The decimals precision of the data to display.
    :param scale: The type of scale of the data.
    :param scientific_notation: If the data should be displayed in scientific notation.
    :param transparent_no_data: If the no data values are displayed as transparent or not.
    :param units: The type of the units of the data.
    :param kwargs: Additional keyword arguments to set as attributes
        (see :obj:`...shared.entity_type.EntityType`).
    """

    _attribute_map = EntityType._attribute_map.copy()
    _attribute_map.update(
        {
            "Hidden": "hidden",
            "Mapping": "mapping",
            "Number of bins": "number_of_bins",
            "Precision": "precision",
            "Primitive type": "primitive_type",
            "Transparent no data": "transparent_no_data",
            "Scale": "scale",
            "Scientific notation": "scientific_notation",
        }
    )

    def __init__(
        self,
        workspace: Workspace,
        *,
        primitive_type: (
            type[Data] | PrimitiveTypeEnum | str
        ) = PrimitiveTypeEnum.INVALID,
        color_map: ColorMap | None = None,
        duplicate_on_copy: bool = False,
        duplicate_type_on_copy: bool = False,
        hidden: bool = False,
        mapping: ColorMapping = "equal_area",
        number_of_bins: int | None = None,
        precision: int = 2,
        scale: str | None = None,
        scientific_notation: bool = False,
        transparent_no_data: bool = True,
        units: str | None = None,
        **kwargs,
    ):
        super().__init__(workspace, **kwargs)

        # Each assignment goes through the matching property setter, so the
        # values are validated here; the assignment order is left untouched.
        self.color_map = color_map
        self.duplicate_on_copy = duplicate_on_copy
        self.duplicate_type_on_copy = duplicate_type_on_copy
        self.hidden = hidden
        self.mapping = mapping
        self.number_of_bins = number_of_bins
        self.precision = precision
        self.primitive_type = self.validate_primitive_type(primitive_type)
        self.scale = scale
        self.scientific_notation = scientific_notation
        self.transparent_no_data = transparent_no_data
        self.units = units

    @staticmethod
    def _validate_flag(name: str, value) -> bool:
        """
        Coerce a boolean-like attribute value to ``bool``.

        Shared by all the boolean setters so that they accept the same
        inputs and report errors the same way.

        :param name: Name of the attribute, used in the error message.
        :param value: A bool, or anything comparing equal to 0 or 1.

        :return: The value converted with ``bool``.

        :raises TypeError: If the value is neither a bool nor equal to 0 or 1.
        """
        if not isinstance(value, bool) and value != 1 and value != 0:
            raise TypeError(f"Attribute '{name}' must be a bool, not {type(value)}")
        return bool(value)

    @property
    def color_map(self) -> ColorMap | None:
        r"""
        The Colormap used for plotting

        The colormap can be set from a dictionary, which is expanded as
        keyword arguments of :obj:`ColorMap`, or from a numpy array, which
        is passed as its ``values``.

        .. code-block:: python

            color_map = {
                val_1: [r_1, g_1, b_1, a_1],
                ...,
                val_i: [r_i, g_i, b_i, a_i]
            }

        It can be set to None if non-existing.
        """
        return self._color_map

    @color_map.setter
    def color_map(self, color_map: ColorMap | dict | np.ndarray | None):
        if isinstance(color_map, dict):
            color_map = ColorMap(**color_map)
        elif isinstance(color_map, np.ndarray):
            color_map = ColorMap(values=color_map)

        if color_map is not None and not isinstance(color_map, ColorMap):
            raise TypeError(
                f"Attribute 'color_map' must be of type {ColorMap}, "
                "numpy.ndarray or dict with 'values'."
            )

        if color_map is not None:
            # Attach the colormap to this data type.
            color_map.parent = self

        self._color_map: ColorMap | None = color_map
        self.workspace.update_attribute(self, "color_map")

    @property
    def dtype(self) -> type:
        """
        The data type of the data, as resolved by
        ``DataTypeEnum.from_primitive_type`` from the primitive type.
        """
        return DataTypeEnum.from_primitive_type(self.primitive_type)

    @property
    def duplicate_on_copy(self) -> bool:
        """
        If the data type should be duplicated on copy.
        """
        return self._duplicate_on_copy

    @duplicate_on_copy.setter
    def duplicate_on_copy(self, value: bool):
        self._duplicate_on_copy = self._validate_flag("duplicate_on_copy", value)

        # Unlike the sibling setters, this one only notifies the workspace
        # once the type is flagged as being on file.
        if self.on_file:
            self.workspace.update_attribute(self, "attributes")

    @property
    def duplicate_type_on_copy(self) -> bool:
        """
        If the data type should be duplicated on copy.
        """
        return self._duplicate_type_on_copy

    @duplicate_type_on_copy.setter
    def duplicate_type_on_copy(self, value: bool):
        self._duplicate_type_on_copy = self._validate_flag(
            "duplicate_type_on_copy", value
        )
        self.workspace.update_attribute(self, "attributes")

    @classmethod
    def find_or_create_type(
        cls,
        workspace: Workspace,
        primitive_type: PrimitiveTypeEnum | str,
        dynamic_implementation_id: str | UUID | None = None,
        uid: UUID | None = None,
        **kwargs,
    ) -> DataType:
        """
        Find an existing data type by uid, or create one for the primitive type.

        When no existing type is found, the class is chosen as follows:
        BOOLEAN gives a :obj:`ReferencedBooleanType`; GEOMETRIC with a
        dynamic implementation id gives the class registered in
        ``DYNAMIC_CLASS_IDS`` (falling back on :obj:`DataType`);
        REFERENCED gives a :obj:`ReferencedValueMapType`; anything else
        gives ``cls``.

        :param workspace: An active Workspace class
        :param primitive_type: The primitive type of the data.
        :param dynamic_implementation_id: The dynamic implementation id,
            only used for GEOMETRIC primitive types.
        :param uid: The unique identifier of the entity type.
        :param kwargs: The attributes of the entity type.

        :return: EntityType
        """
        if uid is not None:
            entity_type = DataType.find(workspace, uid)
            if entity_type is not None:
                return entity_type

        primitive_type = cls.validate_primitive_type(primitive_type)

        if primitive_type == PrimitiveTypeEnum.BOOLEAN:
            return ReferencedBooleanType(
                workspace, primitive_type=primitive_type, uid=uid, **kwargs
            )

        if (
            primitive_type == PrimitiveTypeEnum.GEOMETRIC
            and dynamic_implementation_id is not None
        ):
            data_type = DYNAMIC_CLASS_IDS.get(
                utils.str2uuid(dynamic_implementation_id), DataType
            )
            return data_type(
                workspace, primitive_type=primitive_type, uid=uid, **kwargs
            )

        if primitive_type == PrimitiveTypeEnum.REFERENCED:
            return ReferencedValueMapType(
                workspace, primitive_type=primitive_type, uid=uid, **kwargs
            )

        return cls(workspace, primitive_type=primitive_type, uid=uid, **kwargs)

    @property
    def hidden(self) -> bool:
        """
        If the data are hidden or not.
        """
        return self._hidden

    @hidden.setter
    def hidden(self, value: bool):
        self._hidden: bool = self._validate_flag("hidden", value)
        self.workspace.update_attribute(self, "attributes")

    @property
    def mapping(self) -> str:
        """
        The type of color stretching to plot the colormap.

        It can be one of the following:
        'linear', 'equal_area', 'logarithmic', 'cdf',
        'cumulative_distribution_function', 'missing'
        """
        return self._mapping

    @mapping.setter
    def mapping(self, value: ColorMapping):
        if value not in get_args(ColorMapping):
            raise ValueError(
                f"Attribute 'mapping' should be one of {get_args(ColorMapping)}. "
                f"Value '{value}' was provided."
            )
        self._mapping: str = value
        self.workspace.update_attribute(self, "attributes")

    @property
    def number_of_bins(self) -> int | None:
        """
        The number of bins used by the histogram.

        It can be None if no histogram is used.
        """
        return self._number_of_bins

    @number_of_bins.setter
    def number_of_bins(self, n_bins: int | None):
        if n_bins is not None and (
            not isinstance(n_bins, (int, np.integer)) or n_bins < 1
        ):
            raise ValueError(
                "Attribute 'number_of_bins' should be an integer greater than 0 "
                f"or None, not {n_bins}"
            )
        self._number_of_bins: int | None = n_bins
        self.workspace.update_attribute(self, "attributes")

    @property
    def precision(self) -> int:
        """
        The decimals precision of the data to display.

        Integers and whole-valued floats greater than or equal to 0 are
        accepted; the value is stored as an ``int``.
        """
        return self._precision

    @precision.setter
    def precision(self, value: int):
        if (
            not isinstance(value, (int, float, np.integer, np.floating))
            or (isinstance(value, (float, np.floating)) and not value.is_integer())
            or value < 0
        ):
            # The message now matches the check: 0 is a valid precision.
            raise TypeError(
                "Attribute 'precision' must be an integer greater than "
                f"or equal to 0, not {value}"
            )
        self._precision = int(value)
        self.workspace.update_attribute(self, "attributes")

    @property
    def primitive_type(self) -> PrimitiveTypeEnum:
        """
        The primitive type of the data.
        """
        return self._primitive_type

    @primitive_type.setter
    def primitive_type(self, value: PrimitiveTypeEnum):
        if not isinstance(value, PrimitiveTypeEnum):
            raise ValueError(
                "Attribute 'primitive_type' value must be of type "
                f"{PrimitiveTypeEnum}, find {type(value)}"
            )
        self._primitive_type = value

    @property
    def transparent_no_data(self) -> bool:
        """
        If the no data values are displayed as transparent or not.
        """
        return self._transparent_no_data

    @transparent_no_data.setter
    def transparent_no_data(self, value: bool):
        self._transparent_no_data = self._validate_flag("transparent_no_data", value)
        self.workspace.update_attribute(self, "attributes")

    @property
    def units(self) -> str | None:
        """
        The type of the units of the data.
        """
        return self._units

    @units.setter
    def units(self, unit: str | None):
        if unit is not None and not isinstance(unit, str):
            raise TypeError(f"Attribute 'units' must be a string, not {type(unit)}")
        self._units = unit
        self.workspace.update_attribute(self, "attributes")

    @staticmethod
    def primitive_type_from_values(values: np.ndarray | None) -> PrimitiveTypeEnum:
        """
        Infer the primitive type matching some values.

        ``None`` and floating point arrays map to FLOAT, integer arrays to
        INTEGER, a ``str`` or an array of kind 'U' or 'S' to TEXT, and
        boolean arrays to BOOLEAN.

        :param values: The values to validate.

        :return: The equivalent primitive type of the data.

        :raises NotImplementedError: If the values match none of the cases above.
        """
        is_array = isinstance(values, np.ndarray)

        if values is None or (is_array and np.issubdtype(values.dtype, np.floating)):
            return PrimitiveTypeEnum.FLOAT

        if is_array and np.issubdtype(values.dtype, np.integer):
            return PrimitiveTypeEnum.INTEGER

        if isinstance(values, str) or (is_array and values.dtype.kind in ["U", "S"]):
            return PrimitiveTypeEnum.TEXT

        if is_array and values.dtype == bool:
            return PrimitiveTypeEnum.BOOLEAN

        raise NotImplementedError(
            "Only add_data values of type FLOAT, INTEGER, "
            "BOOLEAN and TEXT have been implemented"
        )

    @property
    def scale(self) -> str | None:
        """
        The type of scale of the data: 'Linear', 'Log' or None.
        """
        return self._scale

    @scale.setter
    def scale(self, value: str | None):
        if value not in ["Linear", "Log", None]:
            raise ValueError(
                f"Attribute 'scale' must be one of 'Linear', 'Log', NoneType, not {value}"
            )
        self._scale = value
        self.workspace.update_attribute(self, "attributes")

    @property
    def scientific_notation(self) -> bool:
        """
        If the data should be displayed in scientific notation.
        """
        return self._scientific_notation

    @scientific_notation.setter
    def scientific_notation(self, value: bool):
        self._scientific_notation = self._validate_flag("scientific_notation", value)
        self.workspace.update_attribute(self, "attributes")

    @staticmethod
    def validate_primitive_type(
        primitive_type: PrimitiveTypeEnum | str | type[Data],
    ) -> PrimitiveTypeEnum:
        """
        Validate the primitive type of the data.

        A string is first translated through ``utils.INV_KEY_MAP`` (when
        present there) and then looked up by name on
        :obj:`PrimitiveTypeEnum`. A class exposing a ``primitive_type``
        callable is resolved by calling it.

        :param primitive_type: Some reference to the primitive type of the data.

        :return: A known primitive type.

        :raises AttributeError: If a string does not name a member of
            :obj:`PrimitiveTypeEnum`; the lookup fails before the check below.
        :raises ValueError: If the resolved value is not a
            :obj:`PrimitiveTypeEnum`.
        """
        if isinstance(primitive_type, str):
            primitive_type = getattr(
                PrimitiveTypeEnum, utils.INV_KEY_MAP.get(primitive_type, primitive_type)
            )

        if isinstance(primitive_type, type) and hasattr(
            primitive_type, "primitive_type"
        ):
            primitive_type = primitive_type.primitive_type()

        if not isinstance(primitive_type, PrimitiveTypeEnum):
            raise ValueError(
                f"Attribute 'primitive_type' should be one of {PrimitiveTypeEnum.__members__}"
            )

        return primitive_type
[docs]
class ReferenceDataType(DataType):
    """
    Base data type for reference data.

    Adds a validated ``value_map`` on top of :obj:`DataType`. Subclasses
    provide ``validate_keys`` to enforce their own rules on the map.

    :param workspace: An active Workspace.
    :param value_map: Reference value to map index with description.
    :param kwargs: Keyword arguments forwarded to the parent initialiser.
    """

    def __init__(
        self,
        workspace: Workspace,
        value_map: dict[int, str] | np.ndarray | tuple | ReferenceValueMap = (
            (0, "Unknown"),
        ),
        **kwargs,
    ):
        super().__init__(workspace, **kwargs)
        # Validated here, then once more by the property setter.
        validated = self.validate_value_map(value_map)
        self.value_map = validated

    @staticmethod
    @abstractmethod
    def validate_keys(value_map: ReferenceValueMap) -> ReferenceValueMap:
        """
        Check the keys of a value map; implemented by the subclasses.
        """

    def validate_value_map(
        self,
        value_map: dict[int, str] | np.ndarray | tuple | ReferenceValueMap,
    ) -> ReferenceValueMap | None:
        """
        Convert and check a candidate value map.

        Dictionaries, numpy arrays and tuples are wrapped in a
        :obj:`ReferenceValueMap`, whose keys then go through
        ``validate_keys``. None is returned untouched.

        :param value_map: The candidate value map.

        :return: The validated value map, or None.

        :raises TypeError: If the input cannot be handled.
        """
        if value_map is None:
            return None

        candidate = value_map
        if isinstance(candidate, (dict, np.ndarray, tuple)):
            candidate = ReferenceValueMap(candidate)

        if isinstance(candidate, ReferenceValueMap):
            self.validate_keys(candidate)
            return candidate

        raise TypeError(
            "Attribute 'value_map' must be provided as a dict, tuple[dict], "
            f"numpy.ndarray or {ReferenceValueMap}."
        )

    @property
    def value_map(self) -> ReferenceValueMap | None:
        r"""
        Reference value map linking each index to its description.

        It can be set from a dictionary of integer values with their text
        description.

        .. code-block:: python

            value_map = {
                val_1: str_1,
                ...,
                val_i: str_i
            }
        """
        return self._value_map

    @value_map.setter
    def value_map(
        self, value_map: dict[int, str] | np.ndarray | tuple | ReferenceValueMap
    ):
        self._value_map = self.validate_value_map(value_map)

        # Only notify the workspace once the type is flagged as on file.
        if not self.on_file:
            return
        self.workspace.update_attribute(self, "value_map")
[docs]
class ReferencedValueMapType(ReferenceDataType):
    """
    Data type of referenced data described by a value map.

    Key 0 of the value map is reserved for the 'Unknown' entry.
    """

    _TYPE_UID = UUID(fields=(0x2D5D6C1E, 0x4D8C, 0x4F3A, 0x9B, 0x3F, 0x2E5A0D8E1C1F))

    def __init__(self, workspace: Workspace, **kwargs):
        super().__init__(workspace, **kwargs)

    @staticmethod
    def validate_keys(value_map: ReferenceValueMap):
        """
        Make sure key 0 exists and is labelled 'Unknown'.

        When key 0 is missing, the underlying array is grown by one entry
        in place and ``(0, b"Unknown")`` is written at its end.

        :param value_map: The value map to check, possibly modified in place.

        :raises ValueError: If key 0 holds anything else than 'Unknown'.
        """
        has_zero_key = 0 in value_map.map["Key"]
        if not has_zero_key:
            new_length = len(value_map) + 1
            value_map.map.resize(new_length, refcheck=False)
            value_map.map[-1] = (0, b"Unknown")

        zero_label = dict(value_map.map)[0]
        if zero_label not in ["Unknown", b"Unknown"]:
            raise ValueError("Value for key 0 must be b'Unknown'")
[docs]
class ReferencedBooleanType(ReferenceDataType):
    """
    Data container for referenced boolean data.

    The value map is fixed: anything differing from ``BOOLEAN_VALUE_MAP``
    is rejected by ``validate_keys``.

    :param workspace: An active Workspace.
    :param value_map: The boolean value map, ``BOOLEAN_VALUE_MAP`` by default.
    :param kwargs: Keyword arguments forwarded to the parent initialiser.
    """

    def __init__(
        self,
        workspace: Workspace,
        value_map: (
            dict[int, str] | np.ndarray | tuple | ReferenceValueMap
        ) = BOOLEAN_VALUE_MAP,
        **kwargs,
    ):
        super().__init__(workspace, value_map=value_map, **kwargs)

    @staticmethod
    def validate_keys(value_map: ReferenceValueMap):
        """
        Validate the keys of the value map.

        :param value_map: The value map to compare with ``BOOLEAN_VALUE_MAP``.

        :raises ValueError: If any entry differs from ``BOOLEAN_VALUE_MAP``.
        """
        if not np.all(value_map.map == BOOLEAN_VALUE_MAP):
            # The closing parenthesis was missing from the original message.
            raise ValueError("Boolean value map must be (0: 'False', 1: 'True')")
[docs]
class GeometricDynamicDataType(DataType, ABC):
    """
    Base data type for dynamic geometric data.

    Subclasses supply ``_DYNAMIC_IMPLEMENTATION_ID`` and ``_TYPE_UID``;
    the latter is used as the uid whenever none is given.

    :param workspace: An active Workspace.
    :param uid: The unique identifier of the type; ``_TYPE_UID`` if None.
    :param kwargs: Keyword arguments forwarded to the parent initialiser.
    """

    _attribute_map = DataType._attribute_map.copy()
    _attribute_map["Dynamic implementation ID"] = "dynamic_implementation_id"

    _TYPE_UID: UUID | None
    _DYNAMIC_IMPLEMENTATION_ID: UUID

    def __init__(
        self,
        workspace: Workspace,
        uid: UUID | None = None,
        **kwargs,
    ):
        resolved_uid = self._TYPE_UID if uid is None else uid
        super().__init__(workspace, uid=resolved_uid, **kwargs)

    @classmethod
    def default_type_uid(cls) -> UUID | None:
        """
        The class-level uuid used when no uid is provided.
        """
        return cls._TYPE_UID

    @property
    def dynamic_implementation_id(self) -> UUID:
        """
        The dynamic implementation id defined on the class.
        """
        return self._DYNAMIC_IMPLEMENTATION_ID
[docs]
class GeometricDataValueMapType(ReferenceDataType, GeometricDynamicDataType):
    """
    Data container for value map

    The type name is split on ':' to find the name of the referenced data
    holding the value map, and on ': ' to find the name of the map itself.

    :param workspace: An active Workspace.
    :param value_map: The value map, or None to fetch it lazily from the
        referenced data.
    :param parent: The object searched for the referenced data.
    :param description: The description of the type.
    :param primitive_type: The primitive type, GEOMETRIC by default.
    :param kwargs: Keyword arguments forwarded to the parent initialisers.
    """

    _DYNAMIC_IMPLEMENTATION_ID = UUID("{4b6ecb37-0623-4ea0-95f1-4873008890a8}")
    _TYPE_UID = None

    def __init__(
        self,
        workspace: Workspace,
        *,
        value_map: dict[int, str] | tuple | ReferenceValueMap | None = None,
        parent: ObjectBase | None = None,
        description: str = "Dynamic referenced data",
        primitive_type: PrimitiveTypeEnum | str = PrimitiveTypeEnum.GEOMETRIC,
        **kwargs,
    ):
        # Must exist before the parent initialiser runs the value_map setter.
        # NOTE(review): 'referenced_data' also reads '_parent', which is only
        # assigned after super().__init__; this presumably relies on
        # 'on_file' being falsy during construction - confirm.
        self._referenced_data = None

        super().__init__(
            workspace,
            value_map=value_map,
            description=description,
            primitive_type=primitive_type,
            **kwargs,
        )
        self._parent = parent

    def get_parent_reference(self, parent: ObjectBase):
        """
        Recover the parent ReferencedData by name.

        The name searched for is the part of this type's name before the
        first ':'. The first child of the parent whose entity type is a
        :obj:`ReferencedValueMapType` of that name is returned.

        :param parent: The object whose children are searched.

        :return: The first matching child.

        :raises ValueError: If no child matches.
        """
        ref_data_name = self.name.rsplit(":")[0]

        for child in parent.children:
            if (
                isinstance(child.entity_type, ReferencedValueMapType)
                and child.entity_type.name == ref_data_name
            ):
                return child

        raise ValueError(f"Parent data '{ref_data_name}' not found.")

    @property
    def referenced_data(self) -> ReferencedData | None:
        """
        Reference data type holding the value map.

        Resolved from the parent on first access, then cached.
        """
        if self._referenced_data is None and self._parent is not None:
            self._referenced_data = self.get_parent_reference(self._parent)

        return self._referenced_data

    @staticmethod
    def validate_keys(value_map: ReferenceValueMap):
        """
        Validate the keys of the value map; no constraint applies here.
        """

    def validate_value_map(
        self,
        value_map: dict[int, str] | np.ndarray | tuple | ReferenceValueMap,
    ) -> ReferenceValueMap | None:
        """
        Validate the attribute of ReferencedDataType

        Dictionaries, numpy arrays and tuples are wrapped in a
        :obj:`ReferenceValueMap` named after the second ': ' separated part
        of this type's name. The keys are not checked.

        :param value_map: The candidate value map.

        :return: The validated value map, or None.

        :raises IndexError: If a conversion is needed while the name of the
            type holds no ': ' separator.
        :raises TypeError: If the input cannot be handled.
        """
        if value_map is None:
            return None

        if isinstance(value_map, dict | np.ndarray | tuple):
            value_map = ReferenceValueMap(value_map, name=self.name.rsplit(": ")[1])

        if not isinstance(value_map, ReferenceValueMap):
            # Lists numpy.ndarray too, as it is accepted above and in the
            # parent class message.
            raise TypeError(
                "Attribute 'value_map' must be provided as a dict, tuple[dict], "
                f"numpy.ndarray or {ReferenceValueMap}."
            )

        return value_map

    @property
    def value_map(self) -> ReferenceValueMap | None:
        r"""
        Reference value to map index with description.

        The value_map can be set from a dictionary of sorted integer
        values with text description.

        .. code-block:: python

            value_map = {
                val_1: str_1,
                ...,
                val_i: str_i
            }

        When no map is stored yet and the referenced data can be resolved,
        the map is fetched from the workspace: the position of the matching
        name in the metadata of the referenced data selects the
        'Value map <position>' array attribute of its entity type.

        :raises ValueError: If the referenced data has no data maps or no
            metadata.
        """
        referenced = self.referenced_data

        if self._value_map is None and referenced is not None:
            if referenced.data_maps is None or referenced.metadata is None:
                raise ValueError("Referenced data has no data maps.")

            fetched = None
            for position, name in enumerate(referenced.metadata, start=1):
                # The map name is evaluated per entry, as before, so that an
                # empty metadata never triggers the split.
                if name == self.name.rsplit(": ")[1]:
                    fetched = self.workspace.fetch_array_attribute(
                        referenced.entity_type, f"Value map {position}"
                    )

            if fetched is not None:
                self._value_map = self.validate_value_map(
                    fetched.astype(ReferenceValueMap.MAP_DTYPE)
                )

        return self._value_map

    @value_map.setter
    def value_map(self, value_map: dict | tuple | ReferenceValueMap | None):
        self._value_map = self.validate_value_map(value_map)

        # The update targets the referenced data rather than this type.
        if self.on_file and self.referenced_data is not None:
            self.workspace.update_attribute(self.referenced_data, "data_map")
[docs]
class GeometricDataXType(GeometricDynamicDataType):
    """
    Dynamic geometric data type for the X values.

    Only defines the type uid and the dynamic implementation id.
    """

    _TYPE_UID = UUID(fields=(0xE9E6B408, 0x4109, 0x4E42, 0xB6, 0xA8, 0x685C37A802EE))
    _DYNAMIC_IMPLEMENTATION_ID = UUID("{2dbf303e-05d6-44ba-9692-39474e88d516}")
[docs]
class GeometricDataYType(GeometricDynamicDataType):
    """
    Dynamic geometric data type for the Y values.

    Only defines the type uid and the dynamic implementation id.
    """

    _TYPE_UID = UUID(fields=(0xF55B07BD, 0xD8A0, 0x4DFF, 0xBA, 0xE5, 0xC975D490D71C))
    _DYNAMIC_IMPLEMENTATION_ID = UUID("{d56406dc-5eeb-418d-add4-a1282a6ef668}")
[docs]
class GeometricDataZType(GeometricDynamicDataType):
    """
    Dynamic geometric data type for the Z values.

    Only defines the type uid and the dynamic implementation id.
    """

    _TYPE_UID = UUID(fields=(0xDBAFB885, 0x1531, 0x410C, 0xB1, 0x8E, 0x6AC9A40B4466))
    _DYNAMIC_IMPLEMENTATION_ID = UUID("{9dacdc3b-6878-408d-93ae-e9a95e640f0c}")
# Lookup from dynamic implementation id to the class implementing it. Only
# the direct subclasses of GeometricDynamicDataType defined at this point are
# registered, since that is what ``__subclasses__()`` returns.
DYNAMIC_CLASS_IDS = {
    subclass._DYNAMIC_IMPLEMENTATION_ID: subclass  # pylint: disable=protected-access
    for subclass in GeometricDynamicDataType.__subclasses__()
}