# Copyright (c) 2024 Mira Geoscience Ltd.
#
# This file is part of geoh5py.
#
# geoh5py is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# geoh5py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with geoh5py. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
import json
import uuid
from typing import Any
import h5py
import numpy as np
from ..shared import FLOAT_NDV, fetch_h5_handle
from ..shared.utils import (
INV_KEY_MAP,
KEY_MAP,
as_str_if_utf8_bytes,
as_str_if_uuid,
str2uuid,
str_json_to_dict,
)
[docs]
class H5Reader:
"""
Class to read information from a geoh5 file.
"""
[docs]
@classmethod
def fetch_attributes(
    cls,
    file: str | h5py.File,
    uid: uuid.UUID,
    entity_type: str,
) -> tuple[dict, dict, dict] | tuple[None, None, None]:
    """
    Read everything stored for an :obj:`~geoh5py.shared.entity.Entity`.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier
    :param entity_type: Type of entity from
        'group', 'data', 'object', 'group_type', 'data_type', 'object_type'

    :return: ``(attributes, type_attributes, property_groups)``, three
        dictionaries holding respectively the entity attributes, the
        attributes of its 'Type' entry and its property groups. The last two
        are empty when the entity has no 'Type' or 'PropertyGroups' entry.
        ``(None, None, None)`` is returned when the entity is not found.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]
        entity_type = cls.format_type_string(entity_type)
        root = h5file[name]

        # "Root" is read directly under the top-level group; any other
        # entity is looked up by uid inside the container of its type.
        if entity_type == "Root":
            entity = root.get(entity_type)
        else:
            entity = root[entity_type].get(as_str_if_uuid(uid))

        if entity is None:
            return None, None, None

        # HDF5 attributes, renamed through INV_KEY_MAP when a mapping exists.
        attributes: dict = {
            INV_KEY_MAP.get(attr_name, attr_name): attr_value
            for attr_name, attr_value in entity.attrs.items()
        }

        # TODO Use lazy pointer to data
        if entity_type != "Data":
            for child_name, child in entity.items():
                if child_name.lower() in ("metadata", "options"):
                    attributes[INV_KEY_MAP[child_name]] = cls.fetch_metadata(
                        file, uid, entity_type, child_name
                    )
                    continue

                # Only known, non-scalar datasets are loaded as arrays.
                is_known_array = (
                    child_name in INV_KEY_MAP
                    and isinstance(child, h5py.Dataset)
                    and child.ndim > 0
                )
                if is_known_array:
                    attributes[INV_KEY_MAP[child_name]] = child[:]

        type_attributes: dict = (
            cls.fetch_type_attributes(entity["Type"]) if "Type" in entity else {}
        )
        property_groups: dict = (
            cls.fetch_property_groups(file, uid)
            if "PropertyGroups" in entity
            else {}
        )

    return attributes, type_attributes, property_groups
[docs]
@classmethod
def fetch_array_attribute(
    cls, file: str | h5py.File, uid: uuid.UUID, entity_type: str, key: str
) -> np.ndarray | None:
    """
    Get an entity attribute stored as an array, such as
    :obj:`~geoh5py.objects.object_base.ObjectBase.cells`.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier of the target object.
    :param entity_type: Group type to fetch entity from.
    :param key: Field attribute name, translated through ``KEY_MAP`` when a
        mapping exists.

    :return: The full content of the requested field, or None when the
        entity or the field cannot be found under the selected container.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]
        label = KEY_MAP.get(key, key)

        # Type containers live one level deeper, under "Types".
        container = h5file[name]
        if "types" in entity_type:
            container = container["Types"]
        root_handle = container[entity_type]

        try:
            return root_handle[as_str_if_uuid(uid)][label][:]
        except KeyError:
            return None
[docs]
@classmethod
def fetch_children(
    cls, file: str | h5py.File, uid: uuid.UUID, entity_type: str
) -> dict:
    """
    Get :obj:`~geoh5py.shared.entity.Entity.children` of an
    :obj:`~geoh5py.shared.entity.Entity`.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier
    :param entity_type: Type of entity from
        'group', 'data', 'object', 'group_type', 'data_type', 'object_type'

    :return: Dictionary ``{child_uid: child_type}`` with one entry per child.
        Empty when the entity type or the entity itself is not in the file.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]
        children: dict = {}
        entity_type = cls.format_type_string(entity_type)

        root = h5file[name]
        if entity_type not in root:
            return children

        container = root[entity_type]
        entity_key = as_str_if_uuid(uid)
        if entity_key not in container:
            return children

        # Sub-groups that do not list children of the entity.
        skipped = ("Type", "PropertyGroups", "Concatenated Data")

        for child_type, child_list in container[entity_key].items():
            if child_type in skipped or not isinstance(child_list, h5py.Group):
                continue

            # NOTE(review): this strips every "s" from the container name,
            # not only a trailing plural (e.g. "Objects" -> "object").
            kind = child_type.replace("s", "").lower()
            for child_uid in child_list:
                children[str2uuid(child_uid)] = kind

    return children
[docs]
@classmethod
def fetch_concatenated_values(
    cls,
    file: str | h5py.File,
    uid: uuid.UUID,
    entity_type: str,
    label: str,
) -> tuple | None:
    """
    Get the values and indices stored for a field of a concatenated group.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier
    :param entity_type: Type of entity from
        'group', 'data', 'object', 'group_type', 'data_type', 'object_type'
    :param label: Group identifier for the attribute requested.

    :return: ``(values, indices)`` read from the 'Concatenated Data' group of
        the entity, or None when the entity, its 'Concatenated Data' group,
        the index entry or the data entry for ``label`` is missing.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]
        entity_type = cls.format_type_string(entity_type)
        label = KEY_MAP.get(label, label)

        # Stored names use U+2044 (fraction slash) in place of "/",
        # presumably because "/" is the path separator in HDF5 names.
        h5_label = label.replace("/", "\u2044")

        try:
            group = h5file[name][entity_type][as_str_if_uuid(uid)][
                "Concatenated Data"
            ]
            indices = group["Index"].get(h5_label)
            if indices is None:
                return None

            # Values are looked up under "Data" first, then directly under
            # the concatenated group.
            array = group["Data"].get(h5_label)
            if array is None:
                array = group.get(h5_label)

            # An index without matching data used to fail with a TypeError
            # on ``None[:]``; report it as missing instead.
            if array is None:
                return None

            attribute = array[:]
            if attribute.dtype in [float, "float64", "float32"]:
                attribute[attribute == FLOAT_NDV] = np.nan

            if (
                attribute.dtype == object
                and len(attribute) > 0
                and isinstance(attribute[0], bytes)
            ):
                attribute = np.char.decode(attribute.astype(np.bytes_), "UTF-8")

            return attribute, indices[:]
        except KeyError:
            return None
[docs]
@classmethod
def fetch_concatenated_attributes(
    cls,
    file: str | h5py.File,
    uid: uuid.UUID,
    entity_type: str,
    label: str,
) -> list | dict | None:
    """
    Get 'Attributes', 'Data' or 'Index' from Concatenator group.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier
    :param entity_type: Type of entity from
        'group', 'data', 'object', 'group_type', 'data_type', 'object_type'
    :param label: Group identifier for the attribute requested.

    :return: For 'Attributes', the decoded JSON content; for
        'Attributes Jsons', a dictionary ``{"Attributes": [...]}`` with one
        decoded JSON entry per stored value; for any other label, the list of
        names found under that entry. None when a lookup fails with KeyError.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]
        entity_type = cls.format_type_string(entity_type)
        label = KEY_MAP.get(label, label)

        try:
            group = h5file[name][entity_type][as_str_if_uuid(uid)][
                "Concatenated Data"
            ]

            # Anything other than the two JSON entries is returned as a
            # plain listing of its members.
            if label not in ("Attributes", "Attributes Jsons"):
                return list(group[label])

            raw = group[label][()]

            if label == "Attributes Jsons":
                decoded = [json.loads(as_str_if_utf8_bytes(item)) for item in raw]
                return {"Attributes": decoded}

            # 'Attributes' may come back wrapped in an array; use its
            # first element in that case.
            if isinstance(raw, np.ndarray):
                raw = raw[0]
            return json.loads(as_str_if_utf8_bytes(raw))
        except KeyError:
            return None
[docs]
@classmethod
def fetch_project_attributes(cls, file: str | h5py.File) -> dict[Any, Any]:
    """
    Get the attributes stored on the top-level group of a geoh5 file.

    :param file: :obj:`h5py.File` or name of the target geoh5 file

    :return: :obj:`dict` of attributes, keyed by their name in the file.
    :raises FileNotFoundError: If the file does not contain exactly one
        top-level entry.
    """
    with fetch_h5_handle(file) as h5file:
        top_level = list(h5file)

        if len(top_level) != 1:
            raise FileNotFoundError

        return dict(h5file[top_level[0]].attrs.items())
[docs]
@classmethod
def fetch_property_groups(
    cls, file: str | h5py.File, uid: uuid.UUID
) -> dict[str, dict[str, str]]:
    r"""
    Get the property groups of an entity stored under 'Objects'.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier of the target entity

    :return: :obj:`dict` of property groups and respective attributes,
        keyed by the name of each entry found under 'PropertyGroups'. Empty
        when the lookup fails with KeyError.

    .. code-block:: python

        property_group = {
            "group_1": {"attribute": value, ...},
            ...,
            "group_N": {"attribute": value, ...},
        }
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]
        property_groups: dict[str, dict[str, str]] = {}

        try:
            pg_handle = h5file[name]["Objects"][as_str_if_uuid(uid)][
                "PropertyGroups"
            ]

            for pg_key in pg_handle:
                group_attributes: dict[str, str] = {}
                property_groups[pg_key] = group_attributes

                # Attribute names are renamed through INV_KEY_MAP when a
                # mapping exists.
                for attr_name, attr_value in pg_handle[pg_key].attrs.items():
                    group_attributes[INV_KEY_MAP.get(attr_name, attr_name)] = (
                        attr_value
                    )
        except KeyError:
            # Best effort: keep whatever was collected so far.
            pass

    return property_groups
[docs]
@classmethod
def fetch_type(
    cls, file: str | h5py.File, uid: uuid.UUID, entity_type: str
) -> dict:
    """
    Fetch a type from the target geoh5.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier of the target entity
    :param entity_type: One of 'Data', 'Object' or 'Group'

    :return: :obj:`dict` of type attributes, as returned by
        :func:`fetch_type_attributes` for the matching entry under 'Types'.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]

        # Types are grouped in containers named "<entity_type> types".
        types_container = entity_type + " types"
        type_handle = h5file[name]["Types"][types_container][as_str_if_uuid(uid)]

        return cls.fetch_type_attributes(type_handle)
[docs]
@classmethod
def fetch_type_attributes(cls, type_handle: h5py.Group) -> dict:
    """
    Fetch type attributes from a given h5 handle.

    :param type_handle: Handle of the type entry to read from.

    :return: :obj:`dict` of the handle attributes, renamed through
        ``INV_KEY_MAP`` when a mapping exists. A 'color_map' entry (attributes
        plus 'values') and a 'value_map' entry are added when 'Color map' and
        'Value map' are present under the handle.
    """
    type_attributes = {
        INV_KEY_MAP.get(attr_name, attr_name): attr_value
        for attr_name, attr_value in type_handle.attrs.items()
    }

    if "Color map" in type_handle:
        color_map_handle = type_handle["Color map"]
        color_map = dict(color_map_handle.attrs.items())
        color_map["values"] = color_map_handle[:]
        type_attributes["color_map"] = color_map

    if "Value map" in type_handle:
        # Cast to a (key, variable-length string) structured array.
        value_map_dtype = [("Key", "<u4"), ("Value", h5py.special_dtype(vlen=str))]
        type_attributes["value_map"] = type_handle["Value map"][:].astype(
            value_map_dtype
        )

    return type_attributes
[docs]
@classmethod
def fetch_uuids(cls, file: str | h5py.File, entity_type: str) -> list:
    """
    Fetch all uuids of a given type from geoh5

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param entity_type: Type of entity from
        'group', 'data', 'object', 'group_type', 'data_type', 'object_type'

    :return: List of uuids ``[uuid1, uuid2, ...]``, one per entry found in
        the container of the requested type. Empty when the lookup fails with
        KeyError.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]
        entity_type = cls.format_type_string(entity_type)

        try:
            return list(map(str2uuid, h5file[name][entity_type]))
        except KeyError:
            return []
[docs]
@classmethod
def fetch_file_object(
    cls, file: str | h5py.File, uid: uuid.UUID, file_name: str
) -> bytes | None:
    """
    Load the content of a file stored as bytes under a data entity.

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier of the target entity
    :param file_name: Name of the file stored as bytes data.

    :return: Data file stored as bytes, or None when the entity or the file
        entry cannot be found under 'Data'.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]

        try:
            stored = h5file[name]["Data"][as_str_if_uuid(uid)][file_name]
            return stored[()].tobytes()
        except KeyError:
            return None
[docs]
@classmethod
def fetch_values(
    cls, file: str | h5py.File, uid: uuid.UUID
) -> np.ndarray | str | float | None:
    """
    Get data :obj:`~geoh5py.data.data.Data.values`

    :param file: :obj:`h5py.File` or name of the target geoh5 file
    :param uid: Unique identifier of the target entity

    :return: Values read from the 'Data' entry of the entity. Text entries
        are converted with ``as_str_if_utf8_bytes`` and a single text entry
        is returned on its own rather than as an array. Floating point
        entries equal to ``FLOAT_NDV`` are replaced by NaN. An empty entry is
        returned as an empty array. None when the entity or its 'Data' entry
        is not found.
    """
    with fetch_h5_handle(file) as h5file:
        name = list(h5file)[0]

        try:
            values = np.r_[h5file[name]["Data"][as_str_if_uuid(uid)]["Data"]]

            # Guard against empty datasets: peeking at the first element
            # would otherwise raise an uncaught IndexError.
            if len(values) > 0 and isinstance(values[0], (str, bytes)):
                values = np.asarray([as_str_if_utf8_bytes(val) for val in values])

                if len(values) == 1:
                    values = values[0]
            elif values.dtype in [float, "float64", "float32"]:
                values[values == FLOAT_NDV] = np.nan
        except KeyError:
            values = None

    return values