Source code for geoh5py.data.text_data

#  Copyright (c) 2024 Mira Geoscience Ltd.
#
#  This file is part of geoh5py.
#
#  geoh5py is free software: you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  geoh5py is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with geoh5py.  If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import json

import numpy as np
from numpy import ndarray

from ..shared.utils import as_str_if_uuid, dict_mapper
from .data import Data
from .primitive_type_enum import PrimitiveTypeEnum



[docs]
def text_formating(values: None | np.ndarray | str) -> ndarray | None:
    """
    Format text values to utf-8.

    :param values: The values to format.

    :return: The formatted values.
    """
    # todo: values[0] seems dangerous here
    if values is None or isinstance(values[0], bytes):
        return values

    return np.char.encode(values, encoding="utf-8").astype("O")




[docs]
class TextData(Data):
    """
    Data of the TEXT primitive type, holding a string or an array of strings.
    """

    @property
    def formatted_values(self):
        """
        Values passed through :func:`text_formating` (utf-8 encoded).
        """
        return text_formating(self.values)

    @property
    def nan_value(self):
        """
        Nan-Data-Value to be used in arrays
        """
        return ""

    @classmethod
    def primitive_type(cls) -> PrimitiveTypeEnum:
        """
        Primitive type associated with this class of data.
        """
        return PrimitiveTypeEnum.TEXT

    def validate_values(
        self, values: np.ndarray | str | None
    ) -> np.ndarray | str | None:
        """
        Validate and normalize text values.

        Bytes are decoded to str. Object arrays get their bytes entries
        decoded and, when shorter than ``n_values``, are padded with
        ``nan_value``.

        :param values: A string, bytes, an array of text, or None.

        :return: The normalized values.

        :raises ValueError: If the values are not a str, None or an array
            with a unicode or bytes dtype.
        """
        if isinstance(values, bytes):
            values = values.decode()

        from_object_array = isinstance(values, np.ndarray) and values.dtype == object
        if from_object_array:
            decoded = [
                v.decode("utf-8") if isinstance(v, bytes) else v for v in values
            ]
            # An empty list would be inferred as float64 and wrongly rejected
            # by the dtype check below, so give it a string dtype explicitly.
            values = np.array(decoded) if decoded else np.array([], dtype=str)

        if (not isinstance(values, (str, type(None), np.ndarray))) or (
            isinstance(values, np.ndarray) and values.dtype.kind not in ["U", "S"]
        ):
            raise ValueError(
                f"Input 'values' for {self} must be of type {np.ndarray}  str or None."
            )

        # Pad only after the type check, so that a short array with non-text
        # content cannot bypass validation through the early return.
        if (
            from_object_array
            and self.n_values is not None
            and len(values) < self.n_values
        ):
            full_array = np.full(self.n_values, self.nan_value, dtype=values.dtype)
            full_array[: len(values)] = values
            return full_array

        return values





[docs]
class CommentsData(Data):
    """
    Comments added to an Object or Group.
    Stored as a dictionary with a 'Comments' key holding a list of
    dictionaries with the following keys:

        .. code-block:: python

            comments = {
                "Comments": [
                    {
                        "Author": "username",
                        "Date": "2020-05-21T10:12:15",
                        "Text": "A text comment."
                    },
                ]
            }
    """

    @property
    def formatted_values(self):
        """
        Values serialized to a json string, after mapping with
        ``as_str_if_uuid``.
        """
        return json.dumps(dict_mapper(self.values, [as_str_if_uuid]))

    @classmethod
    def primitive_type(cls) -> PrimitiveTypeEnum:
        """
        Primitive type associated with this class of data.
        """
        return PrimitiveTypeEnum.TEXT

    def validate_values(self, values) -> dict | None:
        """
        Validate the comments' structure.

        :param values: A dictionary with a 'Comments' key, its json string
            representation, or None.

        :return: The validated dictionary, or None.

        :raises TypeError: If the values are not a dict, or if a comment is
            not a dict.
        :raises ValueError: If the 'Comments' key is missing, or if a comment
            does not hold exactly the keys 'Author', 'Date' and 'Text'.
        """
        if isinstance(values, str):
            values = json.loads(values)

        if values is None:
            return None

        if not isinstance(values, dict):
            raise TypeError("Input 'values' for CommentsData must be a dict.")

        if "Comments" not in values:
            raise ValueError(
                "Input 'values' for CommentsData must contain key 'Comments'."
            )

        required_keys = {"Author", "Date", "Text"}
        for value in values["Comments"]:
            if not isinstance(value, dict):
                # Report the offending item's type; the container is known to
                # be a dict at this point.
                raise TypeError(
                    "Error setting CommentsData with expected input of type list[dict].\n"
                    f"Input {type(value)} provided."
                )

            # Compare the key sets directly: a union-length test only rejects
            # extra keys and lets comments with missing keys through.
            if set(value) != required_keys:
                raise ValueError(
                    "Comment dictionaries must include keys 'Author', 'Date' and 'Text'.\n"
                    f"Keys {list(value.keys())} provided."
                )

        return values





[docs]
class MultiTextData(Data):
    """
    Data of the MULTI_TEXT primitive type.
    """

    _values: np.ndarray | str | None

    @property
    def formatted_values(self):
        """
        Values passed through :func:`text_formating` (utf-8 encoded).
        """
        return text_formating(self.values)

    @property
    def nan_value(self):
        """
        Value used to represent missing data in python.
        """
        return ""

    @classmethod
    def primitive_type(cls) -> PrimitiveTypeEnum:
        """
        Primitive type associated with this class of data.
        """
        return PrimitiveTypeEnum.MULTI_TEXT

    def validate_values(
        self, values: np.ndarray | str | None
    ) -> np.ndarray | str | None:
        """
        Validate the values, padding short arrays up to ``n_values``.

        :param values: An array, a string or None.

        :return: The values, padded with ``nan_value`` if given as an array
            shorter than ``n_values``.

        :raises ValueError: If the values are not an array, a str or None.
        """
        if isinstance(values, np.ndarray):
            # Arrays are always an accepted type; ``n_values`` only decides
            # whether padding applies. Tying both tests together rejected
            # every array whenever ``n_values`` was None.
            if self.n_values is not None and len(values) < self.n_values:
                full_array = np.full(self.n_values, self.nan_value, dtype=values.dtype)
                full_array[: len(values)] = values
                return full_array

            return values

        if not isinstance(values, (str, type(None))):
            raise ValueError(
                f"Input 'values' for {self} must be of type {np.ndarray}  str or None."
            )

        return values
Source code for geoh5py.data.text_data

geoh5py

Navigation

Related Topics