# Source code for tfields.lib.util

"""
Various utility functions
"""
import itertools
import typing
from six import string_types
import numpy as np


def pairwise(iterable):
    """
    Iterate over consecutive, overlapping pairs of an iterable.

    s -> (s0, s1), (s1, s2), (s2, s3), ...

    Source:
        https://stackoverflow.com/questions/5434891/iterate-a-list-as-pair-current-next-in-python

    Args:
        iterable (iterable): any iterable; one-shot iterators work as well
            because the input is duplicated with itertools.tee

    Returns:
        zip: lazy iterator yielding 2-tuples (current, next). It yields
        nothing if the iterable has fewer than two items.

    Examples:
        >>> from tfields.lib.util import pairwise
        >>> list(pairwise([1, 2, 3, 4]))
        [(1, 2), (2, 3), (3, 4)]
        >>> list(pairwise([1]))
        []
    """
    current, following = itertools.tee(iterable)
    # Advance the second copy by one item; the default keeps an empty
    # iterable from raising StopIteration.
    next(following, None)
    return zip(current, following)


def flatten(
    seq, container=None, keep_types=None, key: typing.Optional[typing.Callable] = None
):
    """
    Approach to flatten a nested sequence.

    Every item that defines ``__iter__`` is descended into recursively,
    except strings and instances of the types given in keep_types.

    Args:
        seq (iterable): iterable to be flattened
        container (iterable): object defining an append method. Values will
            be appended there. A new list is created if not given.
        keep_types (list of type): types that should not be flattened but kept
            in nested form
        key (callable): callable with the signature key(iterable) -> iterable.
            It is applied to seq and to every nested iterable before it is
            iterated.

    Returns:
        The container holding the flattened items (the object passed as
        container, or a new list).

    Examples:
        >>> from tfields.lib.util import flatten
        >>> import numpy as np
        >>> flatten([[1,2,3],4,[[5,[6]]]])
        [1, 2, 3, 4, 5, 6]
        >>> flatten([[1,2,3],4,[[5,[{6:1}]]]], keep_types=[dict])
        [1, 2, 3, 4, 5, {6: 1}]
        >>> flatten([[1,2,3],4,[[5,[np.array([6])]]]], keep_types=[np.ndarray])
        [1, 2, 3, 4, 5, array([6])]

        Strings work although they have the __iter__ attribute in python3
        >>> flatten([[0, 0, 0, 'A'], [1, 2, 3]])
        [0, 0, 0, 'A', 1, 2, 3]

        Dictionaries will return flattened keys
        >>> flatten({"a": 1, "b": 2})
        ['a', 'b']

        You can use the key keyword to specify a transformation on the iterable:
        >>> flatten({"a": {"a1": 1, "a2": 4}, "b": 2}, key=dict.values)
        [1, 4, 2]

        >>> def dict_flat_key(item):
        ...     if isinstance(item, dict):
        ...         return item.values()
        ...     return item
        >>> flatten({"a": {"a1": 1, "a2": [3, 4]}, "b": 2}, key=dict_flat_key)
        [1, 3, 4, 2]
    """
    if container is None:
        container = []
    # isinstance accepts a tuple of types; an empty tuple never matches.
    keep_types = () if keep_types is None else tuple(keep_types)
    if key is not None:
        seq = key(seq)
    for item in seq:
        is_nested = (
            hasattr(item, "__iter__")
            # Strings are iterable, but iterating them yields strings again,
            # so they are treated as leaves.
            and not isinstance(item, str)
            and not isinstance(item, keep_types)
        )
        if is_nested:
            flatten(item, container, keep_types, key=key)
        else:
            container.append(item)
    return container


def multi_sort(array, *others, **kwargs):
    """
    Sort all given lists in parallel with array, i.e. rearrange the items in
    the other lists in the same way the items of array are rearranged by
    sorting it.

    Args:
        array (iterable): sequence defining the sort order; must support len()
        *others (iterable): sequences to be rearranged alongside array
        **kwargs:
            method (function): sorting function. Default is 'sorted'
            reverse (bool): whether to reverse the sorted results or not.
                The sorted order is reversed as a whole, so equal keys end up
                in reverse input order.
            cast_type (type): type of returned iterables. Default is list.
            ...: further arguments are passed to method. With the default
                method, 'key' defaults to the item stemming from array.

    Returns:
        tuple(cast_type): One iterable for each of array and others

    Examples:
        >>> from tfields.lib.util import multi_sort
        >>> multi_sort([1,2,3,6,4], [1,2,3,4,5])
        ([1, 2, 3, 4, 6], [1, 2, 3, 5, 4])
        >>> a, b = multi_sort([1,2,3,6,4], [1,2,3,4,5])
        >>> b
        [1, 2, 3, 5, 4]

        Expanded to sort as many objects as needed
        >>> multi_sort([1,2,3,6,4], [1,2,3,4,5], [6,5,4,3,2])
        ([1, 2, 3, 4, 6], [1, 2, 3, 5, 4], [6, 5, 4, 2, 3])

        Reverse argument
        >>> multi_sort([1,2,3,6,4], [1,2,3,4,5], [6,5,4,3,2], reverse=True)
        ([6, 4, 3, 2, 1], [4, 5, 3, 2, 1], [3, 2, 4, 5, 6])

        The cast_type is honoured together with reverse
        >>> multi_sort([1, 3, 2], "abc", reverse=True, cast_type=tuple)
        ((3, 2, 1), ('b', 'c', 'a'))

        Empty input
        >>> multi_sort([], [], [])
        ([], [], [])
        >>> multi_sort([], [], [], cast_type=tuple)
        ((), (), ())

    """
    method = kwargs.pop("method", None)
    cast_type = kwargs.pop("cast_type", list)
    # 'reverse' is handled here and never forwarded to method.
    reverse = kwargs.pop("reverse", False)

    if len(array) == 0:
        # zip(*[]) below would yield no columns at all, so cast the inputs
        # directly to keep one output per input.
        return tuple(cast_type(x) for x in [array] + list(others))

    if method is None:
        method = sorted
        # Sort by the item that came from array only.
        kwargs.setdefault("key", lambda pair: pair[0])

    # Sort the rows (array_i, other1_i, ...) and transpose back to columns.
    columns = zip(*method(zip(array, *others), **kwargs))
    if reverse:
        # zip yields tuples, so slicing is available; reversing before the
        # cast keeps the requested cast_type.
        columns = (column[::-1] for column in columns)
    return tuple(cast_type(column) for column in columns)


def convert_nan(arr, value=0.0):
    """
    Replace all occurring NaN values in arr by value.

    The replacement happens in place: arr itself is modified and nothing is
    returned.

    Args:
        arr (np.ndarray): array supporting boolean mask assignment
        value: replacement written to every NaN position

    Examples:
        >>> import numpy as np
        >>> from tfields.lib.util import convert_nan
        >>> a = np.array([1.0, np.nan, 3.0])
        >>> convert_nan(a, value=-1.0)
        >>> a
        array([ 1., -1.,  3.])
    """
    arr[np.isnan(arr)] = value


def view_1d(arr):
    """
    View every row of a 2D array as one single opaque (void) element.

    The result is a 1D array with one item per row whose bytes are the raw
    bytes of that row. This function does not remove anything itself.
    https://stackoverflow.com/a/44999009/ @Divakar

    Args:
        arr (array_like): 2D array; arr.shape[1] is used as the row length

    Returns:
        np.ndarray: 1D array of dtype void with len(arr) entries

    Examples:
        >>> import numpy as np
        >>> from tfields.lib.util import view_1d
        >>> view_1d(np.array([[1, 0], [1, 0], [2, 3]])).shape
        (3,)
    """
    # A contiguous layout is required so that each row occupies one
    # consecutive chunk of memory.
    arr = np.ascontiguousarray(arr)
    row_dtype = np.dtype((np.void, arr.dtype.itemsize * arr.shape[1]))
    return arr.view(row_dtype).ravel()


def argsort_unique(idx):
    """
    Invert a permutation given as an index array.

    For idx being a permutation of 0..n-1 the result satisfies
    result[idx[k]] == k for every position k.
    https://stackoverflow.com/a/43411559/ @Divakar

    Args:
        idx (array_like of int): 1D permutation of the numbers 0..n-1

    Returns:
        np.ndarray: integer array of the same length holding the inverse
        permutation

    Examples:
        >>> import numpy as np
        >>> from tfields.lib.util import argsort_unique
        >>> argsort_unique(np.array([2, 0, 1]))
        array([1, 2, 0])
    """
    idx = np.asarray(idx)
    num = idx.size
    if num == 0:
        # An empty list converts to a float array, which is not a valid
        # index array; there is nothing to invert anyway.
        return np.empty(0, dtype=int)
    inverse = np.empty(num, dtype=int)
    # Scatter: the value k goes to position idx[k].
    inverse[idx] = np.arange(num)
    return inverse


def duplicates(arr, axis=None):
    """
    Duplicate search: map every row to the index of its first occurrence.

    Speed up version after
    https://stackoverflow.com/questions/46284660 \
        /python-numpy-speed-up-2d-duplicate-search/46294916#46294916

    Args:
        arr (array_like): 2D array whose rows are compared for exact equality
        axis (int): only axis=0 (row wise comparison) is implemented

    Examples:
        >>> import tfields
        >>> import numpy as np
        >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
        >>> tfields.lib.util.duplicates(a, axis=0)
        array([0, 0, 2])

        Array likes are accepted, too
        >>> tfields.lib.util.duplicates([[2, 3], [1, 0], [2, 3]], axis=0)
        array([0, 1, 0])

        An empty sequence will not throw errors
        >>> assert np.array_equal(tfields.lib.util.duplicates([], axis=0), [])

    Returns:
        np.ndarray of int: one entry per row, pointing to the first occurrence
        of that row

    Raises:
        NotImplementedError: if arr is not empty and axis is not 0
    """
    arr = np.asarray(arr)
    if len(arr) == 0:
        return np.array([], dtype=int)
    if axis != 0:
        raise NotImplementedError()

    # lexsort is stable, so equal rows become neighbours and keep their
    # original relative order.
    sidx = np.lexsort(arr.T)
    sorted_ = arr[sidx]

    # Positions (in sorted order) at which a new group of equal rows starts.
    group_index_0 = np.flatnonzero((sorted_[1:] != sorted_[:-1]).any(1)) + 1
    group_index = np.concatenate(([0], group_index_0, [sorted_.shape[0]]))
    # Group id of every sorted row.
    ids = np.repeat(np.arange(len(group_index) - 1), np.diff(group_index))

    # Original index of the first row of each group; due to the stable sort
    # this is the smallest original index within the group.
    grp_minidx = sidx[group_index[:-1]]

    # Scatter the per-row result back into the original row order.
    out = np.empty(sidx.size, dtype=sidx.dtype)
    out[sidx] = grp_minidx[ids]
    return out


# pylint:disable=too-many-arguments
def index(arr, entry, rtol=0, atol=0, equal_nan=False, axis=None):
    """
    Find the first position in arr that matches entry.

    The comparison is done with np.isclose; with the default tolerances
    (rtol=0, atol=0) this is an exact match.

    Args:
        arr (array_like): array to search in
        entry: value to look for. With axis=None a single value compared to
            each item of the flattened array, with axis=0 compared to each
            arr[i] as a whole.
        rtol (float): see np.isclose
        atol (float): see np.isclose
        equal_nan (bool): see np.isclose
        axis (int or None): None searches the flattened array, 0 searches
            along the first axis. Other values are not implemented.

    Examples:
        >>> import numpy as np
        >>> import tfields
        >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
        >>> tfields.lib.util.index(a, [2, 3, 4], axis=0)
        2

        >>> a = np.array([[1, 0, 0], [2, 3, 4]])
        >>> tfields.lib.util.index(a, 4)
        5

        None is returned if the entry is not found
        >>> tfields.lib.util.index(a, 42) is None
        True

    Returns:
        int: index of the first occurrence of entry in arr, None if there is
        no match

    Raises:
        NotImplementedError: if axis is neither None nor 0
    """
    arr = np.asarray(arr)
    if axis is None:
        arr = arr.ravel()
    elif axis != 0:
        raise NotImplementedError()
    for i, part in enumerate(arr):
        isclose = np.isclose(part, entry, rtol=rtol, atol=atol, equal_nan=equal_nan)
        if axis is not None:
            # A whole sub-array matches only if all of its items match.
            isclose = isclose.all()
        if isclose:
            return i
    return None


def is_full_slice(index, shape):
    """
    Determine if an index is the full slice (i.e. __getitem__ with this index returns the full
    array) w.r.t the shape given.

    Args:
        index: slice, int, boolean mask, integer index list/array or a tuple
            of those (one per axis)
        shape (tuple of int): shape of the array that would be indexed

    Returns:
        bool: True if the index selects everything, in the original order

    Raises:
        NotImplementedError: for index types other than the ones listed above

    Examples:
        >>> import numpy as np
        >>> import tfields
        >>> class index_getter:
        ...     def __getitem__(self, index):
        ...         return index
        >>> get_index = index_getter()
        >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
        >>> shape = a.shape
        >>> tfields.lib.util.is_full_slice(get_index[:], shape)
        True
        >>> tfields.lib.util.is_full_slice(get_index[:, :], shape)
        True
        >>> tfields.lib.util.is_full_slice(get_index[:, 1], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[1:, :], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:1, :], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:, 1:], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:, :1], shape)
        False

        A stop of -1 drops the last item and thus is not a full slice
        >>> tfields.lib.util.is_full_slice(get_index[:, :-1], shape)
        False

        A stop beyond the end still selects everything
        >>> tfields.lib.util.is_full_slice(get_index[:10], shape)
        True

        Boolean masks and integer index lists
        >>> tfields.lib.util.is_full_slice(get_index[np.array([True, True, True])], shape)
        True
        >>> tfields.lib.util.is_full_slice(get_index[np.array([True, True, False])], shape)
        False
        >>> tfields.lib.util.is_full_slice([0, 1, 2], shape)
        True
        >>> tfields.lib.util.is_full_slice([1, 2], shape)
        False
    """
    if isinstance(index, slice):
        length = shape[0]
        # slice.indices resolves None, negative and out-of-range bounds for
        # the given length; the slice is full iff it visits 0..length-1 in
        # that order.
        return range(*index.indices(length)) == range(length)
    if isinstance(index, tuple):
        # One index per axis; each is checked against its own axis length.
        return all(is_full_slice(ind, (shp,)) for ind, shp in zip(index, shape))
    if isinstance(index, (int, np.integer)):
        # A single item is everything only if the axis has exactly one item.
        return shape[0] == 1 and index in (0, -1)
    if isinstance(index, (np.ndarray, list)):
        selector = np.asarray(index)
        if selector.dtype == bool:
            # Boolean mask: full if nothing is masked out.
            return bool(selector.all())
        # Integer indices: full only if they list every position in order.
        return bool(np.array_equal(selector, np.arange(shape[0])))
    raise NotImplementedError("Index Type {}".format(type(index)))


if __name__ == "__main__":
    # When executed as a script, run the doctest examples embedded in the
    # docstrings of this module.
    from doctest import testmod

    testmod()