"""
Various utility functions
"""
import itertools
import typing
from six import string_types
import numpy as np
def pairwise(iterable):
    """
    Iterate over consecutive, overlapping pairs of an iterable:
        s -> (s0, s1), (s1, s2), (s2, s3), ...

    Source:
        https://stackoverflow.com/questions/5434891/iterate-a-list-as-pair-current-next-in-python

    Args:
        iterable (iterable): any iterable; it is consumed exactly once

    Returns:
        zip: iterator over 2-tuples (current, next). It is empty if the
            iterable holds fewer than two items.
    """
    current, following = itertools.tee(iterable, 2)
    # Move the second copy one step ahead; the default keeps an empty
    # iterable from raising StopIteration.
    next(following, None)
    return zip(current, following)
def flatten(seq, container=None, keep_types=None, key: typing.Callable = None):
    """
    Recursively flatten a nested sequence into one flat container.

    Args:
        seq (iterable): iterable to be flattened
        container (iterable): object defining an append method. The leaf
            values are appended to it. A new list is used if None.
        keep_types (list of type): types that should not be flattened but
            kept in nested form
        key (callable): callable with the signature key(iterable) -> iterable.
            It is applied to seq and to every nested iterable before
            iterating over it.

    Returns:
        The container holding all leaf values in iteration order

    Examples:
        >>> from tfields.lib.util import flatten
        >>> import numpy as np
        >>> flatten([[1,2,3],4,[[5,[6]]]])
        [1, 2, 3, 4, 5, 6]
        >>> flatten([[1,2,3],4,[[5,[{6:1}]]]], keep_types=[dict])
        [1, 2, 3, 4, 5, {6: 1}]
        >>> flatten([[1,2,3],4,[[5,[np.array([6])]]]], keep_types=[np.ndarray])
        [1, 2, 3, 4, 5, array([6])]

        Strings work although they have the __iter__ attribute in python3

        >>> flatten([[0, 0, 0, 'A'], [1, 2, 3]])
        [0, 0, 0, 'A', 1, 2, 3]

        Dictionaries will return flattened keys

        >>> flatten({"a": 1, "b": 2})
        ['a', 'b']

        You can use the key keyword to specify a transformation on the iterable:

        >>> flatten({"a": {"a1": 1, "a2": 4}, "b": 2}, key=dict.values)
        [1, 4, 2]
        >>> def dict_flat_key(item):
        ...     if isinstance(item, dict):
        ...         return item.values()
        ...     return item
        >>> flatten({"a": {"a1": 1, "a2": [3, 4]}, "b": 2}, key=dict_flat_key)
        [1, 3, 4, 2]
    """
    container = [] if container is None else container
    keep_types = [] if keep_types is None else keep_types
    elements = seq if key is None else key(seq)

    for element in elements:
        # A leaf is anything that is not iterable, is a string, or is of a
        # type the caller asked to keep intact.
        is_leaf = (
            not hasattr(element, "__iter__")
            or isinstance(element, string_types)
            or any(isinstance(element, kept) for kept in keep_types)
        )
        if is_leaf:
            container.append(element)
        else:
            # Nested iterables write into the same container.
            flatten(element, container, keep_types, key=key)
    return container
def multi_sort(array, *others, **kwargs):
    """
    Sort all given iterables in parallel with array, i.e. rearrange the items
    of the other iterables in the same way the items of array are rearranged
    by sorting it.

    Args:
        array (iterable): iterable defining the sort order
        *others (iterable): iterables to be rearranged alongside array
        **kwargs:
            method (function): sorting function. Default is 'sorted'
            reverse (bool): whether to reverse the results or not. The
                reversal is applied after sorting and is not forwarded to
                method.
            cast_type (type): type of the returned iterables. Default is
                list. It is honoured with and without reverse.
            ...: further arguments are passed to method. With the default
                method, 'key' defaults to sorting by the item of array.

    Returns:
        tuple(cast_type): One iterable for each of array and others

    Examples:
        >>> from tfields.lib.util import multi_sort
        >>> multi_sort([1,2,3,6,4], [1,2,3,4,5])
        ([1, 2, 3, 4, 6], [1, 2, 3, 5, 4])
        >>> a, b = multi_sort([1,2,3,6,4], [1,2,3,4,5])
        >>> b
        [1, 2, 3, 5, 4]

        Expanded to sort as many objects as needed

        >>> multi_sort([1,2,3,6,4], [1,2,3,4,5], [6,5,4,3,2])
        ([1, 2, 3, 4, 6], [1, 2, 3, 5, 4], [6, 5, 4, 2, 3])

        Reverse argument

        >>> multi_sort([1,2,3,6,4], [1,2,3,4,5], [6,5,4,3,2], reverse=True)
        ([6, 4, 3, 2, 1], [4, 5, 3, 2, 1], [3, 2, 4, 5, 6])

        The cast_type is respected when reversing, too

        >>> multi_sort([1, 3, 2], [4, 5, 6], reverse=True, cast_type=tuple)
        ((3, 2, 1), (5, 6, 4))

        Empty input is passed through

        >>> multi_sort([], [], [])
        ([], [], [])
        >>> multi_sort([], [], [], cast_type=tuple)
        ((), (), ())
    """
    method = kwargs.pop("method", None)
    cast_type = kwargs.pop("cast_type", list)
    # 'reverse' is handled here and must never reach a custom method.
    reverse = kwargs.pop("reverse", False)

    if len(array) == 0:
        # zip(*[]) would yield no columns at all, so cast the inputs directly.
        return tuple(cast_type(x) for x in (array,) + others)

    if method is None:
        method = sorted
        # Sort by the item of array only, so the others never need to be
        # comparable.
        kwargs.setdefault("key", lambda pair: pair[0])

    columns = zip(*method(zip(array, *others), **kwargs))
    if reverse:
        # Each column is a tuple; reverse it first, then cast, so the
        # requested cast_type is kept (it used to be forced to list).
        return tuple(cast_type(column[::-1]) for column in columns)
    return tuple(cast_type(column) for column in columns)
def convert_nan(arr, value=0.0):
    """
    Replace all occurring NaN values in arr by value.

    The replacement happens in place; nothing is returned.

    Args:
        arr (np.ndarray): array to be modified
        value: replacement assigned to every NaN position
    """
    arr[np.isnan(arr)] = value
def view_1d(arr):
    """
    View a 2D array as a 1D array with one opaque (void) item per row.

    Each row is packed into a single np.void element spanning the bytes of
    the whole row, so complete rows can be handled as single values.
    https://stackoverflow.com/a/44999009/ @Divakar

    Args:
        arr (array_like): two dimensional array

    Returns:
        np.ndarray: 1D array of void dtype with one entry per row of arr
    """
    contiguous = np.ascontiguousarray(arr)
    bytes_per_row = contiguous.dtype.itemsize * contiguous.shape[1]
    row_dtype = np.dtype((np.void, bytes_per_row))
    return contiguous.view(row_dtype).ravel()
def argsort_unique(idx):
    """
    Invert a permutation given as an index array.

    Writes position k into slot idx[k] of the result, which for an array
    holding each of 0..n-1 exactly once equals np.argsort(idx).
    https://stackoverflow.com/a/43411559/ @Divakar

    Args:
        idx (np.ndarray): integer index array

    Returns:
        np.ndarray: integer array with result[idx[k]] == k
    """
    positions = np.arange(idx.size)
    inverse = np.empty_like(positions)
    inverse[idx] = positions
    return inverse
def duplicates(arr, axis=None):
    """
    View1D version of duplicate search: for every row, find the index of the
    first row with the same content.

    Speed up version after
    https://stackoverflow.com/questions/46284660/python-numpy-speed-up-2d-duplicate-search/46294916#46294916

    Args:
        arr (array_like): two dimensional array (or an empty sequence)
        axis (int): axis along which to search. Only 0 is implemented.

    Examples:
        >>> import tfields
        >>> import numpy as np
        >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
        >>> tfields.lib.util.duplicates(a, axis=0)
        array([0, 0, 2])

        An empty sequence will not throw errors

        >>> assert np.array_equal(tfields.lib.util.duplicates([], axis=0), [])

    Returns:
        np.ndarray of int: each entry is pointing to the first occurrence of
            the respective row

    Raises:
        NotImplementedError: if arr is not empty and axis is not 0
    """
    # Accept plain sequences as well; ndarray subclasses pass through as is.
    arr = np.asanyarray(arr)
    if len(arr) == 0:
        # Integer dtype keeps the result usable as an index array.
        return np.array([], dtype=np.intp)
    if axis != 0:
        raise NotImplementedError()

    # Sort the rows so that equal rows end up next to each other. lexsort is
    # stable, hence the first row of each group is its earliest occurrence.
    sidx = np.lexsort(arr.T)
    sorted_ = arr[sidx]

    # Positions in the sorted array where a new group of equal rows starts
    group_index_0 = np.flatnonzero((sorted_[1:] != sorted_[:-1]).any(1)) + 1
    group_index = np.concatenate(([0], group_index_0, [sorted_.shape[0]]))

    # Group id of every row in sorted order
    ids = np.repeat(np.arange(len(group_index) - 1), np.diff(group_index))

    # Bring the group ids back into the original row order
    sidx_mapped = argsort_unique(sidx)
    ids_mapped = ids[sidx_mapped]

    # Original index of the first row of every group
    grp_minidx = sidx[group_index[:-1]]

    out = grp_minidx[ids_mapped]
    return out
# pylint:disable=too-many-arguments
def index(arr, entry, rtol=0, atol=0, equal_nan=False, axis=None):
    """
    Find the position of the first occurrence of entry in arr.

    Args:
        arr (array_like): array to search in
        entry: value (axis=None) or sub-array (axis=0) to look for
        rtol (float): relative tolerance, see np.isclose
        atol (float): absolute tolerance, see np.isclose
        equal_nan (bool): whether NaNs compare equal, see np.isclose
        axis (int): None searches the flattened array, 0 compares entry
            with every item along the first axis. Other values are not
            implemented.

    Examples:
        >>> import numpy as np
        >>> import tfields
        >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
        >>> tfields.lib.util.index(a, [2, 3, 4], axis=0)
        2

        >>> a = np.array([[1, 0, 0], [2, 3, 4]])
        >>> tfields.lib.util.index(a, 4)
        5

    Returns:
        int: index of the first match of entry in arr, None if there is no
            match

    Raises:
        NotImplementedError: if axis is neither None nor 0
    """
    # Accept plain sequences as well; ndarray subclasses pass through as is.
    arr = np.asanyarray(arr)
    if axis is None:
        arr = arr.flatten()
    elif axis != 0:
        raise NotImplementedError()

    for i, part in enumerate(arr):
        isclose = np.isclose(part, entry, rtol=rtol, atol=atol, equal_nan=equal_nan)
        if axis is not None:
            # Along an axis every component has to match.
            isclose = isclose.all()
        if isclose:
            return i
    # No match found
    return None
def is_full_slice(index, shape):
    """
    Determine if an index is the full slice (i.e. __getitem__ with this index returns the full
    array) w.r.t the shape given.

    Args:
        index (slice, tuple, int, np.ndarray or list): index as passed to
            __getitem__
        shape (tuple of int): shape of the array the index refers to

    Returns:
        bool: True if the index selects every item in the original order

    Raises:
        NotImplementedError: if the type of index is not supported

    Note:
        A stop of -1 excludes the last item and is therefore not a full
        slice. Integer index arrays are only recognised as full if they
        equal 0, 1, ..., shape[0] - 1.

    Examples:
        >>> import numpy as np
        >>> import tfields
        >>> class index_getter:
        ...     def __getitem__(self, index):
        ...         return index
        >>> get_index = index_getter()
        >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
        >>> shape = a.shape
        >>> tfields.lib.util.is_full_slice(get_index[:], shape)
        True
        >>> tfields.lib.util.is_full_slice(get_index[:, :], shape)
        True
        >>> tfields.lib.util.is_full_slice(get_index[:, 1], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[1:, :], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:1, :], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:, 1:], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:, :1], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:, :-1], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[:10], shape)
        True
        >>> tfields.lib.util.is_full_slice(get_index[np.array([True, True, True])], shape)
        True
        >>> tfields.lib.util.is_full_slice(get_index[np.array([True, True, False])], shape)
        False
        >>> tfields.lib.util.is_full_slice(get_index[[0, 1, 2]], shape)
        True
    """
    if isinstance(index, slice):
        length = shape[0]
        try:
            # slice.indices resolves None, negative and out-of-range bounds
            # exactly as sequence indexing does.
            selected = range(*index.indices(length))
        except (TypeError, ValueError):
            # Non-integer bounds or a zero step never describe a full slice.
            return False
        return selected == range(length)
    if isinstance(index, tuple):
        # One index per axis; axes without an index are selected completely.
        return all(is_full_slice(ind, (shp,)) for ind, shp in zip(index, shape))
    if isinstance(index, (int, np.integer)):
        return bool(index == 0 and shape[0] == 1)
    if isinstance(index, (np.ndarray, list)):
        selector = np.asarray(index)
        if selector.dtype == bool:
            # A boolean mask is full if it does not drop any item.
            return bool(selector.all())
        # Integer indices are full only if they list every position in order.
        return bool(
            selector.ndim == 1 and np.array_equal(selector, np.arange(shape[0]))
        )
    raise NotImplementedError("Index Type %s" % type(index))
if __name__ == "__main__":
    # Run the examples embedded in the docstrings of this module as tests.
    from doctest import testmod

    testmod()