# -*- coding: utf-8 -*-
from .indexed import Indexed
from .ndprint import ndprint
from .odict import ODict
from ..utils.common import wls
from .dataset import make_title_meta_l0, CompositeDataset
from .shaped import Shaped
from .metadata import MetaData
try:
from .tabledataset_datamodel import Model
except ImportError:
Model = {'metadata': {}}
import sys
from collections.abc import Sequence
from collections import OrderedDict
import itertools
from copy import copy
if sys.version_info[0] + 0.1 * sys.version_info[1] >= 3.3:
PY33 = True
from collections.abc import Container, Sequence, Mapping
seqlist = Sequence
maplist = Mapping
else:
assert 0, 'python 3'
PY33 = False
from .collectionsMockUp import ContainerMockUp as Container
from .collectionsMockUp import SequenceMockUp as Sequence
from .collectionsMockUp import MappingMockUp as Mapping
seqlist = (tuple, list, Sequence, str)
# ,types.XRangeType, types.BufferType)
maplist = (dict, Mapping)
import logging
# create logger
logger = logging.getLogger(__name__)
# logger.debug('level %d' % (logger.getEffectiveLevel()))
class TableModel():
    """ Mixin to interrogate and edit a tabular data model.

    The class holds no data itself. The host class is expected to supply

    * ``getData()``: the mapping of column name to column, or ``None`` when
      no data has been set;
    * ``getColumn(key)``: the column selected by ``key``.

    Columns must support indexing and ``len()``, and expose their cell
    values through a ``data`` attribute.
    """

    def __init__(self, **kwds):
        """ Passes all keyword arguments to the next class in the MRO.
        """
        super().__init__(**kwds)

    def getColumnClass(self, columnIndex):
        """ Returns the class of the first cell value in the column.
        """
        return self.getColumn(columnIndex)[0].__class__

    def getColumnCount(self):
        """ Returns the number of columns in the model.

        Returns 0 when ``getData()`` is ``None``.
        """
        data = self.getData()
        return 0 if data is None else len(data)

    def getColumnName(self, columnIndex):
        """ Returns the name of the column at columnIndex.

        Returns a list of names if ``columnIndex`` is a slice.
        """
        return self.getColumnNames()[columnIndex]

    def getColumnNames(self):
        """ Returns the column names as a list.

        Returns an empty list when ``getData()`` is ``None``.
        """
        data = self.getData()
        return [] if data is None else list(data.keys())

    def getRowCount(self):
        """ Returns the number of rows in the model.

        The count is the length of the first column; a model without
        columns has 0 rows.
        """
        if self.getColumnCount() == 0:
            # nothing to measure; do not depend on getColumn(0) here
            return 0
        return len(self.getColumn(0))

    def getValueAt(self, rowIndex, columnIndex):
        """ Returns the value for the cell at columnIndex and rowIndex.
        """
        return self.getColumn(columnIndex).data[rowIndex]

    def isCellEditable(self, rowIndex, columnIndex):
        """ Returns True if the cell at rowIndex and columnIndex is editable.

        This implementation reports every cell as editable.
        """
        return True

    def setValueAt(self, value, rowIndex, columnIndex):
        """ Sets the value in the cell at columnIndex and rowIndex to value.
        """
        self.getColumn(columnIndex).data[rowIndex] = value
def maybe2rows(header_names, units, col_width, sep='.', one_row=False, linebreak='\n'):
    """ Makes one-row or two-row column headers.

    Each header name is split once, at the right-most occurrence of a
    separator character, into a group (left part) and a sub-name (right
    part). A two-row header is produced when two consecutive headers have
    the same non-empty group.

    :header_names: iterable of column names (strings).
    :units: iterable of unit strings, paired with ``header_names`` in order.
    :col_width: width passed to ``wls`` for every header cell. ``None`` is passed on as -1.
    :sep: a string of separator characters. The characters are tried in the given order and the first one found in a header is used. Grouping does not distinguish which sep-char was used to split a header. Example: '.' (default), '._/'
    :one_row: Force one row but add line breaks at '.'.
    :linebreak: string placed between a header name and its ``(unit)``, and passed to ``wls``.
    :return: a list of ``'name<linebreak>(unit)'`` strings for one-row headers, or a list of ``(group, 'sub-name<linebreak>(unit)')`` tuples for two-row headers.
    """
    if col_width is None:
        col_width = -1
    # The names are walked twice (once to split, once to format), so a
    # one-shot iterator must be materialized first.
    header_names = list(header_names)

    found_repeat = False
    hd, hd2 = [], []
    last = None
    for x in header_names:
        # A name that contains a separator and parses as a number
        # (e.g. '1.5') is kept whole and never treated as 'group.sub-name'.
        is_number = False
        if any(s in x for s in sep):
            try:
                float(x)
            except ValueError:
                pass
            else:
                is_number = True
        if is_number:
            hd.append(x)
            hd2.append('')
            if not found_repeat:
                last = ''
            continue

        # 'a.b', 'a.c' -> ('a','b'), ('a','c')
        r = str(x)
        # fall-back when ``sep`` is empty or none of its characters is found
        p = [r]
        for s in sep:
            p = r.rsplit(s, 1)
            if len(p) > 1:
                break
        if len(p) > 1:
            # 'foo.'.rsplit('.', 1) == ['foo', '']
            p0, p1 = p
        else:
            # 'bar'.rsplit('.', 1) == ['bar']
            p0, p1 = '', r
        hd.append(p1)
        hd2.append(p0)
        # repeat is never found if one row
        if not one_row and not found_repeat:
            if p0 != '' and p0 == last:
                found_repeat = True
            last = p0

    # Column-head text, wrapped by ``wls``. Sub-names are used only when
    # the group goes to a row of its own.
    # NOTE(review): the one_row break is hard-wired to '.' and '\n' rather
    # than using ``sep`` and ``linebreak`` -- confirm that is intended.
    names = hd if found_repeat else header_names
    hdr1 = [wls(x.replace('.', '.\n') if one_row else x,
                width=col_width, linebreak=linebreak)
            for x in names]
    # name and unit rows.
    hdr = ['%s%s(%s)' % (name, linebreak, unit)
           for name, unit in zip(hdr1, units)]
    if found_repeat:
        # a repeated group was found: use 2-row header
        return list(zip(hd2, hdr))
    return hdr
MdpInfo = Model['metadata']
class TableDataset(CompositeDataset, TableModel, Shaped):
    """ A tabular collection of named Columns, optimized for array data.

    The column-wise approach is convenient in many cases. For example, one has an event list, and each algorithm is adding a new field to the events (i.e. a new column, for example a quality mask).

    Although mechanisms are provided to grow the table row-wise, one should use these with care especially in performance driven environments as this orthogonal approach (adding rows rather than adding columns) is expensive.

    General Note:

    For reasons of flexibility, memory consumption and performance, this class is not checking whether all columns are of the same length: this is the responsibility of the user/developer.

    Note on column names:

    If a column is added without specifying a name, the name ColumnX is created, where X denotes the index of that column.
    Column name duplicity is not allowed.

    Developers:

    See "Writing special datasets or products" at the developer's documentation also.
    """

    def __init__(self, data=None,
                 description=None,
                 typ_=None,
                 version=None,
                 zInfo=None,
                 alwaysMeta=True,
                 **kwds):
        """ Creates the table and passes the named arguments on as metadata.

        data: initial columns, in any form accepted by ``setData()``.
        zInfo: data model definition; the module-level ``Model`` if None.
        Other named arguments and ``kwds`` are handed to the base initializer.
        """
        # Must exist before the base initializer runs: addColumn() appends
        # to it whenever a column is installed.
        self._list = []

        # MDPs collected from the named arguments, in signature order.
        metasToBeInstalled = {
            'data': data,
            'description': description,
            'typ_': typ_,
            'version': version,
            'alwaysMeta': alwaysMeta,
        }
        if zInfo is None:
            zInfo = Model
        super().__init__(zInfo=zInfo, **metasToBeInstalled,
                         **kwds)  # initialize data, meta, unit
        self.updateShape()

    def setData(self, data):
        """ sets name-column pairs from data.

        Valid forms include: {str:Column, ...} or [(str, [num, ...], str)]
        or [(str, Column), ...] or [[num ...], [num ...], ...]

        [{'name':str,'column':Column}] form is deprecated.

        Existing data will be discarded except when the provided data is a list of lists, where existing column names and units will remain but data replaced, and extra data items will form new columns named 'column'+index (index counting from 1) with unit None.

        Raises ``DeprecationWarning`` for the deprecated form, ``ValueError`` for a list member that cannot be interpreted, ``TypeError`` if ``data`` is neither a Sequence nor a Mapping.
        """
        if data is None:
            super().setData(ODict())
            # keep the per-column data list in step with the emptied table
            self._list = []
            self.updateShape()
            return

        current_data = self.getData()
        # list of keys of current data
        curdk = list(current_data.keys()) if current_data else []
        super().setData(ODict())
        # The container was just emptied, so the parallel list of column
        # data restarts too; addColumn() rebuilds it as columns come back.
        self._list = []

        if isinstance(data, seqlist):
            from .arraydataset import Column
            for ind, x in enumerate(data):
                if isinstance(x, maplist) and 'name' in x and 'column' in x:
                    raise DeprecationWarning(
                        'Do not use [{"name":name, "column":column}...]. Use {name:column, ...} instead.')
                if not isinstance(x, (list, tuple)):
                    raise ValueError(
                        'Cannot extract name and column from list member ' + str(x))
                # string-started member, e.g. ('name', [...], 'unit').
                # A member starting with two strings is taken as plain data.
                if len(x) > 1 and isinstance(x[0], str) \
                   and not isinstance(x[1], str):
                    if isinstance(x[1], (list, tuple)):
                        u = x[2] if len(x) > 2 else ''
                        self.setColumn(x[0], Column(data=x[1], unit=u))
                    elif isinstance(x[1], Column):
                        self.setColumn(x[0], x[1])
                    else:
                        raise ValueError(
                            '[[str, [], str]...], [[str, []]...], [[str, Column]...] needed.')
                elif current_data is None or len(current_data) <= ind:
                    # plain data beyond the existing columns,
                    # e.g. [[1, 2, 3], [4, 5, 6]]: new auto-named column
                    self.setColumn('', Column(data=x, unit=None))
                else:
                    # plain data for the ind-th existing column: keep the
                    # column object (name, unit) and replace its data
                    colname = curdk[ind]
                    current_data[colname].data = x
                    self.setColumn(colname, current_data[colname])
        elif isinstance(data, maplist):
            # {name: Column, ...}
            for k, v in data.items():
                self.setColumn(k, v)
        else:
            raise TypeError('must be a Sequence or a Mapping. ' +
                            data.__class__.__name__ + ' found.')
        self.updateShape()

    def addColumn(self, name, column, col_des=True):
        """ Adds the specified column to this table, and attaches a name
        to it.

        If the name is empty or None, a dummy name "column"+(column_count+1) is created, such that it can be accessed by getColumn(str).

        If column name exists the corresponding column is substituted.

        Parameters:
        name - column name.
        column - column to be added.
        col_des - if True (default) and if column description is 'UNKNOWN' or `None`, set to column name.
        """
        d = self.getData()
        if d is None:
            # No container yet: install an empty one, as setData(None)
            # does, so that the new column is kept by the table instead of
            # going into a throw-away mapping.
            super().setData(ODict())
            d = self.getData()
            if d is None:
                d = ODict()

        names = list(d.keys())
        if name == '' or name is None:
            name = 'column' + str(len(names) + 1)

        # keep self._list parallel to the columns: replace in place for an
        # existing name, append for a new one
        col_data = column.getData()
        if name in names:
            self._list[names.index(name)] = col_data
        else:
            self._list.append(col_data)

        des = column.getDescription()
        if col_des and (des == 'UNKNOWN' or des is None):
            column.setDescription(name)
        d[name] = column
        self.updateShape()

    def removeColumn(self, key):
        """ Removes the columns specified by ``key``.

        ref. ``getColumnMap`` on ``key`` usage.
        """
        selected = self.getColumnMap(key)
        # Snapshot the names first: the returned map can be the table's own
        # mapping (e.g. key=None), which must not change while iterated.
        names = [] if selected is None else list(selected.keys())
        for name in names:
            self._list.pop(self.indexOf(name))
            del self.data[name]
        self.updateShape()

    def indexOf(self, key):
        """ Returns the index of specified column.

        if the key is a Column,
        it looks for equal references (same column objects), not for
        equal values.
        If the key is a string, Returns the index of specified Column name.

        Raises ``TypeError`` for any other key type and ``ValueError`` if the column is not in the table.
        """
        from .arraydataset import Column
        if isinstance(key, str):
            ks = list(self.getData().keys())
            k = key
        elif isinstance(key, Column):
            ks = [id(v) for v in self.getData().values()]
            k = id(key)
        else:
            raise TypeError(
                "key must be string or Column, not %s." % type(key).__name__)
        self.updateShape()
        return ks.index(k)

    def addRow(self, row, rows=False):
        """ Adds the specified map as a new row to this table.

        row: row is a dict with names as keys and row data as values.
        rows: append each element in row if the row data is a list.

        Raises ``ValueError`` if ``row`` has fewer entries than there are columns, ``KeyError`` if a column name is missing from ``row``.
        """
        d = self.getData()
        if len(row) < len(d):
            msg = 'row width %d should be %d.' % (len(row), len(d))
            raise ValueError(msg)
        names = list(d.keys())
        # validate before touching any column so that a bad row cannot
        # leave the columns with different lengths
        missing = [c for c in names if c not in row]
        if missing:
            raise KeyError(missing[0])
        for c in names:
            col = d[c]
            if rows:
                col.data.extend(row[c])
            else:
                col.data.append(row[c])
            if hasattr(col, 'updateShape'):
                col.updateShape()
        self.updateShape()

    def getRowMap(self, rowIndex):
        """ Returns a dict of column-names as the keys and the objects located at a particular row(s) as the values.

        rowIndex: return the following as the value for each key-value pair:
        * int: the int-th row's elements;
        * ``slice`` object, a list of rows from slicing the column. Example ``a.getRow(slice(3, None))``;
        * list of integers: they are used as the row index to select the rows.
        * list of booleans: rows where the corresponding boolean is True are chosen.
        * empty list: no row is selected; every value is an empty list.

        Raises ``ValueError`` for any other ``rowIndex``.
        """
        d = self.getData()
        if isinstance(rowIndex, (int, slice)):
            return {n: c.getData()[rowIndex] for n, c in d.items()}
        if isinstance(rowIndex, list):
            if not rowIndex:
                return {n: [] for n, c in d.items()}
            # the first element decides how the whole list is read;
            # exact types are compared so that bool is not taken as int
            first = type(rowIndex[0])
            if first is int:
                return {n: [c.getData()[i] for i in rowIndex]
                        for n, c in d.items()}
            if first is bool:
                return {n: [x for x, s in zip(c.getData(), rowIndex) if s]
                        for n, c in d.items()}
        raise ValueError(
            'RowIndex must be an int, a slice, or a list of ints or bools.')

    def getRow(self, rowIndex):
        """ Returns a list containing the objects located in a particular row, or a list of rows.

        rowIndex: ref ``getRowMap()``
        * int: return the int-th row in a list of elements;
        * ``slice`` object, list of integers, list of booleans: return a list of rows each represented by a tuple. Example ``a.getRow(slice(3, None))``, ``[2,4]``, ``[True, False...]``.
        """
        it = self.getRowMap(rowIndex).values()
        if isinstance(rowIndex, int):
            # return a list of row elements
            return list(it)
        # return transposed in a list
        return list(zip(*it))

    def select(self, selection):
        """ Select a number of rows from this table dataset and
        return a new TableDataset object containing only the selected rows.

        selection: to form a new Tabledataset with ref ``getRowMap()``
        """
        # imported first: Column is needed by every branch below
        from .arraydataset import Column

        # an int selects one row, whose scalar values are wrapped in a list
        single = isinstance(selection, int)
        d = ODict()
        for name, data in self.getRowMap(selection).items():
            d[name] = Column(data=[data] if single else data,
                             unit=self.getColumn(name).getUnit())
        return TableDataset(data=d)

    def removeRow(self, rowIndex):
        """ Removes a row with specified index from this table.

        rowIndex: int or a ``slice`` object. Example ``a.removeRow(slice(3, None))``.
        return: removed row data, one item per column: the removed value for an int index, the removed slice of the column data for a slice.
        """
        is_slice = isinstance(rowIndex, slice)
        removed = []
        for col in self.getData().values():
            if is_slice:
                removed.append(col.data[rowIndex])
                del col.data[rowIndex]
            else:
                removed.append(col.data.pop(rowIndex))
            col.updateShape()
        self.updateShape()
        return removed

    @property
    def rowCount(self):
        """ Number of rows; ref ``getRowCount()``. """
        return self.getRowCount()

    @rowCount.setter
    def rowCount(self, newRowCount):
        self.setRowCount(newRowCount)

    def setRowCount(self, rowCount):
        """ cannot do this. Always raises ``ValueError``.
        """
        raise ValueError('Cannot set row count.')

    @property
    def columnCount(self):
        """ Number of columns; ref ``getColumnCount()``. """
        return self.getColumnCount()

    @columnCount.setter
    def columnCount(self, newColumnCount):
        self.setColumnCount(newColumnCount)

    def setColumnCount(self, columnCount):
        """ cannot do this. Always raises ``ValueError``.
        """
        raise ValueError('Cannot set column count.')

    @property
    def list(self):
        """ The list of column data maintained by ``addColumn()`` and ``removeColumn()``. Read-only. """
        return self._list

    @list.setter
    def list(self, l):
        # NotImplemented is a comparison sentinel, not an exception class
        raise NotImplementedError

    def getColumnMap(self, key=None):
        """ Returns a dict of column-names as the keys and the column(s) as the values.

        key: return the following as the value for each key-value pair:
        * int: name-value where value is the int-th column.
        * str: name-value of the column with that name.
        * ``slice`` object, a list of name-columns from slicing the column index. Example ``a.getColumn(slice(3, None))``;
        * Sequence of integers/strings: they are used as the column index/name to select the columns.
        * Sequence of booleans: columns where the corresponding boolean is True are chosen.
        * empty Sequence: no column is selected.

        Default is to return all columns. The data mapping itself is also returned, whatever the key, when it is None or empty.

        Raises ``ValueError`` for any other ``key``.
        """
        d = self.getData()
        try:
            if d is None or key is None or len(d) == 0:
                return d
        except TypeError:
            pass

        if isinstance(key, int):
            name, col = list(d.items())[key]
            return {name: col}  # {str:Column}
        if isinstance(key, slice):
            return ODict(list(d.items())[key])  # {str:Column, ...}
        if isinstance(key, str):
            return {key: d[key]}  # {str:Column}
        if isinstance(key, Sequence):
            if len(key) == 0:
                return ODict()
            # the first element decides how the whole sequence is read;
            # exact types are compared so that bool is not taken as int
            first = type(key[0])
            if first is int:
                items = list(d.items())
                return ODict(items[i] for i in key)  # {str:Column, ...}
            if first is str:
                return {n: d[n] for n in key}  # {str:Column, ...}
            if first is bool:
                # {str:Column, ...}
                return ODict(x for x, s in zip(d.items(), key) if s)
        raise ValueError(
            '``key`` must be an int, a string, a slice, or a list of ints, strings, or bools.')

    def getColumn(self, key):
        """ Returns the particular column, or a list of columns.

        key: ref ``getColumnMap()``
        * int/str: return the int-th/named column;
        * ``slice`` object, list of columns of sliced column indices;
        * list of integers/strings: return a list of columns corresponding to the given column index/name. Example ``a.getColumn(slice(3, None))``, ``[2, 4]``, ``['time', 'energy']``.
        * list of booleans: return a list of columns where key is True. Example ``[True, False...]``.

        An empty list is returned when nothing is selected or the table has no data.
        """
        colmap = self.getColumnMap(key)
        if colmap is None:
            return []
        it = colmap.values()
        if len(it) == 0:
            return []
        if isinstance(key, (int, str)):
            # a single column
            return list(it)[0]
        # a list of columns
        return list(it)

    def setColumn(self, key, value):
        """ Sets a column in this table.

        If key is a string the column of that name is replaced, or a new column of that name is added. If key is an integer within the range of existing column indices, the column at that index is replaced. Otherwise a new column with the auto-generated name 'column'+(column count + 1) is added.
        """
        if self.getData() is None:
            key = ''
        elif type(key) is int:
            nms = self.getColumnNames()
            # an out-of-range index falls back to the auto-generated name
            key = nms[key] if 0 <= key < len(nms) else ''
        self.addColumn(name=key, column=value)
        self.updateShape()

    def __iter__(self):
        yield from self.getData()

    def items(self):
        """ for k,v in tabledataset.items()
        """
        return self.getData().items()

    def __getitem__(self, key):
        """ return column of given key.

        ref. ``getColumn()``.
        """
        return self.getColumn(key)

    def __setitem__(self, key, value):
        """ set column of given key.

        ref. ``setColumn()``.
        """
        self.setColumn(key, value)

    def __delitem__(self, key):
        """ delete column of given key.

        ref. ``removeColumn()``.
        """
        return self.removeColumn(key)

    keys = TableModel.getColumnNames

    def __repr__(self):
        return self.toString(level=2)

    def toString(self, level=0, extra=False, param_widths=None,
                 tablefmt='grid', tablefmt1='simple', tablefmt2='plain',
                 width=0, matprint=None, trans=True,
                 heavy=True, center=-1, **kwds):
        """ Returns a text rendering of the table.

        level: above 1 gives a compact ``ClassName(<meta>data= {...})`` form. Otherwise a title/metadata part is followed by the table; level 0 adds a 'DATA' banner and shows all rows, level 1 shows at most 20 rows.
        matprint: function that renders the column data; ``ndprint`` if None.
        tablefmt2: format of 2D data, others see `MetaData.toString`.
        """
        if matprint is None:
            matprint = ndprint

        cn = self.__class__.__name__
        if level > 1:
            s = cn + '('
            s += self.meta.toString(
                level=level, extra=extra, param_widths=param_widths,
                tablefmt=tablefmt, tablefmt1=tablefmt1, tablefmt2=tablefmt2,
                width=width,
                **kwds)
            # NOTE(review): columns get tablefmt2=tablefmt here, while the
            # metadata call above gets tablefmt2=tablefmt2 -- possibly a
            # typo; left as found.
            columns = ', '.join(
                '"%s": %s' % (k, v.toString(
                    level=level, extra=extra, param_widths=param_widths,
                    tablefmt=tablefmt, tablefmt1=tablefmt1, tablefmt2=tablefmt,
                    width=width, heavy=heavy, **kwds))
                for k, v in self.getColumnMap().items())
            return s + 'data= {' + columns + '})'

        html = 'html' in tablefmt.lower() or 'html' in tablefmt2.lower()
        br = '<br>' if html else '\n'
        if html:
            tablefmt = tablefmt2 = 'unsafehtml'
        s, last = make_title_meta_l0(self, level=level, extra=extra, param_widths=param_widths,
                                     tablefmt=tablefmt, tablefmt1=tablefmt1,
                                     tablefmt2=tablefmt2, center=center,
                                     width=width, heavy=heavy,
                                     html=html, excpt=['description'],
                                     **kwds)
        width = len(last) - 1
        if level == 0:
            if html:
                d = '<center><u>%s</u></center>\n' % 'DATA'
            else:
                d = 'DATA'.center(width) + '\n' + '----'.center(width) + '\n'
        else:
            d = ''

        # levels above 1 have returned already; only level 1 is truncated
        rowlimit = 20 if level == 1 else None
        cols = self.getData().values()
        w = MetaData.MaxDefWidth
        coldata = [list(itertools.islice(x.data, rowlimit)) for x in cols]
        # table headers
        hdr = maybe2rows(self.getData().keys(),
                         (str(x.unit) for x in cols),
                         col_width=w, one_row=html,
                         linebreak=br)
        d += matprint(coldata, trans=trans, headers=hdr,
                      tablefmt=tablefmt, tablefmt1=tablefmt1,
                      tablefmt2=tablefmt2, center=center,
                      mdim=2, param_widths=param_widths,
                      maxElem=sys.maxsize, **kwds)
        collen = self.getRowCount()
        if level and rowlimit is not None and rowlimit < collen:
            d += '(Only display %d rows of %d for level=%d.)' % (rowlimit, collen, level)
        return f'{s}\n{d}{last}\n'

    string = toString
    txt = toString

    def __getstate__(self):
        """ Can be encoded with serializableEncoder.

        Returns an ``OrderedDict`` with the metadata under ``_ATTR_meta`` followed by every column under its name.
        """
        return OrderedDict(
            _ATTR_meta=getattr(self, '_meta', None),
            **self.getData())
class IndexedTableDataset(Indexed, TableDataset):
    """ TableDataset with an index table for efficient row look-up.

    Look-ups read ``self._tableOfContent`` and ``self._indexPattern``, and the index is rebuilt with ``self.updateToc()``; none of these is defined in this class.
    NOTE(review): presumably they are provided by ``Indexed`` -- confirm.
    """

    def __init__(self, **kwds):
        """ Prepares the index attributes, then initializes data, meta, unit.
        """
        # NOTE(review): _indexCols is not read anywhere in this class;
        # look-ups use _indexPattern instead -- confirm which one is meant.
        self._indexCols = [0]
        self._rowIndexTable = {}
        super().__init__(**kwds)  # initialize data, meta, unit

    def getColumnsToLookup(self):
        """ returns a list of the data sequences of the columns to look up over.
        """
        # list of Column's arrays
        return [x.data for x in self.getColumn(self._indexPattern)]

    def setData(self, data):
        """ sets name-column pairs from data and updates index if needed.

        The index is rebuilt when the table held no data before the call, or when ``data`` is a sequence that reaches an existing column position listed in ``self._indexPattern``.
        """
        d = self.getData()
        if d:
            reindex = False
            if isinstance(data, seqlist):
                # only positions of already existing columns are compared
                # with the index pattern
                pattern = self._indexPattern
                touched = range(min(len(d), len(data)))
                reindex = any(ind in pattern for ind in touched)
            # NOTE(review): a Mapping or None also replaces the existing
            # columns but does not trigger a rebuild here -- confirm that
            # the index cannot go stale in those cases.
        else:
            reindex = True
        super().setData(data)
        if reindex:
            self.updateToc()

    def vLookUp(self, key, return_index=True, multiple=False):
        """ Similar to Excel VLOOKUP, return all records (rows) that match the key.

        key: taken as a dictionary key unless ``multiple`` is True.
        return_index: if True (default) return index in the array of columns.
        multiple: if True (default is False) loop through key as a sequence of keys and return a sequence.

        return: the row index for ``key``, or the row as a list with one value per column; with ``multiple`` a list of indices, or a list of rows each represented by a tuple.
        """
        toc = self._tableOfContent
        if not multiple:
            rec_ind = toc[key]
            if return_index:
                return rec_ind
            return [c[rec_ind] for c in self._list]

        indices = [toc[k] for k in key]
        if return_index:
            return indices
        # Build each record eagerly. Nested generators over the columns
        # would bind the column variable late and read every field from
        # the last column.
        columns = self._list
        return [tuple(c[i] for c in columns) for i in indices]

    def hashx(self):
        """ Returns ``hash()`` of the parent class computed over the column values.
        """
        return super().hash(hash_list=self.data.values())

    def __getstate__(self):
        """ Can be encoded with serializableEncoder.

        Returns the state of ``Indexed`` extended with the metadata under ``_ATTR_meta`` and every column under its name.
        """
        state = Indexed.__getstate__(self)
        # update() of dict-like objects works in place and returns None, so
        # its result must not be returned blindly; a non-None result is
        # honoured in case the state type returns the updated mapping.
        updated = state.update(
            _ATTR_meta=getattr(self, '_meta', None),
            **self.getData())
        return state if updated is None else updated