# -*- coding: utf-8 -*-
from .comparable import Comparable
from ..dataset.serializable import Serializable
from ..dataset.odict import ODict
from ..dataset.eq import DeepEqual
from ..dataset.classes import Class_Look_Up
from ..utils.common import fullname
import sys
import os
import functools
from collections import OrderedDict, ChainMap
import logging
# create logger
logger = logging.getLogger(__name__)
# logger.debug('level %d' % (logger.getEffectiveLevel()))
# + 0.1 * sys.version_info[1] >= 3.3:
if sys.version_info[0] >= 3:
PY3 = True
strset = str
from urllib.parse import urlparse
else:
PY3 = False
strset = (str, unicode)
from urlparse import urlparse
[docs]def is_urn(u):
sp = u.split(':')
return len(sp) == 4 and sp[0].lower() and sp[3].isdigit()
[docs]def makeUrn(poolname, typename, index):
""" assembles a URN or a list of URNs with infos of the pool, the resource type, and the index.
:poolname: str or list of them.
:typename: str, name of data type. or list of them.
index: int or string or list of them.
If any two or more are lists, all have to be list of the same length.
"""
lp = lt = li = 0
lp = len(poolname) if isinstance(poolname, list) else 0
lt = len(typename) if isinstance(typename, list) else 0
li = len(index) if isinstance(index, list) else 0
sz = max(lp, lt, li)
if sz:
if not ((lp == 0 or lp == sz) and (lt == 0 or lt == sz) and (li == 0 or li == sz)):
raise TypeError(
f'At least two args have different sizes {lp}, {ly}, {li}.')
# at least one is a list. make them all lists of sz size.
po = poolname if lp else [poolname]*sz
ty = typename if lt else [typename]*sz
sn = index if li else [index]*sz
urns = [f'urn:{po[i]}:{ty[i]}:{sn[i]}' for i in range(sz)]
return urns
else:
urns = f'urn:{poolname}:{typename}:{index}'
return urns
[docs]class Urn(DeepEqual, Serializable, Comparable):
""" The object representation of the product URN string.
The memory consumed by sets of this object are much less than sets
of URN strings.
Only when the class types in URN string are not in classpath,
the urn object will consume equals or a little more than URN string
as the object has to hold the original urn string. However this should
be considered as exceptional cases.
Using this object representation also help to avoid parsing cost of URN string.
URN string should be immutable.
About_URN
The Universial Resource Name (**URN**, https://datatracker.ietf.org/doc/html/rfc2141 ) string has this format:
urn:<poolname>:<resourcetype>:<serialnumber>
with modified rules desribed below.
:<poolname>: Also called poolID. It consists of 1-32 characters, is case-sensitive, which deviates from rfc2141. Character allowed are ``alpha``, ``digit``, ``safe``, defined in rfc1630 (https://datatracker.ietf.org/doc/html/rfc1630). These are excluded: `` ``, ``%``, ``?``, ``!``, ``*``,``'``, ``"``, ``(``, ``)``, ``=``, ``/``, and what listed in ``mod:poolmanager:Invalid_Pool_Names``, e.g. ``pools``, ``urn``, ``URN``, ``api``.
:<resourcetype>: type name of the data item (usually class name of data products inheriting :class:`BaseProduct`)
:<serialnumber>: internal index for a certain <resourcetype>.
The ``poolname`` in a URN is a label. Some examples:
- urn:pool_mh:fdi.dataset.product.Product:2
- urn:20.20:svom.products.SVOMMapContext:0
URNs are used to to identify data be cause URNs are location agnostic. Storage Pools (subclasses of :class:`ProductPool`) are where data item reside. The **PoolURL** is used to give practical information of a pool, such as a poolname, its location, and its access scheme. PoolURL is designed to be a local set-up detail that is supposed to be hidden from pool users. Data processing software use ``URN``s to refer to products, without specifying pool location. The poolID in a URN could be a :class:`LocalPool` on the development laptop and a :class:`HTTPClientPool` on the production cloud.
"""
[docs] def __init__(self, urn=None, poolname=None, cls=None, index=None, poolurl=None, **kwds):
"""
Creates the URN object with the urn string or components.
give urn and optional poolurl, or all poolname, cls, index arguments.
if urn is given and pool, class, etc are also specified,
the latter are ignored. else the URN object is constructed from them.
Urn(u) will make a Urn object out of u.
All arguements are None by default.
Parameters
urn: string. A URM string.
poolname: string, provides pool name part of URN if URN is missing from input.
cls: type. the full class name is used for datatype.
index: int.
poolurl: string. IF specified will provide info of the pool involved.
"""
super(Urn, self).__init__(**kwds)
if urn is None:
if cls is None or poolname is None or index is None:
if cls is None and poolname is None and index is None:
self._scheme = None
self._place = None
self._poolname = None
self._class = None
self._index = None
self._poolpath = None
self._urn = None
return
else:
raise ValueError(
'give urn and optional poolurl, or all poolname, cls, index arguments')
if not issubclass(cls.__class__, type):
raise TypeError('cls is a ' + cls.__class__ +
', not a class type.')
urn = makeUrn(poolname=poolname,
typename=fullname(cls),
index=index)
self.setUrn(urn, poolurl=poolurl)
@ property
def urn(self):
""" property
"""
return self.getUrn()
@ urn.setter
def urn(self, urn):
""" property
"""
self.setUrn(urn)
[docs] def setUrn(self, urn, poolurl=None):
""" parse urn to get poolname, resource, index.
"""
if hasattr(self, '_urn') and self._urn and urn:
raise TypeError('URN is immutable.')
poolname, resourcetype, index = parseUrn(urn)
cls = Class_Look_Up[resourcetype.split('.')[-1]]
self._poolname = poolname
self._class = cls
self._index = index
self._urn = urn
if poolurl:
poolpath, scheme, place, poolname, self._username, self._password = parse_poolurl(
poolurl, poolname)
self._poolpath = poolpath
self._scheme = scheme
self._place = place
else:
self._poolpath = None
self._scheme = None
self._place = None
self._username, self._password = None, None
[docs] def getUrn(self):
""" Returns the urn in this """
return self._urn
[docs] def getType(self):
""" Returns class type of Urn
"""
return self._class
[docs] def getTypeName(self):
""" Returns class type name of Urn.
"""
return fullname(self._class)
[docs] def getIndex(self):
""" Returns the product index.
"""
return self._index
[docs] def getScheme(self):
""" Returns the urn scheme.
"""
return self._scheme
[docs] def getUrnWithoutPoolId(self):
return fullname(self._class) + ':' + str(self._index)
@ property
def place(self):
return self.getPlace()
[docs] def getPlace(self):
""" Returns the netloc in this """
return self._place
[docs] def getPoolpath(self):
""" returns the poolpath stored
"""
return self._poolpath
@ property
def pool(self):
""" returns the poolname.
"""
return self.getPoolId()
[docs] def getPoolId(self):
""" Returns the pool URN in this """
return self._poolname
[docs] def getPool(self):
""" Returns the pool name in this """
return self.getPoolId()
def __getstate__(self):
""" Can be encoded with serializableEncoder """
return OrderedDict(urn=self._urn if hasattr(self, '_urn') else None)
[docs] def toString(self, level=0,
**kwds):
return self.__class__.__name__ + \
'(%s, scheme:%s, place:%s, pool:%s, type:%s, index:%d, poolpath: %s)' % (
self._urn,
self._scheme,
self._place,
self._poolname,
self._class, # .__name__,
self._index,
self._poolpath
)
string = toString
txt = toString
[docs]def parseUrn(urn, int_index=True, check_poolename=None):
"""
Checks the URN string is valid in its form and splits it.
A Product URN has several segment. For example if the urn is ``urn:mypool/v2:proj1.product:322``
* poolname, also called poolURN or poolID, optionally path-like: ``mypool/v2``,
* resource type (usually class) name ``proj1.product``,
* index number ``322``,
If urn is None or empty returns (None,None,None)
Parameter
---------
urn : str,list
One or a list of URN strings to be decomposed.
int_index : bool
If `True` (default) returns integer index, else string index.
check_poolename : str
Raise `ValueError` is any `urn` has a poolname different from the value of `check_poolename`.
Return
------
tuple
* If `urn` is `None` or a zero-length string, returns `(None, None, None)`.
* If `urn` is a non-zero-length string, returns a tuple of
:poolname: Name of the pool
:resourceclass: type of resource/products
:index: One or a list of (int) serial number of resourceclass in the pool.
* If `urn` is a list URNs and all of them have identical poolname and identical resourceclasses, returns a tuple `(poolname, resourceclass, list-of-index])`
* If `urn` is a list URNs and not all of them have identical poolname or not identical resourceclasses, returns a tuple `(list_of_poolname, list_of_resourceclass, list-of-index), ...)`
"""
@functools.lru_cache(maxsize=512)
def _parse(urn):
if not issubclass(urn.__class__, strset):
raise ValueError('a string is needed: ' + str(urn))
# is a urn str?
sp1 = urn.split(':')
if sp1[0].lower() != 'urn':
raise ValueError('Not a URN: ' + urn)
# this is a product URN
if len(sp1) != 4:
# must have 4 segments
raise ValueError(
'Bad URN. Must have 4 ":"-separators: ' + str(sp1))
index = int(sp1[3]) if int_index else sp1[3]
resourcetype = sp1[2]
poolname = sp1[1]
if check_poolename and check_poolename != poolname:
raise ValueError(
urn + ' is not from the pool ' + self._poolname)
if len(poolname) == 0:
poolname = None
if len(resourcetype) == 0:
resourcetype = None
return poolname, resourcetype, index
if urn is None or urn == '':
return (None, None, None)
if issubclass(urn.__class__, (list, tuple)):
urns = urn
alist = True
else:
urns = [urn]
alist = False
res, pnames, ptypes, inds = [], [], [], []
first = True
same_pname, same_type = True, True
for urn in urns:
poolname, resourcetype, index = _parse(urn)
if first:
first_pname, first_type = poolname, resourcetype
first = False
else:
if first_pname != poolname:
same_pname = False
if first_type != resourcetype:
same_type = False
pnames.append(poolname)
ptypes.append(resourcetype)
inds.append(index)
if alist:
if same_pname and same_type:
return (first_pname, first_type, inds)
# poolname and resourceclass not repeating
return (pnames, ptypes, inds)
else:
return (first_pname, first_type, inds[0])
[docs]def parse_poolurl(url, poolhint=None):
"""
Disassambles a pool URL.
A Pool URL is It is generated to desribe . For example:
input:
* url: to be decomposed.
* poolhint: A urn or a poolname (the first distinctive substring) needs to be given if the poolname has more than one level.
returns: poolpath, scheme, place, poolname.
returns (None, None,None,None) if url is None or empty.
About_poolURL
The ``PoolURL`` format is in the form of a URL that preceeds its poolname part:
<scheme>://<place><poolpath>/<poolname>
:<scheme>: Implementation protocol including ``file`` for :class:`LocalPool`, ``mem`` for :class:`MemPool`, ``http``, ``https`` for :class:`HttpclientPool`.
:<place>: IP:port such as``192.168.5.6:8080`` for ``http`` and ``https`` schemes, or an empty string for ``file`` and ``mem`` schemes.
:<poolname>: same as in URN.
:<poolpath>: The part between ``place`` and an optional ``poolhint``::
:<username>:
:<password>:
- For ``file`` or ``server`` schemes, e.g. poolpath is ``/c:/tmp`` in ``http://localhost:9000/c:/tmp/mypool/`` with ``poolhint`` keyword arguement of :func:`parse_poolurl` not given, or given as ``mypool`` (or ``myp`` or ``my`` ...).
- For ``http`` and ``https`` schemes, it is e.g. ``/0.6/tmp`` in ``https://10.0.0.114:5000/v0.6/tmp/mypool`` with ``poolhint`` keyword arguement not given, or given as ``mypool`` (or ``myp` or 'my' ...). The meaning of poolpath is subject to interpretation by the server. In the preceeding example the poolpath has an API version. :meth:`ProductPool.transformpath` is used to map it further. Note that trailing blank and ``/`` are ignored, and stripped in the output.
Examples:
- file:///tmp/mydata for pool ```mydata```
- file:///d:/data/test2--v2 for pool ``test2--v2``
- mem:///dummy for pool ``dummy``
- https://10.0.0.114:5000/v0.6/obs for a httpclientpool ``obs``
- server:///tmp/data/0.4/test for a pool ``test`` used on a server.
"""
if url is None or url == '':
return (None, None, None, None)
if not issubclass(url.__class__, strset):
raise ValueError('a string is needed: ' + str(url))
sp1 = url.split(':')
if len(sp1) > 4: # after scheme and a possible windows path, and one for user:pass
raise ValueError(
'a pool URN can have no more than 3 \':\'.')
pr = urlparse(url.strip())
scheme = pr.scheme # file
place = pr.netloc
# Note that trailing blank and ``/`` are ignored.
path = pr.path.strip().rstrip('/')
# convenient access path
# get the poolname
if poolhint:
ps = poolhint.split(':')
poolin = ps[1] if ps[0].lower() == 'urn' else ps[0]
pind = path.index(poolin)
poolname = path[pind:]
poolpath = path[:pind].rstrip('/')
else:
# the last level is assumed to be the poolname
sp = path.rsplit('/', 1)
poolname = sp[1]
poolpath = sp[0]
poolpath = place + poolpath if scheme in ('file') else poolpath
return poolpath, scheme, place, poolname, pr.username, pr.password
[docs]class UrnUtils():
[docs] @ staticmethod
def checkUrn(identifier):
""" Throw a ValueError if the identifier is not a legal URN."""
if not issubclass(identifier.__class__, str):
raise ValueError('Not a string: %s' % str(identifier))
return parseUrn(identifier)
[docs] @ staticmethod
def containsUrn(poolobj, urn):
""" Informs whether a URN belongs to the given pool. """
return poolobj.exists(urn)
[docs] @ staticmethod
def getClass(urn):
""" Get the class contained in a URN. """
pn, prod, sn = parseUrn(urn)
return Class_Look_Up[prod.rsplit('.', 1)[1]]
[docs] @ staticmethod
def getClassName(urn):
""" Get the class name contained in a URN. """
pn, prod, sn = parseUrn(urn)
return prod
[docs] @ staticmethod
def getLater(urn1, urn2):
""" Returns the later of two urns. """
pn1, prod1, sn1 = parseUrn(urn1)
pn2, prod2, sn2 = parseUrn(urn2)
return urn1 if sn1 > sn2 else urn2
[docs] @ staticmethod
def getPool(urn, pools):
""" Returns the pool corresponding to the pool id inside the given urn.
pools: ProductPool or subclass
"""
if issubclass(urn.__class__, Urn):
urn = urn.urn
pn, prod, sn = parseUrn(urn)
for p in pools:
if pn == p.getId():
return p
raise KeyError(pn + ' not found in pools')
[docs] @ staticmethod
def getPoolId(urn):
""" Returns the pool id part of the URN. """
pn, prod, sn = parseUrn(urn)
return pn
[docs] @ staticmethod
def getProductId(urn):
""" Returns the product id part of the URN, that is, the last token. """
pn, prod, sn = parseUrn(urn)
return sn
[docs] @ staticmethod
def isUrn(identifier):
""" Informs whether the given identifier corresponds to a URN. """
try:
UrnUtils.checkUrn(identifier)
except ValueError:
return False
return True