woc.local

This type stub file was generated by cyright.

def fnvhash(data: bytes) -> bytes:

Returns the 32 bit FNV-1a hash value for the given data.

>>> hex(fnvhash(b'foo'))
'0xa9f37ed7'
def unber(buf: bytes) -> List[int]:

Perl BER unpacking. BER is a way to pack several variable-length ints into one binary string. Here we do the reverse. Format definition: from http://perldoc.perl.org/functions/pack.html (see "w" template description)

Parameters
  • buf: a binary string with packed values
Returns

a list of unpacked values

>>> unber(b'\x00\x83M')
[0, 461]
>>> unber(b'\x83M\x96\x14')
[461, 2836]
>>> unber(b'\x99a\x89\x12')
[3297, 1170]
def lzf_length(raw_data: bytes) -> Tuple[int, int]:

Get length of uncompressed data from a header of Compress::LZF output.

Check Compress::LZF sources for the definition of this bit magic: (namely, LZF.xs, decompress_sv) https://metacpan.org/source/MLEHMANN/Compress-LZF-3.8/LZF.xs

Parameters
  • raw_data: data compressed with Perl Compress::LZF
Returns

(header_size, uncompressed_content_length) in bytes

>>> lzf_length(b'\xc4\x9b')
(2, 283)
>>> lzf_length(b'\xc3\xa4')
(2, 228)
>>> lzf_length(b'\xc3\x8a')
(2, 202)
>>> lzf_length(b'\xca\x87')
(2, 647)
>>> lzf_length(b'\xe1\xaf\xa9')
(3, 7145)
>>> lzf_length(b'\xe0\xa7\x9c')
(3, 2524)
def decomp(raw_data: bytes) -> bytes:

lzf wrapper to handle perl tweaks in Compress::LZF

This function extracts uncompressed size header and then does usual lzf decompression.

Parameters
  • raw_data: data compressed with Perl Compress::LZF
Returns

unpacked data

def decomp_or_raw(raw_data: bytes) -> bytes:

Try to decompress raw_data, return raw_data if it fails

def slice20(raw_data: bytes) -> Tuple[bytes, ...]:

Slice raw_data into 20-byte chunks and hex-encode each of them. It returns a tuple so that the result is cacheable.

def decode_str(raw_data: bytes, encoding: str = 'utf-8') -> str:

Aggressively decode raw_data, return empty string if it fails

def get_tch(path: str) -> woc.tch.TCHashDB:

Cache TCHashDB objects

def get_shard(key: bytes, sharding_bits: int, use_fnv_keys: bool) -> int:

Get shard id

def decode_value( value: bytes, out_dtype: str) -> 'List[str] | Tuple[str, str, str] | List[Tuple[str, str, str]] | Tuple[str, Any]':

Decode values from tch maps.

def decode_tree(value: bytes) -> List[Tuple[str, str, str]]:

Decode a tree binary object into tuples.

Python: 4.77 µs, Cython: 280 ns. Reference: https://stackoverflow.com/questions/14790681/

>>> decode_tree(b'100644 .gitignore\x00\x8e\x9e\x1f...')
[('100644', '.gitignore', '8e9e1...'), ...]
def decode_commit( commit_bin: bytes) -> Tuple[str, Tuple[str, str, str], Tuple[str, str, str], str]:

Decode git commit objects into tuples.

Python: 2.35 µs, Cython: 855 ns. Reference: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects

>>> decode_commit(b'tree f1b66dcca490b5c4455af319bc961a34f69c72c2\n...')
('f1b66dcca490b5c4455af319bc961a34f69c72c2',
 ('c19ff598808b181f1ab2383ff0214520cb3ec659',),
 ('Audris Mockus <audris@utk.edu> 1410029988', '1410029988', '-0400'),
 ('Audris Mockus <audris@utk.edu>', '1410029988', '-0400'),
 'News for Sep 5, 2014\n')
def decode_tag(tag: bytes):

Decode git tag objects into tuples.

>>> decode_tag(b'object fcadcb9366d4a011039e384affa10961e99cf2c4\ntype commit\ntag eccube-2.11.1\ntagger nanasess 1303788649 +0000\n\nAdded tags/eccube-2.11.1\n')
('fcadcb9366d4a011039e384affa10961e99cf2c4', 'commit', 'eccube-2.11.1', 'nanasess ', '1303788649', '+0000')

def read_large_random_access( path: str, dtype: str, offset: int = Ellipsis, length: int = Ellipsis) -> Tuple[bytes, Union[int, NoneType]]:

Read a *.large.* file and return its content.

Parameters
  • path: path to the file
  • dtype: data type
  • offset: offset to start reading. It is either 0 or after the last separator.
  • length: length to read. It should be longer than the longest record.
Returns

a tuple of bytes and the next offset, None if EOF. Returned bytes must not begin or end with a separator.

class WocMapsLocal(woc.base.WocMapsBase):
WocMapsLocal( profile_path: Union[str, Iterable[str], NoneType] = Ellipsis, version: Union[str, Iterable[str], NoneType] = Ellipsis, on_large: Literal['ignore', 'head', 'all'] = Ellipsis, on_bad: Literal['allow', 'error'] = Ellipsis)
def get_values(self, map_name: str, key: Union[bytes, str]):

Equivalent to getValues in WoC Perl API.

>>> self.get_values('P2c', 'user2589_minicms')
['05cf84081b63cda822ee407e688269b494a642de', ...]
def iter_values(self, map_name: str, key: Union[bytes, str]):

Equivalent to getValues in WoC Perl API.

>>> self.get_values('P2c', 'user2589_minicms')
['05cf84081b63cda822ee407e688269b494a642de', ...]
def show_content(self, obj_name: str, key: Union[bytes, str]):

Equivalent to showCnt in WoC Perl API.

>>> self.show_content('tree', '7a374e58c5b9dec5f7508391246c48b73c40d200')
[('100644', '.gitignore', '8e9e1...'), ...]
def count(self, map_name) -> int:

Count the number of keys in a map (# of larges + # of tch keys)

def all_keys(self, map_name: str) -> Generator[bytes, NoneType, NoneType]:

Iterate over all keys in a map.

>>> for key in self.all_keys('P2c'):
...     print(key)  # hash or encoded string
Inherited Members
woc.base.WocMapsBase
maps
objects