vkit.element.lexicon
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Mapping, Sequence, Optional, DefaultDict, List
from collections import defaultdict
import hashlib

import attrs
import cattrs
import iolite as io

from vkit.utility import attrs_lazy_field, unwrap_optional_field, dyn_structure, PathType


@attrs.define(frozen=True)
class Lexicon:
    """A character together with its aliases, tags and optional metadata."""

    # The primary character of this entry.
    char: str
    # Alternative characters that map to this entry; normalized to a tuple.
    aliases: Sequence[str] = attrs.field(factory=tuple)
    # Tags used to group entries; normalized to a tuple.
    tags: Sequence[str] = attrs.field(factory=tuple)
    # Optional string-to-string metadata.
    meta: Optional[Mapping[str, str]] = None

    def __attrs_post_init__(self):
        # The class is frozen, so regular attribute assignment is not available;
        # object.__setattr__ is used to replace both fields with tuples.
        object.__setattr__(self, "aliases", tuple(self.aliases))
        object.__setattr__(self, "tags", tuple(self.tags))

    @property
    def char_and_aliases(self):
        """Return a new list holding ``char`` followed by every alias."""
        return [self.char, *self.aliases]

    @property
    def unicode_id(self):
        """Return the code point of ``char`` as uppercase hex without the ``0x`` prefix."""
        return hex(ord(self.char)).upper()[2:]


# Key under which lexicons that carry no tag are grouped.
KEY_NO_TAG = '__no_tag'


@attrs.define
class LexiconCollection:
    """A sequence of lexicons with lazily built char and tag lookup tables."""

    lexicons: Sequence[Lexicon]

    # Lookup caches, filled by lazy_post_init() on first access.
    # NOTE(review): attrs_lazy_field() is defined in vkit.utility; it presumably
    # yields a non-init field defaulting to None -- confirm against that helper.
    _char_to_lexicon: Optional[Mapping[str, Lexicon]] = attrs_lazy_field()
    _tag_to_lexicons: Optional[Mapping[str, Sequence[Lexicon]]] = attrs_lazy_field()
    _tags: Optional[Sequence[str]] = attrs_lazy_field()

    def lazy_post_init(self):
        """Build the lookup caches on first use; later calls return immediately.

        Raises:
            AssertionError: If a char or alias occurs more than once across
                ``lexicons``. In that case no cache field is modified, so the
                collection is not left half-initialized.
        """
        if self._char_to_lexicon is not None:
            return

        # Everything is built in locals first. ``_char_to_lexicon`` doubles as
        # the "initialized" guard, so it must not be assigned before all maps
        # are complete; otherwise a failure below would leave a partial cache
        # that later calls would treat as final.
        char_to_lexicon = {}
        for lexicon in self.lexicons:
            for char in lexicon.char_and_aliases:
                # Raised explicitly (instead of a bare ``assert``) so the check
                # also runs under ``python -O``; the exception type is unchanged.
                if char in char_to_lexicon:
                    raise AssertionError(f'Duplicated char or alias {char!r} in lexicons.')
                char_to_lexicon[char] = lexicon

        tag_to_lexicons: DefaultDict[str, List[Lexicon]] = defaultdict(list)
        for lexicon in self.lexicons:
            if lexicon.tags:
                for tag in lexicon.tags:
                    tag_to_lexicons[tag].append(lexicon)
            else:
                # Untagged lexicons are grouped under a dedicated key.
                tag_to_lexicons[KEY_NO_TAG].append(lexicon)

        # Convert to a plain dict so that missing-key lookups raise KeyError
        # instead of silently creating entries.
        self._tag_to_lexicons = dict(tag_to_lexicons)
        self._tags = sorted(self._tag_to_lexicons)
        # Published last: this is the field the guard at the top checks.
        self._char_to_lexicon = char_to_lexicon

    @property
    def char_to_lexicon(self):
        """Mapping from every char and alias to its lexicon."""
        self.lazy_post_init()
        return unwrap_optional_field(self._char_to_lexicon)

    @property
    def tag_to_lexicons(self):
        """Mapping from tag (or ``KEY_NO_TAG``) to the lexicons in that group."""
        self.lazy_post_init()
        return unwrap_optional_field(self._tag_to_lexicons)

    @property
    def tags(self):
        """Sorted list of the keys of ``tag_to_lexicons``."""
        self.lazy_post_init()
        return unwrap_optional_field(self._tags)

    def has_char(self, char: str):
        """Return True if ``char`` is a known char or alias."""
        return char in self.char_to_lexicon

    def get_lexicon(self, char: str):
        """Return the lexicon registered for ``char``; raises KeyError if unknown."""
        return self.char_to_lexicon[char]

    @classmethod
    def from_file(cls, path: PathType):
        """Create a collection from the lexicons stored at ``path``.

        NOTE(review): loading is delegated to ``dyn_structure`` from
        vkit.utility; the accepted file formats are defined there.
        """
        lexicons = dyn_structure(path, Sequence[Lexicon], force_path_type=True)
        return cls(lexicons=lexicons)

    def to_file(self, path: PathType):
        """Write the unstructured lexicons to ``path`` as JSON."""
        io.write_json(
            path,
            cattrs.unstructure(self.lexicons),
            indent=2,
            ensure_ascii=False,
        )

    def get_hash(self):
        """Return the SHA-256 hex digest over every char and alias, in order.

        Tags and meta do not contribute. The encoded strings are fed to the
        hasher back to back without a separator, so the digest does not
        distinguish where one char or alias ends and the next begins.
        """
        sha256_algo = hashlib.sha256()
        for lexicon in self.lexicons:
            sha256_algo.update(lexicon.char.encode())
            for alias in lexicon.aliases:
                sha256_algo.update(alias.encode())
        return sha256_algo.hexdigest()
class
Lexicon:
class Lexicon:
    """One character entry: the char itself, its aliases, its tags and optional meta."""

    # The primary character of this entry.
    char: str
    # Alternative characters for this entry; held as a tuple after construction.
    aliases: Sequence[str] = attrs.field(factory=tuple)
    # Grouping tags for this entry; held as a tuple after construction.
    tags: Sequence[str] = attrs.field(factory=tuple)
    # Optional string-to-string metadata.
    meta: Optional[Mapping[str, str]] = None

    def __attrs_post_init__(self):
        # Both sequence fields are replaced by tuples. object.__setattr__ is
        # used because the class is declared frozen in the module source.
        for field_name in ("aliases", "tags"):
            as_tuple = tuple(getattr(self, field_name))
            object.__setattr__(self, field_name, as_tuple)

    @property
    def char_and_aliases(self):
        """Return a fresh list: ``char`` first, then each alias in order."""
        combined = [self.char]
        combined.extend(self.aliases)
        return combined

    @property
    def unicode_id(self):
        """Return the code point of ``char`` in uppercase hex, without a ``0x`` prefix."""
        code_point = ord(self.char)
        return format(code_point, "X")
Lexicon( char: str, aliases: Sequence[str] = NOTHING, tags: Sequence[str] = NOTHING, meta: Union[Mapping[str, str], NoneType] = None)
def __init__(self, char, aliases=NOTHING, tags=NOTHING, meta=attr_dict['meta'].default):
    # Initializer generated by attrs for Lexicon; it is not hand-written code.
    # Every field is written through the setter returned by _cached_setattr_get.
    # NOTE(review): presumably this setter bypasses the frozen-instance guard --
    # confirm against the attrs implementation.
    _setattr = _cached_setattr_get(self)
    _setattr('char', char)
    # NOTHING marks "argument not supplied"; in that case the field's factory
    # is called to produce the value.
    if aliases is not NOTHING:
        _setattr('aliases', aliases)
    else:
        _setattr('aliases', __attr_factory_aliases())
    if tags is not NOTHING:
        _setattr('tags', tags)
    else:
        _setattr('tags', __attr_factory_tags())
    _setattr('meta', meta)
    # Hand over to the class's own post-init hook once all fields are set.
    self.__attrs_post_init__()
Method generated by attrs for class Lexicon.
class
LexiconCollection:
class LexiconCollection:
    """A sequence of lexicons with lazily built char and tag lookup tables."""

    lexicons: Sequence[Lexicon]

    # Lookup caches, filled by lazy_post_init() on first access.
    # NOTE(review): attrs_lazy_field() is defined in vkit.utility; it presumably
    # yields a non-init field defaulting to None -- confirm against that helper.
    _char_to_lexicon: Optional[Mapping[str, Lexicon]] = attrs_lazy_field()
    _tag_to_lexicons: Optional[Mapping[str, Sequence[Lexicon]]] = attrs_lazy_field()
    _tags: Optional[Sequence[str]] = attrs_lazy_field()

    def lazy_post_init(self):
        """Build the lookup caches on first use; later calls return immediately.

        Raises:
            AssertionError: If a char or alias occurs more than once across
                ``lexicons``. In that case no cache field is modified, so the
                collection is not left half-initialized.
        """
        if self._char_to_lexicon is not None:
            return

        # Everything is built in locals first. ``_char_to_lexicon`` doubles as
        # the "initialized" guard, so it must not be assigned before all maps
        # are complete; otherwise a failure below would leave a partial cache
        # that later calls would treat as final.
        char_to_lexicon = {}
        for lexicon in self.lexicons:
            for char in lexicon.char_and_aliases:
                # Raised explicitly (instead of a bare ``assert``) so the check
                # also runs under ``python -O``; the exception type is unchanged.
                if char in char_to_lexicon:
                    raise AssertionError(f'Duplicated char or alias {char!r} in lexicons.')
                char_to_lexicon[char] = lexicon

        tag_to_lexicons: DefaultDict[str, List[Lexicon]] = defaultdict(list)
        for lexicon in self.lexicons:
            if lexicon.tags:
                for tag in lexicon.tags:
                    tag_to_lexicons[tag].append(lexicon)
            else:
                # Untagged lexicons are grouped under a dedicated key.
                tag_to_lexicons[KEY_NO_TAG].append(lexicon)

        # Convert to a plain dict so that missing-key lookups raise KeyError
        # instead of silently creating entries.
        self._tag_to_lexicons = dict(tag_to_lexicons)
        self._tags = sorted(self._tag_to_lexicons)
        # Published last: this is the field the guard at the top checks.
        self._char_to_lexicon = char_to_lexicon

    @property
    def char_to_lexicon(self):
        """Mapping from every char and alias to its lexicon."""
        self.lazy_post_init()
        return unwrap_optional_field(self._char_to_lexicon)

    @property
    def tag_to_lexicons(self):
        """Mapping from tag (or ``KEY_NO_TAG``) to the lexicons in that group."""
        self.lazy_post_init()
        return unwrap_optional_field(self._tag_to_lexicons)

    @property
    def tags(self):
        """Sorted list of the keys of ``tag_to_lexicons``."""
        self.lazy_post_init()
        return unwrap_optional_field(self._tags)

    def has_char(self, char: str):
        """Return True if ``char`` is a known char or alias."""
        return char in self.char_to_lexicon

    def get_lexicon(self, char: str):
        """Return the lexicon registered for ``char``; raises KeyError if unknown."""
        return self.char_to_lexicon[char]

    @classmethod
    def from_file(cls, path: PathType):
        """Create a collection from the lexicons stored at ``path``.

        NOTE(review): loading is delegated to ``dyn_structure`` from
        vkit.utility; the accepted file formats are defined there.
        """
        lexicons = dyn_structure(path, Sequence[Lexicon], force_path_type=True)
        return cls(lexicons=lexicons)

    def to_file(self, path: PathType):
        """Write the unstructured lexicons to ``path`` as JSON."""
        io.write_json(
            path,
            cattrs.unstructure(self.lexicons),
            indent=2,
            ensure_ascii=False,
        )

    def get_hash(self):
        """Return the SHA-256 hex digest over every char and alias, in order.

        Tags and meta do not contribute. The encoded strings are fed to the
        hasher back to back without a separator, so the digest does not
        distinguish where one char or alias ends and the next begins.
        """
        sha256_algo = hashlib.sha256()
        for lexicon in self.lexicons:
            sha256_algo.update(lexicon.char.encode())
            for alias in lexicon.aliases:
                sha256_algo.update(alias.encode())
        return sha256_algo.hexdigest()
LexiconCollection(lexicons: Sequence[vkit.element.lexicon.Lexicon])
def __init__(self, lexicons):
    # Initializer generated by attrs for LexiconCollection; it is not
    # hand-written code. Only `lexicons` is a constructor parameter.
    self.lexicons = lexicons
    # The three cache fields are not constructor parameters; each starts at the
    # default recorded on its attrs attribute definition.
    self._char_to_lexicon = attr_dict['_char_to_lexicon'].default
    self._tag_to_lexicons = attr_dict['_tag_to_lexicons'].default
    self._tags = attr_dict['_tags'].default
Method generated by attrs for class LexiconCollection.
def
lazy_post_init(self):
def lazy_post_init(self):
    """Build the lookup caches on first use; later calls return immediately.

    Raises:
        AssertionError: If a char or alias occurs more than once across
            ``lexicons``. In that case no cache field is modified, so the
            collection is not left half-initialized.
    """
    if self._char_to_lexicon is not None:
        return

    # Everything is built in locals first. ``_char_to_lexicon`` doubles as the
    # "initialized" guard, so it must not be assigned before all maps are
    # complete; otherwise a failure below would leave a partial cache that
    # later calls would treat as final.
    char_to_lexicon = {}
    for lexicon in self.lexicons:
        for char in lexicon.char_and_aliases:
            # Raised explicitly (instead of a bare ``assert``) so the check
            # also runs under ``python -O``; the exception type is unchanged.
            if char in char_to_lexicon:
                raise AssertionError(f'Duplicated char or alias {char!r} in lexicons.')
            char_to_lexicon[char] = lexicon

    tag_to_lexicons: DefaultDict[str, List[Lexicon]] = defaultdict(list)
    for lexicon in self.lexicons:
        if lexicon.tags:
            for tag in lexicon.tags:
                tag_to_lexicons[tag].append(lexicon)
        else:
            # Untagged lexicons are grouped under a dedicated key.
            tag_to_lexicons[KEY_NO_TAG].append(lexicon)

    # Convert to a plain dict so that missing-key lookups raise KeyError
    # instead of silently creating entries.
    self._tag_to_lexicons = dict(tag_to_lexicons)
    self._tags = sorted(self._tag_to_lexicons)
    # Published last: this is the field the guard at the top checks.
    self._char_to_lexicon = char_to_lexicon