vkit.engine.char_sampler.datetime
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Sequence, Tuple, List, Optional
from datetime import date, datetime
import time

import attrs
from numpy.random import Generator as RandomGenerator
import pytz

from vkit.utility import rng_choice
from vkit.engine.interface import Engine, EngineExecutorFactory
from .type import CharSamplerEngineInitResource, CharSamplerEngineRunConfig


@attrs.define
class CharSamplerDatetimeEngineInitConfig:
    # Patterns handed to `datetime.strftime` when rendering a sampled datetime.
    datetime_formats: Sequence[str]
    # Timezone names handed to `pytz.timezone`.
    timezones: Sequence[str]
    # (year, month, day) bounds of the sampling range; unpacked into `datetime.date`.
    datetime_begin: Tuple[int, int, int] = (1991, 12, 25)
    datetime_end: Tuple[int, int, int] = (2050, 12, 31)


CharSamplerDatetimeEngineInitResource = CharSamplerEngineInitResource


class CharSamplerDatetimeEngine(
    Engine[
        CharSamplerDatetimeEngineInitConfig,
        CharSamplerDatetimeEngineInitResource,
        CharSamplerEngineRunConfig,
        Sequence[str],
    ]
):  # yapf: disable
    """Char sampler engine that emits randomly sampled, randomly formatted datetime text."""

    @classmethod
    def get_type_name(cls) -> str:
        """Return the type name of this engine, ``'datetime'``."""
        return 'datetime'

    def __init__(
        self,
        init_config: CharSamplerDatetimeEngineInitConfig,
        init_resource: Optional[CharSamplerDatetimeEngineInitResource] = None,
    ):
        """Cache the lexicon, the usable delimiters and the sampling range.

        ``init_resource`` is optional in the signature only; a falsy value trips the
        assertion below because the lexicon collection is read from it.
        """
        super().__init__(init_config, init_resource)

        assert init_resource
        lexicon_collection = init_resource.lexicon_collection
        self.lexicon_collection = lexicon_collection

        # Only delimiters known to the lexicon may be swapped into a format.
        candidate_delimiters = ('/', ':', '-', ',', '.', '*')
        self.delimiters = [
            delimiter for delimiter in candidate_delimiters
            if lexicon_collection.has_char(delimiter)
        ]

        def to_ticks(year_month_day: Tuple[int, int, int]) -> int:
            # `time.mktime` reads the struct_time as local time and returns epoch seconds.
            return int(time.mktime(date(*year_month_day).timetuple()))

        self.ticks_begin = to_ticks(init_config.datetime_begin)
        self.ticks_end = to_ticks(init_config.datetime_end)

    def sample_datetime_text(self, rng: RandomGenerator):
        """Sample one datetime, render it with a sampled format and filter by lexicon.

        Returns the rendered string with every non-whitespace char unknown to the
        lexicon removed and surrounding whitespace stripped. The result may be empty
        and may contain consecutive inner whitespace.
        """
        # Datetime. The upper bound of `rng.integers` is exclusive, hence the `+ 1`.
        ticks = rng.integers(self.ticks_begin, self.ticks_end + 1)
        # `fromtimestamp` yields a naive local-time datetime; `localize` only attaches
        # the sampled timezone to those wall-clock fields.
        naive_dt = datetime.fromtimestamp(ticks)
        timezone_name = rng_choice(rng, self.init_config.timezones)
        aware_dt = pytz.timezone(timezone_name).localize(naive_dt)

        # Datetime format: optionally swap one delimiter kind for another one or a space.
        pattern = rng_choice(rng, self.init_config.datetime_formats)
        present = [delimiter for delimiter in self.delimiters if delimiter in pattern]
        if present:
            victim = rng_choice(rng, present)
            substitutes = [delimiter for delimiter in self.delimiters if delimiter != victim]
            substitutes.append(' ')
            pattern = pattern.replace(victim, rng_choice(rng, substitutes))

        # To text.
        rendered = aware_dt.strftime(pattern)
        kept = [
            char for char in rendered if char.isspace() or self.lexicon_collection.has_char(char)
        ]
        return ''.join(kept).strip()

    def run(self, run_config: CharSamplerEngineRunConfig, rng: RandomGenerator) -> Sequence[str]:
        """Sample datetime text.

        In aggregator mode a single datetime text is returned as a string. Otherwise
        texts are sampled until their space-joined length reaches
        ``run_config.num_chars``, and the joined text is returned as a list of chars
        trimmed to at most ``num_chars`` entries whose last entry is not whitespace
        whenever a visible char is available.
        """
        if run_config.enable_aggregator_mode:
            return self.sample_datetime_text(rng)

        num_chars = run_config.num_chars

        texts: List[str] = []
        num_chars_in_texts = 0
        # Joined length = total text length + one separator between neighbours.
        while num_chars_in_texts + len(texts) - 1 < num_chars:
            text = self.sample_datetime_text(rng)
            texts.append(text)
            num_chars_in_texts += len(text)

        chars = list(' '.join(texts))

        # Trim and make sure the last char is not space.
        if len(chars) > num_chars:
            rest = chars[num_chars:]
            chars = chars[:num_chars]
            if chars and chars[-1].isspace():
                # A sampled text may hold consecutive whitespace, so the char right
                # after the cut is not guaranteed to be visible; look further ahead.
                replacement = next((char for char in rest if not char.isspace()), None)
                if replacement is None:
                    while chars and chars[-1].isspace():
                        chars.pop()
                else:
                    chars[-1] = replacement

        return chars


char_sampler_datetime_engine_executor_factory = EngineExecutorFactory(CharSamplerDatetimeEngine)
class
CharSamplerDatetimeEngineInitConfig:
class CharSamplerDatetimeEngineInitConfig:
    # Initialization options of the datetime char sampler engine.

    # Patterns handed to `datetime.strftime` when rendering a sampled datetime.
    datetime_formats: Sequence[str]
    # Timezone names handed to `pytz.timezone`.
    timezones: Sequence[str]
    # (year, month, day) bounds of the sampling range; unpacked into `datetime.date`.
    datetime_begin: Tuple[int, int, int] = (1991, 12, 25)
    datetime_end: Tuple[int, int, int] = (2050, 12, 31)
CharSamplerDatetimeEngineInitConfig( datetime_formats: Sequence[str], timezones: Sequence[str], datetime_begin: Tuple[int, int, int] = (1991, 12, 25), datetime_end: Tuple[int, int, int] = (2050, 12, 31))
def __init__(self, datetime_formats, timezones, datetime_begin=attr_dict['datetime_begin'].default, datetime_end=attr_dict['datetime_end'].default):
    # Generated by attrs for CharSamplerDatetimeEngineInitConfig: stores each
    # constructor argument on the instance unchanged. Defaults are looked up in
    # `attr_dict`, which is supplied by the code generator, not by this module.
    self.datetime_formats = datetime_formats
    self.timezones = timezones
    self.datetime_begin = datetime_begin
    self.datetime_end = datetime_end
Method generated by attrs for class CharSamplerDatetimeEngineInitConfig.
class
CharSamplerDatetimeEngine(vkit.engine.interface.Engine[vkit.engine.char_sampler.datetime.CharSamplerDatetimeEngineInitConfig, vkit.engine.char_sampler.type.CharSamplerEngineInitResource, vkit.engine.char_sampler.type.CharSamplerEngineRunConfig, typing.Sequence[str]]):
class CharSamplerDatetimeEngine(
    Engine[
        CharSamplerDatetimeEngineInitConfig,
        CharSamplerDatetimeEngineInitResource,
        CharSamplerEngineRunConfig,
        Sequence[str],
    ]
):  # yapf: disable
    """Char sampler engine that emits randomly sampled, randomly formatted datetime text."""

    @classmethod
    def get_type_name(cls) -> str:
        """Return the type name of this engine, ``'datetime'``."""
        return 'datetime'

    def __init__(
        self,
        init_config: CharSamplerDatetimeEngineInitConfig,
        init_resource: Optional[CharSamplerDatetimeEngineInitResource] = None,
    ):
        """Cache the lexicon, the usable delimiters and the sampling range.

        ``init_resource`` is optional in the signature only; a falsy value trips the
        assertion below because the lexicon collection is read from it.
        """
        super().__init__(init_config, init_resource)

        assert init_resource
        lexicon_collection = init_resource.lexicon_collection
        self.lexicon_collection = lexicon_collection

        # Only delimiters known to the lexicon may be swapped into a format.
        candidate_delimiters = ('/', ':', '-', ',', '.', '*')
        self.delimiters = [
            delimiter for delimiter in candidate_delimiters
            if lexicon_collection.has_char(delimiter)
        ]

        def to_ticks(year_month_day: Tuple[int, int, int]) -> int:
            # `time.mktime` reads the struct_time as local time and returns epoch seconds.
            return int(time.mktime(date(*year_month_day).timetuple()))

        self.ticks_begin = to_ticks(init_config.datetime_begin)
        self.ticks_end = to_ticks(init_config.datetime_end)

    def sample_datetime_text(self, rng: RandomGenerator):
        """Sample one datetime, render it with a sampled format and filter by lexicon.

        Returns the rendered string with every non-whitespace char unknown to the
        lexicon removed and surrounding whitespace stripped. The result may be empty
        and may contain consecutive inner whitespace.
        """
        # Datetime. The upper bound of `rng.integers` is exclusive, hence the `+ 1`.
        ticks = rng.integers(self.ticks_begin, self.ticks_end + 1)
        # `fromtimestamp` yields a naive local-time datetime; `localize` only attaches
        # the sampled timezone to those wall-clock fields.
        naive_dt = datetime.fromtimestamp(ticks)
        timezone_name = rng_choice(rng, self.init_config.timezones)
        aware_dt = pytz.timezone(timezone_name).localize(naive_dt)

        # Datetime format: optionally swap one delimiter kind for another one or a space.
        pattern = rng_choice(rng, self.init_config.datetime_formats)
        present = [delimiter for delimiter in self.delimiters if delimiter in pattern]
        if present:
            victim = rng_choice(rng, present)
            substitutes = [delimiter for delimiter in self.delimiters if delimiter != victim]
            substitutes.append(' ')
            pattern = pattern.replace(victim, rng_choice(rng, substitutes))

        # To text.
        rendered = aware_dt.strftime(pattern)
        kept = [
            char for char in rendered if char.isspace() or self.lexicon_collection.has_char(char)
        ]
        return ''.join(kept).strip()

    def run(self, run_config: CharSamplerEngineRunConfig, rng: RandomGenerator) -> Sequence[str]:
        """Sample datetime text.

        In aggregator mode a single datetime text is returned as a string. Otherwise
        texts are sampled until their space-joined length reaches
        ``run_config.num_chars``, and the joined text is returned as a list of chars
        trimmed to at most ``num_chars`` entries whose last entry is not whitespace
        whenever a visible char is available.
        """
        if run_config.enable_aggregator_mode:
            return self.sample_datetime_text(rng)

        num_chars = run_config.num_chars

        texts: List[str] = []
        num_chars_in_texts = 0
        # Joined length = total text length + one separator between neighbours.
        while num_chars_in_texts + len(texts) - 1 < num_chars:
            text = self.sample_datetime_text(rng)
            texts.append(text)
            num_chars_in_texts += len(text)

        chars = list(' '.join(texts))

        # Trim and make sure the last char is not space.
        if len(chars) > num_chars:
            rest = chars[num_chars:]
            chars = chars[:num_chars]
            if chars and chars[-1].isspace():
                # A sampled text may hold consecutive whitespace, so the char right
                # after the cut is not guaranteed to be visible; look further ahead.
                replacement = next((char for char in rest if not char.isspace()), None)
                if replacement is None:
                    while chars and chars[-1].isspace():
                        chars.pop()
                else:
                    chars[-1] = replacement

        return chars
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
            # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
CharSamplerDatetimeEngine( init_config: vkit.engine.char_sampler.datetime.CharSamplerDatetimeEngineInitConfig, init_resource: Union[vkit.engine.char_sampler.type.CharSamplerEngineInitResource, NoneType] = None)
def __init__(
    self,
    init_config: CharSamplerDatetimeEngineInitConfig,
    init_resource: Optional[CharSamplerDatetimeEngineInitResource] = None,
):
    """Cache the lexicon, the usable delimiters and the sampling range.

    ``init_resource`` is optional in the signature only; a falsy value trips the
    assertion below because the lexicon collection is read from it.
    """
    super().__init__(init_config, init_resource)

    assert init_resource
    lexicon_collection = init_resource.lexicon_collection
    self.lexicon_collection = lexicon_collection

    # Only delimiters known to the lexicon may be swapped into a format.
    candidate_delimiters = ('/', ':', '-', ',', '.', '*')
    self.delimiters = [
        delimiter for delimiter in candidate_delimiters
        if lexicon_collection.has_char(delimiter)
    ]

    def to_ticks(year_month_day):
        # `time.mktime` reads the struct_time as local time and returns epoch seconds.
        return int(time.mktime(date(*year_month_day).timetuple()))

    self.ticks_begin = to_ticks(init_config.datetime_begin)
    self.ticks_end = to_ticks(init_config.datetime_end)
def
sample_datetime_text(self, rng: numpy.random._generator.Generator):
def sample_datetime_text(self, rng: RandomGenerator):
    """Sample one datetime, render it with a sampled format and filter by lexicon.

    Returns the rendered string with every non-whitespace char unknown to the
    lexicon removed and surrounding whitespace stripped. The result may be empty
    and may contain consecutive inner whitespace.
    """
    # Datetime. The upper bound of `rng.integers` is exclusive, hence the `+ 1`.
    ticks = rng.integers(self.ticks_begin, self.ticks_end + 1)
    # `fromtimestamp` yields a naive local-time datetime; `localize` only attaches
    # the sampled timezone to those wall-clock fields.
    naive_dt = datetime.fromtimestamp(ticks)
    timezone_name = rng_choice(rng, self.init_config.timezones)
    aware_dt = pytz.timezone(timezone_name).localize(naive_dt)

    # Datetime format: optionally swap one delimiter kind for another one or a space.
    pattern = rng_choice(rng, self.init_config.datetime_formats)
    present = [delimiter for delimiter in self.delimiters if delimiter in pattern]
    if present:
        victim = rng_choice(rng, present)
        substitutes = [delimiter for delimiter in self.delimiters if delimiter != victim]
        substitutes.append(' ')
        pattern = pattern.replace(victim, rng_choice(rng, substitutes))

    # To text.
    rendered = aware_dt.strftime(pattern)
    kept = [
        char for char in rendered if char.isspace() or self.lexicon_collection.has_char(char)
    ]
    return ''.join(kept).strip()
def
run( self, run_config: vkit.engine.char_sampler.type.CharSamplerEngineRunConfig, rng: numpy.random._generator.Generator) -> Sequence[str]:
def run(self, run_config: CharSamplerEngineRunConfig, rng: RandomGenerator) -> Sequence[str]:
    """Sample datetime text.

    In aggregator mode a single datetime text is returned as a string. Otherwise
    texts are sampled until their space-joined length reaches
    ``run_config.num_chars``, and the joined text is returned as a list of chars
    trimmed to at most ``num_chars`` entries whose last entry is not whitespace
    whenever a visible char is available.
    """
    if run_config.enable_aggregator_mode:
        return self.sample_datetime_text(rng)

    num_chars = run_config.num_chars

    texts: List[str] = []
    num_chars_in_texts = 0
    # Joined length = total text length + one separator between neighbours.
    while num_chars_in_texts + len(texts) - 1 < num_chars:
        text = self.sample_datetime_text(rng)
        texts.append(text)
        num_chars_in_texts += len(text)

    chars = list(' '.join(texts))

    # Trim and make sure the last char is not space.
    if len(chars) > num_chars:
        rest = chars[num_chars:]
        chars = chars[:num_chars]
        if chars and chars[-1].isspace():
            # A sampled text may hold consecutive whitespace, so the char right
            # after the cut is not guaranteed to be visible; look further ahead.
            replacement = next((char for char in rest if not char.isspace()), None)
            if replacement is None:
                while chars and chars[-1].isspace():
                    chars.pop()
            else:
                chars[-1] = replacement

    return chars