vkit.engine.char_sampler.datetime

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence, Tuple, List, Optional
 15from datetime import date, datetime
 16import time
 17
 18import attrs
 19from numpy.random import Generator as RandomGenerator
 20import pytz
 21
 22from vkit.utility import rng_choice
 23from vkit.engine.interface import Engine, EngineExecutorFactory
 24from .type import CharSamplerEngineInitResource, CharSamplerEngineRunConfig
 25
 26
@attrs.define
class CharSamplerDatetimeEngineInitConfig:
    """Static configuration for the datetime char sampler engine."""

    # Format strings handed to ``datetime.strftime``; one is picked per sample.
    datetime_formats: Sequence[str]
    # Timezone names handed to ``pytz.timezone``; one is picked per sample.
    timezones: Sequence[str]
    # (year, month, day) of the earliest date to sample; unpacked into ``date(...)``.
    datetime_begin: Tuple[int, int, int] = (1991, 12, 25)
    # (year, month, day) of the latest date to sample; unpacked into ``date(...)``.
    # NOTE(review): the engine samples up to midnight at the start of this day,
    # so later times on the end date itself are not produced.
    datetime_end: Tuple[int, int, int] = (2050, 12, 31)
 33
 34
 35CharSamplerDatetimeEngineInitResource = CharSamplerEngineInitResource
 36
 37
 38class CharSamplerDatetimeEngine(
 39    Engine[
 40        CharSamplerDatetimeEngineInitConfig,
 41        CharSamplerDatetimeEngineInitResource,
 42        CharSamplerEngineRunConfig,
 43        Sequence[str],
 44    ]
 45):  # yapf: disable
 46
 47    @classmethod
 48    def get_type_name(cls) -> str:
 49        return 'datetime'
 50
 51    def __init__(
 52        self,
 53        init_config: CharSamplerDatetimeEngineInitConfig,
 54        init_resource: Optional[CharSamplerDatetimeEngineInitResource] = None,
 55    ):
 56        super().__init__(init_config, init_resource)
 57
 58        assert init_resource
 59        self.lexicon_collection = init_resource.lexicon_collection
 60        self.delimiters = [
 61            char for char in ['/', ':', '-', ',', '.', '*']
 62            if self.lexicon_collection.has_char(char)
 63        ]
 64        self.ticks_begin = int(time.mktime(date(*init_config.datetime_begin).timetuple()))
 65        self.ticks_end = int(time.mktime(date(*init_config.datetime_end).timetuple()))
 66
 67    def sample_datetime_text(self, rng: RandomGenerator):
 68        # Datetime.
 69        ticks = rng.integers(self.ticks_begin, self.ticks_end + 1)
 70        # I don't know why, but it works.
 71        dt = datetime.fromtimestamp(ticks)
 72        tz = pytz.timezone(rng_choice(rng, self.init_config.timezones))
 73        dt = tz.localize(dt)
 74
 75        # Datetime format.
 76        datetime_format = rng_choice(rng, self.init_config.datetime_formats)
 77        delimiters = [delimiter for delimiter in self.delimiters if delimiter in datetime_format]
 78        if delimiters:
 79            selected_delimiter = rng_choice(rng, delimiters)
 80            other_delimiters = [
 81                delimiter for delimiter in self.delimiters if delimiter != selected_delimiter
 82            ]
 83            other_delimiters.append(' ')
 84            repl_delimiter = rng_choice(rng, other_delimiters)
 85            datetime_format = datetime_format.replace(selected_delimiter, repl_delimiter)
 86
 87        # To text.
 88        text = dt.strftime(datetime_format)
 89        return ''.join(
 90            char for char in text if char.isspace() or self.lexicon_collection.has_char(char)
 91        ).strip()
 92
 93    def run(self, run_config: CharSamplerEngineRunConfig, rng: RandomGenerator) -> Sequence[str]:
 94        if not run_config.enable_aggregator_mode:
 95            num_chars = run_config.num_chars
 96
 97            texts: List[str] = []
 98            num_chars_in_texts = 0
 99            while num_chars_in_texts + len(texts) - 1 < num_chars:
100                text = self.sample_datetime_text(rng)
101                texts.append(text)
102                num_chars_in_texts += len(text)
103
104            chars = list(' '.join(texts))
105
106            # Trim and make sure the last char is not space.
107            if len(chars) > num_chars:
108                rest = chars[num_chars:]
109                chars = chars[:num_chars]
110                if chars[-1].isspace():
111                    chars.pop()
112                    assert not rest[0].isspace()
113                    chars.append(rest[0])
114
115            return chars
116
117        else:
118            return self.sample_datetime_text(rng)
119
120
# Module-level factory bound to the datetime engine class.
# NOTE(review): presumably used to build executors for this engine — see
# vkit.engine.interface.EngineExecutorFactory to confirm.
char_sampler_datetime_engine_executor_factory = EngineExecutorFactory(CharSamplerDatetimeEngine)
class CharSamplerDatetimeEngineInitConfig:
class CharSamplerDatetimeEngineInitConfig:
    """Static configuration for the datetime char sampler engine."""

    # Format strings handed to ``datetime.strftime``; one is picked per sample.
    datetime_formats: Sequence[str]
    # Timezone names handed to ``pytz.timezone``; one is picked per sample.
    timezones: Sequence[str]
    # (year, month, day) of the earliest date to sample; unpacked into ``date(...)``.
    datetime_begin: Tuple[int, int, int] = (1991, 12, 25)
    # (year, month, day) of the latest date to sample; unpacked into ``date(...)``.
    datetime_end: Tuple[int, int, int] = (2050, 12, 31)
CharSamplerDatetimeEngineInitConfig( datetime_formats: Sequence[str], timezones: Sequence[str], datetime_begin: Tuple[int, int, int] = (1991, 12, 25), datetime_end: Tuple[int, int, int] = (2050, 12, 31))
2def __init__(self, datetime_formats, timezones, datetime_begin=attr_dict['datetime_begin'].default, datetime_end=attr_dict['datetime_end'].default):
3    self.datetime_formats = datetime_formats
4    self.timezones = timezones
5    self.datetime_begin = datetime_begin
6    self.datetime_end = datetime_end

Method generated by attrs for class CharSamplerDatetimeEngineInitConfig.

 39class CharSamplerDatetimeEngine(
 40    Engine[
 41        CharSamplerDatetimeEngineInitConfig,
 42        CharSamplerDatetimeEngineInitResource,
 43        CharSamplerEngineRunConfig,
 44        Sequence[str],
 45    ]
 46):  # yapf: disable
 47
 48    @classmethod
 49    def get_type_name(cls) -> str:
 50        return 'datetime'
 51
 52    def __init__(
 53        self,
 54        init_config: CharSamplerDatetimeEngineInitConfig,
 55        init_resource: Optional[CharSamplerDatetimeEngineInitResource] = None,
 56    ):
 57        super().__init__(init_config, init_resource)
 58
 59        assert init_resource
 60        self.lexicon_collection = init_resource.lexicon_collection
 61        self.delimiters = [
 62            char for char in ['/', ':', '-', ',', '.', '*']
 63            if self.lexicon_collection.has_char(char)
 64        ]
 65        self.ticks_begin = int(time.mktime(date(*init_config.datetime_begin).timetuple()))
 66        self.ticks_end = int(time.mktime(date(*init_config.datetime_end).timetuple()))
 67
 68    def sample_datetime_text(self, rng: RandomGenerator):
 69        # Datetime.
 70        ticks = rng.integers(self.ticks_begin, self.ticks_end + 1)
 71        # I don't know why, but it works.
 72        dt = datetime.fromtimestamp(ticks)
 73        tz = pytz.timezone(rng_choice(rng, self.init_config.timezones))
 74        dt = tz.localize(dt)
 75
 76        # Datetime format.
 77        datetime_format = rng_choice(rng, self.init_config.datetime_formats)
 78        delimiters = [delimiter for delimiter in self.delimiters if delimiter in datetime_format]
 79        if delimiters:
 80            selected_delimiter = rng_choice(rng, delimiters)
 81            other_delimiters = [
 82                delimiter for delimiter in self.delimiters if delimiter != selected_delimiter
 83            ]
 84            other_delimiters.append(' ')
 85            repl_delimiter = rng_choice(rng, other_delimiters)
 86            datetime_format = datetime_format.replace(selected_delimiter, repl_delimiter)
 87
 88        # To text.
 89        text = dt.strftime(datetime_format)
 90        return ''.join(
 91            char for char in text if char.isspace() or self.lexicon_collection.has_char(char)
 92        ).strip()
 93
 94    def run(self, run_config: CharSamplerEngineRunConfig, rng: RandomGenerator) -> Sequence[str]:
 95        if not run_config.enable_aggregator_mode:
 96            num_chars = run_config.num_chars
 97
 98            texts: List[str] = []
 99            num_chars_in_texts = 0
100            while num_chars_in_texts + len(texts) - 1 < num_chars:
101                text = self.sample_datetime_text(rng)
102                texts.append(text)
103                num_chars_in_texts += len(text)
104
105            chars = list(' '.join(texts))
106
107            # Trim and make sure the last char is not space.
108            if len(chars) > num_chars:
109                rest = chars[num_chars:]
110                chars = chars[:num_chars]
111                if chars[-1].isspace():
112                    chars.pop()
113                    assert not rest[0].isspace()
114                    chars.append(rest[0])
115
116            return chars
117
118        else:
119            return self.sample_datetime_text(rng)

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

CharSamplerDatetimeEngine( init_config: vkit.engine.char_sampler.datetime.CharSamplerDatetimeEngineInitConfig, init_resource: Union[vkit.engine.char_sampler.type.CharSamplerEngineInitResource, NoneType] = None)
52    def __init__(
53        self,
54        init_config: CharSamplerDatetimeEngineInitConfig,
55        init_resource: Optional[CharSamplerDatetimeEngineInitResource] = None,
56    ):
57        super().__init__(init_config, init_resource)
58
59        assert init_resource
60        self.lexicon_collection = init_resource.lexicon_collection
61        self.delimiters = [
62            char for char in ['/', ':', '-', ',', '.', '*']
63            if self.lexicon_collection.has_char(char)
64        ]
65        self.ticks_begin = int(time.mktime(date(*init_config.datetime_begin).timetuple()))
66        self.ticks_end = int(time.mktime(date(*init_config.datetime_end).timetuple()))
@classmethod
def get_type_name(cls) -> str:
    @classmethod
    def get_type_name(cls) -> str:
        """Return the identifier of this engine type."""
        return 'datetime'
def sample_datetime_text(self, rng: numpy.random._generator.Generator):
68    def sample_datetime_text(self, rng: RandomGenerator):
69        # Datetime.
70        ticks = rng.integers(self.ticks_begin, self.ticks_end + 1)
71        # I don't know why, but it works.
72        dt = datetime.fromtimestamp(ticks)
73        tz = pytz.timezone(rng_choice(rng, self.init_config.timezones))
74        dt = tz.localize(dt)
75
76        # Datetime format.
77        datetime_format = rng_choice(rng, self.init_config.datetime_formats)
78        delimiters = [delimiter for delimiter in self.delimiters if delimiter in datetime_format]
79        if delimiters:
80            selected_delimiter = rng_choice(rng, delimiters)
81            other_delimiters = [
82                delimiter for delimiter in self.delimiters if delimiter != selected_delimiter
83            ]
84            other_delimiters.append(' ')
85            repl_delimiter = rng_choice(rng, other_delimiters)
86            datetime_format = datetime_format.replace(selected_delimiter, repl_delimiter)
87
88        # To text.
89        text = dt.strftime(datetime_format)
90        return ''.join(
91            char for char in text if char.isspace() or self.lexicon_collection.has_char(char)
92        ).strip()
def run( self, run_config: vkit.engine.char_sampler.type.CharSamplerEngineRunConfig, rng: numpy.random._generator.Generator) -> Sequence[str]:
 94    def run(self, run_config: CharSamplerEngineRunConfig, rng: RandomGenerator) -> Sequence[str]:
 95        if not run_config.enable_aggregator_mode:
 96            num_chars = run_config.num_chars
 97
 98            texts: List[str] = []
 99            num_chars_in_texts = 0
100            while num_chars_in_texts + len(texts) - 1 < num_chars:
101                text = self.sample_datetime_text(rng)
102                texts.append(text)
103                num_chars_in_texts += len(text)
104
105            chars = list(' '.join(texts))
106
107            # Trim and make sure the last char is not space.
108            if len(chars) > num_chars:
109                rest = chars[num_chars:]
110                chars = chars[:num_chars]
111                if chars[-1].isspace():
112                    chars.pop()
113                    assert not rest[0].isspace()
114                    chars.append(rest[0])
115
116            return chars
117
118        else:
119            return self.sample_datetime_text(rng)