vkit.pipeline.text_detection.page_image

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence, Mapping, Any, List, Union
 15
 16import attrs
 17from numpy.random import Generator as RandomGenerator
 18
 19from vkit.utility import PathType, rng_choice
 20from vkit.element import Image, Box
 21from vkit.engine.image import image_engine_executor_aggregator_factory
 22from vkit.mechanism.distortion import rotate
 23from ..interface import PipelineStep, PipelineStepFactory
 24from .page_layout import PageLayoutStepOutput
 25
 26
@attrs.define
class PageImageStepConfig:
    """Configuration of the page image step."""
    # Image engine configs, given either inline (a sequence of mappings) or as a
    # path. PageImageStep hands this value unchanged to
    # image_engine_executor_aggregator_factory.create().
    image_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Bounds passed to rng.uniform() when PageImageStep samples the alpha of
    # each page image.
    alpha_min: float = 0.25
    alpha_max: float = 1.0
 32
 33
@attrs.define
class PageImageStepInput:
    """Input of the page image step."""
    # Output of the page layout step; PageImageStep.run() reads its
    # `page_layout` (layout images, page height and page width).
    page_layout_step_output: PageLayoutStepOutput
 37
 38
@attrs.define
class PageImage:
    """One generated image together with its placement on the page."""
    # Image produced by the image engine aggregator, requested with the
    # height and width of `box`.
    image: Image
    # Box of the layout image this page image was generated for.
    box: Box
    # Value sampled from rng.uniform(alpha_min, alpha_max).
    # NOTE(review): presumably the blending opacity used downstream -- confirm.
    alpha: float
 44
 45
@attrs.define
class PageImageCollection:
    """All page images of one page plus the page dimensions."""
    # Copied from page_layout.height / page_layout.width by PageImageStep.run().
    height: int
    width: int
    # One PageImage per entry of page_layout.layout_images, in the same order.
    page_images: Sequence[PageImage]
 51
 52
@attrs.define
class PageImageStepOutput:
    """Output of the page image step."""
    # Page images generated for the layout images of the page.
    page_image_collection: PageImageCollection
    # For filling inactive region caused by distortion.
    # Generated separately from the page images and rotated by an angle drawn
    # from (0, 90, 180, 270).
    page_bottom_layer_image: Image
 58
 59
 60class PageImageStep(
 61    PipelineStep[
 62        PageImageStepConfig,
 63        PageImageStepInput,
 64        PageImageStepOutput,
 65    ]
 66):  # yapf: disable
 67
 68    def __init__(self, config: PageImageStepConfig):
 69        super().__init__(config)
 70
 71        self.image_engine_executor_aggregator = \
 72            image_engine_executor_aggregator_factory.create(self.config.image_configs)
 73
 74    def run(self, input: PageImageStepInput, rng: RandomGenerator):
 75        page_layout_step_output = input.page_layout_step_output
 76        page_layout = page_layout_step_output.page_layout
 77
 78        page_images: List[PageImage] = []
 79        for layout_image in page_layout.layout_images:
 80            image = self.image_engine_executor_aggregator.run(
 81                {
 82                    'height': layout_image.box.height,
 83                    'width': layout_image.box.width,
 84                },
 85                rng,
 86            )
 87            alpha = float(rng.uniform(self.config.alpha_min, self.config.alpha_max))
 88            page_images.append(PageImage(
 89                image=image,
 90                box=layout_image.box,
 91                alpha=alpha,
 92            ))
 93
 94        page_image_collection = PageImageCollection(
 95            height=page_layout.height,
 96            width=page_layout.width,
 97            page_images=page_images,
 98        )
 99
100        page_bottom_layer_image = self.image_engine_executor_aggregator.run(
101            {
102                'height': 0,
103                'width': 0,
104                'disable_resizing': True,
105            },
106            rng,
107        )
108        # Random rotate.
109        rotate_angle = rng_choice(rng, (0, 90, 180, 270))
110        page_bottom_layer_image = rotate.distort_image(
111            {'angle': rotate_angle},
112            page_bottom_layer_image,
113        )
114
115        return PageImageStepOutput(
116            page_image_collection=page_image_collection,
117            page_bottom_layer_image=page_bottom_layer_image,
118        )
119
120
# Module-level PipelineStepFactory instance constructed with the PageImageStep class.
page_image_step_factory = PipelineStepFactory(PageImageStep)
class PageImageStepConfig:
class PageImageStepConfig:
    """Configuration of the page image step."""
    # Image engine configs, given either inline (a sequence of mappings) or as a
    # path. PageImageStep hands this value unchanged to
    # image_engine_executor_aggregator_factory.create().
    image_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Bounds passed to rng.uniform() when PageImageStep samples the alpha of
    # each page image.
    alpha_min: float = 0.25
    alpha_max: float = 1.0
PageImageStepConfig( image_configs: Union[Sequence[Mapping[str, Any]], str, os.PathLike], alpha_min: float = 0.25, alpha_max: float = 1.0)
2def __init__(self, image_configs, alpha_min=attr_dict['alpha_min'].default, alpha_max=attr_dict['alpha_max'].default):
3    self.image_configs = image_configs
4    self.alpha_min = alpha_min
5    self.alpha_max = alpha_max

Method generated by attrs for class PageImageStepConfig.

class PageImageStepInput:
class PageImageStepInput:
    """Input of the page image step."""
    # Output of the page layout step; PageImageStep.run() reads its
    # `page_layout` (layout images, page height and page width).
    page_layout_step_output: PageLayoutStepOutput
PageImageStepInput( page_layout_step_output: vkit.pipeline.text_detection.page_layout.PageLayoutStepOutput)
2def __init__(self, page_layout_step_output):
3    self.page_layout_step_output = page_layout_step_output

Method generated by attrs for class PageImageStepInput.

class PageImage:
class PageImage:
    """One generated image together with its placement on the page."""
    # Image produced by the image engine aggregator, requested with the
    # height and width of `box`.
    image: Image
    # Box of the layout image this page image was generated for.
    box: Box
    # Value sampled from rng.uniform(alpha_min, alpha_max).
    # NOTE(review): presumably the blending opacity used downstream -- confirm.
    alpha: float
PageImage( image: vkit.element.image.Image, box: vkit.element.box.Box, alpha: float)
2def __init__(self, image, box, alpha):
3    self.image = image
4    self.box = box
5    self.alpha = alpha

Method generated by attrs for class PageImage.

class PageImageCollection:
class PageImageCollection:
    """All page images of one page plus the page dimensions."""
    # Copied from page_layout.height / page_layout.width by PageImageStep.run().
    height: int
    width: int
    # One PageImage per entry of page_layout.layout_images, in the same order.
    page_images: Sequence[PageImage]
PageImageCollection( height: int, width: int, page_images: Sequence[vkit.pipeline.text_detection.page_image.PageImage])
2def __init__(self, height, width, page_images):
3    self.height = height
4    self.width = width
5    self.page_images = page_images

Method generated by attrs for class PageImageCollection.

class PageImageStepOutput:
class PageImageStepOutput:
    """Output of the page image step."""
    # Page images generated for the layout images of the page.
    page_image_collection: PageImageCollection
    # For filling inactive region caused by distortion.
    # Generated separately from the page images and rotated by an angle drawn
    # from (0, 90, 180, 270).
    page_bottom_layer_image: Image
PageImageStepOutput( page_image_collection: vkit.pipeline.text_detection.page_image.PageImageCollection, page_bottom_layer_image: vkit.element.image.Image)
2def __init__(self, page_image_collection, page_bottom_layer_image):
3    self.page_image_collection = page_image_collection
4    self.page_bottom_layer_image = page_bottom_layer_image

Method generated by attrs for class PageImageStepOutput.

 61class PageImageStep(
 62    PipelineStep[
 63        PageImageStepConfig,
 64        PageImageStepInput,
 65        PageImageStepOutput,
 66    ]
 67):  # yapf: disable
 68
 69    def __init__(self, config: PageImageStepConfig):
 70        super().__init__(config)
 71
 72        self.image_engine_executor_aggregator = \
 73            image_engine_executor_aggregator_factory.create(self.config.image_configs)
 74
 75    def run(self, input: PageImageStepInput, rng: RandomGenerator):
 76        page_layout_step_output = input.page_layout_step_output
 77        page_layout = page_layout_step_output.page_layout
 78
 79        page_images: List[PageImage] = []
 80        for layout_image in page_layout.layout_images:
 81            image = self.image_engine_executor_aggregator.run(
 82                {
 83                    'height': layout_image.box.height,
 84                    'width': layout_image.box.width,
 85                },
 86                rng,
 87            )
 88            alpha = float(rng.uniform(self.config.alpha_min, self.config.alpha_max))
 89            page_images.append(PageImage(
 90                image=image,
 91                box=layout_image.box,
 92                alpha=alpha,
 93            ))
 94
 95        page_image_collection = PageImageCollection(
 96            height=page_layout.height,
 97            width=page_layout.width,
 98            page_images=page_images,
 99        )
100
101        page_bottom_layer_image = self.image_engine_executor_aggregator.run(
102            {
103                'height': 0,
104                'width': 0,
105                'disable_resizing': True,
106            },
107            rng,
108        )
109        # Random rotate.
110        rotate_angle = rng_choice(rng, (0, 90, 180, 270))
111        page_bottom_layer_image = rotate.distort_image(
112            {'angle': rotate_angle},
113            page_bottom_layer_image,
114        )
115
116        return PageImageStepOutput(
117            page_image_collection=page_image_collection,
118            page_bottom_layer_image=page_bottom_layer_image,
119        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
            # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

69    def __init__(self, config: PageImageStepConfig):
70        super().__init__(config)
71
72        self.image_engine_executor_aggregator = \
73            image_engine_executor_aggregator_factory.create(self.config.image_configs)
def run( self, input: vkit.pipeline.text_detection.page_image.PageImageStepInput, rng: numpy.random._generator.Generator):
 75    def run(self, input: PageImageStepInput, rng: RandomGenerator):
 76        page_layout_step_output = input.page_layout_step_output
 77        page_layout = page_layout_step_output.page_layout
 78
 79        page_images: List[PageImage] = []
 80        for layout_image in page_layout.layout_images:
 81            image = self.image_engine_executor_aggregator.run(
 82                {
 83                    'height': layout_image.box.height,
 84                    'width': layout_image.box.width,
 85                },
 86                rng,
 87            )
 88            alpha = float(rng.uniform(self.config.alpha_min, self.config.alpha_max))
 89            page_images.append(PageImage(
 90                image=image,
 91                box=layout_image.box,
 92                alpha=alpha,
 93            ))
 94
 95        page_image_collection = PageImageCollection(
 96            height=page_layout.height,
 97            width=page_layout.width,
 98            page_images=page_images,
 99        )
100
101        page_bottom_layer_image = self.image_engine_executor_aggregator.run(
102            {
103                'height': 0,
104                'width': 0,
105                'disable_resizing': True,
106            },
107            rng,
108        )
109        # Random rotate.
110        rotate_angle = rng_choice(rng, (0, 90, 180, 270))
111        page_bottom_layer_image = rotate.distort_image(
112            {'angle': rotate_angle},
113            page_bottom_layer_image,
114        )
115
116        return PageImageStepOutput(
117            page_image_collection=page_image_collection,
118            page_bottom_layer_image=page_bottom_layer_image,
119        )