vkit.utility.text.const.whitespace

Consts for detecting whitespace chars.

# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
'''
Constants for detecting whitespace characters.
'''
from typing import Sequence, Tuple

#: Code point intervals treated as whitespace.
#:
#: The value is a list holding one list of ``(first, last)`` code point pairs.
#: Both ends are inclusive: a single code point ``x`` is written ``(x, x)``.
#:
#: The code points below were pulled from
#: https://en.wikipedia.org/wiki/Whitespace_character
#: The intervals actually stored differ from these tables in two ways:
#: 0xB7 is left out, and the ranges starting at 0x2000, 0x2028 and 0x205F
#: are wider than the tabulated ones (see the inline comments).
#:
#: Table 1.
#:
#: 0x9
#: 0xA
#: 0xB
#: 0xC
#: 0xD
#: 0x20
#: 0x85
#: 0xA0
#: 0x1680
#: 0x2000
#: 0x2001
#: 0x2002
#: 0x2003
#: 0x2004
#: 0x2005
#: 0x2006
#: 0x2007
#: 0x2008
#: 0x2009
#: 0x200A
#: 0x2028
#: 0x2029
#: 0x202F
#: 0x205F
#: 0x3000
#:
#: Table 2.
#:
#: 0x180E
#: 0x200B
#: 0x200C
#: 0x200D
#: 0x2060
#: 0xFEFF
#:
#: Table 3.
#:
#: 0xB7
#: 0x237D
#: 0x2420
#: 0x2422
#: 0x2423
#:
ITV_WHITESPACE: Sequence[Sequence[Tuple[int, int]]] = [[
    (0x9, 0xD),
    (0x20, 0x20),
    (0x85, 0x85),
    (0xA0, 0xA0),

    # 0xB7 (the "middle dot") is intentionally NOT listed here: it was moved
    # to the delimiter category, since it is commonly used in Chinese news
    # material.
    (0x1680, 0x1680),
    (0x180E, 0x180E),

    # The tables only give (0x2000, 0x200D); the original author replaced
    # that range with this wider one.
    (0x2000, 0x200F),

    # The tables only give (0x2028, 0x2029); likewise replaced with a wider
    # range.
    (0x2028, 0x202C),
    (0x202F, 0x202F),

    # The tables only give (0x205F, 0x2060); likewise replaced with a wider
    # range.
    (0x205F, 0x206F),
    (0x237D, 0x237D),
    (0x2420, 0x2420),
    (0x2422, 0x2423),
    (0x3000, 0x3000),
    (0xFEFF, 0xFEFF),
]]