Coverage for potnia/scripts/hittite.py: 100.00%
26 statements
« prev ^ index » next coverage.py v7.6.3, created at 2025-04-03 23:35 +0000
« prev ^ index » next coverage.py v7.6.3, created at 2025-04-03 23:35 +0000
1from dataclasses import dataclass
2from ..script import Script
@dataclass
class Hittite(Script):
    """
    Class for handling text transliteration and unicode conversion to Hittite.

    To use the singleton instance, import like so:
        ``from potnia import hittite``

    Designed especially for texts from the Catalog der Texte der Hethiter (CTH): https://www.hethport.uni-wuerzburg.de/CTH/index.php

    Attributes:
        config (str): Path to the configuration file or configuration data in string format.
            By default, it uses the 'hittite.yaml' file in the 'data' directory.
    """
    config: str = "hittite"

    def tokenize_transliteration(self, input_string: str) -> list[str]:
        """
        Tokenizes transliterated text according to specific patterns.

        The input is scanned one character at a time:

        * ``[``, ``]`` and the space character end the token being built and
          are themselves emitted as single-character tokens.
        * Hyphens end the token being built but are discarded.
        * Every other character is appended to the token being built.

        Args:
            input_string (str): Input text in transliterated format.

        Returns:
            list[str]: List of tokens
        """
        # Characters that split tokens and are kept in the output.
        kept_separators = '[] '
        # Characters that split tokens and are dropped: the ASCII hyphen-minus
        # and a visually similar non-ASCII hyphen (copied verbatim from the
        # original literal; presumably U+2011 NON-BREAKING HYPHEN -- confirm).
        dropped_separators = ('-', '‑')

        tokens = []
        token = ""

        for char in input_string:
            is_kept = char in kept_separators
            if is_kept or char in dropped_separators:
                # Any separator closes the token collected so far.
                if token:
                    tokens.append(token)
                    token = ""
                if is_kept:
                    tokens.append(char)
            else:
                token += char

        # Add the last token if it exists
        if token:
            tokens.append(token)

        return tokens
# Module-level instance of Hittite created with its default configuration
# (config="hittite"); the class docstring refers to it as the singleton
# obtained via ``from potnia import hittite``.
62hittite = Hittite()