Coverage for potnia/scripts/hittite.py: 100.00%

26 statements  

« prev     ^ index     » next       coverage.py v7.6.3, created at 2025-04-03 23:35 +0000

1from dataclasses import dataclass 

2from ..script import Script 

3 

4 

@dataclass
class Hittite(Script):
    """
    Transliteration handling and Unicode conversion for Hittite text.

    To use the singleton instance, import like so:
    ``from potnia import hittite``

    Designed especially for texts from the Catalog der Texte der Hethiter (CTH): https://www.hethport.uni-wuerzburg.de/CTH/index.php

    Attributes:
        config (str): Path to the configuration file or configuration data in string format.
            By default, it uses the 'hittite.yaml' file in the 'data' directory.
    """
    config: str = "hittite"

    def tokenize_transliteration(self, input_string: str) -> list[str]:
        """
        Split transliterated text into a flat list of tokens.

        The input is scanned one character at a time:

        * ``[``, ``]`` and the space character each close the token being
          built (if any) and are then emitted as a token of their own.
        * The ASCII hyphen and the second hyphen variant listed below close
          the token being built (if any) and are themselves discarded.
        * Every other character is appended to the token being built.

        Args:
            input_string (str): Input text in transliterated format.

        Returns:
            list[str]: Tokens in order of appearance; empty for empty input.
        """
        pieces = []
        pending = []  # characters of the token currently being assembled

        def flush():
            # Emit the token under construction, if there is one.
            if pending:
                pieces.append("".join(pending))
                pending.clear()

        for symbol in input_string:
            if symbol in '[] ':
                # Brackets and spaces are kept as standalone tokens.
                flush()
                pieces.append(symbol)
            elif symbol in ('-', '‑'):
                # Hyphens only separate tokens; they are not kept.
                flush()
            else:
                pending.append(symbol)

        # Whatever is still buffered at end of input is the final token.
        flush()
        return pieces

58 

59 

60 

61 

# Module-level singleton instance, constructed with the default field values.
62hittite = Hittite()