diff --git a/transcription.py b/transcription.py index 038ea08..e113095 100644 --- a/transcription.py +++ b/transcription.py @@ -50,7 +50,7 @@ SPOKEN_TO_CHAR = { def _normalize_transcription(raw_text): """Wandelt eine Whisper-Transkription in die tatsächlichen Captcha-Zeichen um.""" # Entferne Satzzeichen und splitte in Tokens - tokens = re.split(r'[,.\s]+', raw_text.lower().strip()) + tokens = re.split(r'[-,\.\s]+', raw_text.lower().strip()) result = [] for token in tokens: if not token: