diff --git a/main.py b/main.py index bf89dac..f5b8f15 100644 --- a/main.py +++ b/main.py @@ -12,10 +12,10 @@ from selenium.webdriver.common.by import By from bs4 import BeautifulSoup try: - from settings import DOCUMENT_ID, WEBHOOK_URL + from settings import DOCUMENT_ID, WEBHOOK_URL, MODEL_NAME except ImportError: print("settings.py nicht gefunden, verwende settings_example.py") - from settings_example import DOCUMENT_ID, WEBHOOK_URL + from settings_example import DOCUMENT_ID, WEBHOOK_URL, MODEL_NAME from transcription import transcribe_audio_with_whisper @@ -146,13 +146,17 @@ def test_transcription(): accuracy = 0 total = 0 + # Load Whisper model once for efficiency + import whisper + model = whisper.load_model(MODEL_NAME) + # create or overwrite CSV file for transcription results with open("transcription_results.csv", "w") as csvfile: csvfile.write("filename,transcription\n") for mp3_path in download_dir.glob("*.mp3"): print(f"Verarbeite Datei: {mp3_path}") - transcription = transcribe_audio_with_whisper(mp3_path) + transcription = transcribe_audio_with_whisper(mp3_path, model=model) print(f"Transkription: {transcription}") diff --git a/transcription.py b/transcription.py index e109336..440d0bf 100644 --- a/transcription.py +++ b/transcription.py @@ -1,5 +1,11 @@ import re +try: + from settings import MODEL_NAME +except ImportError: + print("settings.py nicht gefunden, verwende settings_example.py") + from settings_example import MODEL_NAME + # Mapping von gesprochenen Buchstaben (deutsch) auf Zeichen SPOKEN_TO_CHAR = { "a": "a", "ah": "a", @@ -60,9 +66,11 @@ def _normalize_transcription(raw_text): print(f"Unbekanntes Token: '{token}'") return ''.join(result) -def transcribe_audio_with_whisper(mp3_path): - import whisper - model = whisper.load_model("small") +def transcribe_audio_with_whisper(mp3_path, model=None): + if model is None: + import whisper + model = whisper.load_model(MODEL_NAME) + result = model.transcribe(str(mp3_path), language='de') raw_text = result["text"] print("Raw transcription:", raw_text)