prefer timestamp from title (which includes German tz)

This commit is contained in:
Matthias Jacob
2026-05-05 03:28:07 +02:00
parent b5950b6ce1
commit 883e3aa9d1
+17
View File
@@ -29,7 +29,24 @@ def extract_station_name(title: str):
m = re.search(r"Pegel\s+(.+)$", title) m = re.search(r"Pegel\s+(.+)$", title)
return m.group(1).strip() if m else title return m.group(1).strip() if m else title
def parse_timestamp_from_title(title: str):
    """Extract a timezone-aware ISO 8601 timestamp from an entry title.

    The title is searched for a German-formatted timestamp of the form
    ``DD.MM.YYYY HH:MM Uhr (MESZ`` or ``DD.MM.YYYY HH:MM Uhr (MEZ``.
    ``MESZ`` is mapped to a fixed UTC+02:00 offset, ``MEZ`` to UTC+01:00.

    Args:
        title: Title text to search; ``None`` or an empty string is accepted.

    Returns:
        The timestamp as an ISO 8601 string including the UTC offset
        (e.g. ``"2026-05-05T03:15:00+02:00"``), or ``None`` when the title
        is empty, contains no matching timestamp, or the matched digits do
        not form a valid calendar date/time.
    """
    if not title:
        return None
    m = re.search(r"(\d{2}\.\d{2}\.\d{4})\s+(\d{2}:\d{2})\s+Uhr\s+\((MESZ|MEZ)", title)
    if not m:
        return None
    date_part, time_part, tz_hint = m.groups()
    try:
        parsed = dt.datetime.strptime(f"{date_part} {time_part}", "%d.%m.%Y %H:%M")
    except ValueError:
        # The regex only checks digit counts, so impossible values such as
        # "31.02.2026" or "25:61" reach strptime; report them as "no
        # timestamp" like every other unparseable title instead of raising.
        return None
    # The title names the zone explicitly, so a fixed offset is sufficient:
    # MESZ (summer time) is UTC+2, MEZ is UTC+1.
    offset = dt.timedelta(hours=2 if tz_hint == "MESZ" else 1)
    return parsed.replace(tzinfo=dt.timezone(offset)).isoformat()
def parse_timestamp(entry): def parse_timestamp(entry):
ts_from_title = parse_timestamp_from_title(entry.get("title"))
if ts_from_title:
return ts_from_title
if entry.get("dc_date"): if entry.get("dc_date"):
return entry.get("dc_date") return entry.get("dc_date")