Prefer the timestamp from the title (which includes the German time zone, MEZ/MESZ)

This commit is contained in:
Matthias Jacob
2026-05-05 03:28:07 +02:00
parent b5950b6ce1
commit 883e3aa9d1
+18 -1
View File
@@ -29,7 +29,24 @@ def extract_station_name(title: str):
m = re.search(r"Pegel\s+(.+)$", title)
return m.group(1).strip() if m else title
def parse_timestamp_from_title(title: str):
if not title:
return None
m = re.search(r"(\d{2}\.\d{2}\.\d{4})\s+(\d{2}:\d{2})\s+Uhr\s+\((MESZ|MEZ)", title)
if not m:
return None
date_part, time_part, tz_hint = m.groups()
parsed = dt.datetime.strptime(f"{date_part} {time_part}", "%d.%m.%Y %H:%M")
offset = dt.timedelta(hours=2 if tz_hint == "MESZ" else 1)
return parsed.replace(tzinfo=dt.timezone(offset)).isoformat()
def parse_timestamp(entry):
ts_from_title = parse_timestamp_from_title(entry.get("title"))
if ts_from_title:
return ts_from_title
if entry.get("dc_date"):
return entry.get("dc_date")
@@ -76,4 +93,4 @@ def get_pegel(pegel_id: str):
"entries": entries,
"source": url,
"fetched_at": dt.datetime.now(dt.UTC).isoformat(),
})
})