From 883e3aa9d125db0159e23596ba11db1bf3b8256c Mon Sep 17 00:00:00 2001 From: Matthias Jacob Date: Tue, 5 May 2026 03:28:07 +0200 Subject: [PATCH] prefer timestamp from title (which includes German tz) --- api/app.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/api/app.py b/api/app.py index 29469bf..e7158a0 100644 --- a/api/app.py +++ b/api/app.py @@ -29,7 +29,24 @@ def extract_station_name(title: str): m = re.search(r"Pegel\s+(.+)$", title) return m.group(1).strip() if m else title +def parse_timestamp_from_title(title: str): + if not title: + return None + + m = re.search(r"(\d{2}\.\d{2}\.\d{4})\s+(\d{2}:\d{2})\s+Uhr\s+\((MESZ|MEZ)", title) + if not m: + return None + + date_part, time_part, tz_hint = m.groups() + parsed = dt.datetime.strptime(f"{date_part} {time_part}", "%d.%m.%Y %H:%M") + offset = dt.timedelta(hours=2 if tz_hint == "MESZ" else 1) + return parsed.replace(tzinfo=dt.timezone(offset)).isoformat() + def parse_timestamp(entry): + ts_from_title = parse_timestamp_from_title(entry.get("title")) + if ts_from_title: + return ts_from_title + if entry.get("dc_date"): return entry.get("dc_date") @@ -76,4 +93,4 @@ def get_pegel(pegel_id: str): "entries": entries, "source": url, "fetched_at": dt.datetime.now(dt.UTC).isoformat(), - }) \ No newline at end of file + })