prefer timestamp from title (which includes German tz)
This commit is contained in:
+17
@@ -29,7 +29,24 @@ def extract_station_name(title: str):
|
||||
m = re.search(r"Pegel\s+(.+)$", title)
|
||||
return m.group(1).strip() if m else title
|
||||
|
||||
def parse_timestamp_from_title(title: str):
    """Extract an ISO 8601 timestamp from a feed entry title.

    The title is searched for a fragment of the form
    ``DD.MM.YYYY HH:MM Uhr (MESZ`` or ``DD.MM.YYYY HH:MM Uhr (MEZ``.
    The zone hint selects a fixed UTC offset: ``MESZ`` maps to +02:00,
    ``MEZ`` to +01:00.

    Args:
        title: The entry title; may be ``None`` or empty.

    Returns:
        The timestamp as an ISO 8601 string with UTC offset (for example
        ``"2024-07-01T14:30:00+02:00"``), or ``None`` when the title is
        empty, contains no matching fragment, or the matched digits do not
        form a valid calendar date/time.
    """
    if not title:
        return None

    m = re.search(r"(\d{2}\.\d{2}\.\d{4})\s+(\d{2}:\d{2})\s+Uhr\s+\((MESZ|MEZ)", title)
    if not m:
        return None

    date_part, time_part, tz_hint = m.groups()
    try:
        parsed = dt.datetime.strptime(f"{date_part} {time_part}", "%d.%m.%Y %H:%M")
    except ValueError:
        # The regex only checks digit counts, so values such as "31.02.2024"
        # or "25:00" reach strptime. Treat them as "no timestamp in title"
        # instead of raising, so the caller can use another source.
        return None

    offset = dt.timedelta(hours=2 if tz_hint == "MESZ" else 1)
    return parsed.replace(tzinfo=dt.timezone(offset)).isoformat()
|
||||
|
||||
def parse_timestamp(entry):
|
||||
ts_from_title = parse_timestamp_from_title(entry.get("title"))
|
||||
if ts_from_title:
|
||||
return ts_from_title
|
||||
|
||||
if entry.get("dc_date"):
|
||||
return entry.get("dc_date")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user