"""Small FastAPI service that re-exposes a gauge ("Pegel") feed as JSON.

The feed for one gauge is fetched from the URL template in ``BASE``, each
feed entry's HTML description is reduced to plain text, and the water level,
flow and flood alert level are extracted from it with regular expressions.
"""

import datetime as dt
import re
from email.utils import parsedate_to_datetime

import feedparser
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse

app = FastAPI()

# URL template of the per-gauge feed; ``{}`` is replaced by the gauge id.
BASE = "https://www.umwelt.sachsen.de/umwelt/infosysteme/hwims/portal/web/feed/wasserstand-pegel-{}"


def parse_number(pattern: str, text: str) -> float | None:
    """Return the first capture group of *pattern* in *text* as a float.

    The search is case-insensitive.  A decimal comma is accepted and
    converted to a decimal point.  Returns ``None`` when nothing matches.
    """
    m = re.search(pattern, text, re.I)
    return float(m.group(1).replace(",", ".")) if m else None


def parse_text(pattern: str, text: str) -> str | None:
    """Return the stripped first capture group of *pattern* in *text*.

    The search is case-insensitive.  Returns ``None`` when nothing matches.
    """
    m = re.search(pattern, text, re.I)
    return m.group(1).strip() if m else None


def clean_text(text: str) -> str:
    """Reduce an HTML fragment to plain text.

    ``<br>`` tags (any case, optionally self-closing) become newlines so that
    line-oriented patterns such as the "Meldestufe" one in ``parse_entry``
    stop at the end of their line; every other tag is replaced by a space.
    """
    # The pattern must not be empty: an empty pattern matches between every
    # character and would scatter newlines through the whole text, which
    # prevents all later field patterns from matching.
    text = re.sub(r"<br\s*/?>", "\n", text, flags=re.I)
    text = re.sub(r"<[^>]+>", " ", text)
    return text


def extract_station_name(title: str):
    """Return the part of *title* following the word "Pegel".

    Returns ``None`` for an empty/missing title and the unchanged title when
    it does not contain "Pegel" followed by whitespace and more text.
    """
    if not title:
        return None
    m = re.search(r"Pegel\s+(.+)$", title)
    return m.group(1).strip() if m else title


def parse_timestamp_from_title(title: str):
    """Extract an ISO-8601 timestamp from a title.

    The title is expected to contain ``DD.MM.YYYY HH:MM Uhr (MESZ`` or
    ``... (MEZ``.  "MESZ" is treated as UTC+2, "MEZ" as UTC+1.

    Returns ``None`` when the title is empty, the pattern is absent, or the
    matched digits do not form a valid date/time.
    """
    if not title:
        return None
    m = re.search(
        r"(\d{2}\.\d{2}\.\d{4})\s+(\d{2}:\d{2})\s+Uhr\s+\((MESZ|MEZ)", title
    )
    if not m:
        return None
    date_part, time_part, tz_hint = m.groups()
    try:
        parsed = dt.datetime.strptime(
            f"{date_part} {time_part}", "%d.%m.%Y %H:%M"
        )
    except ValueError:
        # Digits matched the pattern but are not a real date (e.g. 31.02.);
        # let the caller fall back to other timestamp sources.
        return None
    offset = dt.timedelta(hours=2 if tz_hint == "MESZ" else 1)
    return parsed.replace(tzinfo=dt.timezone(offset)).isoformat()


def parse_timestamp(entry):
    """Return a timestamp string for a feed entry, or ``None``.

    Sources are tried in this order: the timestamp embedded in the entry
    title, the entry's ``dc_date`` value (returned unchanged), and the
    ``published`` value parsed as an RFC 2822 date.  An unparseable
    ``published`` value yields ``None`` instead of raising.
    """
    ts_from_title = parse_timestamp_from_title(entry.get("title"))
    if ts_from_title:
        return ts_from_title

    dc_date = entry.get("dc_date")
    if dc_date:
        return dc_date

    published = entry.get("published")
    if published:
        try:
            return parsedate_to_datetime(published).isoformat()
        except (TypeError, ValueError):
            # Malformed date header: TypeError on older Python versions,
            # ValueError on newer ones.
            return None
    return None


def parse_entry(entry):
    """Convert one feed entry into a plain dict of parsed measurements.

    The returned dict has the keys ``timestamp``, ``title``,
    ``water_level_cm``, ``flow_m3s`` and ``flood_alert_level``; any value
    that cannot be extracted from the entry is ``None``.
    """
    # ``or ""`` also covers a description that is present but None.
    text = clean_text(entry.get("description") or "")
    return {
        "timestamp": parse_timestamp(entry),
        "title": entry.get("title"),
        "water_level_cm": parse_number(
            r"Wasserstand:\s*([0-9]+(?:[,.][0-9]+)?)\s*cm", text
        ),
        "flow_m3s": parse_number(
            r"Durchfluss:\s*([0-9]+(?:[,.][0-9]+)?)\s*m³/s", text
        ),
        "flood_alert_level": parse_text(
            r"Meldestufe:\s*([^\n\r]+)", text
        ),
    }


@app.get("/api/pegel/{pegel_id}")
def get_pegel(pegel_id: str):
    """Return the parsed feed of one gauge as JSON.

    Raises:
        HTTPException: 400 when *pegel_id* is not a string of at least six
            digits; 404 when the fetched feed contains no entries.
    """
    # Only digits are accepted, so the id can be inserted into the URL as is.
    if not re.fullmatch(r"\d{6,}", pegel_id):
        raise HTTPException(400, "Ungültige Pegel-ID")

    url = BASE.format(pegel_id)
    feed = feedparser.parse(url)
    if not feed.entries:
        raise HTTPException(404, "Keine Daten gefunden")

    entries = [parse_entry(entry) for entry in feed.entries]
    return JSONResponse({
        "station_id": pegel_id,
        "station_title": extract_station_name(feed.feed.get("title")),
        "count": len(entries),
        # NOTE(review): assumes the feed lists the newest entry first - confirm.
        "latest": entries[0],
        "entries": entries,
        "source": url,
        "fetched_at": dt.datetime.now(dt.UTC).isoformat(),
    })