79 lines
2.2 KiB
Python
79 lines
2.2 KiB
Python
import datetime as dt
import html
import re
from email.utils import parsedate_to_datetime

import feedparser
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
|
|
|
|
# FastAPI application instance; the route handler in this file registers
# itself on it through the @app.get decorator.
app = FastAPI()
|
|
|
|
# URL template of the upstream per-gauge water-level feed; the "{}"
# placeholder is filled with the gauge ID via str.format().
BASE = "https://www.umwelt.sachsen.de/umwelt/infosysteme/hwims/portal/web/feed/wasserstand-pegel-{}"
|
|
|
|
def parse_number(pattern, text):
    """Return the number captured by *pattern* in *text* as a float.

    The search is case-insensitive. *pattern* must contain one capturing
    group around the numeric literal; a decimal comma in the captured text
    is converted to a decimal point before conversion.

    Returns None when *pattern* does not match anywhere in *text*.
    """
    found = re.search(pattern, text, flags=re.IGNORECASE)
    if found is None:
        return None

    normalized = found.group(1).replace(",", ".")
    return float(normalized)
|
|
|
|
def parse_text(pattern, text):
    """Return the text captured by group 1 of *pattern*, stripped.

    The search is case-insensitive. Returns None when *pattern* does not
    match anywhere in *text*.
    """
    found = re.search(pattern, text, flags=re.IGNORECASE)
    if found is None:
        return None

    captured = found.group(1)
    return captured.strip()
|
|
|
|
def clean_text(text):
    """Convert an HTML fragment to plain text.

    ``<br>`` variants become newlines, every other tag becomes a single
    space, and HTML character references (``&nbsp;``, ``&sup3;``,
    ``&#252;`` ...) are decoded so that later pattern matching sees the
    real characters.

    Args:
        text: HTML fragment; ``None`` or an empty string is accepted.

    Returns:
        The plain-text rendering, or ``""`` for empty/``None`` input.
    """
    if not text:
        return ""

    text = re.sub(r"<br\s*/?>", "\n", text, flags=re.I)
    text = re.sub(r"<[^>]+>", " ", text)
    # Decode entities only after the tags are gone, so an escaped "&lt;b&gt;"
    # stays literal text instead of being stripped as markup.
    return html.unescape(text)
|
|
|
|
def extract_station_name(title: str):
    """Derive the station name from a feed title.

    Returns whatever follows the first ``"Pegel "`` in *title* (stripped),
    the unchanged *title* when that word is absent, and None for an empty
    or missing title.
    """
    if title:
        found = re.search(r"Pegel\s+(.+)$", title)
        return title if found is None else found.group(1).strip()
    return None
|
|
|
|
def parse_timestamp(entry):
    """Return the timestamp of a feed entry as a string, or None.

    A non-empty ``dc_date`` value is returned unchanged. Otherwise the
    ``published`` value is parsed as an RFC 2822 date and returned in
    ISO 8601 form.

    Args:
        entry: Mapping-like feed entry supporting ``.get()``.

    Returns:
        The timestamp string, or None when neither field is present or
        ``published`` cannot be parsed.
    """
    dc_date = entry.get("dc_date")
    if dc_date:
        return dc_date

    published = entry.get("published")
    if not published:
        return None

    try:
        return parsedate_to_datetime(published).isoformat()
    except (TypeError, ValueError):
        # A malformed date in one entry must not fail the whole request.
        # ValueError is raised on current Pythons, TypeError on older ones.
        return None
|
|
|
|
def parse_entry(entry):
    """Turn one feed entry into a flat dict of readings.

    The entry's ``description`` is converted to plain text with
    ``clean_text`` and then searched for the labels "Wasserstand:" (cm),
    "Durchfluss:" (m³/s) and "Meldestufe:". A reading whose label is not
    found is reported as None.

    Returns:
        dict with the keys ``timestamp``, ``title``, ``water_level_cm``,
        ``flow_m3s`` and ``flood_alert_level``.
    """
    body = clean_text(entry.get("description", ""))

    timestamp = parse_timestamp(entry)
    title = entry.get("title")
    level_cm = parse_number(r"Wasserstand:\s*([0-9]+(?:[,.][0-9]+)?)\s*cm", body)
    flow = parse_number(r"Durchfluss:\s*([0-9]+(?:[,.][0-9]+)?)\s*m³/s", body)
    # The alert level is whatever text follows the label up to the line end.
    alert = parse_text(r"Meldestufe:\s*([^\n\r]+)", body)

    return {
        "timestamp": timestamp,
        "title": title,
        "water_level_cm": level_cm,
        "flow_m3s": flow,
        "flood_alert_level": alert,
    }
|
|
|
|
@app.get("/api/pegel/{pegel_id}")
def get_pegel(pegel_id: str):
    """Fetch the upstream feed for one gauge and return it as JSON.

    Args:
        pegel_id: Gauge identifier from the URL path; must consist of at
            least six ASCII digits.

    Returns:
        JSONResponse with the station ID and title, the number of entries,
        the first entry as ``latest``, all parsed entries, the source URL
        and the UTC fetch time.

    Raises:
        HTTPException: 400 for a malformed ID, 502 when the upstream feed
            could not be retrieved, 404 when it contains no entries.
    """
    # [0-9] instead of \d: in a str pattern \d also matches non-ASCII
    # Unicode digits, which must not end up in the upstream URL.
    if not re.fullmatch(r"[0-9]{6,}", pegel_id):
        raise HTTPException(400, "Ungültige Pegel-ID")

    url = BASE.format(pegel_id)
    feed = feedparser.parse(url)

    if not feed.entries:
        # feedparser only reports "status" when an HTTP response was
        # received; a missing status or a 5xx means the source failed,
        # which is different from "this gauge has no data".
        status = feed.get("status")
        if status is None or status >= 500:
            raise HTTPException(502, "Datenquelle nicht erreichbar")
        raise HTTPException(404, "Keine Daten gefunden")

    entries = [parse_entry(entry) for entry in feed.entries]

    return JSONResponse({
        "station_id": pegel_id,
        "station_title": extract_station_name(feed.feed.get("title")),
        "count": len(entries),
        # NOTE(review): assumes the feed lists the newest entry first - confirm.
        "latest": entries[0],
        "entries": entries,
        "source": url,
        # timezone.utc rather than dt.UTC, which only exists on Python >= 3.11.
        "fetched_at": dt.datetime.now(dt.timezone.utc).isoformat(),
    })