Add index endpoint
This commit is contained in:
@@ -46,6 +46,39 @@ Response:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Index request (all discovered stations):
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/pegel
|
||||||
|
```
|
||||||
|
|
||||||
|
Index request with limit:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/pegel?limit=5
|
||||||
|
```
|
||||||
|
|
||||||
|
Index response shape:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"count": 5,
|
||||||
|
"total_available": 312,
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"station_id": "560021",
|
||||||
|
"station_title": "Golzern 1 / Vereinigte Mulde",
|
||||||
|
"source": "https://www.umwelt.sachsen.de/umwelt/infosysteme/hwims/portal/web/feed/wasserstand-pegel-560021"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": "https://www.umwelt.sachsen.de/umwelt/infosysteme/hwims/portal/web/wasserstand-uebersicht",
|
||||||
|
"fetched_at": "2026-05-05T01:00:00+00:00",
|
||||||
|
"warnings": [
|
||||||
|
"optional, only present for partial failures"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🚀 Local Development
|
## 🚀 Local Development
|
||||||
@@ -62,6 +95,7 @@ uvicorn api.app:app --reload
|
|||||||
Open:
|
Open:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
http://127.0.0.1:8000/api/pegel
|
||||||
http://127.0.0.1:8000/api/pegel/560021
|
http://127.0.0.1:8000/api/pegel/560021
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -109,6 +143,16 @@ Pegel ID is passed via URL:
|
|||||||
/api/pegel/<id>
|
/api/pegel/<id>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Index endpoint:
|
||||||
|
|
||||||
|
```
|
||||||
|
/api/pegel
|
||||||
|
```
|
||||||
|
|
||||||
|
Query params for index:
|
||||||
|
|
||||||
|
* `limit` (positive integer, optional; invalid values return HTTP 400)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 📡 Data Source
|
## 📡 Data Source
|
||||||
|
|||||||
+152
-2
@@ -2,12 +2,31 @@ from fastapi import FastAPI, HTTPException
|
|||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
import feedparser
|
import feedparser
|
||||||
import re
|
import re
|
||||||
|
import html
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
from urllib.error import URLError, HTTPError
|
||||||
from email.utils import parsedate_to_datetime
|
from email.utils import parsedate_to_datetime
|
||||||
|
from typing import TypedDict, Any
|
||||||
|
|
||||||
app = FastAPI()


# Per-station feed URL template; "{}" is filled with the station id (6+ digits).
BASE = "https://www.umwelt.sachsen.de/umwelt/infosysteme/hwims/portal/web/feed/wasserstand-pegel-{}"
# Overview page scraped by discover_stations() to find all station ids/titles.
DISCOVERY_URL = "https://www.umwelt.sachsen.de/umwelt/infosysteme/hwims/portal/web/wasserstand-uebersicht"
# Seconds a discovered station index is reused before re-scraping the overview.
INDEX_CACHE_TTL_SECONDS = 900
# Seconds a station's latest reading is reused before re-fetching its feed.
LATEST_CACHE_TTL_SECONDS = 120


class IndexCache(TypedDict):
    """Shape of the module-level station-index cache."""

    # Discovered station dicts, or None before the first successful discovery.
    items: list[dict[str, Any]] | None
    # UTC timestamp of the last successful discovery, or None if never fetched.
    fetched_at: dt.datetime | None


# Shared station-index cache; written only via _set_index_cache().
_index_cache: IndexCache = {
    "items": None,
    "fetched_at": None,
}

# Per-station cache: station_id -> {"latest": parsed entry, "fetched_at": datetime}.
_latest_cache: dict[str, dict[str, Any]] = {}
|
||||||
|
|
||||||
def parse_number(pattern, text):
|
def parse_number(pattern, text):
|
||||||
m = re.search(pattern, text, re.I)
|
m = re.search(pattern, text, re.I)
|
||||||
@@ -22,13 +41,79 @@ def clean_text(text):
|
|||||||
text = re.sub(r"<[^>]+>", " ", text)
|
text = re.sub(r"<[^>]+>", " ", text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def extract_station_name(title: str):
|
def extract_station_name(title: str | None):
|
||||||
if not title:
|
if not title:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
m = re.search(r"Pegel\s+(.+)$", title)
|
m = re.search(r"Pegel\s+(.+)$", title)
|
||||||
return m.group(1).strip() if m else title
|
return m.group(1).strip() if m else title
|
||||||
|
|
||||||
|
def _now_utc():
|
||||||
|
return dt.datetime.now(dt.UTC)
|
||||||
|
|
||||||
|
def _is_cache_fresh(fetched_at, ttl_seconds: int):
    """Whether a cached timestamp exists and is still within its TTL."""
    if not fetched_at:
        return False
    elapsed = _now_utc() - fetched_at
    return elapsed.total_seconds() < ttl_seconds
|
||||||
|
|
||||||
|
def _set_index_cache(items):
    """Store freshly discovered station items and stamp the fetch time."""
    _index_cache["fetched_at"] = _now_utc()
    _index_cache["items"] = items
|
||||||
|
|
||||||
|
def _extract_station_title_from_id(pegel_id: str):
    """Best-effort station title, obtained by fetching the station's feed.

    Tries the feed-level title first, then the first entry's title;
    returns None when neither yields a usable string.
    """
    feed = feedparser.parse(BASE.format(pegel_id))

    feed_meta = getattr(feed, "feed", None)
    title = feed_meta.get("title") if isinstance(feed_meta, dict) else None
    if isinstance(title, str) and title:
        return extract_station_name(title)

    if feed.entries:
        entry_title = feed.entries[0].get("title")
        if isinstance(entry_title, str):
            return extract_station_name(entry_title)

    return None
|
||||||
|
|
||||||
|
def _parse_stations_from_overview(body: str):
|
||||||
|
stations: dict[str, str | None] = {}
|
||||||
|
|
||||||
|
paired_pattern = re.compile(
|
||||||
|
r'<a\s+href="wasserstand-pegel-(\d{6,})"[^>]*></a>\s*'
|
||||||
|
r'<div\s+class="popUp\s+popUpMs"[^>]*>.*?'
|
||||||
|
r'<div\s+class="popUpTitle">\s*<span\s+class="popUpTitleBold">(.*?)</span>',
|
||||||
|
re.I | re.S,
|
||||||
|
)
|
||||||
|
|
||||||
|
for pegel_id, raw_title in paired_pattern.findall(body):
|
||||||
|
title = re.sub(r"\s+", " ", html.unescape(raw_title)).strip() or None
|
||||||
|
stations[pegel_id] = title
|
||||||
|
|
||||||
|
for pegel_id in re.findall(r"wasserstand-pegel-(\d{6,})", body):
|
||||||
|
stations.setdefault(pegel_id, None)
|
||||||
|
|
||||||
|
return stations
|
||||||
|
|
||||||
|
def discover_stations():
    """Scrape the overview page and build the station index.

    Returns a list of dicts with station_id, station_title and source
    (the station's feed URL), sorted by station id.

    Raises ValueError when the page yields no station ids, and lets
    urllib errors propagate to the caller.
    """
    request = Request(
        DISCOVERY_URL,
        headers={"User-Agent": "sachsen-pegel-proxy/1.0"},
    )
    with urlopen(request, timeout=12) as response:
        body = response.read().decode("utf-8", errors="ignore")

    stations = _parse_stations_from_overview(body)
    if not stations:
        raise ValueError("Keine Pegel-IDs in Discovery-Quelle gefunden")

    # Fall back to fetching the station's own feed when the overview
    # did not provide a title for the id.
    return [
        {
            "station_id": station_id,
            "station_title": stations[station_id] or _extract_station_title_from_id(station_id),
            "source": BASE.format(station_id),
        }
        for station_id in sorted(stations)
    ]
|
||||||
|
|
||||||
def parse_timestamp_from_title(title: str):
|
def parse_timestamp_from_title(title: str):
|
||||||
if not title:
|
if not title:
|
||||||
return None
|
return None
|
||||||
@@ -72,6 +157,68 @@ def parse_entry(entry):
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _get_latest_for_station(station_id: str):
    """Latest parsed feed entry for one station, served via a short TTL cache.

    Raises ValueError when the station feed has no entries.
    """
    cached = _latest_cache.get(station_id)
    if cached and _is_cache_fresh(cached.get("fetched_at"), LATEST_CACHE_TTL_SECONDS):
        return cached.get("latest")

    feed = feedparser.parse(BASE.format(station_id))
    if not feed.entries:
        raise ValueError("Keine Daten gefunden")

    newest = parse_entry(feed.entries[0])
    _latest_cache[station_id] = {
        "latest": newest,
        "fetched_at": _now_utc(),
    }
    return newest
|
||||||
|
|
||||||
|
@app.get("/api/pegel")
|
||||||
|
def list_pegel(limit: int | None = None):
|
||||||
|
if limit is not None and limit <= 0:
|
||||||
|
raise HTTPException(400, "Ungültiger limit-Parameter")
|
||||||
|
|
||||||
|
warnings = []
|
||||||
|
discovered_items: list[dict[str, Any]] | None = None
|
||||||
|
source_used = DISCOVERY_URL
|
||||||
|
cache_fallback = False
|
||||||
|
|
||||||
|
if _is_cache_fresh(_index_cache.get("fetched_at"), INDEX_CACHE_TTL_SECONDS):
|
||||||
|
discovered_items = _index_cache.get("items")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
discovered_items = discover_stations()
|
||||||
|
_set_index_cache(discovered_items)
|
||||||
|
except (URLError, HTTPError, TimeoutError, ValueError, OSError) as e:
|
||||||
|
if _index_cache.get("items"):
|
||||||
|
discovered_items = _index_cache.get("items")
|
||||||
|
source_used = "cache"
|
||||||
|
cache_fallback = True
|
||||||
|
warnings.append(f"Discovery fehlgeschlagen, nutze Cache: {str(e)}")
|
||||||
|
else:
|
||||||
|
raise HTTPException(502, "Upstream-Discovery nicht verfügbar")
|
||||||
|
|
||||||
|
if discovered_items is None:
|
||||||
|
raise HTTPException(502, "Upstream-Discovery nicht verfügbar")
|
||||||
|
|
||||||
|
total_available = len(discovered_items)
|
||||||
|
items = discovered_items[:limit] if limit is not None else list(discovered_items)
|
||||||
|
|
||||||
|
response = {
|
||||||
|
"count": len(items),
|
||||||
|
"total_available": total_available,
|
||||||
|
"items": items,
|
||||||
|
"source": source_used,
|
||||||
|
"fetched_at": _now_utc().isoformat(),
|
||||||
|
}
|
||||||
|
if warnings:
|
||||||
|
response["warnings"] = warnings
|
||||||
|
cache_fetched_at = _index_cache.get("fetched_at")
|
||||||
|
if cache_fallback and cache_fetched_at:
|
||||||
|
response["cache_fetched_at"] = cache_fetched_at.isoformat()
|
||||||
|
|
||||||
|
return JSONResponse(response)
|
||||||
|
|
||||||
@app.get("/api/pegel/{pegel_id}")
|
@app.get("/api/pegel/{pegel_id}")
|
||||||
def get_pegel(pegel_id: str):
|
def get_pegel(pegel_id: str):
|
||||||
if not re.fullmatch(r"\d{6,}", pegel_id):
|
if not re.fullmatch(r"\d{6,}", pegel_id):
|
||||||
@@ -85,9 +232,12 @@ def get_pegel(pegel_id: str):
|
|||||||
|
|
||||||
entries = [parse_entry(entry) for entry in feed.entries]
|
entries = [parse_entry(entry) for entry in feed.entries]
|
||||||
|
|
||||||
|
feed_obj = getattr(feed, "feed", None)
|
||||||
|
feed_title = feed_obj.get("title") if isinstance(feed_obj, dict) else None
|
||||||
|
|
||||||
return JSONResponse({
|
return JSONResponse({
|
||||||
"station_id": pegel_id,
|
"station_id": pegel_id,
|
||||||
"station_title": extract_station_name(feed.feed.get("title")),
|
"station_title": extract_station_name(feed_title),
|
||||||
"count": len(entries),
|
"count": len(entries),
|
||||||
"latest": entries[0],
|
"latest": entries[0],
|
||||||
"entries": entries,
|
"entries": entries,
|
||||||
|
|||||||
Reference in New Issue
Block a user