Coverage for app/airworthiness_sync.py: 100%
104 statements
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-28 23:33 +0000
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-28 23:33 +0000
1"""
2EASA Safety Publications Tool sync job.
4Queries the EASA AD search endpoint for each EASASourceNode, diffs against
5stored AirworthinessDocument records, and creates pending_review statuses for
6newly discovered documents on all aircraft that have the relevant component.
8Public API
9----------
10sync_all_nodes(app) — called by the background scheduler; syncs every node.
11sync_aircraft(ac) — called from the manual-trigger route; syncs only the
12 nodes that belong to this aircraft's components.
13"""
15import logging
16import re
17import time
18import urllib.error
19import urllib.parse
20import urllib.request
21from datetime import datetime, timezone
23from models import ( # pyright: ignore[reportMissingImports]
24 Aircraft,
25 AirworthinessDocument,
26 AirworthinessDocStatus,
27 AirworthinessDocType,
28 AirworthinessDocumentStatus,
29 EASASourceNode,
30 db,
31)
_log = logging.getLogger(__name__)

_EASA_SEARCH_URL = "https://ad.easa.europa.eu/search/advanced/result/"
_REQUEST_TIMEOUT = 15  # seconds, passed to urlopen()
_COURTESY_DELAY = 2.0  # seconds between requests
_USER_AGENT = "OpenHangar/airworthiness-sync (+https://github.com/e2jk/OpenHangar)"

# Matches "AD 2023-0048", "AD 2006-0345R", "AD 2006-0345R1", etc.
# The suffix class includes digits on purpose: with letters only, the trailing
# \b can never be satisfied inside a numbered revision ("...R1"), so such a
# reference would be skipped entirely instead of being matched.
_AD_RE = re.compile(r"\bAD\s+\d{4}-\d+[A-Z0-9]*\b")
# Matches "SIB 2024-01", "SIB 2024-01R1", etc. (same suffix handling as above).
_SIB_RE = re.compile(r"\bSIB\s+\d{4}-\d+[A-Z0-9]*\b")
def _build_tree_path(node: EASASourceNode) -> str:
    """
    Build the tree-path string sent as the ``fi_tree`` search field.

    The path has three levels (TC holder, type, model) separated by ``|||``.
    Each level is rendered as ``<node id>@@<parent id>@@<depth>@@<name>``; the
    top level has an empty parent id.
    """
    levels = (
        (node.tc_holder_node_id, "", node.tc_holder_name),
        (node.type_node_id, node.tc_holder_node_id, node.type_name),
        (node.model_node_id, node.type_node_id, node.model_name),
    )
    return "|||".join(
        f"{node_id}@@{parent_id}@@{depth}@@{label}"
        for depth, (node_id, parent_id, label) in enumerate(levels)
    )
def _fetch_references(node: EASASourceNode) -> list[tuple[str, str]]:
    """
    POST to the EASA search endpoint and return a list of (reference, doc_type)
    tuples for the documents found.

    AD references come first, then SIB references. Each reference is returned
    once, in order of first appearance, with internal whitespace collapsed to
    a single space.

    Raises urllib.error.URLError (or another OSError such as a timeout) if the
    request fails.
    """
    payload = {
        "fi_action": "advanced",
        "fi_tree": _build_tree_path(node),
        "fi_keyword": "",
        "fi_date_start": "",
        "fi_date_end": "",
        "ps_src_tree": "",
        "fi_notification": "N",
        "is_default": "N",
        "fi_basket[]": node.model_node_id,
    }
    data = urllib.parse.urlencode(payload).encode()
    req = urllib.request.Request(
        _EASA_SEARCH_URL,
        data=data,
        headers={"User-Agent": _USER_AGENT},
    )
    with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT) as resp:
        # The references we look for are plain ASCII, so a stray undecodable
        # byte elsewhere in the page should not fail the whole fetch.
        html = resp.read().decode("utf-8", errors="replace")

    refs: list[tuple[str, str]] = []
    seen: set[str] = set()
    for pattern, doc_type in (
        (_AD_RE, AirworthinessDocType.AD),
        (_SIB_RE, AirworthinessDocType.SIB),
    ):
        for m in pattern.finditer(html):
            # \s+ in the pattern may span several spaces or a line break;
            # normalise so the same document always yields the same reference.
            reference = " ".join(m.group().split())
            # A result page can mention the same reference more than once;
            # report it only once so callers do not store duplicates.
            if reference in seen:
                continue
            seen.add(reference)
            refs.append((reference, doc_type))
    return refs
def _easa_doc_url(reference: str) -> str:
    """
    Return the EASA document URL for *reference*.

    The URL slug is the reference with spaces turned into underscores and
    slashes turned into hyphens.
    """
    slug_table = str.maketrans({" ": "_", "/": "-"})
    return "https://ad.easa.europa.eu/ad/" + reference.translate(slug_table)
def _process_node(node: EASASourceNode) -> tuple[int, bool]:
    """
    Sync one node. Returns (new_docs_added, had_error).

    For every fetched reference not yet stored for this node, creates an
    AirworthinessDocument plus an AirworthinessDocumentStatus (pending_review)
    for the aircraft that owns the node's component.

    If fetching fails, the error is logged, the node's consecutive_errors
    counter is incremented and (0, True) is returned. On success the counter
    is reset and last_synced_at is set to the current UTC time.
    """
    try:
        refs = _fetch_references(node)
    except Exception as exc:
        # Any fetch failure is recorded on the node rather than propagated,
        # so the caller can carry on with the remaining nodes.
        _log.warning(
            "EASA sync error for node %s (%s): %s", node.id, node.display_path, exc
        )
        node.consecutive_errors = (node.consecutive_errors or 0) + 1
        db.session.commit()
        return 0, True

    # Existing references for this node
    existing = {
        d.reference
        for d in AirworthinessDocument.query.filter_by(source_node_id=node.id).all()
    }

    aircraft_id = node.component.aircraft_id

    added = 0
    for reference, doc_type in refs:
        if reference in existing:
            continue
        # Track it immediately: a reference repeated within the same fetch
        # must not produce a second document and status.
        existing.add(reference)
        doc = AirworthinessDocument(
            doc_type=doc_type,
            reference=reference,
            source_node_id=node.id,
            doc_url=_easa_doc_url(reference),
        )
        db.session.add(doc)
        # Flush so doc.id is assigned before the status row refers to it.
        db.session.flush()

        # Create pending_review status for the aircraft
        st = AirworthinessDocumentStatus(
            aircraft_id=aircraft_id,
            document_id=doc.id,
            status=AirworthinessDocStatus.PENDING_REVIEW,
        )
        db.session.add(st)
        added += 1

    node.consecutive_errors = 0
    node.last_synced_at = datetime.now(timezone.utc)
    db.session.commit()
    return added, False
def sync_aircraft(ac: Aircraft) -> tuple[int, int]:
    """
    Sync every EASA source node attached to the components of *ac*.

    Nodes are processed one after another, with a courtesy pause before each
    request except the first. Returns (total_new_docs, total_error_nodes).
    """
    node_stream = (
        source_node
        for component in ac.components  # type: ignore[attr-defined]
        for source_node in component.easa_source_nodes
    )
    new_docs = 0
    failed_nodes = 0
    for position, source_node in enumerate(node_stream):
        if position:
            # Not the first request: be polite to the remote server.
            time.sleep(_COURTESY_DELAY)
        count, failed = _process_node(source_node)
        new_docs += count
        if failed:
            failed_nodes += 1
    return new_docs, failed_nodes
def sync_all_nodes(app: object) -> None:
    """
    Sync every EASASourceNode in the database. Called by the background
    scheduler (once per 24 h).

    Nodes with two or more consecutive errors are skipped while they are
    still inside their backoff window, measured from the last successful
    sync. After the run, a warning is logged for every node without a
    successful sync in the last 72 h.
    """
    import flask  # pyright: ignore[reportMissingImports]
    from datetime import timedelta

    assert isinstance(app, flask.Flask)

    def _as_utc(stamp):
        # Stored timestamps may be naive; those are treated as UTC.
        if stamp.tzinfo is None:
            return stamp.replace(tzinfo=timezone.utc)
        return stamp

    with app.app_context():
        all_nodes = EASASourceNode.query.all()
        _log.info("EASA sync: starting sync for %d node(s)", len(all_nodes))

        new_docs = 0
        failed_nodes = 0
        backed_off = 0
        started = datetime.now(timezone.utc)
        pause_needed = False

        for node in all_nodes:
            # Exponential backoff: after 2+ consecutive failures, wait before
            # retrying. backoff = min(2^errors, 7) days from last successful sync.
            failures = node.consecutive_errors or 0
            if failures >= 2 and node.last_synced_at is not None:
                wait_days = min(2**failures, 7)
                if (started - _as_utc(node.last_synced_at)).days < wait_days:
                    _log.info(
                        "EASA sync: skipping node %s (%s) — %d error(s), backoff %d day(s)",
                        node.id,
                        node.display_path,
                        failures,
                        wait_days,
                    )
                    backed_off += 1
                    continue

            # Pause between requests, but not before the first one actually sent.
            if pause_needed:
                time.sleep(_COURTESY_DELAY)
            pause_needed = True

            count, failed = _process_node(node)
            new_docs += count
            if failed:
                failed_nodes += 1
                continue
            _log.debug(
                "EASA sync: node %s (%s) — %d new doc(s)",
                node.id,
                node.display_path,
                count,
            )

        _log.info(
            "EASA sync complete: %d new document(s), %d error(s), %d skipped (backoff)",
            new_docs,
            failed_nodes,
            backed_off,
        )

        # Warn for nodes overdue (72 h without a successful sync)
        threshold = datetime.now(timezone.utc) - timedelta(hours=72)
        stale = [
            candidate
            for candidate in all_nodes
            if candidate.last_synced_at is None
            or _as_utc(candidate.last_synced_at) < threshold
        ]
        for node in stale:
            _log.warning(
                "[AIRWORTHINESS] EASA sync overdue for node %s (%s) — last success: %s",
                node.id,
                node.display_path,
                node.last_synced_at,
            )
248 )