Coverage for app/airworthiness_sync.py: 100%

104 statements  

« prev     ^ index     » next       coverage.py v7.14.3, created at 2026-06-28 23:33 +0000

1""" 

2EASA Safety Publications Tool sync job. 

3 

4Queries the EASA AD search endpoint for each EASASourceNode, diffs against 

5stored AirworthinessDocument records, and creates pending_review statuses for 

6newly discovered documents on all aircraft that have the relevant component. 

7 

8Public API 

9---------- 

10sync_all_nodes(app) — called by the background scheduler; syncs every node. 

11sync_aircraft(ac) — called from the manual-trigger route; syncs only the 

12 nodes that belong to this aircraft's components. 

13""" 

14 

15import logging 

16import re 

17import time 

18import urllib.error 

19import urllib.parse 

20import urllib.request 

21from datetime import datetime, timezone 

22 

23from models import ( # pyright: ignore[reportMissingImports] 

24 Aircraft, 

25 AirworthinessDocument, 

26 AirworthinessDocStatus, 

27 AirworthinessDocType, 

28 AirworthinessDocumentStatus, 

29 EASASourceNode, 

30 db, 

31) 

32 

# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# URL that _fetch_references POSTs the advanced-search form to.
_EASA_SEARCH_URL = "https://ad.easa.europa.eu/search/advanced/result/"
# Timeout (seconds) passed to urllib.request.urlopen for each request.
_REQUEST_TIMEOUT = 15
_COURTESY_DELAY = 2.0  # seconds between requests
# Value of the User-Agent header sent with every request.
_USER_AGENT = "OpenHangar/airworthiness-sync (+https://github.com/e2jk/OpenHangar)"

# Matches "AD 2023-0048", "AD 2006-0345R", etc.
# NOTE(review): because of the trailing \b, a reference whose letter suffix is
# immediately followed by a digit (e.g. "AD 2015-0226R1") is not matched at
# all -- confirm whether references of that form need to be captured.
_AD_RE = re.compile(r"\bAD\s+\d{4}-\d+[A-Z]*\b")
# Matches "SIB 2024-01" etc. (same trailing-\b caveat as _AD_RE).
_SIB_RE = re.compile(r"\bSIB\s+\d{4}-\d+[A-Z]*\b")

44 

45 

def _build_tree_path(node: EASASourceNode) -> str:
    """
    Build the value of the "fi_tree" form field for *node*.

    The result has three segments joined by "|||" (TC holder, type, model).
    Each segment is "<own id>@@<parent id>@@<depth>@@<name>", where depth is
    0, 1, 2 and the TC holder segment has an empty parent id.
    """
    levels = (
        (node.tc_holder_node_id, "", node.tc_holder_name),
        (node.type_node_id, node.tc_holder_node_id, node.type_name),
        (node.model_node_id, node.type_node_id, node.model_name),
    )
    return "|||".join(
        f"{own_id}@@{parent_id}@@{depth}@@{label}"
        for depth, (own_id, parent_id, label) in enumerate(levels)
    )

52 

53 

def _fetch_references(node: EASASourceNode) -> list[tuple[str, str]]:
    """
    POST the advanced-search form for *node* to the EASA search endpoint and
    return the (reference, doc_type) tuples found in the response HTML.

    AD references are listed first, then SIB references, each in order of
    first appearance. Runs of whitespace inside a reference are collapsed to
    a single space, and a reference that occurs several times in the page is
    returned only once.

    Raises urllib.error.URLError (which includes HTTPError), or another
    OSError such as a socket timeout, when the request fails, and
    UnicodeDecodeError when the response body is not valid UTF-8.
    """
    form = {
        "fi_action": "advanced",
        "fi_tree": _build_tree_path(node),
        "fi_keyword": "",
        "fi_date_start": "",
        "fi_date_end": "",
        "ps_src_tree": "",
        "fi_notification": "N",
        "is_default": "N",
        "fi_basket[]": node.model_node_id,
    }
    # Supplying `data` makes urllib issue a POST request.
    req = urllib.request.Request(
        _EASA_SEARCH_URL,
        data=urllib.parse.urlencode(form).encode(),
        headers={"User-Agent": _USER_AGENT},
    )
    with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT) as resp:
        html = resp.read().decode()

    patterns = (
        (_AD_RE, AirworthinessDocType.AD),
        (_SIB_RE, AirworthinessDocType.SIB),
    )
    refs: list[tuple[str, str]] = []
    for pattern, doc_type in patterns:
        for match in pattern.finditer(html):
            # The patterns allow any whitespace run between prefix and number;
            # normalise it so the same document always yields the same key.
            refs.append((" ".join(match.group().split()), doc_type))

    # The same reference usually appears more than once in a result page;
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(refs))

85 

86 

def _easa_doc_url(reference: str) -> str:
    """
    Return the EASA document URL for *reference*.

    Every space in the reference becomes an underscore and every slash
    becomes a hyphen; the result is appended to the fixed base URL.
    """
    slug_table = str.maketrans({" ": "_", "/": "-"})
    return "https://ad.easa.europa.eu/ad/" + reference.translate(slug_table)

90 

91 

def _process_node(node: EASASourceNode) -> tuple[int, bool]:
    """
    Sync one node. Returns (new_docs_added, had_error).

    Fetches the references currently published for the node. For every
    reference not yet stored for it, creates an AirworthinessDocument plus an
    AirworthinessDocumentStatus (pending_review) for the aircraft that owns
    the node's component.

    If fetching fails, the error is logged, node.consecutive_errors is
    incremented and committed, and (0, True) is returned. On success
    consecutive_errors is reset to 0, last_synced_at is set to the current
    UTC time, and everything is committed in one transaction.
    """
    try:
        refs = _fetch_references(node)
    except Exception as exc:
        # Deliberately broad: whatever goes wrong while fetching or decoding
        # is recorded against this node instead of propagating to the caller.
        _log.warning(
            "EASA sync error for node %s (%s): %s", node.id, node.display_path, exc
        )
        node.consecutive_errors = (node.consecutive_errors or 0) + 1
        db.session.commit()
        return 0, True

    # References already stored for this node.
    known = {
        d.reference
        for d in AirworthinessDocument.query.filter_by(source_node_id=node.id).all()
    }

    aircraft_id = node.component.aircraft_id

    added = 0
    for reference, doc_type in refs:
        if reference in known:
            continue
        # Record it immediately so a reference repeated within the same
        # response does not produce a second document and status row.
        known.add(reference)

        doc = AirworthinessDocument(
            doc_type=doc_type,
            reference=reference,
            source_node_id=node.id,
            doc_url=_easa_doc_url(reference),
        )
        db.session.add(doc)
        # Flush so doc.id is populated before the status row refers to it.
        db.session.flush()

        # Create pending_review status for the aircraft
        st = AirworthinessDocumentStatus(
            aircraft_id=aircraft_id,
            document_id=doc.id,
            status=AirworthinessDocStatus.PENDING_REVIEW,
        )
        db.session.add(st)
        added += 1

    node.consecutive_errors = 0
    node.last_synced_at = datetime.now(timezone.utc)
    db.session.commit()
    return added, False

142 

143 

def sync_aircraft(ac: Aircraft) -> tuple[int, int]:
    """
    Sync every EASA source node attached to the components of *ac*.

    Nodes are processed one after another, with a pause of _COURTESY_DELAY
    seconds before each node except the first.
    Returns (total_new_docs, total_error_nodes).
    """
    new_docs = 0
    failed_nodes = 0
    aircraft_nodes = (
        node
        for comp in ac.components  # type: ignore[attr-defined]
        for node in comp.easa_source_nodes
    )
    for position, node in enumerate(aircraft_nodes):
        if position > 0:
            time.sleep(_COURTESY_DELAY)
        added, had_error = _process_node(node)
        new_docs += added
        failed_nodes += 1 if had_error else 0
    return new_docs, failed_nodes

162 

163 

def sync_all_nodes(app: object) -> None:
    """
    Sync every EASASourceNode in the database inside *app*'s application
    context. Called by the background scheduler (once per 24 h).

    A node with two or more consecutive errors and a recorded successful sync
    is skipped while fewer than min(2**errors, 7) whole days have passed since
    that sync. Between processed nodes the job sleeps _COURTESY_DELAY seconds.
    After the run, a warning is logged for each node that has never synced or
    whose last successful sync is more than 72 h old.
    """
    from datetime import timedelta

    import flask  # pyright: ignore[reportMissingImports]

    assert isinstance(app, flask.Flask)

    def _as_utc(stamp):
        # Timestamps without tzinfo are treated as UTC.
        if stamp.tzinfo is None:
            return stamp.replace(tzinfo=timezone.utc)
        return stamp

    with app.app_context():
        nodes = EASASourceNode.query.all()
        _log.info("EASA sync: starting sync for %d node(s)", len(nodes))
        new_docs = 0
        failures = 0
        backed_off = 0
        started_at = datetime.now(timezone.utc)
        attempted_any = False

        for node in nodes:
            # Exponential backoff: after 2+ consecutive failures, wait before
            # retrying; min(2^errors, 7) days from last successful sync.
            errors = node.consecutive_errors or 0
            if errors >= 2 and node.last_synced_at is not None:
                backoff_days = min(2**errors, 7)
                elapsed = started_at - _as_utc(node.last_synced_at)
                if elapsed.days < backoff_days:
                    _log.info(
                        "EASA sync: skipping node %s (%s) — %d error(s), backoff %d day(s)",
                        node.id,
                        node.display_path,
                        errors,
                        backoff_days,
                    )
                    backed_off += 1
                    continue

            # Pause before every request except the first one actually sent.
            if attempted_any:
                time.sleep(_COURTESY_DELAY)
            attempted_any = True

            added, had_error = _process_node(node)
            new_docs += added
            if had_error:
                failures += 1
                continue
            _log.debug(
                "EASA sync: node %s (%s) — %d new doc(s)",
                node.id,
                node.display_path,
                added,
            )

        _log.info(
            "EASA sync complete: %d new document(s), %d error(s), %d skipped (backoff)",
            new_docs,
            failures,
            backed_off,
        )

        # Warn for nodes overdue (72 h without a successful sync)
        cutoff = datetime.now(timezone.utc) - timedelta(hours=72)

        def _is_overdue(candidate):
            last_ok = candidate.last_synced_at
            return last_ok is None or _as_utc(last_ok) < cutoff

        overdue = [n for n in nodes if _is_overdue(n)]
        for node in overdue:
            _log.warning(
                "[AIRWORTHINESS] EASA sync overdue for node %s (%s) — last success: %s",
                node.id,
                node.display_path,
                node.last_synced_at,
            )
248 )