Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4# This file is part of Vallenato.fr. 

5# 

6# Vallenato.fr is free software: you can redistribute it and/or modify 

7# it under the terms of the GNU Affero General Public License as published by 

8# the Free Software Foundation, either version 3 of the License, or 

9# (at your option) any later version. 

10# 

11# Vallenato.fr is distributed in the hope that it will be useful, 

12# but WITHOUT ANY WARRANTY; without even the implied warranty of 

13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

14# GNU Affero General Public License for more details. 

15# 

16# You should have received a copy of the GNU Affero General Public License 

17# along with Vallenato.fr. If not, see <http://www.gnu.org/licenses/>. 

18 

19import logging 

20import os 

21import sys 

22import json 

23import shutil 

24from slugify import slugify 

25import sitemap.generator as generator 

26import re 

27import datetime 

28 

29from youtube import HttpError 

30from youtube import yt_get_authenticated_service 

31from youtube import yt_get_my_uploads_list 

32from youtube import yt_list_my_uploaded_videos 

33 

# File that can contain the data downloaded from YouTube
# (local cache, to avoid hitting the YouTube API on every run)
UPLOADED_VIDEOS_DUMP_FILE = "data/uploaded_videos_dump.json"
# File containing the list of videos that have hardcoded locations
# (video ID -> location name, for titles that don't follow the
# ", desde <place>" naming convention)
LOCATION_SPECIAL_CASES_FILE = "data/location_special_cases.json"
# File containing the already-identified latitude/longitude for each location name
GEOLOCATIONS_FILE = "data/geolocations.json"
# Output file used for the website (a JS file consumed directly by the HTML pages)
WEBSITE_DATA_FILE = "../website/src/data.js"
# Sitemap file (generated for the production website)
SITEMAP_FILE = "../website/prod/sitemap.xml"
# Version of the external libraries (bundled in src/, served from CDNs in prod)
LEAFLET_VERSION = "1.5.1"
BOOTSTRAP_VERSION = "4.3.1"
JQUERY_VERSION = "3.3.1"
BOOTSTRAP_TOGGLE_VERSION = "3.6.1"

49 

50 

def get_dumped_uploaded_videos(dump_file):
    """Load a previously dumped list of uploaded videos, if one exists.

    Returns the parsed JSON content of dump_file, or an empty list when no
    dump file is present (the caller then falls back to the network).
    """
    # Use a previously dumped file if it exists, to bypass the network transactions
    if not os.path.exists(dump_file):
        return []
    with open(dump_file) as in_file:
        return json.load(in_file)

58 

def save_uploaded_videos(uploaded_videos, dump_file):
    """Dump the list of uploaded videos to dump_file as pretty-printed JSON."""
    serialized = json.dumps(uploaded_videos, sort_keys=True, indent=2)
    with open(dump_file, 'w') as out_file:
        out_file.write(serialized)

62 

def determine_videos_slug(uploaded_videos):
    """Attach a URL slug, derived from the title, to each video dict."""
    logging.debug("Determining each video's slug...")
    for video in uploaded_videos:
        # "-desde-" is collapsed so the place name directly follows the
        # song name in the resulting URL
        video["slug"] = slugify(video["title"]).replace("-desde-", "-")
    return uploaded_videos

68 

def get_uploaded_videos(args, dump_file):
    """Return the list of uploaded videos, each with its slug.

    Uses the local dump file when available; otherwise downloads the list
    from YouTube (exiting with code 19 on an HTTP error). Saves a dump when
    args.dump_uploaded_videos is set.
    """
    videos = get_dumped_uploaded_videos(dump_file)
    if not videos:
        service = yt_get_authenticated_service(args)
        # Get the list of videos uploaded to YouTube
        try:
            playlist_id = yt_get_my_uploads_list(service)
            if not playlist_id:
                logging.info('There is no uploaded videos playlist for this user.')
            else:
                videos = yt_list_my_uploaded_videos(playlist_id, service)
                logging.debug("Uploaded videos: %s" % videos)
        except HttpError as err:
            logging.debug('An HTTP error %d occurred:\n%s' % (err.resp.status, err.content))
            logging.critical("Exiting...")
            sys.exit(19)
    # Create a slug for each video (to be used for the website URLs)
    videos = determine_videos_slug(videos)
    if args.dump_uploaded_videos:
        save_uploaded_videos(videos, dump_file)
    return videos

90 

def identify_locations_names(uploaded_videos, location_special_cases_file, dump_file):
    """Set the "location" field on every video and collect the distinct locations.

    Exits with code 20 (after dumping the videos to dump_file) when at least
    one video has no identifiable location, so the user can add it to the
    special-cases file without re-downloading from YouTube.
    """
    with open(location_special_cases_file) as in_file:
        special_cases = json.load(in_file)
    locations = {}
    missing_location = False
    for video in uploaded_videos:
        place = identify_single_location_name(video, special_cases)
        video["location"] = place
        if place:
            # Placeholder coordinates, filled in later by determine_geolocation()
            locations.setdefault(place, {"latitude": None, "longitude": None})
        else:
            missing_location = True
    if missing_location:
        # The script is going to exit; to prevent unnecessary downloading from
        # YouTube again, save the downloaded information regardless of the
        # --dump_uploaded_videos parameter
        save_uploaded_videos(uploaded_videos, dump_file)
        logging.warning("Dumping the list of uploaded videos from YouTube to the '%s' file, so as not to have to download it again after you have edited the '%s' file." % (dump_file, location_special_cases_file))
        logging.critical("Please add the new/missing location to the file '%s'. Exiting..." % location_special_cases_file)
        sys.exit(20)
    logging.info("Found %d different location name." % len(locations))
    return (uploaded_videos, locations)

112 

def identify_single_location_name(vid, special_cases):
    """Return the location name for one video, or None when none can be found.

    The location is either hardcoded in special_cases (keyed by video ID) or
    extracted from the video title after a ", desde " / ", cerca de " separator.
    """
    video_id, title = vid["id"], vid["title"]
    location = None
    if video_id in special_cases:
        # Hardcoded location for titles that don't follow the naming convention
        location = special_cases[video_id]
        logging.debug("Video %s, location '%s'" % (video_id, location))
    else:
        # The location is whatever follows the first matching separator
        for separator in (", desde ", ", cerca de "):
            start = title.find(separator)
            if start > 0:
                location = title[start + len(separator):]
                logging.debug("Video %s, location '%s'" % (video_id, location))
                break

    if not location:
        # Each video should have a location identified; the caller treats
        # a missing one as fatal and will end the script.
        logging.critical("No Location found for %s, '%s'" % (video_id, title))
    return location

130 

def determine_geolocation(locations, geolocations_file):
    """Fill in latitude/longitude for every location from geolocations_file.

    When at least one location has no known coordinates, placeholders are
    written back to geolocations_file and the script exits with code 21 so
    the user can fill in the missing values by hand.
    """
    logging.debug("Searching geolocation for %d locations..." % len(locations))
    # Load the list of saved geolocations
    with open(geolocations_file) as in_file:
        geolocations = json.load(in_file)
    missing = 0
    for name in locations:
        if name in geolocations and geolocations[name]["latitude"] and geolocations[name]["longitude"]:
            known = geolocations[name]
            logging.debug("Geolocation found for %s: lat %f, lon %f" % (name, known["latitude"], known["longitude"]))
            locations[name]["latitude"] = known["latitude"]
            locations[name]["longitude"] = known["longitude"]
        else:
            logging.critical("No geolocation found for %s." % name)
            #TODO: Search and suggest a geolocation
            geolocations[name] = {"latitude": None, "longitude": None}
            missing += 1

    if missing > 0:
        # Save the geolocations file with placeholders for the unknown
        # latitude and longitude, ready to be filled in manually
        with open(geolocations_file, 'w') as out_file:
            json.dump(geolocations, out_file, sort_keys=True, indent=2)
        logging.critical("Please add the %d new/missing unknown latitude and longitude to the file '%s'. Exiting..." % (missing, geolocations_file))
        sys.exit(21)

    logging.info("Found geolocation information for the %d locations." % len(locations))
    return locations

157 

def add_videos_to_locations_array(uploaded_videos, locations):
    """Group every video under its location's "videos" list."""
    logging.debug("Adding videos in each location array...")
    for video in uploaded_videos:
        locations[video["location"]].setdefault("videos", []).append(video)
    return locations

165 

def determine_locations_slug(locations):
    """Attach a URL slug, derived from the location name, to each location."""
    logging.debug("Determining each location's slug...")
    for name, info in locations.items():
        info["slug"] = slugify(name)
    return locations

171 

def save_website_data(locations, website_data_file):
    """Write the locations dict to website_data_file as a JS variable."""
    json_content = json.dumps(locations, sort_keys=True, indent=2)
    with open(website_data_file, 'w') as out_file:
        # Make it JS (and not just JSON) for direct use in the HTML document
        out_file.write("var locations = %s;" % json_content)

178 

def ignored_files_in_prod(adir, filenames):
    """shutil.copytree ignore-callback: entries not copied to the prod folder.

    The locally-bundled libraries are skipped (prod references CDNs instead),
    as are the tutorial video folders (hard-linked separately).
    """
    skip = []
    if adir == "../website/src":
        skip = [
            'bootstrap-%s-dist' % BOOTSTRAP_VERSION,
            'bootstrap4-toggle-%s' % BOOTSTRAP_TOGGLE_VERSION,
            'jquery-%s.slim.min.js' % JQUERY_VERSION,
            'leaflet'
        ]
    if adir == "../website/src/aprender":
        skip = [
            'temp',
            'videos'
        ]
    return [filename for filename in filenames if filename in skip]

194 

195def get_stats(locations, uploaded_videos): 

196 num_videos = len(uploaded_videos) 

197 

198 songs = [] 

199 skipped_titles = ["Vallenato at Epic", "La Guaneña navideña"] 

200 for v in uploaded_videos: 

201 song = v["title"].split(",")[0] 

202 if song not in songs and song not in skipped_titles: 

203 songs.append(song) 

204 num_songs = len(songs) 

205 

206 num_places = len(locations) 

207 

208 countries = [] 

209 for l in locations: 

210 country = l.split(",")[-1] 

211 if country not in countries: 

212 countries.append(country) 

213 num_countries = len(countries) 

214 

215 navidad_2017 = datetime.date(2017, 12, 25) 

216 today = datetime.date.today() 

217 years = today.year - navidad_2017.year 

218 if today.month == 12: # December 

219 duration_since_navidad_2017 = "%d años" % years 

220 elif today.month == 1: # January 

221 duration_since_navidad_2017 = "%d años" % (years - 1) 

222 else: 

223 duration_since_navidad_2017 = "%d años y %d meses" % (years - 1, today.month) 

224 

225 stats = "El Vallenatero Francés les presenta %d videos de %d canciones tocadas en %d lugares de %d paises. El empezo a aprender el Acordeón Vallenato en la Navidad 2017 (hace mas o menos %s)." % \ 

226 (num_videos, num_songs, num_places, num_countries, duration_since_navidad_2017) 

227 

228 return stats 

229 

def generate_website(locations, uploaded_videos):
    """Generate the production website under ../website/prod from ../website/src.

    Steps:
    - refresh the statistics sentence in the src index.html,
    - copy src to prod (minus the locally-bundled libraries, see
      ignored_files_in_prod), hard-linking the tutorial video files,
    - rewrite the library references in the prod HTML files to CDN URLs,
    - create one standalone HTML page per tutorial.

    Side effects: deletes and recreates ../website/prod, and rewrites
    ../website/src/index.html in place (stats update).
    """
    input_src_folder = "../website/src"
    output_prod_folder = "../website/prod"

    # Delete the previous production output folder (if existing)
    if os.path.exists(output_prod_folder):
        shutil.rmtree(output_prod_folder)

    # Update statistics: replace the content of the <div id="stats"> element
    # in the SRC index.html (so dev and prod show the same numbers)
    stats = get_stats(locations, uploaded_videos)
    index_src_file = "%s/index.html" % input_src_folder
    with open(index_src_file, 'r') as file :
        index_data = file.read()
    # NOTE(review): '.*' does not match newlines — assumes the stats div is
    # on a single line in index.html; confirm against the template
    index_data = re.sub('<div id="stats">.*</div>', '<div id="stats">%s</div>' % stats, index_data)
    with open(index_src_file, 'w') as file:
        file.write(index_data)

    # Update the values accordingly for prod
    # Main difference between development (src) and production websites:
    # - src contains a full copy of the leaflet, Bootstrap and jQuery libraries
    # - prod uses CDNs

    # Copy src to prod folder, ignoring the files and folder replaced by CDNs in prod
    # The videos are also not copied, as we're going to hard-link them
    shutil.copytree(input_src_folder, output_prod_folder, ignore=ignored_files_in_prod)

    # Create hard links for the videos in the prod folder
    # (hard links can only be created for files, need to recreate the folder structure)
    os.mkdir("%s/aprender/videos" % output_prod_folder)
    for d in os.listdir("%s/aprender/videos" % input_src_folder):
        if d not in ["TODO", "blabla-bla"]:
            # Create a folder for that tutorial's video files
            #TODO: copy folder without content in order to keep the original folder's
            # creation date, in order to not confuse the rsync upload process
            os.mkdir("%s/aprender/videos/%s" % (output_prod_folder, d))
            for f in os.listdir("%s/aprender/videos/%s" % (input_src_folder, d)):
                # Create a hard link to the video file
                os.link("%s/aprender/videos/%s/%s" % (input_src_folder, d, f),
                    "%s/aprender/videos/%s/%s" % (output_prod_folder, d, f))

    # Update links to leaflet (CDN)
    # Read the prod files
    with open("%s/index.html" % output_prod_folder, 'r') as file :
        index_data = file.read()
    with open("%s/aprender/index.html" % output_prod_folder, 'r') as file :
        index_aprender_data = file.read()
    # Replace the target strings (each replace swaps the local file reference
    # for the matching CDN tag with its integrity hash)
    # Leaflet
    index_data = index_data.replace(
        '<link rel="stylesheet" href="leaflet/%s/leaflet.css">' % LEAFLET_VERSION,
        '<link rel="stylesheet" href="https://unpkg.com/leaflet@%s/dist/leaflet.css"\n integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="\n crossorigin=""/>' % LEAFLET_VERSION)
    index_data = index_data.replace(
        '<script type = "text/javascript" src="leaflet/%s/leaflet.js"></script>' % LEAFLET_VERSION,
        '<script src="https://unpkg.com/leaflet@%s/dist/leaflet.js"\n integrity="sha512-GffPMF3RvMeYyc1LWMHtK8EbPv0iNZ8/oTtHPx9/cc2ILxQ+u905qIwdpULaqDkyBKgOaB57QTMg7ztg8Jm2Og=="\n crossorigin="">\n </script>' % LEAFLET_VERSION)
    # Bootstrap
    index_data = index_data.replace(
        '<link rel="stylesheet" href="bootstrap-%s-dist/css/bootstrap.min.css">' % BOOTSTRAP_VERSION,
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/%s/css/bootstrap.min.css"\n integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"\n crossorigin="anonymous">' % BOOTSTRAP_VERSION)
    index_data = index_data.replace(
        '<script src="bootstrap-%s-dist/js/bootstrap.min.js"></script>' % BOOTSTRAP_VERSION,
        '<script src="https://stackpath.bootstrapcdn.com/bootstrap/%s/js/bootstrap.min.js"\n integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"\n crossorigin="anonymous"></script>' % BOOTSTRAP_VERSION)
    # The aprender page references the libraries one folder up ("../"), hence
    # the separate replacements
    index_aprender_data = index_aprender_data.replace(
        '<link rel="stylesheet" href="../bootstrap-%s-dist/css/bootstrap.min.css">' % BOOTSTRAP_VERSION,
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/%s/css/bootstrap.min.css"\n integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"\n crossorigin="anonymous">' % BOOTSTRAP_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../bootstrap-%s-dist/js/bootstrap.min.js"></script>' % BOOTSTRAP_VERSION,
        '<script src="https://stackpath.bootstrapcdn.com/bootstrap/%s/js/bootstrap.min.js"\n integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"\n crossorigin="anonymous"></script>' % BOOTSTRAP_VERSION)
    # jQuery (for Bootstrap)
    index_data = index_data.replace(
        '<script src="jquery-%s.slim.min.js"></script>' % JQUERY_VERSION,
        '<script src="https://code.jquery.com/jquery-%s.slim.min.js"\n integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"\n crossorigin="anonymous"></script>' % JQUERY_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../jquery-%s.slim.min.js"></script>' % JQUERY_VERSION,
        '<script src="https://code.jquery.com/jquery-%s.slim.min.js"\n integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"\n crossorigin="anonymous"></script>' % JQUERY_VERSION)
    # Bootstrap-toggle (only used by the aprender page)
    index_aprender_data = index_aprender_data.replace(
        '<link rel="stylesheet" href="../bootstrap4-toggle-%s/css/bootstrap4-toggle.min.css">' % BOOTSTRAP_TOGGLE_VERSION,
        '<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/gitbrent/bootstrap4-toggle@%s/css/bootstrap4-toggle.min.css"\n integrity="sha384-yakM86Cz9KJ6CeFVbopALOEQGGvyBFdmA4oHMiYuHcd9L59pLkCEFSlr6M9m434E"\n crossorigin="anonymous">' % BOOTSTRAP_TOGGLE_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../bootstrap4-toggle-%s/js/bootstrap4-toggle.min.js"></script>' % BOOTSTRAP_TOGGLE_VERSION,
        '<script src="https://cdn.jsdelivr.net/gh/gitbrent/bootstrap4-toggle@%s/js/bootstrap4-toggle.min.js"\n integrity="sha384-Q9RsZ4GMzjlu4FFkJw4No9Hvvm958HqHmXI9nqo5Np2dA/uOVBvKVxAvlBQrDhk4"\n crossorigin="anonymous"></script>' % BOOTSTRAP_TOGGLE_VERSION)

    # Save edited prod files
    with open("%s/index.html" % output_prod_folder, 'w') as file:
        file.write(index_data)
    with open("%s/aprender/index.html" % output_prod_folder, 'w') as file:
        file.write(index_aprender_data)

    # Create full HTML pages for Prod /aprender tutorials
    with open("../website/src/aprender/tutoriales.js") as in_file:
        # Remove the JS bits to keep only the JSON content
        # (strips the first 17 chars — presumably "var tutoriales = " — and the
        # trailing ";" + newline; TODO confirm against tutoriales.js)
        tutoriales_json_content = (in_file.read()[17:-2])
    tutoriales = json.loads(tutoriales_json_content)
    for t in tutoriales:
        output_prod_tutorial_file = "%s/aprender/%s.html" % \
            (output_prod_folder, t["slug"])
        # Each tutorial page starts as a copy of the aprender index page
        shutil.copy("%s/aprender/index.html" % output_prod_folder,
            output_prod_tutorial_file)
        with open(output_prod_tutorial_file, 'r') as file :
            prod_tutorial_file_data = file.read()
        if t["author"]:
            tuto_title = "%s - %s" % (t["title"], t["author"])
        else:
            tuto_title = t["title"]
        # Customize the page <title> and visible heading for this tutorial
        prod_tutorial_file_data = prod_tutorial_file_data.replace(
            "<title>Aprender a tocar el Acordeón Vallenato - El Vallenatero Francés</title>",
            "<title>%s - Aprender a tocar el Acordeón Vallenato</title>" % tuto_title
        )
        prod_tutorial_file_data = prod_tutorial_file_data.replace(
            '<h1 id="tutorialFullTitle">TITLE</h1>',
            '<h1 id="tutorialFullTitle">%s</h1>' % tuto_title
        )
        with open(output_prod_tutorial_file, 'w') as file:
            file.write(prod_tutorial_file_data)

344 

def generate_sitemap(sitemap_file, locations, uploaded_videos):
    """Generate the sitemap.xml for the production website.

    Adds the home page, the world map page, one URL per location, one URL
    per individual video, the aprender index and one URL per tutorial,
    then writes the (hand-prettified) XML to sitemap_file.

    NOTE(review): lastmod values use uploaded_videos[0] / the first video of
    each location — assumes those lists are sorted most-recent-first; confirm
    against the YouTube fetch code.
    """
    base_url = "https://vallenato.fr"
    sitemap = generator.Sitemap()

    # vallenato.fr index
    sitemap.add(base_url,
        # Timestamp of the most recently uploaded video
        lastmod=uploaded_videos[0]["publishedAt"][:10],
        changefreq="monthly",
        priority="1.0")

    # Locations and individual videos
    sitemap.add("%s/mundo-entero" % base_url,
        # Timestamp of the most recently uploaded video
        lastmod=uploaded_videos[0]["publishedAt"][:10],
        changefreq="monthly",
        priority="0.6")
    for l in locations:
        # Locations
        sitemap.add("%s/%s" % (base_url, locations[l]["slug"]),
            # Timestamp of the most recently uploaded video at that location
            lastmod=locations[l]["videos"][0]["publishedAt"][:10],
            changefreq="yearly",
            priority="0.5")
        for v in locations[l]["videos"]:
            # Individual videos
            sitemap.add("%s/%s/%s" % (base_url, v["slug"], v["id"]),
                # Timestamp of that video ("publishedAt" truncated to YYYY-MM-DD)
                lastmod=v["publishedAt"][:10],
                changefreq="yearly",
                priority="0.5")

    # Aprender index
    sitemap.add("%s/aprender/" % base_url,
        changefreq="monthly",
        priority="0.9")

    # Aprender: individual tutorials
    with open("../website/src/aprender/tutoriales.js") as in_file:
        # Remove the JS bits to keep only the JSON content
        # (same stripping as in generate_website: drops the leading variable
        # declaration and the trailing ";")
        tutoriales_json_content = (in_file.read()[17:-2])
    tutoriales = json.loads(tutoriales_json_content)
    for t in tutoriales:
        tuto_url = "%s/aprender/%s" % (base_url, t["slug"])
        sitemap.add(tuto_url,
            changefreq="yearly",
            priority="0.5")

    sitemap_xml = sitemap.generate()

    # Prettify the XML "by hand" (the generator emits everything unindented)
    sitemap_xml = sitemap_xml.replace("<url>", "  <url>")
    sitemap_xml = sitemap_xml.replace("</url>", "  </url>")
    sitemap_xml = sitemap_xml.replace("<loc>", "    <loc>")
    sitemap_xml = sitemap_xml.replace("<lastmod>", "    <lastmod>")
    sitemap_xml = sitemap_xml.replace("<changefreq>", "    <changefreq>")
    sitemap_xml = sitemap_xml.replace("<priority>", "    <priority>")

    with open(sitemap_file, 'w') as file:
        file.write(sitemap_xml)

405 

def website(args):
    """Rebuild the vallenato.fr website from the list of YouTube uploads."""
    # Retrieve the list of uploaded videos
    vids = get_uploaded_videos(args, UPLOADED_VIDEOS_DUMP_FILE)
    logging.info("There are %d uploaded videos." % len(vids))

    # Identify each video's location name
    (vids, locs) = identify_locations_names(vids, LOCATION_SPECIAL_CASES_FILE, UPLOADED_VIDEOS_DUMP_FILE)

    # Determine the geolocation (latitude/longitude) of each location
    locs = determine_geolocation(locs, GEOLOCATIONS_FILE)

    # Create a slug for each location (to be used for the website URLs)
    locs = determine_locations_slug(locs)

    # Add the videos in each location array
    locs = add_videos_to_locations_array(vids, locs)

    # Generate the JavaScript data file to be used by the website
    save_website_data(locs, WEBSITE_DATA_FILE)

    # Generate the development and production website files
    generate_website(locs, vids)

    # Generate the Sitemap
    generate_sitemap(SITEMAP_FILE, locs, vids)