Coverage for website.py: 99%

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of Vallenato.fr.
#
# Vallenato.fr is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Vallenato.fr is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Vallenato.fr. If not, see <http://www.gnu.org/licenses/>.

import logging
import os
import sys
import json
import shutil
from slugify import slugify
import sitemap.generator as generator
import re
import datetime

from youtube import HttpError
from youtube import yt_get_authenticated_service
from youtube import yt_get_my_uploads_list
from youtube import yt_list_my_uploaded_videos

# File that can contain the data downloaded from YouTube
UPLOADED_VIDEOS_DUMP_FILE = "data/uploaded_videos_dump.json"
# File containing the list of videos that have hardcoded locations
LOCATION_SPECIAL_CASES_FILE = "data/location_special_cases.json"
# File containing the already-identified latitudes/longitudes
GEOLOCATIONS_FILE = "data/geolocations.json"
# Output file used for the website
WEBSITE_DATA_FILE = "../website/src/data.js"
# Sitemap file
SITEMAP_FILE = "../website/prod/sitemap.xml"
# Versions of the external libraries
LEAFLET_VERSION = "1.5.1"
BOOTSTRAP_VERSION = "4.3.1"
JQUERY_VERSION = "3.3.1"
BOOTSTRAP_TOGGLE_VERSION = "3.6.1"
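
# Shape of the support files, inferred from the functions below (the values
# shown are illustrative, not real data):
# - LOCATION_SPECIAL_CASES_FILE maps a YouTube video ID to a location name,
#   e.g. {"abc123xyz": "Valledupar, Colombia"}
# - GEOLOCATIONS_FILE maps a location name to its coordinates,
#   e.g. {"Valledupar, Colombia": {"latitude": 10.46, "longitude": -73.25}}
# - UPLOADED_VIDEOS_DUMP_FILE caches the raw list of videos returned by YouTube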

def get_dumped_uploaded_videos(dump_file):
    uploaded_videos = []
    # Use a previously dumped file if it exists, to bypass the network transactions
    if os.path.exists(dump_file):
        with open(dump_file) as in_file:
            uploaded_videos = json.load(in_file)
    return uploaded_videos

def save_uploaded_videos(uploaded_videos, dump_file):
    with open(dump_file, 'w') as out_file:
        json.dump(uploaded_videos, out_file, sort_keys=True, indent=2)

def determine_videos_slug(uploaded_videos):
    logging.debug("Determining each video's slug...")
    for vid in uploaded_videos:
        vid["slug"] = slugify(vid["title"]).replace("-desde-", "-")
    return uploaded_videos
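
# Example (hypothetical title): slugify("La Gota Fría, desde Valledupar") yields
# "la-gota-fria-desde-valledupar"; dropping the "-desde-" connector leaves the
# shorter URL slug "la-gota-fria-valledupar".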

def get_uploaded_videos(args, dump_file):
    uploaded_videos = get_dumped_uploaded_videos(dump_file)
    if not uploaded_videos:
        youtube = yt_get_authenticated_service(args)
        # Get the list of videos uploaded to YouTube
        try:
            uploads_playlist_id = yt_get_my_uploads_list(youtube)
            if uploads_playlist_id:
                uploaded_videos = yt_list_my_uploaded_videos(uploads_playlist_id, youtube)
                logging.debug("Uploaded videos: %s" % uploaded_videos)
            else:
                logging.info('There is no uploaded videos playlist for this user.')
        except HttpError as e:
            logging.debug('An HTTP error %d occurred:\n%s' % (e.resp.status, e.content))
            logging.critical("Exiting...")
            sys.exit(19)
    # Create a slug for each video (to be used for the website URLs)
    uploaded_videos = determine_videos_slug(uploaded_videos)
    if args.dump_uploaded_videos:
        save_uploaded_videos(uploaded_videos, dump_file)
    return uploaded_videos

def identify_locations_names(uploaded_videos, location_special_cases_file, dump_file):
    with open(location_special_cases_file) as in_file:
        special_cases = json.load(in_file)
    locations = {}
    incomplete_locations = False
    for vid in uploaded_videos:
        vid["location"] = identify_single_location_name(vid, special_cases)
        if not vid["location"]:
            incomplete_locations = True
        elif vid["location"] not in locations:
            locations[vid["location"]] = {"latitude": None, "longitude": None}
    if incomplete_locations:
        # The script is going to exit; to avoid having to download from
        # YouTube again, save the downloaded information regardless of the
        # --dump_uploaded_videos parameter
        save_uploaded_videos(uploaded_videos, dump_file)
        logging.warning("Dumping the list of uploaded videos from YouTube to the '%s' file, so as not to have to download it again after you have edited the '%s' file." % (dump_file, location_special_cases_file))
        logging.critical("Please add the new/missing location to the file '%s'. Exiting..." % location_special_cases_file)
        sys.exit(20)
    logging.info("Found %d different location names." % len(locations))
    return (uploaded_videos, locations)

def identify_single_location_name(vid, special_cases):
    location = None
    if vid["id"] in special_cases:
        location = special_cases[vid["id"]]
        logging.debug("Video %s, location '%s'" % (vid["id"], location))
    else:
        for search_string in (", desde ", ", cerca de "):
            loc_index = vid["title"].find(search_string)
            if loc_index > 0:
                location = vid["title"][loc_index + len(search_string):]
                logging.debug("Video %s, location '%s'" % (vid["id"], location))
                break

    # Each video should now have a location identified. If not, this will end the script.
    if not location:
        logging.critical("No location found for %s, '%s'" % (vid["id"], vid["title"]))
    return location
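
# Example (hypothetical title): "El Mejoral, desde Bogotá, Colombia" contains
# the marker ", desde ", so everything after it -- "Bogotá, Colombia" --
# becomes the video's location name.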

def determine_geolocation(locations, geolocations_file):
    logging.debug("Searching geolocation for %d locations..." % len(locations))
    # Load the list of saved geolocations
    with open(geolocations_file) as in_file:
        geolocations = json.load(in_file)
    incomplete_geolocations = 0
    for l in locations:
        if l in geolocations and geolocations[l]["latitude"] and geolocations[l]["longitude"]:
            logging.debug("Geolocation found for %s: lat %f, lon %f" % (l, geolocations[l]["latitude"], geolocations[l]["longitude"]))
            locations[l]["latitude"] = geolocations[l]["latitude"]
            locations[l]["longitude"] = geolocations[l]["longitude"]
        else:
            logging.critical("No geolocation found for %s." % l)
            #TODO: Search for and suggest a geolocation
            geolocations[l] = {"latitude": None, "longitude": None}
            incomplete_geolocations += 1

    if incomplete_geolocations > 0:
        # Save the geolocations file with placeholders for the unknown latitudes and longitudes
        with open(geolocations_file, 'w') as out_file:
            json.dump(geolocations, out_file, sort_keys=True, indent=2)
        logging.critical("Please add the %d new/missing latitudes and longitudes to the file '%s'. Exiting..." % (incomplete_geolocations, geolocations_file))
        sys.exit(21)

    logging.info("Found geolocation information for all %d locations." % len(locations))
    return locations

def add_videos_to_locations_array(uploaded_videos, locations):
    logging.debug("Adding videos to each location's array...")
    for vid in uploaded_videos:
        if "videos" not in locations[vid["location"]]:
            locations[vid["location"]]["videos"] = []
        locations[vid["location"]]["videos"].append(vid)
    return locations

def determine_locations_slug(locations):
    logging.debug("Determining each location's slug...")
    for loc in locations:
        locations[loc]["slug"] = slugify(loc)
    return locations

def save_website_data(locations, website_data_file):
    json_content = json.dumps(locations, sort_keys=True, indent=2)
    # Make it JS (and not just JSON) for direct use in the HTML document
    js_content = "var locations = %s;" % json_content
    with open(website_data_file, 'w') as out_file:
        out_file.write(js_content)
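
# The resulting data.js therefore looks like this (shape sketch with
# hypothetical values):
#   var locations = {
#     "Valledupar, Colombia": {
#       "latitude": 10.46,
#       "longitude": -73.25,
#       "slug": "valledupar-colombia",
#       "videos": [ ... ]
#     }
#   };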

def ignored_files_in_prod(adir, filenames):
    ignored_files = []
    if adir == "../website/src":
        ignored_files = [
            'bootstrap-%s-dist' % BOOTSTRAP_VERSION,
            'bootstrap4-toggle-%s' % BOOTSTRAP_TOGGLE_VERSION,
            'jquery-%s.slim.min.js' % JQUERY_VERSION,
            'leaflet'
        ]
    if adir == "../website/src/aprender":
        ignored_files = [
            'temp',
            'videos'
        ]
    return [filename for filename in filenames if filename in ignored_files]
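
# This function matches the callable contract of shutil.copytree's ignore=
# argument: it receives the directory being visited and the list of its entry
# names, and returns the subset of names that should not be copied.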

def get_stats(locations, uploaded_videos):
    num_videos = len(uploaded_videos)

    songs = []
    skipped_titles = ["Vallenato at Epic", "La Guaneña navideña"]
    for v in uploaded_videos:
        song = v["title"].split(",")[0]
        if song not in songs and song not in skipped_titles:
            songs.append(song)
    num_songs = len(songs)

    num_places = len(locations)

    countries = []
    for l in locations:
        country = l.split(",")[-1]
        if country not in countries:
            countries.append(country)
    num_countries = len(countries)

    navidad_2017 = datetime.date(2017, 12, 25)
    today = datetime.date.today()
    years = today.year - navidad_2017.year
    if today.month == 12:  # December
        duration_since_navidad_2017 = "%d años" % years
    elif today.month == 1:  # January
        duration_since_navidad_2017 = "%d años" % (years - 1)
    else:
        duration_since_navidad_2017 = "%d años y %d meses" % (years - 1, today.month)
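
    # Worked example (hypothetical run date): on 2020-03-15, years = 2020 - 2017 = 3;
    # since it is neither December nor January, the text becomes "2 años y 3 meses",
    # approximately the time elapsed since 2017-12-25.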

    stats = "El Vallenatero Francés les presenta %d videos de %d canciones tocadas en %d lugares de %d países. Él empezó a aprender el Acordeón Vallenato en la Navidad 2017 (hace más o menos %s)." % \
        (num_videos, num_songs, num_places, num_countries, duration_since_navidad_2017)

    return stats

def generate_website(locations, uploaded_videos):
    input_src_folder = "../website/src"
    output_prod_folder = "../website/prod"

    # Delete the previous production output folder (if it exists)
    if os.path.exists(output_prod_folder):
        shutil.rmtree(output_prod_folder)

    # Update statistics
    stats = get_stats(locations, uploaded_videos)
    index_src_file = "%s/index.html" % input_src_folder
    with open(index_src_file, 'r') as file:
        index_data = file.read()
    index_data = re.sub('<div id="stats">.*</div>', '<div id="stats">%s</div>' % stats, index_data)
    with open(index_src_file, 'w') as file:
        file.write(index_data)

    # Update the values accordingly for prod
    # Main difference between the development (src) and production websites:
    # - src contains a full copy of the Leaflet, Bootstrap and jQuery libraries
    # - prod uses CDNs

    # Copy src to the prod folder, ignoring the files and folders replaced by CDNs in prod
    # The videos are also not copied, as we're going to hard-link them
    shutil.copytree(input_src_folder, output_prod_folder, ignore=ignored_files_in_prod)

    # Create hard links for the videos in the prod folder
    # (hard links can only be created for files, so the folder structure must be recreated)
    os.mkdir("%s/aprender/videos" % output_prod_folder)
    for d in os.listdir("%s/aprender/videos" % input_src_folder):
        if d not in ["TODO", "blabla-bla"]:
            # Create a folder for that tutorial's video files
            #TODO: copy the folder without its content in order to keep the original
            # folder's creation date, so as not to confuse the rsync upload process
            os.mkdir("%s/aprender/videos/%s" % (output_prod_folder, d))
            for f in os.listdir("%s/aprender/videos/%s" % (input_src_folder, d)):
                # Create a hard link to the video file
                os.link("%s/aprender/videos/%s/%s" % (input_src_folder, d, f),
                        "%s/aprender/videos/%s/%s" % (output_prod_folder, d, f))

    # Update the links to the external libraries (CDN)
    # Read the prod files
    with open("%s/index.html" % output_prod_folder, 'r') as file:
        index_data = file.read()
    with open("%s/aprender/index.html" % output_prod_folder, 'r') as file:
        index_aprender_data = file.read()
    # Replace the target strings
    # Leaflet
    index_data = index_data.replace(
        '<link rel="stylesheet" href="leaflet/%s/leaflet.css">' % LEAFLET_VERSION,
        '<link rel="stylesheet" href="https://unpkg.com/leaflet@%s/dist/leaflet.css"\n integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="\n crossorigin=""/>' % LEAFLET_VERSION)
    index_data = index_data.replace(
        '<script type = "text/javascript" src="leaflet/%s/leaflet.js"></script>' % LEAFLET_VERSION,
        '<script src="https://unpkg.com/leaflet@%s/dist/leaflet.js"\n integrity="sha512-GffPMF3RvMeYyc1LWMHtK8EbPv0iNZ8/oTtHPx9/cc2ILxQ+u905qIwdpULaqDkyBKgOaB57QTMg7ztg8Jm2Og=="\n crossorigin="">\n </script>' % LEAFLET_VERSION)
    # Bootstrap
    index_data = index_data.replace(
        '<link rel="stylesheet" href="bootstrap-%s-dist/css/bootstrap.min.css">' % BOOTSTRAP_VERSION,
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/%s/css/bootstrap.min.css"\n integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"\n crossorigin="anonymous">' % BOOTSTRAP_VERSION)
    index_data = index_data.replace(
        '<script src="bootstrap-%s-dist/js/bootstrap.min.js"></script>' % BOOTSTRAP_VERSION,
        '<script src="https://stackpath.bootstrapcdn.com/bootstrap/%s/js/bootstrap.min.js"\n integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"\n crossorigin="anonymous"></script>' % BOOTSTRAP_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<link rel="stylesheet" href="../bootstrap-%s-dist/css/bootstrap.min.css">' % BOOTSTRAP_VERSION,
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/%s/css/bootstrap.min.css"\n integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"\n crossorigin="anonymous">' % BOOTSTRAP_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../bootstrap-%s-dist/js/bootstrap.min.js"></script>' % BOOTSTRAP_VERSION,
        '<script src="https://stackpath.bootstrapcdn.com/bootstrap/%s/js/bootstrap.min.js"\n integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"\n crossorigin="anonymous"></script>' % BOOTSTRAP_VERSION)
    # jQuery (for Bootstrap)
    index_data = index_data.replace(
        '<script src="jquery-%s.slim.min.js"></script>' % JQUERY_VERSION,
        '<script src="https://code.jquery.com/jquery-%s.slim.min.js"\n integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"\n crossorigin="anonymous"></script>' % JQUERY_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../jquery-%s.slim.min.js"></script>' % JQUERY_VERSION,
        '<script src="https://code.jquery.com/jquery-%s.slim.min.js"\n integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"\n crossorigin="anonymous"></script>' % JQUERY_VERSION)
    # Bootstrap-toggle
    index_aprender_data = index_aprender_data.replace(
        '<link rel="stylesheet" href="../bootstrap4-toggle-%s/css/bootstrap4-toggle.min.css">' % BOOTSTRAP_TOGGLE_VERSION,
        '<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/gitbrent/bootstrap4-toggle@%s/css/bootstrap4-toggle.min.css"\n integrity="sha384-yakM86Cz9KJ6CeFVbopALOEQGGvyBFdmA4oHMiYuHcd9L59pLkCEFSlr6M9m434E"\n crossorigin="anonymous">' % BOOTSTRAP_TOGGLE_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../bootstrap4-toggle-%s/js/bootstrap4-toggle.min.js"></script>' % BOOTSTRAP_TOGGLE_VERSION,
        '<script src="https://cdn.jsdelivr.net/gh/gitbrent/bootstrap4-toggle@%s/js/bootstrap4-toggle.min.js"\n integrity="sha384-Q9RsZ4GMzjlu4FFkJw4No9Hvvm958HqHmXI9nqo5Np2dA/uOVBvKVxAvlBQrDhk4"\n crossorigin="anonymous"></script>' % BOOTSTRAP_TOGGLE_VERSION)

    # Save the edited prod files
    with open("%s/index.html" % output_prod_folder, 'w') as file:
        file.write(index_data)
    with open("%s/aprender/index.html" % output_prod_folder, 'w') as file:
        file.write(index_aprender_data)

    # Create full HTML pages for the prod /aprender tutorials
    with open("../website/src/aprender/tutoriales.js") as in_file:
        # Remove the JS bits to keep only the JSON content
        tutoriales_json_content = in_file.read()[17:-2]
    tutoriales = json.loads(tutoriales_json_content)
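    # Note: the slice above assumes tutoriales.js has the exact shape
    # 'var tutoriales = <JSON>;' plus a trailing newline, i.e. 17 leading
    # characters ("var tutoriales = ") and 2 trailing ones (";\n"), mirroring
    # how save_website_data() writes data.js.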
    for t in tutoriales:
        output_prod_tutorial_file = "%s/aprender/%s.html" % \
            (output_prod_folder, t["slug"])
        shutil.copy("%s/aprender/index.html" % output_prod_folder,
                    output_prod_tutorial_file)
        with open(output_prod_tutorial_file, 'r') as file:
            prod_tutorial_file_data = file.read()
        if t["author"]:
            tuto_title = "%s - %s" % (t["title"], t["author"])
        else:
            tuto_title = t["title"]
        prod_tutorial_file_data = prod_tutorial_file_data.replace(
            "<title>Aprender a tocar el Acordeón Vallenato - El Vallenatero Francés</title>",
            "<title>%s - Aprender a tocar el Acordeón Vallenato</title>" % tuto_title
        )
        prod_tutorial_file_data = prod_tutorial_file_data.replace(
            '<h1 id="tutorialFullTitle">TITLE</h1>',
            '<h1 id="tutorialFullTitle">%s</h1>' % tuto_title
        )
        with open(output_prod_tutorial_file, 'w') as file:
            file.write(prod_tutorial_file_data)

def generate_sitemap(sitemap_file, locations, uploaded_videos):
    base_url = "https://vallenato.fr"
    sitemap = generator.Sitemap()

    # vallenato.fr index
    sitemap.add(base_url,
                # Timestamp of the most recently uploaded video
                lastmod=uploaded_videos[0]["publishedAt"][:10],
                changefreq="monthly",
                priority="1.0")

    # Locations and individual videos
    sitemap.add("%s/mundo-entero" % base_url,
                # Timestamp of the most recently uploaded video
                lastmod=uploaded_videos[0]["publishedAt"][:10],
                changefreq="monthly",
                priority="0.6")
    for l in locations:
        # Locations
        sitemap.add("%s/%s" % (base_url, locations[l]["slug"]),
                    # Timestamp of the most recently uploaded video at that location
                    lastmod=locations[l]["videos"][0]["publishedAt"][:10],
                    changefreq="yearly",
                    priority="0.5")
        for v in locations[l]["videos"]:
            # Individual videos
            sitemap.add("%s/%s/%s" % (base_url, v["slug"], v["id"]),
                        # Timestamp of that video
                        lastmod=v["publishedAt"][:10],
                        changefreq="yearly",
                        priority="0.5")

    # Aprender index
    sitemap.add("%s/aprender/" % base_url,
                changefreq="monthly",
                priority="0.9")

    # Aprender: individual tutorials
    with open("../website/src/aprender/tutoriales.js") as in_file:
        # Remove the JS bits to keep only the JSON content
        tutoriales_json_content = in_file.read()[17:-2]
    tutoriales = json.loads(tutoriales_json_content)
    for t in tutoriales:
        tuto_url = "%s/aprender/%s" % (base_url, t["slug"])
        sitemap.add(tuto_url,
                    changefreq="yearly",
                    priority="0.5")

    sitemap_xml = sitemap.generate()

    # Prettify the XML "by hand"
    sitemap_xml = sitemap_xml.replace("<url>", "  <url>")
    sitemap_xml = sitemap_xml.replace("</url>", "  </url>")
    sitemap_xml = sitemap_xml.replace("<loc>", "    <loc>")
    sitemap_xml = sitemap_xml.replace("<lastmod>", "    <lastmod>")
    sitemap_xml = sitemap_xml.replace("<changefreq>", "    <changefreq>")
    sitemap_xml = sitemap_xml.replace("<priority>", "    <priority>")
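
    # After prettifying, each entry looks roughly like this (hypothetical URL
    # and dates):
    #   <url>
    #     <loc>https://vallenato.fr/la-gota-fria-valledupar/abc123xyz</loc>
    #     <lastmod>2019-08-01</lastmod>
    #     <changefreq>yearly</changefreq>
    #     <priority>0.5</priority>
    #   </url>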

    with open(sitemap_file, 'w') as file:
        file.write(sitemap_xml)

def website(args):
    # Retrieve the list of uploaded videos
    uploaded_videos = get_uploaded_videos(args, UPLOADED_VIDEOS_DUMP_FILE)
    logging.info("There are %d uploaded videos." % len(uploaded_videos))

    # Identify each video's location
    (uploaded_videos, locations) = identify_locations_names(uploaded_videos, LOCATION_SPECIAL_CASES_FILE, UPLOADED_VIDEOS_DUMP_FILE)

    # Determine the geolocation of each location
    locations = determine_geolocation(locations, GEOLOCATIONS_FILE)

    # Create a slug for each location (to be used for the website URLs)
    locations = determine_locations_slug(locations)

    # Add the videos to each location's array
    locations = add_videos_to_locations_array(uploaded_videos, locations)

    # Generate the JavaScript data file to be used by the website
    save_website_data(locations, WEBSITE_DATA_FILE)

    # Generate the development and production website files
    generate_website(locations, uploaded_videos)

    # Generate the Sitemap
    generate_sitemap(SITEMAP_FILE, locations, uploaded_videos)
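
# Minimal driver sketch (hypothetical, not part of the original module):
# website() needs an argparse-style namespace with at least a
# dump_uploaded_videos attribute (used in get_uploaded_videos), and
# yt_get_authenticated_service(args) may expect additional OAuth-related
# arguments not modeled here. The flag name below is an assumption.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Regenerate the Vallenato.fr website files.")
    parser.add_argument("--dump-uploaded-videos", dest="dump_uploaded_videos",
                        action="store_true",
                        help="Save the list of videos downloaded from YouTube to a local JSON file.")
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    website(args)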