Coverage for website.py: 99%

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of Vallenato.fr.
#
# Vallenato.fr is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Vallenato.fr is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Vallenato.fr. If not, see <http://www.gnu.org/licenses/>.

import logging
import os
import sys
import json
import shutil
from slugify import slugify
import sitemap.generator as generator
import re
import datetime

from youtube import HttpError
from youtube import yt_get_authenticated_service
from youtube import yt_get_my_uploads_list
from youtube import yt_list_my_uploaded_videos

# File that can contain the data downloaded from YouTube
UPLOADED_VIDEOS_DUMP_FILE = "data/uploaded_videos_dump.json"
# File containing the list of videos that have hardcoded locations
LOCATION_SPECIAL_CASES_FILE = "data/location_special_cases.json"
# File containing the already-identified latitudes/longitudes
GEOLOCATIONS_FILE = "data/geolocations.json"
# Output file used for the website
WEBSITE_DATA_FILE = "../website/src/data.js"
# Sitemap file
SITEMAP_FILE = "../website/prod/sitemap.xml"
# Versions of the external libraries
LEAFLET_VERSION = "1.5.1"
BOOTSTRAP_VERSION = "4.3.1"
JQUERY_VERSION = "3.3.1"
BOOTSTRAP_TOGGLE_VERSION = "3.6.1"
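
# Shape of the support files, inferred from the functions below (the values
# shown are illustrative, not real data):
# - LOCATION_SPECIAL_CASES_FILE maps a YouTube video ID to a location name,
#   e.g. {"abc123xyz": "Valledupar, Colombia"}
# - GEOLOCATIONS_FILE maps a location name to its coordinates,
#   e.g. {"Valledupar, Colombia": {"latitude": 10.46, "longitude": -73.25}}
# - UPLOADED_VIDEOS_DUMP_FILE caches the raw list of videos returned by YouTube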

def get_dumped_uploaded_videos(dump_file):
    uploaded_videos = []
    # Use a previously dumped file if it exists, to bypass the network transactions
    if os.path.exists(dump_file):
        with open(dump_file) as in_file:
            uploaded_videos = json.load(in_file)
    return uploaded_videos

def save_uploaded_videos(uploaded_videos, dump_file):
    with open(dump_file, 'w') as out_file:
        json.dump(uploaded_videos, out_file, sort_keys=True, indent=2)

def determine_videos_slug(uploaded_videos):
    logging.debug("Determining each video's slug...")
    for vid in uploaded_videos:
        vid["slug"] = slugify(vid["title"]).replace("-desde-", "-")
    return uploaded_videos
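
# Example (hypothetical title): slugify("La Gota Fría, desde Valledupar") yields
# "la-gota-fria-desde-valledupar"; dropping the "-desde-" connector leaves the
# shorter URL slug "la-gota-fria-valledupar".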

def get_uploaded_videos(args, dump_file):
    uploaded_videos = get_dumped_uploaded_videos(dump_file)
    if not uploaded_videos:
        youtube = yt_get_authenticated_service(args)
        # Get the list of videos uploaded to YouTube
        try:
            uploads_playlist_id = yt_get_my_uploads_list(youtube)
            if uploads_playlist_id:
                uploaded_videos = yt_list_my_uploaded_videos(uploads_playlist_id, youtube)
                logging.debug("Uploaded videos: %s" % uploaded_videos)
            else:
                logging.info('There is no uploaded videos playlist for this user.')
        except HttpError as e:
            logging.debug('An HTTP error %d occurred:\n%s' % (e.resp.status, e.content))
            logging.critical("Exiting...")
            sys.exit(19)
    # Create a slug for each video (to be used for the website URLs)
    uploaded_videos = determine_videos_slug(uploaded_videos)
    if args.dump_uploaded_videos:
        save_uploaded_videos(uploaded_videos, dump_file)
    return uploaded_videos

def identify_locations_names(uploaded_videos, location_special_cases_file, dump_file):
    with open(location_special_cases_file) as in_file:
        special_cases = json.load(in_file)
    locations = {}
    incomplete_locations = False
    for vid in uploaded_videos:
        vid["location"] = identify_single_location_name(vid, special_cases)
        if not vid["location"]:
            incomplete_locations = True
        elif vid["location"] not in locations:
            locations[vid["location"]] = {"latitude": None, "longitude": None}
    if incomplete_locations:
        # The script is going to exit; to avoid having to download from
        # YouTube again, save the downloaded information regardless of the
        # --dump_uploaded_videos parameter
        save_uploaded_videos(uploaded_videos, dump_file)
        logging.warning("Dumping the list of uploaded videos from YouTube to the '%s' file, so as not to have to download it again after you have edited the '%s' file." % (dump_file, location_special_cases_file))
        logging.critical("Please add the new/missing location to the file '%s'. Exiting..." % location_special_cases_file)
        sys.exit(20)
    logging.info("Found %d different location names." % len(locations))
    return (uploaded_videos, locations)

def identify_single_location_name(vid, special_cases):
    location = None
    if vid["id"] in special_cases:
        location = special_cases[vid["id"]]
        logging.debug("Video %s, location '%s'" % (vid["id"], location))
    else:
        for search_string in (", desde ", ", cerca de "):
            loc_index = vid["title"].find(search_string)
            if loc_index > 0:
                location = vid["title"][loc_index + len(search_string):]
                logging.debug("Video %s, location '%s'" % (vid["id"], location))
                break

    # Each video should now have a location identified. If not, this will end the script.
    if not location:
        logging.critical("No location found for %s, '%s'" % (vid["id"], vid["title"]))
    return location
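
# Example (hypothetical title): "El Mejoral, desde Bogotá, Colombia" contains
# the marker ", desde ", so everything after it -- "Bogotá, Colombia" --
# becomes the video's location name.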

def determine_geolocation(locations, geolocations_file):
    logging.debug("Searching geolocation for %d locations..." % len(locations))
    # Load the list of saved geolocations
    with open(geolocations_file) as in_file:
        geolocations = json.load(in_file)
    incomplete_geolocations = 0
    for l in locations:
        if l in geolocations and geolocations[l]["latitude"] and geolocations[l]["longitude"]:
            logging.debug("Geolocation found for %s: lat %f, lon %f" % (l, geolocations[l]["latitude"], geolocations[l]["longitude"]))
            locations[l]["latitude"] = geolocations[l]["latitude"]
            locations[l]["longitude"] = geolocations[l]["longitude"]
        else:
            logging.critical("No geolocation found for %s." % l)
            #TODO: Search for and suggest a geolocation
            geolocations[l] = {"latitude": None, "longitude": None}
            incomplete_geolocations += 1

    if incomplete_geolocations > 0:
        # Save the geolocations file with placeholders for the unknown latitudes and longitudes
        with open(geolocations_file, 'w') as out_file:
            json.dump(geolocations, out_file, sort_keys=True, indent=2)
        logging.critical("Please add the %d new/missing latitudes and longitudes to the file '%s'. Exiting..." % (incomplete_geolocations, geolocations_file))
        sys.exit(21)

    logging.info("Found geolocation information for all %d locations." % len(locations))
    return locations

def add_videos_to_locations_array(uploaded_videos, locations):
    logging.debug("Adding videos to each location's array...")
    for vid in uploaded_videos:
        if "videos" not in locations[vid["location"]]:
            locations[vid["location"]]["videos"] = []
        locations[vid["location"]]["videos"].append(vid)
    return locations

def determine_locations_slug(locations):
    logging.debug("Determining each location's slug...")
    for loc in locations:
        locations[loc]["slug"] = slugify(loc)
    return locations

def save_website_data(locations, website_data_file):
    json_content = json.dumps(locations, sort_keys=True, indent=2)
    # Make it JS (and not just JSON) for direct use in the HTML document
    js_content = "var locations = %s;" % json_content
    with open(website_data_file, 'w') as out_file:
        out_file.write(js_content)
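
# The resulting data.js therefore looks like this (shape sketch with
# hypothetical values):
#   var locations = {
#     "Valledupar, Colombia": {
#       "latitude": 10.46,
#       "longitude": -73.25,
#       "slug": "valledupar-colombia",
#       "videos": [ ... ]
#     }
#   };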

def ignored_files_in_prod(adir, filenames):
    ignored_files = []
    if adir == "../website/src":
        ignored_files = [
            'bootstrap-%s-dist' % BOOTSTRAP_VERSION,
            'bootstrap4-toggle-%s' % BOOTSTRAP_TOGGLE_VERSION,
            'jquery-%s.slim.min.js' % JQUERY_VERSION,
            'leaflet'
        ]
    if adir == "../website/src/aprender":
        ignored_files = [
            'temp',
            'videos'
        ]
    return [filename for filename in filenames if filename in ignored_files]
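
# This function matches the callable contract of shutil.copytree's ignore=
# argument: it receives the directory being visited and the list of its entry
# names, and returns the subset of names that should not be copied.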

def get_stats(locations, uploaded_videos):
    num_videos = len(uploaded_videos)

    songs = []
    skipped_titles = ["Vallenato at Epic", "La Guaneña navideña"]
    for v in uploaded_videos:
        song = v["title"].split(",")[0]
        if song not in songs and song not in skipped_titles:
            songs.append(song)
    num_songs = len(songs)

    num_places = len(locations)

    countries = []
    for l in locations:
        country = l.split(",")[-1]
        if country not in countries:
            countries.append(country)
    num_countries = len(countries)

    navidad_2017 = datetime.date(2017, 12, 25)
    today = datetime.date.today()
    years = today.year - navidad_2017.year
    if today.month == 12:  # December
        duration_since_navidad_2017 = "%d años" % years
    elif today.month == 1:  # January
        duration_since_navidad_2017 = "%d años" % (years - 1)
    else:
        duration_since_navidad_2017 = "%d años y %d meses" % (years - 1, today.month)
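
    # Worked example (hypothetical run date): on 2020-03-15, years = 2020 - 2017 = 3;
    # since it is neither December nor January, the text becomes "2 años y 3 meses",
    # approximately the time elapsed since 2017-12-25.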

    stats = "El Vallenatero Francés les presenta %d videos de %d canciones tocadas en %d lugares de %d países. Él empezó a aprender el Acordeón Vallenato en la Navidad 2017 (hace más o menos %s)." % \
        (num_videos, num_songs, num_places, num_countries, duration_since_navidad_2017)

    return stats

def generate_website(locations, uploaded_videos):
    input_src_folder = "../website/src"
    output_prod_folder = "../website/prod"

    # Delete the previous production output folder (if it exists)
    if os.path.exists(output_prod_folder):
        shutil.rmtree(output_prod_folder)

    # Update statistics
    stats = get_stats(locations, uploaded_videos)
    index_src_file = "%s/index.html" % input_src_folder
    with open(index_src_file, 'r') as file:
        index_data = file.read()
    index_data = re.sub('<div id="stats">.*</div>', '<div id="stats">%s</div>' % stats, index_data)
    with open(index_src_file, 'w') as file:
        file.write(index_data)

    # Update the values accordingly for prod
    # Main difference between the development (src) and production websites:
    # - src contains a full copy of the Leaflet, Bootstrap and jQuery libraries
    # - prod uses CDNs

    # Copy src to the prod folder, ignoring the files and folders replaced by CDNs in prod
    # The videos are also not copied, as we're going to hard-link them
    shutil.copytree(input_src_folder, output_prod_folder, ignore=ignored_files_in_prod)

    # Create hard links for the videos in the prod folder
    # (hard links can only be created for files, so the folder structure must be recreated)
    os.mkdir("%s/aprender/videos" % output_prod_folder)
    for d in os.listdir("%s/aprender/videos" % input_src_folder):
        if d not in ["TODO", "blabla-bla"]:
            # Create a folder for that tutorial's video files
            #TODO: copy the folder without its content in order to keep the original
            # folder's creation date, so as not to confuse the rsync upload process
            os.mkdir("%s/aprender/videos/%s" % (output_prod_folder, d))
            for f in os.listdir("%s/aprender/videos/%s" % (input_src_folder, d)):
                # Create a hard link to the video file
                os.link("%s/aprender/videos/%s/%s" % (input_src_folder, d, f),
                        "%s/aprender/videos/%s/%s" % (output_prod_folder, d, f))

    # Update the links to the external libraries (CDN)
    # Read the prod files
    with open("%s/index.html" % output_prod_folder, 'r') as file:
        index_data = file.read()
    with open("%s/aprender/index.html" % output_prod_folder, 'r') as file:
        index_aprender_data = file.read()
    # Replace the target strings
    # Leaflet
    index_data = index_data.replace(
        '<link rel="stylesheet" href="leaflet/%s/leaflet.css">' % LEAFLET_VERSION,
        '<link rel="stylesheet" href="https://unpkg.com/leaflet@%s/dist/leaflet.css"\n integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="\n crossorigin=""/>' % LEAFLET_VERSION)
    index_data = index_data.replace(
        '<script type = "text/javascript" src="leaflet/%s/leaflet.js"></script>' % LEAFLET_VERSION,
        '<script src="https://unpkg.com/leaflet@%s/dist/leaflet.js"\n integrity="sha512-GffPMF3RvMeYyc1LWMHtK8EbPv0iNZ8/oTtHPx9/cc2ILxQ+u905qIwdpULaqDkyBKgOaB57QTMg7ztg8Jm2Og=="\n crossorigin="">\n </script>' % LEAFLET_VERSION)
    # Bootstrap
    index_data = index_data.replace(
        '<link rel="stylesheet" href="bootstrap-%s-dist/css/bootstrap.min.css">' % BOOTSTRAP_VERSION,
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/%s/css/bootstrap.min.css"\n integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"\n crossorigin="anonymous">' % BOOTSTRAP_VERSION)
    index_data = index_data.replace(
        '<script src="bootstrap-%s-dist/js/bootstrap.min.js"></script>' % BOOTSTRAP_VERSION,
        '<script src="https://stackpath.bootstrapcdn.com/bootstrap/%s/js/bootstrap.min.js"\n integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"\n crossorigin="anonymous"></script>' % BOOTSTRAP_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<link rel="stylesheet" href="../bootstrap-%s-dist/css/bootstrap.min.css">' % BOOTSTRAP_VERSION,
        '<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/%s/css/bootstrap.min.css"\n integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"\n crossorigin="anonymous">' % BOOTSTRAP_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../bootstrap-%s-dist/js/bootstrap.min.js"></script>' % BOOTSTRAP_VERSION,
        '<script src="https://stackpath.bootstrapcdn.com/bootstrap/%s/js/bootstrap.min.js"\n integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"\n crossorigin="anonymous"></script>' % BOOTSTRAP_VERSION)
    # jQuery (for Bootstrap)
    index_data = index_data.replace(
        '<script src="jquery-%s.slim.min.js"></script>' % JQUERY_VERSION,
        '<script src="https://code.jquery.com/jquery-%s.slim.min.js"\n integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"\n crossorigin="anonymous"></script>' % JQUERY_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../jquery-%s.slim.min.js"></script>' % JQUERY_VERSION,
        '<script src="https://code.jquery.com/jquery-%s.slim.min.js"\n integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"\n crossorigin="anonymous"></script>' % JQUERY_VERSION)
    # Bootstrap-toggle
    index_aprender_data = index_aprender_data.replace(
        '<link rel="stylesheet" href="../bootstrap4-toggle-%s/css/bootstrap4-toggle.min.css">' % BOOTSTRAP_TOGGLE_VERSION,
        '<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/gitbrent/bootstrap4-toggle@%s/css/bootstrap4-toggle.min.css"\n integrity="sha384-yakM86Cz9KJ6CeFVbopALOEQGGvyBFdmA4oHMiYuHcd9L59pLkCEFSlr6M9m434E"\n crossorigin="anonymous">' % BOOTSTRAP_TOGGLE_VERSION)
    index_aprender_data = index_aprender_data.replace(
        '<script src="../bootstrap4-toggle-%s/js/bootstrap4-toggle.min.js"></script>' % BOOTSTRAP_TOGGLE_VERSION,
        '<script src="https://cdn.jsdelivr.net/gh/gitbrent/bootstrap4-toggle@%s/js/bootstrap4-toggle.min.js"\n integrity="sha384-Q9RsZ4GMzjlu4FFkJw4No9Hvvm958HqHmXI9nqo5Np2dA/uOVBvKVxAvlBQrDhk4"\n crossorigin="anonymous"></script>' % BOOTSTRAP_TOGGLE_VERSION)

    # Save the edited prod files
    with open("%s/index.html" % output_prod_folder, 'w') as file:
        file.write(index_data)
    with open("%s/aprender/index.html" % output_prod_folder, 'w') as file:
        file.write(index_aprender_data)

    # Create full HTML pages for the prod /aprender tutorials
    with open("../website/src/aprender/tutoriales.js") as in_file:
        # Remove the JS bits to keep only the JSON content
        tutoriales_json_content = in_file.read()[17:-2]
    tutoriales = json.loads(tutoriales_json_content)
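    # Note: the slice above assumes tutoriales.js has the exact shape
    # 'var tutoriales = <JSON>;' plus a trailing newline, i.e. 17 leading
    # characters ("var tutoriales = ") and 2 trailing ones (";\n"), mirroring
    # how save_website_data() writes data.js.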
    for t in tutoriales:
        output_prod_tutorial_file = "%s/aprender/%s.html" % \
            (output_prod_folder, t["slug"])
        shutil.copy("%s/aprender/index.html" % output_prod_folder,
                    output_prod_tutorial_file)
        with open(output_prod_tutorial_file, 'r') as file:
            prod_tutorial_file_data = file.read()
        if t["author"]:
            tuto_title = "%s - %s" % (t["title"], t["author"])
        else:
            tuto_title = t["title"]
        prod_tutorial_file_data = prod_tutorial_file_data.replace(
            "<title>Aprender a tocar el Acordeón Vallenato - El Vallenatero Francés</title>",
            "<title>%s - Aprender a tocar el Acordeón Vallenato</title>" % tuto_title
        )
        prod_tutorial_file_data = prod_tutorial_file_data.replace(
            '<h1 id="tutorialFullTitle">TITLE</h1>',
            '<h1 id="tutorialFullTitle">%s</h1>' % tuto_title
        )
        with open(output_prod_tutorial_file, 'w') as file:
            file.write(prod_tutorial_file_data)

def generate_sitemap(sitemap_file, locations, uploaded_videos):
    base_url = "https://vallenato.fr"
    sitemap = generator.Sitemap()

    # vallenato.fr index
    sitemap.add(base_url,
                # Timestamp of the most recently uploaded video
                lastmod=uploaded_videos[0]["publishedAt"][:10],
                changefreq="monthly",
                priority="1.0")

    # Locations and individual videos
    sitemap.add("%s/mundo-entero" % base_url,
                # Timestamp of the most recently uploaded video
                lastmod=uploaded_videos[0]["publishedAt"][:10],
                changefreq="monthly",
                priority="0.6")
    for l in locations:
        # Locations
        sitemap.add("%s/%s" % (base_url, locations[l]["slug"]),
                    # Timestamp of the most recently uploaded video at that location
                    lastmod=locations[l]["videos"][0]["publishedAt"][:10],
                    changefreq="yearly",
                    priority="0.5")
        for v in locations[l]["videos"]:
            # Individual videos
            sitemap.add("%s/%s/%s" % (base_url, v["slug"], v["id"]),
                        # Timestamp of that video
                        lastmod=v["publishedAt"][:10],
                        changefreq="yearly",
                        priority="0.5")

    # Aprender index
    sitemap.add("%s/aprender/" % base_url,
                changefreq="monthly",
                priority="0.9")

    # Aprender: individual tutorials
    with open("../website/src/aprender/tutoriales.js") as in_file:
        # Remove the JS bits to keep only the JSON content
        tutoriales_json_content = in_file.read()[17:-2]
    tutoriales = json.loads(tutoriales_json_content)
    for t in tutoriales:
        tuto_url = "%s/aprender/%s" % (base_url, t["slug"])
        sitemap.add(tuto_url,
                    changefreq="yearly",
                    priority="0.5")

    sitemap_xml = sitemap.generate()

    # Prettify the XML "by hand"
    sitemap_xml = sitemap_xml.replace("<url>", "  <url>")
    sitemap_xml = sitemap_xml.replace("</url>", "  </url>")
    sitemap_xml = sitemap_xml.replace("<loc>", "    <loc>")
    sitemap_xml = sitemap_xml.replace("<lastmod>", "    <lastmod>")
    sitemap_xml = sitemap_xml.replace("<changefreq>", "    <changefreq>")
    sitemap_xml = sitemap_xml.replace("<priority>", "    <priority>")
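
    # After prettifying, each entry looks roughly like this (hypothetical URL
    # and dates):
    #   <url>
    #     <loc>https://vallenato.fr/la-gota-fria-valledupar/abc123xyz</loc>
    #     <lastmod>2019-08-01</lastmod>
    #     <changefreq>yearly</changefreq>
    #     <priority>0.5</priority>
    #   </url>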

    with open(sitemap_file, 'w') as file:
        file.write(sitemap_xml)

def website(args):
    # Retrieve the list of uploaded videos
    uploaded_videos = get_uploaded_videos(args, UPLOADED_VIDEOS_DUMP_FILE)
    logging.info("There are %d uploaded videos." % len(uploaded_videos))

    # Identify each video's location
    (uploaded_videos, locations) = identify_locations_names(uploaded_videos, LOCATION_SPECIAL_CASES_FILE, UPLOADED_VIDEOS_DUMP_FILE)

    # Determine the geolocation of each location
    locations = determine_geolocation(locations, GEOLOCATIONS_FILE)

    # Create a slug for each location (to be used for the website URLs)
    locations = determine_locations_slug(locations)

    # Add the videos to each location's array
    locations = add_videos_to_locations_array(uploaded_videos, locations)

    # Generate the JavaScript data file to be used by the website
    save_website_data(locations, WEBSITE_DATA_FILE)

    # Generate the development and production website files
    generate_website(locations, uploaded_videos)

    # Generate the Sitemap
    generate_sitemap(SITEMAP_FILE, locations, uploaded_videos)
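
# Minimal driver sketch (hypothetical, not part of the original module):
# website() needs an argparse-style namespace with at least a
# dump_uploaded_videos attribute (used in get_uploaded_videos), and
# yt_get_authenticated_service(args) may expect additional OAuth-related
# arguments not modeled here. The flag name below is an assumption.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Regenerate the Vallenato.fr website files.")
    parser.add_argument("--dump-uploaded-videos", dest="dump_uploaded_videos",
                        action="store_true",
                        help="Save the list of videos downloaded from YouTube to a local JSON file.")
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    website(args)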