File indexing completed on 2025-01-18 10:18:42
0001 '''Check that a workshop's index.html metadata is valid. See the
0002 docstrings on the checking functions for a summary of the checks.
0003 '''
0004
0005
import functools
import os
import re
import sys
from datetime import date

from reporter import Reporter
from util import split_metadata, load_yaml, check_unwanted_files
0012
0013
# Regular expressions used by the check_* functions below. They are all
# applied with re.match, so they are anchored at the start of the value
# only; trailing text after a match is not rejected.

# Loose e-mail shape: non-'@' characters, '@', non-'@' characters, a dot,
# and more non-'@' characters.
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
# Start/end time range, either 12-hour with lowercase am/pm on both ends or
# 24-hour, with '-' or 'to' between the two times. The pattern contains no
# whitespace; check_humantime strips spaces before matching.
HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
# Eventbrite registration key: nine or ten leading digits.
EVENTBRITE_PATTERN = r'\d{9,10}'
# Anything that starts with http:// or https:// followed by at least one
# character.
URL_PATTERN = r'https?://.+'


# Accepted values for the "carpentry" key.
CARPENTRIES = ("dc", "swc", "lc", "cp")
# Placeholder contact address that check_emails rejects.
DEFAULT_CONTACT_EMAIL = 'team@carpentries.org'

# Printed to stderr by main() when the argument count is wrong.
USAGE = 'Usage: "workshop_check.py path/to/root/directory"'
0024
0025
0026
0027
# Lowercase two-letter country codes accepted by check_country. The list is
# also joined, in this order, into the 'country' error message in HANDLERS.
# NOTE(review): this is a hard-coded snapshot (it contains 'an' and 'eu' and
# has no 'ss', for example) -- confirm against the current ISO 3166-1 list.
ISO_COUNTRY = [
    'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
    'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
    'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
    'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
    'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
    'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
    'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
    'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
    'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
    'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
    'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
    'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
    'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
    'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
    'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
    'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
    'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
    'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
    'ye', 'yt', 'za', 'zm', 'zw'
]
0051
# Lowercase two-letter language codes accepted by check_language. The list
# is also joined, in this order, into the 'language' error message in
# HANDLERS.
ISO_LANGUAGE = [
    'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
    'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
    'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
    'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
    'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
    'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
    'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
    'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
    'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
    'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
    'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
    'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
    'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
    'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
    'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
    'yo', 'za', 'zh', 'zu'
]
0070
0071
def look_for_fixme(func):
    """Decorator to fail test if text argument starts with "FIXME".

    The wrapped checker is skipped and ``False`` is returned when its single
    argument is a string whose first non-whitespace characters are
    ``FIXME``. Any other value (including ``None`` and non-strings) is
    handed to ``func`` unchanged and its result is returned.

    ``functools.wraps`` copies the checker's name and docstring onto the
    wrapper, so the docstrings that describe each check stay reachable on
    the decorated functions.
    """

    @functools.wraps(func)
    def inner(arg):
        # isinstance() already excludes None, so no separate None test.
        if isinstance(arg, str) and arg.lstrip().startswith('FIXME'):
            return False
        return func(arg)

    return inner
0082
0083
@look_for_fixme
def check_layout(layout):
    '''Return True only when the YAML header "layout" is "workshop".'''

    expected = 'workshop'
    return layout == expected
0089
0090
@look_for_fixme
def check_carpentry(layout):
    '''Return True when the "carpentry" header value is listed in CARPENTRIES.

    Despite its name, the parameter holds the carpentry code being checked
    ("dc", "swc", "lc", or "cp").
    '''

    is_known = layout in CARPENTRIES
    return is_known
0096
0097
@look_for_fixme
def check_country(country):
    '''Return True when "country" is one of the lowercase two-letter codes
    in ISO_COUNTRY.'''

    is_listed = country in ISO_COUNTRY
    return is_listed
0103
0104
@look_for_fixme
def check_language(language):
    '''Return True when "language" is one of the lowercase two-letter codes
    in ISO_LANGUAGE.'''

    is_listed = language in ISO_LANGUAGE
    return is_listed
0110
0111
@look_for_fixme
def check_humandate(date):
    """
    'humandate' must be a human-readable date with a 3-letter month
    and 4-digit year.  Examples include 'Feb 18-20, 2025' and 'Feb 18
    and 20, 2025'.  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Carpentries web site.

    Returns True when the value is a string whose first three characters
    contain no space, whose fourth character is a space, and whose text
    after the last comma parses as an integer. Returns False otherwise;
    malformed values never raise. The year's digit count is not enforced.
    """

    # A required key is checked even when its value is empty/null, so a
    # non-string must fail the check rather than raise.
    if not isinstance(date, str):
        return False

    # Split on the LAST comma: the year follows it, and extra commas in the
    # day list (e.g. 'Feb 18, 20, 2025') must not break the unpacking.
    month_dates, comma, year = date.rpartition(',')
    if not comma:
        return False

    # The first three characters are the month abbreviation: no spaces.
    if ' ' in month_dates[:3]:
        return False

    # The fourth character must be a space ("February" is illegal). Slicing
    # yields '' for a too-short prefix instead of raising IndexError.
    if month_dates[3:4] != ' ':
        return False

    # The year must be numeric.
    try:
        int(year)
    except ValueError:
        return False

    return True
0143
0144
@look_for_fixme
def check_humantime(time):
    """
    'humantime' is a human-readable start and end time for the
    workshop, such as '09:00 - 16:00'.

    Spaces are removed before the value is matched against
    HUMANTIME_PATTERN. Returns False for any non-string value instead of
    raising, so an empty or null required key is reported as invalid.
    """

    if not isinstance(time, str):
        return False

    compact = time.replace(' ', '')
    return bool(re.match(HUMANTIME_PATTERN, compact))
0153
0154
def check_date(this_date):
    """
    Validate a 'startdate' or 'enddate' value.

    These are the machine-readable start and end dates of the workshop and
    are written as YYYY-MM-DD, e.g. '2015-07-01'. The value passes only if
    it is a datetime.date instance (presumably the YAML loader has already
    converted a well-formed date -- confirm against the loader in util).
    """

    is_date_object = isinstance(this_date, date)
    return is_date_object
0164
0165
@look_for_fixme
def check_latitude_longitude(latlng):
    """
    'latlng' must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Returns True when the value is a string with exactly two
    comma-separated floats, the first within [-90, 90] and the second
    within [-180, 180]. Returns False otherwise, including for non-string
    values, which would otherwise raise on ``.split``.
    """

    if not isinstance(latlng, str):
        return False

    try:
        # ValueError covers both a wrong number of parts and a part that is
        # not a float.
        lat, lng = (float(part) for part in latlng.split(','))
    except ValueError:
        return False

    return (-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0)
0180
0181
def check_instructors(instructors):
    """
    'instructor' must be a non-empty comma-separated list of quoted
    names, e.g. ['First name', 'Second name', ...'].  Do not use 'TBD'
    or other placeholders.

    Only the container is validated: the value must be a list holding at
    least one entry. The entries themselves are not inspected.
    """

    if not isinstance(instructors, list):
        return False
    return len(instructors) != 0
0191
0192
def check_helpers(helpers):
    """
    'helper' must be a comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...'].  The list may be empty.
    Do not use 'TBD' or other placeholders.

    Only the container is validated: any list, empty or not, passes. The
    entries themselves are not inspected.
    """

    # A length is never negative, so being a list is the whole test.
    return isinstance(helpers, list)
0202
0203
@look_for_fixme
def check_emails(emails):
    """
    'emails' must be a comma-separated list of valid email addresses.
    The list may be empty.  A valid email address consists of characters,
    an '@', and more characters.  It should not contain the default contact
    email address (DEFAULT_CONTACT_EMAIL).

    Returns False when the value is not a list, or when any entry is not a
    string, does not match EMAIL_PATTERN, or equals the default contact
    address. Returns True otherwise.
    """

    if not isinstance(emails, list):
        return False

    for email in emails:
        # Non-string entries (e.g. a null list item) would make re.match
        # raise TypeError; treat them as invalid addresses instead.
        if not isinstance(email, str):
            return False
        if email == DEFAULT_CONTACT_EMAIL:
            return False
        if not re.match(EMAIL_PATTERN, email):
            return False

    return True
0221
0222
def check_eventbrite(eventbrite):
    """
    'eventbrite' (the Eventbrite registration key) must be 9 or more
    digits.  It may appear as an integer or as a string.

    Integers are converted to their decimal text and held to the same
    EVENTBRITE_PATTERN as strings, so a short or negative number no longer
    passes. Booleans and values that are neither int nor str return False
    instead of passing or raising.
    """

    # bool is a subclass of int; True/False is never a registration key.
    if isinstance(eventbrite, bool):
        return False

    if isinstance(eventbrite, int):
        eventbrite = str(eventbrite)

    if not isinstance(eventbrite, str):
        return False

    return bool(re.match(EVENTBRITE_PATTERN, eventbrite))
0233
0234
@look_for_fixme
def check_collaborative_notes(collaborative_notes):
    """
    'collaborative_notes' must be a valid URL.

    The value passes when it is a string that starts with 'http://' or
    'https://' followed by at least one character (URL_PATTERN). Non-string
    values return False instead of raising inside re.match.
    """

    if not isinstance(collaborative_notes, str):
        return False

    return bool(re.match(URL_PATTERN, collaborative_notes))
0242
0243
@look_for_fixme
def check_pass(value):
    """
    Accept any value unconditionally.

    Used for fields such as the workshop address, for which no sensible
    validation is feasible. The look_for_fixme decorator still rejects
    strings that start with "FIXME" before this body runs.
    """

    accepted = True
    return accepted
0252
0253
# Maps each recognised header key to a tuple of
# (is the key mandatory?, validation function, message reported on failure).
# check_file iterates this dict, so the insertion order is the report order.
HANDLERS = {
    'layout': (
        True,
        check_layout,
        'layout isn\'t "workshop"',
    ),

    'carpentry': (
        True,
        check_carpentry,
        "carpentry isn't in " + ', '.join(CARPENTRIES),
    ),

    'country': (
        True,
        check_country,
        'country invalid: must use lowercase two-letter ISO code from '
        + ', '.join(ISO_COUNTRY),
    ),

    'language': (
        False,
        check_language,
        'language invalid: must use lowercase two-letter ISO code from '
        + ', '.join(ISO_LANGUAGE),
    ),

    'humandate': (
        True,
        check_humandate,
        'humandate invalid. Please use three-letter months like '
        '"Jan" and four-letter years like "2025"',
    ),

    'humantime': (
        True,
        check_humantime,
        "humantime doesn't include numbers",
    ),

    'startdate': (
        True,
        check_date,
        'startdate invalid. Must be of format year-month-day, '
        'i.e., 2014-01-31',
    ),

    'enddate': (
        False,
        check_date,
        'enddate invalid. Must be of format year-month-day, i.e.,'
        ' 2014-01-31',
    ),

    'latlng': (
        True,
        check_latitude_longitude,
        'latlng invalid. Check that it is two floating point '
        'numbers, separated by a comma',
    ),

    'instructor': (
        True,
        check_instructors,
        "instructor list isn't a valid list of format "
        '["First instructor", "Second instructor",..]',
    ),

    'helper': (
        True,
        check_helpers,
        "helper list isn't a valid list of format "
        '["First helper", "Second helper",..]',
    ),

    'email': (
        True,
        check_emails,
        # The only replacement field is the trailing "{0}", so formatting
        # the whole concatenated literal gives the same text.
        (
            "contact email list isn't a valid list of format "
            '["me@example.org", "you@example.org",..] or contains incorrectly formatted email addresses or '
            '"{0}".'
        ).format(DEFAULT_CONTACT_EMAIL),
    ),

    'eventbrite': (
        False,
        check_eventbrite,
        'Eventbrite key appears invalid',
    ),

    'collaborative_notes': (
        False,
        check_collaborative_notes,
        'Collaborative Notes URL appears invalid',
    ),

    'venue': (
        False,
        check_pass,
        'venue name not specified',
    ),

    'address': (
        False,
        check_pass,
        'address not specified',
    ),
}
0308
0309
# Keys whose HANDLERS entry is flagged mandatory (first tuple element True).
REQUIRED = {key for key, spec in HANDLERS.items() if spec[0]}


# Every other known key is optional.
OPTIONAL = set(HANDLERS) - REQUIRED
0314
0315
def check_blank_lines(reporter, raw):
    """
    Report any blank lines found inside the raw header text.

    ``raw`` is stripped and split on newlines; every line that is empty or
    whitespace-only is collected with its zero-based index within the
    stripped text. The result is passed to ``reporter.check`` with a
    location of None.
    """

    blank = []
    for index, text in enumerate(raw.strip().split('\n')):
        if not text.strip():
            blank.append((index, text))

    summary = ', '.join(
        '{0}: {1}'.format(index, text.rstrip()) for (index, text) in blank)
    reporter.check(not blank, None, 'Blank line(s) in header: {0}', summary)
0327
0328
def check_categories(reporter, left, right, msg):
    """
    Report the entries of ``left`` that are absent from ``right``.

    The check passes when the set difference is empty. Otherwise ``msg``
    and the sorted offending entries are handed to ``reporter.check`` with
    a location of None.
    """

    offending = left - right
    is_clean = not offending
    reporter.check(is_clean,
                   None,
                   '{0}: offending entries {1}',
                   msg, sorted(offending))
0339
0340
def check_file(reporter, path, data):
    """
    Validate the metadata header of one workshop file.

    The header is extracted with split_metadata (presumably returning the
    raw header text, the parsed header mapping, and the body -- confirm in
    util), checked for blank lines, run through every handler in HANDLERS,
    and finally compared against the REQUIRED and OPTIONAL key sets. All
    findings go to ``reporter``.
    """

    # Pull the header out of the file contents.
    raw, header, body = split_metadata(path, data)

    # The raw header text must not contain blank lines.
    check_blank_lines(reporter, raw)

    # Validate each known key. Mandatory keys are always validated when
    # present; optional keys only when they carry a truthy value.
    for category, (required, handler, message) in HANDLERS.items():
        if category not in header:
            if required:
                reporter.add(None,
                             'Missing mandatory key "{0}"',
                             category)
            continue

        if required or header[category]:
            reporter.check(handler(header[category]),
                           None,
                           '{0}\n actual value "{1}"',
                           message, header[category])

    # Report mandatory keys that are absent and keys that are not known.
    seen_categories = set(header.keys())
    known_categories = REQUIRED.union(OPTIONAL)
    check_categories(reporter, REQUIRED, seen_categories,
                     'Missing categories')
    check_categories(reporter, seen_categories, known_categories,
                     'Superfluous categories')
0376
0377
def check_config(reporter, filename):
    """
    Check YAML configuration file.

    Loads ``filename`` with load_yaml (assumed to return a mapping --
    confirm in util) and reports to ``reporter`` when 'kind' is not
    'workshop' or when 'carpentry' is not one of CARPENTRIES. A missing key
    is read as None and therefore reported.
    """

    config = load_yaml(filename)

    kind = config.get('kind', None)
    reporter.check(kind == 'workshop',
                   filename,
                   'Missing or unknown kind of event: {0}',
                   kind)

    carpentry = config.get('carpentry', None)
    # Use the shared CARPENTRIES tuple so this check and check_carpentry
    # cannot drift apart.
    reporter.check(carpentry in CARPENTRIES,
                   filename,
                   'Missing or unknown carpentry: {0}',
                   carpentry)
0396
0397
def main():
    '''Run as the main program.

    Expects exactly one command-line argument, the workshop root directory.
    Checks its _config.yml, looks for unwanted files, validates the header
    of its index.html, and then calls the reporter's report() method.
    Prints USAGE to stderr and exits with status 1 on a wrong argument
    count.
    '''

    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    (root_dir,) = arguments
    config_file = os.path.join(root_dir, '_config.yml')
    index_file = os.path.join(root_dir, 'index.html')

    reporter = Reporter()
    check_config(reporter, config_file)
    check_unwanted_files(root_dir, reporter)

    with open(index_file, encoding='utf-8') as reader:
        data = reader.read()
    check_file(reporter, index_file, data)

    reporter.report()


if __name__ == '__main__':
    main()