Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:18:35

0001 '''Check that a workshop's index.html metadata is valid.  See the
0002 docstrings on the checking functions for a summary of the checks.
0003 '''
0004 
0005 
0006 import sys
0007 import os
0008 import re
0009 from datetime import date
0010 from util import split_metadata, load_yaml, check_unwanted_files
0011 from reporter import Reporter
0012 
# Metadata field patterns.  All of them are applied to header values by the
# check_* functions below.

# Something, an '@', something, a '.', something -- with no further '@'
# inside any of the three parts.
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
# A start and an end time joined by '-' or 'to'.  Two alternatives: both times
# in 12-hour form with a lowercase 'am'/'pm' suffix, or both in 24-hour form.
# The pattern contains no whitespace; check_humantime() removes spaces from
# the value before matching.
HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
# Nine or ten consecutive digits.
EVENTBRITE_PATTERN = r'\d{9,10}'
# 'http://' or 'https://' followed by at least one character.
URL_PATTERN = r'https?://.+'

# Defaults.
# Accepted values for the "carpentry" key.
CARPENTRIES = ("dc", "swc", "lc", "cp")
# Placeholder contact address; check_emails() rejects it.
DEFAULT_CONTACT_EMAIL = 'team@carpentries.org'

# Printed to stderr by main() when the argument count is wrong.
USAGE = 'Usage: "workshop_check.py path/to/root/directory"'
0024 
# Country and language codes.  Note that codes mean different things: 'ar'
# is 'Arabic' as a language but 'Argentina' as a country.

# Lowercase two-letter codes accepted by check_country().  Kept as an ordered
# list because the whole list is also joined into the 'country' error message
# in HANDLERS.
ISO_COUNTRY = [
    'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
    'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
    'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
    'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
    'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
    'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
    'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
    'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
    'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
    'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
    'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
    'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
    'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
    'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
    'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
    'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
    'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
    'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
    'ye', 'yt', 'za', 'zm', 'zw'
]
0051 
# Lowercase two-letter codes accepted by check_language().  Kept as an ordered
# list because the whole list is also joined into the 'language' error message
# in HANDLERS.
ISO_LANGUAGE = [
    'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
    'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
    'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
    'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
    'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
    'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
    'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
    'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
    'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
    'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
    'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
    'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
    'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
    'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
    'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
    'yo', 'za', 'zh', 'zu'
]
0070 
0071 
def look_for_fixme(func):
    """Decorator to fail test if text argument starts with "FIXME".

    The returned wrapper takes a single argument.  If that argument is a
    string whose first non-whitespace characters are "FIXME", the wrapper
    returns False without calling ``func``.  Any other argument (including
    None and non-string values) is passed to ``func`` unchanged and its
    result is returned.
    """

    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from functools import wraps

    # wraps() preserves the checker's __name__ and docstring, which would
    # otherwise be replaced by those of ``inner``.
    @wraps(func)
    def inner(arg):
        # isinstance() already excludes None, so no separate None test.
        if isinstance(arg, str) and arg.lstrip().startswith('FIXME'):
            return False
        return func(arg)
    return inner
0082 
0083 
0084 @look_for_fixme
def check_layout(layout):
    """Return True only when the header's "layout" value is "workshop"."""

    required_layout = 'workshop'
    return layout == required_layout
0089 
0090 
0091 @look_for_fixme
def check_carpentry(layout):
    """Return True when the "carpentry" value is listed in CARPENTRIES.

    Despite its name, the ``layout`` parameter receives the header's
    "carpentry" value, which must be "dc", "swc", "lc", or "cp".
    """

    is_known_carpentry = layout in CARPENTRIES
    return is_known_carpentry
0096 
0097 
0098 @look_for_fixme
def check_country(country):
    """Return True when "country" is one of the codes in ISO_COUNTRY.

    The codes are lowercase ISO-3166 two-letter codes; the comparison is
    exact, so uppercase input is rejected.
    """

    is_listed = country in ISO_COUNTRY
    return is_listed
0103 
0104 
0105 @look_for_fixme
def check_language(language):
    """Return True when "language" is one of the codes in ISO_LANGUAGE.

    The codes are lowercase ISO-639 two-letter codes; the comparison is
    exact, so uppercase input is rejected.
    """

    is_listed = language in ISO_LANGUAGE
    return is_listed
0110 
0111 
0112 @look_for_fixme
def check_humandate(date):
    """
    'humandate' must be a human-readable date with a 3-letter month
    and 4-digit year.  Examples include 'Feb 18-20, 2025' and 'Feb 18
    and 20, 2025'.  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Carpentries web site.

    What is actually verified: the value is a string containing a comma;
    the text before the last comma starts with three non-space characters
    followed by a space; and the text after the last comma parses as an
    integer.  Returns True if all of that holds, False otherwise.
    """

    # A required key that is present but empty arrives here as None; treat
    # any non-string as invalid rather than raising.
    if not isinstance(date, str) or ',' not in date:
        return False

    # Split on the *last* comma so that the year is always the final part,
    # even if the day list itself contains commas ('Feb 18, 20, 2025').
    month_dates, year = date.rsplit(',', 1)

    # There must be room for a three-character month plus a separator;
    # otherwise indexing position 3 below would fail.
    if len(month_dates) < 4:
        return False

    # The first three characters of month_dates are not empty
    if ' ' in month_dates[:3]:
        return False

    # But the fourth character is empty ("February" is illegal)
    if month_dates[3] != ' ':
        return False

    # year contains *only* numbers
    try:
        int(year)
    except ValueError:
        return False

    return True
0143 
0144 
0145 @look_for_fixme
def check_humantime(time):
    """
    'humantime' is a human-readable start and end time for the
    workshop, such as '09:00 - 16:00'.

    All spaces are removed from the value before it is compared with
    HUMANTIME_PATTERN.  The pattern only has to match at the start of the
    value, so trailing text after the end time does not cause a failure.
    Returns False for any non-string value.
    """

    # A required key that is present but empty arrives here as None; report
    # it as invalid instead of failing on .replace().
    if not isinstance(time, str):
        return False

    return bool(re.match(HUMANTIME_PATTERN, time.replace(' ', '')))
0153 
0154 
def check_date(this_date):
    """
    Validate a machine-readable 'startdate' or 'enddate'.

    These must be written in YYYY-MM-DD format, e.g., '2015-07-01'.
    The test itself is purely a type check: the value passes only if it
    is a datetime.date instance.
    """

    # YAML turns a well-formed date into datetime.date by itself, so a
    # value that is still some other type was not a valid date.
    is_date = isinstance(this_date, date)
    return is_date
0164 
0165 
0166 @look_for_fixme
def check_latitude_longitude(latlng):
    """
    'latlng' must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Returns True when the value is a string of exactly two comma-separated
    numbers with latitude in [-90, 90] and longitude in [-180, 180];
    False otherwise, including for non-string values.
    """

    # A required key that is present but empty arrives here as None; report
    # it as invalid instead of failing on .split().
    if not isinstance(latlng, str):
        return False

    try:
        # ValueError covers both a wrong number of parts (unpacking) and
        # parts that are not numbers (float()).
        lat, lng = (float(part) for part in latlng.split(','))
    except ValueError:
        return False

    return (-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0)
0180 
0181 
def check_instructors(instructors):
    """
    Validate the 'instructor' entry.

    It must be a non-empty comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...'].  Do not use 'TBD'
    or other placeholders.
    """

    # A list-like string in the YAML header is already a list by the time
    # it gets here; anything else is rejected.
    if not isinstance(instructors, list):
        return False
    return len(instructors) > 0
0191 
0192 
def check_helpers(helpers):
    """
    Validate the 'helper' entry.

    It must be a comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...'].  The list may be empty.
    Do not use 'TBD' or other placeholders.
    """

    # A list-like string in the YAML header is already a list by the time
    # it gets here.  Any list, empty or not, is acceptable.
    is_list = isinstance(helpers, list)
    return is_list
0202 
0203 
0204 @look_for_fixme
def check_emails(emails):
    """
    'emails' must be a comma-separated list of valid email addresses.
    The list may be empty. A valid email address consists of characters,
    an '@', and more characters.  It should not contain the default contact
    email address (DEFAULT_CONTACT_EMAIL).

    Returns True when the value is a list in which every item is a string
    that matches EMAIL_PATTERN in full and differs from
    DEFAULT_CONTACT_EMAIL; False otherwise.
    """

    # YAML automatically loads list-like strings as lists.
    if not isinstance(emails, list):
        return False

    # The isinstance() test comes first so that non-string items (numbers,
    # None) are reported as invalid instead of raising inside the regex.
    # fullmatch() rejects addresses with a second '@' (e.g. 'a@b.c@d'),
    # which a prefix-only match would let through.
    return all(
        isinstance(email, str)
        and email != DEFAULT_CONTACT_EMAIL
        and re.fullmatch(EMAIL_PATTERN, email) is not None
        for email in emails
    )
0221 
0222 
def check_eventbrite(eventbrite):
    """
    'eventbrite' (the Eventbrite registration key) must be 9 or more
    digits.  It may appear as an integer or as a string.

    Integers are converted to their decimal text and checked against
    EVENTBRITE_PATTERN exactly like strings, so an integer with too few
    digits is rejected.  Booleans and any other type return False.
    """

    # bool is a subclass of int; a YAML 'true' is not a registration key.
    if isinstance(eventbrite, bool):
        return False

    # Apply the same digit-count rule to integers as to strings.
    if isinstance(eventbrite, int):
        eventbrite = str(eventbrite)

    # Anything else (float, list, ...) cannot be matched by the regex.
    if not isinstance(eventbrite, str):
        return False

    return bool(re.match(EVENTBRITE_PATTERN, eventbrite))
0233 
0234 
0235 @look_for_fixme
def check_collaborative_notes(collaborative_notes):
    """
    'collaborative_notes' must be a valid URL.

    Returns True when the value is a string that starts with a match for
    URL_PATTERN; False otherwise, including for non-string values.
    """

    # A non-string header value (e.g. a list or number) cannot be a URL;
    # report it as invalid instead of raising inside the regex.
    if not isinstance(collaborative_notes, str):
        return False

    return bool(re.match(URL_PATTERN, collaborative_notes))
0242 
0243 
0244 @look_for_fixme
def check_pass(value):
    """
    Accept any value.

    Used for 'checking' entries such as the workshop address, for which
    no sensible validation is feasible.  The argument is ignored.
    """

    always_valid = True
    return always_valid
0252 
0253 
# Maps each known header key to a (required, handler, message) tuple:
#   required -- True if the key must be present in the header;
#   handler  -- the check_* function called with the key's value;
#   message  -- the text reported when the handler returns a false value.
HANDLERS = {
    'layout':     (True, check_layout, 'layout isn\'t "workshop"'),

    'carpentry':  (True, check_carpentry, 'carpentry isn\'t in ' +
                   ', '.join(CARPENTRIES)),

    'country':    (True, check_country,
                   'country invalid: must use lowercase two-letter ISO code ' +
                   'from ' + ', '.join(ISO_COUNTRY)),

    'language':   (False,  check_language,
                   'language invalid: must use lowercase two-letter ISO code' +
                   ' from ' + ', '.join(ISO_LANGUAGE)),

    'humandate':  (True, check_humandate,
                   'humandate invalid. Please use three-letter months like ' +
                   '"Jan" and four-digit years like "2025"'),

    'humantime':  (True, check_humantime,
                   'humantime invalid. Please give a start and end time ' +
                   'like "9:00 am - 4:30 pm" or "09:00 - 16:30"'),

    'startdate':  (True, check_date,
                   'startdate invalid. Must be of format year-month-day, ' +
                   'e.g., 2014-01-31'),

    'enddate':    (False, check_date,
                   'enddate invalid. Must be of format year-month-day, e.g.,' +
                   ' 2014-01-31'),

    'latlng':     (True, check_latitude_longitude,
                   'latlng invalid. Check that it is two floating point ' +
                   'numbers, separated by a comma'),

    'instructor': (True, check_instructors,
                   'instructor list isn\'t a valid list of format ' +
                   '["First instructor", "Second instructor",..]'),

    'helper':     (True, check_helpers,
                   'helper list isn\'t a valid list of format ' +
                   '["First helper", "Second helper",..]'),

    'email':    (True, check_emails,
                 'contact email list isn\'t a valid list of format ' +
                 '["me@example.org", "you@example.org",..] or contains incorrectly formatted email addresses or ' +
                 '"{0}".'.format(DEFAULT_CONTACT_EMAIL)),

    'eventbrite': (False, check_eventbrite, 'Eventbrite key appears invalid'),

    'collaborative_notes':   (False, check_collaborative_notes, 'Collaborative Notes URL appears invalid'),

    'venue':      (False, check_pass, 'venue name not specified'),

    'address':    (False, check_pass, 'address not specified')
}
0308 
# REQUIRED holds the header keys whose HANDLERS entry is flagged as required.
REQUIRED = {key for key, entry in HANDLERS.items() if entry[0]}

# OPTIONAL holds the remaining header keys, i.e. those not flagged as required.
OPTIONAL = {key for key, entry in HANDLERS.items() if not entry[0]}
0314 
0315 
def check_blank_lines(reporter, raw):
    """
    Report blank lines inside the header text.

    ``raw`` is stripped of leading and trailing whitespace and split on
    newlines; every line that is empty or whitespace-only is collected
    with its zero-based index and passed to ``reporter.check``.
    """

    blank_entries = []
    for index, text in enumerate(raw.strip().split('\n')):
        if text.strip():
            continue
        blank_entries.append("{0}: {1}".format(index, text.rstrip()))

    reporter.check(len(blank_entries) == 0,
                   None,
                   'Blank line(s) in header: {0}',
                   ', '.join(blank_entries))
0327 
0328 
def check_categories(reporter, left, right, msg):
    """
    Report the entries of ``left`` that are absent from ``right``.

    Both arguments are sets.  The sorted difference is handed to
    ``reporter.check`` together with ``msg``; the check passes when the
    difference is empty.
    """

    offending = sorted(left - right)
    reporter.check(not offending,
                   None,
                   '{0}: offending entries {1}',
                   msg, offending)
0339 
0340 
def check_file(reporter, path, data):
    """
    Validate the metadata header of one file.

    Splits ``data`` with split_metadata(), checks the raw header text for
    blank lines, runs the matching handler on every known header key, and
    finally reports missing and superfluous keys.  All findings go to
    ``reporter``.
    """

    # Header as raw text and as parsed YAML; the body is not inspected.
    raw, header, _ = split_metadata(path, data)

    # Blank lines inside the header are reported first.
    check_blank_lines(reporter, raw)

    # Walk over every known key.  A key that is absent is only a problem
    # if it is required.  A key that is present is checked when it is
    # required or when it carries actual data (as opposed to a
    # commented-out entry).
    for category, (required, handler, message) in HANDLERS.items():
        if category not in header:
            if required:
                reporter.add(None,
                             'Missing mandatory key "{0}"',
                             category)
            continue

        value = header[category]
        if required or value:
            reporter.check(handler(value),
                           None,
                           '{0}\n    actual value "{1}"',
                           message, value)

    # Compare the keys we saw with the keys we know about, in both
    # directions.
    seen_categories = set(header.keys())
    known_categories = REQUIRED.union(OPTIONAL)
    check_categories(reporter, REQUIRED, seen_categories,
                     'Missing categories')
    check_categories(reporter, seen_categories, known_categories,
                     'Superfluous categories')
0376 
0377 
def check_config(reporter, filename):
    """
    Check YAML configuration file.

    Loads ``filename`` with load_yaml() and reports, against that file
    name, if its 'kind' is not 'workshop' or if its 'carpentry' is not
    one of CARPENTRIES.
    """

    # NOTE(review): assumes load_yaml() returns a mapping with .get();
    # confirm what it returns for an empty or missing file.
    config = load_yaml(filename)

    kind = config.get('kind', None)
    reporter.check(kind == 'workshop',
                   filename,
                   'Missing or unknown kind of event: {0}',
                   kind)

    # Use the shared CARPENTRIES tuple so this check cannot drift from
    # check_carpentry(), which validates the same set of codes.
    carpentry = config.get('carpentry', None)
    reporter.check(carpentry in CARPENTRIES,
                   filename,
                   'Missing or unknown carpentry: {0}',
                   carpentry)
0396 
0397 
def main():
    '''Run as the main program.

    Expects exactly one command-line argument, the workshop root
    directory.  Checks its _config.yml, looks for unwanted files, checks
    the header of its index.html, and then prints the report.
    '''

    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    root_dir = arguments[0]
    config_file = os.path.join(root_dir, '_config.yml')
    index_file = os.path.join(root_dir, 'index.html')

    reporter = Reporter()
    check_config(reporter, config_file)
    check_unwanted_files(root_dir, reporter)

    # Read the whole page first; the file is closed before it is checked.
    with open(index_file, encoding='utf-8') as reader:
        data = reader.read()
    check_file(reporter, index_file, data)

    reporter.report()


if __name__ == '__main__':
    main()