Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:16:08

0001 #!/usr/bin/env python3
0002 
0003 """Check Geant4 source code modules
0004 
0005 Geant4 organises source code into "Modules", with each Module being a
0006 directory containing headers and sources in `include/` and `src/`
0007 subdirectories respectively. One or more Modules are grouped/compiled
0008 into the actual libraries.
0009 
0010 This program can be used to query Modules as declared to the build system to:
0011 
0012 - List all declared modules
0013 - Print the list of public headers provided by a module
0014 - Print modules required by a module based on inclusion of headers
0015 - Check consistency between declared dependencies of a module and
0016   those its sources actually use via inclusion of headers
0017 - Check for cycles in the graph of declared module dependencies
0018 """
0019 
0020 import argparse
0021 import csv
0022 import os
0023 import sys
0024 import re
0025 import json
0026 import graphlib
0027 from collections import defaultdict, OrderedDict
0028 
0029 
def initdb(filename):
    """Load the module interface map CSV into an in-memory "database".

    Every non-empty row must supply ten columns, in this order: module
    name, location, public headers, private headers, sources, public
    dependencies, private dependencies, interface dependencies, parent
    target and a build setting. The list-valued columns are ';'-separated
    and are stored as sets with empty entries removed.

    Returns a dict with two keys: 'modules' (module name -> record dict)
    and 'build_settings' (set of the values seen in the tenth column).

    Raises OSError if the file cannot be opened, and IndexError if a
    non-empty row has fewer than ten columns.
    """
    def as_set(field):
        # ''.split(';') yields [''], so the empty entry is dropped explicitly
        return set(field.split(';')) - {''}

    db = {'modules': {}, 'build_settings': set()}
    # newline='' lets the csv module do its own line-ending handling, as
    # its documentation requires for file objects
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for blank lines; they declare nothing
                continue
            db['modules'][row[0]] = {'location': row[1],
                                     'headers': as_set(row[2]),
                                     'private_headers': as_set(row[3]),
                                     'sources': as_set(row[4]),
                                     'public_deps': as_set(row[5]),
                                     'private_deps': as_set(row[6]),
                                     'interface_deps': as_set(row[7]),
                                     'parent_target': row[8]}
            db['build_settings'].add(row[9])

    return db
0046 
0047 
def get_modules(db):
    """Return a view over the names of all modules recorded in `db`."""
    modules = db['modules']
    return modules.keys()
0050 
0051 
def get_module(name, db):
    """Return the record stored for module `name`.

    Raises KeyError if `db` has no module of that name.
    """
    modules = db['modules']
    return modules[name]
0054 
0055 
def get_location(name, db):
    """Return the 'location' entry of module `name`'s record."""
    record = get_module(name, db)
    return record['location']
0058 
0059 
def get_header_path(name, db):
    """Return the `include` subdirectory of module `name`'s location."""
    location = get_location(name, db)
    return os.path.join(location, 'include')
0062 
0063 
def get_private_header_path(name, db):
    """Return the `include/private` subdirectory of module `name`'s location."""
    location = get_location(name, db)
    return os.path.join(location, 'include/private')
0066 
0067 
def get_source_path(name, db):
    """Return the `src` subdirectory of module `name`'s location."""
    location = get_location(name, db)
    return os.path.join(location, 'src')
0070 
0071 
def get_headers(name, db):
    """Return the 'headers' entry (public headers) of module `name`'s record."""
    record = get_module(name, db)
    return record['headers']
0074 
0075 
def get_headers_absolute(name, db):
    """Return module `name`'s public headers joined onto its header path."""
    include_dir = get_header_path(name, db)
    return {os.path.join(include_dir, header)
            for header in get_headers(name, db)}
0079 
0080 
def get_private_headers(name, db):
    """Return the 'private_headers' entry of module `name`'s record."""
    record = get_module(name, db)
    return record['private_headers']
0083 
0084 
def get_private_headers_absolute(name, db):
    """Return module `name`'s private headers joined onto its private header path."""
    private_dir = get_private_header_path(name, db)
    return {os.path.join(private_dir, header)
            for header in get_private_headers(name, db)}
0088 
0089 
def get_sources(name, db):
    """Return the 'sources' entry of module `name`'s record."""
    record = get_module(name, db)
    return record['sources']
0092 
0093 
def get_sources_absolute(name, db):
    """Return module `name`'s source files joined onto its source path."""
    src_dir = get_source_path(name, db)
    return {os.path.join(src_dir, source)
            for source in get_sources(name, db)}
0097 
0098 
def get_public_deps(name, db):
    """Return the 'public_deps' entry of module `name`'s record."""
    record = get_module(name, db)
    return record['public_deps']
0101 
0102 
def get_private_deps(name, db):
    """Return the 'private_deps' entry of module `name`'s record."""
    record = get_module(name, db)
    return record['private_deps']
0105 
0106 
def get_interface_deps(name, db):
    """Return the 'interface_deps' entry of module `name`'s record."""
    record = get_module(name, db)
    return record['interface_deps']
0109 
0110 
def get_parent_target(name, db):
    """Return the 'parent_target' entry of module `name`'s record."""
    record = get_module(name, db)
    return record['parent_target']
0113 
0114 
def what_provides(h, db):
    """Return the set of modules that list `h` among their public headers."""
    providers = set()
    for module in get_modules(db):
        if h in get_headers(module, db):
            providers.add(module)
    return providers
0117 
0118 
def what_provides_private(h, db):
    """Return the set of modules that list `h` among their private headers."""
    providers = set()
    for module in get_modules(db):
        if h in get_private_headers(module, db):
            providers.add(module)
    return providers
0121 
0122 
def what_requires(m, db):
    """Return the set of modules that declare `m` as a public dependency."""
    users = set()
    for other in get_modules(db):
        if m in get_public_deps(other, db):
            users.add(other)
    return users
0125 
0126 
def what_requires_private(m, db):
    """Return the set of modules that declare `m` as a private dependency."""
    users = set()
    for other in get_modules(db):
        if m in get_private_deps(other, db):
            users.add(other)
    return users
0129 
0130 
def has_setting(setting, build_settings):
    """Test a (possibly negated) setting against `build_settings`.

    A plain name is true when it is a member of `build_settings`; a name
    prefixed with '!' is true when the remainder is NOT a member.
    """
    negated = setting.startswith('!')
    name = setting[1:] if negated else setting
    present = name in build_settings
    return not present if negated else present
0136 
0137 
def scan_includes(input_file, settings):
    """Return the set of names `input_file` pulls in via `#include "..."`.

    Only the quoted form is recognised; angle-bracket includes are ignored.
    An include line may carry a trailing marker comment:

        // no_geant4_module_check
        // no_geant4_module_check(COND1, !COND2)

    Without conditions the include is always skipped. With conditions it
    is skipped when any of them holds against `settings` according to
    `has_setting`.

    Raises OSError if `input_file` cannot be opened.
    """
    # [^"]* stops at the closing quote of the header name, so a later quote
    # in a trailing comment cannot be swallowed into the captured name.
    include_re = re.compile(r'^ *# *include *"([^"]*)"(.*)$')
    marker_re = re.compile(
        r'\s*//\s*no_geant4_module_check\s*(\((.*)\))?.*$')

    result = set()
    # Context manager so the handle is closed promptly, even on error
    with open(input_file) as source:
        for line in source:
            included = include_re.search(line)
            if not included:
                continue

            marker = marker_re.search(included.group(2))
            if marker:
                conds = set()
                if marker.group(2):
                    # Comma-separated conditions, spaces removed, empties dropped
                    conds = {item.replace(' ', '')
                             for item in marker.group(2).strip().split(',')} - {''}

                if not conds or any(has_setting(s, settings) for s in conds):
                    continue

            result.add(included.group(1))

    return result
0158 
0159 
def scan_files(files, settings):
    """Return the union of `scan_includes(f, settings)` over all `files`."""
    per_file = [scan_includes(path, settings) for path in files]
    return set().union(*per_file)
0166 
0167 
def scan_module_headers(name, db, full_scan):
    """Return the headers included by module `name`'s public headers.

    With `full_scan` false only the headers declared in `db` are read;
    otherwise every regular file found in the module's header directory is.
    """
    if not full_scan:
        header_list = get_headers_absolute(name, db)
    else:
        # NB: the header directory also holds the private/ subdir, so
        # only regular files are taken
        header_list = []
        for entry in os.scandir(get_header_path(name, db)):
            if entry.is_file():
                header_list.append(entry.path)

    return scan_files(header_list, db['build_settings'])
0177 
0178 
def scan_module_private_headers(name, db, full_scan):
    """Return the headers included by module `name`'s private headers.

    With `full_scan` false only the private headers declared in `db` are
    read; otherwise every regular file in the module's private header
    directory is. A missing directory makes `os.scandir` raise
    FileNotFoundError, which is left to the caller.
    """
    if full_scan:
        # Only regular files, as scan_module_headers does: a stray
        # subdirectory cannot be opened and read as a header
        header_list = [e.path for e in os.scandir(
            get_private_header_path(name, db)) if e.is_file()]
    else:
        header_list = get_private_headers_absolute(name, db)

    return scan_files(header_list, db['build_settings'])
0187 
0188 
def scan_module_sources(name, db, full_scan):
    """Return the headers included by module `name`'s source files.

    With `full_scan` false only the sources declared in `db` are read;
    otherwise every regular file in the module's source directory is.
    A missing directory makes `os.scandir` raise FileNotFoundError, which
    is left to the caller.
    """
    if full_scan:
        # Only regular files, as scan_module_headers does: a stray
        # subdirectory cannot be opened and read as a source
        source_list = [e.path for e in os.scandir(
            get_source_path(name, db)) if e.is_file()]
    else:
        source_list = get_sources_absolute(name, db)

    return scan_files(source_list, db['build_settings'])
0196 
0197 
def find_modules(list_of_headers, module_db):
    """Map headers to the modules that provide them as public headers.

    Returns a dict with three keys:
      'modules' - set of every module providing at least one of the headers
      'blame'   - dict of module -> sorted list of the headers attributed
                  to it
      'headers' - set of headers that no module provides
    """
    found_modules = set()
    blame_headers = defaultdict(set)
    orphan_hdrs = set()
    for h in list_of_headers:
        ms = what_provides(h, module_db)
        found_modules |= ms
        if not ms:
            orphan_hdrs.add(h)
        else:
            # A header should have one provider. If several claim it, blame
            # the alphabetically first so the output is reproducible
            # (set.pop() would pick an arbitrary one).
            blame_headers[min(ms)].add(h)

    return {'modules': found_modules,
            'blame': {k: sorted(v) for k, v in blame_headers.items()},
            'headers': orphan_hdrs}
0211 
0212 
def find_modules_private(list_of_headers, module_db):
    """Map headers to the modules that declare them as private headers.

    Returns a dict with three keys:
      'modules' - set of every module declaring at least one of the headers
                  as private
      'blame'   - dict of module -> sorted list of the headers attributed
                  to it
      'headers' - set of headers that no module declares as private
    """
    found_modules = set()
    blame_headers = defaultdict(set)
    orphan_hdrs = set()
    for h in list_of_headers:
        ms = what_provides_private(h, module_db)
        found_modules |= ms
        if not ms:
            orphan_hdrs.add(h)
        else:
            # If several modules claim the header, blame the alphabetically
            # first so the output is reproducible (set.pop() would pick an
            # arbitrary one).
            blame_headers[min(ms)].add(h)

    return {'modules': found_modules,
            'blame': {k: sorted(v) for k, v in blame_headers.items()},
            'headers': orphan_hdrs}
0226 
0227 
def usage_requirements(module_name, module_db, full_scan):
    """ Find modules needed publicly and privately by input module

    Returns a dict with keys 'module', 'dependencies', 'blame',
    'external_headers' and 'access_violations'; every key but 'module'
    maps to a dict with 'public' and 'private' entries. Prints a message
    to stderr and exits with status 1 if `module_name` is not in
    `module_db`.
    """
    try:
        # Looked up first so an unknown module fails here with KeyError
        get_location(module_name, module_db)
        own_public = get_headers(module_name, module_db)
        own_private = get_private_headers(module_name, module_db)
    except KeyError:
        print(f"No module named '{module_name}'", file=sys.stderr)
        sys.exit(1)

    # Public usage requirements: whatever the public headers include,
    # minus the module's own public headers
    public_includes = scan_module_headers(module_name, module_db, full_scan)
    public_includes = public_includes - own_public
    public_deps = find_modules(public_includes, module_db)
    access_violation_public = find_modules_private(public_includes, module_db)

    # Private usage requirements come from private headers...
    own_headers = own_public | own_private
    try:
        private_hdr_includes = scan_module_private_headers(
            module_name, module_db, full_scan) - own_headers
    except FileNotFoundError:
        private_hdr_includes = set()

    # ... and from sources, when there are any
    try:
        src_includes = scan_module_sources(
            module_name, module_db, full_scan) - own_headers
    except FileNotFoundError:
        src_includes = set()

    # Public deps take priority, so strip them from the private results.
    # Blames are left alone because they can be unique.
    implementation_includes = private_hdr_includes | src_includes
    private_deps = find_modules(implementation_includes, module_db)
    private_deps['modules'] -= public_deps['modules']
    private_deps['headers'] -= public_deps['headers']

    # Private headers of any module reached from the implementation
    access_violation_private = find_modules_private(
        implementation_includes, module_db)

    # Key order is kept as it determines the layout of serialised output
    result = {'module': module_name}
    result['dependencies'] = {
        'public': sorted(public_deps['modules']),
        'private': sorted(private_deps['modules']),
    }
    result['blame'] = {
        'public': OrderedDict(sorted(public_deps['blame'].items())),
        'private': OrderedDict(sorted(private_deps['blame'].items())),
    }
    result['external_headers'] = {
        'public': sorted(public_deps['headers']),
        'private': sorted(private_deps['headers']),
    }
    result['access_violations'] = {
        'public': sorted(access_violation_public['modules']),
        'private': sorted(access_violation_private['modules']),
    }
    return result
0297 
0298 
def check_consistency(module_name, module_db, full_scan):
    """ Compare a module's declared dependencies with its apparent ones

    Returns a list of report lines, one per kind of inconsistency found;
    the list is empty when nothing was found.
    """
    # NB: Can have false positives from externals G4expat, G4clhep, G4zlib, G4tools (plus imported :: targets and stdlib)
    ignored = set(['G4expat', 'G4clhep', 'G4zlib', 'G4tools', 'G4ptl', 'stdc++fs'])

    def filter_dependencies(dep_list, module_name):
        kept = set()
        for dep in dep_list:
            if not re.match("^.+::.+", dep):
                kept.add(dep)
        return kept - ignored

    ur = usage_requirements(module_name, module_db, full_scan)
    apparent_public = filter_dependencies(
        ur['dependencies']['public'], module_name)
    apparent_private = filter_dependencies(
        ur['dependencies']['private'], module_name)

    declared_public = filter_dependencies(
        get_public_deps(module_name, module_db), module_name)
    declared_interface = filter_dependencies(
        get_interface_deps(module_name, module_db), module_name)
    declared_private = filter_dependencies(
        get_private_deps(module_name, module_db), module_name)

    violations_public = filter_dependencies(
        ur['access_violations']['public'], module_name)
    violations_private = filter_dependencies(
        ur['access_violations']['private'], module_name)

    # Each finding pairs a set of offending modules with its report prefix.
    # The order here is the order of lines in the report.
    findings = [
        # Declared dependencies duplicated between public/private
        (declared_public & declared_private,
         '  - has duplicated PUBLIC/PRIVATE dependencies: '),
        # Apparent dependencies that are not declared
        (apparent_public - declared_public - declared_interface,
         '  + may require PUBLIC or INTERFACE dependencies: '),
        (apparent_private - declared_private,
         '  + may require PRIVATE dependencies: '),
        # Declared dependencies that are not apparent (overdeclared)
        (declared_public - apparent_public,
         '  - may not require PUBLIC dependencies: '),
        (declared_private - apparent_private,
         '  - may not require PRIVATE dependencies: '),
        # Public headers reaching into headers declared private
        (violations_public,
         '  ! public interface #include-s PRIVATE headers from modules: '),
        # Implementation reaching into headers declared private
        (violations_private,
         '  ! implementation uses PRIVATE headers from modules: '),
    ]

    return [f'{prefix}{modules}' for modules, prefix in findings if modules]
0367 
0368 
def do_provides(header_name, module_db, verbose):
    """Print the single module that provides public header `header_name`.

    Prints an error to stderr and exits with status 1 if no module, or
    more than one module, provides the header. `verbose` is accepted but
    not used.
    """
    # find the header, there should not be duplicates!
    mods = what_provides(header_name, module_db)
    if not mods:
        print(f"No module provides header '{header_name}'", file=sys.stderr)
        sys.exit(1)
    if len(mods) > 1:
        # Report the parameter, not the script-level `args`, so the
        # function also works when no such global exists
        print(
            f"Header '{header_name}' is provided by multiple modules: {mods}", file=sys.stderr)
        sys.exit(1)

    print(mods.pop())
0381 
0382 
def do_requires(module_name, module_db):
    """Print all modules that declare `module_name` as a dependency.

    Users are listed in sorted order under a '- PUBLIC:' and a
    '- PRIVATE:' heading; a heading is omitted when it has no entries.
    """
    try:
        public_users = sorted(what_requires(module_name, module_db))
        private_users = sorted(what_requires_private(module_name, module_db))
    except KeyError:
        print(f'No module named \'{module_name}\'', file=sys.stderr)
        sys.exit(1)

    print(f'{module_name} is required by:')

    if public_users:
        print('- PUBLIC:')
        print("  -", "\n  - ".join(public_users))

    if private_users:
        # Same list-item prefix as the PUBLIC heading
        print('- PRIVATE:')
        print("  -", "\n  - ".join(private_users))
0402 
0403 
def do_count_usage(module_db, verbose):
    """Print modules from most to least depended upon.

    A module's count is the number of modules declaring it as a public
    dependency plus the number declaring it as a private one. With
    `verbose` the count is printed after each name.
    """
    reqcount = {}
    for m in get_modules(module_db):
        public_users = what_requires(m, module_db)
        private_users = what_requires_private(m, module_db)
        reqcount[m] = len(public_users) + len(private_users)

    ranked = sorted(reqcount, key=reqcount.get, reverse=True)
    for m in ranked:
        print(f'{m} ({reqcount[m]} users)' if verbose else m)
0417 
0418 
def do_check_consistency(module_name, module_db, full_scan):
    """Run `check_consistency` on one module and report the outcome.

    Prints a confirmation to stdout when nothing is reported; otherwise
    prints the report to stderr and exits with status 1. A KeyError from
    the check is reported as an unknown module, also with exit status 1.
    """
    try:
        problems = check_consistency(module_name, module_db, full_scan)
    except KeyError:
        print(f"No module named '{module_name}'", file=sys.stderr)
        sys.exit(1)

    if not problems:
        print(f'Module {module_name} appears consistent')
        return

    print(f'{module_name} has inconsistent dependencies:', file=sys.stderr)
    print("\n".join(problems), file=sys.stderr)
    sys.exit(1)
0431 
0432 
def do_find_cycles(module_db, verbose):
    """ Look for cycles in the graph of declared module dependencies

    Every module is a node whose predecessors are its public, private and
    interface dependencies. On a cycle, the cycle is printed to stderr and
    the program exits with status 1. With `verbose`, the nodes are also
    printed generation by generation in topological order.
    """
    # Adjacency list: module -> all of its declared dependencies
    adjlist = {
        m: list(get_public_deps(m, module_db)
                | get_private_deps(m, module_db)
                | get_interface_deps(m, module_db))
        for m in get_modules(module_db)
    }

    sorter = graphlib.TopologicalSorter(adjlist)
    try:
        # prepare() is where the sorter raises if the graph has a cycle
        sorter.prepare()
    except graphlib.CycleError as err:
        print("Cycles detected in module dependency graph:", file=sys.stderr)
        print(" -> ".join(reversed(err.args[1])), file=sys.stderr)
        sys.exit(1)

    # NB: the full graphlib interface is used so that nodes can be printed
    # grouped by level in the graph.
    if verbose:
        generation = 0
        while sorter.is_active():
            ready = sorter.get_ready()
            print(f'Generation: {generation}')
            for node in ready:
                print(f'  {node}')
            sorter.done(*ready)
            generation += 1

    print("No cycles detected in module dependency graph")
0471 
0472 
def do_find_inconsistencies(db, verbose, full_scan):
    """Run `check_consistency` on every module in `db` and report.

    Prints each inconsistent module and its report to stderr and exits
    with status 1, or prints a confirmation to stdout when no module has
    anything reported. `verbose` is accepted but not used.
    """
    reports = {}
    for module in get_modules(db):
        lines = check_consistency(module, db, full_scan)
        if lines:
            reports[module] = "\n".join(lines)

    if not reports:
        print(
            "No inconsistencies detected in declared/apparent module dependencies")
        return

    for module, text in reports.items():
        print(f'{module}:\n{text}', file=sys.stderr)

    sys.exit(1)
0488 
0489 
def do_libraries(db, verbose):
    """Print each parent target with the set of modules grouped under it.

    Targets are printed in sorted order. `verbose` is accepted but not used.
    """
    libmap = {}
    for module in get_modules(db):
        target = get_parent_target(module, db)
        libmap.setdefault(target, set()).add(module)

    for target, members in sorted(libmap.items()):
        print(f'{target}: {members}')
0498 
0499 
# Script entry point: parse the command line, load the module database and
# run the single query that was selected.
if __name__ == "__main__":
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line breaks and bullet list intact.
    parser = argparse.ArgumentParser(
        description=str(__doc__), formatter_class=argparse.RawDescriptionHelpFormatter
    )
    # Options that apply to any query
    parser.add_argument("-db", default='G4ModuleInterfaceMap.csv',
                        metavar="FILE", help="module interface map file")
    parser.add_argument("-v", "--verbose",
                        action="store_true", help="verbose output")
    parser.add_argument("--all-files",
                        action="store_true",
                        help="consider all files in source tree for consistency checks")

    # Exactly one query must be given per invocation
    query_group = parser.add_mutually_exclusive_group(required=True)

    query_group.add_argument(
        "-l", "--list",
        action='store_true',
        help="list declared source code modules"
    )
    query_group.add_argument(
        "-s", "--source",
        metavar="<module>",
        help="print directory holding CMake file where module is defined"
    )
    query_group.add_argument(
        "-i", "--interface",
        metavar="<module>",
        help="print public headers of module",
    )
    query_group.add_argument(
        "-p", "--provides",
        metavar="<header>",
        help="print module that provides this header"
    )
    query_group.add_argument(
        "-u", "--usage-requirements",
        metavar="<module>",
        help="print usage requirements of module"
    )
    query_group.add_argument(
        "-r", "--requires",
        metavar="<module>",
        help="print modules linking to this module"
    )
    query_group.add_argument(
        "--count-usage",
        action="store_true",
        help="print modules from most to least linked to"
    )
    query_group.add_argument(
        "-c", "--check-consistency",
        metavar="<module>",
        help="check declared and apparent module dependencies for basic consistency"
    )
    query_group.add_argument(
        "--find-cycles",
        action="store_true",
        help="find cycles in graph of declared modules dependencies"
    )
    query_group.add_argument(
        "--find-inconsistencies",
        action="store_true",
        help="find inconsistencies in apparent/declared dependencies of all modules"
    )
    query_group.add_argument(
        "--library",
        metavar="<module>",
        help="print final library (.so/.dll) that module is compiled into"
    )
    query_group.add_argument(
        "--libraries",
        action="store_true",
        help="print final libraries and their module compositions"
    )
    args = parser.parse_args()

    # Initialize the module/header "database"
    # Only OSError (e.g. an unreadable map file) is turned into a clean exit
    try:
        db = initdb(args.db)
    except OSError as err:
        print(f"Could not initalize module DB: {err}", file=sys.stderr)
        sys.exit(1)

    # Implementations
    # Dispatch on whichever query option is set. Simple lookups are done
    # inline, with KeyError reported as an unknown module; the rest is
    # delegated to the do_* functions.
    if args.list:
        print("\n".join(get_modules(db)))
    elif args.source:
        try:
            print(get_location(args.source, db))
        except KeyError as err:
            print(f"No module named {err}", file=sys.stderr)
            sys.exit(1)
    elif args.interface:
        try:
            print(
                "\n".join(sorted(get_headers(args.interface, db))))
        except KeyError as err:
            print(f"No module named {err}", file=sys.stderr)
            sys.exit(1)
    elif args.provides:
        do_provides(args.provides, db, args.verbose)
    elif args.usage_requirements:
        # Usage requirements are emitted as indented JSON
        ur = usage_requirements(args.usage_requirements, db, args.all_files)
        print(json.dumps(ur, indent=2))
    elif args.count_usage:
        do_count_usage(db, args.verbose)
    elif args.requires:
        do_requires(args.requires, db)
    elif args.check_consistency:
        do_check_consistency(args.check_consistency, db, args.all_files)
    elif args.find_cycles:
        do_find_cycles(db, args.verbose)
    elif args.find_inconsistencies:
        do_find_inconsistencies(db, args.verbose, args.all_files)
    elif args.library:
        try:
            print(get_parent_target(args.library, db))
        except KeyError as err:
            print(f"No module named {err}", file=sys.stderr)
            sys.exit(1)
    elif args.libraries:
        do_libraries(db, args.verbose)