Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-17 09:12:58

0001 import MySQLdb
0002 import os
0003 import sys
0004 import argparse
0005 import json
0006 from utils.ConnectToDB import DBManager
0007 import cv2
0008 from datetime import datetime
0009 import json
0010 
# Module-level DB handle (utils.ConnectToDB.DBManager); assigned in main()
# before any of the query helpers below are called.
connector = None

def scan_image_paths(locations):
    """Recursively collect .png image paths under each directory in *locations*.

    A directory may contain a ``metadata.json`` file; its parsed contents
    apply to every image in that directory and are inherited by its
    subdirectories (a subdirectory's own ``metadata.json`` overrides).

    Returns:
        list[dict]: one entry per image, with keys ``"path"`` (absolute/
        joined file path) and ``"metadata"`` (parsed JSON dict, or ``None``
        when no metadata file applies).
    """
    image_paths = []

    def recursive_scan(root_dir):
        # Track the metadata that applies to each visited directory so it
        # inherits down the tree but never leaks sideways. (The previous
        # single-variable approach kept the last-seen metadata.json active
        # for unrelated sibling directories visited later in the walk.)
        dir_metadata = {}
        # os.walk is top-down, so a parent is always visited (and recorded
        # in dir_metadata) before any of its children.
        for root, dirs, files in os.walk(root_dir):
            key = os.path.normpath(root)
            # Inherit from the parent directory; None at the top level.
            current_metadata = dir_metadata.get(os.path.dirname(key))

            metadata_path = os.path.join(root, "metadata.json")
            if os.path.isfile(metadata_path):
                print(f'Found metadata file: {metadata_path}')
                with open(metadata_path, 'r') as f:
                    current_metadata = json.load(f)
            dir_metadata[key] = current_metadata

            # Process image files in the current directory
            for file in files:
                if file.endswith(".png"):  # ignoring pdfs for now
                    full_path = os.path.join(root, file)
                    print(f'Found image: {full_path}')

                    image_paths.append({
                        "path": full_path,
                        "metadata": current_metadata
                    })

    # Start scanning from each specified location
    for location in locations:
        recursive_scan(location)

    return image_paths
0040 
def ensure_trailing_slash(path):
    """Return *path* with a single '/' appended when it doesn't already end in one."""
    return path if path.endswith('/') else path + '/'
0045 
def get_super_plot_group_id():
    """Look up the ID of the 'Pipeline' row in the PlotGroups table.

    Returns:
        The ID value when the row exists, otherwise ``None``.

    The caller (process_image) explicitly checks for a ``None`` return and
    skips the SuperGroups insertion; the previous ``exit(1)`` here killed
    the whole crawler instead and made that check unreachable.
    """
    query = 'SELECT ID FROM PlotGroups WHERE Name = "Pipeline"'
    result = connector.FetchAll(query)

    if not result:
        print("Error: 'Pipeline' entry not found in PlotGroups table.")
        return None

    super_plot_group_id = result[0]["ID"]
    print(f'Using SuperPlotGroup_ID from PlotGroups where Name="Pipeline": {super_plot_group_id}')
    return super_plot_group_id
0057 
def insert_into_supergroups(plot_group_id, super_plot_group_id):
    """Link plot_group_id to super_plot_group_id in SuperGroups unless already linked."""
    supergroup_check_q = f'SELECT ID FROM SuperGroups WHERE PlotGroup_ID = {plot_group_id} AND SuperPlotGroup_ID = {super_plot_group_id}'
    existing = connector.FetchAll(supergroup_check_q)

    if existing:
        print(f'SuperGroup entry already exists for PlotGroup_ID {plot_group_id} and SuperPlotGroup_ID {super_plot_group_id}')
        return

    #insert if it doesn't exist
    supergroup_insert_q = f'''
            INSERT INTO SuperGroups (PlotGroup_ID, SuperPlotGroup_ID)
            VALUES ({plot_group_id},{super_plot_group_id})
        '''
    connector.Update(supergroup_insert_q)
    print(f'Inserted PlotGroup_ID {plot_group_id} into SuperGroups with SuperPlotGroup_ID {super_plot_group_id}')
0072 
def process_image(filepath, metadata):
    """Register one scanned image in the DB.

    Derives the plot name and run-period path from *filepath*, optionally
    links a pipeline PlotGroup into SuperGroups when the path contains a
    'pipeline-<id>' component, and inserts a row into Plots (with
    *metadata* serialized as JSON, or NULL) if an identical plot is not
    already recorded. Any exception is caught and logged; the function
    never raises.
    """
    try:
        # plot = filename; subloc = immediate parent dir name;
        # locale = everything above that.
        plot = os.path.basename(filepath)
        locale, subloc = os.path.split(os.path.dirname(filepath))
        print(f"scanning locale:  {locale}")
        print(f"Scanning sublocation: {subloc}")
        
        pipeline_id = None
        plot_group_id = None
        super_plot_group_id = None
        
        #if pipeline is in the path to the image, grab the pipeline number and add it into plot groups else continue
        if 'pipeline' in locale:
            super_plot_group_id = get_super_plot_group_id()
            if super_plot_group_id is None:
                # No 'Pipeline' row in PlotGroups: nothing to link against.
                print("Skipping SuperGroups insertion")
                return
            
            # The pipeline id is the suffix of a path component named
            # 'pipeline-<id>' (first match wins).
            locale_parts = locale.split(os.sep)
            for part in locale_parts:
                if part.startswith('pipeline-'):
                    pipeline_id = part.split('pipeline-')[-1]
                    break
                
            if pipeline_id:
                print(f"found pipeline id: {pipeline_id}")
                # NOTE(review): queries below interpolate values directly into
                # SQL; safe only while pipeline ids come from trusted paths.
                plot_group_q = f'SELECT ID FROM PlotGroups WHERE Name="{pipeline_id}"'
                PlotGroup = connector.FetchAll(plot_group_q)
                
                if len(PlotGroup) == 0:
                    # Create the PlotGroup, then re-query to learn its ID.
                    insert_pg_q = f'INSERT INTO PlotGroups (Name) VALUES ("{pipeline_id}")'
                    print(insert_pg_q)
                    connector.Update(insert_pg_q)
                    
                    PlotGroup = connector.FetchAll(plot_group_q)
                    if PlotGroup:
                        plot_group_id = PlotGroup[0]["ID"]
                        print(f'inserted new PlotGroup with ID: {plot_group_id}')
                    else:
                        print(f"Error: Could not retrieve PlotGroup ID for pipeline {pipeline_id}")
                        return
                        
                else:
                    plot_group_id = PlotGroup[0]['ID']
                    print(f"PlotGroup already exists with ID: {plot_group_id}")
            
                if plot_group_id and super_plot_group_id:
                    print(f'inserting into supergroups {plot_group_id} and {super_plot_group_id}')
                    insert_into_supergroups(plot_group_id, super_plot_group_id)
            else:
                print(f"coud not find pipeline id in locale: {locale}")
        else:
            print('no pipeline in locale')
    

        # RunNumber is fixed at 0 for crawled images; RunPeriod is the
        # directory path (with trailing slash); Name is the filename
        # without its extension.
        RunNumber = 0
        RunPeriod = ensure_trailing_slash(f"{locale}/{subloc}")
        Name = plot.rsplit(".", 1)[0]

        print(f"Name of plot: {Name}, Run Period: {RunPeriod}")

        # Only images whose name matches exactly one known png Plot_Type
        # are recorded; anything else is silently skipped.
        Plot_Type_ID_q = f'SELECT ID FROM Plot_Types WHERE Name="{Name}" AND FileType="png"'
        Plot_Type_ID = connector.FetchAll(Plot_Type_ID_q)
        print(f'Plot type ID query result: {Plot_Type_ID}')

        if len(Plot_Type_ID) != 1:
            return
        else:
            PT_ID = Plot_Type_ID[0]["ID"]
            print(f'Plot type ID: {PT_ID}')

        # A plot is unique by (type, run number, run period).
        unique_plot_q = f'SELECT ID FROM Plots WHERE Plot_Types_ID={PT_ID} AND RunNumber={RunNumber} AND RunPeriod="{RunPeriod}"'
        Plot = connector.FetchAll(unique_plot_q)

        if len(Plot) == 0:
            # Sanity check: skip unreadable/empty image files.
            read_img = cv2.imread(filepath)
            if read_img is None or read_img.size == 0:
                return
            print("Inserting plot")
            # Insert into Plots with MetaData as NULL if metadata is None
            metadata_value = "NULL" if metadata is None else f"'{json.dumps(metadata)}'"
            insert_q = f'''
                INSERT INTO Plots (Plot_Types_ID, RunPeriod, RunNumber, InsertDateTime, MetaData)
                VALUES ({PT_ID}, "{RunPeriod}", {RunNumber}, NOW(), {metadata_value})
            '''
            connector.Update(insert_q)
        else:
            print("Plot already inserted")
    except Exception as e:
        print(f"Error processing image {filepath}: {e}")
0163 
def main(argv):
    """Entry point: read the JSON config, enforce a single crawler instance
    via a pid file, scan the configured image caches, and register each
    discovered image in the database.

    Args:
        argv: command-line argument list (e.g. ``sys.argv[1:]``).
    """
    global connector

    ap = argparse.ArgumentParser()
    ap.add_argument("-c", "--config", required=True, help="path to hydra config file")
    # Parse the argv actually passed in; the original called parse_args()
    # with no arguments, silently re-reading sys.argv and ignoring the
    # caller-supplied list.
    args = vars(ap.parse_args(argv))
    configPath = args["config"]

    try:
        with open(configPath) as parms_json:
            parms = json.load(parms_json)
            locations_to_scan = parms["DATA_LOCATION"]["ImageCaches"]
    except Exception as e:
        print(f"Error reading config file: {e}")
        sys.exit(1)

    connector = DBManager(configPath=configPath)

    # Single-instance guard: if the pid recorded in the file belongs to a
    # live process, another crawler for this cache is already running.
    crawler_pidFile = f"/tmp/{str(locations_to_scan[0]).replace('/', '_')}_img_crawler_pid"
    if os.path.exists(crawler_pidFile):
        try:
            with open(crawler_pidFile, "r") as cpidf:
                cpid = cpidf.readline().strip()
                os.kill(int(cpid), 0)  # signal 0: existence check only
        except (OSError, ValueError):
            # Dead process, no permission to probe, or an unparsable pid
            # file (ValueError was previously unhandled and crashed the
            # crawler); treat all as stale and take over.
            pass
        else:
            print("Crawler is already running")
            sys.exit(0)

    with open(crawler_pidFile, 'w') as pidf:
        pidf.write(str(os.getpid()))

    try:
        print(f"Scanning: {locations_to_scan}")
        image_paths = scan_image_paths(locations_to_scan)

        for image in image_paths:
            process_image(image["path"], image["metadata"])
    finally:
        # Clean up our pid file so later runs don't have to probe a stale pid.
        try:
            os.remove(crawler_pidFile)
        except OSError:
            pass

if __name__ == "__main__":
    main(sys.argv[1:])