# File indexing completed on 2025-01-17 09:12:58
0001 import MySQLdb
0002 import os
0003 import sys
0004 import argparse
0005 import json
0006 from utils.ConnectToDB import DBManager
0007 import cv2
0008 from datetime import datetime
0009 import json
0010
# Module-level database handle shared by all query helpers below.
# None until main() assigns a DBManager instance.
connector = None
0012
def scan_image_paths(locations):
    """Recursively collect .png image paths under each location.

    A ``metadata.json`` file found in a directory applies to the images in
    that directory and all of its descendants.  Metadata is inherited down
    the tree only — it never leaks to sibling directories.  (The previous
    os.walk-based implementation kept the last metadata seen for the rest
    of the walk, so siblings visited later wrongly picked it up.)

    Args:
        locations: iterable of root directories to scan.

    Returns:
        list of dicts: {"path": <absolute image path>, "metadata": <dict or None>}
    """
    image_paths = []

    def recursive_scan(directory, inherited_metadata=None):
        # A metadata.json in this directory overrides whatever was inherited.
        metadata = inherited_metadata
        metadata_path = os.path.join(directory, "metadata.json")
        if os.path.isfile(metadata_path):
            print(f'Found metadata file: {metadata_path}')
            try:
                with open(metadata_path, 'r') as f:
                    metadata = json.load(f)
            except (OSError, ValueError) as e:
                # Unreadable/invalid metadata: keep the inherited value.
                print(f'Error reading metadata file {metadata_path}: {e}')

        try:
            entries = sorted(os.listdir(directory))
        except OSError as e:
            print(f'Error listing directory {directory}: {e}')
            return

        for entry in entries:
            full_path = os.path.join(directory, entry)
            if os.path.isdir(full_path):
                # Descend with THIS directory's metadata; siblings are unaffected.
                recursive_scan(full_path, metadata)
            elif entry.endswith(".png"):
                print(f'Found image: {full_path}')
                image_paths.append({
                    "path": full_path,
                    "metadata": metadata
                })

    for location in locations:
        recursive_scan(location)

    return image_paths
0040
def ensure_trailing_slash(path):
    """Return *path*, appending a '/' if it does not already end with one."""
    return path if path.endswith('/') else path + '/'
0045
def get_super_plot_group_id():
    """Look up the ID of the PlotGroups row named "Pipeline".

    Returns:
        The ID value of the "Pipeline" row.

    Exits the process with status 1 when the row is absent, since the
    pipeline import cannot proceed without it.
    """
    query = 'SELECT ID FROM PlotGroups WHERE Name = "Pipeline"'
    result = connector.FetchAll(query)

    if not result:
        print("Error: 'Pipeline' entry not found in PlotGroups table.")
        # sys.exit, not the builtin exit(): exit() is a site-module helper
        # intended for interactive use and may be absent (e.g. under -S).
        sys.exit(1)

    super_plot_group_id = result[0]["ID"]
    print(f'Using SuperPlotGroup_ID from PlotGroups where Name="Pipeline": {super_plot_group_id}')
    return super_plot_group_id
0057
def insert_into_supergroups(plot_group_id, super_plot_group_id):
    """Link a PlotGroup to a SuperPlotGroup via the SuperGroups table.

    Inserts the (plot_group_id, super_plot_group_id) pair unless an
    identical link row already exists; either way the outcome is logged.
    """
    supergroup_check_q = f'SELECT ID FROM SuperGroups WHERE PlotGroup_ID = {plot_group_id} AND SuperPlotGroup_ID = {super_plot_group_id}'
    existing_link = connector.FetchAll(supergroup_check_q)

    # Guard clause: nothing to do when the link is already present.
    if existing_link:
        print(f'SuperGroup entry already exists for PlotGroup_ID {plot_group_id} and SuperPlotGroup_ID {super_plot_group_id}')
        return

    supergroup_insert_q = f'''
    INSERT INTO SuperGroups (PlotGroup_ID, SuperPlotGroup_ID)
    VALUES ({plot_group_id},{super_plot_group_id})
    '''
    connector.Update(supergroup_insert_q)
    print(f'Inserted PlotGroup_ID {plot_group_id} into SuperGroups with SuperPlotGroup_ID {super_plot_group_id}')
0072
def process_image(filepath, metadata):
    """Register one image file as a row in the Plots table.

    Derives the plot name, run period, and (for pipeline images) the
    PlotGroup/SuperGroup linkage from the file's path, then inserts a
    Plots row unless an identical one already exists.  Any exception is
    caught and logged so one bad image cannot abort the whole crawl.

    Args:
        filepath: full path to a .png image file.
        metadata: dict loaded from a nearby metadata.json, or None.

    NOTE(review): all SQL here is built with f-strings from path-derived
    values; parameterized queries would be safer — confirm DBManager
    supports them before changing.
    """
    try:
        # Split ".../locale/subloc/plot.png" into its three components.
        plot = os.path.basename(filepath)
        locale, subloc = os.path.split(os.path.dirname(filepath))
        print(f"scanning locale: {locale}")
        print(f"Scanning sublocation: {subloc}")

        pipeline_id = None
        plot_group_id = None
        super_plot_group_id = None

        # Pipeline images additionally get a PlotGroups row (named after
        # the pipeline id) linked to the "Pipeline" SuperPlotGroup.
        if 'pipeline' in locale:
            super_plot_group_id = get_super_plot_group_id()
            # Defensive: get_super_plot_group_id() currently exits rather
            # than returning None, so this guard should not trigger.
            if super_plot_group_id is None:
                print("Skipping SuperGroups insertion")
                return

            # The pipeline id is embedded in a path component "pipeline-<id>".
            locale_parts = locale.split(os.sep)
            for part in locale_parts:
                if part.startswith('pipeline-'):
                    pipeline_id = part.split('pipeline-')[-1]
                    break

            if pipeline_id:
                print(f"found pipeline id: {pipeline_id}")
                plot_group_q = f'SELECT ID FROM PlotGroups WHERE Name="{pipeline_id}"'
                PlotGroup = connector.FetchAll(plot_group_q)

                if len(PlotGroup) == 0:
                    # Create the PlotGroup, then re-query for its new ID.
                    insert_pg_q = f'INSERT INTO PlotGroups (Name) VALUES ("{pipeline_id}")'
                    print(insert_pg_q)
                    connector.Update(insert_pg_q)

                    PlotGroup = connector.FetchAll(plot_group_q)
                    if PlotGroup:
                        plot_group_id = PlotGroup[0]["ID"]
                        print(f'inserted new PlotGroup with ID: {plot_group_id}')
                    else:
                        print(f"Error: Could not retrieve PlotGroup ID for pipeline {pipeline_id}")
                        return

                else:
                    plot_group_id = PlotGroup[0]['ID']
                    print(f"PlotGroup already exists with ID: {plot_group_id}")

                if plot_group_id and super_plot_group_id:
                    print(f'inserting into supergroups {plot_group_id} and {super_plot_group_id}')
                    insert_into_supergroups(plot_group_id, super_plot_group_id)
            else:
                print(f"coud not find pipeline id in locale: {locale}")
        else:
            print('no pipeline in locale')

        # Every image (pipeline or not) is recorded in Plots.
        RunNumber = 0
        RunPeriod = ensure_trailing_slash(f"{locale}/{subloc}")
        Name = plot.rsplit(".", 1)[0]  # filename without extension

        print(f"Name of plot: {Name}, Run Period: {RunPeriod}")

        # Only images with exactly one matching Plot_Types row are imported.
        Plot_Type_ID_q = f'SELECT ID FROM Plot_Types WHERE Name="{Name}" AND FileType="png"'
        Plot_Type_ID = connector.FetchAll(Plot_Type_ID_q)
        print(f'Plot type ID query result: {Plot_Type_ID}')

        if len(Plot_Type_ID) != 1:
            return
        else:
            PT_ID = Plot_Type_ID[0]["ID"]
            print(f'Plot type ID: {PT_ID}')

        # Skip insertion when an identical Plots row already exists.
        unique_plot_q = f'SELECT ID FROM Plots WHERE Plot_Types_ID={PT_ID} AND RunNumber={RunNumber} AND RunPeriod="{RunPeriod}"'
        Plot = connector.FetchAll(unique_plot_q)

        if len(Plot) == 0:
            # Sanity-check the file is a readable, non-empty image first.
            read_img = cv2.imread(filepath)
            if read_img is None or read_img.size == 0:
                return
            print("Inserting plot")

            # Embed metadata as a JSON string literal, or SQL NULL when absent.
            metadata_value = "NULL" if metadata is None else f"'{json.dumps(metadata)}'"
            insert_q = f'''
            INSERT INTO Plots (Plot_Types_ID, RunPeriod, RunNumber, InsertDateTime, MetaData)
            VALUES ({PT_ID}, "{RunPeriod}", {RunNumber}, NOW(), {metadata_value})
            '''
            connector.Update(insert_q)
        else:
            print("Plot already inserted")
    except Exception as e:
        # Per-image boundary: log and continue with the remaining images.
        print(f"Error processing image {filepath}: {e}")
0163
def main(argv):
    """Entry point: read the config, take a PID lock, crawl and import images.

    Args:
        argv: command-line argument list (argparse reads sys.argv itself;
              kept for interface compatibility).
    """
    global connector

    ap = argparse.ArgumentParser()
    ap.add_argument("-c", "--config", required=True, help="path to hydra config file")
    args = vars(ap.parse_args())
    configPath = args["config"]

    try:
        with open(configPath) as parms_json:
            parms = json.load(parms_json)
        locations_to_scan = parms["DATA_LOCATION"]["ImageCaches"]
    except Exception as e:
        print(f"Error reading config file: {e}")
        sys.exit(1)

    connector = DBManager(configPath=configPath)

    # PID-file lock: one crawler per first cache location.
    crawler_pidFile = f"/tmp/{str(locations_to_scan[0]).replace('/', '_')}_img_crawler_pid"
    if os.path.exists(crawler_pidFile):
        try:
            with open(crawler_pidFile, "r") as cpidf:
                cpid = cpidf.readline().strip()
            # Signal 0 only probes whether the PID exists.
            os.kill(int(cpid), 0)
        except (OSError, ValueError):
            # OSError: previous crawler is gone; ValueError: the PID file
            # is empty/corrupt (would previously crash the script uncaught).
            # Either way the lock is stale — proceed.
            pass
        else:
            print("Crawler is already running")
            sys.exit(0)

    with open(crawler_pidFile, 'w') as pidf:
        pidf.write(str(os.getpid()))

    try:
        print(f"Scanning: {locations_to_scan}")
        image_paths = scan_image_paths(locations_to_scan)

        for image in image_paths:
            process_image(image["path"], image["metadata"])
    finally:
        # Release the lock so later runs don't depend on the stale-PID probe.
        try:
            os.remove(crawler_pidFile)
        except OSError:
            pass

if __name__ == "__main__":
    main(sys.argv[1:])