File indexing completed on 2025-07-01 07:56:42
0001 import os
0002 import re
0003 import sys
0004 import glob
0005 import pandas as pd
0006
0007
0008
0009
0010
0011
0012
0013
0014 try:
0015 from XRootD import client
0016 from XRootD.client.flags import OpenFlags
0017 except ImportError:
0018 client = None
0019
def clean_key(key):
    """Normalise a parsed parameter name into a compact dictionary key.

    A key that contains a space is treated as a human-readable label and is
    only stripped of surrounding whitespace.  Any other key has ``[`` turned
    into ``_`` and every remaining non-word character (``]``, ``#``,
    punctuation, ...) removed, so ``"HV[0]"`` becomes ``"HV_0"``.
    """
    # NOTE(review): the listing this block came from had lost its indentation;
    # both clean-up statements are assumed to sit under the "no space" guard
    # (otherwise the final strip() could never have any effect) -- confirm.
    if ' ' in key:
        return key.strip()

    without_brackets = key.replace('[', '_').replace(']', '').replace('#', '')
    word_chars_only = re.sub(r'[^\w_]', '', without_brackets)
    return word_chars_only.strip()
0025
def read_xrdfs_file_lines(xrd_url):
    """Read a whole remote text file through XRootD and return its lines.

    Args:
        xrd_url: ``root://`` URL of the file to read.

    Returns:
        The file content as a list of strings, one per line, without line
        terminators.

    Raises:
        ImportError: if the XRootD Python bindings could not be imported.
        IOError: if the remote file cannot be opened.
        UnicodeDecodeError: if the file content is not valid UTF-8.
    """
    if client is None:
        raise ImportError("XRootD client not installed. Run: pip install XRootD")

    # Rewrite "root://host/path" as "root://host//path"; if the URL still has
    # no double slash after the host part, fall back to the "root:///" form.
    fixed_url = re.sub(r'^(root://[^/]+)(/[^/])', r'\1/\2', xrd_url)
    if not re.match(r'root://[^/]+//', fixed_url):
        fixed_url = fixed_url.replace("root://", "root:///", 1)

    f = client.File()
    status, _ = f.open(fixed_url, OpenFlags.READ)
    if not status.ok:
        raise IOError(f"Failed to open {fixed_url}: {status.message}")

    chunks = []
    offset = 0
    chunk_size = 65536
    try:
        while True:
            status, data = f.read(offset, chunk_size)
            # A failed read is treated like end-of-file (best effort): whatever
            # was read so far is still returned.
            if not status.ok or not data:
                break
            chunks.append(data)
            offset += len(data)
    finally:
        # Release the remote handle even if reading raised.
        f.close()

    # Decode once over the joined bytes: decoding chunk by chunk would fail
    # whenever a multi-byte UTF-8 character straddles a chunk boundary.
    return b''.join(chunks).decode('utf-8').splitlines()
0051
def parse_info_file(filepath):
    """Parse a run info text file into a flat ``{key: value}`` dictionary.

    Each line is cut at the first ``#``; empty lines and lines starting with
    ``*`` are skipped.  The remaining lines are interpreted, in this order of
    precedence, as ``key: value``, ``key = value`` or ``key value`` (first
    whitespace-separated token is the key).  Keys of the last two forms are
    passed through ``clean_key``.

    After the line scan, ``Run``, ``Start Time``, ``Stop Time`` and
    ``Elapsed (s)`` are filled from regex searches over the raw text when the
    line scan did not produce them.  ``Run`` is always stored as an ``int``.

    Args:
        filepath: local path, or a ``root://`` URL read via XRootD.

    Returns:
        dict mapping parameter names to string values, except ``Run`` (int)
        and a regex-derived ``Elapsed (s)`` (float).

    Raises:
        ValueError: if a ``Run`` entry is present but contains no digits.
        OSError / ImportError: propagated from opening or reading the file.
    """
    print(f"Parsing info file: {os.path.basename(filepath)}")
    if filepath.startswith("root://"):
        lines = read_xrdfs_file_lines(filepath)
    else:
        # Close the handle deterministically and decode as UTF-8, matching
        # what the remote reader does.
        with open(filepath, encoding="utf-8") as handle:
            lines = handle.read().splitlines()

    data = {}
    for raw_line in lines:
        line = raw_line.split('#', 1)[0].strip()
        if not line or line.startswith('*'):
            continue
        if ':' in line:
            key, val = line.split(':', 1)
            data[key.strip()] = val.strip()
        elif '=' in line:
            key, val = line.split('=', 1)
            data[clean_key(key)] = val.strip()
        elif re.match(r'\w+\[?\d*\]?\s+', line):
            parts = re.split(r'\s+', line, maxsplit=1)
            if len(parts) == 2:
                data[clean_key(parts[0])] = parts[1].strip()

    # The fallbacks below search the raw text, i.e. including anything that
    # the line scan above discarded as a comment.
    content = "\n".join(lines)
    if "Run" not in data:
        run_match = re.search(r'Run n\.\s*(\d+)', content)
        if run_match:
            data["Run"] = int(run_match.group(1))

    if "Run" in data:
        # The value may carry extra text; keep only the first integer in it.
        digits = re.search(r'\d+', str(data["Run"]))
        if digits is None:
            raise ValueError(f"No run number found in 'Run' field: {data['Run']!r}")
        data["Run"] = int(digits.group())

    if "Start Time" not in data:
        match = re.search(r'Start Time:\s*(.+)', content)
        if match:
            data["Start Time"] = match.group(1).strip()
    if "Stop Time" not in data:
        match = re.search(r'Stop Time:\s*(.+)', content)
        if match:
            data["Stop Time"] = match.group(1).strip()
    if "Elapsed (s)" not in data:
        match = re.search(r'Elapsed time\s*=\s*([\d.]+)', content)
        if match:
            data["Elapsed (s)"] = float(match.group(1))

    return data
0095
def count_triggers_in_listfile(filepath):
    """Estimate the number of triggers in a list file from its last entry.

    Only the final 64 KiB of the file are read.  Scanning backwards, the first
    line that is not blank, does not start with ``//`` and whose sixth
    whitespace-separated column parses as an integer supplies the last trigger
    ID; the function returns that ID plus one.
    NOTE(review): this presumes trigger IDs start at 0 and increase
    monotonically -- confirm against the list-file format.

    Args:
        filepath: local path, or a ``root://`` URL read via XRootD.

    Returns:
        ``last_trigger_id + 1``, or 0 if no trigger line was found.

    Raises:
        ImportError: remote URL given but XRootD bindings are unavailable.
        IOError: the remote file cannot be opened, stat'ed or read.
        OSError: the local file cannot be opened.
    """
    print(f" Fast counting triggers in file: {os.path.basename(filepath)}")
    last_trgid = None
    chunk_size = 65536

    if filepath.startswith("root://"):
        if client is None:
            raise ImportError("XRootD client not installed.")
        # Same URL normalisation as used when reading whole remote files.
        fixed_url = re.sub(r'^(root://[^/]+)(/[^/])', r'\1/\2', filepath)
        if not re.match(r'root://[^/]+//', fixed_url):
            fixed_url = fixed_url.replace("root://", "root:///", 1)
        f = client.File()
        status, _ = f.open(fixed_url, OpenFlags.READ)
        if not status.ok:
            raise IOError(f"Failed to open {fixed_url}: {status.message}")
        try:
            status, statinfo = f.stat()
            if not status.ok:
                raise IOError(f"Failed to stat {fixed_url}: {status.message}")
            filesize = statinfo.size
            tail = b""
            if filesize > 0:
                offset = max(0, filesize - chunk_size)
                status, tail = f.read(offset, min(chunk_size, filesize))
                if not status.ok:
                    raise IOError(f"Failed to read {fixed_url}: {status.message}")
        finally:
            # Release the remote handle even when stat/read failed.
            f.close()
    else:
        with open(filepath, 'rb') as f:
            f.seek(0, os.SEEK_END)
            filesize = f.tell()
            f.seek(max(0, filesize - chunk_size))
            tail = f.read()

    # The tail starts at an arbitrary byte offset and may begin in the middle
    # of a multi-byte character; replace undecodable bytes instead of failing.
    lines = tail.decode("utf-8", errors="replace").splitlines()

    for line in reversed(lines):
        line = line.strip()
        if not line or line.startswith('//'):
            continue
        parts = re.split(r'\s+', line)
        if len(parts) >= 6:
            try:
                last_trgid = int(parts[5])
                break
            except ValueError:
                continue

    return (last_trgid + 1) if last_trgid is not None else 0
0139
def list_remote_files(xrd_base):
    """List the entries of a remote XRootD directory as full URLs.

    Args:
        xrd_base: ``root://server/path`` or ``root://server//path`` URL of
            the directory.

    Returns:
        list of str: ``xrd_base`` (without trailing slashes) joined with each
        entry name reported by the server.

    Raises:
        ImportError: if the XRootD Python bindings could not be imported.
        IOError: if the directory listing fails.
    """
    if client is None:
        raise ImportError("XRootD client not installed.")

    # Split on the scheme separator only: a plain split('//') would also cut
    # at the double slash of "root://server//path" and lose the whole path.
    location = xrd_base.split('//', 1)[1]
    server, _, remote_path = location.partition('/')
    path = '/' + remote_path.lstrip('/')

    fs = client.FileSystem(server)
    status, listing = fs.dirlist(path)
    if not status.ok:
        raise IOError(f"Failed to list {xrd_base}: {status.message}")
    return [f"{xrd_base.rstrip('/')}/{entry.name}" for entry in listing]
0150
def summarize_runs(directory):
    """Build a per-run summary table for a directory of run files.

    For every ``Run<N>_Info.txt`` file found in ``directory`` the info file is
    parsed, the matching list files (``Run<N>_list.txt|csv`` or
    ``Run<N>.<part>_list.txt|csv``) are located, and three columns are added
    to the parsed info: ``NumParts`` (number of list files), ``NumTriggers``
    (sum of the per-file trigger counts) and ``TotalSize_MB`` (combined size
    of the list files in MiB, rounded to two decimals).

    Info files that fail to parse or lack a ``Run`` entry are skipped with a
    message, as are runs without any list file.

    Args:
        directory: local directory, or a ``root://`` URL of a remote one.

    Returns:
        pandas.DataFrame with one row per summarised run; an empty DataFrame
        when no info file is found.
    """
    summary = []
    if directory.startswith("root://"):
        all_files = list_remote_files(directory)
    else:
        all_files = glob.glob(os.path.join(directory, "*"))

    info_files = sorted(f for f in all_files if re.search(r'Run\d+_Info\.txt$', f))

    if not info_files:
        print("No Run*_Info.txt files found.")
        return pd.DataFrame()

    for info_path in info_files:
        try:
            info_data = parse_info_file(info_path)
            run_number = info_data['Run']
        except Exception as e:
            # Best effort: one unreadable info file must not abort the summary.
            print(f" Skipping {info_path}: {e}")
            continue

        base = f"Run{run_number}"
        # One pattern for both "<base>_list.ext" and "<base>.<part>_list.ext",
        # compiled once per run instead of twice per candidate file.
        list_pattern = re.compile(
            rf".*{re.escape(base)}(?:\.\d+)?_list\.(?:txt|csv)$"
        )
        part_files = []

        for f in all_files:
            if list_pattern.match(f):
                print(f" --> Matched: {os.path.basename(f)}")
                part_files.append(f)

        if not part_files:
            print(f" Warning: no list files found for {base}")
            continue

        n_parts = len(part_files)
        n_triggers = sum(count_triggers_in_listfile(f) for f in part_files)

        total_size = 0

        for f in part_files:
            if f.startswith("root://"):
                # Split on the scheme separator only, so a "root://host//path"
                # URL keeps its path instead of collapsing to "/".
                location = f.split('//', 1)[1]
                server, _, remote_path = location.partition('/')
                path = '/' + remote_path.lstrip('/')
                fs = client.FileSystem(server)
                status, statinfo = fs.stat(path)
                if status.ok:
                    total_size += statinfo.size
                else:
                    print(f" Warning: Could not get size for {f}")
            else:
                try:
                    total_size += os.path.getsize(f)
                except OSError:
                    # Only filesystem errors are expected here; anything else
                    # (e.g. KeyboardInterrupt) must propagate.
                    print(f" Warning: Could not get size for {f}")

        info_data["NumParts"] = n_parts
        info_data["NumTriggers"] = n_triggers
        info_data["TotalSize_MB"] = round(total_size / (1024 * 1024), 2)
        summary.append(info_data)

    return pd.DataFrame(summary)
0209
if __name__ == "__main__":
    import argparse

    # Command-line entry point: summarise the runs found under the given
    # location and write the table, ordered by run number, to a CSV file.
    cli = argparse.ArgumentParser(description="Summarize CAEN DT5202 run files")
    cli.add_argument("path", help="Local directory or XRootD path (e.g. ./data or root://server/path/)")
    target = cli.parse_args().path

    # The output path is relative to the current working directory.
    outfile = "../configs/run_summary_full.csv"
    summary_df = summarize_runs(target)

    # An empty summary has no "Run" column, so only sort when it exists.
    # NOTE(review): the listing lost its indentation; the write below is
    # assumed to happen unconditionally -- confirm.
    if "Run" in summary_df.columns:
        summary_df = summary_df.sort_values(by="Run")

    print("Writing file: ", outfile)
    summary_df.to_csv(outfile, index=False)
    print(summary_df.head())
0223