SND@LHC Software
Loading...
Searching...
No Matches
makeRunListDB.py
Go to the documentation of this file.
1import pymongo
2import argparse
3import xml.etree.ElementTree as ET
4from datetime import datetime
5from collections import defaultdict
6import os
7
# Host name of the SND@LHC run database (MongoDB).
DB_HOST = "sndrundb.cern.ch"

# Join every run with the LPC record of its LHC fill; the matching fill
# documents end up in the array field "LPC".
LPC_LOOKUP_STAGE = {"$lookup": {"from": "FILL_LPC", "localField": "fill", "foreignField": "_id", "as": "LPC"}}

# Per-run fields written to the XML file, in output order.
RUN_FIELDS = ("run_number", "start", "end", "n_events", "duration", "n_files",
              "fill_number", "fill_int_lumi", "fill_stable_time", "path")

# Per-run fields that are additionally summed into the <statistics> block.
SUMMED_FIELDS = ("n_events", "duration", "n_files")

# Command line options echoed into the <selection> block ("years" is
# written separately, as a comma separated list).
SELECTION_CRITERIA = ("min_events", "min_lumi", "min_stable_time", "particle_B1", "particle_B2",
                      "min_energy", "max_energy", "min_bunches_IP1", "max_bunches_IP1",
                      "exclude_runs", "include_runs")


def parse_args(argv=None):
    """Parse the command line options of makeRunListDB.

    Args:
        argv: List of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` holding the selection criteria.
    """
    parser = argparse.ArgumentParser(
        prog="makeRunListDB",
        description="Extracts a list of runs from the SND@LHC DB, matching the conditions given in the arguments. Produces an XML file with the run list and a summary of the selection.")
    parser.add_argument("--name", type=str, help="Run list name", required=True)
    parser.add_argument("--years", nargs="+", type=int, help="Years to be included, e.g. 2022 2023", required=True)
    parser.add_argument("--min_events", type=int, help="Minimum number of events in run", default=0)
    parser.add_argument("--min_lumi", type=float, help="Minimum integrated luminosity for a run to be included, in fb-1", default=0.)
    parser.add_argument("--min_stable_time", type=float, help="Minimum stable beams time of the corresponding LHC fill for a run to be included, in minutes", default=0.)
    parser.add_argument("--particle_B1", type=str, help="Beam 1 particle, e.g. p+ or PB82", required=True)
    parser.add_argument("--particle_B2", type=str, help="Beam 2 particle, e.g. p+ or PB82", required=True)
    parser.add_argument("--min_energy", type=float, help="Minimum beam energy (in GeV)", default=0.)
    parser.add_argument("--max_energy", type=float, help="Maximum beam energy (in GeV)", default=0.)
    parser.add_argument("--min_bunches_IP1", type=int, help="Minimum number of bunches colliding at IP1", default=0)
    parser.add_argument("--max_bunches_IP1", type=int, help="Maximum number of bunches colliding at IP1", default=0)
    parser.add_argument("--include_runs", nargs="+", type=int, help="Runs to include, regardless of the other criteria", default=[])
    parser.add_argument("--exclude_runs", nargs="+", type=int, help="Runs to exclude from the list", default=[])
    return parser.parse_args(argv)


def build_selection_pipeline(args):
    """Build the aggregation stages that select runs (without the projection).

    Numeric cuts are only applied when the corresponding option is
    positive (0 means "no cut"). All bounds are inclusive: a run sitting
    exactly on a requested minimum or maximum is kept.

    Args:
        args: Namespace returned by ``parse_args``.

    Returns:
        A list of MongoDB aggregation stages.
    """
    # Runs whose start date falls in one of the selected years
    pipeline = [{"$match": {"$expr": {"$in": [{"$year": "$start"}, args.years]}}}]

    # Runs with at least min_events events
    if args.min_events > 0:
        pipeline.append({"$match": {"events": {"$gte": args.min_events}}})

    # Combine with the fill data from the LPC
    pipeline.append(LPC_LOOKUP_STAGE)

    if args.min_lumi > 0.:
        # min_lumi is given in fb-1 and scaled by 1e6 before the comparison
        # (presumably the DB stores nb-1 -- confirm against the FILL_LPC schema).
        pipeline.append({"$match": {"LPC.ATLAS_Int_Lumi": {"$gte": args.min_lumi*1e6}}})

    if args.min_stable_time > 0.:
        # min_stable_time is given in minutes and divided by 60 before the
        # comparison (presumably the DB stores hours -- confirm).
        pipeline.append({"$match": {"LPC.Stable_time": {"$gte": args.min_stable_time/60.}}})

    # Beam particle species
    pipeline.append({"$match": {"LPC.Particle_B1": args.particle_B1}})
    pipeline.append({"$match": {"LPC.Particle_B2": args.particle_B2}})

    # Beam energy window
    if args.min_energy > 0:
        pipeline.append({"$match": {"LPC.Energy": {"$gte": args.min_energy}}})
    if args.max_energy > 0:
        pipeline.append({"$match": {"LPC.Energy": {"$lte": args.max_energy}}})

    # Number of bunches colliding at IP1
    if args.min_bunches_IP1 > 0:
        pipeline.append({"$match": {"LPC.Coll_IP_1_5": {"$gte": args.min_bunches_IP1}}})
    if args.max_bunches_IP1 > 0:
        pipeline.append({"$match": {"LPC.Coll_IP_1_5": {"$lte": args.max_bunches_IP1}}})

    # Explicitly vetoed runs
    if args.exclude_runs:
        pipeline.append({"$match": {"runNumber": {"$nin": args.exclude_runs}}})

    return pipeline


def build_projection():
    """Return the ``$project`` stage extracting the per-run data from the DB.

    The stage reads the "LPC" array, so it must come after
    ``LPC_LOOKUP_STAGE`` in any pipeline that uses it.
    """
    # Run length in minutes, from the start and stop datetimes
    run_length_expr = {"$dateDiff": {"startDate": "$start", "endDate": "$stop", "unit": "minute"}}

    return {"$project": {
        "_id": 0,                                              # Do not extract DB entry ID
        "run_number": "$runNumber",                            # Run number
        "n_events": "$events",                                 # Number of events
        "start": 1,                                            # Start date
        # End date. The duration above is computed from "$stop", so fall
        # back to that field when the document has no "end" field.
        # NOTE(review): assumes the stop time is stored as "stop" -- confirm.
        "end": {"$ifNull": ["$end", "$stop"]},
        "duration": run_length_expr,                           # Run duration
        # Number of files; a missing "files" array counts as zero files
        # instead of aborting the whole aggregation.
        "n_files": {"$size": {"$ifNull": ["$files", []]}},
        "path": {"$first": "$files.file"},                     # Path of the first file
        "fill_number": "$fill",                                # Fill number
        "fill_int_lumi": {"$first": "$LPC.ATLAS_Int_Lumi"},    # Integrated luminosity
        "fill_stable_time": {"$first": "$LPC.Stable_time"},    # Stable beams duration
    }}


def build_include_pipeline(run_numbers):
    """Build the pipeline fetching runs that are included unconditionally.

    The LPC lookup is part of the pipeline so that the fill luminosity and
    stable-beams time are filled for these runs as well.

    Args:
        run_numbers: Run numbers to fetch regardless of the selection.

    Returns:
        A list of MongoDB aggregation stages.
    """
    return [
        {"$match": {"runNumber": {"$in": run_numbers}}},
        LPC_LOOKUP_STAGE,
        build_projection(),
    ]


def merge_runs(selected, forced):
    """Merge selected and force-included runs into one list sorted by run number.

    A forced run that already passed the selection is not added a second
    time, so it is neither listed nor counted twice.

    Args:
        selected: Projected run documents that passed the selection.
        forced: Projected run documents requested through --include_runs.

    Returns:
        A new list of run documents, sorted by "run_number".
    """
    already_selected = {run["run_number"] for run in selected}
    merged = list(selected)
    merged.extend(run for run in forced if run["run_number"] not in already_selected)
    merged.sort(key=lambda run: run["run_number"])
    return merged


def fetch_runs(collection, args):
    """Query the run collection and return the final, sorted list of runs.

    Args:
        collection: Object with an ``aggregate(pipeline)`` method, here the
            "EcsData" collection of the run database.
        args: Namespace returned by ``parse_args``.

    Returns:
        List of projected run documents (dicts), sorted by run number.
    """
    selected = list(collection.aggregate(build_selection_pipeline(args) + [build_projection()]))

    forced = []
    if args.include_runs:
        forced = list(collection.aggregate(build_include_pipeline(args.include_runs)))

    return merge_runs(selected, forced)


def build_xml(args, runs, now):
    """Format the run list and its summary as an XML tree.

    Args:
        args: Namespace returned by ``parse_args``; echoed into <selection>.
        runs: Iterable of projected run documents (dicts).
        now: ``datetime`` stamped into the <datetime> element.

    Returns:
        The root ``<runlist>`` element.
    """
    root = ET.Element("runlist")

    meta_data = ET.SubElement(root, "meta")
    ET.SubElement(meta_data, "name").text = args.name
    ET.SubElement(meta_data, "datetime").text = now.strftime("%Y-%m-%dT%H:%M:%S.%f")
    selection = ET.SubElement(meta_data, "selection")
    ET.SubElement(selection, "years").text = ','.join([str(y) for y in args.years])
    for criterion in SELECTION_CRITERIA:
        ET.SubElement(selection, criterion).text = str(getattr(args, criterion))
    run_list = ET.SubElement(root, "runs")

    # Counters for summary statistics
    totals = defaultdict(int)

    for run in runs:
        this_run = ET.SubElement(run_list, "run")
        totals["n_runs"] += 1
        for field_name in RUN_FIELDS:
            # Fields the DB did not return are simply left out of the <run>
            if field_name not in run:
                continue
            data = run[field_name]

            if field_name == "path":
                if data is None:
                    # No file known for this run: nothing to take a directory of
                    continue
                # Only the directory holding the run's files is recorded
                data = os.path.dirname(data)

            ET.SubElement(this_run, field_name).text = str(data)

            if field_name in SUMMED_FIELDS and data is not None:
                totals["tot_"+field_name] += data

    stats = ET.SubElement(meta_data, "statistics")
    for key, data in totals.items():
        ET.SubElement(stats, key).text = str(data)

    return root


def write_xml(root, file_name):
    """Write the XML tree rooted at ``root`` to ``file_name`` (UTF-8, indented)."""
    tree = ET.ElementTree(root)
    ET.indent(tree, space=" ")
    tree.write(file_name, encoding="utf-8", xml_declaration=True)


def main():
    """Select runs from the SND@LHC run DB and write them to an XML run list."""
    args = parse_args()

    client = pymongo.MongoClient(DB_HOST)
    try:
        runs = fetch_runs(client.sndrundb["EcsData"], args)
    finally:
        # Release the DB connection even if the query fails
        client.close()

    # The same timestamp goes into the file content and the file name
    now = datetime.now()
    root = build_xml(args, runs, now)
    write_xml(root, args.name+"_"+str(now.timestamp())+".xml")


if __name__ == "__main__":
    main()