Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions Rucio/Chaining/rdataframe_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from rucio.client import Client
import ROOT

# Example: histogram a branch over every file of a Rucio dataset with
# ROOT's RDataFrame, reading the files through their replica URLs.

# Initialize Rucio client
client = Client()

# Define the dataset DID ("<scope>:<name>"); split only on the first colon
dataset_did = "epic:/RECO/26.02.0/epic_craterlake/SINGLE/e+/500MeV/3to50deg"
scope, name = dataset_did.split(':', 1)

# Get the list of files in the dataset
files = list(client.list_files(scope, name))
dids = [{'scope': f['scope'], 'name': f['name']} for f in files]

# Get one replica PFN for each file in the dataset.
# Entries whose 'pfns' mapping is empty are skipped.
file_paths = [
    next(iter(replica['pfns']))  # Get first PFN URL (dict keys are the URLs)
    for replica in client.list_replicas(dids, rse_expression='isopenaccess=true')
    if replica['pfns']
]

# Create an RDataFrame with all files in the dataset
# Replace "events" with the actual tree name
rdf = ROOT.RDataFrame("events", file_paths)

# Book every result *before* anything triggers the event loop.
# RDataFrame actions are lazy: the first access to any booked result runs
# one loop over the data and fills all results booked so far. Booking
# Count() only after the histogram has been drawn would force a second
# full pass over the (remote) files.
h_eventNumber = rdf.Histo1D(
    ("h_eventNumber", "Event Number Distribution;Event Number;Count", 100, 0, 100),
    "EventHeader.eventNumber"
)
n_entries = rdf.Count()

# Draw and save the histogram (this access runs the single event loop)
canvas = ROOT.TCanvas("c1", "Event Number Distribution", 800, 600)
h_eventNumber.Draw()
canvas.SaveAs("event_number_distribution.png")

# Print total entries (already computed by the loop above)
print(f"Total entries in dataset: {n_entries.GetValue()}")
print("Saved histogram to event_number_distribution.png")
36 changes: 36 additions & 0 deletions Rucio/Chaining/tchain_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from rucio.client import Client
import ROOT

# Example: chain every file of a Rucio dataset into one ROOT TChain and
# histogram a branch across the whole chain.

# Rucio client used for the catalogue queries below
client = Client()

# Dataset DID in "<scope>:<name>" form; only the first colon separates them
dataset_did = "epic:/RECO/26.02.0/epic_craterlake/SINGLE/e+/500MeV/3to50deg"
scope, name = dataset_did.split(':', 1)

# Enumerate the dataset's files, keeping just the scope/name pair of each
files = list(client.list_files(scope, name))
dids = []
for entry in files:
    dids.append({'scope': entry['scope'], 'name': entry['name']})

# Keep one PFN URL per file: the first key of each replica's 'pfns' mapping.
# Replicas with an empty mapping contribute nothing.
file_paths = []
for replica in client.list_replicas(dids, rse_expression='isopenaccess=true'):
    if not replica['pfns']:
        continue
    file_paths.append(next(iter(replica['pfns'])))

# One TChain presents all files as a single dataset
# Replace "events" with the actual tree name in your files
chain = ROOT.TChain("events")
for url in file_paths:
    chain.Add(url)

# Fill a 100-bin histogram of EventHeader.eventNumber onto the canvas and save it
canvas = ROOT.TCanvas("c1", "Event Number Distribution", 800, 600)
chain.Draw("EventHeader.eventNumber>>h_eventNumber(100)")
canvas.SaveAs("event_number_distribution.png")

# Report the total number of entries and where the plot was written
print(f"Total entries in dataset: {chain.GetEntries()}")
print("Saved histogram to event_number_distribution.png")
37 changes: 37 additions & 0 deletions Rucio/Chaining/uproot_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from rucio.client import Client
import uproot
import matplotlib.pyplot as plt
import numpy as np

# Example: read one branch from every file of a Rucio dataset with uproot
# and histogram the combined values with matplotlib.

# Initialize Rucio client
client = Client()

# Define the dataset DID ("<scope>:<name>"); split only on the first colon
dataset_did = "epic:/RECO/26.02.0/epic_craterlake/SINGLE/e+/500MeV/3to50deg"
scope, name = dataset_did.split(':', 1)

# Get the list of files in the dataset
files = list(client.list_files(scope, name))
dids = [{'scope': f['scope'], 'name': f['name']} for f in files]

# Get one replica PFN for each file in the dataset.
# Entries whose 'pfns' mapping is empty are skipped.
file_paths = [
    next(iter(replica['pfns']))  # Get first PFN URL (dict keys are the URLs)
    for replica in client.list_replicas(dids, rse_expression='isopenaccess=true')
    if replica['pfns']
]

# Collect EventHeader.eventNumber from all files as one NumPy chunk per file.
# Reading with library="np" and concatenating once avoids extending a Python
# list element by element.
chunks = []
for file_path in file_paths:
    with uproot.open(file_path) as f:
        tree = f["events"]  # Replace "events" with the actual tree name
        values = tree["EventHeader.eventNumber"].array(library="np")
    if values.dtype == object:
        # With library="np", uproot returns a variable-length branch as an
        # object array holding one sub-array per entry; flatten it so the
        # histogram sees plain numbers.
        values = np.concatenate(list(values)) if len(values) else np.empty(0)
    chunks.append(values)

# np.concatenate rejects an empty sequence, so fall back to an empty array
# when no file was found (the plot is then simply empty).
event_numbers = np.concatenate(chunks) if chunks else np.empty(0)

# Create histogram
plt.hist(event_numbers, bins=50)
plt.xlabel("Event Number")
plt.ylabel("Count")
plt.title("EventHeader.eventNumber Distribution")
plt.savefig("event_number_distribution.png")
print("Saved histogram to event_number_distribution.png")