// Copyright 2019-2023 CERN and copyright holders of ALICE O2. // See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. // All rights not expressly granted are reserved. // // This software is distributed under the terms of the GNU General Public // License v3 (GPL Version 3), copied verbatim in the file "COPYING". // // In applying this license CERN does not waive the privileges and immunities // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. /// \file applyMlSelection.cxx /// \brief Showcase usage of trained ML model exported to ONNX for candidate selection in analysis /// /// \author Fabio Catalano , CERN #include "Framework/AnalysisTask.h" #include "Framework/HistogramRegistry.h" #include "Framework/runDataProcessing.h" #include "Tools/ML/MlResponse.h" #include "PWGHF/Core/HfHelper.h" #include "PWGHF/DataModel/CandidateReconstructionTables.h" using namespace o2; using namespace o2::analysis; using namespace o2::framework; using namespace o2::framework::expressions; static constexpr double defaultCutsMl[1][3] = {{0.5, 0.5, 0.5}}; struct applyMlSelection { // Analysis configuration Configurable ptCandMin{"ptCandMin", 1., "Lower bound of candidate pT"}; Configurable ptCandMax{"ptCandMax", 36., "Upper bound of candidate pT"}; Configurable yCandRecoMax{"yCandRecoMax", 0.8, "max. cand. rapidity"}; // ML inference Configurable> binsPtMl{"binsPtMl", std::vector{1., 36.}, "pT bin limits for ML application"}; Configurable> cutDirMl{"cutDirMl", std::vector{cuts_ml::CutSmaller, cuts_ml::CutNot, cuts_ml::CutNot}, "Whether to reject score values greater or smaller than the threshold"}; Configurable> cutsMl{"cutsMl", {defaultCutsMl[0], 1, 3, {"pT bin 0"}, {"score prompt", "score non-prompt", "score bkg"}}, "ML selections per pT bin"}; Configurable nClassesMl{"nClassesMl", (int8_t)3, "Number of classes in ML model"}; // Model file names Configurable> onnxFileNames{"onnxFileNames", std::vector{"model_onnx.onnx"}, "ONNX file names for each pT bin (if not from CCDB full path)"}; // Bonus: CCDB configuration (needed for ML application on the GRID) Configurable loadModelsFromCCDB{"loadModelsFromCCDB", false, "Flag to enable or disable the loading of models from CCDB"}; Configurable ccdbUrl{"ccdbUrl", "http://alice-ccdb.cern.ch", "url of the ccdb repository"}; Configurable modelPathsCCDB{"modelPathsCCDB", "Users/f/fcatalan/O2AT3/MlInference", "Path on CCDB"}; Configurable timestampCCDB{"timestampCCDB", -1, "timestamp of the ONNX file for ML model used to query in CCDB"}; Filter filterDsFlag = (o2::aod::hf_track_index::hfflag & static_cast(BIT(aod::hf_cand_3prong::DecayType::DsToKKPi))) != static_cast(0); HfHelper hfHelper; o2::ccdb::CcdbApi ccdbApi; int nCandidates = 0; // Add objects needed for ML inference o2::analysis::MlResponse mlResponse; std::vector outputMl = {}; // Add histograms for other BDT scores and for distributions after selections HistogramRegistry registry{ "registry", {{"hMassBeforeSel", "Ds candidates before selection;inv. mass (KK#pi) (GeV/#it{c}^{2});entries", {HistType::kTH1F, {{100, 1.77, 2.17}}}}, {"hPromptScoreBeforeSel", "Prompt score before selection;BDT first score;entries", {HistType::kTH1F, {{100, 0., 1.}}}}, {"hNonPromptScoreBeforeSel", "Non prompt score before selection;BDT second score;entries", {HistType::kTH1F, {{100, 0., 1.}}}}, {"hBkgScoreBeforeSel", "Bkg score before selection;BDT third score;entries", {HistType::kTH1F, {{100, 0., 1.}}}}, {"hMassAfterSel", "Ds candidates after selection;inv. mass (KK#pi) (GeV/#it{c}^{2});entries", {HistType::kTH1F, {{100, 1.77, 2.17}}}}, {"hPromptScoreAfterSel", "Prompt score after selection;BDT first score;entries", {HistType::kTH1F, {{100, 0., 1.}}}}, {"hNonPromptScoreAfterSel", "Non prompt score after selection;BDT second score;entries", {HistType::kTH1F, {{100, 0., 1.}}}}, {"hBkgScoreAfterSel", "Bkg score after selection;BDT third score;entries", {HistType::kTH1F, {{100, 0., 1.}}}}}}; void init(InitContext const&) { // Add histograms vs pT (only for selected candidates) auto vbins = (std::vector)binsPtMl; registry.add("hMassAfterSelVsPt", "Ds candidates;inv. mass (KK#pi) (GeV/#it{c}^{2});entries", {HistType::kTH2F, {{100, 1.77, 2.17}, {vbins, "#it{p}_{T} (GeV/#it{c})"}}}); registry.add("hPromptScoreAfterSelVsPt", "Prompt score after selection;BDT first score;entries", {HistType::kTH2F, {{100, 0., 1.}, {vbins, "#it{p}_{T} (GeV/#it{c})"}}}); registry.add("hNonPromptScoreAfterSelVsPt", "Non prompt score after selection;BDT second score;entries", {HistType::kTH2F, {{100, 0., 1.}, {vbins, "#it{p}_{T} (GeV/#it{c})"}}}); registry.add("hBkgScoreAfterSelVsPt", "Bkg score after selection;BDT third score;entries", {HistType::kTH2F, {{100, 0., 1.}, {vbins, "#it{p}_{T} (GeV/#it{c})"}}}); // Configure and initialise the ML class mlResponse.configure(binsPtMl, cutsMl, cutDirMl, nClassesMl); // Bonus: retrieve the model from CCDB (needed for ML application on the GRID) if (loadModelsFromCCDB) { ccdbApi.init(ccdbUrl); mlResponse.setModelPathsCCDB(onnxFileNames, ccdbApi, modelPathsCCDB.value, timestampCCDB); } else { mlResponse.setModelPathsLocal(onnxFileNames); } mlResponse.init(); } void process(soa::Filtered const& candidates) { // Looping over Ds candidates for (const auto& candidate : candidates) { nCandidates++; if (nCandidates % 1000 == 0) { LOG(info) << "Candidates processed: " << nCandidates; } auto candpT = candidate.pt(); // Check that the candidate pT is within the analysis range if (candpT < ptCandMin || candpT > ptCandMax) { continue; } // Check that the candidate rapidity is within the analysis range if (yCandRecoMax >= 0. && std::abs(hfHelper.yDs(candidate)) > yCandRecoMax) { continue; } // Fill mass histogram before ML selections registry.fill(HIST("hMassBeforeSel"), hfHelper.invMassDsToPiKK(candidate)); registry.fill(HIST("hMassBeforeSel"), hfHelper.invMassDsToKKPi(candidate)); // Perform ML selections for one mass hypothesis (Ds -> PhiPi -> PiKK) std::vector inputFeaturesPiKK{candidate.cpa(), candidate.cpaXY(), candidate.decayLength(), candidate.decayLengthXY(), static_cast(hfHelper.deltaMassPhiDsToPiKK(candidate)), candidate.impactParameterXY(), static_cast(hfHelper.cos3PiKDsToPiKK(candidate)), candidate.maxNormalisedDeltaIP()}; // Retrieve model output and selection outcome bool isSelectedMlPiKK = mlResponse.isSelectedMl(inputFeaturesPiKK, candidate.pt(), outputMl); // Fill BDT score histograms before selection registry.fill(HIST("hPromptScoreBeforeSel"), outputMl[0]); registry.fill(HIST("hNonPromptScoreBeforeSel"), outputMl[1]); registry.fill(HIST("hBkgScoreBeforeSel"), outputMl[2]); // Fill histograms for selected candidates if (isSelectedMlPiKK) { registry.fill(HIST("hMassAfterSel"), hfHelper.invMassDsToPiKK(candidate)); registry.fill(HIST("hPromptScoreAfterSel"), outputMl[0]); registry.fill(HIST("hNonPromptScoreAfterSel"), outputMl[1]); registry.fill(HIST("hBkgScoreAfterSel"), outputMl[2]); registry.fill(HIST("hMassAfterSelVsPt"), hfHelper.invMassDsToPiKK(candidate), candidate.pt()); registry.fill(HIST("hPromptScoreAfterSelVsPt"), outputMl[0], candidate.pt()); registry.fill(HIST("hNonPromptScoreAfterSelVsPt"), outputMl[1], candidate.pt()); registry.fill(HIST("hBkgScoreAfterSelVsPt"), outputMl[2], candidate.pt()); } outputMl.clear(); // not necessary in this case but for good measure // Perform ML selections for other mass hypothesis (Ds -> PhiPi -> KKPi) std::vector inputFeaturesKKPi{candidate.cpa(), candidate.cpaXY(), candidate.decayLength(), candidate.decayLengthXY(), static_cast(hfHelper.deltaMassPhiDsToKKPi(candidate)), candidate.impactParameterXY(), static_cast(hfHelper.cos3PiKDsToKKPi(candidate)), candidate.maxNormalisedDeltaIP()}; // Retrieve model output and selection outcome bool isSelectedMlKKPi = mlResponse.isSelectedMl(inputFeaturesPiKK, candidate.pt(), outputMl); // Fill BDT score histograms before selection registry.fill(HIST("hPromptScoreBeforeSel"), outputMl[0]); registry.fill(HIST("hNonPromptScoreBeforeSel"), outputMl[1]); registry.fill(HIST("hBkgScoreBeforeSel"), outputMl[2]); // Fill histograms for selected candidates if (isSelectedMlKKPi) { registry.fill(HIST("hMassAfterSel"), hfHelper.invMassDsToKKPi(candidate)); registry.fill(HIST("hPromptScoreAfterSel"), outputMl[0]); registry.fill(HIST("hNonPromptScoreAfterSel"), outputMl[1]); registry.fill(HIST("hBkgScoreAfterSel"), outputMl[2]); registry.fill(HIST("hMassAfterSelVsPt"), hfHelper.invMassDsToKKPi(candidate), candidate.pt()); registry.fill(HIST("hPromptScoreAfterSelVsPt"), outputMl[0], candidate.pt()); registry.fill(HIST("hNonPromptScoreAfterSelVsPt"), outputMl[1], candidate.pt()); registry.fill(HIST("hBkgScoreAfterSelVsPt"), outputMl[2], candidate.pt()); } outputMl.clear(); // not necessary in this case but for good measure } } }; WorkflowSpec defineDataProcessing(ConfigContext const& cfgc) { return WorkflowSpec{adaptAnalysisTask(cfgc)}; }