Semantics & points of interest

Imports and configuration

If needed, you can edit the path to the lartpc_mlreco3d library and to the data folder.

import os

# Root of the lartpc_mlreco3d checkout, expected directly under $HOME.
SOFTWARE_DIR = '{}/lartpc_mlreco3d'.format(os.environ.get('HOME'))
# Data folder, read from the DATA_DIR environment variable (None when unset).
DATA_DIR = os.getenv('DATA_DIR')

The usual imports and setting up the Python module search path (`sys.path`)… click if you need to see them.

import sys, os
# Put the software directory first on the module search path. This has to run
# before the `mlreco` imports below (presumably the package lives under
# SOFTWARE_DIR -- confirm against your installation).
sys.path.insert(0, SOFTWARE_DIR)
import numpy as np
import yaml
import torch
import plotly
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Initialise plotly's offline notebook mode before any figure is drawn.
init_notebook_mode(connected=False)

# Visualization, PPN, clustering, configuration and labelling helpers from mlreco.
from mlreco.visualization import scatter_points, plotly_layout3d
from mlreco.visualization.gnn import scatter_clusters, network_topology, network_schematic
from mlreco.utils.ppn import uresnet_ppn_type_point_selector
from mlreco.utils.cluster.dense_cluster import fit_predict_np, gaussian_kernel
from mlreco.main_funcs import process_config, prepare
from mlreco.utils.gnn.cluster import get_cluster_label
from mlreco.utils.deghosting import adapt_labels_numpy as adapt_labels
# NOTE(review): network_topology is already imported from the same module a few
# lines above; this import is redundant.
from mlreco.visualization.gnn import network_topology

from larcv import larcv
/usr/local/lib/python3.8/dist-packages/MinkowskiEngine/__init__.py:36: UserWarning:

The environment variable `OMP_NUM_THREADS` not set. MinkowskiEngine will automatically set `OMP_NUM_THREADS=16`. If you want to set `OMP_NUM_THREADS` manually, please export it on the command line before running a python script. e.g. `export OMP_NUM_THREADS=12; python your_program.py`. It is recommended to set it below 24.
Welcome to JupyROOT 6.22/09

The configuration is loaded from the file inference.cfg.

# Fail early with an explicit message: without DATA_DIR neither the config path
# nor the placeholder substitution below can work.
if DATA_DIR is None:
    raise RuntimeError('DATA_DIR is not set; export the DATA_DIR environment variable first.')
# Read the inference configuration and substitute every literal 'DATA_DIR'
# token with the actual data folder. The context manager closes the file
# handle, which the previous one-liner left open.
with open('%s/inference.cfg' % DATA_DIR, 'r') as cfg_file:
    cfg_text = cfg_file.read().replace('DATA_DIR', DATA_DIR)
# NOTE: yaml.Loader can construct arbitrary Python objects; only load
# configuration files you trust.
cfg = yaml.load(cfg_text, Loader=yaml.Loader)
# pre-process configuration (checks + certain non-specified default settings)
process_config(cfg)
# prepare function configures necessary "handlers"
hs = prepare(cfg)
Config processed at: Linux ampt017 3.10.0-1160.42.2.el7.x86_64 #1 SMP Tue Sep 7 14:49:57 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux

$CUDA_VISIBLE_DEVICES="0"

{   'iotool': {   'batch_size': 10,
                  'collate_fn': 'CollateSparse',
                  'dataset': {   'data_keys': [   '/sdf/home/l/ldomine/lartpc_mlreco3d_tutorials/book/data/mpvmpr_062022_test_small.root'],
                                 'limit_num_files': 10,
                                 'name': 'LArCVDataset',
                                 'schema': {   'cluster_label': [   'parse_cluster3d_clean_full',
                                                                    'cluster3d_pcluster',
                                                                    'particle_pcluster',
                                                                    'particle_mpv',
                                                                    'sparse3d_pcluster_semantics'],
                                               'input_data': [   'parse_sparse3d_scn',
                                                                 'sparse3d_reco',
                                                                 'sparse3d_reco_chi2',
                                                                 'sparse3d_reco_hit_charge0',
                                                                 'sparse3d_reco_hit_charge1',
                                                                 'sparse3d_reco_hit_charge2',
                                                                 'sparse3d_reco_hit_key0',
                                                                 'sparse3d_reco_hit_key1',
                                                                 'sparse3d_reco_hit_key2'],
                                               'kinematics_label': [   'parse_cluster3d_kinematics_clean',
                                                                       'cluster3d_pcluster',
                                                                       'particle_corrected',
                                                                       'particle_mpv',
                                                                       'sparse3d_pcluster_semantics'],
                                               'particle_graph': [   'parse_particle_graph_corrected',
                                                                     'particle_corrected',
                                                                     'cluster3d_pcluster'],
                                               'particles_asis': [   'parse_particle_asis',
                                                                     'particle_pcluster',
                                                                     'cluster3d_pcluster'],
                                               'particles_label': [   'parse_particle_points_with_tagging',
                                                                      'sparse3d_pcluster',
                                                                      'particle_corrected'],
                                               'segment_label': [   'parse_sparse3d_scn',
                                                                    'sparse3d_pcluster_semantics_ghost']}},
                  'minibatch_size': 10,
                  'num_workers': 1,
                  'shuffle': False},
    'model': {   'loss_input': [   'segment_label',
                                   'particles_label',
                                   'cluster_label',
                                   'kinematics_label',
                                   'particle_graph'],
                 'modules': {   'chain': {   'enable_charge_rescaling': True,
                                             'enable_cnn_clust': True,
                                             'enable_cosmic': False,
                                             'enable_dbscan': True,
                                             'enable_ghost': True,
                                             'enable_gnn_inter': True,
                                             'enable_gnn_kinematics': False,
                                             'enable_gnn_shower': True,
                                             'enable_gnn_track': True,
                                             'enable_ppn': True,
                                             'enable_uresnet': True,
                                             'process_fragments': True,
                                             'use_ppn_in_gnn': True,
                                             'use_supp_in_gnn': True,
                                             'use_true_fragments': False,
                                             'verbose': True},
                                'cosmic_discriminator': {   'res_encoder': {   'coordConv': True,
                                                                               'latent_size': 2,
                                                                               'pool_mode': 'avg',
                                                                               'spatial_size': 6144},
                                                            'use_input_data': False,
                                                            'use_true_interactions': False},
                                'cosmic_loss': {   'node_loss': {   'balance_classes': True,
                                                                    'name': 'type',
                                                                    'target_col': 8}},
                                'dbscan': {   'dbscan_fragment_manager': {   'cluster_classes': [   0,
                                                                                                    2,
                                                                                                    3],
                                                                             'delta_label': 3,
                                                                             'eps': [   1.999,
                                                                                        1.999,
                                                                                        4.999],
                                                                             'michel_label': 2,
                                                                             'num_classes': 4,
                                                                             'ppn_score_threshold': 0.5,
                                                                             'ppn_type_score_threshold': 0.5,
                                                                             'track_clustering_method': 'masked_dbscan',
                                                                             'track_label': 1}},
                                'graph_spice': {   'constructor_cfg': {   'cluster_col': 5,
                                                                          'edge_cut_threshold': 0.1,
                                                                          'edge_mode': 'attributes',
                                                                          'hyper_dimension': 22,
                                                                          'mode': 'knn',
                                                                          'seg_col': -1},
                                                   'embedder_cfg': {   'graph_spice_embedder': {   'covariance_mode': 'softplus',
                                                                                                   'feature_embedding_dim': 16,
                                                                                                   'num_classes': 5,
                                                                                                   'occupancy_mode': 'softplus',
                                                                                                   'segmentationLayer': False,
                                                                                                   'spatial_embedding_dim': 3},
                                                                       'uresnet': {   'activation': {   'args': {   'negative_slope': 0.33},
                                                                                                        'name': 'lrelu'},
                                                                                      'allow_bias': False,
                                                                                      'depth': 5,
                                                                                      'filters': 32,
                                                                                      'input_kernel': 5,
                                                                                      'norm_layer': {   'args': {   'eps': 0.0001,
                                                                                                                    'momentum': 0.01},
                                                                                                        'name': 'batch_norm'},
                                                                                      'num_input': 4,
                                                                                      'reps': 2,
                                                                                      'spatial_size': 6144}},
                                                   'freeze_weights': True,
                                                   'kernel_cfg': {   'name': 'bilinear',
                                                                     'num_features': 32},
                                                   'min_points': 3,
                                                   'node_dim': 22,
                                                   'skip_classes': [0, 2, 3, 4],
                                                   'use_raw_features': True,
                                                   'use_true_labels': False},
                                'graph_spice_loss': {   'edge_loss_cfg': {   'loss_type': 'LogDice'},
                                                        'eval': True,
                                                        'invert': True,
                                                        'kernel_lossfn': 'lovasz_hinge',
                                                        'name': 'graph_spice_edge_loss'},
                                'grappa_inter': {   'base': {   'add_start_dir': True,
                                                                'add_start_point': True,
                                                                'kinematics_mlp': True,
                                                                'kinematics_type': True,
                                                                'node_min_size': 3,
                                                                'node_type': [   0,
                                                                                 1,
                                                                                 2,
                                                                                 3],
                                                                'start_dir_max_dist': 5,
                                                                'vertex_mlp': True},
                                                    'edge_encoder': {   'name': 'geo',
                                                                        'use_numpy': True},
                                                    'gnn_model': {   'edge_classes': 2,
                                                                     'edge_feats': 19,
                                                                     'edge_output_feats': 64,
                                                                     'name': 'meta',
                                                                     'node_classes': 2,
                                                                     'node_feats': 28,
                                                                     'node_output_feats': 64},
                                                    'node_encoder': {   'name': 'geo',
                                                                        'use_numpy': True},
                                                    'type_net': {   'num_hidden': 32},
                                                    'use_shower_primary': True,
                                                    'use_true_particles': False,
                                                    'vertex_net': {   'num_hidden': 32}},
                                'grappa_inter_loss': {   'edge_loss': {   'name': 'channel',
                                                                          'source_col': 6,
                                                                          'target_col': 7},
                                                         'node_loss': {   'balance_classes': True,
                                                                          'name': 'kinematics',
                                                                          'spatial_size': 6144}},
                                'grappa_kinematics': {   'base': {   'edge_dist_metric': 'set',
                                                                     'edge_dist_numpy': True,
                                                                     'edge_max_dist': -1,
                                                                     'kinematics_mlp': True,
                                                                     'kinematics_momentum': True,
                                                                     'network': 'complete',
                                                                     'node_min_size': -1,
                                                                     'node_type': -1},
                                                         'edge_encoder': {   'cnn_encoder': {   'name': 'cnn',
                                                                                                'res_encoder': {   'coordConv': True,
                                                                                                                   'latent_size': 32,
                                                                                                                   'pool_mode': 'avg',
                                                                                                                   'spatial_size': 6144}},
                                                                             'geo_encoder': {   'more_feats': True},
                                                                             'name': 'mix_debug',
                                                                             'normalize': True},
                                                         'gnn_model': {   'edge_classes': 2,
                                                                          'edge_feats': 51,
                                                                          'edge_output_feats': 64,
                                                                          'leak': 0.33,
                                                                          'name': 'nnconv_old',
                                                                          'node_classes': 5,
                                                                          'node_feats': 83,
                                                                          'node_output_feats': 128},
                                                         'momentum_net': {   'num_hidden': 32},
                                                         'node_encoder': {   'cnn_encoder': {   'name': 'cnn',
                                                                                                'res_encoder': {   'coordConv': True,
                                                                                                                   'input_kernel': 3,
                                                                                                                   'latent_size': 64,
                                                                                                                   'pool_mode': 'avg',
                                                                                                                   'spatial_size': 6144}},
                                                                             'geo_encoder': {   'more_feats': True},
                                                                             'name': 'mix_debug',
                                                                             'normalize': True},
                                                         'use_true_particles': False},
                                'grappa_kinematics_loss': {   'edge_loss': {   'name': 'channel',
                                                                               'target': 'particle_forest'},
                                                              'node_loss': {   'name': 'kinematics',
                                                                               'reg_loss': 'l2'}},
                                'grappa_shower': {   'base': {   'add_start_dir': True,
                                                                 'add_start_point': True,
                                                                 'node_min_size': -1,
                                                                 'node_type': 0,
                                                                 'start_dir_max_dist': 5},
                                                     'edge_encoder': {   'name': 'geo',
                                                                         'use_numpy': True},
                                                     'freeze_weights': True,
                                                     'gnn_model': {   'edge_classes': 2,
                                                                      'edge_feats': 19,
                                                                      'edge_output_feats': 64,
                                                                      'name': 'meta',
                                                                      'node_classes': 2,
                                                                      'node_feats': 28,
                                                                      'node_output_feats': 64},
                                                     'node_encoder': {   'name': 'geo',
                                                                         'use_numpy': True}},
                                'grappa_shower_loss': {   'edge_loss': {   'high_purity': True,
                                                                           'name': 'channel',
                                                                           'source_col': 5,
                                                                           'target_col': 6},
                                                          'node_loss': {   'high_purity': True,
                                                                           'name': 'primary',
                                                                           'use_group_pred': True}},
                                'grappa_track': {   'base': {   'add_start_dir': True,
                                                                'add_start_point': True,
                                                                'node_min_size': 3,
                                                                'node_type': 1,
                                                                'start_dir_max_dist': 5},
                                                    'edge_encoder': {   'name': 'geo',
                                                                        'use_numpy': True},
                                                    'freeze_weights': True,
                                                    'gnn_model': {   'edge_classes': 2,
                                                                     'edge_feats': 19,
                                                                     'edge_output_feats': 64,
                                                                     'name': 'meta',
                                                                     'node_classes': 2,
                                                                     'node_feats': 28,
                                                                     'node_output_feats': 64},
                                                    'node_encoder': {   'name': 'geo',
                                                                        'use_numpy': True}},
                                'grappa_track_loss': {   'edge_loss': {   'name': 'channel',
                                                                          'source_col': 5,
                                                                          'target_col': 6}},
                                'uresnet_deghost': {   'freeze_weights': True,
                                                       'uresnet_lonely': {   'activation': {   'args': {   'negative_slope': 0.33},
                                                                                               'name': 'lrelu'},
                                                                             'allow_bias': False,
                                                                             'depth': 5,
                                                                             'filters': 32,
                                                                             'ghost': False,
                                                                             'norm_layer': {   'args': {   'eps': 0.0001,
                                                                                                           'momentum': 0.01},
                                                                                               'name': 'batch_norm'},
                                                                             'num_classes': 2,
                                                                             'num_input': 2,
                                                                             'reps': 2,
                                                                             'spatial_size': 6144}},
                                'uresnet_ppn': {   'ppn': {   'classify_endpoints': True,
                                                              'depth': 5,
                                                              'filters': 32,
                                                              'freeze_weights': True,
                                                              'mask_loss_name': 'BCE',
                                                              'num_classes': 5,
                                                              'particles_label_seg_col': -3,
                                                              'ppn_resolution': 1.0,
                                                              'ppn_score_threshold': 0.6,
                                                              'spatial_size': 6144},
                                                   'uresnet_lonely': {   'activation': {   'args': {   'negative_slope': 0.33},
                                                                                           'name': 'lrelu'},
                                                                         'allow_bias': False,
                                                                         'depth': 5,
                                                                         'filters': 32,
                                                                         'freeze_weights': True,
                                                                         'norm_layer': {   'args': {   'eps': 0.0001,
                                                                                                       'momentum': 0.01},
                                                                                           'name': 'batch_norm'},
                                                                         'num_classes': 5,
                                                                         'num_input': 2,
                                                                         'reps': 2,
                                                                         'spatial_size': 6144}}},
                 'name': 'full_chain',
                 'network_input': [   'input_data',
                                      'segment_label',
                                      'cluster_label']},
    'trainval': {   'checkpoint_step': 100,
                    'concat_result': [   'input_edge_features',
                                         'input_node_features',
                                         'points',
                                         'coordinates',
                                         'particle_node_features',
                                         'particle_edge_features',
                                         'track_node_features',
                                         'shower_node_features',
                                         'ppn_coords',
                                         'mask_ppn',
                                         'ppn_layers',
                                         'classify_endpoints',
                                         'vertex_layers',
                                         'vertex_coords',
                                         'primary_label_scales',
                                         'segment_label_scales',
                                         'seediness',
                                         'margins',
                                         'embeddings',
                                         'fragments',
                                         'fragments_seg',
                                         'shower_fragments',
                                         'shower_edge_index',
                                         'shower_edge_pred',
                                         'shower_node_pred',
                                         'shower_group_pred',
                                         'track_fragments',
                                         'track_edge_index',
                                         'track_node_pred',
                                         'track_edge_pred',
                                         'track_group_pred',
                                         'particle_fragments',
                                         'particle_edge_index',
                                         'particle_node_pred',
                                         'particle_edge_pred',
                                         'particle_group_pred',
                                         'particles',
                                         'inter_edge_index',
                                         'inter_node_pred',
                                         'inter_edge_pred',
                                         'inter_group_pred',
                                         'inter_particles',
                                         'node_pred_p',
                                         'node_pred_type',
                                         'kinematics_node_pred_p',
                                         'kinematics_node_pred_type',
                                         'flow_edge_pred',
                                         'kinematics_particles',
                                         'kinematics_edge_index',
                                         'clust_fragments',
                                         'clust_frag_seg',
                                         'interactions',
                                         'inter_cosmic_pred',
                                         'node_pred_vtx',
                                         'total_num_points',
                                         'total_nonghost_points',
                                         'spatial_embeddings',
                                         'occupancy',
                                         'hypergraph_features',
                                         'features',
                                         'feature_embeddings',
                                         'covariance',
                                         'clusts',
                                         'edge_index',
                                         'edge_pred',
                                         'node_pred'],
                    'debug': False,
                    'gpus': [0],
                    'iterations': 10,
                    'log_dir': './log_trash',
                    'minibatch_size': -1,
                    'model_path': '/sdf/home/l/ldomine/lartpc_mlreco3d_tutorials/book/data/weights_full_mpvmpr_062022.ckpt',
                    'optimizer': {'args': {'lr': 0.001}, 'name': 'Adam'},
                    'report_step': 1,
                    'seed': 123,
                    'train': False,
                    'unwrapper': 'unwrap_3d_mink',
                    'weight_prefix': './weights_trash/snapshot'}}
Loading file: /sdf/home/l/ldomine/lartpc_mlreco3d_tutorials/book/data/mpvmpr_062022_test_small.root
Loading tree sparse3d_reco
Loading tree sparse3d_reco_chi2
Loading tree sparse3d_reco_hit_charge0
Loading tree sparse3d_reco_hit_charge1
Loading tree sparse3d_reco_hit_charge2
Loading tree sparse3d_reco_hit_key0
Loading tree sparse3d_reco_hit_key1
Loading tree sparse3d_reco_hit_key2
Loading tree sparse3d_pcluster_semantics_ghost
Loading tree cluster3d_pcluster
Loading tree particle_pcluster
Loading tree particle_mpv
Loading tree sparse3d_pcluster_semantics
Loading tree sparse3d_pcluster
Loading tree particle_corrected
Found 101 events in file(s)
Shower GNN: True
Track GNN: True
Particle GNN: False
Interaction GNN: True
Kinematics GNN: False
Cosmic GNN: False

            Since one of the GNNs are turned on, process_fragments is turned ON.
            

        Fragment processing is turned ON. When training CNN models from
         scratch, we recommend turning fragment processing OFF as without
         reliable segmentation and/or cnn clustering outputs this could take
         prohibitively large training iterations.
        
Shower GNN: True
Track GNN: True
Particle GNN: False
Interaction GNN: True
Kinematics GNN: False
Cosmic GNN: False

            Since one of the GNNs are turned on, process_fragments is turned ON.
            

        Fragment processing is turned ON. When training CNN models from
         scratch, we recommend turning fragment processing OFF as without
         reliable segmentation and/or cnn clustering outputs this could take
         prohibitively large training iterations.
        
Freezing 82 weights for a sub-module ppn
Freezing 141 weights for a sub-module uresnet_lonely
Freezing 141 weights for a sub-module uresnet_deghost
Freezing 146 weights for a sub-module graph_spice
Freezing 120 weights for a sub-module grappa_track
Freezing 120 weights for a sub-module grappa_shower
Restoring weights for  from /sdf/home/l/ldomine/lartpc_mlreco3d_tutorials/book/data/weights_full_mpvmpr_062022.ckpt...
Done.
Warning in <TClass::Init>: no dictionary for class larcv::EventNeutrino is available
Warning in <TClass::Init>: no dictionary for class larcv::NeutrinoSet is available
Warning in <TClass::Init>: no dictionary for class larcv::Neutrino is available

The output is hidden because it reprints the entire (lengthy) configuration. Feel free to take a look if you are curious!

Finally we run the chain for 1 iteration:

# Call forward to run the net for one iteration; the first returned value is
# kept in "data" and the second (the chain output) in "output"
data, output = hs.trainer.forward(hs.data_io_iter)
Deghosting Accuracy: 0.9830
Segmentation Accuracy: 0.9900
PPN Accuracy: 0.8843
Clustering Accuracy: 0.2691
Clustering Edge Accuracy: 0.1252
Shower fragment clustering accuracy: 0.9581
Shower primary prediction accuracy: 0.9434
Track fragment clustering accuracy: 0.9937
Interaction grouping accuracy: 0.9763
Particle ID accuracy: 0.8409
Primary particle score accuracy: 0.9755

Now we can play with data and output to visualize what we are interested in.

Semantic segmentation (UResNet)

Let us take a look at the first entry. Feel free to change the entry index if you want to look at a different entry!

# Index of the entry to inspect; it is used below to index both data[...] and output[...]
entry = 0

We extract quantities of interest from the data and output dictionaries. Here, we want the input_data (voxel coordinates and corresponding reconstructed energy depositions) and segment_label (true semantic labels for each voxel). We also use the predicted ghost_mask (binary ghost / non-ghost mask for each voxel), obtained from output['ghost'], and the UResNet predictions segment_pred, obtained from output['segmentation'] (softmax scores).

# Inputs for the selected entry; the true semantic label is read from the
# last column of the label array.
input_data = data['input_data'][entry]
true_labels = data['segment_label'][entry]
segment_label = true_labels[:, -1]

# Per-voxel network scores for the same entry.
ghost_scores = output['ghost'][entry]
segmentation_scores = output['segmentation'][entry]

# A voxel is kept when class 0 has the highest ghost score (this mask is what
# the plots below call the "predicted no-ghost mask"); the predicted semantic
# class is the one with the highest segmentation score.
ghost_mask = ghost_scores.argmax(axis=1) == 0
segment_pred = segmentation_scores.argmax(axis=1)

We use Plotly to visualize the result:

trace = []

# Voxels whose true label is below 5: this is the "true no-ghost mask" named
# in the first trace. Computed once and reused for both points and colours.
true_noghost_mask = segment_label < 5
edep = input_data[true_noghost_mask]

# Colour settings shared by both traces so that identical labels get
# identical colours in the true and predicted views.
color_kwargs = dict(cmin=0, cmax=10, colorscale=plotly.colors.qualitative.D3)

# Ground truth: true labels on the true non-ghost voxels.
trace += scatter_points(edep, markersize=1, color=segment_label[true_noghost_mask], **color_kwargs)
trace[-1].name = 'True semantic labels (true no-ghost mask)'

# Prediction: predicted labels on the voxels kept by the predicted ghost mask.
trace += scatter_points(input_data[ghost_mask], markersize=1, color=segment_pred[ghost_mask], **color_kwargs)
trace[-1].name = 'Predicted semantic labels (predicted no-ghost mask)'

fig = go.Figure(data=trace, layout=plotly_layout3d())
# Place the legend at x=1.0, y=0.8 (paper coordinates).
fig.update_layout(legend=dict(x=1.0, y=0.8))

iplot(fig)

Points of interest (PPN)

PPN makes a prediction for each non-zero voxel. We need a post-processing function that applies the predicted attention mask and brings down the number of point proposals.

# Post-process the PPN output of the selected entry into a table of point
# proposals (one row per point, see the column list below).
ppn = uresnet_ppn_type_point_selector(
    data['input_data'][entry],
    output,
    entry=entry,
    score_threshold=0.5,
    type_threshold=2,
)

# Slice out the columns used in the rest of this section: coordinates,
# one of the two detection scores, and the predicted type.
ppn_voxels, ppn_score, ppn_type = ppn[:, 1:4], ppn[:, 5], ppn[:, 12]
# Every column from 13 onwards is reduced to the index of its largest value;
# this is what the plots below show as the start/end classification.
ppn_endpoints = np.argmax(ppn[:, 13:], axis=1)

The columns of ppn contain in this order:

  • batch id

  • point coordinates x, y, z

  • detection score (2 softmax values)

  • occupancy (how many points were merged into this single point during post-processing)

  • softmax scores for 5 semantic types

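  • type prediction (argmax of the softmax scores over the 5 semantic types)

  • endpoint scores (all remaining columns; their argmax gives the start/end classification used below)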

You can also use the softmax scores for the 5 semantic types to make finer point type predictions - for example at a vertex, you can expect these scores to be high for two or more types.

We remove points that have a high score (0.5 or more) for being delta-ray starting points:

delta_label = 3
# The per-type scores start at column 7, so column 7 + delta_label holds the
# score of the delta class; keep only the points scoring below 0.5 for it.
is_not_delta = ppn[:, 7 + delta_label] < 0.5

# Rebuild the derived arrays from the unfiltered `ppn` table instead of
# filtering the previously sliced arrays in place. The result is the same on
# the first run, but the cell can now be re-executed: `is_not_delta` always
# has one value per row of `ppn`, whereas the already-filtered arrays would no
# longer match its length.
ppn_kept = ppn[is_not_delta]
ppn_voxels = ppn_kept[:, 1:4]
ppn_score = ppn_kept[:, 5]
ppn_type = ppn_kept[:, 12]
ppn_endpoints = np.argmax(ppn_kept[:, 13:], axis=1)

And at last! We can visualize both the true and predicted points:

trace = []

# Colour settings shared by every trace so that equal values map to equal
# colours across the predicted and true layers.
color_kwargs = dict(cmin=0, cmax=10, colorscale=plotly.colors.qualitative.D3)

# Background: predicted semantic labels on the predicted non-ghost voxels.
trace += scatter_points(input_data[ghost_mask], markersize=1, color=segment_pred[ghost_mask], **color_kwargs)
trace[-1].name = 'Predicted semantic labels (predicted no-ghost mask)'

# All remaining PPN points, coloured by predicted type, with the detection
# score shown on hover.
trace += scatter_points(ppn_voxels, markersize=5, color=ppn_type, hovertext=ppn_score, **color_kwargs)
trace[-1].name = "PPN predictions (w/ type prediction)"

# Start/end classification, shown only for points predicted as type 1.
# The hover text is masked with the same selection as the points and colours;
# passing the unmasked array would attach hover values to the wrong points.
is_type_1 = ppn_type == 1
trace += scatter_points(ppn_voxels[is_type_1], markersize=5, color=ppn_endpoints[is_type_1], hovertext=ppn_endpoints[is_type_1], **color_kwargs)
trace[-1].name = "PPN predictions (start/end)"

# True points for this entry: column 4 colours the type layer and column 6
# colours the start/end layer.
particles_label = data['particles_label'][entry]

trace += scatter_points(particles_label, markersize=5, color=particles_label[:, 4], **color_kwargs)
trace[-1].name = "True point labels"

trace += scatter_points(particles_label, markersize=5, color=particles_label[:, 6], **color_kwargs)
trace[-1].name = "True point labels (start/end)"

fig = go.Figure(data=trace, layout=plotly_layout3d())
# Place the legend at x=1.0, y=0.8 (paper coordinates).
fig.update_layout(legend=dict(x=1.0, y=0.8))

iplot(fig)

The color of the points corresponds to either their semantic type, or a binary start/end classification.


This is all there is to know about UResNet + PPN output and its visualization.