@@ -830,8 +830,12 @@ void StaticRuntime::benchmark(
830
830
const double ms = p.second ;
831
831
std::cout << std::setw (15 ) << ms << " ms. " << std::setw (10 )
832
832
<< results.percent_per_node_type [kind] << " %. " << kind << " ("
833
- << results.instances_per_node_type [kind] << " nodes)"
834
- << std::endl;
833
+ << results.instances_per_node_type [kind] << " nodes" ;
834
+ if (results.out_nodes .count (kind) == 0 ) {
835
+ std::cout << " )" << std::endl;
836
+ } else {
837
+ std::cout << " , out variant)" << std::endl;
838
+ }
835
839
}
836
840
std::cout << std::setw (15 ) << results.total_time << " ms. in Total"
837
841
<< std::endl;
@@ -851,6 +855,12 @@ void StaticRuntime::benchmark(
851
855
std::cout << " Total number of reused tensors: "
852
856
<< planner_->total_reused_tensors () << std::endl;
853
857
}
858
+ std::cout << " Total number of 'out' variant nodes/total number of nodes: "
859
+ << results.out_nodes_count << " /" << results.total_nodes_count
860
+ << " ("
861
+ << 100.0 * (results.out_nodes_count ) /
862
+ static_cast <float >(results.total_nodes_count )
863
+ << " %)" << std::endl;
854
864
}
855
865
check_for_memory_leak ();
856
866
}
@@ -978,8 +988,13 @@ StaticRuntime::IndividualMetrics StaticRuntime::benchmark_individual_ops(
978
988
results.time_per_node [i] /= static_cast <float >(main_runs);
979
989
results.time_per_node_type [kind] += results.time_per_node [i];
980
990
results.instances_per_node_type [kind]++;
991
+ if (nodes_[i].has_out_variant ()) {
992
+ results.out_nodes .insert (kind);
993
+ results.out_nodes_count ++;
994
+ }
981
995
results.total_time += results.time_per_node [i];
982
996
}
997
+ results.total_nodes_count = nodes_.size ();
983
998
results.memory_alloc_time /= static_cast <float >(main_runs);
984
999
results.memory_dealloc_time /= static_cast <float >(main_runs);
985
1000
results.output_dealloc_time /= static_cast <float >(main_runs);
0 commit comments