Skip to content

Commit e77d55d

Browse files
[Packer] Created GreedyClusterer Class
Began encapsulating the different parts of the packer into classes. This will help organize the packer better, which will make it easier to modify in the future. My plan is to clean up the different parts of the packer so I can add flat placement information into the gain calculation so it can be integrated into the AP flow.
1 parent 920e8ab commit e77d55d

File tree

8 files changed

+239
-164
lines changed

8 files changed

+239
-164
lines changed

vpr/src/analytical_place/full_legalizer.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "ShowSetup.h"
1818
#include "ap_netlist_fwd.h"
1919
#include "check_netlist.h"
20-
#include "cluster.h"
2120
#include "cluster_legalizer.h"
2221
#include "cluster_util.h"
2322
#include "clustered_netlist.h"

vpr/src/base/vpr_api.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@
6464
#include "check_route.h"
6565
#include "constant_nets.h"
6666
#include "atom_netlist_utils.h"
67-
#include "cluster.h"
6867
#include "output_clustering.h"
6968
#include "vpr_constraints_reader.h"
7069
#include "place_constraints.h"

vpr/src/pack/cluster.h

Lines changed: 0 additions & 32 deletions
This file was deleted.

vpr/src/pack/cluster_util.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,35 @@ void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_
18791879
}
18801880
}
18811881

1882+
/**
1883+
* Print the total number of used physical blocks for each pb type in the architecture
1884+
*/
1885+
void print_pb_type_count(const ClusteredNetlist& clb_nlist) {
1886+
auto& device_ctx = g_vpr_ctx.device();
1887+
1888+
std::map<t_pb_type*, int> pb_type_count;
1889+
1890+
size_t max_depth = 0;
1891+
for (ClusterBlockId blk : clb_nlist.blocks()) {
1892+
size_t pb_max_depth = update_pb_type_count(clb_nlist.block_pb(blk), pb_type_count, 0);
1893+
1894+
max_depth = std::max(max_depth, pb_max_depth);
1895+
}
1896+
1897+
size_t max_pb_type_name_chars = 0;
1898+
for (auto& pb_type : pb_type_count) {
1899+
max_pb_type_name_chars = std::max(max_pb_type_name_chars, strlen(pb_type.first->name));
1900+
}
1901+
1902+
VTR_LOG("\nPb types usage...\n");
1903+
for (const auto& logical_block_type : device_ctx.logical_block_types) {
1904+
if (!logical_block_type.pb_type) continue;
1905+
1906+
print_pb_type_count_recurr(logical_block_type.pb_type, max_pb_type_name_chars + max_depth, 0, pb_type_count);
1907+
}
1908+
VTR_LOG("\n");
1909+
}
1910+
18821911
t_logical_block_type_ptr identify_logic_block_type(std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
18831912
std::string lut_name = ".names";
18841913

vpr/src/pack/cluster_util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,8 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_
481481

482482
void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map<t_pb_type*, int>& pb_type_count);
483483

484+
/**
 * @brief Print the total number of used physical blocks for each pb type in
 *        the architecture.
 *
 * @param clb_nlist  The clustered netlist whose blocks are counted.
 */
void print_pb_type_count(const ClusteredNetlist& clb_nlist);
485+
484486
/*
485487
* @brief This function identifies the logic block type which is defined by the
486488
* block type which has a lut primitive.

vpr/src/pack/cluster.cpp renamed to vpr/src/pack/greedy_clusterer.cpp

Lines changed: 69 additions & 108 deletions
Large diffs are not rendered by default.

vpr/src/pack/greedy_clusterer.h

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/**
2+
* @file
3+
* @author Alex Singer
4+
* @date November 2024
5+
* @brief The declarations of the Greedy Clusterer class which is used to
6+
* encapsulate the process of greedy clustering.
7+
*/
8+
9+
#pragma once
10+
11+
#include <map>
12+
#include <unordered_set>
13+
#include "physical_types.h"
14+
15+
// Forward declarations
16+
class AtomNetId;
17+
class AtomNetlist;
18+
class AttractionInfo;
19+
class ClusterLegalizer;
20+
class Prepacker;
21+
struct t_analysis_opts;
22+
struct t_clustering_data;
23+
struct t_pack_high_fanout_thresholds;
24+
struct t_packer_opts;
25+
26+
/**
27+
* @brief A clusterer that generates clusters by greedily choosing the clusters
28+
* which appear to have the best gain for a given neighbor.
29+
*
30+
* This clusterer generates one cluster at a time by finding candidate molecules
31+
* and selecting the molecule with the highest gain.
32+
*/
33+
class GreedyClusterer {
34+
public:
35+
/**
36+
* @brief Constructor of the Greedy Clusterer class.
37+
*
38+
* The clusterer may be invoked many times during the packing flow. This
39+
* constructor will pre-compute information before clustering which can
40+
* improve the performance of the clusterer.
41+
*
42+
* @param packer_opts
43+
* Options passed by the user to configure the packing and
44+
* clustering algorithms.
45+
* @param analysis_opts
46+
* Options passed by the user to configure timing analysis in
47+
* the clusterer.
48+
* @param atom_netlist
49+
* The atom netlist to cluster over.
50+
* @param arch
51+
* The architecture to cluster over.
52+
* @param high_fanout_thresholds
53+
* The thresholds for what to consider as a high-fanout net
54+
* for each logical block type.
55+
* @param is_clock
56+
* The set of clock nets in the Atom Netlist.
57+
* @param is_global
58+
* The set of global nets in the Atom Netlist.
59+
*/
60+
GreedyClusterer(const t_packer_opts& packer_opts,
61+
const t_analysis_opts& analysis_opts,
62+
const AtomNetlist& atom_netlist,
63+
const t_arch* arch,
64+
const t_pack_high_fanout_thresholds& high_fanout_thresholds,
65+
const std::unordered_set<AtomNetId>& is_clock,
66+
const std::unordered_set<AtomNetId>& is_global);
67+
68+
/**
69+
* @brief Performs clustering on the pack molecules formed by the prepacker.
70+
*
71+
* The clustering is contained within the Cluster Legalizer.
72+
*
73+
* @param cluster_legalizer
74+
* The cluster legalizer which is used to create clusters and
75+
* grow clusters by adding molecules to a cluster.
76+
* @param prepacker
77+
* The prepacker object which contains the pack molecules that
78+
* atoms are pre-packed into before clustering.
79+
* @param allow_unrelated_clustering
80+
* Allows primitives which have no attraction to the given
81+
* cluster to be packed into it.
82+
* @param balance_block_type_utilization
83+
* When true, tries to create clusters that balance the logical
84+
* block type utilization.
85+
* @param attraction_groups
86+
* Information on the attraction groups used during the
87+
* clustering process.
88+
*
89+
* @return num_used_type_instances
90+
* The number of used logical block types by the clustering.
91+
* This information may be useful when detecting if the
92+
* clustering can fit on the device.
93+
*/
94+
std::map<t_logical_block_type_ptr, size_t>
95+
do_clustering(ClusterLegalizer& cluster_legalizer,
96+
Prepacker& prepacker,
97+
bool allow_unrelated_clustering,
98+
bool balance_block_type_utilization,
99+
AttractionInfo& attraction_groups);
100+
101+
private:
102+
/*
103+
* When attraction groups are created, the purpose is to pack more densely by adding more molecules
104+
* from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are
105+
* not on), the cluster keeps being packed until the get_molecule routines return either a repeated
106+
* molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the
107+
* cluster until a nullptr is returned. So, the number of repeated molecules is changed from 1 to 500,
108+
* effectively making the clusterer pack a cluster until a nullptr is returned.
109+
*/
110+
static constexpr int attraction_groups_max_repeated_molecules_ = 500;
111+
112+
const t_packer_opts& packer_opts_;
113+
const t_analysis_opts& analysis_opts_;
114+
const AtomNetlist& atom_netlist_;
115+
const t_arch* arch_ = nullptr;
116+
const t_pack_high_fanout_thresholds& high_fanout_thresholds_;
117+
const std::unordered_set<AtomNetId>& is_clock_;
118+
const std::unordered_set<AtomNetId>& is_global_;
119+
120+
/// @brief Pre-computed logical block types for each model in the architecture.
121+
std::map<const t_model*, std::vector<t_logical_block_type_ptr>> primitive_candidate_block_types_;
122+
};
123+

vpr/src/pack/pack.cpp

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#include <unordered_set>
22

33
#include "SetupGrid.h"
4-
#include "cluster.h"
54
#include "cluster_legalizer.h"
65
#include "cluster_util.h"
6+
#include "constraints_report.h"
77
#include "globals.h"
8+
#include "greedy_clusterer.h"
89
#include "pack.h"
910
#include "prepack.h"
1011
#include "vpr_context.h"
@@ -29,7 +30,6 @@ bool try_pack(t_packer_opts* packer_opts,
2930
const DeviceContext& device_ctx = g_vpr_ctx.device();
3031

3132
std::unordered_set<AtomNetId> is_clock, is_global;
32-
t_clustering_data clustering_data;
3333
VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str());
3434

3535
is_clock = alloc_and_load_is_clock();
@@ -91,7 +91,6 @@ bool try_pack(t_packer_opts* packer_opts,
9191
}
9292

9393
int pack_iteration = 1;
94-
bool floorplan_regions_overfull = false;
9594

9695
// Initialize the cluster legalizer.
9796
ClusterLegalizer cluster_legalizer(atom_ctx.nlist,
@@ -110,27 +109,24 @@ bool try_pack(t_packer_opts* packer_opts,
110109
VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str());
111110
VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str());
112111

113-
while (true) {
114-
free_clustering_data(*packer_opts, clustering_data);
115-
112+
// Initialize the greedy clusterer.
113+
GreedyClusterer clusterer(*packer_opts,
114+
*analysis_opts,
115+
arch,
116+
high_fanout_thresholds,
117+
is_clock,
118+
is_global);
116119

120+
while (true) {
117121
//Cluster the netlist
118122
// num_used_type_instances: A map used to save the number of used
119123
// instances from each logical block type.
120124
std::map<t_logical_block_type_ptr, size_t> num_used_type_instances;
121-
num_used_type_instances = do_clustering(*packer_opts,
122-
*analysis_opts,
123-
arch,
124-
prepacker,
125-
cluster_legalizer,
126-
is_clock,
127-
is_global,
128-
allow_unrelated_clustering,
129-
balance_block_type_util,
130-
attraction_groups,
131-
floorplan_regions_overfull,
132-
high_fanout_thresholds,
133-
clustering_data);
125+
num_used_type_instances = clusterer.do_clustering(cluster_legalizer,
126+
prepacker,
127+
allow_unrelated_clustering,
128+
balance_block_type_util,
129+
attraction_groups);
134130

135131
//Try to size/find a device
136132
bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout);
@@ -139,6 +135,7 @@ bool try_pack(t_packer_opts* packer_opts,
139135
* is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause
140136
* of the floorplan not fitting, so attraction groups are turned on for later iterations.
141137
*/
138+
bool floorplan_regions_overfull = floorplan_constraints_regions_overfull(cluster_legalizer);
142139
bool floorplan_not_fitting = (floorplan_regions_overfull || g_vpr_ctx.floorplanning().constraints.get_num_partitions() > 0);
143140

144141
if (fits_on_device && !floorplan_regions_overfull) {
@@ -261,9 +258,6 @@ bool try_pack(t_packer_opts* packer_opts,
261258
//check clustering and output it
262259
check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, arch);
263260

264-
// Free Data Structures
265-
free_clustering_data(*packer_opts, clustering_data);
266-
267261
VTR_LOG("\n");
268262
VTR_LOG("Netlist conversion complete.\n");
269263
VTR_LOG("\n");

0 commit comments

Comments
 (0)