Skip to content

Commit

Permalink
Merge pull request #2457 from verilog-to-routing/compressed_router_lo…
Browse files Browse the repository at this point in the history
…okahead

Updating router lookahead
  • Loading branch information
vaughnbetz committed Feb 8, 2024
2 parents 3e15bf3 + 899b63a commit fe9089c
Show file tree
Hide file tree
Showing 38 changed files with 4,402 additions and 3,539 deletions.
10 changes: 8 additions & 2 deletions vpr/src/base/ShowSetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,9 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
case e_router_lookahead::MAP:
VTR_LOG("MAP\n");
break;
case e_router_lookahead::COMPRESSED_MAP:
VTR_LOG("COMPRESSED_MAP\n");
break;
case e_router_lookahead::EXTENDED_MAP:
VTR_LOG("EXTENDED_MAP\n");
break;
Expand Down Expand Up @@ -512,6 +515,9 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
case e_router_lookahead::MAP:
VTR_LOG("MAP\n");
break;
case e_router_lookahead::COMPRESSED_MAP:
VTR_LOG("COMPRESSED_MAP\n");
break;
case e_router_lookahead::EXTENDED_MAP:
VTR_LOG("EXTENDED_MAP\n");
break;
Expand Down Expand Up @@ -628,8 +634,8 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts,
VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown delay_model_reducer\n");
VTR_LOG("PlacerOpts.delay_model_reducer: %s\n", e_reducer_strings[(size_t)PlacerOpts.delay_model_reducer].c_str());

std::string place_delay_model_strings[2] = {"DELTA", "DELTA_OVERRIDE"};
if ((size_t)PlacerOpts.delay_model_type > 1)
std::string place_delay_model_strings[3] = {"SIMPLE", "DELTA", "DELTA_OVERRIDE"};
if ((size_t)PlacerOpts.delay_model_type > 2)
VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown delay_model_type\n");
VTR_LOG("PlacerOpts.delay_model_type: %s\n", place_delay_model_strings[(size_t)PlacerOpts.delay_model_type].c_str());
}
Expand Down
20 changes: 16 additions & 4 deletions vpr/src/base/read_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -902,11 +902,14 @@ struct ParseRouteBBUpdate {

struct ParseRouterLookahead {
ConvertedValue<e_router_lookahead> from_str(std::string str) {
std::transform(str.begin(), str.end(), str.begin(), ::tolower);
ConvertedValue<e_router_lookahead> conv_value;
if (str == "classic")
conv_value.set_value(e_router_lookahead::CLASSIC);
else if (str == "map")
conv_value.set_value(e_router_lookahead::MAP);
else if (str == "compressed_map")
conv_value.set_value(e_router_lookahead::COMPRESSED_MAP);
else if (str == "extended_map")
conv_value.set_value(e_router_lookahead::EXTENDED_MAP);
else {
Expand All @@ -926,6 +929,8 @@ struct ParseRouterLookahead {
conv_value.set_value("classic");
else if (val == e_router_lookahead::MAP) {
conv_value.set_value("map");
} else if (val == e_router_lookahead::COMPRESSED_MAP) {
conv_value.set_value("compressed_map");
} else {
VTR_ASSERT(val == e_router_lookahead::EXTENDED_MAP);
conv_value.set_value("extended_map");
Expand All @@ -934,14 +939,16 @@ struct ParseRouterLookahead {
}

std::vector<std::string> default_choices() {
return {"classic", "map", "extended_map"};
return {"classic", "map", "compressed_map", "extended_map"};
}
};

struct ParsePlaceDelayModel {
ConvertedValue<PlaceDelayModelType> from_str(std::string str) {
ConvertedValue<PlaceDelayModelType> conv_value;
if (str == "delta")
if (str == "simple") {
conv_value.set_value(PlaceDelayModelType::SIMPLE);
} else if (str == "delta")
conv_value.set_value(PlaceDelayModelType::DELTA);
else if (str == "delta_override")
conv_value.set_value(PlaceDelayModelType::DELTA_OVERRIDE);
Expand All @@ -955,7 +962,9 @@ struct ParsePlaceDelayModel {

ConvertedValue<std::string> to_str(PlaceDelayModelType val) {
ConvertedValue<std::string> conv_value;
if (val == PlaceDelayModelType::DELTA)
if (val == PlaceDelayModelType::SIMPLE)
conv_value.set_value("simple");
else if (val == PlaceDelayModelType::DELTA)
conv_value.set_value("delta");
else if (val == PlaceDelayModelType::DELTA_OVERRIDE)
conv_value.set_value("delta_override");
Expand All @@ -968,7 +977,7 @@ struct ParsePlaceDelayModel {
}

std::vector<std::string> default_choices() {
return {"delta", "delta_override"};
return {"simple", "delta", "delta_override"};
}
};

Expand Down Expand Up @@ -2245,6 +2254,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
"This option controls what information is considered and how"
" the placement delay model is constructed.\n"
"Valid options:\n"
" * 'simple' uses map router lookahead\n"
" * 'delta' uses differences in position only\n"
" * 'delta_override' uses differences in position with overrides for direct connects\n")
.default_value("delta")
Expand Down Expand Up @@ -2566,6 +2576,8 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
" * classic: The classic VPR lookahead (may perform better on un-buffered routing\n"
" architectures)\n"
" * map: An advanced lookahead which accounts for diverse wire type\n"
" * compressed_map: The algorithm is similar to map lookahead with the exception of saprse sampling of the chip"
" to reduce the run-time to build the router lookahead and also its memory footprint\n"
" * extended_map: A more advanced and extended lookahead which accounts for a more\n"
" exhaustive node sampling method\n"
"\n"
Expand Down
10 changes: 6 additions & 4 deletions vpr/src/base/vpr_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,11 @@ constexpr auto INVALID_BLOCK_ID = ClusterBlockId(-2);
#endif

enum class e_router_lookahead {
CLASSIC, ///<VPR's classic lookahead (assumes uniform wire types)
MAP, ///<Lookahead considering different wire types (see Oleg Petelin's MASc Thesis)
EXTENDED_MAP, ///<Lookahead with a more extensive node sampling method
NO_OP ///<A no-operation lookahead which always returns zero
CLASSIC, ///<VPR's classic lookahead (assumes uniform wire types)
MAP, ///<Lookahead considering different wire types (see Oleg Petelin's MASc Thesis)
COMPRESSED_MAP, ///<Similar to MAP, but uses a sparse sampling of the chip
EXTENDED_MAP, ///<Lookahead with a more extensive node sampling method
NO_OP ///<A no-operation lookahead which always returns zero
};

enum class e_route_bb_update {
Expand Down Expand Up @@ -1129,6 +1130,7 @@ enum e_place_effort_scaling {
};

enum class PlaceDelayModelType {
SIMPLE, ///<Delay model built from the information stored in the router lookahead
DELTA, ///<Delta x/y based delay model
DELTA_OVERRIDE, ///<Delta x/y based delay model with special case delay overrides
};
Expand Down
1 change: 0 additions & 1 deletion vpr/src/place/place.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,6 @@ void try_place(const Netlist<>& net_list,
if (placer_opts.place_algorithm.is_timing_driven()) {
/*do this before the initial placement to avoid messing up the initial placement */
place_delay_model = alloc_lookups_and_delay_model(net_list,
device_ctx.arch_switch_inf,
chan_width_dist,
placer_opts,
router_opts,
Expand Down
10 changes: 8 additions & 2 deletions vpr/src/place/place_delay_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,14 @@ void OverrideDelayModel::set_base_delay_model(std::unique_ptr<DeltaDelayModel> b
base_delay_model_ = std::move(base_delay_model_obj);
}

/**
 * @brief Looks up the stored delay between two tile locations.
 *
 * The entry is selected by the physical tile type found at @p from_loc, the layer of the
 * source, the layer of the sink, and the absolute x/y distance between the two locations.
 * The pin arguments are not used by this model.
 *
 * @param from_loc Location of the source tile
 * @param to_loc   Location of the sink tile
 * @return The delay stored in delays_ for this (type, from layer, to layer, |dx|, |dy|) entry
 */
float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
    const auto& device_grid = g_vpr_ctx.device().grid;
    const int src_type_index = device_grid.get_physical_type(from_loc)->index;

    // Only the magnitude of the displacement matters for the lookup
    const int x_distance = std::abs(from_loc.x - to_loc.x);
    const int y_distance = std::abs(from_loc.y - to_loc.y);

    return delays_[src_type_index][from_loc.layer_num][to_loc.layer_num][x_distance][y_distance];
}

/**
* When writing capnp targeted serialization, always allow compilation when
* VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead.
Expand Down Expand Up @@ -319,7 +327,6 @@ void OverrideDelayModel::write(const std::string& file) const {

///@brief Initialize the placer delay model.
std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
const std::vector<t_arch_switch_inf>& arch_switch_inf,
t_chan_width_dist chan_width_dist,
const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
Expand All @@ -331,7 +338,6 @@ std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>&
return compute_place_delay_model(placer_opts,
router_opts,
net_list,
arch_switch_inf,
det_routing_arch,
segment_inf,
chan_width_dist,
Expand Down
44 changes: 42 additions & 2 deletions vpr/src/place/place_delay_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ class PlaceDelayModel;

///@brief Initialize the placer delay model.
std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
const std::vector<t_arch_switch_inf>& arch_switch_inf,
t_chan_width_dist chan_width_dist,
const t_placer_opts& place_opts,
const t_router_opts& router_opts,
Expand Down Expand Up @@ -113,7 +112,13 @@ class DeltaDelayModel : public PlaceDelayModel {

private:
vtr::NdMatrix<float, 3> delays_; // [0..num_layers-1][0..max_dx][0..max_dy]
/**
* @brief The minimum delay of inter-layer connections
*/
float cross_layer_delay_;
/**
* @brief Indicates whether the router is a two-stage or run-flat
*/
bool is_flat_;
};

Expand Down Expand Up @@ -144,8 +149,13 @@ class OverrideDelayModel : public PlaceDelayModel {

private:
std::unique_ptr<DeltaDelayModel> base_delay_model_;
/* Minimum delay of cross-layer connections */
/**
* @brief Minimum delay of cross-layer connections
*/
float cross_layer_delay_;
/**
* @brief Indicates whether the router is a two-stage or run-flat
*/
bool is_flat_;

void compute_override_delay_model(RouterDelayProfiler& router,
Expand Down Expand Up @@ -217,3 +227,33 @@ class OverrideDelayModel : public PlaceDelayModel {
static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts");
static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts");
};

///@brief A simple delay model based on the information stored in router lookahead
/// This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router
class SimpleDelayModel : public PlaceDelayModel {
public:
SimpleDelayModel() {}

/// @brief Fills the delay matrix (delays_) of this model.
/// The definition lives outside this header.
void compute(
RouterDelayProfiler& router,
const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
int longest_length) override;
/// @brief Returns the delay between from_loc and to_loc; the pin arguments are unnamed and therefore unused.
float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
/// @brief Intentionally empty: this model does not dump an echo file.
void dump_echo(std::string /*filepath*/) const override {}

/// @brief Intentionally empty: reading this model from a file is a no-op.
void read(const std::string& /*file*/) override {}
/// @brief Intentionally empty: writing this model to a file is a no-op.
void write(const std::string& /*file*/) const override {}

private:
/**
 * @brief The matrix to store the minimum delay between different points on different layers.
 *
 * The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers
 * for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs
 * on each layer. Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers
 * is not uniform. For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1.
 * One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular.
 */
vtr::NdMatrix<float, 5> delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy]
};
56 changes: 51 additions & 5 deletions vpr/src/place/timing_place_lookup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,13 @@ static vtr::NdMatrix<float, 3> compute_delta_delay_model(
int longest_length,
bool is_flat);

/**
 * @brief Use the information in the router lookahead to fill the delay matrix instead of running the router
 * @param route_profiler The profiler that is queried for the minimum delay of every matrix entry
 * @return The delay matrix that contains the minimum cost between two locations, indexed as
 *         [physical tile type][from layer][to layer][dx][dy]
 */
static vtr::NdMatrix<float, 5> compute_simple_delay_model(RouterDelayProfiler& route_profiler);

static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
t_physical_tile_type_ptr from_type,
int from_pin,
Expand All @@ -167,7 +174,6 @@ static float find_neightboring_average(vtr::NdMatrix<float, 3>& matrix, t_physic
std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
const Netlist<>& net_list,
const std::vector<t_arch_switch_inf>& arch_switch_inf,
t_det_routing_arch* det_routing_arch,
std::vector<t_segment_inf>& segment_inf,
t_chan_width_dist chan_width_dist,
Expand Down Expand Up @@ -195,10 +201,11 @@ std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts&

/*now setup and compute the actual arrays */
std::unique_ptr<PlaceDelayModel> place_delay_model;
float min_cross_layer_delay = get_min_cross_layer_delay(arch_switch_inf,
segment_inf,
det_routing_arch->wire_to_arch_ipin_switch_between_dice);
if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
float min_cross_layer_delay = get_min_cross_layer_delay();

if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) {
place_delay_model = std::make_unique<SimpleDelayModel>();
} else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
} else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
Expand Down Expand Up @@ -250,6 +257,14 @@ void OverrideDelayModel::compute(
compute_override_delay_model(route_profiler, router_opts);
}

/**
 * @brief Populates the delay matrix of the simple delay model.
 *
 * Stores the result of compute_simple_delay_model(router) in delays_. The placer options,
 * router options and longest_length parameters are unnamed and therefore unused here.
 *
 * @param router The delay profiler forwarded to compute_simple_delay_model()
 */
void SimpleDelayModel::compute(
RouterDelayProfiler& router,
const t_placer_opts& /*placer_opts*/,
const t_router_opts& /*router_opts*/,
int /*longest_length*/) {
delays_ = compute_simple_delay_model(router);
}

/******* File Accessible Functions **********/

std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) {
Expand Down Expand Up @@ -1004,6 +1019,37 @@ static vtr::NdMatrix<float, 3> compute_delta_delay_model(
return delta_delays;
}

/**
 * @brief Builds the delay matrix of the simple delay model by querying the delay profiler.
 *
 * The returned matrix has the shape [num_physical_types][num_layers][num_layers][width][height].
 * The second index is the layer of the source location and the third index is the layer of the
 * sink. Every entry holds the value returned by route_profiler.get_min_delay() for that
 * (tile type, from layer, to layer, dx, dy) combination.
 *
 * @param route_profiler The profiler queried for each matrix entry
 * @return The filled 5D delay matrix
 */
static vtr::NdMatrix<float, 5> compute_simple_delay_model(RouterDelayProfiler& route_profiler) {
    const auto& device_ctx = g_vpr_ctx.device();
    const auto& grid = device_ctx.grid;

    // Loop bounds, computed once up front
    const int tile_type_count = static_cast<int>(device_ctx.physical_tile_types.size());
    const auto layer_count = grid.get_num_layers();
    const int dx_limit = static_cast<int>(grid.width());
    const int dy_limit = static_cast<int>(grid.height());

    vtr::NdMatrix<float, 5> delay_table({static_cast<unsigned long>(tile_type_count),
                                         static_cast<unsigned long>(layer_count),
                                         static_cast<unsigned long>(layer_count),
                                         grid.width(),
                                         grid.height()});

    for (int tile_type = 0; tile_type < tile_type_count; ++tile_type) {
        for (int src_layer = 0; src_layer < layer_count; ++src_layer) {
            for (int sink_layer = 0; sink_layer < layer_count; ++sink_layer) {
                for (int dx = 0; dx < dx_limit; ++dx) {
                    for (int dy = 0; dy < dy_limit; ++dy) {
                        delay_table[tile_type][src_layer][sink_layer][dx][dy] = route_profiler.get_min_delay(tile_type,
                                                                                                             src_layer,
                                                                                                             sink_layer,
                                                                                                             dx,
                                                                                                             dy);
                    }
                }
            }
        }
    }

    return delay_table;
}

//Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification
static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
t_physical_tile_type_ptr from_type,
Expand Down
1 change: 0 additions & 1 deletion vpr/src/place/timing_place_lookup.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
const Netlist<>& net_list,
const std::vector<t_arch_switch_inf>& arch_switch_inf,
t_det_routing_arch* det_routing_arch,
std::vector<t_segment_inf>& segment_inf,
t_chan_width_dist chan_width_dist,
Expand Down

0 comments on commit fe9089c

Please sign in to comment.