37 std::cout <<
"\n=== CPU Layout Optimization Demo ===" << std::endl;
38 std::cout <<
"This demonstrates why EDA layout is a complex, highly-coupled problem" << std::endl;
55 create_cpu_components(*optimizer);
56 create_cpu_nets(*optimizer);
58 std::cout <<
"Created realistic CPU design with:" << std::endl;
59 std::cout <<
"- ALU, FPU, Cache blocks" << std::endl;
60 std::cout <<
"- Critical timing paths" << std::endl;
61 std::cout <<
"- Power density constraints" << std::endl;
64 auto start = std::chrono::high_resolution_clock::now();
65 auto result = optimizer->optimize();
66 auto end = std::chrono::high_resolution_clock::now();
68 auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
71 std::cout <<
"\nOptimization Results:" << std::endl;
72 std::cout <<
" Total cost: " << result.total_cost << std::endl;
73 std::cout <<
" Wirelength cost: " << result.wirelength_cost << std::endl;
74 std::cout <<
" Timing cost: " << result.timing_cost << std::endl;
75 std::cout <<
" Area cost: " << result.area_cost << std::endl;
76 std::cout <<
" Power cost: " << result.power_cost << std::endl;
77 std::cout <<
" Constraint violations: " << result.constraint_violations << std::endl;
78 std::cout <<
" Optimization time: " << duration.count() <<
" ms" << std::endl;
80 auto stats = optimizer->get_statistics();
81 std::cout <<
" Acceptance rate: " << stats.acceptance_rate * 100 <<
"%" << std::endl;
82 std::cout <<
" Improvement rate: " << stats.improvement_rate * 100 <<
"%" << std::endl;
84 if (result.is_feasible()) {
85 std::cout <<
"✅ Layout optimization successful!" << std::endl;
87 std::cout <<
"⚠️ Layout has constraint violations" << std::endl;
95 std::cout <<
"\n=== Hierarchical Optimization Demo ===" << std::endl;
96 std::cout <<
"This shows how to handle billion-scale layouts through hierarchy" << std::endl;
111 optimizer->create_ip_block(
"Memory_Controller",
geometry::Rectangle(1000, 6000, 18000, 4000));
115 create_hierarchical_design(*optimizer);
117 std::cout <<
"Created hierarchical design with:" << std::endl;
118 std::cout <<
"- Multiple CPU cores" << std::endl;
119 std::cout <<
"- GPU compute block" << std::endl;
120 std::cout <<
"- Memory subsystem" << std::endl;
121 std::cout <<
"- I/O interfaces" << std::endl;
124 auto start = std::chrono::high_resolution_clock::now();
125 auto result = optimizer->optimize();
126 auto end = std::chrono::high_resolution_clock::now();
128 auto duration = std::chrono::duration_cast<std::chrono::seconds>(end - start);
130 std::cout <<
"\nHierarchical Optimization Results:" << std::endl;
131 std::cout <<
" Total cost: " << result.total_cost << std::endl;
132 std::cout <<
" Optimization time: " << duration.count() <<
" seconds" << std::endl;
134 auto final_layout = optimizer->get_final_layout();
135 std::cout <<
" Final layout components: " << final_layout.size() << std::endl;
137 std::cout <<
"✅ Hierarchical optimization demonstrates scalability to billion components!" << std::endl;
144 std::cout <<
"\n=== Force-Directed Placement Demo ===" << std::endl;
145 std::cout <<
"Fast initial placement using physics simulation" << std::endl;
160 placer->add_component(&comp);
167 clk_net.
sinks = {{
"AND1",
"CLK"}, {
"OR1",
"CLK"}, {
"MUX1",
"CLK"}};
169 placer->add_net(clk_net);
171 Net data_net(
"DATA");
174 data_net.
sinks = {{
"OR1",
"IN1"}, {
"NOT1",
"IN"}};
175 placer->add_net(data_net);
178 auto start = std::chrono::high_resolution_clock::now();
179 bool converged = placer->optimize(1000);
180 auto end = std::chrono::high_resolution_clock::now();
182 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
184 std::cout <<
"Force-directed placement results:" << std::endl;
185 std::cout <<
" Converged: " << (converged ?
"Yes" :
"No") << std::endl;
186 std::cout <<
" Time: " << duration.count() <<
" μs" << std::endl;
188 std::cout <<
"Final component positions:" << std::endl;
190 std::cout <<
" " << comp.name <<
": ("
191 << comp.position.x <<
", " << comp.position.y <<
")" << std::endl;
194 std::cout <<
"✅ Force-directed placement provides fast initial solution!" << std::endl;
201 std::cout <<
"\n=== Why GPU Acceleration is Limited for EDA Layout ===" << std::endl;
203 std::cout <<
"EDA layout optimization has several characteristics that make GPU acceleration challenging:" << std::endl;
205 std::cout <<
"\n1. 🔗 High Coupling:" << std::endl;
206 std::cout <<
" - Moving one component affects all connected components" << std::endl;
207 std::cout <<
" - Like a 'Huarong Dao' puzzle - every move has global effects" << std::endl;
208 std::cout <<
" - Requires sequential decision making, not parallel computation" << std::endl;
210 std::cout <<
"\n2. 🎯 Complex Objectives:" << std::endl;
211 std::cout <<
" - Multi-objective optimization (area + timing + power + manufacturing)" << std::endl;
212 std::cout <<
" - Non-linear constraints (e.g., timing depends on path delays)" << std::endl;
213 std::cout <<
" - Requires sophisticated cost evaluation, not simple arithmetic" << std::endl;
215 std::cout <<
"\n3. 🧠 Algorithm Nature:" << std::endl;
216 std::cout <<
" - Simulated annealing uses random moves and acceptance probability" << std::endl;
217 std::cout <<
" - Force-directed algorithms need iterative convergence" << std::endl;
218 std::cout <<
" - These are inherently sequential, adaptive algorithms" << std::endl;
220 std::cout <<
"\n4. 📊 Where GPU CAN Help:" << std::endl;
221 std::cout <<
" - Massive geometry queries (our spatial indexing)" << std::endl;
222 std::cout <<
" - Design rule checking (parallel DRC on many shapes)" << std::endl;
223 std::cout <<
" - Timing analysis (parallel path evaluation)" << std::endl;
224 std::cout <<
" - But NOT the core placement optimization" << std::endl;
226 std::cout <<
"\n✅ Our focus on CPU-based algorithms with sophisticated heuristics" << std::endl;
227 std::cout <<
" is the right approach for EDA layout optimization!" << std::endl;
234 std::cout <<
"\n=== Algorithm Performance Comparison ===" << std::endl;
239 std::vector<Component> test_components;
240 std::vector<Net> test_nets;
241 create_test_circuit(test_components, test_nets, 20);
243 std::cout <<
"Testing with " << test_components.size() <<
" components and "
244 << test_nets.size() <<
" nets" << std::endl;
247 std::cout <<
"\n--- Force-Directed Placement ---" << std::endl;
248 auto start = std::chrono::high_resolution_clock::now();
251 for (
auto& comp : test_components) {
252 force_placer->add_component(&comp);
254 for (
const auto& net : test_nets) {
255 force_placer->add_net(net);
258 bool fd_converged = force_placer->optimize(500);
259 auto fd_time = std::chrono::high_resolution_clock::now();
260 auto fd_duration = std::chrono::duration_cast<std::chrono::milliseconds>(fd_time - start);
262 std::cout <<
" Time: " << fd_duration.count() <<
" ms" << std::endl;
263 std::cout <<
" Converged: " << (fd_converged ?
"Yes" :
"No") << std::endl;
264 std::cout <<
" Best for: Fast initial placement" << std::endl;
267 std::cout <<
"\n--- Simulated Annealing ---" << std::endl;
268 start = std::chrono::high_resolution_clock::now();
274 for (
const auto& comp : test_components) {
275 sa_optimizer->add_component(comp);
277 for (
const auto& net : test_nets) {
278 sa_optimizer->add_net(net);
281 auto sa_result = sa_optimizer->optimize();
282 auto sa_time = std::chrono::high_resolution_clock::now();
283 auto sa_duration = std::chrono::duration_cast<std::chrono::milliseconds>(sa_time - start);
285 std::cout <<
" Time: " << sa_duration.count() <<
" ms" << std::endl;
286 std::cout <<
" Final cost: " << sa_result.total_cost << std::endl;
287 std::cout <<
" Feasible: " << (sa_result.is_feasible() ?
"Yes" :
"No") << std::endl;
288 std::cout <<
" Best for: High-quality final placement" << std::endl;
290 auto sa_stats = sa_optimizer->get_statistics();
291 std::cout <<
" Acceptance rate: " << sa_stats.acceptance_rate * 100 <<
"%" << std::endl;
294 std::cout <<
"\n--- Algorithm Recommendation ---" << std::endl;
296 test_components.size(), test_nets.size(),
true);
298 std::string rec_name;
299 switch (recommended) {
301 rec_name =
"Force-Directed";
break;
303 rec_name =
"Simulated Annealing";
break;
305 rec_name =
"Hierarchical";
break;
307 rec_name =
"Timing-Driven";
break;
309 rec_name =
"Analytical";
break;
312 std::cout <<
" Recommended algorithm: " << rec_name << std::endl;
313 std::cout <<
" (Based on problem size and timing criticality)" << std::endl;
320 alu.power_consumption = 500.0;
325 fpu.power_consumption = 600.0;
330 l1_cache.power_consumption = 200.0;
335 reg_file.power_consumption = 300.0;
340 ctrl_unit.power_consumption = 150.0;
345 decoder.power_consumption = 100.0;
349 void create_cpu_nets(SimulatedAnnealingOptimizer& optimizer) {
351 Net clk_net(
"CLK_TREE");
352 clk_net.driver_component =
"CTRL_UNIT";
353 clk_net.driver_pin =
"CLK_OUT";
355 {
"ALU",
"CLK"}, {
"FPU",
"CLK"},
356 {
"L1_CACHE",
"CLK"}, {
"REG_FILE",
"CLK"}, {
"DECODER",
"CLK"}
358 clk_net.criticality = 1.0;
359 clk_net.weight = 2.0;
363 Net data_net(
"DATA_BUS");
364 data_net.driver_component =
"REG_FILE";
365 data_net.driver_pin =
"DATA_OUT";
366 data_net.sinks = {{
"ALU",
"A_IN"}, {
"FPU",
"A_IN"}};
367 data_net.criticality = 0.9;
368 data_net.weight = 1.5;
372 Net inst_net(
"INST_BUS");
373 inst_net.driver_component =
"L1_CACHE";
374 inst_net.driver_pin =
"INST_OUT";
375 inst_net.sinks = {{
"DECODER",
"INST_IN"}, {
"CTRL_UNIT",
"INST_IN"}};
376 inst_net.criticality = 0.8;
380 Net ctrl_net(
"CTRL_SIGNALS");
381 ctrl_net.driver_component =
"CTRL_UNIT";
382 ctrl_net.driver_pin =
"CTRL_OUT";
383 ctrl_net.sinks = {{
"ALU",
"CTRL"}, {
"FPU",
"CTRL"}, {
"REG_FILE",
"CTRL"}};
384 ctrl_net.criticality = 0.7;
388 void create_hierarchical_design(HierarchicalOptimizer& optimizer) {
390 for (
int core = 0; core < 2; ++core) {
391 std::string block_name =
"CPU_Core_" + std::to_string(core);
394 Component(
"ALU_" + std::to_string(core), geometry::Rectangle(0, 0, 50, 40)));
396 Component(
"FPU_" + std::to_string(core), geometry::Rectangle(0, 0, 60, 45)));
398 Component(
"L1_" + std::to_string(core), geometry::Rectangle(0, 0, 100, 75)));
402 for (
int sm = 0; sm < 4; ++sm) {
404 Component(
"SM_" + std::to_string(sm), geometry::Rectangle(0, 0, 80, 60)));
409 Component(
"DDR_CTRL", geometry::Rectangle(0, 0, 200, 100)));
411 Component(
"L3_CACHE", geometry::Rectangle(0, 0, 300, 150)));
414 Net memory_bus(
"MEMORY_BUS");
415 memory_bus.driver_component =
"DDR_CTRL";
416 memory_bus.sinks = {{
"L3_CACHE",
"MEM_IN"}, {
"L1_0",
"MEM_IN"}, {
"L1_1",
"MEM_IN"}};
417 memory_bus.criticality = 0.8;
421 void create_test_circuit(std::vector<Component>& components,
422 std::vector<Net>& nets,
size_t component_count) {
427 std::uniform_real_distribution<double> size_dist(10, 50);
428 std::uniform_real_distribution<double> power_dist(10, 100);
430 for (
size_t i = 0; i < component_count; ++i) {
431 double width = size_dist(rng);
432 double height = size_dist(rng);
434 Component
comp(
"COMP_" + std::to_string(i),
435 geometry::Rectangle(0, 0, width, height));
436 comp.power_consumption = power_dist(rng);
437 components.push_back(comp);
441 std::uniform_int_distribution<size_t> comp_dist(0, component_count - 1);
442 std::uniform_int_distribution<size_t> fanout_dist(1, 4);
444 for (
size_t i = 0; i < component_count / 2; ++i) {
445 Net net(
"NET_" + std::to_string(i));
448 size_t driver_idx = comp_dist(rng);
449 net.driver_component = components[driver_idx].name;
450 net.driver_pin =
"OUT";
453 size_t fanout = fanout_dist(rng);
454 for (
size_t j = 0; j < fanout; ++j) {
455 size_t sink_idx = comp_dist(rng);
456 if (sink_idx != driver_idx) {
457 net.sinks.emplace_back(components[sink_idx].name,
"IN");
461 net.criticality = (i < 3) ? 0.9 : 0.5;