comparison gcc/tree-ssa-math-opts.c @ 132:d34655255c78
update gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 10:21:07 +0900 |
parents | 84e7813d76e9 |
children | 1830386684a0 |
130:e108057fa461 | 132:d34655255c78 |
---|---|
1 /* Global, SSA-based optimizations using mathematical identities. | 1 /* Global, SSA-based optimizations using mathematical identities. |
2 Copyright (C) 2005-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2005-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it | 6 GCC is free software; you can redistribute it and/or modify it |
7 under the terms of the GNU General Public License as published by the | 7 under the terms of the GNU General Public License as published by the |
113 #include "internal-fn.h" | 113 #include "internal-fn.h" |
114 #include "case-cfn-macros.h" | 114 #include "case-cfn-macros.h" |
115 #include "optabs-libfuncs.h" | 115 #include "optabs-libfuncs.h" |
116 #include "tree-eh.h" | 116 #include "tree-eh.h" |
117 #include "targhooks.h" | 117 #include "targhooks.h" |
118 #include "domwalk.h" | |
118 | 119 |
119 /* This structure represents one basic block that either computes a | 120 /* This structure represents one basic block that either computes a |
120 division, or is a common dominator for basic blocks that compute a | 121 division, or is a common dominator for basic blocks that compute a |
121 division. */ | 122 division. */ |
122 struct occurrence { | 123 struct occurrence { |
125 | 126 |
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal | 127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal |
127 inserted in BB. */ | 128 inserted in BB. */ |
128 tree recip_def; | 129 tree recip_def; |
129 | 130 |
131 /* If non-NULL, the SSA_NAME holding the definition for a squared | |
132 reciprocal inserted in BB. */ | |
133 tree square_recip_def; | |
134 | |
130 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that | 135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that |
131 was inserted in BB. */ | 136 was inserted in BB. */ |
132 gimple *recip_def_stmt; | 137 gimple *recip_def_stmt; |
133 | 138 |
134 /* Pointer to a list of "struct occurrence"s for blocks dominated | 139 /* Pointer to a list of "struct occurrence"s for blocks dominated |
162 static struct | 167 static struct |
163 { | 168 { |
164 /* Number of cexpi calls inserted. */ | 169 /* Number of cexpi calls inserted. */ |
165 int inserted; | 170 int inserted; |
166 } sincos_stats; | 171 } sincos_stats; |
167 | |
168 static struct | |
169 { | |
170 /* Number of hand-written 16-bit nop / bswaps found. */ | |
171 int found_16bit; | |
172 | |
173 /* Number of hand-written 32-bit nop / bswaps found. */ | |
174 int found_32bit; | |
175 | |
176 /* Number of hand-written 64-bit nop / bswaps found. */ | |
177 int found_64bit; | |
178 } nop_stats, bswap_stats; | |
179 | 172 |
180 static struct | 173 static struct |
181 { | 174 { |
182 /* Number of widening multiplication ops inserted. */ | 175 /* Number of widening multiplication ops inserted. */ |
183 int widen_mults_inserted; | 176 int widen_mults_inserted; |
280 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ | 273 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ |
281 new_occ->next = *p_head; | 274 new_occ->next = *p_head; |
282 *p_head = new_occ; | 275 *p_head = new_occ; |
283 } | 276 } |
284 | 277 |
285 /* Register that we found a division in BB. */ | 278 /* Register that we found a division in BB. |
279 IMPORTANCE is a measure of how much weighting to give | |
280 that division. Use IMPORTANCE = 2 to register a single | |
281 division. If the division is going to be found multiple | |
282 times use 1 (as it is with squares). */ | |
286 | 283 |
287 static inline void | 284 static inline void |
288 register_division_in (basic_block bb) | 285 register_division_in (basic_block bb, int importance) |
289 { | 286 { |
290 struct occurrence *occ; | 287 struct occurrence *occ; |
291 | 288 |
292 occ = (struct occurrence *) bb->aux; | 289 occ = (struct occurrence *) bb->aux; |
293 if (!occ) | 290 if (!occ) |
295 occ = occ_new (bb, NULL); | 292 occ = occ_new (bb, NULL); |
296 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); | 293 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); |
297 } | 294 } |
298 | 295 |
299 occ->bb_has_division = true; | 296 occ->bb_has_division = true; |
300 occ->num_divisions++; | 297 occ->num_divisions += importance; |
301 } | 298 } |
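As a worked reading of the new weighting (not text from the changeset): a plain division a / x calls register_division_in (bb, 2) once and contributes 2 to num_divisions, while a division b / (x * x) is visited once per use of x inside the square, so register_division_in (bb, 1) runs twice and also contributes 2. insert_reciprocals later halves num_divisions before comparing it against the target's min_divisions_for_recip_mul threshold, so each kind of division ends up counting once.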
302 | 299 |
303 | 300 |
304 /* Compute the number of divisions that postdominate each block in OCC and | 301 /* Compute the number of divisions that postdominate each block in OCC and |
305 its children. */ | 302 its children. */ |
338 confused later by replacing all immediate uses x in such | 335 confused later by replacing all immediate uses x in such |
339 a stmt. */ | 336 a stmt. */ |
340 && gimple_assign_rhs1 (use_stmt) != def; | 337 && gimple_assign_rhs1 (use_stmt) != def; |
341 } | 338 } |
342 | 339 |
340 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */ | |
341 static inline bool | |
342 is_mult_by (gimple *use_stmt, tree def, tree a) | |
343 { | |
344 if (gimple_code (use_stmt) == GIMPLE_ASSIGN | |
345 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR) | |
346 { | |
347 tree op0 = gimple_assign_rhs1 (use_stmt); | |
348 tree op1 = gimple_assign_rhs2 (use_stmt); | |
349 | |
350 return (op0 == def && op1 == a) | |
351 || (op0 == a && op1 == def); | |
352 } | |
353 return 0; | |
354 } | |
355 | |
356 /* Return whether USE_STMT is DEF * DEF. */ | |
357 static inline bool | |
358 is_square_of (gimple *use_stmt, tree def) | |
359 { | |
360 return is_mult_by (use_stmt, def, def); | |
361 } | |
362 | |
363 /* Return whether USE_STMT is a floating-point division by | |
364 DEF * DEF. */ | |
365 static inline bool | |
366 is_division_by_square (gimple *use_stmt, tree def) | |
367 { | |
368 if (gimple_code (use_stmt) == GIMPLE_ASSIGN | |
369 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR | |
370 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)) | |
371 { | |
372 tree denominator = gimple_assign_rhs2 (use_stmt); | |
373 if (TREE_CODE (denominator) == SSA_NAME) | |
374 { | |
375 return is_square_of (SSA_NAME_DEF_STMT (denominator), def); | |
376 } | |
377 } | |
378 return 0; | |
379 } | |
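The three predicates above detect multiplications by a known operand, squares, and divisions whose denominator is such a square. A minimal source-level sketch of the case they are meant to improve, the same shape as the example in the execute_cse_reciprocals_1 comment further down (hypothetical code, assuming -freciprocal-math and a satisfied min_divisions_for_recip_mul threshold):

```c
/* Before: two divisions by the square of x plus one division by x.  */
double
before (double a, double b, double c, double x)
{
  return a / (x * x) + b / (x * x) + c / x;
}

/* After (conceptually): one reciprocal and its square, reused everywhere.
   "reciptmp" and "powmult_reciptmp" mirror the temporary names created by
   insert_reciprocals below.  Only one division remains.  */
double
after (double a, double b, double c, double x)
{
  double reciptmp = 1.0 / x;
  double powmult_reciptmp = reciptmp * reciptmp;
  return a * powmult_reciptmp + b * powmult_reciptmp + c * reciptmp;
}
```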
380 | |
343 /* Walk the subset of the dominator tree rooted at OCC, setting the | 381 /* Walk the subset of the dominator tree rooted at OCC, setting the |
344 RECIP_DEF field to a definition of 1.0 / DEF that can be used in | 382 RECIP_DEF field to a definition of 1.0 / DEF that can be used in |
345 the given basic block. The field may be left NULL, of course, | 383 the given basic block. The field may be left NULL, of course, |
346 if it is not possible or profitable to do the optimization. | 384 if it is not possible or profitable to do the optimization. |
347 | 385 |
348 DEF_BSI is an iterator pointing at the statement defining DEF. | 386 DEF_BSI is an iterator pointing at the statement defining DEF. |
349 If RECIP_DEF is set, a dominator already has a computation that can | 387 If RECIP_DEF is set, a dominator already has a computation that can |
350 be used. */ | 388 be used. |
389 | |
390 If should_insert_square_recip is set, then this also inserts | |
391 the square of the reciprocal immediately after the definition | |
392 of the reciprocal. */ | |
351 | 393 |
352 static void | 394 static void |
353 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, | 395 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |
354 tree def, tree recip_def, int threshold) | 396 tree def, tree recip_def, tree square_recip_def, |
397 int should_insert_square_recip, int threshold) | |
355 { | 398 { |
356 tree type; | 399 tree type; |
357 gassign *new_stmt; | 400 gassign *new_stmt, *new_square_stmt; |
358 gimple_stmt_iterator gsi; | 401 gimple_stmt_iterator gsi; |
359 struct occurrence *occ_child; | 402 struct occurrence *occ_child; |
360 | 403 |
361 if (!recip_def | 404 if (!recip_def |
362 && (occ->bb_has_division || !flag_trapping_math) | 405 && (occ->bb_has_division || !flag_trapping_math) |
363 && occ->num_divisions >= threshold) | 406 /* Divide by two as all divisions are counted twice in |
407 the costing loop. */ | |
408 && occ->num_divisions / 2 >= threshold) | |
364 { | 409 { |
365 /* Make a variable with the replacement and substitute it. */ | 410 /* Make a variable with the replacement and substitute it. */ |
366 type = TREE_TYPE (def); | 411 type = TREE_TYPE (def); |
367 recip_def = create_tmp_reg (type, "reciptmp"); | 412 recip_def = create_tmp_reg (type, "reciptmp"); |
368 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, | 413 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, |
369 build_one_cst (type), def); | 414 build_one_cst (type), def); |
370 | 415 |
416 if (should_insert_square_recip) | |
417 { | |
418 square_recip_def = create_tmp_reg (type, "powmult_reciptmp"); | |
419 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR, | |
420 recip_def, recip_def); | |
421 } | |
422 | |
371 if (occ->bb_has_division) | 423 if (occ->bb_has_division) |
372 { | 424 { |
373 /* Case 1: insert before an existing division. */ | 425 /* Case 1: insert before an existing division. */ |
374 gsi = gsi_after_labels (occ->bb); | 426 gsi = gsi_after_labels (occ->bb); |
375 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def)) | 427 while (!gsi_end_p (gsi) |
428 && (!is_division_by (gsi_stmt (gsi), def)) | |
429 && (!is_division_by_square (gsi_stmt (gsi), def))) | |
376 gsi_next (&gsi); | 430 gsi_next (&gsi); |
377 | 431 |
378 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); | 432 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
379 } | 433 if (should_insert_square_recip) |
434 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); | |
435 } | |
380 else if (def_gsi && occ->bb == def_gsi->bb) | 436 else if (def_gsi && occ->bb == def_gsi->bb) |
381 { | 437 { |
382 /* Case 2: insert right after the definition. Note that this will | 438 /* Case 2: insert right after the definition. Note that this will |
383 never happen if the definition statement can throw, because in | 439 never happen if the definition statement can throw, because in |
384 that case the sole successor of the statement's basic block will | 440 that case the sole successor of the statement's basic block will |
385 dominate all the uses as well. */ | 441 dominate all the uses as well. */ |
386 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); | 442 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |
387 } | 443 if (should_insert_square_recip) |
444 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT); | |
445 } | |
388 else | 446 else |
389 { | 447 { |
390 /* Case 3: insert in a basic block not containing defs/uses. */ | 448 /* Case 3: insert in a basic block not containing defs/uses. */ |
391 gsi = gsi_after_labels (occ->bb); | 449 gsi = gsi_after_labels (occ->bb); |
392 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); | 450 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |
393 } | 451 if (should_insert_square_recip) |
452 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); | |
453 } | |
394 | 454 |
395 reciprocal_stats.rdivs_inserted++; | 455 reciprocal_stats.rdivs_inserted++; |
396 | 456 |
397 occ->recip_def_stmt = new_stmt; | 457 occ->recip_def_stmt = new_stmt; |
398 } | 458 } |
399 | 459 |
400 occ->recip_def = recip_def; | 460 occ->recip_def = recip_def; |
461 occ->square_recip_def = square_recip_def; | |
401 for (occ_child = occ->children; occ_child; occ_child = occ_child->next) | 462 for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
402 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold); | 463 insert_reciprocals (def_gsi, occ_child, def, recip_def, |
464 square_recip_def, should_insert_square_recip, | |
465 threshold); | |
466 } | |
467 | |
468 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)). | |
469 Take as argument the use for (x * x). */ | |
470 static inline void | |
471 replace_reciprocal_squares (use_operand_p use_p) | |
472 { | |
473 gimple *use_stmt = USE_STMT (use_p); | |
474 basic_block bb = gimple_bb (use_stmt); | |
475 struct occurrence *occ = (struct occurrence *) bb->aux; | |
476 | |
477 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def | |
478 && occ->recip_def) | |
479 { | |
480 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); | |
481 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); | |
482 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def); | |
483 SET_USE (use_p, occ->square_recip_def); | |
484 fold_stmt_inplace (&gsi); | |
485 update_stmt (use_stmt); | |
486 } | |
403 } | 487 } |
404 | 488 |
405 | 489 |
406 /* Replace the division at USE_P with a multiplication by the reciprocal, if | 490 /* Replace the division at USE_P with a multiplication by the reciprocal, if |
407 possible. */ | 491 possible. */ |
448 | 532 |
449 return child; | 533 return child; |
450 } | 534 } |
451 } | 535 } |
452 | 536 |
537 /* Transform sequences like | |
538 t = sqrt (a) | |
539 x = 1.0 / t; | |
540 r1 = x * x; | |
541 r2 = a * x; | |
542 into: | |
543 t = sqrt (a) | |
544 r1 = 1.0 / a; | |
545 r2 = t; | |
546 x = r1 * r2; | |
547 depending on the uses of x, r1, r2. This removes one multiplication and | |
548 allows the sqrt and division operations to execute in parallel. | |
549 DEF_GSI is the gsi of the initial division by sqrt that defines | |
550 DEF (x in the example above). */ | |
551 | |
552 static void | |
553 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def) | |
554 { | |
555 gimple *use_stmt; | |
556 imm_use_iterator use_iter; | |
557 gimple *stmt = gsi_stmt (*def_gsi); | |
558 tree x = def; | |
559 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt); | |
560 tree div_rhs1 = gimple_assign_rhs1 (stmt); | |
561 | |
562 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME | |
563 || TREE_CODE (div_rhs1) != REAL_CST | |
564 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1)) | |
565 return; | |
566 | |
567 gcall *sqrt_stmt | |
568 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name)); | |
569 | |
570 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt)) | |
571 return; | |
572 | |
573 switch (gimple_call_combined_fn (sqrt_stmt)) | |
574 { | |
575 CASE_CFN_SQRT: | |
576 CASE_CFN_SQRT_FN: | |
577 break; | |
578 | |
579 default: | |
580 return; | |
581 } | |
582 tree a = gimple_call_arg (sqrt_stmt, 0); | |
583 | |
584 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */ | |
585 | |
586 /* Statements that use x in x * x. */ | |
587 auto_vec<gimple *> sqr_stmts; | |
588 /* Statements that use x in a * x. */ | |
589 auto_vec<gimple *> mult_stmts; | |
590 bool has_other_use = false; | |
591 bool mult_on_main_path = false; | |
592 | |
593 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x) | |
594 { | |
595 if (is_gimple_debug (use_stmt)) | |
596 continue; | |
597 if (is_square_of (use_stmt, x)) | |
598 { | |
599 sqr_stmts.safe_push (use_stmt); | |
600 if (gimple_bb (use_stmt) == gimple_bb (stmt)) | |
601 mult_on_main_path = true; | |
602 } | |
603 else if (is_mult_by (use_stmt, x, a)) | |
604 { | |
605 mult_stmts.safe_push (use_stmt); | |
606 if (gimple_bb (use_stmt) == gimple_bb (stmt)) | |
607 mult_on_main_path = true; | |
608 } | |
609 else | |
610 has_other_use = true; | |
611 } | |
612 | |
613 /* In the x * x and a * x cases we just rewire stmt operands or | |
614 remove multiplications. In the has_other_use case we introduce | |
615 a multiplication so make sure we don't introduce a multiplication | |
616 on a path where there was none. */ | |
617 if (has_other_use && !mult_on_main_path) | |
618 return; | |
619 | |
620 if (sqr_stmts.is_empty () && mult_stmts.is_empty ()) | |
621 return; | |
622 | |
623 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want | |
624 to be able to compose it from the sqr and mult cases. */ | |
625 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ())) | |
626 return; | |
627 | |
628 if (dump_file) | |
629 { | |
630 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n"); | |
631 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE); | |
632 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); | |
633 fprintf (dump_file, "\n"); | |
634 } | |
635 | |
636 bool delete_div = !has_other_use; | |
637 tree sqr_ssa_name = NULL_TREE; | |
638 if (!sqr_stmts.is_empty ()) | |
639 { | |
640 /* r1 = x * x. Transform the original | |
641 x = 1.0 / t | |
642 into | |
643 tmp1 = 1.0 / a | |
644 r1 = tmp1. */ | |
645 | |
646 sqr_ssa_name | |
647 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr"); | |
648 | |
649 if (dump_file) | |
650 { | |
651 fprintf (dump_file, "Replacing original division\n"); | |
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); | |
653 fprintf (dump_file, "with new division\n"); | |
654 } | |
655 gimple_assign_set_lhs (stmt, sqr_ssa_name); | |
656 gimple_assign_set_rhs2 (stmt, a); | |
657 fold_stmt_inplace (def_gsi); | |
658 update_stmt (stmt); | |
659 | |
660 if (dump_file) | |
661 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); | |
662 | |
663 delete_div = false; | |
664 gimple *sqr_stmt; | |
665 unsigned int i; | |
666 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt) | |
667 { | |
668 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt); | |
669 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name); | |
670 update_stmt (sqr_stmt); | |
671 } | |
672 } | |
673 if (!mult_stmts.is_empty ()) | |
674 { | |
675 /* r2 = a * x. Transform this into: | |
676 r2 = t (The original sqrt (a)). */ | |
677 unsigned int i; | |
678 gimple *mult_stmt = NULL; | |
679 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt) | |
680 { | |
681 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt); | |
682 | |
683 if (dump_file) | |
684 { | |
685 fprintf (dump_file, "Replacing squaring multiplication\n"); | |
686 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE); | |
687 fprintf (dump_file, "with assignment\n"); | |
688 } | |
689 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name); | |
690 fold_stmt_inplace (&gsi2); | |
691 update_stmt (mult_stmt); | |
692 if (dump_file) | |
693 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE); | |
694 } | |
695 } | |
696 | |
697 if (has_other_use) | |
698 { | |
699 /* Using the two temporaries tmp1, tmp2 from above | |
700 the original x is now: | |
701 x = tmp1 * tmp2. */ | |
702 gcc_assert (orig_sqrt_ssa_name); | |
703 gcc_assert (sqr_ssa_name); | |
704 | |
705 gimple *new_stmt | |
706 = gimple_build_assign (x, MULT_EXPR, | |
707 orig_sqrt_ssa_name, sqr_ssa_name); | |
708 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); | |
709 update_stmt (stmt); | |
710 } | |
711 else if (delete_div) | |
712 { | |
713 /* Remove the original division. */ | |
714 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); | |
715 gsi_remove (&gsi2, true); | |
716 release_defs (stmt); | |
717 } | |
718 } | |
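A source-level sketch of the rewrite described in the header comment of optimize_recip_sqrt, with the algebra spelled out; hypothetical code, assuming -funsafe-math-optimizations (the flag checked by the caller) and ignoring the statement-placement details handled above:

```c
#include <math.h>

/* Before: x = 1.0 / sqrt (a) feeds a square, a multiplication by a,
   and one further use.  */
void
before (double a, double *r1, double *r2, double *other)
{
  double t = sqrt (a);
  double x = 1.0 / t;
  *r1 = x * x;          /* == 1.0 / a           */
  *r2 = a * x;          /* == a / sqrt (a) == t */
  *other = x;
}

/* After: the division now computes 1.0 / a (recip_sqrt_sqr), a * x becomes
   the original sqrt result, and x is recomposed as their product only
   because it still has another use.  The division and the sqrt no longer
   depend on each other, so they can execute in parallel.  */
void
after (double a, double *r1, double *r2, double *other)
{
  double t = sqrt (a);
  double recip_sqrt_sqr = 1.0 / a;
  *r1 = recip_sqrt_sqr;
  *r2 = t;
  *other = recip_sqrt_sqr * t;   /* == sqrt (a) / a == 1.0 / sqrt (a) */
}
```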
453 | 719 |
454 /* Look for floating-point divisions among DEF's uses, and try to | 720 /* Look for floating-point divisions among DEF's uses, and try to |
455 replace them by multiplications with the reciprocal. Add | 721 replace them by multiplications with the reciprocal. Add |
456 as many statements computing the reciprocal as needed. | 722 as many statements computing the reciprocal as needed. |
457 | 723 |
458 DEF must be a GIMPLE register of a floating-point type. */ | 724 DEF must be a GIMPLE register of a floating-point type. */ |
459 | 725 |
460 static void | 726 static void |
461 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) | 727 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) |
462 { | 728 { |
463 use_operand_p use_p; | 729 use_operand_p use_p, square_use_p; |
464 imm_use_iterator use_iter; | 730 imm_use_iterator use_iter, square_use_iter; |
731 tree square_def; | |
465 struct occurrence *occ; | 732 struct occurrence *occ; |
466 int count = 0, threshold; | 733 int count = 0; |
467 | 734 int threshold; |
468 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def)); | 735 int square_recip_count = 0; |
736 int sqrt_recip_count = 0; | |
737 | |
738 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME); | |
739 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); | |
740 | |
741 /* If DEF is a square (x * x), count the number of divisions by x. | |
742 If there are more divisions by x than by (DEF * DEF), prefer to optimize | |
743 the reciprocal of x instead of DEF. This improves cases like: | |
744 def = x * x | |
745 t0 = a / def | |
746 t1 = b / def | |
747 t2 = c / x | |
748 Reciprocal optimization of x results in 1 division rather than 2 or 3. */ | |
749 gimple *def_stmt = SSA_NAME_DEF_STMT (def); | |
750 | |
751 if (is_gimple_assign (def_stmt) | |
752 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR | |
753 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME | |
754 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt)) | |
755 { | |
756 tree op0 = gimple_assign_rhs1 (def_stmt); | |
757 | |
758 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0) | |
759 { | |
760 gimple *use_stmt = USE_STMT (use_p); | |
761 if (is_division_by (use_stmt, op0)) | |
762 sqrt_recip_count++; | |
763 } | |
764 } | |
469 | 765 |
470 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) | 766 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) |
471 { | 767 { |
472 gimple *use_stmt = USE_STMT (use_p); | 768 gimple *use_stmt = USE_STMT (use_p); |
473 if (is_division_by (use_stmt, def)) | 769 if (is_division_by (use_stmt, def)) |
474 { | 770 { |
475 register_division_in (gimple_bb (use_stmt)); | 771 register_division_in (gimple_bb (use_stmt), 2); |
476 count++; | 772 count++; |
477 } | 773 } |
478 } | 774 |
775 if (is_square_of (use_stmt, def)) | |
776 { | |
777 square_def = gimple_assign_lhs (use_stmt); | |
778 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def) | |
779 { | |
780 gimple *square_use_stmt = USE_STMT (square_use_p); | |
781 if (is_division_by (square_use_stmt, square_def)) | |
782 { | |
783 /* This is executed twice for each division by a square. */ | |
784 register_division_in (gimple_bb (square_use_stmt), 1); | |
785 square_recip_count++; | |
786 } | |
787 } | |
788 } | |
789 } | |
790 | |
791 /* Square reciprocals were counted twice above. */ | |
792 square_recip_count /= 2; | |
793 | |
794 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */ | |
795 if (sqrt_recip_count > square_recip_count) | |
796 return; | |
479 | 797 |
480 /* Do the expensive part only if we can hope to optimize something. */ | 798 /* Do the expensive part only if we can hope to optimize something. */ |
481 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); | 799 if (count + square_recip_count >= threshold && count >= 1) |
482 if (count >= threshold) | |
483 { | 800 { |
484 gimple *use_stmt; | 801 gimple *use_stmt; |
485 for (occ = occ_head; occ; occ = occ->next) | 802 for (occ = occ_head; occ; occ = occ->next) |
486 { | 803 { |
487 compute_merit (occ); | 804 compute_merit (occ); |
488 insert_reciprocals (def_gsi, occ, def, NULL, threshold); | 805 insert_reciprocals (def_gsi, occ, def, NULL, NULL, |
806 square_recip_count, threshold); | |
489 } | 807 } |
490 | 808 |
491 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) | 809 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) |
492 { | 810 { |
493 if (is_division_by (use_stmt, def)) | 811 if (is_division_by (use_stmt, def)) |
494 { | 812 { |
495 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) | 813 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) |
496 replace_reciprocal (use_p); | 814 replace_reciprocal (use_p); |
497 } | 815 } |
816 else if (square_recip_count > 0 && is_square_of (use_stmt, def)) | |
817 { | |
818 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) | |
819 { | |
820 /* Find all uses of the square that are divisions and | |
821 * replace them by multiplications with the inverse. */ | |
822 imm_use_iterator square_iterator; | |
823 gimple *powmult_use_stmt = USE_STMT (use_p); | |
824 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt); | |
825 | |
826 FOR_EACH_IMM_USE_STMT (powmult_use_stmt, | |
827 square_iterator, powmult_def_name) | |
828 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator) | |
829 { | |
830 gimple *powmult_use_stmt = USE_STMT (square_use_p); | |
831 if (is_division_by (powmult_use_stmt, powmult_def_name)) | |
832 replace_reciprocal_squares (square_use_p); | |
833 } | |
834 } | |
835 } | |
498 } | 836 } |
499 } | 837 } |
500 | 838 |
501 for (occ = occ_head; occ; ) | 839 for (occ = occ_head; occ; ) |
502 occ = free_bb (occ); | 840 occ = free_bb (occ); |
513 internal_fn ifn; | 851 internal_fn ifn; |
514 | 852 |
515 switch (gimple_call_combined_fn (call)) | 853 switch (gimple_call_combined_fn (call)) |
516 { | 854 { |
517 CASE_CFN_SQRT: | 855 CASE_CFN_SQRT: |
856 CASE_CFN_SQRT_FN: | |
518 ifn = IFN_RSQRT; | 857 ifn = IFN_RSQRT; |
519 break; | 858 break; |
520 | 859 |
521 default: | 860 default: |
522 return IFN_LAST; | 861 return IFN_LAST; |
536 const pass_data pass_data_cse_reciprocals = | 875 const pass_data pass_data_cse_reciprocals = |
537 { | 876 { |
538 GIMPLE_PASS, /* type */ | 877 GIMPLE_PASS, /* type */ |
539 "recip", /* name */ | 878 "recip", /* name */ |
540 OPTGROUP_NONE, /* optinfo_flags */ | 879 OPTGROUP_NONE, /* optinfo_flags */ |
541 TV_NONE, /* tv_id */ | 880 TV_TREE_RECIP, /* tv_id */ |
542 PROP_ssa, /* properties_required */ | 881 PROP_ssa, /* properties_required */ |
543 0, /* properties_provided */ | 882 0, /* properties_provided */ |
544 0, /* properties_destroyed */ | 883 0, /* properties_destroyed */ |
545 0, /* todo_flags_start */ | 884 0, /* todo_flags_start */ |
546 TODO_update_ssa, /* todo_flags_finish */ | 885 TODO_update_ssa, /* todo_flags_finish */ |
605 | 944 |
606 if (gimple_has_lhs (stmt) | 945 if (gimple_has_lhs (stmt) |
607 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL | 946 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL |
608 && FLOAT_TYPE_P (TREE_TYPE (def)) | 947 && FLOAT_TYPE_P (TREE_TYPE (def)) |
609 && TREE_CODE (def) == SSA_NAME) | 948 && TREE_CODE (def) == SSA_NAME) |
610 execute_cse_reciprocals_1 (&gsi, def); | 949 { |
950 execute_cse_reciprocals_1 (&gsi, def); | |
951 stmt = gsi_stmt (gsi); | |
952 if (flag_unsafe_math_optimizations | |
953 && is_gimple_assign (stmt) | |
954 && !stmt_can_throw_internal (cfun, stmt) | |
955 && gimple_assign_rhs_code (stmt) == RDIV_EXPR) | |
956 optimize_recip_sqrt (&gsi, def); | |
957 } | |
611 } | 958 } |
612 | 959 |
613 if (optimize_bb_for_size_p (bb)) | 960 if (optimize_bb_for_size_p (bb)) |
614 continue; | 961 continue; |
615 | 962 |
642 internal_fn ifn = internal_fn_reciprocal (call); | 989 internal_fn ifn = internal_fn_reciprocal (call); |
643 if (ifn == IFN_LAST) | 990 if (ifn == IFN_LAST) |
644 { | 991 { |
645 fndecl = gimple_call_fndecl (call); | 992 fndecl = gimple_call_fndecl (call); |
646 if (!fndecl | 993 if (!fndecl |
647 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD) | 994 || !fndecl_built_in_p (fndecl, BUILT_IN_MD)) |
648 continue; | 995 continue; |
649 fndecl = targetm.builtin_reciprocal (fndecl); | 996 fndecl = targetm.builtin_reciprocal (fndecl); |
650 if (!fndecl) | 997 if (!fndecl) |
651 continue; | 998 continue; |
652 } | 999 } |
1750 const pass_data pass_data_cse_sincos = | 2097 const pass_data pass_data_cse_sincos = |
1751 { | 2098 { |
1752 GIMPLE_PASS, /* type */ | 2099 GIMPLE_PASS, /* type */ |
1753 "sincos", /* name */ | 2100 "sincos", /* name */ |
1754 OPTGROUP_NONE, /* optinfo_flags */ | 2101 OPTGROUP_NONE, /* optinfo_flags */ |
1755 TV_NONE, /* tv_id */ | 2102 TV_TREE_SINCOS, /* tv_id */ |
1756 PROP_ssa, /* properties_required */ | 2103 PROP_ssa, /* properties_required */ |
1757 PROP_gimple_opt_math, /* properties_provided */ | 2104 PROP_gimple_opt_math, /* properties_provided */ |
1758 0, /* properties_destroyed */ | 2105 0, /* properties_destroyed */ |
1759 0, /* todo_flags_start */ | 2106 0, /* todo_flags_start */ |
1760 TODO_update_ssa, /* todo_flags_finish */ | 2107 TODO_update_ssa, /* todo_flags_finish */ |
1931 make_pass_cse_sincos (gcc::context *ctxt) | 2278 make_pass_cse_sincos (gcc::context *ctxt) |
1932 { | 2279 { |
1933 return new pass_cse_sincos (ctxt); | 2280 return new pass_cse_sincos (ctxt); |
1934 } | 2281 } |
1935 | 2282 |
1936 /* A symbolic number structure is used to detect byte permutation and selection | |
1937 patterns of a source. To achieve that, its field N contains an artificial | |
1938 number consisting of BITS_PER_MARKER sized markers tracking where does each | |
1939 byte come from in the source: | |
1940 | |
1941 0 - target byte has the value 0 | |
1942 FF - target byte has an unknown value (eg. due to sign extension) | |
1943 1..size - marker value is the byte index in the source (0 for lsb). | |
1944 | |
1945 To detect permutations on memory sources (arrays and structures), a symbolic | |
1946 number is also associated: | |
1947 - a base address BASE_ADDR and an OFFSET giving the address of the source; | |
1948 - a range which gives the difference between the highest and lowest accessed | |
1949 memory location to make such a symbolic number; | |
1950 - the address SRC of the source element of lowest address as a convenience | |
1951 to easily get BASE_ADDR + offset + lowest bytepos; | |
1952 - number of expressions N_OPS bitwise ored together to represent | |
1953 approximate cost of the computation. | |
1954 | |
1955 Note 1: the range is different from size as size reflects the size of the | |
1956 type of the current expression. For instance, for an array char a[], | |
1957 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while | |
1958 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this | |
1959 time a range of 1. | |
1960 | |
1961 Note 2: for non-memory sources, range holds the same value as size. | |
1962 | |
1963 Note 3: SRC points to the SSA_NAME in case of non-memory source. */ | |
1964 | |
1965 struct symbolic_number { | |
1966 uint64_t n; | |
1967 tree type; | |
1968 tree base_addr; | |
1969 tree offset; | |
1970 HOST_WIDE_INT bytepos; | |
1971 tree src; | |
1972 tree alias_set; | |
1973 tree vuse; | |
1974 unsigned HOST_WIDE_INT range; | |
1975 int n_ops; | |
1976 }; | |
1977 | |
1978 #define BITS_PER_MARKER 8 | |
1979 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1) | |
1980 #define MARKER_BYTE_UNKNOWN MARKER_MASK | |
1981 #define HEAD_MARKER(n, size) \ | |
1982 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER))) | |
1983 | |
1984 /* The number which the find_bswap_or_nop_1 result should match in | |
1985 order to have a nop. The number is masked according to the size of | |
1986 the symbolic number before using it. */ | |
1987 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \ | |
1988 (uint64_t)0x08070605 << 32 | 0x04030201) | |
1989 | |
1990 /* The number which the find_bswap_or_nop_1 result should match in | |
1991 order to have a byte swap. The number is masked according to the | |
1992 size of the symbolic number before using it. */ | |
1993 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \ | |
1994 (uint64_t)0x01020304 << 32 | 0x05060708) | |
1995 | |
1996 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic | |
1997 number N. Return false if the requested operation is not permitted | |
1998 on a symbolic number. */ | |
1999 | |
2000 static inline bool | |
2001 do_shift_rotate (enum tree_code code, | |
2002 struct symbolic_number *n, | |
2003 int count) | |
2004 { | |
2005 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; | |
2006 unsigned head_marker; | |
2007 | |
2008 if (count % BITS_PER_UNIT != 0) | |
2009 return false; | |
2010 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; | |
2011 | |
2012 /* Zero out the extra bits of N in order to avoid them being shifted | |
2013 into the significant bits. */ | |
2014 if (size < 64 / BITS_PER_MARKER) | |
2015 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
2016 | |
2017 switch (code) | |
2018 { | |
2019 case LSHIFT_EXPR: | |
2020 n->n <<= count; | |
2021 break; | |
2022 case RSHIFT_EXPR: | |
2023 head_marker = HEAD_MARKER (n->n, size); | |
2024 n->n >>= count; | |
2025 /* Arithmetic shift of signed type: result is dependent on the value. */ | |
2026 if (!TYPE_UNSIGNED (n->type) && head_marker) | |
2027 for (i = 0; i < count / BITS_PER_MARKER; i++) | |
2028 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN | |
2029 << ((size - 1 - i) * BITS_PER_MARKER); | |
2030 break; | |
2031 case LROTATE_EXPR: | |
2032 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); | |
2033 break; | |
2034 case RROTATE_EXPR: | |
2035 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); | |
2036 break; | |
2037 default: | |
2038 return false; | |
2039 } | |
2040 /* Zero unused bits for size. */ | |
2041 if (size < 64 / BITS_PER_MARKER) | |
2042 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
2043 return true; | |
2044 } | |
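A worked illustration of do_shift_rotate (not from the changeset): for a 4-byte value the symbolic number starts out as 0x04030201. An LSHIFT_EXPR by 8 bits shifts by one marker and, after masking to the type size, leaves 0x03020100, so the low byte is now known to be zero and the original top byte has been shifted out. An RSHIFT_EXPR by 8 on a signed type whose top marker is non-zero gives 0xff040302 instead: the arithmetic shift replicates the sign bit, so the new top byte is marked MARKER_BYTE_UNKNOWN.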
2045 | |
2046 /* Perform sanity checking for the symbolic number N and the gimple | |
2047 statement STMT. */ | |
2048 | |
2049 static inline bool | |
2050 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) | |
2051 { | |
2052 tree lhs_type; | |
2053 | |
2054 lhs_type = gimple_expr_type (stmt); | |
2055 | |
2056 if (TREE_CODE (lhs_type) != INTEGER_TYPE) | |
2057 return false; | |
2058 | |
2059 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) | |
2060 return false; | |
2061 | |
2062 return true; | |
2063 } | |
2064 | |
2065 /* Initialize the symbolic number N for the bswap pass from the base element | |
2066 SRC manipulated by the bitwise OR expression. */ | |
2067 | |
2068 static bool | |
2069 init_symbolic_number (struct symbolic_number *n, tree src) | |
2070 { | |
2071 int size; | |
2072 | |
2073 if (! INTEGRAL_TYPE_P (TREE_TYPE (src))) | |
2074 return false; | |
2075 | |
2076 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; | |
2077 n->src = src; | |
2078 | |
2079 /* Set up the symbolic number N by setting each byte to a value between 1 and | |
2080 the byte size of rhs1. The highest order byte is set to n->size and the | |
2081 lowest order byte to 1. */ | |
2082 n->type = TREE_TYPE (src); | |
2083 size = TYPE_PRECISION (n->type); | |
2084 if (size % BITS_PER_UNIT != 0) | |
2085 return false; | |
2086 size /= BITS_PER_UNIT; | |
2087 if (size > 64 / BITS_PER_MARKER) | |
2088 return false; | |
2089 n->range = size; | |
2090 n->n = CMPNOP; | |
2091 n->n_ops = 1; | |
2092 | |
2093 if (size < 64 / BITS_PER_MARKER) | |
2094 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; | |
2095 | |
2096 return true; | |
2097 } | |
2098 | |
2099 /* Check if STMT might be a byte swap or a nop from a memory source and returns | |
2100 the answer. If so, REF is that memory source and the base of the memory area | |
2101 accessed and the offset of the access from that base are recorded in N. */ | |
2102 | |
2103 bool | |
2104 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n) | |
2105 { | |
2106 /* Leaf node is an array or component ref. Memorize its base and | |
2107 offset from base to compare to other such leaf node. */ | |
2108 HOST_WIDE_INT bitsize, bitpos; | |
2109 machine_mode mode; | |
2110 int unsignedp, reversep, volatilep; | |
2111 tree offset, base_addr; | |
2112 | |
2113 /* Not prepared to handle PDP endian. */ | |
2114 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
2115 return false; | |
2116 | |
2117 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) | |
2118 return false; | |
2119 | |
2120 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, | |
2121 &unsignedp, &reversep, &volatilep); | |
2122 | |
2123 if (TREE_CODE (base_addr) == MEM_REF) | |
2124 { | |
2125 offset_int bit_offset = 0; | |
2126 tree off = TREE_OPERAND (base_addr, 1); | |
2127 | |
2128 if (!integer_zerop (off)) | |
2129 { | |
2130 offset_int boff, coff = mem_ref_offset (base_addr); | |
2131 boff = coff << LOG2_BITS_PER_UNIT; | |
2132 bit_offset += boff; | |
2133 } | |
2134 | |
2135 base_addr = TREE_OPERAND (base_addr, 0); | |
2136 | |
2137 /* Avoid returning a negative bitpos as this may wreak havoc later. */ | |
2138 if (wi::neg_p (bit_offset)) | |
2139 { | |
2140 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false); | |
2141 offset_int tem = wi::bit_and_not (bit_offset, mask); | |
2142 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf. | |
2143 Subtract it to BIT_OFFSET and add it (scaled) to OFFSET. */ | |
2144 bit_offset -= tem; | |
2145 tem >>= LOG2_BITS_PER_UNIT; | |
2146 if (offset) | |
2147 offset = size_binop (PLUS_EXPR, offset, | |
2148 wide_int_to_tree (sizetype, tem)); | |
2149 else | |
2150 offset = wide_int_to_tree (sizetype, tem); | |
2151 } | |
2152 | |
2153 bitpos += bit_offset.to_shwi (); | |
2154 } | |
2155 | |
2156 if (bitpos % BITS_PER_UNIT) | |
2157 return false; | |
2158 if (bitsize % BITS_PER_UNIT) | |
2159 return false; | |
2160 if (reversep) | |
2161 return false; | |
2162 | |
2163 if (!init_symbolic_number (n, ref)) | |
2164 return false; | |
2165 n->base_addr = base_addr; | |
2166 n->offset = offset; | |
2167 n->bytepos = bitpos / BITS_PER_UNIT; | |
2168 n->alias_set = reference_alias_ptr_type (ref); | |
2169 n->vuse = gimple_vuse (stmt); | |
2170 return true; | |
2171 } | |
2172 | |
2173 /* Compute the symbolic number N representing the result of a bitwise OR on 2 | |
2174 symbolic number N1 and N2 whose source statements are respectively | |
2175 SOURCE_STMT1 and SOURCE_STMT2. */ | |
2176 | |
2177 static gimple * | |
2178 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, | |
2179 gimple *source_stmt2, struct symbolic_number *n2, | |
2180 struct symbolic_number *n) | |
2181 { | |
2182 int i, size; | |
2183 uint64_t mask; | |
2184 gimple *source_stmt; | |
2185 struct symbolic_number *n_start; | |
2186 | |
2187 tree rhs1 = gimple_assign_rhs1 (source_stmt1); | |
2188 if (TREE_CODE (rhs1) == BIT_FIELD_REF | |
2189 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) | |
2190 rhs1 = TREE_OPERAND (rhs1, 0); | |
2191 tree rhs2 = gimple_assign_rhs1 (source_stmt2); | |
2192 if (TREE_CODE (rhs2) == BIT_FIELD_REF | |
2193 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME) | |
2194 rhs2 = TREE_OPERAND (rhs2, 0); | |
2195 | |
2196 /* Sources are different, cancel bswap if they are not memory location with | |
2197 the same base (array, structure, ...). */ | |
2198 if (rhs1 != rhs2) | |
2199 { | |
2200 uint64_t inc; | |
2201 HOST_WIDE_INT start_sub, end_sub, end1, end2, end; | |
2202 struct symbolic_number *toinc_n_ptr, *n_end; | |
2203 basic_block bb1, bb2; | |
2204 | |
2205 if (!n1->base_addr || !n2->base_addr | |
2206 || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) | |
2207 return NULL; | |
2208 | |
2209 if (!n1->offset != !n2->offset | |
2210 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) | |
2211 return NULL; | |
2212 | |
2213 if (n1->bytepos < n2->bytepos) | |
2214 { | |
2215 n_start = n1; | |
2216 start_sub = n2->bytepos - n1->bytepos; | |
2217 } | |
2218 else | |
2219 { | |
2220 n_start = n2; | |
2221 start_sub = n1->bytepos - n2->bytepos; | |
2222 } | |
2223 | |
2224 bb1 = gimple_bb (source_stmt1); | |
2225 bb2 = gimple_bb (source_stmt2); | |
2226 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2)) | |
2227 source_stmt = source_stmt1; | |
2228 else | |
2229 source_stmt = source_stmt2; | |
2230 | |
2231 /* Find the highest address at which a load is performed and | |
2232 compute related info. */ | |
2233 end1 = n1->bytepos + (n1->range - 1); | |
2234 end2 = n2->bytepos + (n2->range - 1); | |
2235 if (end1 < end2) | |
2236 { | |
2237 end = end2; | |
2238 end_sub = end2 - end1; | |
2239 } | |
2240 else | |
2241 { | |
2242 end = end1; | |
2243 end_sub = end1 - end2; | |
2244 } | |
2245 n_end = (end2 > end1) ? n2 : n1; | |
2246 | |
2247 /* Find symbolic number whose lsb is the most significant. */ | |
2248 if (BYTES_BIG_ENDIAN) | |
2249 toinc_n_ptr = (n_end == n1) ? n2 : n1; | |
2250 else | |
2251 toinc_n_ptr = (n_start == n1) ? n2 : n1; | |
2252 | |
2253 n->range = end - n_start->bytepos + 1; | |
2254 | |
2255 /* Check that the range of memory covered can be represented by | |
2256 a symbolic number. */ | |
2257 if (n->range > 64 / BITS_PER_MARKER) | |
2258 return NULL; | |
2259 | |
2260 /* Reinterpret byte marks in symbolic number holding the value of | |
2261 bigger weight according to target endianness. */ | |
2262 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; | |
2263 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; | |
2264 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) | |
2265 { | |
2266 unsigned marker | |
2267 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; | |
2268 if (marker && marker != MARKER_BYTE_UNKNOWN) | |
2269 toinc_n_ptr->n += inc; | |
2270 } | |
2271 } | |
2272 else | |
2273 { | |
2274 n->range = n1->range; | |
2275 n_start = n1; | |
2276 source_stmt = source_stmt1; | |
2277 } | |
2278 | |
2279 if (!n1->alias_set | |
2280 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set)) | |
2281 n->alias_set = n1->alias_set; | |
2282 else | |
2283 n->alias_set = ptr_type_node; | |
2284 n->vuse = n_start->vuse; | |
2285 n->base_addr = n_start->base_addr; | |
2286 n->offset = n_start->offset; | |
2287 n->src = n_start->src; | |
2288 n->bytepos = n_start->bytepos; | |
2289 n->type = n_start->type; | |
2290 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; | |
2291 | |
2292 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER) | |
2293 { | |
2294 uint64_t masked1, masked2; | |
2295 | |
2296 masked1 = n1->n & mask; | |
2297 masked2 = n2->n & mask; | |
2298 if (masked1 && masked2 && masked1 != masked2) | |
2299 return NULL; | |
2300 } | |
2301 n->n = n1->n | n2->n; | |
2302 n->n_ops = n1->n_ops + n2->n_ops; | |
2303 | |
2304 return source_stmt; | |
2305 } | |
2306 | |
2307 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform | |
2308 the operation given by the rhs of STMT on the result. If the operation | |
2309 could successfully be executed the function returns a gimple stmt whose | |
2310 rhs's first tree is the expression of the source operand and NULL | |
2311 otherwise. */ | |
2312 | |
2313 static gimple * | |
2314 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) | |
2315 { | |
2316 enum tree_code code; | |
2317 tree rhs1, rhs2 = NULL; | |
2318 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1; | |
2319 enum gimple_rhs_class rhs_class; | |
2320 | |
2321 if (!limit || !is_gimple_assign (stmt)) | |
2322 return NULL; | |
2323 | |
2324 rhs1 = gimple_assign_rhs1 (stmt); | |
2325 | |
2326 if (find_bswap_or_nop_load (stmt, rhs1, n)) | |
2327 return stmt; | |
2328 | |
2329 /* Handle BIT_FIELD_REF. */ | |
2330 if (TREE_CODE (rhs1) == BIT_FIELD_REF | |
2331 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) | |
2332 { | |
2333 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1)); | |
2334 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2)); | |
2335 if (bitpos % BITS_PER_UNIT == 0 | |
2336 && bitsize % BITS_PER_UNIT == 0 | |
2337 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0))) | |
2338 { | |
2339 /* Handle big-endian bit numbering in BIT_FIELD_REF. */ | |
2340 if (BYTES_BIG_ENDIAN) | |
2341 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize; | |
2342 | |
2343 /* Shift. */ | |
2344 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos)) | |
2345 return NULL; | |
2346 | |
2347 /* Mask. */ | |
2348 uint64_t mask = 0; | |
2349 uint64_t tmp = (1 << BITS_PER_UNIT) - 1; | |
2350 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; | |
2351 i++, tmp <<= BITS_PER_UNIT) | |
2352 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); | |
2353 n->n &= mask; | |
2354 | |
2355 /* Convert. */ | |
2356 n->type = TREE_TYPE (rhs1); | |
2357 if (!n->base_addr) | |
2358 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT; | |
2359 | |
2360 return verify_symbolic_number_p (n, stmt) ? stmt : NULL; | |
2361 } | |
2362 | |
2363 return NULL; | |
2364 } | |
2365 | |
2366 if (TREE_CODE (rhs1) != SSA_NAME) | |
2367 return NULL; | |
2368 | |
2369 code = gimple_assign_rhs_code (stmt); | |
2370 rhs_class = gimple_assign_rhs_class (stmt); | |
2371 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); | |
2372 | |
2373 if (rhs_class == GIMPLE_BINARY_RHS) | |
2374 rhs2 = gimple_assign_rhs2 (stmt); | |
2375 | |
2376 /* Handle unary rhs and binary rhs with integer constants as second | |
2377 operand. */ | |
2378 | |
2379 if (rhs_class == GIMPLE_UNARY_RHS | |
2380 || (rhs_class == GIMPLE_BINARY_RHS | |
2381 && TREE_CODE (rhs2) == INTEGER_CST)) | |
2382 { | |
2383 if (code != BIT_AND_EXPR | |
2384 && code != LSHIFT_EXPR | |
2385 && code != RSHIFT_EXPR | |
2386 && code != LROTATE_EXPR | |
2387 && code != RROTATE_EXPR | |
2388 && !CONVERT_EXPR_CODE_P (code)) | |
2389 return NULL; | |
2390 | |
2391 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); | |
2392 | |
2393 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and | |
2394 we have to initialize the symbolic number. */ | |
2395 if (!source_stmt1) | |
2396 { | |
2397 if (gimple_assign_load_p (stmt) | |
2398 || !init_symbolic_number (n, rhs1)) | |
2399 return NULL; | |
2400 source_stmt1 = stmt; | |
2401 } | |
2402 | |
2403 switch (code) | |
2404 { | |
2405 case BIT_AND_EXPR: | |
2406 { | |
2407 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; | |
2408 uint64_t val = int_cst_value (rhs2), mask = 0; | |
2409 uint64_t tmp = (1 << BITS_PER_UNIT) - 1; | |
2410 | |
2411 /* Only constants masking full bytes are allowed. */ | |
2412 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) | |
2413 if ((val & tmp) != 0 && (val & tmp) != tmp) | |
2414 return NULL; | |
2415 else if (val & tmp) | |
2416 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); | |
2417 | |
2418 n->n &= mask; | |
2419 } | |
2420 break; | |
2421 case LSHIFT_EXPR: | |
2422 case RSHIFT_EXPR: | |
2423 case LROTATE_EXPR: | |
2424 case RROTATE_EXPR: | |
2425 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) | |
2426 return NULL; | |
2427 break; | |
2428 CASE_CONVERT: | |
2429 { | |
2430 int i, type_size, old_type_size; | |
2431 tree type; | |
2432 | |
2433 type = gimple_expr_type (stmt); | |
2434 type_size = TYPE_PRECISION (type); | |
2435 if (type_size % BITS_PER_UNIT != 0) | |
2436 return NULL; | |
2437 type_size /= BITS_PER_UNIT; | |
2438 if (type_size > 64 / BITS_PER_MARKER) | |
2439 return NULL; | |
2440 | |
2441 /* Sign extension: result is dependent on the value. */ | |
2442 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; | |
2443 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size | |
2444 && HEAD_MARKER (n->n, old_type_size)) | |
2445 for (i = 0; i < type_size - old_type_size; i++) | |
2446 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN | |
2447 << ((type_size - 1 - i) * BITS_PER_MARKER); | |
2448 | |
2449 if (type_size < 64 / BITS_PER_MARKER) | |
2450 { | |
2451 /* If STMT casts to a smaller type mask out the bits not | |
2452 belonging to the target type. */ | |
2453 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; | |
2454 } | |
2455 n->type = type; | |
2456 if (!n->base_addr) | |
2457 n->range = type_size; | |
2458 } | |
2459 break; | |
2460 default: | |
2461 return NULL; | |
2462 }; | |
2463 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL; | |
2464 } | |
2465 | |
2466 /* Handle binary rhs. */ | |
2467 | |
2468 if (rhs_class == GIMPLE_BINARY_RHS) | |
2469 { | |
2470 struct symbolic_number n1, n2; | |
2471 gimple *source_stmt, *source_stmt2; | |
2472 | |
2473 if (code != BIT_IOR_EXPR) | |
2474 return NULL; | |
2475 | |
2476 if (TREE_CODE (rhs2) != SSA_NAME) | |
2477 return NULL; | |
2478 | |
2479 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); | |
2480 | |
2481 switch (code) | |
2482 { | |
2483 case BIT_IOR_EXPR: | |
2484 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); | |
2485 | |
2486 if (!source_stmt1) | |
2487 return NULL; | |
2488 | |
2489 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); | |
2490 | |
2491 if (!source_stmt2) | |
2492 return NULL; | |
2493 | |
2494 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) | |
2495 return NULL; | |
2496 | |
2497 if (!n1.vuse != !n2.vuse | |
2498 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) | |
2499 return NULL; | |
2500 | |
2501 source_stmt | |
2502 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n); | |
2503 | |
2504 if (!source_stmt) | |
2505 return NULL; | |
2506 | |
2507 if (!verify_symbolic_number_p (n, stmt)) | |
2508 return NULL; | |
2509 | |
2510 break; | |
2511 default: | |
2512 return NULL; | |
2513 } | |
2514 return source_stmt; | |
2515 } | |
2516 return NULL; | |
2517 } | |
2518 | |
2519 /* Check if STMT completes a bswap implementation or a read in a given | |
2520 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP | |
2521 accordingly. It also sets N to represent the kind of operations | |
2522 performed: size of the resulting expression and whether it works on | |
2523 a memory source, and if so alias-set and vuse. At last, the | |
2524 function returns a stmt whose rhs's first tree is the source | |
2525 expression. */ | |
2526 | |
2527 static gimple * | |
2528 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) | |
2529 { | |
2530 unsigned rsize; | |
2531 uint64_t tmpn, mask; | |
2532 /* The number which the find_bswap_or_nop_1 result should match in order | |
2533 to have a full byte swap. The number is shifted to the right | |
2534 according to the size of the symbolic number before using it. */ | |
2535 uint64_t cmpxchg = CMPXCHG; | |
2536 uint64_t cmpnop = CMPNOP; | |
2537 | |
2538 gimple *ins_stmt; | |
2539 int limit; | |
2540 | |
2541 /* The last parameter determines the depth search limit. It usually | |
2542 correlates directly to the number n of bytes to be touched. We | |
2543 increase that number by log2(n) + 1 here in order to also | |
2544 cover signed -> unsigned conversions of the src operand as can be seen | |
2545 in libgcc, and for initial shift/and operation of the src operand. */ | |
2546 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); | |
2547 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); | |
2548 ins_stmt = find_bswap_or_nop_1 (stmt, n, limit); | |
2549 | |
2550 if (!ins_stmt) | |
2551 return NULL; | |
2552 | |
2553 /* Find real size of result (highest non-zero byte). */ | |
2554 if (n->base_addr) | |
2555 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); | |
2556 else | |
2557 rsize = n->range; | |
2558 | |
2559 /* Zero out the bits corresponding to untouched bytes in original gimple | |
2560 expression. */ | |
2561 if (n->range < (int) sizeof (int64_t)) | |
2562 { | |
2563 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; | |
2564 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; | |
2565 cmpnop &= mask; | |
2566 } | |
2567 | |
2568 /* Zero out the bits corresponding to unused bytes in the result of the | |
2569 gimple expression. */ | |
2570 if (rsize < n->range) | |
2571 { | |
2572 if (BYTES_BIG_ENDIAN) | |
2573 { | |
2574 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; | |
2575 cmpxchg &= mask; | |
2576 cmpnop >>= (n->range - rsize) * BITS_PER_MARKER; | |
2577 } | |
2578 else | |
2579 { | |
2580 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; | |
2581 cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER; | |
2582 cmpnop &= mask; | |
2583 } | |
2584 n->range = rsize; | |
2585 } | |
2586 | |
2587 /* A complete byte swap should make the symbolic number to start with | |
2588 the largest digit in the highest order byte. Unchanged symbolic | |
2589 number indicates a read with same endianness as target architecture. */ | |
2590 if (n->n == cmpnop) | |
2591 *bswap = false; | |
2592 else if (n->n == cmpxchg) | |
2593 *bswap = true; | |
2594 else | |
2595 return NULL; | |
2596 | |
2597 /* Useless bit manipulation performed by code. */ | |
2598 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) | |
2599 return NULL; | |
2600 | |
2601 n->range *= BITS_PER_UNIT; | |
2602 return ins_stmt; | |
2603 } | |
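A hypothetical example of the "nop" side of the cmpnop / cmpxchg comparison above: reassembling a 32-bit value byte by byte from memory. On a little-endian target the symbolic number ends up equal to CMPNOP and the pass rewrites the whole expression as a single (suitably aligned) load; on a big-endian target the same code matches CMPXCHG and becomes a load followed by a byte swap.

```c
#include <stdint.h>

/* Byte-wise little-endian read; find_bswap_or_nop classifies it as either
   a plain load or a load + bswap, depending on the target's endianness.  */
uint32_t
read_le32 (const uint8_t *p)
{
  return (uint32_t) p[0]
       | ((uint32_t) p[1] << 8)
       | ((uint32_t) p[2] << 16)
       | ((uint32_t) p[3] << 24);
}
```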
2604 | |
2605 namespace { | |
2606 | |
2607 const pass_data pass_data_optimize_bswap = | |
2608 { | |
2609 GIMPLE_PASS, /* type */ | |
2610 "bswap", /* name */ | |
2611 OPTGROUP_NONE, /* optinfo_flags */ | |
2612 TV_NONE, /* tv_id */ | |
2613 PROP_ssa, /* properties_required */ | |
2614 0, /* properties_provided */ | |
2615 0, /* properties_destroyed */ | |
2616 0, /* todo_flags_start */ | |
2617 0, /* todo_flags_finish */ | |
2618 }; | |
2619 | |
2620 class pass_optimize_bswap : public gimple_opt_pass | |
2621 { | |
2622 public: | |
2623 pass_optimize_bswap (gcc::context *ctxt) | |
2624 : gimple_opt_pass (pass_data_optimize_bswap, ctxt) | |
2625 {} | |
2626 | |
2627 /* opt_pass methods: */ | |
2628 virtual bool gate (function *) | |
2629 { | |
2630 return flag_expensive_optimizations && optimize; | |
2631 } | |
2632 | |
2633 virtual unsigned int execute (function *); | |
2634 | |
2635 }; // class pass_optimize_bswap | |
2636 | |
2637 /* Perform the bswap optimization: replace the expression computed in the rhs | |
2638 of CUR_STMT by an equivalent bswap, load or load + bswap expression. | |
2639 Which of these alternatives replace the rhs is given by N->base_addr (non | |
2640 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the | |
2641 load to perform are also given in N while the builtin bswap invoke is given | |
2642 in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the |
2643 load statements involved to construct the rhs in CUR_STMT and N->range gives | |
2644 the size of the rhs expression for maintaining some statistics. | |
2645 | |
2646 Note that if the replacement involves a load, CUR_STMT is moved just after |
2647 INS_STMT to do the load with the same VUSE, which can lead to CUR_STMT |
2648 changing basic block. */ |
2649 | |
2650 static bool | |
2651 bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl, | |
2652 tree bswap_type, tree load_type, struct symbolic_number *n, | |
2653 bool bswap) | |
2654 { | |
2655 gimple_stmt_iterator gsi; | |
2656 tree src, tmp, tgt; | |
2657 gimple *bswap_stmt; | |
2658 | |
2659 gsi = gsi_for_stmt (cur_stmt); | |
2660 src = n->src; | |
2661 tgt = gimple_assign_lhs (cur_stmt); | |
2662 | |
2663 /* Need to load the value from memory first. */ | |
2664 if (n->base_addr) | |
2665 { | |
2666 gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt); | |
2667 tree addr_expr, addr_tmp, val_expr, val_tmp; | |
2668 tree load_offset_ptr, aligned_load_type; | |
2669 gimple *addr_stmt, *load_stmt; | |
2670 unsigned align; | |
2671 HOST_WIDE_INT load_offset = 0; | |
2672 basic_block ins_bb, cur_bb; | |
2673 | |
2674 ins_bb = gimple_bb (ins_stmt); | |
2675 cur_bb = gimple_bb (cur_stmt); | |
2676 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb)) | |
2677 return false; | |
2678 | |
2679 align = get_object_alignment (src); | |
2680 | |
2681 /* Move cur_stmt just before one of the load of the original | |
2682 to ensure it has the same VUSE. See PR61517 for what could | |
2683 go wrong. */ | |
2684 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt)) | |
2685 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt)); | |
2686 gsi_move_before (&gsi, &gsi_ins); | |
2687 gsi = gsi_for_stmt (cur_stmt); | |
2688 | |
2689 /* Compute address to load from and cast according to the size | |
2690 of the load. */ | |
2691 addr_expr = build_fold_addr_expr (unshare_expr (src)); | |
2692 if (is_gimple_mem_ref_addr (addr_expr)) | |
2693 addr_tmp = addr_expr; | |
2694 else | |
2695 { | |
2696 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL, | |
2697 "load_src"); | |
2698 addr_stmt = gimple_build_assign (addr_tmp, addr_expr); | |
2699 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT); | |
2700 } | |
2701 | |
2702 /* Perform the load. */ | |
2703 aligned_load_type = load_type; | |
2704 if (align < TYPE_ALIGN (load_type)) | |
2705 aligned_load_type = build_aligned_type (load_type, align); | |
2706 load_offset_ptr = build_int_cst (n->alias_set, load_offset); | |
2707 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp, | |
2708 load_offset_ptr); | |
2709 | |
2710 if (!bswap) | |
2711 { | |
2712 if (n->range == 16) | |
2713 nop_stats.found_16bit++; | |
2714 else if (n->range == 32) | |
2715 nop_stats.found_32bit++; | |
2716 else | |
2717 { | |
2718 gcc_assert (n->range == 64); | |
2719 nop_stats.found_64bit++; | |
2720 } | |
2721 | |
2722 /* Convert the result of load if necessary. */ | |
2723 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type)) | |
2724 { | |
2725 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, | |
2726 "load_dst"); | |
2727 load_stmt = gimple_build_assign (val_tmp, val_expr); | |
2728 gimple_set_vuse (load_stmt, n->vuse); | |
2729 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); | |
2730 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp); | |
2731 } | |
2732 else | |
2733 { | |
2734 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr); | |
2735 gimple_set_vuse (cur_stmt, n->vuse); | |
2736 } | |
2737 update_stmt (cur_stmt); | |
2738 | |
2739 if (dump_file) | |
2740 { | |
2741 fprintf (dump_file, | |
2742 "%d bit load in target endianness found at: ", | |
2743 (int) n->range); | |
2744 print_gimple_stmt (dump_file, cur_stmt, 0); | |
2745 } | |
2746 return true; | |
2747 } | |
2748 else | |
2749 { | |
2750 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst"); | |
2751 load_stmt = gimple_build_assign (val_tmp, val_expr); | |
2752 gimple_set_vuse (load_stmt, n->vuse); | |
2753 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); | |
2754 } | |
2755 src = val_tmp; | |
2756 } | |
2757 else if (!bswap) | |
2758 { | |
2759 gimple *g; | |
2760 if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src))) | |
2761 { | |
2762 if (!is_gimple_val (src)) | |
2763 return false; | |
2764 g = gimple_build_assign (tgt, NOP_EXPR, src); | |
2765 } | |
2766 else | |
2767 g = gimple_build_assign (tgt, src); | |
2768 if (n->range == 16) | |
2769 nop_stats.found_16bit++; | |
2770 else if (n->range == 32) | |
2771 nop_stats.found_32bit++; | |
2772 else | |
2773 { | |
2774 gcc_assert (n->range == 64); | |
2775 nop_stats.found_64bit++; | |
2776 } | |
2777 if (dump_file) | |
2778 { | |
2779 fprintf (dump_file, | |
2780 "%d bit reshuffle in target endianness found at: ", | |
2781 (int) n->range); | |
2782 print_gimple_stmt (dump_file, cur_stmt, 0); | |
2783 } | |
2784 gsi_replace (&gsi, g, true); | |
2785 return true; | |
2786 } | |
2787 else if (TREE_CODE (src) == BIT_FIELD_REF) | |
2788 src = TREE_OPERAND (src, 0); | |
2789 | |
2790 if (n->range == 16) | |
2791 bswap_stats.found_16bit++; | |
2792 else if (n->range == 32) | |
2793 bswap_stats.found_32bit++; | |
2794 else | |
2795 { | |
2796 gcc_assert (n->range == 64); | |
2797 bswap_stats.found_64bit++; | |
2798 } | |
2799 | |
2800 tmp = src; | |
2801 | |
2802 /* Convert the src expression if necessary. */ | |
2803 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type)) | |
2804 { | |
2805 gimple *convert_stmt; | |
2806 | |
2807 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc"); | |
2808 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src); | |
2809 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT); | |
2810 } | |
2811 | |
2812 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit | |
2813 values are handled this way, as rotating a 2N-bit value by N bits is | |
2814 generally not equivalent to a bswap. Consider for instance 0x01020304 r>> 16, | |
2815 which gives 0x03040102 while a bswap for that value is 0x04030201. */ | |
2816 if (bswap && n->range == 16) | |
2817 { | |
2818 tree count = build_int_cst (NULL, BITS_PER_UNIT); | |
2819 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count); | |
2820 bswap_stmt = gimple_build_assign (NULL, src); | |
2821 } | |
2822 else | |
2823 bswap_stmt = gimple_build_call (fndecl, 1, tmp); | |
2824 | |
2825 tmp = tgt; | |
2826 | |
2827 /* Convert the result if necessary. */ | |
2828 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type)) | |
2829 { | |
2830 gimple *convert_stmt; | |
2831 | |
2832 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst"); | |
2833 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp); | |
2834 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT); | |
2835 } | |
2836 | |
2837 gimple_set_lhs (bswap_stmt, tmp); | |
2838 | |
2839 if (dump_file) | |
2840 { | |
2841 fprintf (dump_file, "%d bit bswap implementation found at: ", | |
2842 (int) n->range); | |
2843 print_gimple_stmt (dump_file, cur_stmt, 0); | |
2844 } | |
2845 | |
2846 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT); | |
2847 gsi_remove (&gsi, true); | |
2848 return true; | |
2849 } | |
2850 | |
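/* Illustrative sketch (not part of the GCC sources): the kind of
   hand-written shift-and-mask byte swap that find_bswap_or_nop recognizes
   and bswap_replace rewrites, here into a single __builtin_bswap32 call or
   the target's bswap instruction.  The function name is made up.  */

#include <stdint.h>

uint32_t
swap32_by_hand (uint32_t x)
{
  return ((x & 0x000000ffu) << 24)
         | ((x & 0x0000ff00u) << 8)
         | ((x & 0x00ff0000u) >> 8)
         | ((x & 0xff000000u) >> 24);
}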
2851 /* Find manual byte swap implementations as well as loads in a given | |
2852 endianness. Byte swaps are turned into a bswap builtin invocation, | |
2853 while endian loads are converted to a bswap builtin invocation or a | |
2854 simple load according to the target endianness. */ | |
2855 | |
2856 unsigned int | |
2857 pass_optimize_bswap::execute (function *fun) | |
2858 { | |
2859 basic_block bb; | |
2860 bool bswap32_p, bswap64_p; | |
2861 bool changed = false; | |
2862 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; | |
2863 | |
2864 if (BITS_PER_UNIT != 8) | |
2865 return 0; | |
2866 | |
2867 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) | |
2868 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); | |
2869 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) | |
2870 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing | |
2871 || (bswap32_p && word_mode == SImode))); | |
2872 | |
2873 /* Determine the argument type of the builtins. The code later on | |
2874 assumes that the return and argument type are the same. */ | |
2875 if (bswap32_p) | |
2876 { | |
2877 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); | |
2878 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); | |
2879 } | |
2880 | |
2881 if (bswap64_p) | |
2882 { | |
2883 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); | |
2884 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); | |
2885 } | |
2886 | |
2887 memset (&nop_stats, 0, sizeof (nop_stats)); | |
2888 memset (&bswap_stats, 0, sizeof (bswap_stats)); | |
2889 calculate_dominance_info (CDI_DOMINATORS); | |
2890 | |
2891 FOR_EACH_BB_FN (bb, fun) | |
2892 { | |
2893 gimple_stmt_iterator gsi; | |
2894 | |
2895 /* We do a reverse scan for bswap patterns to make sure we get the | |
2896 widest match. As bswap pattern matching doesn't handle previously | |
2897 inserted smaller bswap replacements as sub-patterns, the wider | |
2898 variant wouldn't be detected. */ | |
2899 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);) | |
2900 { | |
2901 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi); | |
2902 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; | |
2903 enum tree_code code; | |
2904 struct symbolic_number n; | |
2905 bool bswap; | |
2906 | |
2907 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt | |
2908 might be moved to a different basic block by bswap_replace and gsi | |
2909 must not point to it in that case. Doing the gsi_prev here makes | |
2910 sure that gsi points to the statement previous to cur_stmt while | |
2911 still guaranteeing that all statements in this basic block are | |
2912 considered. */ | |
2913 gsi_prev (&gsi); | |
2914 | |
2915 if (!is_gimple_assign (cur_stmt)) | |
2916 continue; | |
2917 | |
2918 code = gimple_assign_rhs_code (cur_stmt); | |
2919 switch (code) | |
2920 { | |
2921 case LROTATE_EXPR: | |
2922 case RROTATE_EXPR: | |
2923 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt)) | |
2924 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt)) | |
2925 % BITS_PER_UNIT) | |
2926 continue; | |
2927 /* Fall through. */ | |
2928 case BIT_IOR_EXPR: | |
2929 break; | |
2930 default: | |
2931 continue; | |
2932 } | |
2933 | |
2934 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); | |
2935 | |
2936 if (!ins_stmt) | |
2937 continue; | |
2938 | |
2939 switch (n.range) | |
2940 { | |
2941 case 16: | |
2942 /* Already in canonical form, nothing to do. */ | |
2943 if (code == LROTATE_EXPR || code == RROTATE_EXPR) | |
2944 continue; | |
2945 load_type = bswap_type = uint16_type_node; | |
2946 break; | |
2947 case 32: | |
2948 load_type = uint32_type_node; | |
2949 if (bswap32_p) | |
2950 { | |
2951 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); | |
2952 bswap_type = bswap32_type; | |
2953 } | |
2954 break; | |
2955 case 64: | |
2956 load_type = uint64_type_node; | |
2957 if (bswap64_p) | |
2958 { | |
2959 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); | |
2960 bswap_type = bswap64_type; | |
2961 } | |
2962 break; | |
2963 default: | |
2964 continue; | |
2965 } | |
2966 | |
2967 if (bswap && !fndecl && n.range != 16) | |
2968 continue; | |
2969 | |
2970 if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type, | |
2971 &n, bswap)) | |
2972 changed = true; | |
2973 } | |
2974 } | |
2975 | |
2976 statistics_counter_event (fun, "16-bit nop implementations found", | |
2977 nop_stats.found_16bit); | |
2978 statistics_counter_event (fun, "32-bit nop implementations found", | |
2979 nop_stats.found_32bit); | |
2980 statistics_counter_event (fun, "64-bit nop implementations found", | |
2981 nop_stats.found_64bit); | |
2982 statistics_counter_event (fun, "16-bit bswap implementations found", | |
2983 bswap_stats.found_16bit); | |
2984 statistics_counter_event (fun, "32-bit bswap implementations found", | |
2985 bswap_stats.found_32bit); | |
2986 statistics_counter_event (fun, "64-bit bswap implementations found", | |
2987 bswap_stats.found_64bit); | |
2988 | |
2989 return (changed ? TODO_update_ssa : 0); | |
2990 } | |
2991 | |
2992 } // anon namespace | |
2993 | |
2994 gimple_opt_pass * | |
2995 make_pass_optimize_bswap (gcc::context *ctxt) | |
2996 { | |
2997 return new pass_optimize_bswap (ctxt); | |
2998 } | |
2999 | |
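/* Illustrative sketch (not part of the GCC sources): the other shape the
   pass above handles, a value assembled byte by byte in a fixed endianness.
   On a little-endian target the sequence below can become a plain 32-bit
   load; on a big-endian target, a load followed by a byte swap.  The
   function name is made up.  */

#include <stdint.h>

uint32_t
read_le32 (const unsigned char *p)
{
  return (uint32_t) p[0]
         | ((uint32_t) p[1] << 8)
         | ((uint32_t) p[2] << 16)
         | ((uint32_t) p[3] << 24);
}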
3000 /* Return true if stmt is a type conversion operation that can be stripped | 2283 /* Return true if stmt is a type conversion operation that can be stripped |
3001 when used in a widening multiply operation. */ | 2284 when used in a widening multiply operation. */ |
3002 static bool | 2285 static bool |
3003 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) | 2286 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |
3004 { | 2287 { |
3109 tree *type1_out, tree *rhs1_out, | 2392 tree *type1_out, tree *rhs1_out, |
3110 tree *type2_out, tree *rhs2_out) | 2393 tree *type2_out, tree *rhs2_out) |
3111 { | 2394 { |
3112 tree type = TREE_TYPE (gimple_assign_lhs (stmt)); | 2395 tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
3113 | 2396 |
3114 if (TREE_CODE (type) != INTEGER_TYPE | 2397 if (TREE_CODE (type) == INTEGER_TYPE) |
3115 && TREE_CODE (type) != FIXED_POINT_TYPE) | 2398 { |
2399 if (TYPE_OVERFLOW_TRAPS (type)) | |
2400 return false; | |
2401 } | |
2402 else if (TREE_CODE (type) != FIXED_POINT_TYPE) | |
3116 return false; | 2403 return false; |
3117 | 2404 |
3118 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, | 2405 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, |
3119 rhs1_out)) | 2406 rhs1_out)) |
3120 return false; | 2407 return false; |
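/* Illustrative sketch (not part of the GCC sources): a source-level
   multiplication that is_widening_mult_p recognizes.  Both operands are only
   32 bits wide, so convert_mult_to_widen can use the target's widening
   multiply (e.g. a mulsidi3-style pattern) instead of extending both
   operands and doing a full 64x64 multiply.  The function name is made
   up.  */

#include <stdint.h>

int64_t
widening_mul (int32_t a, int32_t b)
{
  return (int64_t) a * b;
}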
3240 static bool | 2527 static bool |
3241 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) | 2528 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) |
3242 { | 2529 { |
3243 tree lhs, rhs1, rhs2, type, type1, type2; | 2530 tree lhs, rhs1, rhs2, type, type1, type2; |
3244 enum insn_code handler; | 2531 enum insn_code handler; |
3245 machine_mode to_mode, from_mode, actual_mode; | 2532 scalar_int_mode to_mode, from_mode, actual_mode; |
3246 optab op; | 2533 optab op; |
3247 int actual_precision; | 2534 int actual_precision; |
3248 location_t loc = gimple_location (stmt); | 2535 location_t loc = gimple_location (stmt); |
3249 bool from_unsigned1, from_unsigned2; | 2536 bool from_unsigned1, from_unsigned2; |
3250 | 2537 |
3256 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) | 2543 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) |
3257 return false; | 2544 return false; |
3258 | 2545 |
3259 to_mode = SCALAR_INT_TYPE_MODE (type); | 2546 to_mode = SCALAR_INT_TYPE_MODE (type); |
3260 from_mode = SCALAR_INT_TYPE_MODE (type1); | 2547 from_mode = SCALAR_INT_TYPE_MODE (type1); |
2548 if (to_mode == from_mode) | |
2549 return false; | |
2550 | |
3261 from_unsigned1 = TYPE_UNSIGNED (type1); | 2551 from_unsigned1 = TYPE_UNSIGNED (type1); |
3262 from_unsigned2 = TYPE_UNSIGNED (type2); | 2552 from_unsigned2 = TYPE_UNSIGNED (type2); |
3263 | 2553 |
3264 if (from_unsigned1 && from_unsigned2) | 2554 if (from_unsigned1 && from_unsigned2) |
3265 op = umul_widen_optab; | 2555 op = umul_widen_optab; |
3267 op = smul_widen_optab; | 2557 op = smul_widen_optab; |
3268 else | 2558 else |
3269 op = usmul_widen_optab; | 2559 op = usmul_widen_optab; |
3270 | 2560 |
3271 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, | 2561 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, |
3272 0, &actual_mode); | 2562 &actual_mode); |
3273 | 2563 |
3274 if (handler == CODE_FOR_nothing) | 2564 if (handler == CODE_FOR_nothing) |
3275 { | 2565 { |
3276 if (op != smul_widen_optab) | 2566 if (op != smul_widen_optab) |
3277 { | 2567 { |
3288 return false; | 2578 return false; |
3289 } | 2579 } |
3290 | 2580 |
3291 op = smul_widen_optab; | 2581 op = smul_widen_optab; |
3292 handler = find_widening_optab_handler_and_mode (op, to_mode, | 2582 handler = find_widening_optab_handler_and_mode (op, to_mode, |
3293 from_mode, 0, | 2583 from_mode, |
3294 &actual_mode); | 2584 &actual_mode); |
3295 | 2585 |
3296 if (handler == CODE_FOR_nothing) | 2586 if (handler == CODE_FOR_nothing) |
3297 return false; | 2587 return false; |
3298 | 2588 |
3348 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; | 2638 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
3349 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; | 2639 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; |
3350 optab this_optab; | 2640 optab this_optab; |
3351 enum tree_code wmult_code; | 2641 enum tree_code wmult_code; |
3352 enum insn_code handler; | 2642 enum insn_code handler; |
3353 scalar_mode to_mode, from_mode; | 2643 scalar_mode to_mode, from_mode, actual_mode; |
3354 machine_mode actual_mode; | |
3355 location_t loc = gimple_location (stmt); | 2644 location_t loc = gimple_location (stmt); |
3356 int actual_precision; | 2645 int actual_precision; |
3357 bool from_unsigned1, from_unsigned2; | 2646 bool from_unsigned1, from_unsigned2; |
3358 | 2647 |
3359 lhs = gimple_assign_lhs (stmt); | 2648 lhs = gimple_assign_lhs (stmt); |
3447 else | 2736 else |
3448 return false; | 2737 return false; |
3449 | 2738 |
3450 to_mode = SCALAR_TYPE_MODE (type); | 2739 to_mode = SCALAR_TYPE_MODE (type); |
3451 from_mode = SCALAR_TYPE_MODE (type1); | 2740 from_mode = SCALAR_TYPE_MODE (type1); |
2741 if (to_mode == from_mode) | |
2742 return false; | |
2743 | |
3452 from_unsigned1 = TYPE_UNSIGNED (type1); | 2744 from_unsigned1 = TYPE_UNSIGNED (type1); |
3453 from_unsigned2 = TYPE_UNSIGNED (type2); | 2745 from_unsigned2 = TYPE_UNSIGNED (type2); |
3454 optype = type1; | 2746 optype = type1; |
3455 | 2747 |
3456 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ | 2748 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
3507 /* Verify that the machine can perform a widening multiply | 2799 /* Verify that the machine can perform a widening multiply |
3508 accumulate in this mode/signedness combination, otherwise | 2800 accumulate in this mode/signedness combination, otherwise |
3509 this transformation is likely to pessimize code. */ | 2801 this transformation is likely to pessimize code. */ |
3510 this_optab = optab_for_tree_code (wmult_code, optype, optab_default); | 2802 this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
3511 handler = find_widening_optab_handler_and_mode (this_optab, to_mode, | 2803 handler = find_widening_optab_handler_and_mode (this_optab, to_mode, |
3512 from_mode, 0, &actual_mode); | 2804 from_mode, &actual_mode); |
3513 | 2805 |
3514 if (handler == CODE_FOR_nothing) | 2806 if (handler == CODE_FOR_nothing) |
3515 return false; | 2807 return false; |
3516 | 2808 |
3517 /* Ensure that the inputs to the handler are in the correct precision | 2809 /* Ensure that the inputs to the handler are in the correct precision |
3544 update_stmt (gsi_stmt (*gsi)); | 2836 update_stmt (gsi_stmt (*gsi)); |
3545 widen_mul_stats.maccs_inserted++; | 2837 widen_mul_stats.maccs_inserted++; |
3546 return true; | 2838 return true; |
3547 } | 2839 } |
3548 | 2840 |
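/* Illustrative sketch (not part of the GCC sources): the multiply-accumulate
   shape convert_plusminus_to_widen looks for.  When the target provides a
   widening multiply-accumulate instruction, the addition and the widening
   multiplication below can be combined into a single widening macc
   (WIDEN_MULT_PLUS_EXPR).  The function name is made up.  */

#include <stdint.h>

int64_t
widening_macc (int64_t acc, int32_t a, int32_t b)
{
  return acc + (int64_t) a * b;
}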
2841 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and | |
2842 OP2 and which we know is used in statements that can be, together with the | |
2843 multiplication, converted to FMAs, perform the transformation. */ | |
2844 | |
2845 static void | |
2846 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2) | |
2847 { | |
2848 tree type = TREE_TYPE (mul_result); | |
2849 gimple *use_stmt; | |
2850 imm_use_iterator imm_iter; | |
2851 gcall *fma_stmt; | |
2852 | |
2853 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) | |
2854 { | |
2855 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); | |
2856 tree addop, mulop1 = op1, result = mul_result; | |
2857 bool negate_p = false; | |
2858 gimple_seq seq = NULL; | |
2859 | |
2860 if (is_gimple_debug (use_stmt)) | |
2861 continue; | |
2862 | |
2863 if (is_gimple_assign (use_stmt) | |
2864 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR) | |
2865 { | |
2866 result = gimple_assign_lhs (use_stmt); | |
2867 use_operand_p use_p; | |
2868 gimple *neguse_stmt; | |
2869 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); | |
2870 gsi_remove (&gsi, true); | |
2871 release_defs (use_stmt); | |
2872 | |
2873 use_stmt = neguse_stmt; | |
2874 gsi = gsi_for_stmt (use_stmt); | |
2875 negate_p = true; | |
2876 } | |
2877 | |
2878 tree cond, else_value, ops[3]; | |
2879 tree_code code; | |
2880 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, | |
2881 ops, &else_value)) | |
2882 gcc_unreachable (); | |
2883 addop = ops[0] == result ? ops[1] : ops[0]; | |
2884 | |
2885 if (code == MINUS_EXPR) | |
2886 { | |
2887 if (ops[0] == result) | |
2888 /* a * b - c -> a * b + (-c) */ | |
2889 addop = gimple_build (&seq, NEGATE_EXPR, type, addop); | |
2890 else | |
2891 /* a - b * c -> (-b) * c + a */ | |
2892 negate_p = !negate_p; | |
2893 } | |
2894 | |
2895 if (negate_p) | |
2896 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1); | |
2897 | |
2898 if (seq) | |
2899 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); | |
2900 | |
2901 if (cond) | |
2902 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1, | |
2903 op2, addop, else_value); | |
2904 else | |
2905 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop); | |
2906 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt)); | |
2907 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun, | |
2908 use_stmt)); | |
2909 gsi_replace (&gsi, fma_stmt, true); | |
2910 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS | |
2911 regardless of where the negation occurs. */ | |
2912 if (fold_stmt (&gsi, follow_all_ssa_edges)) | |
2913 update_stmt (gsi_stmt (gsi)); | |
2914 | |
2915 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2916 { | |
2917 fprintf (dump_file, "Generated FMA "); | |
2918 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE); | |
2919 fprintf (dump_file, "\n"); | |
2920 } | |
2921 | |
2922 widen_mul_stats.fmas_inserted++; | |
2923 } | |
2924 } | |
2925 | |
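/* Illustrative sketch (not part of the GCC sources): shapes the rewrite
   above can produce once the follow_all_ssa_edges folding has run, assuming
   the target supports the corresponding internal functions:

     a * b + c      ->  IFN_FMA  (a, b, c)
     a * b - c      ->  IFN_FMS  (a, b, c)   (via a * b + (-c))
     c - a * b      ->  IFN_FNMA (a, b, c)   (via (-a) * b + c)
     -(a * b) - c   ->  IFN_FNMS (a, b, c)

   At the source level, with contraction enabled (e.g. -ffp-contract=fast),
   the made-up function below becomes a single fused multiply-add on capable
   targets.  */

double
fused_madd (double a, double b, double c)
{
  return a * b + c;
}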
2926 /* Data necessary to perform the actual transformation from a multiplication | |
2927 and an addition to an FMA once the decision to do so has been taken, and to | |
2928 then delete the multiplication statement from the function IL. */ | |
2929 | |
2930 struct fma_transformation_info | |
2931 { | |
2932 gimple *mul_stmt; | |
2933 tree mul_result; | |
2934 tree op1; | |
2935 tree op2; | |
2936 }; | |
2937 | |
2938 /* Structure containing the current state of FMA deferring, i.e. whether we are | |
2939 deferring, whether to continue deferring, and all data necessary to come | |
2940 back and perform all deferred transformations. */ | |
2941 | |
2942 class fma_deferring_state | |
2943 { | |
2944 public: | |
2945 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually | |
2946 do any deferring. */ | |
2947 | |
2948 fma_deferring_state (bool perform_deferring) | |
2949 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL), | |
2950 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {} | |
2951 | |
2952 /* List of FMA candidates for which the transformation has been determined | |
2953 to be possible but which, at this point in the BB analysis, we do not | |
2954 consider beneficial. */ | |
2955 auto_vec<fma_transformation_info, 8> m_candidates; | |
2956 | |
2957 /* Set of multiplication results that are part of already deferred FMA | |
2958 candidates. */ | |
2959 hash_set<tree> m_mul_result_set; | |
2960 | |
2961 /* The PHI that supposedly feeds the result of one FMA back to another across | |
2962 a loop boundary. */ | |
2963 gphi *m_initial_phi; | |
2964 | |
2965 /* Result of the last produced FMA candidate or NULL if there has not been | |
2966 one. */ | |
2967 tree m_last_result; | |
2968 | |
2969 /* If true, deferring might still be profitable. If false, transform all | |
2970 candidates and no longer defer. */ | |
2971 bool m_deferring_p; | |
2972 }; | |
2973 | |
2974 /* Transform all deferred FMA candidates and mark STATE as no longer | |
2975 deferring. */ | |
2976 | |
2977 static void | |
2978 cancel_fma_deferring (fma_deferring_state *state) | |
2979 { | |
2980 if (!state->m_deferring_p) | |
2981 return; | |
2982 | |
2983 for (unsigned i = 0; i < state->m_candidates.length (); i++) | |
2984 { | |
2985 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2986 fprintf (dump_file, "Generating deferred FMA\n"); | |
2987 | |
2988 const fma_transformation_info &fti = state->m_candidates[i]; | |
2989 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2); | |
2990 | |
2991 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt); | |
2992 gsi_remove (&gsi, true); | |
2993 release_defs (fti.mul_stmt); | |
2994 } | |
2995 state->m_deferring_p = false; | |
2996 } | |
2997 | |
2998 /* If OP is an SSA name defined by a PHI node, return the PHI statement. | |
2999 Otherwise return NULL. */ | |
3000 | |
3001 static gphi * | |
3002 result_of_phi (tree op) | |
3003 { | |
3004 if (TREE_CODE (op) != SSA_NAME) | |
3005 return NULL; | |
3006 | |
3007 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op)); | |
3008 } | |
3009 | |
3010 /* After processing statements of a BB and recording STATE, return true if the | |
3011 initial phi is fed by the last FMA candidate result or one such result from | |
3012 previously processed BBs marked in LAST_RESULT_SET. */ | |
3013 | |
3014 static bool | |
3015 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, | |
3016 hash_set<tree> *last_result_set) | |
3017 { | |
3018 ssa_op_iter iter; | |
3019 use_operand_p use; | |
3020 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE) | |
3021 { | |
3022 tree t = USE_FROM_PTR (use); | |
3023 if (t == state->m_last_result | |
3024 || last_result_set->contains (t)) | |
3025 return true; | |
3026 } | |
3027 | |
3028 return false; | |
3029 } | |
3030 | |
3549 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 | 3031 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
3550 with uses in additions and subtractions to form fused multiply-add | 3032 with uses in additions and subtractions to form fused multiply-add |
3551 operations. Returns true if successful and MUL_STMT should be removed. */ | 3033 operations. Returns true if successful and MUL_STMT should be removed. |
3034 | |
3035 If STATE indicates that we are deferring FMA transformation, that means | |
3036 that we do not produce FMAs for basic blocks which look like: | |
3037 | |
3038 <bb 6> | |
3039 # accumulator_111 = PHI <0.0(5), accumulator_66(6)> | |
3040 _65 = _14 * _16; | |
3041 accumulator_66 = _65 + accumulator_111; | |
3042 | |
3043 or its unrolled version, i.e. with several FMA candidates that feed result | |
3044 of one into the addend of another. Instead, we add them to a list in STATE | |
3045 and if we later discover an FMA candidate that is not part of such a chain, | |
3046 we go back and perform all previously deferred candidates. */ | |
3552 | 3047 |
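/* Illustrative sketch (not part of the GCC sources): a source loop that
   produces the GIMPLE shape shown in the comment above.  Each iteration's
   multiplication feeds the addend of the next through the accumulator PHI,
   so while PARAM_AVOID_FMA_MAX_BITS is in effect these candidates are
   deferred rather than fused eagerly.  The function name is made up.  */

double
dot_product (const double *a, const double *b, int n)
{
  double acc = 0.0;
  for (int i = 0; i < n; i++)
    acc += a[i] * b[i];
  return acc;
}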
3553 static bool | 3048 static bool |
3554 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) | 3049 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, |
3050 fma_deferring_state *state) | |
3555 { | 3051 { |
3556 tree mul_result = gimple_get_lhs (mul_stmt); | 3052 tree mul_result = gimple_get_lhs (mul_stmt); |
3557 tree type = TREE_TYPE (mul_result); | 3053 tree type = TREE_TYPE (mul_result); |
3558 gimple *use_stmt, *neguse_stmt; | 3054 gimple *use_stmt, *neguse_stmt; |
3559 gassign *fma_stmt; | |
3560 use_operand_p use_p; | 3055 use_operand_p use_p; |
3561 imm_use_iterator imm_iter; | 3056 imm_use_iterator imm_iter; |
3562 | 3057 |
3563 if (FLOAT_TYPE_P (type) | 3058 if (FLOAT_TYPE_P (type) |
3564 && flag_fp_contract_mode == FP_CONTRACT_OFF) | 3059 && flag_fp_contract_mode == FP_CONTRACT_OFF) |
3565 return false; | 3060 return false; |
3566 | 3061 |
3567 /* We don't want to do bitfield reduction ops. */ | 3062 /* We don't want to do bitfield reduction ops. */ |
3568 if (INTEGRAL_TYPE_P (type) | 3063 if (INTEGRAL_TYPE_P (type) |
3569 && !type_has_mode_precision_p (type)) | 3064 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type))) |
3570 return false; | 3065 return false; |
3571 | 3066 |
3572 /* If the target doesn't support it, don't generate it. We assume that | 3067 /* If the target doesn't support it, don't generate it. We assume that |
3573 if fma isn't available then fms, fnma or fnms are not either. */ | 3068 if fma isn't available then fms, fnma or fnms are not either. */ |
3574 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) | 3069 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt)); |
3070 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type)) | |
3575 return false; | 3071 return false; |
3576 | 3072 |
3577 /* If the multiplication has zero uses, it is kept around probably because | 3073 /* If the multiplication has zero uses, it is kept around probably because |
3578 of -fnon-call-exceptions. Don't optimize it away in that case, | 3074 of -fnon-call-exceptions. Don't optimize it away in that case, |
3579 it is DCE's job. */ | 3075 it is DCE's job. */ |
3580 if (has_zero_uses (mul_result)) | 3076 if (has_zero_uses (mul_result)) |
3581 return false; | 3077 return false; |
3582 | 3078 |
3079 bool check_defer | |
3080 = (state->m_deferring_p | |
3081 && (tree_to_shwi (TYPE_SIZE (type)) | |
3082 <= PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS))); | |
3083 bool defer = check_defer; | |
3583 /* Make sure that the multiplication statement becomes dead after | 3084 /* Make sure that the multiplication statement becomes dead after |
3584 the transformation, and thus that all uses are transformed to FMAs. | 3085 the transformation, and thus that all uses are transformed to FMAs. |
3585 This means we assume that an FMA operation has the same cost | 3086 This means we assume that an FMA operation has the same cost |
3586 as an addition. */ | 3087 as an addition. */ |
3587 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) | 3088 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) |
3588 { | 3089 { |
3589 enum tree_code use_code; | |
3590 tree result = mul_result; | 3090 tree result = mul_result; |
3591 bool negate_p = false; | 3091 bool negate_p = false; |
3592 | 3092 |
3593 use_stmt = USE_STMT (use_p); | 3093 use_stmt = USE_STMT (use_p); |
3594 | 3094 |
3605 to form a fma in the then block and sink the multiplication to the | 3105 to form a fma in the then block and sink the multiplication to the |
3606 else block. */ | 3106 else block. */ |
3607 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | 3107 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) |
3608 return false; | 3108 return false; |
3609 | 3109 |
3610 if (!is_gimple_assign (use_stmt)) | |
3611 return false; | |
3612 | |
3613 use_code = gimple_assign_rhs_code (use_stmt); | |
3614 | |
3615 /* A negate on the multiplication leads to FNMA. */ | 3110 /* A negate on the multiplication leads to FNMA. */ |
3616 if (use_code == NEGATE_EXPR) | 3111 if (is_gimple_assign (use_stmt) |
3112 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR) | |
3617 { | 3113 { |
3618 ssa_op_iter iter; | 3114 ssa_op_iter iter; |
3619 use_operand_p usep; | 3115 use_operand_p usep; |
3620 | 3116 |
3621 result = gimple_assign_lhs (use_stmt); | 3117 result = gimple_assign_lhs (use_stmt); |
3633 | 3129 |
3634 /* Re-validate. */ | 3130 /* Re-validate. */ |
3635 use_stmt = neguse_stmt; | 3131 use_stmt = neguse_stmt; |
3636 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) | 3132 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) |
3637 return false; | 3133 return false; |
3638 if (!is_gimple_assign (use_stmt)) | 3134 |
3639 return false; | |
3640 | |
3641 use_code = gimple_assign_rhs_code (use_stmt); | |
3642 negate_p = true; | 3135 negate_p = true; |
3643 } | 3136 } |
3644 | 3137 |
3645 switch (use_code) | 3138 tree cond, else_value, ops[3]; |
3139 tree_code code; | |
3140 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops, | |
3141 &else_value)) | |
3142 return false; | |
3143 | |
3144 switch (code) | |
3646 { | 3145 { |
3647 case MINUS_EXPR: | 3146 case MINUS_EXPR: |
3648 if (gimple_assign_rhs2 (use_stmt) == result) | 3147 if (ops[1] == result) |
3649 negate_p = !negate_p; | 3148 negate_p = !negate_p; |
3650 break; | 3149 break; |
3651 case PLUS_EXPR: | 3150 case PLUS_EXPR: |
3652 break; | 3151 break; |
3653 default: | 3152 default: |
3654 /* FMA can only be formed from PLUS and MINUS. */ | 3153 /* FMA can only be formed from PLUS and MINUS. */ |
3655 return false; | 3154 return false; |
3656 } | 3155 } |
3657 | 3156 |
3658 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed | 3157 if (cond) |
3659 by a MULT_EXPR that we'll visit later, we might be able to | 3158 { |
3660 get a more profitable match with fnma. | 3159 if (cond == result || else_value == result) |
3160 return false; | |
3161 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type)) | |
3162 return false; | |
3163 } | |
3164 | |
3165 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that | |
3166 we'll visit later, we might be able to get a more profitable | |
3167 match with fnma. | |
3661 OTOH, if we don't, a negate / fma pair has likely lower latency | 3168 OTOH, if we don't, a negate / fma pair has likely lower latency |
3662 than a mult / subtract pair. */ | 3169 than a mult / subtract pair. */ |
3663 if (use_code == MINUS_EXPR && !negate_p | 3170 if (code == MINUS_EXPR |
3664 && gimple_assign_rhs1 (use_stmt) == result | 3171 && !negate_p |
3665 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing | 3172 && ops[0] == result |
3666 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) | 3173 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type) |
3667 { | 3174 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type) |
3668 tree rhs2 = gimple_assign_rhs2 (use_stmt); | 3175 && TREE_CODE (ops[1]) == SSA_NAME |
3669 | 3176 && has_single_use (ops[1])) |
3670 if (TREE_CODE (rhs2) == SSA_NAME) | 3177 { |
3178 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]); | |
3179 if (is_gimple_assign (stmt2) | |
3180 && gimple_assign_rhs_code (stmt2) == MULT_EXPR) | |
3181 return false; | |
3182 } | |
3183 | |
3184 /* We can't handle a * b + a * b. */ | |
3185 if (ops[0] == ops[1]) | |
3186 return false; | |
3187 /* If deferring, make sure we are not looking at an instruction that | |
3188 wouldn't have existed if we were not. */ | |
3189 if (state->m_deferring_p | |
3190 && (state->m_mul_result_set.contains (ops[0]) | |
3191 || state->m_mul_result_set.contains (ops[1]))) | |
3192 return false; | |
3193 | |
3194 if (check_defer) | |
3195 { | |
3196 tree use_lhs = gimple_get_lhs (use_stmt); | |
3197 if (state->m_last_result) | |
3671 { | 3198 { |
3672 gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2); | 3199 if (ops[1] == state->m_last_result |
3673 if (has_single_use (rhs2) | 3200 || ops[0] == state->m_last_result) |
3674 && is_gimple_assign (stmt2) | 3201 defer = true; |
3675 && gimple_assign_rhs_code (stmt2) == MULT_EXPR) | 3202 else |
3676 return false; | 3203 defer = false; |
3677 } | 3204 } |
3678 } | 3205 else |
3679 | 3206 { |
3680 /* We can't handle a * b + a * b. */ | 3207 gcc_checking_assert (!state->m_initial_phi); |
3681 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) | 3208 gphi *phi; |
3682 return false; | 3209 if (ops[0] == result) |
3683 | 3210 phi = result_of_phi (ops[1]); |
3684 /* While it is possible to validate whether or not the exact form | 3211 else |
3685 that we've recognized is available in the backend, the assumption | 3212 { |
3686 is that the transformation is never a loss. For instance, suppose | 3213 gcc_assert (ops[1] == result); |
3687 the target only has the plain FMA pattern available. Consider | 3214 phi = result_of_phi (ops[0]); |
3688 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which | 3215 } |
3689 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we | 3216 |
3690 still have 3 operations, but in the FMA form the two NEGs are | 3217 if (phi) |
3691 independent and could be run in parallel. */ | 3218 { |
3692 } | 3219 state->m_initial_phi = phi; |
3693 | 3220 defer = true; |
3694 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) | 3221 } |
3695 { | 3222 else |
3696 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); | 3223 defer = false; |
3697 enum tree_code use_code; | 3224 } |
3698 tree addop, mulop1 = op1, result = mul_result; | 3225 |
3699 bool negate_p = false; | 3226 state->m_last_result = use_lhs; |
3700 | 3227 check_defer = false; |
3701 if (is_gimple_debug (use_stmt)) | |
3702 continue; | |
3703 | |
3704 use_code = gimple_assign_rhs_code (use_stmt); | |
3705 if (use_code == NEGATE_EXPR) | |
3706 { | |
3707 result = gimple_assign_lhs (use_stmt); | |
3708 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); | |
3709 gsi_remove (&gsi, true); | |
3710 release_defs (use_stmt); | |
3711 | |
3712 use_stmt = neguse_stmt; | |
3713 gsi = gsi_for_stmt (use_stmt); | |
3714 use_code = gimple_assign_rhs_code (use_stmt); | |
3715 negate_p = true; | |
3716 } | |
3717 | |
3718 if (gimple_assign_rhs1 (use_stmt) == result) | |
3719 { | |
3720 addop = gimple_assign_rhs2 (use_stmt); | |
3721 /* a * b - c -> a * b + (-c) */ | |
3722 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
3723 addop = force_gimple_operand_gsi (&gsi, | |
3724 build1 (NEGATE_EXPR, | |
3725 type, addop), | |
3726 true, NULL_TREE, true, | |
3727 GSI_SAME_STMT); | |
3728 } | 3228 } |
3729 else | 3229 else |
3730 { | 3230 defer = false; |
3731 addop = gimple_assign_rhs1 (use_stmt); | 3231 |
3732 /* a - b * c -> (-b) * c + a */ | 3232 /* While it is possible to validate whether or not the exact form that |
3733 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | 3233 we've recognized is available in the backend, the assumption is that |
3734 negate_p = !negate_p; | 3234 if the deferring logic above did not trigger, the transformation is |
3735 } | 3235 never a loss. For instance, suppose the target only has the plain FMA |
3736 | 3236 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged |
3737 if (negate_p) | 3237 MUL+SUB for FMA+NEG, which is still two operations. Consider |
3738 mulop1 = force_gimple_operand_gsi (&gsi, | 3238 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA |
3739 build1 (NEGATE_EXPR, | 3239 form the two NEGs are independent and could be run in parallel. */ |
3740 type, mulop1), | 3240 } |
3741 true, NULL_TREE, true, | 3241 |
3742 GSI_SAME_STMT); | 3242 if (defer) |
3743 | 3243 { |
3744 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt), | 3244 fma_transformation_info fti; |
3745 FMA_EXPR, mulop1, op2, addop); | 3245 fti.mul_stmt = mul_stmt; |
3746 gsi_replace (&gsi, fma_stmt, true); | 3246 fti.mul_result = mul_result; |
3747 widen_mul_stats.fmas_inserted++; | 3247 fti.op1 = op1; |
3748 } | 3248 fti.op2 = op2; |
3749 | 3249 state->m_candidates.safe_push (fti); |
3750 return true; | 3250 state->m_mul_result_set.add (mul_result); |
3251 | |
3252 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3253 { | |
3254 fprintf (dump_file, "Deferred generating FMA for multiplication "); | |
3255 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE); | |
3256 fprintf (dump_file, "\n"); | |
3257 } | |
3258 | |
3259 return false; | |
3260 } | |
3261 else | |
3262 { | |
3263 if (state->m_deferring_p) | |
3264 cancel_fma_deferring (state); | |
3265 convert_mult_to_fma_1 (mul_result, op1, op2); | |
3266 return true; | |
3267 } | |
3751 } | 3268 } |
3752 | 3269 |
3753 | 3270 |
3754 /* Helper function of match_uaddsub_overflow. Return 1 | 3271 /* Helper function of match_uaddsub_overflow. Return 1 |
3755 if USE_STMT is unsigned overflow check ovf != 0 for | 3272 if USE_STMT is unsigned overflow check ovf != 0 for |
4016 IMAGPART_EXPR for mod). */ | 3533 IMAGPART_EXPR for mod). */ |
4017 | 3534 |
4018 static bool | 3535 static bool |
4019 convert_to_divmod (gassign *stmt) | 3536 convert_to_divmod (gassign *stmt) |
4020 { | 3537 { |
4021 if (stmt_can_throw_internal (stmt) | 3538 if (stmt_can_throw_internal (cfun, stmt) |
4022 || !divmod_candidate_p (stmt)) | 3539 || !divmod_candidate_p (stmt)) |
4023 return false; | 3540 return false; |
4024 | 3541 |
4025 tree op1 = gimple_assign_rhs1 (stmt); | 3542 tree op1 = gimple_assign_rhs1 (stmt); |
4026 tree op2 = gimple_assign_rhs2 (stmt); | 3543 tree op2 = gimple_assign_rhs2 (stmt); |
4042 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR | 3559 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR |
4043 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) | 3560 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) |
4044 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0) | 3561 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0) |
4045 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0)) | 3562 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0)) |
4046 { | 3563 { |
4047 if (stmt_can_throw_internal (use_stmt)) | 3564 if (stmt_can_throw_internal (cfun, use_stmt)) |
4048 continue; | 3565 continue; |
4049 | 3566 |
4050 basic_block bb = gimple_bb (use_stmt); | 3567 basic_block bb = gimple_bb (use_stmt); |
4051 | 3568 |
4052 if (bb == top_bb) | 3569 if (bb == top_bb) |
4080 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) | 3597 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) |
4081 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0) | 3598 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0) |
4082 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0)) | 3599 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0)) |
4083 { | 3600 { |
4084 if (use_stmt == top_stmt | 3601 if (use_stmt == top_stmt |
4085 || stmt_can_throw_internal (use_stmt) | 3602 || stmt_can_throw_internal (cfun, use_stmt) |
4086 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb)) | 3603 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb)) |
4087 continue; | 3604 continue; |
4088 | 3605 |
4089 stmts.safe_push (use_stmt); | 3606 stmts.safe_push (use_stmt); |
4090 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR) | 3607 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR) |
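/* Illustrative sketch (not part of the GCC sources): the division/modulo
   pair convert_to_divmod looks for.  When the target has a divmod pattern or
   libfunc, the two statements below are replaced by one DIVMOD internal
   call, the quotient being extracted with REALPART_EXPR and the remainder
   with IMAGPART_EXPR.  The function name is made up.  */

void
quot_rem (int a, int b, int *q, int *r)
{
  *q = a / b;   /* TRUNC_DIV_EXPR */
  *r = a % b;   /* TRUNC_MOD_EXPR on the same operands.  */
}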
4150 const pass_data pass_data_optimize_widening_mul = | 3667 const pass_data pass_data_optimize_widening_mul = |
4151 { | 3668 { |
4152 GIMPLE_PASS, /* type */ | 3669 GIMPLE_PASS, /* type */ |
4153 "widening_mul", /* name */ | 3670 "widening_mul", /* name */ |
4154 OPTGROUP_NONE, /* optinfo_flags */ | 3671 OPTGROUP_NONE, /* optinfo_flags */ |
4155 TV_NONE, /* tv_id */ | 3672 TV_TREE_WIDEN_MUL, /* tv_id */ |
4156 PROP_ssa, /* properties_required */ | 3673 PROP_ssa, /* properties_required */ |
4157 0, /* properties_provided */ | 3674 0, /* properties_provided */ |
4158 0, /* properties_destroyed */ | 3675 0, /* properties_destroyed */ |
4159 0, /* todo_flags_start */ | 3676 0, /* todo_flags_start */ |
4160 TODO_update_ssa, /* todo_flags_finish */ | 3677 TODO_update_ssa, /* todo_flags_finish */ |
4175 | 3692 |
4176 virtual unsigned int execute (function *); | 3693 virtual unsigned int execute (function *); |
4177 | 3694 |
4178 }; // class pass_optimize_widening_mul | 3695 }; // class pass_optimize_widening_mul |
4179 | 3696 |
4180 unsigned int | 3697 /* Walker class to perform the transformation in reverse dominance order. */ |
4181 pass_optimize_widening_mul::execute (function *fun) | 3698 |
4182 { | 3699 class math_opts_dom_walker : public dom_walker |
4183 basic_block bb; | 3700 { |
4184 bool cfg_changed = false; | 3701 public: |
4185 | 3702 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set |
4186 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); | 3703 if walking modifies the CFG. */ |
4187 calculate_dominance_info (CDI_DOMINATORS); | 3704 |
4188 renumber_gimple_stmt_uids (); | 3705 math_opts_dom_walker (bool *cfg_changed_p) |
4189 | 3706 : dom_walker (CDI_DOMINATORS), m_last_result_set (), |
4190 FOR_EACH_BB_FN (bb, fun) | 3707 m_cfg_changed_p (cfg_changed_p) {} |
4191 { | 3708 |
4192 gimple_stmt_iterator gsi; | 3709 /* The actual actions performed in the walk. */ |
4193 | 3710 |
4194 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) | 3711 virtual void after_dom_children (basic_block); |
4195 { | 3712 |
4196 gimple *stmt = gsi_stmt (gsi); | 3713 /* Set of results of chains of multiply and add statement combinations that |
4197 enum tree_code code; | 3714 were not transformed into FMAs because of active deferring. */ |
4198 | 3715 hash_set<tree> m_last_result_set; |
4199 if (is_gimple_assign (stmt)) | 3716 |
3717 /* Pointer to a caller-provided flag that needs to be set if the CFG has | |
3718 been modified. */ | |
3719 bool *m_cfg_changed_p; | |
3720 }; | |
3721 | |
3722 void | |
3723 math_opts_dom_walker::after_dom_children (basic_block bb) | |
3724 { | |
3725 gimple_stmt_iterator gsi; | |
3726 | |
3727 fma_deferring_state fma_state (PARAM_VALUE (PARAM_AVOID_FMA_MAX_BITS) > 0); | |
3728 | |
3729 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) | |
3730 { | |
3731 gimple *stmt = gsi_stmt (gsi); | |
3732 enum tree_code code; | |
3733 | |
3734 if (is_gimple_assign (stmt)) | |
3735 { | |
3736 code = gimple_assign_rhs_code (stmt); | |
3737 switch (code) | |
4200 { | 3738 { |
4201 code = gimple_assign_rhs_code (stmt); | 3739 case MULT_EXPR: |
4202 switch (code) | 3740 if (!convert_mult_to_widen (stmt, &gsi) |
3741 && !convert_expand_mult_copysign (stmt, &gsi) | |
3742 && convert_mult_to_fma (stmt, | |
3743 gimple_assign_rhs1 (stmt), | |
3744 gimple_assign_rhs2 (stmt), | |
3745 &fma_state)) | |
4203 { | 3746 { |
4204 case MULT_EXPR: | 3747 gsi_remove (&gsi, true); |
4205 if (!convert_mult_to_widen (stmt, &gsi) | 3748 release_defs (stmt); |
4206 && !convert_expand_mult_copysign (stmt, &gsi) | 3749 continue; |
3750 } | |
3751 break; | |
3752 | |
3753 case PLUS_EXPR: | |
3754 case MINUS_EXPR: | |
3755 if (!convert_plusminus_to_widen (&gsi, stmt, code)) | |
3756 match_uaddsub_overflow (&gsi, stmt, code); | |
3757 break; | |
3758 | |
3759 case TRUNC_MOD_EXPR: | |
3760 convert_to_divmod (as_a<gassign *> (stmt)); | |
3761 break; | |
3762 | |
3763 default:; | |
3764 } | |
3765 } | |
3766 else if (is_gimple_call (stmt)) | |
3767 { | |
3768 tree fndecl = gimple_call_fndecl (stmt); | |
3769 if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) | |
3770 { | |
3771 switch (DECL_FUNCTION_CODE (fndecl)) | |
3772 { | |
3773 case BUILT_IN_POWF: | |
3774 case BUILT_IN_POW: | |
3775 case BUILT_IN_POWL: | |
3776 if (gimple_call_lhs (stmt) | |
3777 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST | |
3778 && real_equal | |
3779 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), | |
3780 &dconst2) | |
4207 && convert_mult_to_fma (stmt, | 3781 && convert_mult_to_fma (stmt, |
4208 gimple_assign_rhs1 (stmt), | 3782 gimple_call_arg (stmt, 0), |
4209 gimple_assign_rhs2 (stmt))) | 3783 gimple_call_arg (stmt, 0), |
3784 &fma_state)) | |
4210 { | 3785 { |
4211 gsi_remove (&gsi, true); | 3786 unlink_stmt_vdef (stmt); |
3787 if (gsi_remove (&gsi, true) | |
3788 && gimple_purge_dead_eh_edges (bb)) | |
3789 *m_cfg_changed_p = true; | |
4212 release_defs (stmt); | 3790 release_defs (stmt); |
4213 continue; | 3791 continue; |
4214 } | 3792 } |
4215 break; | 3793 break; |
4216 | 3794 |
4217 case PLUS_EXPR: | |
4218 case MINUS_EXPR: | |
4219 if (!convert_plusminus_to_widen (&gsi, stmt, code)) | |
4220 match_uaddsub_overflow (&gsi, stmt, code); | |
4221 break; | |
4222 | |
4223 case TRUNC_MOD_EXPR: | |
4224 convert_to_divmod (as_a<gassign *> (stmt)); | |
4225 break; | |
4226 | |
4227 default:; | 3795 default:; |
4228 } | 3796 } |
4229 } | 3797 } |
4230 else if (is_gimple_call (stmt) | 3798 else |
4231 && gimple_call_lhs (stmt)) | 3799 cancel_fma_deferring (&fma_state); |
4232 { | 3800 } |
4233 tree fndecl = gimple_call_fndecl (stmt); | 3801 gsi_next (&gsi); |
4234 if (fndecl | 3802 } |
4235 && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) | 3803 if (fma_state.m_deferring_p |
4236 { | 3804 && fma_state.m_initial_phi) |
4237 switch (DECL_FUNCTION_CODE (fndecl)) | 3805 { |
4238 { | 3806 gcc_checking_assert (fma_state.m_last_result); |
4239 case BUILT_IN_POWF: | 3807 if (!last_fma_candidate_feeds_initial_phi (&fma_state, |
4240 case BUILT_IN_POW: | 3808 &m_last_result_set)) |
4241 case BUILT_IN_POWL: | 3809 cancel_fma_deferring (&fma_state); |
4242 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST | 3810 else |
4243 && real_equal | 3811 m_last_result_set.add (fma_state.m_last_result); |
4244 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), | 3812 } |
4245 &dconst2) | 3813 } |
4246 && convert_mult_to_fma (stmt, | 3814 |
4247 gimple_call_arg (stmt, 0), | 3815 |
4248 gimple_call_arg (stmt, 0))) | 3816 unsigned int |
4249 { | 3817 pass_optimize_widening_mul::execute (function *fun) |
4250 unlink_stmt_vdef (stmt); | 3818 { |
4251 if (gsi_remove (&gsi, true) | 3819 bool cfg_changed = false; |
4252 && gimple_purge_dead_eh_edges (bb)) | 3820 |
4253 cfg_changed = true; | 3821 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); |
4254 release_defs (stmt); | 3822 calculate_dominance_info (CDI_DOMINATORS); |
4255 continue; | 3823 renumber_gimple_stmt_uids (); |
4256 } | 3824 |
4257 break; | 3825 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
4258 | |
4259 default:; | |
4260 } | |
4261 } | |
4262 } | |
4263 gsi_next (&gsi); | |
4264 } | |
4265 } | |
4266 | 3826 |
4267 statistics_counter_event (fun, "widening multiplications inserted", | 3827 statistics_counter_event (fun, "widening multiplications inserted", |
4268 widen_mul_stats.widen_mults_inserted); | 3828 widen_mul_stats.widen_mults_inserted); |
4269 statistics_counter_event (fun, "widening maccs inserted", | 3829 statistics_counter_event (fun, "widening maccs inserted", |
4270 widen_mul_stats.maccs_inserted); | 3830 widen_mul_stats.maccs_inserted); |