comparison gcc/simplify-rtx.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
comparing 111:04ced10e8804 (left column) with 131:84e7813d76e9 (right column)
1 /* RTL simplification functions for GNU compiler. 1 /* RTL simplification functions for GNU compiler.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc. 2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
3 3
4 This file is part of GCC. 4 This file is part of GCC.
5 5
6 GCC is free software; you can redistribute it and/or modify it under 6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free 7 the terms of the GNU General Public License as published by the Free
31 #include "emit-rtl.h" 31 #include "emit-rtl.h"
32 #include "recog.h" 32 #include "recog.h"
33 #include "diagnostic-core.h" 33 #include "diagnostic-core.h"
34 #include "varasm.h" 34 #include "varasm.h"
35 #include "flags.h" 35 #include "flags.h"
36 #include "selftest.h"
37 #include "selftest-rtl.h"
36 38
37 /* Simplification and canonicalization of RTL. */ 39 /* Simplification and canonicalization of RTL. */
38 40
39 /* Much code operates on (low, high) pairs; the low value is an 41 /* Much code operates on (low, high) pairs; the low value is an
40 unsigned wide int, the high value a signed wide int. We 42 unsigned wide int, the high value a signed wide int. We
44 ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0) 46 ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0)
45 47
46 static rtx neg_const_int (machine_mode, const_rtx); 48 static rtx neg_const_int (machine_mode, const_rtx);
47 static bool plus_minus_operand_p (const_rtx); 49 static bool plus_minus_operand_p (const_rtx);
48 static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx); 50 static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx);
49 static rtx simplify_immed_subreg (machine_mode, rtx, machine_mode,
50 unsigned int);
51 static rtx simplify_associative_operation (enum rtx_code, machine_mode, 51 static rtx simplify_associative_operation (enum rtx_code, machine_mode,
52 rtx, rtx); 52 rtx, rtx);
53 static rtx simplify_relational_operation_1 (enum rtx_code, machine_mode, 53 static rtx simplify_relational_operation_1 (enum rtx_code, machine_mode,
54 machine_mode, rtx, rtx); 54 machine_mode, rtx, rtx);
55 static rtx simplify_unary_operation_1 (enum rtx_code, machine_mode, rtx); 55 static rtx simplify_unary_operation_1 (enum rtx_code, machine_mode, rtx);
208 rtx 208 rtx
209 avoid_constant_pool_reference (rtx x) 209 avoid_constant_pool_reference (rtx x)
210 { 210 {
211 rtx c, tmp, addr; 211 rtx c, tmp, addr;
212 machine_mode cmode; 212 machine_mode cmode;
213 HOST_WIDE_INT offset = 0; 213 poly_int64 offset = 0;
214 214
215 switch (GET_CODE (x)) 215 switch (GET_CODE (x))
216 { 216 {
217 case MEM: 217 case MEM:
218 break; 218 break;
237 237
238 /* Call target hook to avoid the effects of -fpic etc.... */ 238 /* Call target hook to avoid the effects of -fpic etc.... */
239 addr = targetm.delegitimize_address (addr); 239 addr = targetm.delegitimize_address (addr);
240 240
241 /* Split the address into a base and integer offset. */ 241 /* Split the address into a base and integer offset. */
242 if (GET_CODE (addr) == CONST 242 addr = strip_offset (addr, &offset);
243 && GET_CODE (XEXP (addr, 0)) == PLUS
244 && CONST_INT_P (XEXP (XEXP (addr, 0), 1)))
245 {
246 offset = INTVAL (XEXP (XEXP (addr, 0), 1));
247 addr = XEXP (XEXP (addr, 0), 0);
248 }
249 243
250 if (GET_CODE (addr) == LO_SUM) 244 if (GET_CODE (addr) == LO_SUM)
251 addr = XEXP (addr, 1); 245 addr = XEXP (addr, 1);
252 246
253 /* If this is a constant pool reference, we can turn it into its 247 /* If this is a constant pool reference, we can turn it into its
259 cmode = get_pool_mode (addr); 253 cmode = get_pool_mode (addr);
260 254
261 /* If we're accessing the constant in a different mode than it was 255 /* If we're accessing the constant in a different mode than it was
262 originally stored, attempt to fix that up via subreg simplifications. 256 originally stored, attempt to fix that up via subreg simplifications.
263 If that fails we have no choice but to return the original memory. */ 257 If that fails we have no choice but to return the original memory. */
264 if (offset == 0 && cmode == GET_MODE (x)) 258 if (known_eq (offset, 0) && cmode == GET_MODE (x))
265 return c; 259 return c;
266 else if (offset >= 0 && offset < GET_MODE_SIZE (cmode)) 260 else if (known_in_range_p (offset, 0, GET_MODE_SIZE (cmode)))
267 { 261 {
268 rtx tem = simplify_subreg (GET_MODE (x), c, cmode, offset); 262 rtx tem = simplify_subreg (GET_MODE (x), c, cmode, offset);
269 if (tem && CONSTANT_P (tem)) 263 if (tem && CONSTANT_P (tem))
270 return tem; 264 return tem;
271 } 265 }
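
The hunk above is part of the poly_int conversion: the offset in avoid_constant_pool_reference becomes a poly_int64, strip_offset replaces the hand-rolled (const (plus base (const_int N))) matching, and the == / range tests become known_eq and known_in_range_p, which only succeed when the comparison holds for every possible value of the runtime vector length. A minimal standalone sketch of that idea; the two-coefficient poly type and helpers below are illustrative models, not GCC's real poly_int API:

#include <cassert>
#include <cstdint>

// Illustrative model only: value = a + b * x, where x >= 0 is the unknown
// runtime vector-length factor.  GCC's poly_int types are more general.
struct poly
{
  int64_t a, b;
};

// "Known equal to a constant": must hold for every possible x.
static bool known_eq (poly p, int64_t c)
{
  return p.b == 0 && p.a == c;
}

// "Known in range [lo, lo + size)": a conservative sufficient test,
// assuming non-negative coefficients.
static bool known_in_range_p (poly p, int64_t lo, poly size)
{
  return p.a >= lo && p.b <= size.b && p.a - lo < size.a;
}

int main ()
{
  poly offset = { 0, 0 };        // a compile-time constant offset of 0
  poly vl_offset = { 0, 16 };    // 16 extra bytes per unit of vector length
  poly mode_size = { 32, 16 };   // a mode occupying 32 + 16 * x bytes

  assert (known_eq (offset, 0));
  assert (!known_eq (vl_offset, 0));   // not provably 0 for every length
  assert (known_in_range_p (vl_offset, 0, mode_size));
  return 0;
}
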
287 && MEM_EXPR (x) 281 && MEM_EXPR (x)
288 && MEM_OFFSET_KNOWN_P (x)) 282 && MEM_OFFSET_KNOWN_P (x))
289 { 283 {
290 tree decl = MEM_EXPR (x); 284 tree decl = MEM_EXPR (x);
291 machine_mode mode = GET_MODE (x); 285 machine_mode mode = GET_MODE (x);
292 HOST_WIDE_INT offset = 0; 286 poly_int64 offset = 0;
293 287
294 switch (TREE_CODE (decl)) 288 switch (TREE_CODE (decl))
295 { 289 {
296 default: 290 default:
297 decl = NULL; 291 decl = NULL;
306 case BIT_FIELD_REF: 300 case BIT_FIELD_REF:
307 case REALPART_EXPR: 301 case REALPART_EXPR:
308 case IMAGPART_EXPR: 302 case IMAGPART_EXPR:
309 case VIEW_CONVERT_EXPR: 303 case VIEW_CONVERT_EXPR:
310 { 304 {
311 HOST_WIDE_INT bitsize, bitpos; 305 poly_int64 bitsize, bitpos, bytepos, toffset_val = 0;
312 tree toffset; 306 tree toffset;
313 int unsignedp, reversep, volatilep = 0; 307 int unsignedp, reversep, volatilep = 0;
314 308
315 decl 309 decl
316 = get_inner_reference (decl, &bitsize, &bitpos, &toffset, &mode, 310 = get_inner_reference (decl, &bitsize, &bitpos, &toffset, &mode,
317 &unsignedp, &reversep, &volatilep); 311 &unsignedp, &reversep, &volatilep);
318 if (bitsize != GET_MODE_BITSIZE (mode) 312 if (maybe_ne (bitsize, GET_MODE_BITSIZE (mode))
319 || (bitpos % BITS_PER_UNIT) 313 || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos)
320 || (toffset && !tree_fits_shwi_p (toffset))) 314 || (toffset && !poly_int_tree_p (toffset, &toffset_val)))
321 decl = NULL; 315 decl = NULL;
322 else 316 else
323 { 317 offset += bytepos + toffset_val;
324 offset += bitpos / BITS_PER_UNIT;
325 if (toffset)
326 offset += tree_to_shwi (toffset);
327 }
328 break; 318 break;
329 } 319 }
330 } 320 }
331 321
332 if (decl 322 if (decl
344 newx = DECL_RTL (decl); 334 newx = DECL_RTL (decl);
345 335
346 if (MEM_P (newx)) 336 if (MEM_P (newx))
347 { 337 {
348 rtx n = XEXP (newx, 0), o = XEXP (x, 0); 338 rtx n = XEXP (newx, 0), o = XEXP (x, 0);
339 poly_int64 n_offset, o_offset;
349 340
350 /* Avoid creating a new MEM needlessly if we already had 341 /* Avoid creating a new MEM needlessly if we already had
351 the same address. We do if there's no OFFSET and the 342 the same address. We do if there's no OFFSET and the
352 old address X is identical to NEWX, or if X is of the 343 old address X is identical to NEWX, or if X is of the
353 form (plus NEWX OFFSET), or the NEWX is of the form 344 form (plus NEWX OFFSET), or the NEWX is of the form
354 (plus Y (const_int Z)) and X is that with the offset 345 (plus Y (const_int Z)) and X is that with the offset
355 added: (plus Y (const_int Z+OFFSET)). */ 346 added: (plus Y (const_int Z+OFFSET)). */
356 if (!((offset == 0 347 n = strip_offset (n, &n_offset);
357 || (GET_CODE (o) == PLUS 348 o = strip_offset (o, &o_offset);
358 && GET_CODE (XEXP (o, 1)) == CONST_INT 349 if (!(known_eq (o_offset, n_offset + offset)
359 && (offset == INTVAL (XEXP (o, 1))
360 || (GET_CODE (n) == PLUS
361 && GET_CODE (XEXP (n, 1)) == CONST_INT
362 && (INTVAL (XEXP (n, 1)) + offset
363 == INTVAL (XEXP (o, 1)))
364 && (n = XEXP (n, 0))))
365 && (o = XEXP (o, 0))))
366 && rtx_equal_p (o, n))) 350 && rtx_equal_p (o, n)))
367 x = adjust_address_nv (newx, mode, offset); 351 x = adjust_address_nv (newx, mode, offset);
368 } 352 }
369 else if (GET_MODE (x) == GET_MODE (newx) 353 else if (GET_MODE (x) == GET_MODE (newx)
370 && offset == 0) 354 && known_eq (offset, 0))
371 x = newx; 355 x = newx;
372 } 356 }
373 } 357 }
374 358
375 return x; 359 return x;
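
Similarly, the MEM_EXPR handling above now gets a poly_int64 bit position back from get_inner_reference, so it can only form a byte offset when the bit position is a provable multiple of BITS_PER_UNIT; that is what multiple_p checks, storing the quotient through its last argument. A small sketch of the same test on the simplified model (illustrative code, not GCC's implementation):

#include <cassert>
#include <cstdint>

// value = a + b * x (x is the unknown runtime factor); illustrative only.
struct poly { int64_t a, b; };

// True if p is a multiple of n for every x; on success store p / n in *quot.
static bool multiple_p (poly p, int64_t n, poly *quot)
{
  if (p.a % n != 0 || p.b % n != 0)
    return false;
  quot->a = p.a / n;
  quot->b = p.b / n;
  return true;
}

int main ()
{
  const int64_t bits_per_unit = 8;
  poly bitpos = { 64, 128 };     // 64 + 128 * x bits
  poly bytepos;
  assert (multiple_p (bitpos, bits_per_unit, &bytepos));
  assert (bytepos.a == 8 && bytepos.b == 16);

  poly odd_bitpos = { 3, 0 };    // not a whole number of bytes
  assert (!multiple_p (odd_bitpos, bits_per_unit, &bytepos));
  return 0;
}
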
398 machine_mode op0_mode, rtx op0, rtx op1, rtx op2) 382 machine_mode op0_mode, rtx op0, rtx op1, rtx op2)
399 { 383 {
400 rtx tem; 384 rtx tem;
401 385
402 /* If this simplifies, use it. */ 386 /* If this simplifies, use it. */
403 if (0 != (tem = simplify_ternary_operation (code, mode, op0_mode, 387 if ((tem = simplify_ternary_operation (code, mode, op0_mode,
404 op0, op1, op2))) 388 op0, op1, op2)) != 0)
405 return tem; 389 return tem;
406 390
407 return gen_rtx_fmt_eee (code, mode, op0, op1, op2); 391 return gen_rtx_fmt_eee (code, mode, op0, op1, op2);
408 } 392 }
409 393
414 simplify_gen_relational (enum rtx_code code, machine_mode mode, 398 simplify_gen_relational (enum rtx_code code, machine_mode mode,
415 machine_mode cmp_mode, rtx op0, rtx op1) 399 machine_mode cmp_mode, rtx op0, rtx op1)
416 { 400 {
417 rtx tem; 401 rtx tem;
418 402
419 if (0 != (tem = simplify_relational_operation (code, mode, cmp_mode, 403 if ((tem = simplify_relational_operation (code, mode, cmp_mode,
420 op0, op1))) 404 op0, op1)) != 0)
421 return tem; 405 return tem;
422 406
423 return gen_rtx_fmt_ee (code, mode, op0, op1); 407 return gen_rtx_fmt_ee (code, mode, op0, op1);
424 } 408 }
425 409
793 && 2 * precision <= op_precision 777 && 2 * precision <= op_precision
794 && CONST_INT_P (XEXP (op, 1)) 778 && CONST_INT_P (XEXP (op, 1))
795 && (INTVAL (XEXP (op, 1)) & (precision - 1)) == 0 779 && (INTVAL (XEXP (op, 1)) & (precision - 1)) == 0
796 && UINTVAL (XEXP (op, 1)) < op_precision) 780 && UINTVAL (XEXP (op, 1)) < op_precision)
797 { 781 {
798 int byte = subreg_lowpart_offset (mode, op_mode); 782 poly_int64 byte = subreg_lowpart_offset (mode, op_mode);
799 int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT; 783 int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
800 return simplify_gen_subreg (mode, XEXP (op, 0), op_mode, 784 return simplify_gen_subreg (mode, XEXP (op, 0), op_mode,
801 (WORDS_BIG_ENDIAN 785 (WORDS_BIG_ENDIAN
802 ? byte - shifted_bytes 786 ? byte - shifted_bytes
803 : byte + shifted_bytes)); 787 : byte + shifted_bytes));
819 MEM_ADDR_SPACE (XEXP (op, 0))) 803 MEM_ADDR_SPACE (XEXP (op, 0)))
820 && ! MEM_VOLATILE_P (XEXP (op, 0)) 804 && ! MEM_VOLATILE_P (XEXP (op, 0))
821 && (GET_MODE_SIZE (int_mode) >= UNITS_PER_WORD 805 && (GET_MODE_SIZE (int_mode) >= UNITS_PER_WORD
822 || WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN)) 806 || WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN))
823 { 807 {
824 int byte = subreg_lowpart_offset (int_mode, int_op_mode); 808 poly_int64 byte = subreg_lowpart_offset (int_mode, int_op_mode);
825 int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT; 809 int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
826 return adjust_address_nv (XEXP (op, 0), int_mode, 810 return adjust_address_nv (XEXP (op, 0), int_mode,
827 (WORDS_BIG_ENDIAN 811 (WORDS_BIG_ENDIAN
828 ? byte - shifted_bytes 812 ? byte - shifted_bytes
829 : byte + shifted_bytes)); 813 : byte + shifted_bytes));
923 aren't constant. */ 907 aren't constant. */
924 static rtx 908 static rtx
925 simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) 909 simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
926 { 910 {
927 enum rtx_code reversed; 911 enum rtx_code reversed;
928 rtx temp; 912 rtx temp, elt, base, step;
929 scalar_int_mode inner, int_mode, op_mode, op0_mode; 913 scalar_int_mode inner, int_mode, op_mode, op0_mode;
930 914
931 switch (code) 915 switch (code)
932 { 916 {
933 case NOT: 917 case NOT:
1163 int_mode = as_a <scalar_int_mode> (mode); 1147 int_mode = as_a <scalar_int_mode> (mode);
1164 int isize = GET_MODE_PRECISION (inner); 1148 int isize = GET_MODE_PRECISION (inner);
1165 if (STORE_FLAG_VALUE == 1) 1149 if (STORE_FLAG_VALUE == 1)
1166 { 1150 {
1167 temp = simplify_gen_binary (ASHIFTRT, inner, XEXP (op, 0), 1151 temp = simplify_gen_binary (ASHIFTRT, inner, XEXP (op, 0),
1168 GEN_INT (isize - 1)); 1152 gen_int_shift_amount (inner,
1153 isize - 1));
1169 if (int_mode == inner) 1154 if (int_mode == inner)
1170 return temp; 1155 return temp;
1171 if (GET_MODE_PRECISION (int_mode) > isize) 1156 if (GET_MODE_PRECISION (int_mode) > isize)
1172 return simplify_gen_unary (SIGN_EXTEND, int_mode, temp, inner); 1157 return simplify_gen_unary (SIGN_EXTEND, int_mode, temp, inner);
1173 return simplify_gen_unary (TRUNCATE, int_mode, temp, inner); 1158 return simplify_gen_unary (TRUNCATE, int_mode, temp, inner);
1174 } 1159 }
1175 else if (STORE_FLAG_VALUE == -1) 1160 else if (STORE_FLAG_VALUE == -1)
1176 { 1161 {
1177 temp = simplify_gen_binary (LSHIFTRT, inner, XEXP (op, 0), 1162 temp = simplify_gen_binary (LSHIFTRT, inner, XEXP (op, 0),
1178 GEN_INT (isize - 1)); 1163 gen_int_shift_amount (inner,
1164 isize - 1));
1179 if (int_mode == inner) 1165 if (int_mode == inner)
1180 return temp; 1166 return temp;
1181 if (GET_MODE_PRECISION (int_mode) > isize) 1167 if (GET_MODE_PRECISION (int_mode) > isize)
1182 return simplify_gen_unary (ZERO_EXTEND, int_mode, temp, inner); 1168 return simplify_gen_unary (ZERO_EXTEND, int_mode, temp, inner);
1183 return simplify_gen_unary (TRUNCATE, int_mode, temp, inner); 1169 return simplify_gen_unary (TRUNCATE, int_mode, temp, inner);
1170 }
1171 }
1172
1173 if (vec_series_p (op, &base, &step))
1174 {
1175 /* Only create a new series if we can simplify both parts. In other
1176 cases this isn't really a simplification, and it's not necessarily
1177 a win to replace a vector operation with a scalar operation. */
1178 scalar_mode inner_mode = GET_MODE_INNER (mode);
1179 base = simplify_unary_operation (NEG, inner_mode, base, inner_mode);
1180 if (base)
1181 {
1182 step = simplify_unary_operation (NEG, inner_mode,
1183 step, inner_mode);
1184 if (step)
1185 return gen_vec_series (mode, base, step);
1184 } 1186 }
1185 } 1187 }
1186 break; 1188 break;
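
The new vec_series_p branch folds (neg (vec_series base step)) into a new series only when both scalar negations fold to something simpler, per the comment above. The underlying identity is elementwise: element i of a series is base + i*step, so its negation is the series with base and step both negated. A quick numeric check (plain integers, not RTL):

#include <cassert>
#include <cstdint>

// Element i of a vector series with the given base and step.
static int64_t series_elt (int64_t base, int64_t step, int i)
{
  return base + (int64_t) i * step;
}

int main ()
{
  const int64_t base = 5, step = 3;
  for (int i = 0; i < 8; ++i)
    // -(base + i*step) == (-base) + i*(-step)
    assert (-series_elt (base, step, i) == series_elt (-base, -step, i));
  return 0;
}
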
1187 1189
1188 case TRUNCATE: 1190 case TRUNCATE:
1213 return temp; 1215 return temp;
1214 } 1216 }
1215 1217
1216 /* If we know that the value is already truncated, we can 1218 /* If we know that the value is already truncated, we can
1217 replace the TRUNCATE with a SUBREG. */ 1219 replace the TRUNCATE with a SUBREG. */
1218 if (GET_MODE_NUNITS (mode) == 1 1220 if (known_eq (GET_MODE_NUNITS (mode), 1)
1219 && (TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op)) 1221 && (TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op))
1220 || truncated_to_mode (mode, op))) 1222 || truncated_to_mode (mode, op)))
1221 { 1223 {
1222 temp = rtl_hooks.gen_lowpart_no_emit (mode, op); 1224 temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
1223 if (temp) 1225 if (temp)
1474 if (GET_CODE (op) == SUBREG 1476 if (GET_CODE (op) == SUBREG
1475 && SUBREG_PROMOTED_VAR_P (op) 1477 && SUBREG_PROMOTED_VAR_P (op)
1476 && SUBREG_PROMOTED_SIGNED_P (op) 1478 && SUBREG_PROMOTED_SIGNED_P (op)
1477 && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (op)))) 1479 && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (op))))
1478 { 1480 {
1479 temp = rtl_hooks.gen_lowpart_no_emit (mode, op); 1481 temp = rtl_hooks.gen_lowpart_no_emit (mode, SUBREG_REG (op));
1480 if (temp) 1482 if (temp)
1481 return temp; 1483 return temp;
1482 } 1484 }
1483 1485
1484 /* (sign_extend:M (sign_extend:N <X>)) is (sign_extend:M <X>). 1486 /* (sign_extend:M (sign_extend:N <X>)) is (sign_extend:M <X>).
1557 if (GET_CODE (op) == SUBREG 1559 if (GET_CODE (op) == SUBREG
1558 && SUBREG_PROMOTED_VAR_P (op) 1560 && SUBREG_PROMOTED_VAR_P (op)
1559 && SUBREG_PROMOTED_UNSIGNED_P (op) 1561 && SUBREG_PROMOTED_UNSIGNED_P (op)
1560 && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (op)))) 1562 && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (op))))
1561 { 1563 {
1562 temp = rtl_hooks.gen_lowpart_no_emit (mode, op); 1564 temp = rtl_hooks.gen_lowpart_no_emit (mode, SUBREG_REG (op));
1563 if (temp) 1565 if (temp)
1564 return temp; 1566 return temp;
1565 } 1567 }
1566 1568
1567 /* Extending a widening multiplication should be canonicalized to 1569 /* Extending a widening multiplication should be canonicalized to
1682 1684
1683 default: 1685 default:
1684 break; 1686 break;
1685 } 1687 }
1686 1688
1689 if (VECTOR_MODE_P (mode)
1690 && vec_duplicate_p (op, &elt)
1691 && code != VEC_DUPLICATE)
1692 {
1693 /* Try applying the operator to ELT and see if that simplifies.
1694 We can duplicate the result if so.
1695
1696 The reason we don't use simplify_gen_unary is that it isn't
1697 necessarily a win to convert things like:
1698
1699 (neg:V (vec_duplicate:V (reg:S R)))
1700
1701 to:
1702
1703 (vec_duplicate:V (neg:S (reg:S R)))
1704
1705 The first might be done entirely in vector registers while the
1706 second might need a move between register files. */
1707 temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
1708 elt, GET_MODE_INNER (GET_MODE (op)));
1709 if (temp)
1710 return gen_vec_duplicate (mode, temp);
1711 }
1712
1687 return 0; 1713 return 0;
1688 } 1714 }
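
The vec_duplicate_p block that now ends simplify_unary_operation_1 pushes a unary operator through a duplicated scalar only when the scalar operation already simplifies, since rewriting (neg:V (vec_duplicate:V (reg:S R))) as a scalar negation plus a duplicate can otherwise cost a move between register files. The value equivalence it relies on is simply per-lane, as this standalone check illustrates (vec_duplicate here is a local helper, not the RTL code):

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

// A "vec_duplicate" in plain C++ terms: every lane holds the same scalar.
template <std::size_t N>
static std::array<int32_t, N> vec_duplicate (int32_t x)
{
  std::array<int32_t, N> v;
  v.fill (x);
  return v;
}

int main ()
{
  constexpr std::size_t n = 4;
  const int32_t x = 42;

  // (neg:V (vec_duplicate:V x)), computed lane by lane ...
  std::array<int32_t, n> a = vec_duplicate<n> (x);
  for (int32_t &lane : a)
    lane = -lane;

  // ... has the same value as (vec_duplicate:V (neg:S x)).
  std::array<int32_t, n> b = vec_duplicate<n> (-x);

  assert (a == b);
  return 0;
}
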
1689 1715
1690 /* Try to compute the value of a unary operation CODE whose output mode is to 1716 /* Try to compute the value of a unary operation CODE whose output mode is to
1691 be MODE with input operand OP whose mode was originally OP_MODE. 1717 be MODE with input operand OP whose mode was originally OP_MODE.
1705 gcc_assert (GET_MODE_INNER (mode) == GET_MODE (op)); 1731 gcc_assert (GET_MODE_INNER (mode) == GET_MODE (op));
1706 else 1732 else
1707 gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER 1733 gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER
1708 (GET_MODE (op))); 1734 (GET_MODE (op)));
1709 } 1735 }
1710 if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op) 1736 if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op))
1711 || GET_CODE (op) == CONST_VECTOR) 1737 return gen_const_vec_duplicate (mode, op);
1712 { 1738 unsigned int n_elts;
1713 int elt_size = GET_MODE_UNIT_SIZE (mode); 1739 if (GET_CODE (op) == CONST_VECTOR
1714 unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size); 1740 && GET_MODE_NUNITS (mode).is_constant (&n_elts))
1741 {
1742 /* This must be constant if we're duplicating it to a constant
1743 number of elements. */
1744 unsigned int in_n_elts = CONST_VECTOR_NUNITS (op).to_constant ();
1745 gcc_assert (in_n_elts < n_elts);
1746 gcc_assert ((n_elts % in_n_elts) == 0);
1715 rtvec v = rtvec_alloc (n_elts); 1747 rtvec v = rtvec_alloc (n_elts);
1716 unsigned int i; 1748 for (unsigned i = 0; i < n_elts; i++)
1717 1749 RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts);
1718 if (GET_CODE (op) != CONST_VECTOR)
1719 for (i = 0; i < n_elts; i++)
1720 RTVEC_ELT (v, i) = op;
1721 else
1722 {
1723 machine_mode inmode = GET_MODE (op);
1724 int in_elt_size = GET_MODE_UNIT_SIZE (inmode);
1725 unsigned in_n_elts = (GET_MODE_SIZE (inmode) / in_elt_size);
1726
1727 gcc_assert (in_n_elts < n_elts);
1728 gcc_assert ((n_elts % in_n_elts) == 0);
1729 for (i = 0; i < n_elts; i++)
1730 RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts);
1731 }
1732 return gen_rtx_CONST_VECTOR (mode, v); 1750 return gen_rtx_CONST_VECTOR (mode, v);
1733 } 1751 }
1734 } 1752 }
1735 1753
1736 if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) 1754 if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
1737 { 1755 {
1738 int elt_size = GET_MODE_UNIT_SIZE (mode); 1756 unsigned int n_elts;
1739 unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size); 1757 if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
1758 return NULL_RTX;
1759
1740 machine_mode opmode = GET_MODE (op); 1760 machine_mode opmode = GET_MODE (op);
1741 int op_elt_size = GET_MODE_UNIT_SIZE (opmode); 1761 gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts));
1742 unsigned op_n_elts = (GET_MODE_SIZE (opmode) / op_elt_size); 1762 gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts));
1763
1743 rtvec v = rtvec_alloc (n_elts); 1764 rtvec v = rtvec_alloc (n_elts);
1744 unsigned int i; 1765 unsigned int i;
1745 1766
1746 gcc_assert (op_n_elts == n_elts);
1747 for (i = 0; i < n_elts; i++) 1767 for (i = 0; i < n_elts; i++)
1748 { 1768 {
1749 rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode), 1769 rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
1750 CONST_VECTOR_ELT (op, i), 1770 CONST_VECTOR_ELT (op, i),
1751 GET_MODE_INNER (opmode)); 1771 GET_MODE_INNER (opmode));
1752 if (!x) 1772 if (!x || !valid_for_const_vector_p (mode, x))
1753 return 0; 1773 return 0;
1754 RTVEC_ELT (v, i) = x; 1774 RTVEC_ELT (v, i) = x;
1755 } 1775 }
1756 return gen_rtx_CONST_VECTOR (mode, v); 1776 return gen_rtx_CONST_VECTOR (mode, v);
1757 } 1777 }
1849 1869
1850 case CLZ: 1870 case CLZ:
1851 if (wi::ne_p (op0, 0)) 1871 if (wi::ne_p (op0, 0))
1852 int_value = wi::clz (op0); 1872 int_value = wi::clz (op0);
1853 else if (! CLZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) 1873 else if (! CLZ_DEFINED_VALUE_AT_ZERO (imode, int_value))
1854 int_value = GET_MODE_PRECISION (imode); 1874 return NULL_RTX;
1855 result = wi::shwi (int_value, result_mode); 1875 result = wi::shwi (int_value, result_mode);
1856 break; 1876 break;
1857 1877
1858 case CLRSB: 1878 case CLRSB:
1859 result = wi::shwi (wi::clrsb (op0), result_mode); 1879 result = wi::shwi (wi::clrsb (op0), result_mode);
1861 1881
1862 case CTZ: 1882 case CTZ:
1863 if (wi::ne_p (op0, 0)) 1883 if (wi::ne_p (op0, 0))
1864 int_value = wi::ctz (op0); 1884 int_value = wi::ctz (op0);
1865 else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) 1885 else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value))
1866 int_value = GET_MODE_PRECISION (imode); 1886 return NULL_RTX;
1867 result = wi::shwi (int_value, result_mode); 1887 result = wi::shwi (int_value, result_mode);
1868 break; 1888 break;
1869 1889
1870 case POPCOUNT: 1890 case POPCOUNT:
1871 result = wi::shwi (wi::popcount (op0), result_mode); 1891 result = wi::shwi (wi::popcount (op0), result_mode);
2007 default: 2027 default:
2008 gcc_unreachable (); 2028 gcc_unreachable ();
2009 } 2029 }
2010 } 2030 }
2011 2031
2032 /* Handle polynomial integers. */
2033 else if (CONST_POLY_INT_P (op))
2034 {
2035 poly_wide_int result;
2036 switch (code)
2037 {
2038 case NEG:
2039 result = -const_poly_int_value (op);
2040 break;
2041
2042 case NOT:
2043 result = ~const_poly_int_value (op);
2044 break;
2045
2046 default:
2047 return NULL_RTX;
2048 }
2049 return immed_wide_int_const (result, mode);
2050 }
2051
2012 return NULL_RTX; 2052 return NULL_RTX;
2013 } 2053 }
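
The new CONST_POLY_INT_P branch folds NEG and NOT of a polynomial integer coefficient-wise: -(a + b*x) is (-a) + (-b)*x, and because ~v equals -v - 1 in two's complement, NOT only adjusts the constant coefficient. Both identities can be checked directly on the simplified two-coefficient model (illustrative, not GCC's poly_wide_int):

#include <cassert>
#include <cstdint>

struct poly { int64_t a, b; };             // value = a + b * x (model only)

static int64_t eval (poly p, int64_t x) { return p.a + p.b * x; }

int main ()
{
  poly p = { 7, 16 };
  poly neg = { -p.a, -p.b };               // NEG negates every coefficient
  poly not_p = { -p.a - 1, -p.b };         // NOT: ~v == -v - 1

  for (int64_t x = 0; x < 5; ++x)
    {
      assert (eval (neg, x) == -eval (p, x));
      assert (eval (not_p, x) == ~eval (p, x));
    }
  return 0;
}
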
2014 2054
2015 /* Subroutine of simplify_binary_operation to simplify a binary operation 2055 /* Subroutine of simplify_binary_operation to simplify a binary operation
2016 CODE that can commute with byte swapping, with result mode MODE and 2056 CODE that can commute with byte swapping, with result mode MODE and
2135 return simplify_gen_binary (code, mode, trueop0, trueop1); 2175 return simplify_gen_binary (code, mode, trueop0, trueop1);
2136 2176
2137 return NULL_RTX; 2177 return NULL_RTX;
2138 } 2178 }
2139 2179
2180 /* Subroutine of simplify_binary_operation_1 that looks for cases in
2181 which OP0 and OP1 are both vector series or vector duplicates
2182 (which are really just series with a step of 0). If so, try to
2183 form a new series by applying CODE to the bases and to the steps.
2184 Return null if no simplification is possible.
2185
2186 MODE is the mode of the operation and is known to be a vector
2187 integer mode. */
2188
2189 static rtx
2190 simplify_binary_operation_series (rtx_code code, machine_mode mode,
2191 rtx op0, rtx op1)
2192 {
2193 rtx base0, step0;
2194 if (vec_duplicate_p (op0, &base0))
2195 step0 = const0_rtx;
2196 else if (!vec_series_p (op0, &base0, &step0))
2197 return NULL_RTX;
2198
2199 rtx base1, step1;
2200 if (vec_duplicate_p (op1, &base1))
2201 step1 = const0_rtx;
2202 else if (!vec_series_p (op1, &base1, &step1))
2203 return NULL_RTX;
2204
2205 /* Only create a new series if we can simplify both parts. In other
2206 cases this isn't really a simplification, and it's not necessarily
2207 a win to replace a vector operation with a scalar operation. */
2208 scalar_mode inner_mode = GET_MODE_INNER (mode);
2209 rtx new_base = simplify_binary_operation (code, inner_mode, base0, base1);
2210 if (!new_base)
2211 return NULL_RTX;
2212
2213 rtx new_step = simplify_binary_operation (code, inner_mode, step0, step1);
2214 if (!new_step)
2215 return NULL_RTX;
2216
2217 return gen_vec_series (mode, new_base, new_step);
2218 }
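
simplify_binary_operation_series treats a vec_duplicate as a series whose step is zero and then applies the operation separately to the bases and to the steps, only building a new series when both of those fold. For the PLUS and MINUS callers this is exact, because element i of a series is base + i*step, so the element-wise sum of two series is again a series. A small numeric check:

#include <cassert>
#include <cstdint>

// Element i of a vector series with the given base and step.
static int64_t series_elt (int64_t base, int64_t step, int i)
{
  return base + (int64_t) i * step;
}

int main ()
{
  const int64_t base0 = 1, step0 = 4;    // the series {1, 5, 9, ...}
  const int64_t base1 = 10, step1 = 0;   // a vec_duplicate of 10: step 0
  for (int i = 0; i < 8; ++i)
    assert (series_elt (base0, step0, i) + series_elt (base1, step1, i)
            == series_elt (base0 + base1, step0 + step1, i));
  return 0;
}
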
2219
2140 /* Subroutine of simplify_binary_operation. Simplify a binary operation 2220 /* Subroutine of simplify_binary_operation. Simplify a binary operation
2141 CODE with result mode MODE, operating on OP0 and OP1. If OP0 and/or 2221 CODE with result mode MODE, operating on OP0 and OP1. If OP0 and/or
2142 OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the 2222 OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
2143 actual constants. */ 2223 actual constants. */
2144 2224
2145 static rtx 2225 static rtx
2146 simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, 2226 simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
2147 rtx op0, rtx op1, rtx trueop0, rtx trueop1) 2227 rtx op0, rtx op1, rtx trueop0, rtx trueop1)
2148 { 2228 {
2149 rtx tem, reversed, opleft, opright; 2229 rtx tem, reversed, opleft, opright, elt0, elt1;
2150 HOST_WIDE_INT val; 2230 HOST_WIDE_INT val;
2151 scalar_int_mode int_mode, inner_mode; 2231 scalar_int_mode int_mode, inner_mode;
2232 poly_int64 offset;
2152 2233
2153 /* Even if we can't compute a constant result, 2234 /* Even if we can't compute a constant result,
2154 there are some cases worth simplifying. */ 2235 there are some cases worth simplifying. */
2155 2236
2156 switch (code) 2237 switch (code)
2183 than HOST_BITS_PER_WIDE_INT. */ 2264 than HOST_BITS_PER_WIDE_INT. */
2184 2265
2185 if ((GET_CODE (op0) == CONST 2266 if ((GET_CODE (op0) == CONST
2186 || GET_CODE (op0) == SYMBOL_REF 2267 || GET_CODE (op0) == SYMBOL_REF
2187 || GET_CODE (op0) == LABEL_REF) 2268 || GET_CODE (op0) == LABEL_REF)
2188 && CONST_INT_P (op1)) 2269 && poly_int_rtx_p (op1, &offset))
2189 return plus_constant (mode, op0, INTVAL (op1)); 2270 return plus_constant (mode, op0, offset);
2190 else if ((GET_CODE (op1) == CONST 2271 else if ((GET_CODE (op1) == CONST
2191 || GET_CODE (op1) == SYMBOL_REF 2272 || GET_CODE (op1) == SYMBOL_REF
2192 || GET_CODE (op1) == LABEL_REF) 2273 || GET_CODE (op1) == LABEL_REF)
2193 && CONST_INT_P (op0)) 2274 && poly_int_rtx_p (op0, &offset))
2194 return plus_constant (mode, op1, INTVAL (op0)); 2275 return plus_constant (mode, op1, offset);
2195 2276
2196 /* See if this is something like X * C - X or vice versa or 2277 /* See if this is something like X * C - X or vice versa or
2197 if the multiplication is written as a shift. If so, we can 2278 if the multiplication is written as a shift. If so, we can
2198 distribute and make a new multiply, shift, or maybe just 2279 distribute and make a new multiply, shift, or maybe just
2199 have X (if C is 2 in the example above). But don't make 2280 have X (if C is 2 in the example above). But don't make
2311 specifies associative math operations. */ 2392 specifies associative math operations. */
2312 if (FLOAT_MODE_P (mode) 2393 if (FLOAT_MODE_P (mode)
2313 && flag_associative_math) 2394 && flag_associative_math)
2314 { 2395 {
2315 tem = simplify_associative_operation (code, mode, op0, op1); 2396 tem = simplify_associative_operation (code, mode, op0, op1);
2397 if (tem)
2398 return tem;
2399 }
2400
2401 /* Handle vector series. */
2402 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2403 {
2404 tem = simplify_binary_operation_series (code, mode, op0, op1);
2316 if (tem) 2405 if (tem)
2317 return tem; 2406 return tem;
2318 } 2407 }
2319 break; 2408 break;
2320 2409
2451 tem = simplify_unary_operation (NEG, mode, op1, mode); 2540 tem = simplify_unary_operation (NEG, mode, op1, mode);
2452 if (tem) 2541 if (tem)
2453 return simplify_gen_binary (MINUS, mode, tem, XEXP (op0, 0)); 2542 return simplify_gen_binary (MINUS, mode, tem, XEXP (op0, 0));
2454 } 2543 }
2455 2544
2545 if ((GET_CODE (op0) == CONST
2546 || GET_CODE (op0) == SYMBOL_REF
2547 || GET_CODE (op0) == LABEL_REF)
2548 && poly_int_rtx_p (op1, &offset))
2549 return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode));
2550
2456 /* Don't let a relocatable value get a negative coeff. */ 2551 /* Don't let a relocatable value get a negative coeff. */
2457 if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode) 2552 if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode)
2458 return simplify_gen_binary (PLUS, mode, 2553 return simplify_gen_binary (PLUS, mode,
2459 op0, 2554 op0,
2460 neg_const_int (mode, op1)); 2555 neg_const_int (mode, op1));
2525 if (INTEGRAL_MODE_P (mode) 2620 if (INTEGRAL_MODE_P (mode)
2526 && (plus_minus_operand_p (op0) 2621 && (plus_minus_operand_p (op0)
2527 || plus_minus_operand_p (op1)) 2622 || plus_minus_operand_p (op1))
2528 && (tem = simplify_plus_minus (code, mode, op0, op1)) != 0) 2623 && (tem = simplify_plus_minus (code, mode, op0, op1)) != 0)
2529 return tem; 2624 return tem;
2625
2626 /* Handle vector series. */
2627 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2628 {
2629 tem = simplify_binary_operation_series (code, mode, op0, op1);
2630 if (tem)
2631 return tem;
2632 }
2530 break; 2633 break;
2531 2634
2532 case MULT: 2635 case MULT:
2533 if (trueop1 == constm1_rtx) 2636 if (trueop1 == constm1_rtx)
2534 return simplify_gen_unary (NEG, mode, op0, mode); 2637 return simplify_gen_unary (NEG, mode, op0, mode);
2587 /* Convert multiply by constant power of two into shift. */ 2690 /* Convert multiply by constant power of two into shift. */
2588 if (CONST_SCALAR_INT_P (trueop1)) 2691 if (CONST_SCALAR_INT_P (trueop1))
2589 { 2692 {
2590 val = wi::exact_log2 (rtx_mode_t (trueop1, mode)); 2693 val = wi::exact_log2 (rtx_mode_t (trueop1, mode));
2591 if (val >= 0) 2694 if (val >= 0)
2592 return simplify_gen_binary (ASHIFT, mode, op0, GEN_INT (val)); 2695 return simplify_gen_binary (ASHIFT, mode, op0,
2696 gen_int_shift_amount (mode, val));
2593 } 2697 }
2594 2698
2595 /* x*2 is x+x and x*(-1) is -x */ 2699 /* x*2 is x+x and x*(-1) is -x */
2596 if (CONST_DOUBLE_AS_FLOAT_P (trueop1) 2700 if (CONST_DOUBLE_AS_FLOAT_P (trueop1)
2597 && SCALAR_FLOAT_MODE_P (GET_MODE (trueop1)) 2701 && SCALAR_FLOAT_MODE_P (GET_MODE (trueop1))
2716 && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (opleft)), 2820 && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (opleft)),
2717 &inner_mode) 2821 &inner_mode)
2718 && GET_CODE (SUBREG_REG (opleft)) == ASHIFT 2822 && GET_CODE (SUBREG_REG (opleft)) == ASHIFT
2719 && GET_CODE (opright) == LSHIFTRT 2823 && GET_CODE (opright) == LSHIFTRT
2720 && GET_CODE (XEXP (opright, 0)) == SUBREG 2824 && GET_CODE (XEXP (opright, 0)) == SUBREG
2721 && SUBREG_BYTE (opleft) == SUBREG_BYTE (XEXP (opright, 0)) 2825 && known_eq (SUBREG_BYTE (opleft), SUBREG_BYTE (XEXP (opright, 0)))
2722 && GET_MODE_SIZE (int_mode) < GET_MODE_SIZE (inner_mode) 2826 && GET_MODE_SIZE (int_mode) < GET_MODE_SIZE (inner_mode)
2723 && rtx_equal_p (XEXP (SUBREG_REG (opleft), 0), 2827 && rtx_equal_p (XEXP (SUBREG_REG (opleft), 0),
2724 SUBREG_REG (XEXP (opright, 0))) 2828 SUBREG_REG (XEXP (opright, 0)))
2725 && CONST_INT_P (XEXP (SUBREG_REG (opleft), 1)) 2829 && CONST_INT_P (XEXP (SUBREG_REG (opleft), 1))
2726 && CONST_INT_P (XEXP (opright, 1)) 2830 && CONST_INT_P (XEXP (opright, 1))
3211 return tem; 3315 return tem;
3212 } 3316 }
3213 /* Convert divide by power of two into shift. */ 3317 /* Convert divide by power of two into shift. */
3214 if (CONST_INT_P (trueop1) 3318 if (CONST_INT_P (trueop1)
3215 && (val = exact_log2 (UINTVAL (trueop1))) > 0) 3319 && (val = exact_log2 (UINTVAL (trueop1))) > 0)
3216 return simplify_gen_binary (LSHIFTRT, mode, op0, GEN_INT (val)); 3320 return simplify_gen_binary (LSHIFTRT, mode, op0,
3321 gen_int_shift_amount (mode, val));
3217 break; 3322 break;
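
The UDIV hunk above still turns an unsigned division by a power of two into a logical right shift; the only change is that the shift count is now built with gen_int_shift_amount so it lands in whatever mode the target prefers for shift amounts. The identity itself holds only for unsigned values; a minimal check (the exact_log2 below is a local stand-in for GCC's helper):

#include <cassert>
#include <cstdint>

// Local stand-in for exact_log2: log2 (x) if x is a power of two, else -1.
static int exact_log2 (uint64_t x)
{
  if (x == 0 || (x & (x - 1)) != 0)
    return -1;
  int n = 0;
  while ((x >>= 1) != 0)
    ++n;
  return n;
}

int main ()
{
  const uint64_t divisor = 8;
  const int shift = exact_log2 (divisor);
  assert (shift == 3);
  for (uint64_t x = 0; x < 1000; ++x)
    assert (x / divisor == x >> shift);    // holds for unsigned x only
  return 0;
}
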
3218 3323
3219 case DIV: 3324 case DIV:
3220 /* Handle floating point and integers separately. */ 3325 /* Handle floating point and integers separately. */
3221 if (SCALAR_FLOAT_MODE_P (mode)) 3326 if (SCALAR_FLOAT_MODE_P (mode))
3300 } 3405 }
3301 /* Implement modulus by power of two as AND. */ 3406 /* Implement modulus by power of two as AND. */
3302 if (CONST_INT_P (trueop1) 3407 if (CONST_INT_P (trueop1)
3303 && exact_log2 (UINTVAL (trueop1)) > 0) 3408 && exact_log2 (UINTVAL (trueop1)) > 0)
3304 return simplify_gen_binary (AND, mode, op0, 3409 return simplify_gen_binary (AND, mode, op0,
3305 gen_int_mode (INTVAL (op1) - 1, mode)); 3410 gen_int_mode (UINTVAL (trueop1) - 1,
3411 mode));
3306 break; 3412 break;
3307 3413
3308 case MOD: 3414 case MOD:
3309 /* 0%x is 0 (or x&0 if x has side-effects). */ 3415 /* 0%x is 0 (or x&0 if x has side-effects). */
3310 if (trueop0 == CONST0_RTX (mode)) 3416 if (trueop0 == CONST0_RTX (mode))
3331 #if defined(HAVE_rotate) && defined(HAVE_rotatert) 3437 #if defined(HAVE_rotate) && defined(HAVE_rotatert)
3332 if (CONST_INT_P (trueop1) 3438 if (CONST_INT_P (trueop1)
3333 && IN_RANGE (INTVAL (trueop1), 3439 && IN_RANGE (INTVAL (trueop1),
3334 GET_MODE_UNIT_PRECISION (mode) / 2 + (code == ROTATE), 3440 GET_MODE_UNIT_PRECISION (mode) / 2 + (code == ROTATE),
3335 GET_MODE_UNIT_PRECISION (mode) - 1)) 3441 GET_MODE_UNIT_PRECISION (mode) - 1))
3336 return simplify_gen_binary (code == ROTATE ? ROTATERT : ROTATE, 3442 {
3337 mode, op0, 3443 int new_amount = GET_MODE_UNIT_PRECISION (mode) - INTVAL (trueop1);
3338 GEN_INT (GET_MODE_UNIT_PRECISION (mode) 3444 rtx new_amount_rtx = gen_int_shift_amount (mode, new_amount);
3339 - INTVAL (trueop1))); 3445 return simplify_gen_binary (code == ROTATE ? ROTATERT : ROTATE,
3446 mode, op0, new_amount_rtx);
3447 }
3340 #endif 3448 #endif
3341 /* FALLTHRU */ 3449 /* FALLTHRU */
3342 case ASHIFTRT: 3450 case ASHIFTRT:
3343 if (trueop1 == CONST0_RTX (mode)) 3451 if (trueop1 == CONST0_RTX (mode))
3344 return op0; 3452 return op0;
3375 && GET_MODE_BITSIZE (inner_mode) > GET_MODE_BITSIZE (int_mode) 3483 && GET_MODE_BITSIZE (inner_mode) > GET_MODE_BITSIZE (int_mode)
3376 && (INTVAL (XEXP (SUBREG_REG (op0), 1)) 3484 && (INTVAL (XEXP (SUBREG_REG (op0), 1))
3377 == GET_MODE_BITSIZE (inner_mode) - GET_MODE_BITSIZE (int_mode)) 3485 == GET_MODE_BITSIZE (inner_mode) - GET_MODE_BITSIZE (int_mode))
3378 && subreg_lowpart_p (op0)) 3486 && subreg_lowpart_p (op0))
3379 { 3487 {
3380 rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1)) 3488 rtx tmp = gen_int_shift_amount
3381 + INTVAL (op1)); 3489 (inner_mode, INTVAL (XEXP (SUBREG_REG (op0), 1)) + INTVAL (op1));
3382 tmp = simplify_gen_binary (code, inner_mode, 3490 tmp = simplify_gen_binary (code, inner_mode,
3383 XEXP (SUBREG_REG (op0), 0), 3491 XEXP (SUBREG_REG (op0), 0),
3384 tmp); 3492 tmp);
3385 return lowpart_subreg (int_mode, tmp, inner_mode); 3493 return lowpart_subreg (int_mode, tmp, inner_mode);
3386 } 3494 }
3387 3495
3388 if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1)) 3496 if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
3389 { 3497 {
3390 val = INTVAL (op1) & (GET_MODE_UNIT_PRECISION (mode) - 1); 3498 val = INTVAL (op1) & (GET_MODE_UNIT_PRECISION (mode) - 1);
3391 if (val != INTVAL (op1)) 3499 if (val != INTVAL (op1))
3392 return simplify_gen_binary (code, mode, op0, GEN_INT (val)); 3500 return simplify_gen_binary (code, mode, op0,
3501 gen_int_shift_amount (mode, val));
3393 } 3502 }
3394 break; 3503 break;
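
Two rewrites in the rotate/shift hunks above rest on simple bit identities: a rotate by a count in the upper half of the precision is canonicalized to a rotate the other way by precision minus the count, and when SHIFT_COUNT_TRUNCATED the count is first masked with precision - 1. The rotate identity is easy to check for 32-bit values (rotl/rotr are local helpers, not GCC functions):

#include <cassert>
#include <cstdint>

// Local helpers, valid for counts 1..31 (no masking tricks).
static uint32_t rotl (uint32_t x, unsigned n) { return (x << n) | (x >> (32 - n)); }
static uint32_t rotr (uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

int main ()
{
  const uint32_t x = 0xDEADBEEFu;
  // A rotate by a count above half the precision equals a rotate the other
  // way by precision minus the count.
  for (unsigned n = 17; n <= 31; ++n)
    assert (rotl (x, n) == rotr (x, 32 - n));
  return 0;
}
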
3395 3504
3396 case ASHIFT: 3505 case ASHIFT:
3397 case SS_ASHIFT: 3506 case SS_ASHIFT:
3478 case SS_DIV: 3587 case SS_DIV:
3479 case US_DIV: 3588 case US_DIV:
3480 /* ??? There are simplifications that can be done. */ 3589 /* ??? There are simplifications that can be done. */
3481 return 0; 3590 return 0;
3482 3591
3592 case VEC_SERIES:
3593 if (op1 == CONST0_RTX (GET_MODE_INNER (mode)))
3594 return gen_vec_duplicate (mode, op0);
3595 if (valid_for_const_vector_p (mode, op0)
3596 && valid_for_const_vector_p (mode, op1))
3597 return gen_const_vec_series (mode, op0, op1);
3598 return 0;
3599
3483 case VEC_SELECT: 3600 case VEC_SELECT:
3484 if (!VECTOR_MODE_P (mode)) 3601 if (!VECTOR_MODE_P (mode))
3485 { 3602 {
3486 gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0))); 3603 gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0)));
3487 gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0))); 3604 gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0)));
3488 gcc_assert (GET_CODE (trueop1) == PARALLEL); 3605 gcc_assert (GET_CODE (trueop1) == PARALLEL);
3489 gcc_assert (XVECLEN (trueop1, 0) == 1); 3606 gcc_assert (XVECLEN (trueop1, 0) == 1);
3490 gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0))); 3607
3608 /* We can't reason about selections made at runtime. */
3609 if (!CONST_INT_P (XVECEXP (trueop1, 0, 0)))
3610 return 0;
3611
3612 if (vec_duplicate_p (trueop0, &elt0))
3613 return elt0;
3491 3614
3492 if (GET_CODE (trueop0) == CONST_VECTOR) 3615 if (GET_CODE (trueop0) == CONST_VECTOR)
3493 return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP 3616 return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP
3494 (trueop1, 0, 0))); 3617 (trueop1, 0, 0)));
3495 3618
3497 (with optional nested VEC_CONCAT expression). Some targets 3620 (with optional nested VEC_CONCAT expression). Some targets
3498 (i386) extract scalar element from a vector using chain of 3621 (i386) extract scalar element from a vector using chain of
3499 nested VEC_SELECT expressions. When input operand is a memory 3622 nested VEC_SELECT expressions. When input operand is a memory
3500 operand, this operation can be simplified to a simple scalar 3623 operand, this operation can be simplified to a simple scalar
3501 load from an offseted memory address. */ 3624 load from an offseted memory address. */
3502 if (GET_CODE (trueop0) == VEC_SELECT) 3625 int n_elts;
3626 if (GET_CODE (trueop0) == VEC_SELECT
3627 && (GET_MODE_NUNITS (GET_MODE (XEXP (trueop0, 0)))
3628 .is_constant (&n_elts)))
3503 { 3629 {
3504 rtx op0 = XEXP (trueop0, 0); 3630 rtx op0 = XEXP (trueop0, 0);
3505 rtx op1 = XEXP (trueop0, 1); 3631 rtx op1 = XEXP (trueop0, 1);
3506
3507 machine_mode opmode = GET_MODE (op0);
3508 int elt_size = GET_MODE_UNIT_SIZE (opmode);
3509 int n_elts = GET_MODE_SIZE (opmode) / elt_size;
3510 3632
3511 int i = INTVAL (XVECEXP (trueop1, 0, 0)); 3633 int i = INTVAL (XVECEXP (trueop1, 0, 0));
3512 int elem; 3634 int elem;
3513 3635
3514 rtvec vec; 3636 rtvec vec;
3530 int n_elts00, n_elts01; 3652 int n_elts00, n_elts01;
3531 3653
3532 mode00 = GET_MODE (op00); 3654 mode00 = GET_MODE (op00);
3533 mode01 = GET_MODE (op01); 3655 mode01 = GET_MODE (op01);
3534 3656
3535 /* Find out number of elements of each operand. */ 3657 /* Find out the number of elements of each operand.
3536 if (VECTOR_MODE_P (mode00)) 3658 Since the concatenated result has a constant number
3537 { 3659 of elements, the operands must too. */
3538 elt_size = GET_MODE_UNIT_SIZE (mode00); 3660 n_elts00 = GET_MODE_NUNITS (mode00).to_constant ();
3539 n_elts00 = GET_MODE_SIZE (mode00) / elt_size; 3661 n_elts01 = GET_MODE_NUNITS (mode01).to_constant ();
3540 }
3541 else
3542 n_elts00 = 1;
3543
3544 if (VECTOR_MODE_P (mode01))
3545 {
3546 elt_size = GET_MODE_UNIT_SIZE (mode01);
3547 n_elts01 = GET_MODE_SIZE (mode01) / elt_size;
3548 }
3549 else
3550 n_elts01 = 1;
3551 3662
3552 gcc_assert (n_elts == n_elts00 + n_elts01); 3663 gcc_assert (n_elts == n_elts00 + n_elts01);
3553 3664
3554 /* Select correct operand of VEC_CONCAT 3665 /* Select correct operand of VEC_CONCAT
3555 and adjust selector. */ 3666 and adjust selector. */
3569 3680
3570 tmp = gen_rtx_fmt_ee (code, mode, 3681 tmp = gen_rtx_fmt_ee (code, mode,
3571 tmp_op, gen_rtx_PARALLEL (VOIDmode, vec)); 3682 tmp_op, gen_rtx_PARALLEL (VOIDmode, vec));
3572 return tmp; 3683 return tmp;
3573 } 3684 }
3574 if (GET_CODE (trueop0) == VEC_DUPLICATE
3575 && GET_MODE (XEXP (trueop0, 0)) == mode)
3576 return XEXP (trueop0, 0);
3577 } 3685 }
3578 else 3686 else
3579 { 3687 {
3580 gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0))); 3688 gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0)));
3581 gcc_assert (GET_MODE_INNER (mode) 3689 gcc_assert (GET_MODE_INNER (mode)
3582 == GET_MODE_INNER (GET_MODE (trueop0))); 3690 == GET_MODE_INNER (GET_MODE (trueop0)));
3583 gcc_assert (GET_CODE (trueop1) == PARALLEL); 3691 gcc_assert (GET_CODE (trueop1) == PARALLEL);
3584 3692
3693 if (vec_duplicate_p (trueop0, &elt0))
3694 /* It doesn't matter which elements are selected by trueop1,
3695 because they are all the same. */
3696 return gen_vec_duplicate (mode, elt0);
3697
3585 if (GET_CODE (trueop0) == CONST_VECTOR) 3698 if (GET_CODE (trueop0) == CONST_VECTOR)
3586 { 3699 {
3587 int elt_size = GET_MODE_UNIT_SIZE (mode); 3700 unsigned n_elts = XVECLEN (trueop1, 0);
3588 unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
3589 rtvec v = rtvec_alloc (n_elts); 3701 rtvec v = rtvec_alloc (n_elts);
3590 unsigned int i; 3702 unsigned int i;
3591 3703
3592 gcc_assert (XVECLEN (trueop1, 0) == (int) n_elts); 3704 gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode)));
3593 for (i = 0; i < n_elts; i++) 3705 for (i = 0; i < n_elts; i++)
3594 { 3706 {
3595 rtx x = XVECEXP (trueop1, 0, i); 3707 rtx x = XVECEXP (trueop1, 0, i);
3596 3708
3597 gcc_assert (CONST_INT_P (x)); 3709 if (!CONST_INT_P (x))
3710 return 0;
3711
3598 RTVEC_ELT (v, i) = CONST_VECTOR_ELT (trueop0, 3712 RTVEC_ELT (v, i) = CONST_VECTOR_ELT (trueop0,
3599 INTVAL (x)); 3713 INTVAL (x));
3600 } 3714 }
3601 3715
3602 return gen_rtx_CONST_VECTOR (mode, v); 3716 return gen_rtx_CONST_VECTOR (mode, v);
3656 3770
3657 return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1); 3771 return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1);
3658 } 3772 }
3659 3773
3660 /* If we select one half of a vec_concat, return that. */ 3774 /* If we select one half of a vec_concat, return that. */
3775 int l0, l1;
3661 if (GET_CODE (trueop0) == VEC_CONCAT 3776 if (GET_CODE (trueop0) == VEC_CONCAT
3777 && (GET_MODE_NUNITS (GET_MODE (XEXP (trueop0, 0)))
3778 .is_constant (&l0))
3779 && (GET_MODE_NUNITS (GET_MODE (XEXP (trueop0, 1)))
3780 .is_constant (&l1))
3662 && CONST_INT_P (XVECEXP (trueop1, 0, 0))) 3781 && CONST_INT_P (XVECEXP (trueop1, 0, 0)))
3663 { 3782 {
3664 rtx subop0 = XEXP (trueop0, 0); 3783 rtx subop0 = XEXP (trueop0, 0);
3665 rtx subop1 = XEXP (trueop0, 1); 3784 rtx subop1 = XEXP (trueop0, 1);
3666 machine_mode mode0 = GET_MODE (subop0); 3785 machine_mode mode0 = GET_MODE (subop0);
3667 machine_mode mode1 = GET_MODE (subop1); 3786 machine_mode mode1 = GET_MODE (subop1);
3668 int li = GET_MODE_UNIT_SIZE (mode0);
3669 int l0 = GET_MODE_SIZE (mode0) / li;
3670 int l1 = GET_MODE_SIZE (mode1) / li;
3671 int i0 = INTVAL (XVECEXP (trueop1, 0, 0)); 3787 int i0 = INTVAL (XVECEXP (trueop1, 0, 0));
3672 if (i0 == 0 && !side_effects_p (op1) && mode == mode0) 3788 if (i0 == 0 && !side_effects_p (op1) && mode == mode0)
3673 { 3789 {
3674 bool success = true; 3790 bool success = true;
3675 for (int i = 1; i < l0; ++i) 3791 for (int i = 1; i < l0; ++i)
3705 if (XVECLEN (trueop1, 0) == 1 3821 if (XVECLEN (trueop1, 0) == 1
3706 && CONST_INT_P (XVECEXP (trueop1, 0, 0)) 3822 && CONST_INT_P (XVECEXP (trueop1, 0, 0))
3707 && GET_CODE (trueop0) == VEC_CONCAT) 3823 && GET_CODE (trueop0) == VEC_CONCAT)
3708 { 3824 {
3709 rtx vec = trueop0; 3825 rtx vec = trueop0;
3710 int offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode); 3826 offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode);
3711 3827
3712 /* Try to find the element in the VEC_CONCAT. */ 3828 /* Try to find the element in the VEC_CONCAT. */
3713 while (GET_MODE (vec) != mode 3829 while (GET_MODE (vec) != mode
3714 && GET_CODE (vec) == VEC_CONCAT) 3830 && GET_CODE (vec) == VEC_CONCAT)
3715 { 3831 {
3716 HOST_WIDE_INT vec_size; 3832 poly_int64 vec_size;
3717 3833
3718 if (CONST_INT_P (XEXP (vec, 0))) 3834 if (CONST_INT_P (XEXP (vec, 0)))
3719 { 3835 {
3720 /* vec_concat of two const_ints doesn't make sense with 3836 /* vec_concat of two const_ints doesn't make sense with
3721 respect to modes. */ 3837 respect to modes. */
3726 - GET_MODE_SIZE (GET_MODE (XEXP (vec, 1))); 3842 - GET_MODE_SIZE (GET_MODE (XEXP (vec, 1)));
3727 } 3843 }
3728 else 3844 else
3729 vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0))); 3845 vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0)));
3730 3846
3731 if (offset < vec_size) 3847 if (known_lt (offset, vec_size))
3732 vec = XEXP (vec, 0); 3848 vec = XEXP (vec, 0);
3733 else 3849 else if (known_ge (offset, vec_size))
3734 { 3850 {
3735 offset -= vec_size; 3851 offset -= vec_size;
3736 vec = XEXP (vec, 1); 3852 vec = XEXP (vec, 1);
3737 } 3853 }
3854 else
3855 break;
3738 vec = avoid_constant_pool_reference (vec); 3856 vec = avoid_constant_pool_reference (vec);
3739 } 3857 }
3740 3858
3741 if (GET_MODE (vec) == mode) 3859 if (GET_MODE (vec) == mode)
3742 return vec; 3860 return vec;
3772 if (GET_CODE (trueop0) == VEC_SELECT 3890 if (GET_CODE (trueop0) == VEC_SELECT
3773 && GET_MODE (XEXP (trueop0, 0)) == mode) 3891 && GET_MODE (XEXP (trueop0, 0)) == mode)
3774 { 3892 {
3775 rtx op0_subop1 = XEXP (trueop0, 1); 3893 rtx op0_subop1 = XEXP (trueop0, 1);
3776 gcc_assert (GET_CODE (op0_subop1) == PARALLEL); 3894 gcc_assert (GET_CODE (op0_subop1) == PARALLEL);
3777 gcc_assert (XVECLEN (trueop1, 0) == GET_MODE_NUNITS (mode)); 3895 gcc_assert (known_eq (XVECLEN (trueop1, 0), GET_MODE_NUNITS (mode)));
3778 3896
3779 /* Apply the outer ordering vector to the inner one. (The inner 3897 /* Apply the outer ordering vector to the inner one. (The inner
3780 ordering vector is expressly permitted to be of a different 3898 ordering vector is expressly permitted to be of a different
3781 length than the outer one.) If the result is { 0, 1, ..., n-1 } 3899 length than the outer one.) If the result is { 0, 1, ..., n-1 }
3782 then the two VEC_SELECTs cancel. */ 3900 then the two VEC_SELECTs cancel. */
3801 machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode 3919 machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode
3802 ? GET_MODE (trueop1) 3920 ? GET_MODE (trueop1)
3803 : GET_MODE_INNER (mode)); 3921 : GET_MODE_INNER (mode));
3804 3922
3805 gcc_assert (VECTOR_MODE_P (mode)); 3923 gcc_assert (VECTOR_MODE_P (mode));
3806 gcc_assert (GET_MODE_SIZE (op0_mode) + GET_MODE_SIZE (op1_mode) 3924 gcc_assert (known_eq (GET_MODE_SIZE (op0_mode)
3807 == GET_MODE_SIZE (mode)); 3925 + GET_MODE_SIZE (op1_mode),
3926 GET_MODE_SIZE (mode)));
3808 3927
3809 if (VECTOR_MODE_P (op0_mode)) 3928 if (VECTOR_MODE_P (op0_mode))
3810 gcc_assert (GET_MODE_INNER (mode) 3929 gcc_assert (GET_MODE_INNER (mode)
3811 == GET_MODE_INNER (op0_mode)); 3930 == GET_MODE_INNER (op0_mode));
3812 else 3931 else
3816 gcc_assert (GET_MODE_INNER (mode) 3935 gcc_assert (GET_MODE_INNER (mode)
3817 == GET_MODE_INNER (op1_mode)); 3936 == GET_MODE_INNER (op1_mode));
3818 else 3937 else
3819 gcc_assert (GET_MODE_INNER (mode) == op1_mode); 3938 gcc_assert (GET_MODE_INNER (mode) == op1_mode);
3820 3939
3940 unsigned int n_elts, in_n_elts;
3821 if ((GET_CODE (trueop0) == CONST_VECTOR 3941 if ((GET_CODE (trueop0) == CONST_VECTOR
3822 || CONST_SCALAR_INT_P (trueop0) 3942 || CONST_SCALAR_INT_P (trueop0)
3823 || CONST_DOUBLE_AS_FLOAT_P (trueop0)) 3943 || CONST_DOUBLE_AS_FLOAT_P (trueop0))
3824 && (GET_CODE (trueop1) == CONST_VECTOR 3944 && (GET_CODE (trueop1) == CONST_VECTOR
3825 || CONST_SCALAR_INT_P (trueop1) 3945 || CONST_SCALAR_INT_P (trueop1)
3826 || CONST_DOUBLE_AS_FLOAT_P (trueop1))) 3946 || CONST_DOUBLE_AS_FLOAT_P (trueop1))
3947 && GET_MODE_NUNITS (mode).is_constant (&n_elts)
3948 && GET_MODE_NUNITS (op0_mode).is_constant (&in_n_elts))
3827 { 3949 {
3828 int elt_size = GET_MODE_UNIT_SIZE (mode);
3829 unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
3830 rtvec v = rtvec_alloc (n_elts); 3950 rtvec v = rtvec_alloc (n_elts);
3831 unsigned int i; 3951 unsigned int i;
3832 unsigned in_n_elts = 1;
3833
3834 if (VECTOR_MODE_P (op0_mode))
3835 in_n_elts = (GET_MODE_SIZE (op0_mode) / elt_size);
3836 for (i = 0; i < n_elts; i++) 3952 for (i = 0; i < n_elts; i++)
3837 { 3953 {
3838 if (i < in_n_elts) 3954 if (i < in_n_elts)
3839 { 3955 {
3840 if (!VECTOR_MODE_P (op0_mode)) 3956 if (!VECTOR_MODE_P (op0_mode))
3880 3996
3881 default: 3997 default:
3882 gcc_unreachable (); 3998 gcc_unreachable ();
3883 } 3999 }
3884 4000
4001 if (mode == GET_MODE (op0)
4002 && mode == GET_MODE (op1)
4003 && vec_duplicate_p (op0, &elt0)
4004 && vec_duplicate_p (op1, &elt1))
4005 {
4006 /* Try applying the operator to ELT and see if that simplifies.
4007 We can duplicate the result if so.
4008
4009 The reason we don't use simplify_gen_binary is that it isn't
4010 necessarily a win to convert things like:
4011
4012 (plus:V (vec_duplicate:V (reg:S R1))
4013 (vec_duplicate:V (reg:S R2)))
4014
4015 to:
4016
4017 (vec_duplicate:V (plus:S (reg:S R1) (reg:S R2)))
4018
4019 The first might be done entirely in vector registers while the
4020 second might need a move between register files. */
4021 tem = simplify_binary_operation (code, GET_MODE_INNER (mode),
4022 elt0, elt1);
4023 if (tem)
4024 return gen_vec_duplicate (mode, tem);
4025 }
4026
3885 return 0; 4027 return 0;
3886 } 4028 }
3887 4029
3888 rtx 4030 rtx
3889 simplify_const_binary_operation (enum rtx_code code, machine_mode mode, 4031 simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
3892 if (VECTOR_MODE_P (mode) 4034 if (VECTOR_MODE_P (mode)
3893 && code != VEC_CONCAT 4035 && code != VEC_CONCAT
3894 && GET_CODE (op0) == CONST_VECTOR 4036 && GET_CODE (op0) == CONST_VECTOR
3895 && GET_CODE (op1) == CONST_VECTOR) 4037 && GET_CODE (op1) == CONST_VECTOR)
3896 { 4038 {
3897 unsigned n_elts = GET_MODE_NUNITS (mode); 4039 unsigned int n_elts;
3898 machine_mode op0mode = GET_MODE (op0); 4040 if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts))
3899 unsigned op0_n_elts = GET_MODE_NUNITS (op0mode); 4041 return NULL_RTX;
3900 machine_mode op1mode = GET_MODE (op1); 4042
3901 unsigned op1_n_elts = GET_MODE_NUNITS (op1mode); 4043 gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1)));
4044 gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode)));
3902 rtvec v = rtvec_alloc (n_elts); 4045 rtvec v = rtvec_alloc (n_elts);
3903 unsigned int i; 4046 unsigned int i;
3904 4047
3905 gcc_assert (op0_n_elts == n_elts);
3906 gcc_assert (op1_n_elts == n_elts);
3907 for (i = 0; i < n_elts; i++) 4048 for (i = 0; i < n_elts; i++)
3908 { 4049 {
3909 rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode), 4050 rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
3910 CONST_VECTOR_ELT (op0, i), 4051 CONST_VECTOR_ELT (op0, i),
3911 CONST_VECTOR_ELT (op1, i)); 4052 CONST_VECTOR_ELT (op1, i));
3912 if (!x) 4053 if (!x || !valid_for_const_vector_p (mode, x))
3913 return 0; 4054 return 0;
3914 RTVEC_ELT (v, i) = x; 4055 RTVEC_ELT (v, i) = x;
3915 } 4056 }
3916 4057
3917 return gen_rtx_CONST_VECTOR (mode, v); 4058 return gen_rtx_CONST_VECTOR (mode, v);
3918 } 4059 }
3919 4060
3920 if (VECTOR_MODE_P (mode) 4061 if (VECTOR_MODE_P (mode)
3921 && code == VEC_CONCAT 4062 && code == VEC_CONCAT
3922 && (CONST_SCALAR_INT_P (op0) 4063 && (CONST_SCALAR_INT_P (op0)
3923 || GET_CODE (op0) == CONST_FIXED 4064 || CONST_FIXED_P (op0)
3924 || CONST_DOUBLE_AS_FLOAT_P (op0)) 4065 || CONST_DOUBLE_AS_FLOAT_P (op0))
3925 && (CONST_SCALAR_INT_P (op1) 4066 && (CONST_SCALAR_INT_P (op1)
3926 || CONST_DOUBLE_AS_FLOAT_P (op1) 4067 || CONST_DOUBLE_AS_FLOAT_P (op1)
3927 || GET_CODE (op1) == CONST_FIXED)) 4068 || CONST_FIXED_P (op1)))
3928 { 4069 {
3929 unsigned n_elts = GET_MODE_NUNITS (mode); 4070 /* Both inputs have a constant number of elements, so the result
4071 must too. */
4072 unsigned n_elts = GET_MODE_NUNITS (mode).to_constant ();
3930 rtvec v = rtvec_alloc (n_elts); 4073 rtvec v = rtvec_alloc (n_elts);
3931 4074
3932 gcc_assert (n_elts >= 2); 4075 gcc_assert (n_elts >= 2);
3933 if (n_elts == 2) 4076 if (n_elts == 2)
3934 { 4077 {
3938 RTVEC_ELT (v, 0) = op0; 4081 RTVEC_ELT (v, 0) = op0;
3939 RTVEC_ELT (v, 1) = op1; 4082 RTVEC_ELT (v, 1) = op1;
3940 } 4083 }
3941 else 4084 else
3942 { 4085 {
3943 unsigned op0_n_elts = GET_MODE_NUNITS (GET_MODE (op0)); 4086 unsigned op0_n_elts = GET_MODE_NUNITS (GET_MODE (op0)).to_constant ();
3944 unsigned op1_n_elts = GET_MODE_NUNITS (GET_MODE (op1)); 4087 unsigned op1_n_elts = GET_MODE_NUNITS (GET_MODE (op1)).to_constant ();
3945 unsigned i; 4088 unsigned i;
3946 4089
3947 gcc_assert (GET_CODE (op0) == CONST_VECTOR); 4090 gcc_assert (GET_CODE (op0) == CONST_VECTOR);
3948 gcc_assert (GET_CODE (op1) == CONST_VECTOR); 4091 gcc_assert (GET_CODE (op1) == CONST_VECTOR);
3949 gcc_assert (op0_n_elts + op1_n_elts == n_elts); 4092 gcc_assert (op0_n_elts + op1_n_elts == n_elts);
3950 4093
3951 for (i = 0; i < op0_n_elts; ++i) 4094 for (i = 0; i < op0_n_elts; ++i)
3952 RTVEC_ELT (v, i) = XVECEXP (op0, 0, i); 4095 RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op0, i);
3953 for (i = 0; i < op1_n_elts; ++i) 4096 for (i = 0; i < op1_n_elts; ++i)
3954 RTVEC_ELT (v, op0_n_elts+i) = XVECEXP (op1, 0, i); 4097 RTVEC_ELT (v, op0_n_elts+i) = CONST_VECTOR_ELT (op1, i);
3955 } 4098 }
3956 4099
3957 return gen_rtx_CONST_VECTOR (mode, v); 4100 return gen_rtx_CONST_VECTOR (mode, v);
3958 } 4101 }
3959 4102
4086 if (is_a <scalar_int_mode> (mode, &int_mode) 4229 if (is_a <scalar_int_mode> (mode, &int_mode)
4087 && CONST_SCALAR_INT_P (op0) 4230 && CONST_SCALAR_INT_P (op0)
4088 && CONST_SCALAR_INT_P (op1)) 4231 && CONST_SCALAR_INT_P (op1))
4089 { 4232 {
4090 wide_int result; 4233 wide_int result;
4091 bool overflow; 4234 wi::overflow_type overflow;
4092 rtx_mode_t pop0 = rtx_mode_t (op0, int_mode); 4235 rtx_mode_t pop0 = rtx_mode_t (op0, int_mode);
4093 rtx_mode_t pop1 = rtx_mode_t (op1, int_mode); 4236 rtx_mode_t pop1 = rtx_mode_t (op1, int_mode);
4094 4237
4095 #if TARGET_SUPPORTS_WIDE_INT == 0 4238 #if TARGET_SUPPORTS_WIDE_INT == 0
4096 /* This assert keeps the simplification from producing a result 4239 /* This assert keeps the simplification from producing a result
4215 default: 4358 default:
4216 gcc_unreachable (); 4359 gcc_unreachable ();
4217 } 4360 }
4218 break; 4361 break;
4219 } 4362 }
4363 default:
4364 return NULL_RTX;
4365 }
4366 return immed_wide_int_const (result, int_mode);
4367 }
4368
4369 /* Handle polynomial integers. */
4370 if (NUM_POLY_INT_COEFFS > 1
4371 && is_a <scalar_int_mode> (mode, &int_mode)
4372 && poly_int_rtx_p (op0)
4373 && poly_int_rtx_p (op1))
4374 {
4375 poly_wide_int result;
4376 switch (code)
4377 {
4378 case PLUS:
4379 result = wi::to_poly_wide (op0, mode) + wi::to_poly_wide (op1, mode);
4380 break;
4381
4382 case MINUS:
4383 result = wi::to_poly_wide (op0, mode) - wi::to_poly_wide (op1, mode);
4384 break;
4385
4386 case MULT:
4387 if (CONST_SCALAR_INT_P (op1))
4388 result = wi::to_poly_wide (op0, mode) * rtx_mode_t (op1, mode);
4389 else
4390 return NULL_RTX;
4391 break;
4392
4393 case ASHIFT:
4394 if (CONST_SCALAR_INT_P (op1))
4395 {
4396 wide_int shift = rtx_mode_t (op1, mode);
4397 if (SHIFT_COUNT_TRUNCATED)
4398 shift = wi::umod_trunc (shift, GET_MODE_PRECISION (int_mode));
4399 else if (wi::geu_p (shift, GET_MODE_PRECISION (int_mode)))
4400 return NULL_RTX;
4401 result = wi::to_poly_wide (op0, mode) << shift;
4402 }
4403 else
4404 return NULL_RTX;
4405 break;
4406
4407 case IOR:
4408 if (!CONST_SCALAR_INT_P (op1)
4409 || !can_ior_p (wi::to_poly_wide (op0, mode),
4410 rtx_mode_t (op1, mode), &result))
4411 return NULL_RTX;
4412 break;
4413
4220 default: 4414 default:
4221 return NULL_RTX; 4415 return NULL_RTX;
4222 } 4416 }
4223 return immed_wide_int_const (result, int_mode); 4417 return immed_wide_int_const (result, int_mode);
4224 } 4418 }
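
The new poly_int branch in simplify_const_binary_operation folds PLUS, MINUS, MULT by a constant and ASHIFT by a constant coefficient-wise on the a + b*x form; IOR additionally goes through can_ior_p because bitwise OR does not distribute over that form in general. The arithmetic cases can be sanity-checked on the simplified model (illustrative only):

#include <cassert>
#include <cstdint>

struct poly { int64_t a, b; };             // value = a + b * x (model only)

static int64_t eval (poly p, int64_t x) { return p.a + p.b * x; }

int main ()
{
  poly p = { 12, 4 };
  poly q = { 3, 2 };
  for (int64_t x = 0; x < 5; ++x)
    {
      // PLUS and MINUS work coefficient-wise.
      assert (eval (p, x) + eval (q, x) == eval ({ p.a + q.a, p.b + q.b }, x));
      assert (eval (p, x) - eval (q, x) == eval ({ p.a - q.a, p.b - q.b }, x));
      // MULT by a compile-time constant scales both coefficients.
      assert (eval (p, x) * 3 == eval ({ p.a * 3, p.b * 3 }, x));
      // ASHIFT by a constant shifts both coefficients.
      assert (eval (p, x) << 2 == eval ({ p.a << 2, p.b << 2 }, x));
    }
  return 0;
}
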
4634 { 4828 {
4635 if (tem == const0_rtx) 4829 if (tem == const0_rtx)
4636 return CONST0_RTX (mode); 4830 return CONST0_RTX (mode);
4637 #ifdef VECTOR_STORE_FLAG_VALUE 4831 #ifdef VECTOR_STORE_FLAG_VALUE
4638 { 4832 {
4639 int i, units;
4640 rtvec v;
4641
4642 rtx val = VECTOR_STORE_FLAG_VALUE (mode); 4833 rtx val = VECTOR_STORE_FLAG_VALUE (mode);
4643 if (val == NULL_RTX) 4834 if (val == NULL_RTX)
4644 return NULL_RTX; 4835 return NULL_RTX;
4645 if (val == const1_rtx) 4836 if (val == const1_rtx)
4646 return CONST1_RTX (mode); 4837 return CONST1_RTX (mode);
4647 4838
4648 units = GET_MODE_NUNITS (mode); 4839 return gen_const_vec_duplicate (mode, val);
4649 v = rtvec_alloc (units);
4650 for (i = 0; i < units; i++)
4651 RTVEC_ELT (v, i) = val;
4652 return gen_rtx_raw_CONST_VECTOR (mode, v);
4653 } 4840 }
4654 #else 4841 #else
4655 return NULL_RTX; 4842 return NULL_RTX;
4656 #endif 4843 #endif
4657 } 4844 }
4864 && CONST_SCALAR_INT_P (XEXP (op0, 1))) 5051 && CONST_SCALAR_INT_P (XEXP (op0, 1)))
4865 return simplify_gen_relational (code, mode, cmp_mode, XEXP (op0, 0), 5052 return simplify_gen_relational (code, mode, cmp_mode, XEXP (op0, 0),
4866 simplify_gen_binary (XOR, cmp_mode, 5053 simplify_gen_binary (XOR, cmp_mode,
4867 XEXP (op0, 1), op1)); 5054 XEXP (op0, 1), op1));
4868 5055
4869 /* (eq/ne (and x y) x) simplifies to (eq/ne (and (not y) x) 0), which 5056 /* Simplify (eq/ne (and/ior x y) x/y) for targets with a BICS instruction or
4870 can be implemented with a BICS instruction on some targets, or 5057 constant folding if x/y is a constant. */
4871 constant-folded if y is a constant. */
4872 if ((code == EQ || code == NE) 5058 if ((code == EQ || code == NE)
4873 && op0code == AND 5059 && (op0code == AND || op0code == IOR)
4874 && rtx_equal_p (XEXP (op0, 0), op1)
4875 && !side_effects_p (op1) 5060 && !side_effects_p (op1)
4876 && op1 != CONST0_RTX (cmp_mode)) 5061 && op1 != CONST0_RTX (cmp_mode))
4877 { 5062 {
4878 rtx not_y = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 1), cmp_mode); 5063 /* Both (eq/ne (and x y) x) and (eq/ne (ior x y) y) simplify to
4879 rtx lhs = simplify_gen_binary (AND, cmp_mode, not_y, XEXP (op0, 0)); 5064 (eq/ne (and (not y) x) 0). */
4880 5065 if ((op0code == AND && rtx_equal_p (XEXP (op0, 0), op1))
4881 return simplify_gen_relational (code, mode, cmp_mode, lhs, 5066 || (op0code == IOR && rtx_equal_p (XEXP (op0, 1), op1)))
4882 CONST0_RTX (cmp_mode)); 5067 {
4883 } 5068 rtx not_y = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 1),
4884 5069 cmp_mode);
4885 /* Likewise for (eq/ne (and x y) y). */ 5070 rtx lhs = simplify_gen_binary (AND, cmp_mode, not_y, XEXP (op0, 0));
4886 if ((code == EQ || code == NE) 5071
4887 && op0code == AND 5072 return simplify_gen_relational (code, mode, cmp_mode, lhs,
4888 && rtx_equal_p (XEXP (op0, 1), op1) 5073 CONST0_RTX (cmp_mode));
4889 && !side_effects_p (op1) 5074 }
4890 && op1 != CONST0_RTX (cmp_mode)) 5075
4891 { 5076 /* Both (eq/ne (and x y) y) and (eq/ne (ior x y) x) simplify to
4892 rtx not_x = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 0), cmp_mode); 5077 (eq/ne (and (not x) y) 0). */
4893 rtx lhs = simplify_gen_binary (AND, cmp_mode, not_x, XEXP (op0, 1)); 5078 if ((op0code == AND && rtx_equal_p (XEXP (op0, 1), op1))
4894 5079 || (op0code == IOR && rtx_equal_p (XEXP (op0, 0), op1)))
4895 return simplify_gen_relational (code, mode, cmp_mode, lhs, 5080 {
4896 CONST0_RTX (cmp_mode)); 5081 rtx not_x = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 0),
5082 cmp_mode);
5083 rtx lhs = simplify_gen_binary (AND, cmp_mode, not_x, XEXP (op0, 1));
5084
5085 return simplify_gen_relational (code, mode, cmp_mode, lhs,
5086 CONST0_RTX (cmp_mode));
5087 }
4897 } 5088 }
4898 5089
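The (eq/ne (and/ior x y) x/y) rewrites above rely on two bitwise identities. A brute-force check over all 8-bit operand pairs (a standalone sanity check, not part of this file) confirms them:

#include <cassert>
#include <cstdint>

int
main ()
{
  for (unsigned xi = 0; xi < 256; ++xi)
    for (unsigned yi = 0; yi < 256; ++yi)
      {
        uint8_t x = xi, y = yi;
        /* (eq (and x y) x) and (eq (ior x y) y) both become
           (eq (and (not y) x) 0).  */
        assert (((uint8_t) (x & y) == x) == ((uint8_t) (~y & x) == 0));
        assert (((uint8_t) (x | y) == y) == ((uint8_t) (~y & x) == 0));
        /* Likewise (eq (and x y) y) and (eq (ior x y) x) become
           (eq (and (not x) y) 0).  */
        assert (((uint8_t) (x & y) == y) == ((uint8_t) (~x & y) == 0));
        assert (((uint8_t) (x | y) == x) == ((uint8_t) (~x & y) == 0));
      }
  return 0;
}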
4899 /* (eq/ne (bswap x) C1) simplifies to (eq/ne x C2) with C2 swapped. */ 5090 /* (eq/ne (bswap x) C1) simplifies to (eq/ne x C2) with C2 swapped. */
4900 if ((code == EQ || code == NE) 5091 if ((code == EQ || code == NE)
4901 && GET_CODE (op0) == BSWAP 5092 && GET_CODE (op0) == BSWAP
5056 5247
5057 if (INTEGRAL_MODE_P (mode) && trueop1 != const0_rtx 5248 if (INTEGRAL_MODE_P (mode) && trueop1 != const0_rtx
5058 && (code == EQ || code == NE) 5249 && (code == EQ || code == NE)
5059 && ! ((REG_P (op0) || CONST_INT_P (trueop0)) 5250 && ! ((REG_P (op0) || CONST_INT_P (trueop0))
5060 && (REG_P (op1) || CONST_INT_P (trueop1))) 5251 && (REG_P (op1) || CONST_INT_P (trueop1)))
5061 && 0 != (tem = simplify_binary_operation (MINUS, mode, op0, op1)) 5252 && (tem = simplify_binary_operation (MINUS, mode, op0, op1)) != 0
5062 /* We cannot do this if tem is a nonzero address. */ 5253 /* We cannot do this if tem is a nonzero address. */
5063 && ! nonzero_address_p (tem)) 5254 && ! nonzero_address_p (tem))
5064 return simplify_const_relational_operation (signed_condition (code), 5255 return simplify_const_relational_operation (signed_condition (code),
5065 mode, tem, const0_rtx); 5256 mode, tem, const0_rtx);
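Folding a comparison through MINUS as above is restricted to EQ and NE because equality with zero survives modular subtraction, while signed or unsigned orderings do not. A small standalone check of both facts:

#include <cassert>
#include <cstdint>

int
main ()
{
  /* a == b exactly when (a - b) == 0, even under wraparound.  */
  for (unsigned ai = 0; ai < 256; ++ai)
    for (unsigned bi = 0; bi < 256; ++bi)
      {
        uint8_t a = ai, b = bi;
        assert ((a == b) == ((uint8_t) (a - b) == 0));
      }

  /* Ordered comparisons are not preserved: 0 < 200 as unsigned bytes,
     but 0 - 200 wraps to 56, which is not "negative" or zero.  */
  uint8_t a = 0, b = 200;
  assert (a < b);
  assert ((uint8_t) (a - b) == 56);
  return 0;
}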
5066 5257
5385 return on_nonzero; 5576 return on_nonzero;
5386 5577
5387 return NULL_RTX; 5578 return NULL_RTX;
5388 } 5579 }
5389 5580
5581 /* Try to simplify X given that it appears within operand OP of a
5582 VEC_MERGE operation whose mask is MASK. X need not use the same
5583 vector mode as the VEC_MERGE, but it must have the same number of
5584 elements.
5585
5586 Return the simplified X on success, otherwise return NULL_RTX. */
5587
5588 rtx
5589 simplify_merge_mask (rtx x, rtx mask, int op)
5590 {
5591 gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
5592 poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
5593 if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
5594 {
5595 if (side_effects_p (XEXP (x, 1 - op)))
5596 return NULL_RTX;
5597
5598 return XEXP (x, op);
5599 }
5600 if (UNARY_P (x)
5601 && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
5602 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
5603 {
5604 rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
5605 if (top0)
5606 return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
5607 GET_MODE (XEXP (x, 0)));
5608 }
5609 if (BINARY_P (x)
5610 && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
5611 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
5612 && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
5613 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
5614 {
5615 rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
5616 rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
5617 if (top0 || top1)
5618 return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
5619 top0 ? top0 : XEXP (x, 0),
5620 top1 ? top1 : XEXP (x, 1));
5621 }
5622 if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
5623 && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
5624 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
5625 && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
5626 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
5627 && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
5628 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
5629 {
5630 rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
5631 rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
5632 rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
5633 if (top0 || top1 || top2)
5634 return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
5635 GET_MODE (XEXP (x, 0)),
5636 top0 ? top0 : XEXP (x, 0),
5637 top1 ? top1 : XEXP (x, 1),
5638 top2 ? top2 : XEXP (x, 2));
5639 }
5640 return NULL_RTX;
5641 }
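simplify_merge_mask is sound because only the lanes selected by MASK are observed through operand OP of the enclosing VEC_MERGE, and on those lanes an operation applied to (vec_merge a b mask) agrees with the same operation applied to a (op 0) or b (op 1). An element-wise standalone sketch of that argument for a four-lane integer vector, assuming the usual VEC_MERGE semantics (bit i of the mask set selects operand 0 for lane i):

#include <array>
#include <cassert>

typedef std::array<int, 4> v4;

/* Per-lane select: bit I of MASK set -> lane I comes from A.  */
static v4
vec_merge (const v4 &a, const v4 &b, unsigned mask)
{
  v4 r;
  for (int i = 0; i < 4; ++i)
    r[i] = ((mask >> i) & 1) ? a[i] : b[i];
  return r;
}

static v4
vec_neg (const v4 &a)
{
  v4 r;
  for (int i = 0; i < 4; ++i)
    r[i] = -a[i];
  return r;
}

int
main ()
{
  v4 a = { { 1, 2, 3, 4 } }, b = { { 5, 6, 7, 8 } };
  unsigned mask = 0x5;

  /* In the lanes that the outer vec_merge takes from operand 0,
     (neg (vec_merge a b mask)) and (neg a) agree, which is what lets
     simplify_merge_mask replace the inner expression.  */
  v4 lhs = vec_merge (vec_neg (vec_merge (a, b, mask)), b, mask);
  v4 rhs = vec_merge (vec_neg (a), b, mask);
  assert (lhs == rhs);
  return 0;
}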
5642
5390 5643
5391 /* Simplify CODE, an operation with result mode MODE and three operands, 5644 /* Simplify CODE, an operation with result mode MODE and three operands,
5392 OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became 5645 OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became
5393 a constant. Return 0 if no simplification is possible. */ 5646 a constant. Return 0 if no simplification is possible. */
5394 5647
5398 rtx op2) 5651 rtx op2)
5399 { 5652 {
5400 bool any_change = false; 5653 bool any_change = false;
5401 rtx tem, trueop2; 5654 rtx tem, trueop2;
5402 scalar_int_mode int_mode, int_op0_mode; 5655 scalar_int_mode int_mode, int_op0_mode;
5656 unsigned int n_elts;
5403 5657
5404 switch (code) 5658 switch (code)
5405 { 5659 {
5406 case FMA: 5660 case FMA:
5407 /* Simplify negations around the multiplication. */ 5661 /* Simplify negations around the multiplication. */
5500 && GET_CODE (op0) == NE 5754 && GET_CODE (op0) == NE
5501 && GET_CODE (XEXP (op0, 0)) == NOT 5755 && GET_CODE (XEXP (op0, 0)) == NOT
5502 && GET_CODE (XEXP (op0, 1)) == CONST_VECTOR) 5756 && GET_CODE (XEXP (op0, 1)) == CONST_VECTOR)
5503 { 5757 {
5504 rtx cv = XEXP (op0, 1); 5758 rtx cv = XEXP (op0, 1);
5505 int nunits = CONST_VECTOR_NUNITS (cv); 5759 int nunits;
5506 bool ok = true; 5760 bool ok = true;
5507 for (int i = 0; i < nunits; ++i) 5761 if (!CONST_VECTOR_NUNITS (cv).is_constant (&nunits))
5508 if (CONST_VECTOR_ELT (cv, i) != const0_rtx) 5762 ok = false;
5509 { 5763 else
5510 ok = false; 5764 for (int i = 0; i < nunits; ++i)
5511 break; 5765 if (CONST_VECTOR_ELT (cv, i) != const0_rtx)
5512 } 5766 {
5767 ok = false;
5768 break;
5769 }
5513 if (ok) 5770 if (ok)
5514 { 5771 {
5515 rtx new_op0 = gen_rtx_NE (GET_MODE (op0), 5772 rtx new_op0 = gen_rtx_NE (GET_MODE (op0),
5516 XEXP (XEXP (op0, 0), 0), 5773 XEXP (XEXP (op0, 0), 0),
5517 XEXP (op0, 1)); 5774 XEXP (op0, 1));
5561 5818
5562 return simplify_gen_relational (code, mode, cmp_mode, 5819 return simplify_gen_relational (code, mode, cmp_mode,
5563 XEXP (op0, 0), XEXP (op0, 1)); 5820 XEXP (op0, 0), XEXP (op0, 1));
5564 } 5821 }
5565 5822
5566 if (cmp_mode == VOIDmode)
5567 cmp_mode = op0_mode;
5568 temp = simplify_relational_operation (GET_CODE (op0), op0_mode, 5823 temp = simplify_relational_operation (GET_CODE (op0), op0_mode,
5569 cmp_mode, XEXP (op0, 0), 5824 cmp_mode, XEXP (op0, 0),
5570 XEXP (op0, 1)); 5825 XEXP (op0, 1));
5571 5826
5572 /* See if any simplifications were possible. */ 5827 /* See if any simplifications were possible. */
5583 case VEC_MERGE: 5838 case VEC_MERGE:
5584 gcc_assert (GET_MODE (op0) == mode); 5839 gcc_assert (GET_MODE (op0) == mode);
5585 gcc_assert (GET_MODE (op1) == mode); 5840 gcc_assert (GET_MODE (op1) == mode);
5586 gcc_assert (VECTOR_MODE_P (mode)); 5841 gcc_assert (VECTOR_MODE_P (mode));
5587 trueop2 = avoid_constant_pool_reference (op2); 5842 trueop2 = avoid_constant_pool_reference (op2);
5588 if (CONST_INT_P (trueop2)) 5843 if (CONST_INT_P (trueop2)
5589 { 5844 && GET_MODE_NUNITS (mode).is_constant (&n_elts))
5590 int elt_size = GET_MODE_UNIT_SIZE (mode); 5845 {
5591 unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
5592 unsigned HOST_WIDE_INT sel = UINTVAL (trueop2); 5846 unsigned HOST_WIDE_INT sel = UINTVAL (trueop2);
5593 unsigned HOST_WIDE_INT mask; 5847 unsigned HOST_WIDE_INT mask;
5594 if (n_elts == HOST_BITS_PER_WIDE_INT) 5848 if (n_elts == HOST_BITS_PER_WIDE_INT)
5595 mask = -1; 5849 mask = -1;
5596 else 5850 else
5650 /* Replace (vec_merge (vec_duplicate (vec_select a parallel (i))) a 1 << i) 5904 /* Replace (vec_merge (vec_duplicate (vec_select a parallel (i))) a 1 << i)
5651 with a. */ 5905 with a. */
5652 if (GET_CODE (op0) == VEC_DUPLICATE 5906 if (GET_CODE (op0) == VEC_DUPLICATE
5653 && GET_CODE (XEXP (op0, 0)) == VEC_SELECT 5907 && GET_CODE (XEXP (op0, 0)) == VEC_SELECT
5654 && GET_CODE (XEXP (XEXP (op0, 0), 1)) == PARALLEL 5908 && GET_CODE (XEXP (XEXP (op0, 0), 1)) == PARALLEL
5655 && mode_nunits[GET_MODE (XEXP (op0, 0))] == 1) 5909 && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (op0, 0))), 1))
5656 { 5910 {
5657 tem = XVECEXP ((XEXP (XEXP (op0, 0), 1)), 0, 0); 5911 tem = XVECEXP ((XEXP (XEXP (op0, 0), 1)), 0, 0);
5658 if (CONST_INT_P (tem) && CONST_INT_P (op2)) 5912 if (CONST_INT_P (tem) && CONST_INT_P (op2))
5659 { 5913 {
5660 if (XEXP (XEXP (op0, 0), 0) == op1 5914 if (XEXP (XEXP (op0, 0), 0) == op1
5661 && UINTVAL (op2) == HOST_WIDE_INT_1U << UINTVAL (tem)) 5915 && UINTVAL (op2) == HOST_WIDE_INT_1U << UINTVAL (tem))
5662 return op1; 5916 return op1;
5663 } 5917 }
5664 } 5918 }
5919 /* Replace (vec_merge (vec_duplicate (X)) (const_vector [A, B])
5920 (const_int N))
5921 with (vec_concat (X) (B)) if N == 1 or
5922 (vec_concat (A) (X)) if N == 2. */
5923 if (GET_CODE (op0) == VEC_DUPLICATE
5924 && GET_CODE (op1) == CONST_VECTOR
5925 && known_eq (CONST_VECTOR_NUNITS (op1), 2)
5926 && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
5927 && IN_RANGE (sel, 1, 2))
5928 {
5929 rtx newop0 = XEXP (op0, 0);
5930 rtx newop1 = CONST_VECTOR_ELT (op1, 2 - sel);
5931 if (sel == 2)
5932 std::swap (newop0, newop1);
5933 return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
5934 }
5935 /* Replace (vec_merge (vec_duplicate x) (vec_concat (y) (z)) (const_int N))
5936 with (vec_concat x z) if N == 1, or (vec_concat y x) if N == 2.
5937 Only applies for vectors of two elements. */
5938 if (GET_CODE (op0) == VEC_DUPLICATE
5939 && GET_CODE (op1) == VEC_CONCAT
5940 && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
5941 && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
5942 && IN_RANGE (sel, 1, 2))
5943 {
5944 rtx newop0 = XEXP (op0, 0);
5945 rtx newop1 = XEXP (op1, 2 - sel);
5946 rtx otherop = XEXP (op1, sel - 1);
5947 if (sel == 2)
5948 std::swap (newop0, newop1);
5949 /* Don't want to throw away the other part of the vec_concat if
5950 it has side-effects. */
5951 if (!side_effects_p (otherop))
5952 return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
5953 }
5954
5955 /* Replace:
5956
5957 (vec_merge:outer (vec_duplicate:outer x:inner)
5958 (subreg:outer y:inner 0)
5959 (const_int N))
5960
5961 with (vec_concat:outer x:inner y:inner) if N == 1,
5962 or (vec_concat:outer y:inner x:inner) if N == 2.
5963
5964 Implicitly, this means we have a paradoxical subreg, but such
5965 a check is cheap, so make it anyway.
5966
5967 Only applies for vectors of two elements. */
5968 if (GET_CODE (op0) == VEC_DUPLICATE
5969 && GET_CODE (op1) == SUBREG
5970 && GET_MODE (op1) == GET_MODE (op0)
5971 && GET_MODE (SUBREG_REG (op1)) == GET_MODE (XEXP (op0, 0))
5972 && paradoxical_subreg_p (op1)
5973 && subreg_lowpart_p (op1)
5974 && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
5975 && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
5976 && IN_RANGE (sel, 1, 2))
5977 {
5978 rtx newop0 = XEXP (op0, 0);
5979 rtx newop1 = SUBREG_REG (op1);
5980 if (sel == 2)
5981 std::swap (newop0, newop1);
5982 return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
5983 }
5984
5985 /* Same as above but with switched operands:
5986 Replace (vec_merge:outer (subreg:outer x:inner 0)
5987 (vec_duplicate:outer y:inner)
5988 (const_int N))
5989
5990 with (vec_concat:outer x:inner y:inner) if N == 1,
5991 or (vec_concat:outer y:inner x:inner) if N == 2. */
5992 if (GET_CODE (op1) == VEC_DUPLICATE
5993 && GET_CODE (op0) == SUBREG
5994 && GET_MODE (op0) == GET_MODE (op1)
5995 && GET_MODE (SUBREG_REG (op0)) == GET_MODE (XEXP (op1, 0))
5996 && paradoxical_subreg_p (op0)
5997 && subreg_lowpart_p (op0)
5998 && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
5999 && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
6000 && IN_RANGE (sel, 1, 2))
6001 {
6002 rtx newop0 = SUBREG_REG (op0);
6003 rtx newop1 = XEXP (op1, 0);
6004 if (sel == 2)
6005 std::swap (newop0, newop1);
6006 return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
6007 }
6008
6009 /* Replace (vec_merge (vec_duplicate x) (vec_duplicate y)
6010 (const_int n))
6011 with (vec_concat x y) or (vec_concat y x) depending on value
6012 of N. */
6013 if (GET_CODE (op0) == VEC_DUPLICATE
6014 && GET_CODE (op1) == VEC_DUPLICATE
6015 && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
6016 && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
6017 && IN_RANGE (sel, 1, 2))
6018 {
6019 rtx newop0 = XEXP (op0, 0);
6020 rtx newop1 = XEXP (op1, 0);
6021 if (sel == 2)
6022 std::swap (newop0, newop1);
6023
6024 return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
6025 }
5665 } 6026 }
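In the two-element VEC_MERGE folds above, selector 1 keeps operand 0's value in lane 0 and operand 1's value in lane 1, while selector 2 swaps the roles, which is exactly what the generated VEC_CONCAT expresses (its first operand supplies the low-numbered lane). A standalone sanity check of that selector convention on plain arrays:

#include <array>
#include <cassert>

typedef std::array<int, 2> v2;

/* Bit I of SEL set -> lane I comes from OP0.  */
static v2
vec_merge2 (const v2 &op0, const v2 &op1, unsigned sel)
{
  v2 r = { { (sel & 1) ? op0[0] : op1[0], (sel & 2) ? op0[1] : op1[1] } };
  return r;
}

static v2
vec_concat2 (int lo, int hi)
{
  v2 r = { { lo, hi } };
  return r;
}

int
main ()
{
  int x = 42;
  v2 dup_x = { { x, x } };        /* (vec_duplicate x) */
  v2 ab = { { 10, 20 } };         /* (const_vector [A, B]) with A=10, B=20 */

  /* sel == 1: lane 0 from dup_x, lane 1 from ab -> (vec_concat x B).  */
  assert (vec_merge2 (dup_x, ab, 1) == vec_concat2 (x, ab[1]));
  /* sel == 2: lane 0 from ab, lane 1 from dup_x -> (vec_concat A x).  */
  assert (vec_merge2 (dup_x, ab, 2) == vec_concat2 (ab[0], x));
  return 0;
}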
5666 6027
5667 if (rtx_equal_p (op0, op1) 6028 if (rtx_equal_p (op0, op1)
5668 && !side_effects_p (op2) && !side_effects_p (op1)) 6029 && !side_effects_p (op2) && !side_effects_p (op1))
5669 return op0; 6030 return op0;
5670 6031
6032 if (!side_effects_p (op2))
6033 {
6034 rtx top0 = simplify_merge_mask (op0, op2, 0);
6035 rtx top1 = simplify_merge_mask (op1, op2, 1);
6036 if (top0 || top1)
6037 return simplify_gen_ternary (code, mode, mode,
6038 top0 ? top0 : op0,
6039 top1 ? top1 : op1, op2);
6040 }
6041
5671 break; 6042 break;
5672 6043
5673 default: 6044 default:
5674 gcc_unreachable (); 6045 gcc_unreachable ();
5675 } 6046 }
5679 6050
5680 /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE 6051 /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
5681 or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or 6052 or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
5682 CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. 6053 CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
5683 6054
5684 Works by unpacking OP into a collection of 8-bit values 6055 Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values
5685 represented as a little-endian array of 'unsigned char', selecting by BYTE, 6056 represented as a little-endian array of 'unsigned char', selecting by BYTE,
5686 and then repacking them again for OUTERMODE. */ 6057 and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR,
6058 FIRST_ELEM is the number of the first element to extract, otherwise
6059 FIRST_ELEM is ignored. */
5687 6060
5688 static rtx 6061 static rtx
5689 simplify_immed_subreg (machine_mode outermode, rtx op, 6062 simplify_immed_subreg (fixed_size_mode outermode, rtx op,
5690 machine_mode innermode, unsigned int byte) 6063 machine_mode innermode, unsigned int byte,
6064 unsigned int first_elem, unsigned int inner_bytes)
5691 { 6065 {
5692 enum { 6066 enum {
5693 value_bit = 8, 6067 value_bit = 8,
5694 value_mask = (1 << value_bit) - 1 6068 value_mask = (1 << value_bit) - 1
5695 }; 6069 };
5715 if (COMPLEX_MODE_P (outermode)) 6089 if (COMPLEX_MODE_P (outermode))
5716 return NULL_RTX; 6090 return NULL_RTX;
5717 6091
5718 /* We support any size mode. */ 6092 /* We support any size mode. */
5719 max_bitsize = MAX (GET_MODE_BITSIZE (outermode), 6093 max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
5720 GET_MODE_BITSIZE (innermode)); 6094 inner_bytes * BITS_PER_UNIT);
5721 6095
5722 /* Unpack the value. */ 6096 /* Unpack the value. */
5723 6097
5724 if (GET_CODE (op) == CONST_VECTOR) 6098 if (GET_CODE (op) == CONST_VECTOR)
5725 { 6099 {
5726 num_elem = CONST_VECTOR_NUNITS (op); 6100 num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode));
5727 elems = &CONST_VECTOR_ELT (op, 0);
5728 elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode); 6101 elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);
5729 } 6102 }
5730 else 6103 else
5731 { 6104 {
5732 num_elem = 1; 6105 num_elem = 1;
5733 elems = &op;
5734 elem_bitsize = max_bitsize; 6106 elem_bitsize = max_bitsize;
5735 } 6107 }
5736 /* If this asserts, it is too complicated; reducing value_bit may help. */ 6108 /* If this asserts, it is too complicated; reducing value_bit may help. */
5737 gcc_assert (BITS_PER_UNIT % value_bit == 0); 6109 gcc_assert (BITS_PER_UNIT % value_bit == 0);
5738 /* I don't know how to handle endianness of sub-units. */ 6110 /* I don't know how to handle endianness of sub-units. */
5739 gcc_assert (elem_bitsize % BITS_PER_UNIT == 0); 6111 gcc_assert (elem_bitsize % BITS_PER_UNIT == 0);
5740 6112
5741 for (elem = 0; elem < num_elem; elem++) 6113 for (elem = 0; elem < num_elem; elem++)
5742 { 6114 {
5743 unsigned char * vp; 6115 unsigned char * vp;
5744 rtx el = elems[elem]; 6116 rtx el = (GET_CODE (op) == CONST_VECTOR
6117 ? CONST_VECTOR_ELT (op, first_elem + elem)
6118 : op);
5745 6119
5746 /* Vectors are kept in target memory order. (This is probably 6120 /* Vectors are kept in target memory order. (This is probably
5747 a mistake.) */ 6121 a mistake.) */
5748 { 6122 {
5749 unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT; 6123 unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
5865 6239
5866 /* Now, pick the right byte to start with. */ 6240 /* Now, pick the right byte to start with. */
5867 /* Renumber BYTE so that the least-significant byte is byte 0. A special 6241 /* Renumber BYTE so that the least-significant byte is byte 0. A special
5868 case is paradoxical SUBREGs, which shouldn't be adjusted since they 6242 case is paradoxical SUBREGs, which shouldn't be adjusted since they
5869 will already have offset 0. */ 6243 will already have offset 0. */
5870 if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode)) 6244 if (inner_bytes >= GET_MODE_SIZE (outermode))
5871 { 6245 {
5872 unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode) 6246 unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;
5873 - byte);
5874 unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; 6247 unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
5875 unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; 6248 unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
5876 byte = (subword_byte % UNITS_PER_WORD 6249 byte = (subword_byte % UNITS_PER_WORD
5877 + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD); 6250 + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
5878 } 6251 }
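The renumbering above converts a memory-order byte offset into a value-order (little-endian) byte index before the value is repacked. A standalone sketch of the same arithmetic, with the target macros WORDS_BIG_ENDIAN, BYTES_BIG_ENDIAN and UNITS_PER_WORD turned into ordinary parameters (the concrete sizes in main are made up for illustration):

#include <cassert>

/* BYTE is the memory-order offset of an OUTER_BYTES-sized value inside
   an INNER_BYTES-sized value; return the offset of its least-significant
   byte in value order, taking word and byte endianness into account.  */
static unsigned
renumber_byte (unsigned byte, unsigned inner_bytes, unsigned outer_bytes,
               unsigned units_per_word,
               bool words_big_endian, bool bytes_big_endian)
{
  unsigned ibyte = inner_bytes - outer_bytes - byte;
  unsigned word_byte = words_big_endian ? ibyte : byte;
  unsigned subword_byte = bytes_big_endian ? ibyte : byte;
  return (subword_byte % units_per_word
          + (word_byte / units_per_word) * units_per_word);
}

int
main ()
{
  /* 8-byte inner value, 2-byte outer value, 4-byte words.  */
  /* Little-endian: memory offsets are already value-order offsets.  */
  assert (renumber_byte (2, 8, 2, 4, false, false) == 2);
  /* Big-endian: memory offset 0 holds the most significant bytes,
     i.e. value-order offset inner_bytes - outer_bytes.  */
  assert (renumber_byte (0, 8, 2, 4, true, true) == 6);
  return 0;
}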
5879 6252
5880 /* BYTE should still be inside OP. (Note that BYTE is unsigned, 6253 /* BYTE should still be inside OP. (Note that BYTE is unsigned,
5881 so if it's become negative it will instead be very large.) */ 6254 so if it's become negative it will instead be very large.) */
5882 gcc_assert (byte < GET_MODE_SIZE (innermode)); 6255 gcc_assert (byte < inner_bytes);
5883 6256
5884 /* Convert from bytes to chunks of size value_bit. */ 6257 /* Convert from bytes to chunks of size value_bit. */
5885 value_start = byte * (BITS_PER_UNIT / value_bit); 6258 value_start = byte * (BITS_PER_UNIT / value_bit);
5886 6259
5887 /* Re-pack the value. */ 6260 /* Re-pack the value. */
6015 6388
6016 /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE) 6389 /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
6017 Return 0 if no simplifications are possible. */ 6390 Return 0 if no simplifications are possible. */
6018 rtx 6391 rtx
6019 simplify_subreg (machine_mode outermode, rtx op, 6392 simplify_subreg (machine_mode outermode, rtx op,
6020 machine_mode innermode, unsigned int byte) 6393 machine_mode innermode, poly_uint64 byte)
6021 { 6394 {
6022 /* Little bit of sanity checking. */ 6395 /* Little bit of sanity checking. */
6023 gcc_assert (innermode != VOIDmode); 6396 gcc_assert (innermode != VOIDmode);
6024 gcc_assert (outermode != VOIDmode); 6397 gcc_assert (outermode != VOIDmode);
6025 gcc_assert (innermode != BLKmode); 6398 gcc_assert (innermode != BLKmode);
6026 gcc_assert (outermode != BLKmode); 6399 gcc_assert (outermode != BLKmode);
6027 6400
6028 gcc_assert (GET_MODE (op) == innermode 6401 gcc_assert (GET_MODE (op) == innermode
6029 || GET_MODE (op) == VOIDmode); 6402 || GET_MODE (op) == VOIDmode);
6030 6403
6031 if ((byte % GET_MODE_SIZE (outermode)) != 0) 6404 poly_uint64 outersize = GET_MODE_SIZE (outermode);
6405 if (!multiple_p (byte, outersize))
6032 return NULL_RTX; 6406 return NULL_RTX;
6033 6407
6034 if (byte >= GET_MODE_SIZE (innermode)) 6408 poly_uint64 innersize = GET_MODE_SIZE (innermode);
6409 if (maybe_ge (byte, innersize))
6035 return NULL_RTX; 6410 return NULL_RTX;
6036 6411
6037 if (outermode == innermode && !byte) 6412 if (outermode == innermode && known_eq (byte, 0U))
6038 return op; 6413 return op;
6414
6415 if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
6416 {
6417 rtx elt;
6418
6419 if (VECTOR_MODE_P (outermode)
6420 && GET_MODE_INNER (outermode) == GET_MODE_INNER (innermode)
6421 && vec_duplicate_p (op, &elt))
6422 return gen_vec_duplicate (outermode, elt);
6423
6424 if (outermode == GET_MODE_INNER (innermode)
6425 && vec_duplicate_p (op, &elt))
6426 return elt;
6427 }
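Both VEC_DUPLICATE folds above depend only on the fact that a duplicated vector holds the same value in every lane, so any element-aligned slice is either the scalar itself or a shorter duplicate. A trivial standalone check of that fact on a plain array:

#include <cassert>

int
main ()
{
  const int x = 7;
  const int nunits = 8;
  int dup[nunits];
  for (int i = 0; i < nunits; ++i)
    dup[i] = x;                        /* (vec_duplicate x) */

  /* A scalar subreg at any element-aligned offset is just x.  */
  for (int i = 0; i < nunits; ++i)
    assert (dup[i] == x);

  /* A narrower vector subreg at an element-aligned offset is again a
     duplicate of x.  */
  const int sub_nunits = 4;
  for (int start = 0; start + sub_nunits <= nunits; start += sub_nunits)
    for (int i = 0; i < sub_nunits; ++i)
      assert (dup[start + i] == x);
  return 0;
}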
6039 6428
6040 if (CONST_SCALAR_INT_P (op) 6429 if (CONST_SCALAR_INT_P (op)
6041 || CONST_DOUBLE_AS_FLOAT_P (op) 6430 || CONST_DOUBLE_AS_FLOAT_P (op)
6042 || GET_CODE (op) == CONST_FIXED 6431 || CONST_FIXED_P (op)
6043 || GET_CODE (op) == CONST_VECTOR) 6432 || GET_CODE (op) == CONST_VECTOR)
6044 return simplify_immed_subreg (outermode, op, innermode, byte); 6433 {
6434 /* simplify_immed_subreg deconstructs OP into bytes and constructs
6435 the result from bytes, so it only works if the sizes of the modes
6436 and the value of the offset are known at compile time. Cases that
6437 apply to general modes and offsets should be handled here
6438 before calling simplify_immed_subreg. */
6439 fixed_size_mode fs_outermode, fs_innermode;
6440 unsigned HOST_WIDE_INT cbyte;
6441 if (is_a <fixed_size_mode> (outermode, &fs_outermode)
6442 && is_a <fixed_size_mode> (innermode, &fs_innermode)
6443 && byte.is_constant (&cbyte))
6444 return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte,
6445 0, GET_MODE_SIZE (fs_innermode));
6446
6447 /* Handle constant-sized outer modes and variable-sized inner modes. */
6448 unsigned HOST_WIDE_INT first_elem;
6449 if (GET_CODE (op) == CONST_VECTOR
6450 && is_a <fixed_size_mode> (outermode, &fs_outermode)
6451 && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode),
6452 &first_elem))
6453 return simplify_immed_subreg (fs_outermode, op, innermode, 0,
6454 first_elem,
6455 GET_MODE_SIZE (fs_outermode));
6456
6457 return NULL_RTX;
6458 }
6045 6459
6046 /* Changing mode twice with SUBREG => just change it once, 6460 /* Changing mode twice with SUBREG => just change it once,
6047 or not at all if changing back op starting mode. */ 6461 or not at all if changing back op starting mode. */
6048 if (GET_CODE (op) == SUBREG) 6462 if (GET_CODE (op) == SUBREG)
6049 { 6463 {
6050 machine_mode innermostmode = GET_MODE (SUBREG_REG (op)); 6464 machine_mode innermostmode = GET_MODE (SUBREG_REG (op));
6465 poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode);
6051 rtx newx; 6466 rtx newx;
6052 6467
6053 if (outermode == innermostmode 6468 if (outermode == innermostmode
6054 && byte == 0 && SUBREG_BYTE (op) == 0) 6469 && known_eq (byte, 0U)
6470 && known_eq (SUBREG_BYTE (op), 0))
6055 return SUBREG_REG (op); 6471 return SUBREG_REG (op);
6056 6472
6057 /* Work out the memory offset of the final OUTERMODE value relative 6473 /* Work out the memory offset of the final OUTERMODE value relative
6058 to the inner value of OP. */ 6474 to the inner value of OP. */
6059 HOST_WIDE_INT mem_offset = subreg_memory_offset (outermode, 6475 poly_int64 mem_offset = subreg_memory_offset (outermode,
6060 innermode, byte); 6476 innermode, byte);
6061 HOST_WIDE_INT op_mem_offset = subreg_memory_offset (op); 6477 poly_int64 op_mem_offset = subreg_memory_offset (op);
6062 HOST_WIDE_INT final_offset = mem_offset + op_mem_offset; 6478 poly_int64 final_offset = mem_offset + op_mem_offset;
6063 6479
6064 /* See whether resulting subreg will be paradoxical. */ 6480 /* See whether resulting subreg will be paradoxical. */
6065 if (!paradoxical_subreg_p (outermode, innermostmode)) 6481 if (!paradoxical_subreg_p (outermode, innermostmode))
6066 { 6482 {
6067 /* In nonparadoxical subregs we can't handle negative offsets. */ 6483 /* Bail out in case resulting subreg would be incorrect. */
6068 if (final_offset < 0) 6484 if (maybe_lt (final_offset, 0)
6485 || maybe_ge (poly_uint64 (final_offset), innermostsize)
6486 || !multiple_p (final_offset, outersize))
6069 return NULL_RTX; 6487 return NULL_RTX;
6070 /* Bail out in case resulting subreg would be incorrect. */
6071 if (final_offset % GET_MODE_SIZE (outermode)
6072 || (unsigned) final_offset >= GET_MODE_SIZE (innermostmode))
6073 return NULL_RTX;
6074 } 6488 }
6075 else 6489 else
6076 { 6490 {
6077 HOST_WIDE_INT required_offset 6491 poly_int64 required_offset = subreg_memory_offset (outermode,
6078 = subreg_memory_offset (outermode, innermostmode, 0); 6492 innermostmode, 0);
6079 if (final_offset != required_offset) 6493 if (maybe_ne (final_offset, required_offset))
6080 return NULL_RTX; 6494 return NULL_RTX;
6081 /* Paradoxical subregs always have byte offset 0. */ 6495 /* Paradoxical subregs always have byte offset 0. */
6082 final_offset = 0; 6496 final_offset = 0;
6083 } 6497 }
6084 6498
6092 { 6506 {
6093 newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset); 6507 newx = gen_rtx_SUBREG (outermode, SUBREG_REG (op), final_offset);
6094 if (SUBREG_PROMOTED_VAR_P (op) 6508 if (SUBREG_PROMOTED_VAR_P (op)
6095 && SUBREG_PROMOTED_SIGN (op) >= 0 6509 && SUBREG_PROMOTED_SIGN (op) >= 0
6096 && GET_MODE_CLASS (outermode) == MODE_INT 6510 && GET_MODE_CLASS (outermode) == MODE_INT
6097 && IN_RANGE (GET_MODE_SIZE (outermode), 6511 && known_ge (outersize, innersize)
6098 GET_MODE_SIZE (innermode), 6512 && known_le (outersize, innermostsize)
6099 GET_MODE_SIZE (innermostmode))
6100 && subreg_lowpart_p (newx)) 6513 && subreg_lowpart_p (newx))
6101 { 6514 {
6102 SUBREG_PROMOTED_VAR_P (newx) = 1; 6515 SUBREG_PROMOTED_VAR_P (newx) = 1;
6103 SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op)); 6516 SUBREG_PROMOTED_SET (newx, SUBREG_PROMOTED_GET (op));
6104 } 6517 }
6127 /* Propagate original regno. We don't have any way to specify 6540 /* Propagate original regno. We don't have any way to specify
6128 the offset inside original regno, so do so only for lowpart. 6541 the offset inside original regno, so do so only for lowpart.
6129 The information is used only by alias analysis that cannot 6542 The information is used only by alias analysis that cannot
6130 grok partial registers anyway. */ 6543 grok partial registers anyway. */
6131 6544
6132 if (subreg_lowpart_offset (outermode, innermode) == byte) 6545 if (known_eq (subreg_lowpart_offset (outermode, innermode), byte))
6133 ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op); 6546 ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op);
6134 return x; 6547 return x;
6135 } 6548 }
6136 } 6549 }
6137 6550
6144 && ! mode_dependent_address_p (XEXP (op, 0), MEM_ADDR_SPACE (op)) 6557 && ! mode_dependent_address_p (XEXP (op, 0), MEM_ADDR_SPACE (op))
6145 /* Allow splitting of volatile memory references in case we don't 6558 /* Allow splitting of volatile memory references in case we don't
6146 have instruction to move the whole thing. */ 6559 have instruction to move the whole thing. */
6147 && (! MEM_VOLATILE_P (op) 6560 && (! MEM_VOLATILE_P (op)
6148 || ! have_insn_for (SET, innermode)) 6561 || ! have_insn_for (SET, innermode))
6149 && GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op))) 6562 && known_le (outersize, innersize))
6150 return adjust_address_nv (op, outermode, byte); 6563 return adjust_address_nv (op, outermode, byte);
6151 6564
6152 /* Handle complex or vector values represented as CONCAT or VEC_CONCAT 6565 /* Handle complex or vector values represented as CONCAT or VEC_CONCAT
6153 of two parts. */ 6566 of two parts. */
6154 if (GET_CODE (op) == CONCAT 6567 if (GET_CODE (op) == CONCAT
6155 || GET_CODE (op) == VEC_CONCAT) 6568 || GET_CODE (op) == VEC_CONCAT)
6156 { 6569 {
6157 unsigned int part_size, final_offset; 6570 poly_uint64 final_offset;
6158 rtx part, res; 6571 rtx part, res;
6159 6572
6160 machine_mode part_mode = GET_MODE (XEXP (op, 0)); 6573 machine_mode part_mode = GET_MODE (XEXP (op, 0));
6161 if (part_mode == VOIDmode) 6574 if (part_mode == VOIDmode)
6162 part_mode = GET_MODE_INNER (GET_MODE (op)); 6575 part_mode = GET_MODE_INNER (GET_MODE (op));
6163 part_size = GET_MODE_SIZE (part_mode); 6576 poly_uint64 part_size = GET_MODE_SIZE (part_mode);
6164 if (byte < part_size) 6577 if (known_lt (byte, part_size))
6165 { 6578 {
6166 part = XEXP (op, 0); 6579 part = XEXP (op, 0);
6167 final_offset = byte; 6580 final_offset = byte;
6168 } 6581 }
6169 else 6582 else if (known_ge (byte, part_size))
6170 { 6583 {
6171 part = XEXP (op, 1); 6584 part = XEXP (op, 1);
6172 final_offset = byte - part_size; 6585 final_offset = byte - part_size;
6173 } 6586 }
6174 6587 else
6175 if (final_offset + GET_MODE_SIZE (outermode) > part_size) 6588 return NULL_RTX;
6589
6590 if (maybe_gt (final_offset + outersize, part_size))
6176 return NULL_RTX; 6591 return NULL_RTX;
6177 6592
6178 part_mode = GET_MODE (part); 6593 part_mode = GET_MODE (part);
6179 if (part_mode == VOIDmode) 6594 if (part_mode == VOIDmode)
6180 part_mode = GET_MODE_INNER (GET_MODE (op)); 6595 part_mode = GET_MODE_INNER (GET_MODE (op));
6184 if (validate_subreg (outermode, part_mode, part, final_offset)) 6599 if (validate_subreg (outermode, part_mode, part, final_offset))
6185 return gen_rtx_SUBREG (outermode, part, final_offset); 6600 return gen_rtx_SUBREG (outermode, part, final_offset);
6186 return NULL_RTX; 6601 return NULL_RTX;
6187 } 6602 }
6188 6603
6604 /* Simplify
6605 (subreg (vec_merge (X)
6606 (vector)
6607 (const_int ((1 << N) | M)))
6608 (N * sizeof (outermode)))
6609 to
6610 (subreg (X) (N * sizeof (outermode)))
6611 */
6612 unsigned int idx;
6613 if (constant_multiple_p (byte, GET_MODE_SIZE (outermode), &idx)
6614 && idx < HOST_BITS_PER_WIDE_INT
6615 && GET_CODE (op) == VEC_MERGE
6616 && GET_MODE_INNER (innermode) == outermode
6617 && CONST_INT_P (XEXP (op, 2))
6618 && (UINTVAL (XEXP (op, 2)) & (HOST_WIDE_INT_1U << idx)) != 0)
6619 return simplify_gen_subreg (outermode, XEXP (op, 0), innermode, byte);
6620
6189 /* A SUBREG resulting from a zero extension may fold to zero if 6621 /* A SUBREG resulting from a zero extension may fold to zero if
6190 it extracts higher bits than the ZERO_EXTEND's source bits. */ 6622 it extracts higher bits than the ZERO_EXTEND's source bits. */
6191 if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) 6623 if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode))
6192 { 6624 {
6193 unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); 6625 poly_uint64 bitpos = subreg_lsb_1 (outermode, innermode, byte);
6194 if (bitpos >= GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))) 6626 if (known_ge (bitpos, GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))))
6195 return CONST0_RTX (outermode); 6627 return CONST0_RTX (outermode);
6196 } 6628 }
6197 6629
6198 scalar_int_mode int_outermode, int_innermode; 6630 scalar_int_mode int_outermode, int_innermode;
6199 if (is_a <scalar_int_mode> (outermode, &int_outermode) 6631 if (is_a <scalar_int_mode> (outermode, &int_outermode)
6200 && is_a <scalar_int_mode> (innermode, &int_innermode) 6632 && is_a <scalar_int_mode> (innermode, &int_innermode)
6201 && (GET_MODE_PRECISION (int_outermode) 6633 && known_eq (byte, subreg_lowpart_offset (int_outermode, int_innermode)))
6634 {
6635 /* Handle polynomial integers. The upper bits of a paradoxical
6636 subreg are undefined, so this is safe regardless of whether
6637 we're truncating or extending. */
6638 if (CONST_POLY_INT_P (op))
6639 {
6640 poly_wide_int val
6641 = poly_wide_int::from (const_poly_int_value (op),
6642 GET_MODE_PRECISION (int_outermode),
6643 SIGNED);
6644 return immed_wide_int_const (val, int_outermode);
6645 }
6646
6647 if (GET_MODE_PRECISION (int_outermode)
6202 < GET_MODE_PRECISION (int_innermode)) 6648 < GET_MODE_PRECISION (int_innermode))
6203 && byte == subreg_lowpart_offset (int_outermode, int_innermode)) 6649 {
6204 { 6650 rtx tem = simplify_truncation (int_outermode, op, int_innermode);
6205 rtx tem = simplify_truncation (int_outermode, op, int_innermode); 6651 if (tem)
6206 if (tem) 6652 return tem;
6207 return tem; 6653 }
6208 } 6654 }
6209 6655
6210 return NULL_RTX; 6656 return NULL_RTX;
6211 } 6657 }
6212 6658
6213 /* Make a SUBREG operation or equivalent if it folds. */ 6659 /* Make a SUBREG operation or equivalent if it folds. */
6214 6660
6215 rtx 6661 rtx
6216 simplify_gen_subreg (machine_mode outermode, rtx op, 6662 simplify_gen_subreg (machine_mode outermode, rtx op,
6217 machine_mode innermode, unsigned int byte) 6663 machine_mode innermode, poly_uint64 byte)
6218 { 6664 {
6219 rtx newx; 6665 rtx newx;
6220 6666
6221 newx = simplify_subreg (outermode, op, innermode, byte); 6667 newx = simplify_subreg (outermode, op, innermode, byte);
6222 if (newx) 6668 if (newx)
6340 default: 6786 default:
6341 break; 6787 break;
6342 } 6788 }
6343 return NULL; 6789 return NULL;
6344 } 6790 }
6791
6792 #if CHECKING_P
6793
6794 namespace selftest {
6795
6796 /* Make a unique pseudo REG of mode MODE for use by selftests. */
6797
6798 static rtx
6799 make_test_reg (machine_mode mode)
6800 {
6801 static int test_reg_num = LAST_VIRTUAL_REGISTER + 1;
6802
6803 return gen_rtx_REG (mode, test_reg_num++);
6804 }
6805
6806 /* Test vector simplifications involving VEC_DUPLICATE in which the
6807 operands and result have vector mode MODE. SCALAR_REG is a pseudo
6808 register that holds one element of MODE. */
6809
6810 static void
6811 test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
6812 {
6813 scalar_mode inner_mode = GET_MODE_INNER (mode);
6814 rtx duplicate = gen_rtx_VEC_DUPLICATE (mode, scalar_reg);
6815 poly_uint64 nunits = GET_MODE_NUNITS (mode);
6816 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
6817 {
6818 /* Test some simple unary cases with VEC_DUPLICATE arguments. */
6819 rtx not_scalar_reg = gen_rtx_NOT (inner_mode, scalar_reg);
6820 rtx duplicate_not = gen_rtx_VEC_DUPLICATE (mode, not_scalar_reg);
6821 ASSERT_RTX_EQ (duplicate,
6822 simplify_unary_operation (NOT, mode,
6823 duplicate_not, mode));
6824
6825 rtx neg_scalar_reg = gen_rtx_NEG (inner_mode, scalar_reg);
6826 rtx duplicate_neg = gen_rtx_VEC_DUPLICATE (mode, neg_scalar_reg);
6827 ASSERT_RTX_EQ (duplicate,
6828 simplify_unary_operation (NEG, mode,
6829 duplicate_neg, mode));
6830
6831 /* Test some simple binary cases with VEC_DUPLICATE arguments. */
6832 ASSERT_RTX_EQ (duplicate,
6833 simplify_binary_operation (PLUS, mode, duplicate,
6834 CONST0_RTX (mode)));
6835
6836 ASSERT_RTX_EQ (duplicate,
6837 simplify_binary_operation (MINUS, mode, duplicate,
6838 CONST0_RTX (mode)));
6839
6840 ASSERT_RTX_PTR_EQ (CONST0_RTX (mode),
6841 simplify_binary_operation (MINUS, mode, duplicate,
6842 duplicate));
6843 }
6844
6845 /* Test a scalar VEC_SELECT of a VEC_DUPLICATE. */
6846 rtx zero_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
6847 ASSERT_RTX_PTR_EQ (scalar_reg,
6848 simplify_binary_operation (VEC_SELECT, inner_mode,
6849 duplicate, zero_par));
6850
6851 unsigned HOST_WIDE_INT const_nunits;
6852 if (nunits.is_constant (&const_nunits))
6853 {
6854 /* And again with the final element. */
6855 rtx last_index = gen_int_mode (const_nunits - 1, word_mode);
6856 rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index));
6857 ASSERT_RTX_PTR_EQ (scalar_reg,
6858 simplify_binary_operation (VEC_SELECT, inner_mode,
6859 duplicate, last_par));
6860
6861 /* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. */
6862 rtx vector_reg = make_test_reg (mode);
6863 for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
6864 {
6865 if (i >= HOST_BITS_PER_WIDE_INT)
6866 break;
6867 rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
6868 rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
6869 poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
6870 ASSERT_RTX_EQ (scalar_reg,
6871 simplify_gen_subreg (inner_mode, vm,
6872 mode, offset));
6873 }
6874 }
6875
6876 /* Test a scalar subreg of a VEC_DUPLICATE. */
6877 poly_uint64 offset = subreg_lowpart_offset (inner_mode, mode);
6878 ASSERT_RTX_EQ (scalar_reg,
6879 simplify_gen_subreg (inner_mode, duplicate,
6880 mode, offset));
6881
6882 machine_mode narrower_mode;
6883 if (maybe_ne (nunits, 2U)
6884 && multiple_p (nunits, 2)
6885 && mode_for_vector (inner_mode, 2).exists (&narrower_mode)
6886 && VECTOR_MODE_P (narrower_mode))
6887 {
6888 /* Test VEC_SELECT of a vector. */
6889 rtx vec_par
6890 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx));
6891 rtx narrower_duplicate
6892 = gen_rtx_VEC_DUPLICATE (narrower_mode, scalar_reg);
6893 ASSERT_RTX_EQ (narrower_duplicate,
6894 simplify_binary_operation (VEC_SELECT, narrower_mode,
6895 duplicate, vec_par));
6896
6897 /* Test a vector subreg of a VEC_DUPLICATE. */
6898 poly_uint64 offset = subreg_lowpart_offset (narrower_mode, mode);
6899 ASSERT_RTX_EQ (narrower_duplicate,
6900 simplify_gen_subreg (narrower_mode, duplicate,
6901 mode, offset));
6902 }
6903 }
6904
6905 /* Test vector simplifications involving VEC_SERIES in which the
6906 operands and result have vector mode MODE. SCALAR_REG is a pseudo
6907 register that holds one element of MODE. */
6908
6909 static void
6910 test_vector_ops_series (machine_mode mode, rtx scalar_reg)
6911 {
6912 /* Test unary cases with VEC_SERIES arguments. */
6913 scalar_mode inner_mode = GET_MODE_INNER (mode);
6914 rtx duplicate = gen_rtx_VEC_DUPLICATE (mode, scalar_reg);
6915 rtx neg_scalar_reg = gen_rtx_NEG (inner_mode, scalar_reg);
6916 rtx series_0_r = gen_rtx_VEC_SERIES (mode, const0_rtx, scalar_reg);
6917 rtx series_0_nr = gen_rtx_VEC_SERIES (mode, const0_rtx, neg_scalar_reg);
6918 rtx series_nr_1 = gen_rtx_VEC_SERIES (mode, neg_scalar_reg, const1_rtx);
6919 rtx series_r_m1 = gen_rtx_VEC_SERIES (mode, scalar_reg, constm1_rtx);
6920 rtx series_r_r = gen_rtx_VEC_SERIES (mode, scalar_reg, scalar_reg);
6921 rtx series_nr_nr = gen_rtx_VEC_SERIES (mode, neg_scalar_reg,
6922 neg_scalar_reg);
6923 ASSERT_RTX_EQ (series_0_r,
6924 simplify_unary_operation (NEG, mode, series_0_nr, mode));
6925 ASSERT_RTX_EQ (series_r_m1,
6926 simplify_unary_operation (NEG, mode, series_nr_1, mode));
6927 ASSERT_RTX_EQ (series_r_r,
6928 simplify_unary_operation (NEG, mode, series_nr_nr, mode));
6929
6930 /* Test that a VEC_SERIES with a zero step is simplified away. */
6931 ASSERT_RTX_EQ (duplicate,
6932 simplify_binary_operation (VEC_SERIES, mode,
6933 scalar_reg, const0_rtx));
6934
6935 /* Test PLUS and MINUS with VEC_SERIES. */
6936 rtx series_0_1 = gen_const_vec_series (mode, const0_rtx, const1_rtx);
6937 rtx series_0_m1 = gen_const_vec_series (mode, const0_rtx, constm1_rtx);
6938 rtx series_r_1 = gen_rtx_VEC_SERIES (mode, scalar_reg, const1_rtx);
6939 ASSERT_RTX_EQ (series_r_r,
6940 simplify_binary_operation (PLUS, mode, series_0_r,
6941 duplicate));
6942 ASSERT_RTX_EQ (series_r_1,
6943 simplify_binary_operation (PLUS, mode, duplicate,
6944 series_0_1));
6945 ASSERT_RTX_EQ (series_r_m1,
6946 simplify_binary_operation (PLUS, mode, duplicate,
6947 series_0_m1));
6948 ASSERT_RTX_EQ (series_0_r,
6949 simplify_binary_operation (MINUS, mode, series_r_r,
6950 duplicate));
6951 ASSERT_RTX_EQ (series_r_m1,
6952 simplify_binary_operation (MINUS, mode, duplicate,
6953 series_0_1));
6954 ASSERT_RTX_EQ (series_r_1,
6955 simplify_binary_operation (MINUS, mode, duplicate,
6956 series_0_m1));
6957 ASSERT_RTX_EQ (series_0_m1,
6958 simplify_binary_operation (VEC_SERIES, mode, const0_rtx,
6959 constm1_rtx));
6960 }
6961
6962 /* Verify simplify_merge_mask works correctly. */
6963
6964 static void
6965 test_vec_merge (machine_mode mode)
6966 {
6967 rtx op0 = make_test_reg (mode);
6968 rtx op1 = make_test_reg (mode);
6969 rtx op2 = make_test_reg (mode);
6970 rtx op3 = make_test_reg (mode);
6971 rtx op4 = make_test_reg (mode);
6972 rtx op5 = make_test_reg (mode);
6973 rtx mask1 = make_test_reg (SImode);
6974 rtx mask2 = make_test_reg (SImode);
6975 rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1);
6976 rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1);
6977 rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1);
6978
6979 /* Simple vec_merge. */
6980 ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0));
6981 ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1));
6982 ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0));
6983 ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1));
6984
6985 /* Nested vec_merge.
6986 It's tempting to make this simplify right down to opN, but we don't
6987 because all the simplify_* functions assume that the operands have
6988 already been simplified. */
6989 rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);
6990 ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));
6991 ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));
6992
6993 /* Intermediate unary op. */
6994 rtx unop = gen_rtx_NOT (mode, vm1);
6995 ASSERT_RTX_EQ (gen_rtx_NOT (mode, op0),
6996 simplify_merge_mask (unop, mask1, 0));
6997 ASSERT_RTX_EQ (gen_rtx_NOT (mode, op1),
6998 simplify_merge_mask (unop, mask1, 1));
6999
7000 /* Intermediate binary op. */
7001 rtx binop = gen_rtx_PLUS (mode, vm1, vm2);
7002 ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op0, op2),
7003 simplify_merge_mask (binop, mask1, 0));
7004 ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op1, op3),
7005 simplify_merge_mask (binop, mask1, 1));
7006
7007 /* Intermediate ternary op. */
7008 rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3);
7009 ASSERT_RTX_EQ (gen_rtx_FMA (mode, op0, op2, op4),
7010 simplify_merge_mask (tenop, mask1, 0));
7011 ASSERT_RTX_EQ (gen_rtx_FMA (mode, op1, op3, op5),
7012 simplify_merge_mask (tenop, mask1, 1));
7013
7014 /* Side effects. */
7015 rtx badop0 = gen_rtx_PRE_INC (mode, op0);
7016 rtx badvm = gen_rtx_VEC_MERGE (mode, badop0, op1, mask1);
7017 ASSERT_EQ (badop0, simplify_merge_mask (badvm, mask1, 0));
7018 ASSERT_EQ (NULL_RTX, simplify_merge_mask (badvm, mask1, 1));
7019
7020 /* Called indirectly. */
7021 ASSERT_RTX_EQ (gen_rtx_VEC_MERGE (mode, op0, op3, mask1),
7022 simplify_rtx (nvm));
7023 }
7024
7025 /* Verify some simplifications involving vectors. */
7026
7027 static void
7028 test_vector_ops ()
7029 {
7030 for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i)
7031 {
7032 machine_mode mode = (machine_mode) i;
7033 if (VECTOR_MODE_P (mode))
7034 {
7035 rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode));
7036 test_vector_ops_duplicate (mode, scalar_reg);
7037 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7038 && maybe_gt (GET_MODE_NUNITS (mode), 2))
7039 test_vector_ops_series (mode, scalar_reg);
7040 test_vec_merge (mode);
7041 }
7042 }
7043 }
7044
7045 template<unsigned int N>
7046 struct simplify_const_poly_int_tests
7047 {
7048 static void run ();
7049 };
7050
7051 template<>
7052 struct simplify_const_poly_int_tests<1>
7053 {
7054 static void run () {}
7055 };
7056
7057 /* Test various CONST_POLY_INT properties. */
7058
7059 template<unsigned int N>
7060 void
7061 simplify_const_poly_int_tests<N>::run ()
7062 {
7063 rtx x1 = gen_int_mode (poly_int64 (1, 1), QImode);
7064 rtx x2 = gen_int_mode (poly_int64 (-80, 127), QImode);
7065 rtx x3 = gen_int_mode (poly_int64 (-79, -128), QImode);
7066 rtx x4 = gen_int_mode (poly_int64 (5, 4), QImode);
7067 rtx x5 = gen_int_mode (poly_int64 (30, 24), QImode);
7068 rtx x6 = gen_int_mode (poly_int64 (20, 16), QImode);
7069 rtx x7 = gen_int_mode (poly_int64 (7, 4), QImode);
7070 rtx x8 = gen_int_mode (poly_int64 (30, 24), HImode);
7071 rtx x9 = gen_int_mode (poly_int64 (-30, -24), HImode);
7072 rtx x10 = gen_int_mode (poly_int64 (-31, -24), HImode);
7073 rtx two = GEN_INT (2);
7074 rtx six = GEN_INT (6);
7075 poly_uint64 offset = subreg_lowpart_offset (QImode, HImode);
7076
7077 /* These tests only try limited operation combinations. Fuller arithmetic
7078 testing is done directly on poly_ints. */
7079 ASSERT_EQ (simplify_unary_operation (NEG, HImode, x8, HImode), x9);
7080 ASSERT_EQ (simplify_unary_operation (NOT, HImode, x8, HImode), x10);
7081 ASSERT_EQ (simplify_unary_operation (TRUNCATE, QImode, x8, HImode), x5);
7082 ASSERT_EQ (simplify_binary_operation (PLUS, QImode, x1, x2), x3);
7083 ASSERT_EQ (simplify_binary_operation (MINUS, QImode, x3, x1), x2);
7084 ASSERT_EQ (simplify_binary_operation (MULT, QImode, x4, six), x5);
7085 ASSERT_EQ (simplify_binary_operation (MULT, QImode, six, x4), x5);
7086 ASSERT_EQ (simplify_binary_operation (ASHIFT, QImode, x4, two), x6);
7087 ASSERT_EQ (simplify_binary_operation (IOR, QImode, x4, two), x7);
7088 ASSERT_EQ (simplify_subreg (HImode, x5, QImode, 0), x8);
7089 ASSERT_EQ (simplify_subreg (QImode, x8, HImode, offset), x5);
7090 }
7091
7092 /* Run all of the selftests within this file. */
7093
7094 void
7095 simplify_rtx_c_tests ()
7096 {
7097 test_vector_ops ();
7098 simplify_const_poly_int_tests<NUM_POLY_INT_COEFFS>::run ();
7099 }
7100
7101 } // namespace selftest
7102
7103 #endif /* CHECKING_P */