173
|
1 //===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8
|
|
9 #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
|
|
10 #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
|
|
11
|
|
12 // This is a helper class for use in floating-point conversions
|
|
13 // between binary decimal representations. It holds a multiple-precision
|
|
14 // integer value using digits of a radix that is a large even power of ten
|
|
15 // (10,000,000,000,000,000 by default, 10**16). These digits are accompanied
|
|
16 // by a signed exponent that denotes multiplication by a power of ten.
|
|
17 // The effective radix point is to the right of the digits (i.e., they do
|
|
18 // not represent a fraction).
|
|
19 //
|
|
20 // The operations supported by this class are limited to those required
|
|
21 // for conversions between binary and decimal representations; it is not
|
|
22 // a general-purpose facility.
|
|
23
|
|
24 #include "flang/Common/bit-population-count.h"
|
|
25 #include "flang/Common/leading-zero-bit-count.h"
|
|
26 #include "flang/Common/uint128.h"
|
|
27 #include "flang/Common/unsigned-const-division.h"
|
|
28 #include "flang/Decimal/binary-floating-point.h"
|
|
29 #include "flang/Decimal/decimal.h"
|
|
30 #include "llvm/Support/raw_ostream.h"
|
|
31 #include <cinttypes>
|
|
32 #include <limits>
|
|
33 #include <type_traits>
|
|
34
|
|
35 namespace Fortran::decimal {
|
|
36
|
|
37 static constexpr std::uint64_t TenToThe(int power) {
|
|
38 return power <= 0 ? 1 : 10 * TenToThe(power - 1);
|
|
39 }
|
|
40
|
|
41 // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be
|
|
42 // even, so that pairs of decimal digits do not straddle Digits.
|
|
43 // So LOG10RADIX must be 16 or 6.
|
|
44 template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
|
|
45 public:
|
|
46 using Real = BinaryFloatingPointNumber<PREC>;
|
|
47 static constexpr int log10Radix{LOG10RADIX};
|
|
48
|
|
49 private:
|
|
50 static constexpr std::uint64_t uint64Radix{TenToThe(log10Radix)};
|
|
51 static constexpr int minDigitBits{
|
|
52 64 - common::LeadingZeroBitCount(uint64Radix)};
|
|
53 using Digit = common::HostUnsignedIntType<minDigitBits>;
|
|
54 static constexpr Digit radix{uint64Radix};
|
|
55 static_assert(radix < std::numeric_limits<Digit>::max() / 1000,
|
|
56 "radix is somehow too big");
|
|
57 static_assert(radix > std::numeric_limits<Digit>::max() / 10000,
|
|
58 "radix is somehow too small");
|
|
59
|
|
60 // The base-2 logarithm of the least significant bit that can arise
|
|
61 // in a subnormal IEEE floating-point number.
|
|
62 static constexpr int minLog2AnyBit{
|
|
63 -Real::exponentBias - Real::binaryPrecision};
|
|
64
|
|
65 // The number of Digits needed to represent the smallest subnormal.
|
|
66 static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
|
|
67
|
|
68 public:
|
|
69 explicit BigRadixFloatingPointNumber(
|
|
70 enum FortranRounding rounding = RoundDefault)
|
|
71 : rounding_{rounding} {}
|
|
72
|
|
73 // Converts a binary floating point value.
|
|
74 explicit BigRadixFloatingPointNumber(
|
|
75 Real, enum FortranRounding = RoundDefault);
|
|
76
|
|
77 BigRadixFloatingPointNumber &SetToZero() {
|
|
78 isNegative_ = false;
|
|
79 digits_ = 0;
|
|
80 exponent_ = 0;
|
|
81 return *this;
|
|
82 }
|
|
83
|
|
84 // Converts decimal floating-point to binary.
|
|
85 ConversionToBinaryResult<PREC> ConvertToBinary();
|
|
86
|
|
87 // Parses and converts to binary. Handles leading spaces,
|
|
88 // "NaN", & optionally-signed "Inf". Does not skip internal
|
|
89 // spaces.
|
|
90 // The argument is a reference to a pointer that is left
|
|
91 // pointing to the first character that wasn't parsed.
|
|
92 ConversionToBinaryResult<PREC> ConvertToBinary(const char *&);
|
|
93
|
|
94 // Formats a decimal floating-point number to a user buffer.
|
|
95 // May emit "NaN" or "Inf", or an possibly-signed integer.
|
|
96 // No decimal point is written, but if it were, it would be
|
|
97 // after the last digit; the effective decimal exponent is
|
|
98 // returned as part of the result structure so that it can be
|
|
99 // formatted by the client.
|
|
100 ConversionToDecimalResult ConvertToDecimal(
|
|
101 char *, std::size_t, enum DecimalConversionFlags, int digits) const;
|
|
102
|
|
103 // Discard decimal digits not needed to distinguish this value
|
|
104 // from the decimal encodings of two others (viz., the nearest binary
|
|
105 // floating-point numbers immediately below and above this one).
|
|
106 // The last decimal digit may not be uniquely determined in all
|
|
107 // cases, and will be the mean value when that is so (e.g., if
|
|
108 // last decimal digit values 6-8 would all work, it'll be a 7).
|
|
109 // This minimization necessarily assumes that the value will be
|
|
110 // emitted and read back into the same (or less precise) format
|
|
111 // with default rounding to the nearest value.
|
|
112 void Minimize(
|
|
113 BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
|
|
114
|
|
115 llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
|
|
116
|
|
117 private:
|
|
118 BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that)
|
|
119 : digits_{that.digits_}, exponent_{that.exponent_},
|
|
120 isNegative_{that.isNegative_}, rounding_{that.rounding_} {
|
|
121 for (int j{0}; j < digits_; ++j) {
|
|
122 digit_[j] = that.digit_[j];
|
|
123 }
|
|
124 }
|
|
125
|
|
126 bool IsZero() const {
|
|
127 // Don't assume normalization.
|
|
128 for (int j{0}; j < digits_; ++j) {
|
|
129 if (digit_[j] != 0) {
|
|
130 return false;
|
|
131 }
|
|
132 }
|
|
133 return true;
|
|
134 }
|
|
135
|
|
136 // Predicate: true when 10*value would cause a carry.
|
|
137 // (When this happens during decimal-to-binary conversion,
|
|
138 // there are more digits in the input string than can be
|
|
139 // represented precisely.)
|
|
140 bool IsFull() const {
|
|
141 return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10;
|
|
142 }
|
|
143
|
|
144 // Sets *this to an unsigned integer value.
|
|
145 // Returns any remainder.
|
|
146 template <typename UINT> UINT SetTo(UINT n) {
|
|
147 static_assert(
|
|
148 std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>);
|
|
149 SetToZero();
|
|
150 while (n != 0) {
|
|
151 auto q{common::DivideUnsignedBy<UINT, 10>(n)};
|
|
152 if (n != q * 10) {
|
|
153 break;
|
|
154 }
|
|
155 ++exponent_;
|
|
156 n = q;
|
|
157 }
|
|
158 if constexpr (sizeof n < sizeof(Digit)) {
|
|
159 if (n != 0) {
|
|
160 digit_[digits_++] = n;
|
|
161 }
|
|
162 return 0;
|
|
163 } else {
|
|
164 while (n != 0 && digits_ < digitLimit_) {
|
|
165 auto q{common::DivideUnsignedBy<UINT, radix>(n)};
|
|
166 digit_[digits_++] = static_cast<Digit>(n - q * radix);
|
|
167 n = q;
|
|
168 }
|
|
169 return n;
|
|
170 }
|
|
171 }
|
|
172
|
|
173 int RemoveLeastOrderZeroDigits() {
|
|
174 int remove{0};
|
|
175 if (digits_ > 0 && digit_[0] == 0) {
|
|
176 while (remove < digits_ && digit_[remove] == 0) {
|
|
177 ++remove;
|
|
178 }
|
|
179 if (remove >= digits_) {
|
|
180 digits_ = 0;
|
|
181 } else if (remove > 0) {
|
|
182 for (int j{0}; j + remove < digits_; ++j) {
|
|
183 digit_[j] = digit_[j + remove];
|
|
184 }
|
|
185 digits_ -= remove;
|
|
186 }
|
|
187 }
|
|
188 return remove;
|
|
189 }
|
|
190
|
|
191 void RemoveLeadingZeroDigits() {
|
|
192 while (digits_ > 0 && digit_[digits_ - 1] == 0) {
|
|
193 --digits_;
|
|
194 }
|
|
195 }
|
|
196
|
|
197 void Normalize() {
|
|
198 RemoveLeadingZeroDigits();
|
|
199 exponent_ += RemoveLeastOrderZeroDigits() * log10Radix;
|
|
200 }
|
|
201
|
|
202 // This limited divisibility test only works for even divisors of the radix,
|
|
203 // which is fine since it's only ever used with 2 and 5.
|
|
204 template <int N> bool IsDivisibleBy() const {
|
|
205 static_assert(N > 1 && radix % N == 0, "bad modulus");
|
|
206 return digits_ == 0 || (digit_[0] % N) == 0;
|
|
207 }
|
|
208
|
|
209 template <unsigned DIVISOR> int DivideBy() {
|
|
210 Digit remainder{0};
|
|
211 for (int j{digits_ - 1}; j >= 0; --j) {
|
|
212 Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])};
|
|
213 Digit nrem{digit_[j] - DIVISOR * q};
|
|
214 digit_[j] = q + (radix / DIVISOR) * remainder;
|
|
215 remainder = nrem;
|
|
216 }
|
|
217 return remainder;
|
|
218 }
|
|
219
|
|
220 int DivideByPowerOfTwo(int twoPow) { // twoPow <= LOG10RADIX
|
|
221 int remainder{0};
|
|
222 for (int j{digits_ - 1}; j >= 0; --j) {
|
|
223 Digit q{digit_[j] >> twoPow};
|
|
224 int nrem = digit_[j] - (q << twoPow);
|
|
225 digit_[j] = q + (radix >> twoPow) * remainder;
|
|
226 remainder = nrem;
|
|
227 }
|
|
228 return remainder;
|
|
229 }
|
|
230
|
|
231 int AddCarry(int position = 0, int carry = 1) {
|
|
232 for (; position < digits_; ++position) {
|
|
233 Digit v{digit_[position] + carry};
|
|
234 if (v < radix) {
|
|
235 digit_[position] = v;
|
|
236 return 0;
|
|
237 }
|
|
238 digit_[position] = v - radix;
|
|
239 carry = 1;
|
|
240 }
|
|
241 if (digits_ < digitLimit_) {
|
|
242 digit_[digits_++] = carry;
|
|
243 return 0;
|
|
244 }
|
|
245 Normalize();
|
|
246 if (digits_ < digitLimit_) {
|
|
247 digit_[digits_++] = carry;
|
|
248 return 0;
|
|
249 }
|
|
250 return carry;
|
|
251 }
|
|
252
|
|
253 void Decrement() {
|
|
254 for (int j{0}; digit_[j]-- == 0; ++j) {
|
|
255 digit_[j] = radix - 1;
|
|
256 }
|
|
257 }
|
|
258
|
|
259 template <int N> int MultiplyByHelper(int carry = 0) {
|
|
260 for (int j{0}; j < digits_; ++j) {
|
|
261 auto v{N * digit_[j] + carry};
|
|
262 carry = common::DivideUnsignedBy<Digit, radix>(v);
|
|
263 digit_[j] = v - carry * radix; // i.e., v % radix
|
|
264 }
|
|
265 return carry;
|
|
266 }
|
|
267
|
|
268 template <int N> int MultiplyBy(int carry = 0) {
|
|
269 if (int newCarry{MultiplyByHelper<N>(carry)}) {
|
|
270 return AddCarry(digits_, newCarry);
|
|
271 } else {
|
|
272 return 0;
|
|
273 }
|
|
274 }
|
|
275
|
|
276 template <int N> int MultiplyWithoutNormalization() {
|
|
277 if (int carry{MultiplyByHelper<N>(0)}) {
|
|
278 if (digits_ < digitLimit_) {
|
|
279 digit_[digits_++] = carry;
|
|
280 return 0;
|
|
281 } else {
|
|
282 return carry;
|
|
283 }
|
|
284 } else {
|
|
285 return 0;
|
|
286 }
|
|
287 }
|
|
288
|
|
289 void LoseLeastSignificantDigit(); // with rounding
|
|
290
|
|
291 void PushCarry(int carry) {
|
|
292 if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) {
|
|
293 LoseLeastSignificantDigit();
|
|
294 digit_[digits_ - 1] += carry;
|
|
295 } else {
|
|
296 digit_[digits_++] = carry;
|
|
297 }
|
|
298 }
|
|
299
|
|
300 // Adds another number and then divides by two.
|
|
301 // Assumes same exponent and sign.
|
|
302 // Returns true when the the result has effectively been rounded down.
|
|
303 bool Mean(const BigRadixFloatingPointNumber &);
|
|
304
|
|
305 bool ParseNumber(const char *&, bool &inexact);
|
|
306
|
|
307 using Raw = typename Real::RawType;
|
|
308 constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); }
|
|
309 constexpr Raw Infinity() const {
|
|
310 return (Raw{Real::maxExponent} << Real::significandBits) | SignBit();
|
|
311 }
|
|
312 static constexpr Raw NaN() {
|
|
313 return (Raw{Real::maxExponent} << Real::significandBits) |
|
|
314 (Raw{1} << (Real::significandBits - 2));
|
|
315 }
|
|
316
|
|
317 Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD
|
|
318 int digits_{0}; // # of elements in digit_[] array; zero when zero
|
|
319 int digitLimit_{maxDigits}; // precision clamp
|
|
320 int exponent_{0}; // signed power of ten
|
|
321 bool isNegative_{false};
|
|
322 enum FortranRounding rounding_ { RoundDefault };
|
|
323 };
|
|
324 } // namespace Fortran::decimal
|
|
325 #endif
|