random_distributions.h 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739
  1. /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. ==============================================================================*/
  12. #ifndef TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
  13. #define TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
  14. #define _USE_MATH_DEFINES
  15. #include <math.h>
  16. #include <cmath>
  17. #undef _USE_MATH_DEFINES
  18. #include <string.h>
  19. #include <algorithm>
  20. #include <type_traits>
  21. #include "philox_random.h"
  22. #include "tensorflow/core/lib/bfloat16/bfloat16.h"
  23. #include "unsupported/Eigen/CXX11/Tensor"
  24. namespace tensorflow {
  25. namespace random {
  26. // Helper function to convert a 16-bit integer to a half between [0..1).
  27. PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x);
  28. // Helper function to convert a 16-bit integer to a bfloat16 between [0..1).
  29. PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x);
  30. // Helper function to convert a 32-bit integer to a float between [0..1).
  31. PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x);
  32. // Helper function to convert two 32-bit integers to a double between [0..1).
  33. PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1);
  34. // Computes a + b. Requires that the result is representable in the destination
  35. // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b
  36. // need *not* be representable in that type. (The condition on b excludes the
  37. // extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot
  38. // compute.)
  39. template <typename Int>
  40. PHILOX_DEVICE_INLINE Int SignedAdd(Int a, typename std::make_unsigned<Int>::type b) {
  41. // Implementation note: both b_div_2 and b - b_div_2 are positive and
  42. // representatble as Int.
  43. auto b_div_2 = b >> 1;
  44. return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2);
  45. }
  46. // A class that generates uniform distribution random numbers from the
  47. // underlying random integer generator.
  48. // Arguments:
  49. // Generator: a generator type that returns a number of uint32 upon each
  50. // invocation. It needs to define kResultElementCount for the
  51. // sample count for each invocation, and ResultType for the
  52. // actual returned sample type.
  53. // RealType: the data type of the real numbers that will be returned by the
  54. // distribution. This could be either float or double for now.
  55. // This class is meant to be implemented through specialization. The default
  56. // is not defined by design.
  57. template <class Generator, typename RealType>
  58. class UniformDistribution;
  59. template <class Generator>
  60. class UniformDistribution<Generator, Eigen::half> {
  61. public:
  62. // The number of elements that will be returned.
  63. static const int kResultElementCount = Generator::kResultElementCount;
  64. // Cost of generation of a single element (in cycles).
  65. static const int kElementCost = 3;
  66. // Indicate that this distribution may take variable number of samples
  67. // during the runtime.
  68. static const bool kVariableSamplesPerOutput = false;
  69. typedef Array<Eigen::half, kResultElementCount> ResultType;
  70. typedef Eigen::half ResultElementType;
  71. PHILOX_DEVICE_INLINE
  72. ResultType operator()(Generator* gen) {
  73. typename Generator::ResultType sample = (*gen)();
  74. ResultType result;
  75. for (int i = 0; i < kResultElementCount; ++i) {
  76. result[i] = Uint16ToHalf(sample[i]); // Truncate the upper 16 bits.
  77. }
  78. return result;
  79. }
  80. };
  81. template <class Generator>
  82. class UniformDistribution<Generator, bfloat16> {
  83. public:
  84. // The number of elements that will be returned.
  85. static const int kResultElementCount = Generator::kResultElementCount;
  86. // Cost of generation of a single element (in cycles).
  87. static const int kElementCost = 3;
  88. // Indicate that this distribution may take variable number of samples
  89. // during the runtime.
  90. static const bool kVariableSamplesPerOutput = false;
  91. typedef Array<bfloat16, kResultElementCount> ResultType;
  92. typedef bfloat16 ResultElementType;
  93. PHILOX_DEVICE_INLINE
  94. ResultType operator()(Generator* gen) {
  95. typename Generator::ResultType sample = (*gen)();
  96. ResultType result;
  97. for (int i = 0; i < kResultElementCount; ++i) {
  98. result[i] = Uint16ToGfloat16(sample[i]);
  99. }
  100. return result;
  101. }
  102. };
  103. template <class Generator>
  104. class UniformDistribution<Generator, float> {
  105. public:
  106. // The number of elements that will be returned.
  107. static const int kResultElementCount = Generator::kResultElementCount;
  108. // Cost of generation of a single element (in cycles).
  109. static const int kElementCost = 3;
  110. // Indicate that this distribution may take variable number of samples
  111. // during the runtime.
  112. static const bool kVariableSamplesPerOutput = false;
  113. typedef Array<float, kResultElementCount> ResultType;
  114. typedef float ResultElementType;
  115. PHILOX_DEVICE_INLINE
  116. ResultType operator()(Generator* gen) {
  117. typename Generator::ResultType sample = (*gen)();
  118. ResultType result;
  119. for (int i = 0; i < kResultElementCount; ++i) {
  120. result[i] = Uint32ToFloat(sample[i]);
  121. }
  122. return result;
  123. }
  124. };
  125. template <class Generator>
  126. class UniformDistribution<Generator, double> {
  127. public:
  128. // The number of elements that will be returned.
  129. static const int kResultElementCount = Generator::kResultElementCount / 2;
  130. // Cost of generation of a single element (in cycles).
  131. static const int kElementCost = 3;
  132. // Indicate that this distribution may take variable number of samples
  133. // during the runtime.
  134. static const bool kVariableSamplesPerOutput = false;
  135. typedef Array<double, kResultElementCount> ResultType;
  136. typedef double ResultElementType;
  137. PHILOX_DEVICE_INLINE
  138. ResultType operator()(Generator* gen) {
  139. typename Generator::ResultType sample = (*gen)();
  140. ResultType result;
  141. for (int i = 0; i < kResultElementCount; ++i) {
  142. result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
  143. }
  144. return result;
  145. }
  146. };
  147. template <class Generator>
  148. class UniformDistribution<Generator, int32> {
  149. public:
  150. // The number of elements that will be returned.
  151. static const int kResultElementCount = Generator::kResultElementCount;
  152. // Cost of generation of a single element (in cycles).
  153. static const int kElementCost = 3;
  154. // Indicate that this distribution may take variable number of samples
  155. // during the runtime.
  156. static const bool kVariableSamplesPerOutput = false;
  157. typedef Array<int32, kResultElementCount> ResultType;
  158. typedef int32 ResultElementType;
  159. // Must have lo < hi
  160. UniformDistribution(int32 lo, int32 hi)
  161. : lo_(lo), range_(static_cast<uint32>(hi) - static_cast<uint32>(lo)) {}
  162. PHILOX_DEVICE_INLINE
  163. ResultType operator()(Generator* gen) {
  164. typename Generator::ResultType sample = (*gen)();
  165. ResultType result;
  166. for (int i = 0; i < kResultElementCount; ++i) {
  167. result[i] = SignedAdd(lo_, sample[i] % range_);
  168. }
  169. return result;
  170. }
  171. private:
  172. // Note that lo_ is intentionally signed while range_ is intentionally
  173. // unsigned. This is because hi - lo can overflow signed integers if
  174. // lo < 0 < hi, but always fits in unsigned.
  175. int32 lo_;
  176. uint32 range_;
  177. };
  178. template <class Generator>
  179. class UniformDistribution<Generator, int64> {
  180. public:
  181. // The number of elements that will be returned.
  182. static const int kResultElementCount = Generator::kResultElementCount / 2;
  183. // Cost of generation of a single element (in cycles).
  184. static const int kElementCost = 3;
  185. // Indicate that this distribution may take variable number of samples
  186. // during the runtime.
  187. static const bool kVariableSamplesPerOutput = false;
  188. typedef Array<int64, kResultElementCount> ResultType;
  189. typedef int64 ResultElementType;
  190. // Must have lo < hi
  191. UniformDistribution(int64 lo, int64 hi)
  192. : lo_(lo), range_(static_cast<uint64>(hi) - static_cast<uint64>(lo)) {}
  193. PHILOX_DEVICE_INLINE
  194. ResultType operator()(Generator* gen) {
  195. typename Generator::ResultType sample = (*gen)();
  196. ResultType result;
  197. for (int i = 0; i < kResultElementCount; ++i) {
  198. auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32;
  199. result[i] = SignedAdd(lo_, bits % range_);
  200. }
  201. return result;
  202. }
  203. private:
  204. // Note that lo_ is intentionally signed while range_ is intentionally
  205. // unsigned. This is because hi - lo can overflow signed integers if
  206. // lo < 0 < hi, but always fits in unsigned.
  207. int64 lo_;
  208. uint64 range_;
  209. };
  210. // A class that adapts the underlying native multiple samples to return a single
  211. // sample at a time.
  212. template <class Generator>
  213. class SingleSampleAdapter {
  214. public:
  215. // The number of elements that will be returned.
  216. static const int kResultElementCount = 1;
  217. // The number of elements that will be returned by the underlying generator.
  218. static const int kNativeElementCount = Generator::kResultElementCount;
  219. typedef typename Generator::ResultElementType ResultType;
  220. typedef typename Generator::ResultElementType ResultElementType;
  221. PHILOX_DEVICE_INLINE
  222. explicit SingleSampleAdapter(Generator* gen)
  223. : generator_(gen), used_result_index_(Generator::kResultElementCount) {}
  224. PHILOX_DEVICE_INLINE
  225. ResultType operator()() {
  226. if (used_result_index_ == Generator::kResultElementCount) {
  227. unused_results_ = (*generator_)();
  228. used_result_index_ = 0;
  229. }
  230. return unused_results_[used_result_index_++];
  231. }
  232. PHILOX_DEVICE_INLINE
  233. void Skip(uint64 num_skips) {
  234. if (!num_skips) {
  235. return;
  236. }
  237. int num_unused_results = kNativeElementCount - used_result_index_;
  238. if (num_skips <= num_unused_results) {
  239. used_result_index_ += num_skips;
  240. return;
  241. }
  242. num_skips -= num_unused_results;
  243. used_result_index_ = kNativeElementCount;
  244. SkipFromGenerator(num_skips / kNativeElementCount);
  245. num_skips = num_skips % kNativeElementCount;
  246. if (num_skips) {
  247. unused_results_ = (*generator_)();
  248. used_result_index_ = num_skips;
  249. }
  250. }
  251. private:
  252. // This implementation iteratively skips over `num_skips` samples
  253. // from `generator_`. There is an O(1) implementation for PhiloxRandom
  254. // in random_distributions.cc.
  255. PHILOX_DEVICE_INLINE
  256. void SkipFromGenerator(uint64 num_skips) {
  257. while (num_skips--) {
  258. (*generator_)();
  259. }
  260. }
  261. Generator* generator_;
  262. typename Generator::ResultType unused_results_;
  263. int used_result_index_;
  264. };
  265. // A class that generates unit normal distribution random numbers from the
  266. // underlying random integer generator.
  267. // Arguments:
  268. // Generator: a generator type that returns a number of uint32 upon each
  269. // each invocation. It needs to define kResultElementCount for the
  270. // sample count for each invocation, and ResultType for actual
  271. // returned sample type.
  272. // RealType: the data type of the real numbers that will be returned by the
  273. // distribution. This could be either float or double for now.
  274. // This class is meant to be implemented through specialization. The default
  275. // is not defined by design.
  276. template <class Generator, typename RealType>
  277. class NormalDistribution;
  278. PHILOX_DEVICE_INLINE
  279. void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1);
  280. PHILOX_DEVICE_INLINE
  281. void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1);
  282. // Exactly like the float version, except that we convert to half afterwards;
  283. // since we don't have half-precision sin/cos even on GPUs, there's nothing to
  284. // gain from working in half internally.
  285. template <class Generator>
  286. class NormalDistribution<Generator, Eigen::half> {
  287. public:
  288. // The number of elements that will be returned.
  289. static const int kResultElementCount = Generator::kResultElementCount;
  290. // Cost of generation of a single element (in cycles).
  291. static const int kElementCost = 70;
  292. // Indicate that this distribution may take variable number of samples
  293. // during the runtime.
  294. static const bool kVariableSamplesPerOutput = false;
  295. typedef Array<Eigen::half, kResultElementCount> ResultType;
  296. typedef Eigen::half ResultElementType;
  297. PHILOX_DEVICE_INLINE
  298. ResultType operator()(Generator* gen) {
  299. typename Generator::ResultType sample = (*gen)();
  300. ResultType result;
  301. for (int i = 0; i < kResultElementCount; i += 2) {
  302. float f[2];
  303. BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
  304. result[i] = Eigen::half(f[0]);
  305. result[i + 1] = Eigen::half(f[1]);
  306. }
  307. return result;
  308. }
  309. };
  310. template <class Generator>
  311. class NormalDistribution<Generator, bfloat16> {
  312. public:
  313. // The number of elements that will be returned.
  314. static const int kResultElementCount = Generator::kResultElementCount;
  315. // Cost of generation of a single element (in cycles).
  316. static const int kElementCost = 70;
  317. // Indicate that this distribution may take variable number of samples
  318. // during the runtime.
  319. static const bool kVariableSamplesPerOutput = false;
  320. typedef Array<bfloat16, kResultElementCount> ResultType;
  321. typedef bfloat16 ResultElementType;
  322. PHILOX_DEVICE_INLINE
  323. ResultType operator()(Generator* gen) {
  324. typename Generator::ResultType sample = (*gen)();
  325. ResultType result;
  326. static_assert(kResultElementCount % 2 == 0, "kResultElementCount should be an even number");
  327. for (int i = 0; i < kResultElementCount; i += 2) {
  328. float f[2];
  329. // Box-Muller transform requires processing 2 elements at a time.
  330. BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
  331. result[i] = bfloat16(f[0]);
  332. result[i + 1] = bfloat16(f[1]);
  333. }
  334. return result;
  335. }
  336. };
  337. template <class Generator>
  338. class NormalDistribution<Generator, float> {
  339. public:
  340. // The number of elements that will be returned.
  341. static const int kResultElementCount = Generator::kResultElementCount;
  342. // Cost of generation of a single element (in cycles).
  343. static const int kElementCost = 70;
  344. // Indicate that this distribution may take variable number of samples
  345. // during the runtime.
  346. static const bool kVariableSamplesPerOutput = false;
  347. typedef Array<float, kResultElementCount> ResultType;
  348. typedef float ResultElementType;
  349. PHILOX_DEVICE_INLINE
  350. ResultType operator()(Generator* gen) {
  351. typename Generator::ResultType sample = (*gen)();
  352. ResultType result;
  353. for (int i = 0; i < kResultElementCount; i += 2) {
  354. BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
  355. }
  356. return result;
  357. }
  358. };
  359. template <class Generator>
  360. class NormalDistribution<Generator, double> {
  361. public:
  362. // The number of elements that will be returned.
  363. static const int kResultElementCount = Generator::kResultElementCount / 2;
  364. // Cost of generation of a single element (in cycles).
  365. static const int kElementCost = 70;
  366. // Indicate that this distribution may take variable number of samples
  367. // during the runtime.
  368. static const bool kVariableSamplesPerOutput = false;
  369. typedef Array<double, kResultElementCount> ResultType;
  370. typedef double ResultElementType;
  371. PHILOX_DEVICE_INLINE
  372. ResultType operator()(Generator* gen) {
  373. typename Generator::ResultType sample = (*gen)();
  374. ResultType result;
  375. for (int i = 0; i < kResultElementCount; i += 2) {
  376. const int i2 = 2 * i;
  377. BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], sample[i2 + 3], &result[i],
  378. &result[i + 1]);
  379. }
  380. return result;
  381. }
  382. };
  383. // A class that returns standard normal distribution between
  384. // [-kTruncateValue, kTruncateValue].
  385. // Arguments:
  386. // Generator: a generator type that returns a number of uint32 upon each
  387. // each invocation. It needs to define kResultElementCount for the
  388. // sample count for each invocation, and ResultType for actual
  389. // returned sample type.
  390. // RealType: the data type of the real numbers that will be returned by the
  391. // distribution. This could be either float or double for now.
  392. // This class is meant to be implemented through specialization. The default
  393. // is not defined by design.
  394. template <class SingleSampleGenerator, typename RealType>
  395. class TruncatedNormalDistribution;
  396. // Exactly like the float version, except that we convert to half afterwards;
  397. // since we don't have half-precision sin/cos even on GPUs, there's nothing to
  398. // gain from working in half internally.
  399. template <class SingleSampleGenerator>
  400. class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> {
  401. public:
  402. // The number of elements that will be returned.
  403. static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
  404. // Cost of generation of a single element (in cycles).
  405. static const int kElementCost = 90;
  406. // Indicate that this distribution may take variable number of samples
  407. // during the runtime.
  408. static const bool kVariableSamplesPerOutput = true;
  409. // The threshold where the normal distribution is truncated.
  410. const float kTruncateValue = 2.0f;
  411. typedef Array<Eigen::half, kResultElementCount> ResultType;
  412. typedef Eigen::half ResultElementType;
  413. PHILOX_DEVICE_INLINE
  414. ResultType operator()(SingleSampleGenerator* gen) {
  415. ResultType results;
  416. int index = 0;
  417. while (true) {
  418. // Repeatedly take samples from the normal distribution, until we have
  419. // the desired number of elements that fall within the pre-defined cutoff
  420. // threshold.
  421. const uint32 x0 = (*gen)();
  422. const uint32 x1 = (*gen)();
  423. float f[2];
  424. BoxMullerFloat(x0, x1, &f[0], &f[1]);
  425. for (int i = 0; i < 2; ++i) {
  426. if (Eigen::numext::abs(f[i]) < kTruncateValue) {
  427. results[index++] = Eigen::half(f[i]);
  428. if (index >= kResultElementCount) {
  429. return results;
  430. }
  431. }
  432. }
  433. }
  434. }
  435. };
  436. template <class SingleSampleGenerator>
  437. class TruncatedNormalDistribution<SingleSampleGenerator, bfloat16> {
  438. public:
  439. // The number of elements that will be returned.
  440. static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
  441. // Cost of generation of a single element (in cycles).
  442. static const int kElementCost = 90;
  443. // Indicate that this distribution may take variable number of samples
  444. // during the runtime.
  445. static const bool kVariableSamplesPerOutput = true;
  446. // The threshold where the normal distribution is truncated.
  447. const float kTruncateValue = 2.0f;
  448. typedef Array<bfloat16, kResultElementCount> ResultType;
  449. typedef bfloat16 ResultElementType;
  450. PHILOX_DEVICE_INLINE
  451. ResultType operator()(SingleSampleGenerator* gen) {
  452. ResultType results;
  453. int index = 0;
  454. while (true) {
  455. // Repeatedly take samples from the normal distribution, until we have
  456. // the desired number of elements that fall within the pre-defined cutoff
  457. // threshold.
  458. const uint32 x0 = (*gen)();
  459. const uint32 x1 = (*gen)();
  460. float f[2];
  461. BoxMullerFloat(x0, x1, &f[0], &f[1]);
  462. for (int i = 0; i < 2; ++i) {
  463. if (Eigen::numext::abs(f[i]) < kTruncateValue) {
  464. results[index++] = bfloat16(f[i]);
  465. if (index >= kResultElementCount) {
  466. return results;
  467. }
  468. }
  469. }
  470. }
  471. }
  472. };
  473. // Partial specialization for float.
  474. template <class SingleSampleGenerator>
  475. class TruncatedNormalDistribution<SingleSampleGenerator, float> {
  476. public:
  477. // The number of elements that will be returned.
  478. static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
  479. // Cost of generation of a single element (in cycles).
  480. static const int kElementCost = 90;
  481. // Indicate that this distribution may take variable number of samples
  482. // during the runtime.
  483. static const bool kVariableSamplesPerOutput = true;
  484. // The threshold where the normal distribution is truncated.
  485. const float kTruncateValue = 2.0f;
  486. typedef Array<float, kResultElementCount> ResultType;
  487. typedef float ResultElementType;
  488. PHILOX_DEVICE_INLINE
  489. ResultType operator()(SingleSampleGenerator* gen) {
  490. ResultType results;
  491. int index = 0;
  492. while (true) {
  493. // Repeatedly take samples from the normal distribution, until we have
  494. // the desired number of elements that fall within the pre-defined cutoff
  495. // threshold.
  496. const uint32 x0 = (*gen)();
  497. const uint32 x1 = (*gen)();
  498. float f[2];
  499. BoxMullerFloat(x0, x1, &f[0], &f[1]);
  500. for (int i = 0; i < 2; ++i) {
  501. if (Eigen::numext::abs(f[i]) < kTruncateValue) {
  502. results[index++] = f[i];
  503. if (index >= kResultElementCount) {
  504. return results;
  505. }
  506. }
  507. }
  508. }
  509. }
  510. };
  511. // Partial specialization for double.
  512. template <class SingleSampleGenerator>
  513. class TruncatedNormalDistribution<SingleSampleGenerator, double> {
  514. public:
  515. // The number of elements that will be returned.
  516. static const int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
  517. ? SingleSampleGenerator::kNativeElementCount / 2
  518. : 1;
  519. // Cost of generation of a single element (in cycles).
  520. static const int kElementCost = 90;
  521. // Indicate that this distribution may take variable number of samples
  522. // during the runtime.
  523. static const bool kVariableSamplesPerOutput = true;
  524. typedef Array<double, kResultElementCount> ResultType;
  525. typedef double ResultElementType;
  526. const double kTruncateValue = 2.0;
  527. PHILOX_DEVICE_INLINE
  528. ResultType operator()(SingleSampleGenerator* gen) {
  529. ResultType results;
  530. int index = 0;
  531. while (1) {
  532. const uint32 x0 = (*gen)();
  533. const uint32 x1 = (*gen)();
  534. const uint32 x2 = (*gen)();
  535. const uint32 x3 = (*gen)();
  536. double d[2];
  537. BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
  538. for (int i = 0; i < 2; ++i) {
  539. if (Eigen::numext::abs(d[i]) < kTruncateValue) {
  540. results[index++] = d[i];
  541. if (index >= kResultElementCount) {
  542. return results;
  543. }
  544. }
  545. }
  546. }
  547. }
  548. };
  549. // Helper function to convert two 32-bit uniform integers to two floats
  550. // under the unit normal distribution.
  551. PHILOX_DEVICE_INLINE
  552. void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) {
  553. // This function implements the Box-Muller transform:
  554. // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
  555. // Do not send a really small number to log().
  556. // We cannot mark "epsilon" as "static const" because NVCC would complain
  557. const float epsilon = 1.0e-7f;
  558. float u1 = Uint32ToFloat(x0);
  559. if (u1 < epsilon) {
  560. u1 = epsilon;
  561. }
  562. const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
  563. const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
  564. #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
  565. *f0 = Eigen::numext::sin(v1);
  566. *f1 = Eigen::numext::cos(v1);
  567. #else
  568. sincosf(v1, f0, f1);
  569. #endif
  570. *f0 *= u2;
  571. *f1 *= u2;
  572. }
  573. // Helper function to convert four 32-bit uniform integers to two doubles
  574. // under the unit normal distribution.
  575. PHILOX_DEVICE_INLINE
  576. void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1) {
  577. // This function implements the Box-Muller transform:
  578. // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
  579. // Do not send a really small number to log().
  580. // We cannot mark "epsilon" as "static const" because NVCC would complain
  581. const double epsilon = 1.0e-7;
  582. double u1 = Uint64ToDouble(x0, x1);
  583. if (u1 < epsilon) {
  584. u1 = epsilon;
  585. }
  586. const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
  587. const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
  588. #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
  589. *d0 = Eigen::numext::sin(v1);
  590. *d1 = Eigen::numext::cos(v1);
  591. #else
  592. sincos(v1, d0, d1);
  593. #endif
  594. *d0 *= u2;
  595. *d1 *= u2;
  596. }
  597. // Helper function to convert an 16-bit integer to a half between [0..1).
  598. PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) {
  599. // IEEE754 halfs are formatted as follows (MSB first):
  600. // sign(1) exponent(5) mantissa(10)
  601. // Conceptually construct the following:
  602. // sign == 0
  603. // exponent == 15 -- an excess 15 representation of a zero exponent
  604. // mantissa == 10 random bits
  605. const uint16 man = x & 0x3ffu; // 10 bit mantissa
  606. const uint16 exp = static_cast<uint16>(15);
  607. const uint16 val = (exp << 10) | man;
  608. Eigen::half result;
  609. result.x = val;
  610. return result - Eigen::half(1.0);
  611. }
  612. // Helper function to convert an 16-bit integer to a bfloat16 between [0..1).
  613. // This can create a uniform distribution of values between [0..1).
  614. PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) {
  615. // bfloat are formatted as follows (MSB first):
  616. // sign(1) exponent(8) mantissa(7)
  617. // Conceptually construct the following:
  618. // sign == 0
  619. // exponent == 127 -- an excess 127 representation of a zero exponent
  620. // mantissa == 7 random bits
  621. const uint16 man = x & 0x7fu; // 7 bit mantissa
  622. const uint16 exp = static_cast<uint16>(127);
  623. const uint16 val = (exp << 7) | man;
  624. bfloat16 result;
  625. memcpy(&result, &val, sizeof(val));
  626. // The mantissa has an implicit leading 1, so the above code creates a value
  627. // in [1, 2). The minus will not cause a rounding that makes the result 1.
  628. // Instead it will just be close to 1.
  629. return result - bfloat16(1.0);
  630. }
  631. // Helper function to convert an 32-bit integer to a float between [0..1).
  632. PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) {
  633. // IEEE754 floats are formatted as follows (MSB first):
  634. // sign(1) exponent(8) mantissa(23)
  635. // Conceptually construct the following:
  636. // sign == 0
  637. // exponent == 127 -- an excess 127 representation of a zero exponent
  638. // mantissa == 23 random bits
  639. const uint32 man = x & 0x7fffffu; // 23 bit mantissa
  640. const uint32 exp = static_cast<uint32>(127);
  641. const uint32 val = (exp << 23) | man;
  642. // Assumes that endian-ness is same for float and uint32.
  643. float result;
  644. memcpy(&result, &val, sizeof(val));
  645. return result - 1.0f;
  646. }
  647. // Helper function to convert two 32-bit integers to a double between [0..1).
  648. PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) {
  649. // IEEE754 doubles are formatted as follows (MSB first):
  650. // sign(1) exponent(11) mantissa(52)
  651. // Conceptually construct the following:
  652. // sign == 0
  653. // exponent == 1023 -- an excess 1023 representation of a zero exponent
  654. // mantissa == 52 random bits
  655. const uint32 mhi = x0 & 0xfffffu; // upper 20 bits of mantissa
  656. const uint32 mlo = x1; // lower 32 bits of mantissa
  657. const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo; // mantissa
  658. const uint64 exp = static_cast<uint64>(1023);
  659. const uint64 val = (exp << 52) | man;
  660. // Assumes that endian-ness is same for double and uint64.
  661. double result;
  662. memcpy(&result, &val, sizeof(val));
  663. return result - 1.0;
  664. }
  665. } // namespace random
  666. } // namespace tensorflow
  667. #endif // TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_