Fix CVEs in TensorFlow

2021-08-31 15:06:16 +08:00 · 2021-08-31 15:06:16 +08:00 · 61695f3dc6
commit 61695f3dc6
parent 407229ab39
52 changed files with 4309 additions and 1 deletions
--- a/CVE-2020-15265.patch
+++ b/CVE-2020-15265.patch
@ -0,0 +1,53 @@
+From eccb7ec454e6617738554a255d77f08e60ee0808 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Mon, 19 Oct 2020 17:56:36 -0700
+Subject: [PATCH] Prevent segfault in `quantize_and_dequantize`
+
+---
+ .../core/kernels/quantize_and_dequantize_op.cc     |  4 ++++
+ tensorflow/python/kernel_tests/array_ops_test.py   | 14 ++++++++++++++
+ 2 files changed, 18 insertions(+)
+
+diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+index 8f71d09c..fda54208 100644
+--- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+@@ -71,6 +71,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel {
+ 
+   void Compute(OpKernelContext* ctx) override {
+     const Tensor& input = ctx->input(0);
+    OP_REQUIRES(
+	ctx, (axis_ == -1 || axis_ < input.shape().dims()),
+	errors::InvalidArgument("Shape must be at least rank ", axis_ + 1,
+				" but is rank ", input.shape().dims()));
+     const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_);
+     Tensor input_min_tensor;
+     Tensor input_max_tensor;
+diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
+index dbff3a1b..c498ff62 100644
+--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
+@@ -1541,6 +1541,20 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase):
+                   axis=(axis - 4)))
+           self.assertAllClose(fake_quantized, expected)
+ 
+  def testBadAxis(self):
+    input_tensor = [2.5, 2.5]
+    input_min = [0, 0]
+    input_max = [1, 1]
+    error_message_pattern = "Shape must be at least rank 11 but is rank 1"
+    # TODO(b/171260356): Eager mode and graph mode throw different error types
+    error = errors.InvalidArgumentError if context.executing_eagerly(
+    ) else ValueError
+    with self.assertRaisesRegex(error, error_message_pattern):      self.evaluate(
+          array_ops.quantize_and_dequantize_v2(
+              input=input_tensor,
+              input_min=input_min,
+              input_max=input_max,
+              axis=10))
+ 
+ @test_util.run_all_in_graph_and_eager_modes
+ class SortedSearchTest(test_util.TensorFlowTestCase):
+-- 
+2.23.0
+
--- a/CVE-2020-15266.patch
+++ b/CVE-2020-15266.patch
@ -0,0 +1,67 @@
+From 3ade2efec2e90c6237de32a19680caaa3ebc2845 Mon Sep 17 00:00:00 2001
+From: Yong Tang <yong.tang.github@outlook.com>
+Date: Sat, 8 Aug 2020 00:47:35 +0000
+Subject: [PATCH] Fix segmentation fault in tf.image.crop_and_resize when boxes is inf or nan
+
+---
+ tensorflow/core/kernels/crop_and_resize_op.cc | 13 +++++++++++++
+ tensorflow/python/ops/image_ops_test.py       | 12 ++++++++++++
+ 2 files changed, 25 insertions(+)
+
+diff --git a/tensorflow/core/kernels/crop_and_resize_op.cc b/tensorflow/core/kernels/crop_and_resize_op.cc
+index 4ecd3bc0..e14f4e43 100644
+--- a/tensorflow/core/kernels/crop_and_resize_op.cc
+++ b/tensorflow/core/kernels/crop_and_resize_op.cc
+@@ -71,6 +71,18 @@ static inline Status ParseAndCheckBoxSizes(const Tensor& boxes,
+   if (boxes.dim_size(1) != 4) {
+     return errors::InvalidArgument("boxes must have 4 columns");
+   }
+  for (int64 i = 0; i < *num_boxes; i++) {
+    for (int64 j = 0; j < 4; j++) {
+      if (!isfinite(boxes.tensor<float, 2>()(i, j))) {
+	return errors::InvalidArgument(
+	    "boxes values must be finite, received boxes[", i, "]: ",
+	    boxes.tensor<float, 2>()(i, 0), ", ",
+	    boxes.tensor<float, 2>()(i, 1), ", ",
+	    boxes.tensor<float, 2>()(i, 2), ", ",
+	    boxes.tensor<float, 2>()(i, 3));
+      }
+    }
+  }
+   // The shape of 'box_index' is [num_boxes].
+   if (box_index.dims() != 1) {
+     return errors::InvalidArgument("box_index must be 1-D",
+@@ -256,6 +268,7 @@ struct CropAndResize<CPUDevice, T> {
+             continue;
+           }
+           if (method_name == "bilinear") {
+
+             const int top_y_index = floorf(in_y);
+             const int bottom_y_index = ceilf(in_y);
+             const float y_lerp = in_y - top_y_index;
+diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
+index 0206ccf9..0630b6fc 100644
+--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
+@@ -5275,6 +5275,18 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
+       self.assertAllEqual(list(image0.shape), [40, 20, 3])
+       self.assertAllEqual(image0, image1)
+ 
+  def testImageCropAndResize(self):
+    # Test case for GitHub issue 42129
+    message = "boxes values must be finite"
+    with self.assertRaisesRegex(
+        (errors.InvalidArgumentError, ValueError), message):
+      v = image_ops_impl.crop_and_resize_v2(
+          image=array_ops.zeros((2, 1, 1, 1)),
+          boxes=[[1.0e+40, 0, 0, 0]],
+          box_indices=[1],
+          crop_size=[1, 1])
+      self.evaluate(v)
+
+ 
+ if __name__ == "__main__":
+   googletest.main()
+-- 
+2.23.0
+
--- a/CVE-2021-29517-1.patch
+++ b/CVE-2021-29517-1.patch
@ -0,0 +1,611 @@
+From cc5ea8469641b6680971eb76020407f81ab3f573 Mon Sep 17 00:00:00 2001
+From: Anna R <annarev@google.com>
+Date: Wed, 9 Dec 2020 16:13:53 -0800
+Subject: [PATCH] Remove changes made to support TFRT-based OpKernel classes in Conv3d kernel.
+
+---
+ tensorflow/core/framework/BUILD             |   3 -
+ tensorflow/core/framework/numeric_op.h      |  21 ++-
+ tensorflow/core/framework/numeric_op_base.h |  49 -----
+ tensorflow/core/kernels/BUILD               |  47 +----
+ tensorflow/core/kernels/conv_ops_3d.cc      | 153 ++++++++++++++--
+ tensorflow/core/kernels/conv_ops_3d.h       | 187 --------------------
+ 6 files changed, 161 insertions(+), 299 deletions(-)
+
+diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD
+index d47c74a6..9b6ddb2a 100644
+--- a/tensorflow/core/framework/BUILD
+++ b/tensorflow/core/framework/BUILD
+@@ -51,7 +51,6 @@ exports_files(
+         "model.h",
+         "node_def_builder.h",
+         "numeric_op.h",
+-        "numeric_op_base.h",
+         "op_kernel.h",
+         "op_requires.h",
+         "op_segment.h",
+@@ -183,7 +182,6 @@ filegroup(
+         "node_def_util.h",
+         "node_properties.h",
+         "numeric_op.h",
+-        "numeric_op_base.h",
+         "numeric_types.h",
+         "op.h",
+         "op_def_builder.h",
+@@ -280,7 +278,6 @@ filegroup(
+         "kernel_shape_util.h",
+         "log_memory.cc",
+         "log_memory.h",
+-        "numeric_op_base.h",
+         "numeric_types.h",
+         "op_requires.h",
+         "ops_util.cc",
+diff --git a/tensorflow/core/framework/numeric_op.h b/tensorflow/core/framework/numeric_op.h
+index 9f8ceed2..ad452bcd 100644
+--- a/tensorflow/core/framework/numeric_op.h
+++ b/tensorflow/core/framework/numeric_op.h
+@@ -15,19 +15,34 @@ limitations under the License.
+ #ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_
+ #define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_H_
+ 
+-#include "tensorflow/core/framework/numeric_op_base.h"
+ #include "tensorflow/core/framework/op_kernel.h"
+ #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+ #include "tensorflow/core/lib/core/errors.h"
+ #include "tensorflow/core/lib/core/status.h"
+ 
+ namespace tensorflow {
+ 
+// One input and one output, both the same type.
+ template <class T>
+-using UnaryOp = UnaryOpBase<T, OpKernel, OpKernelConstruction>;
+class UnaryOp : public OpKernel {
+ public:
+  explicit UnaryOp(OpKernelConstruction* context) : OpKernel(context) {
+    const DataType dt = DataTypeToEnum<T>::v();
+    OP_REQUIRES_OK(context, context->MatchSignature({dt}, {dt}));
+  }
+};
+ 
+// Two inputs and one output, all the same type.
+ template <class T>
+-using BinaryOp = BinaryOpBase<T, OpKernel, OpKernelConstruction>;
+class BinaryOp : public OpKernel {
+ public:
+  explicit BinaryOp(OpKernelConstruction* context) : OpKernel(context) {
+    const DataType dt = DataTypeToEnum<T>::v();
+    OP_REQUIRES_OK(context, context->MatchSignature({dt, dt}, {dt}));
+  }
+};
+ 
+ // For operations where the input and output are the same shape.
+ //
+diff --git a/tensorflow/core/framework/numeric_op_base.h b/tensorflow/core/framework/numeric_op_base.h
+index be7d3bf8..e69de29b 100644
+--- a/tensorflow/core/framework/numeric_op_base.h
+++ b/tensorflow/core/framework/numeric_op_base.h
+@@ -1,49 +0,0 @@
+-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+-
+-Licensed under the Apache License, Version 2.0 (the "License");
+-you may not use this file except in compliance with the License.
+-You may obtain a copy of the License at
+-
+-    http://www.apache.org/licenses/LICENSE-2.0
+-
+-Unless required by applicable law or agreed to in writing, software
+-distributed under the License is distributed on an "AS IS" BASIS,
+-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-See the License for the specific language governing permissions and
+-limitations under the License.
+-==============================================================================*/
+-
+-#ifndef TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
+-#define TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
+-
+-#include "tensorflow/core/framework/op_requires.h"
+-#include "tensorflow/core/framework/types.h"
+-#include "tensorflow/core/framework/types.pb.h"
+-#include "tensorflow/core/lib/core/status.h"
+-
+-namespace tensorflow {
+-
+-// One input and one output, both the same type.
+-template <class T, class OpKernelT, class OpKernelConstructionT>
+-class UnaryOpBase : public OpKernelT {
+- public:
+-  explicit UnaryOpBase(OpKernelConstructionT* construction) :
+-      OpKernelT(construction) {
+-    const DataType dt = DataTypeToEnum<T>::v();
+-    OP_REQUIRES_OK(construction, construction->MatchSignature({dt}, {dt}));
+-  }
+-};
+-
+-// Two inputs and one output, all the same type.
+-template <class T, class OpKernelT, class OpKernelConstructionT>
+-class BinaryOpBase : public OpKernelT {
+- public:
+-  explicit BinaryOpBase(OpKernelConstructionT* construction) :
+-      OpKernelT(construction) {
+-    const DataType dt = DataTypeToEnum<T>::v();
+-    OP_REQUIRES_OK(construction, construction->MatchSignature({dt, dt}, {dt}));
+-  }
+-};
+-}  // namespace tensorflow
+-
+-#endif  // TENSORFLOW_CORE_FRAMEWORK_NUMERIC_OP_BASE_H_
+diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
+index 14f7d99b..5f8fa80b 100644
+--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
+@@ -4048,48 +4048,6 @@ cc_library(
+     }),
+ )
+ 
+-# TODO(annarev): conv_ops_3d_headers currently depends on android target build
+-# from selected sources. We should switch to use granular dependencies instead.
+-# Then, we can just depend on "conv3d".
+-cc_library(
+-    name = "conv_3d_mobile",
+-    hdrs = [
+-        "conv_3d.h",
+-        "eigen_backward_cuboid_convolutions.h",
+-        "eigen_convolution_helpers.h",
+-        "eigen_cuboid_convolution.h",
+-        "eigen_volume_patch.h",
+-    ],
+-    deps = [
+-        ":eigen_spatial_convolutions-inl",
+-    ] + select({
+-        "//tensorflow:android": [
+-            "//tensorflow/core:portable_tensorflow_lib_lite",  # TODO(annarev): exclude runtime srcs
+-        ],
+-        "//conditions:default": [
+-            "//tensorflow/core:framework",
+-        ],
+-    }),
+-)
+-
+-cc_library(
+-    name = "conv_ops_3d_headers",
+-    hdrs = [
+-        "conv_ops_3d.h",
+-    ],
+-    deps = select({
+-        "//tensorflow:android": [
+-            ":conv_3d_mobile",
+-            "//tensorflow/core:portable_tensorflow_lib_lite",  # TODO(annarev): exclude runtime srcs
+-        ],
+-        "//conditions:default": [
+-            ":conv_3d",
+-            "//third_party/eigen3",
+-            "//tensorflow/core:framework",
+-        ],
+-    }),
+-)
+-
+ tf_kernel_library(
+     name = "argmax_op",
+     prefix = "argmax_op",
+@@ -4673,6 +4631,7 @@ tf_kernel_library(
+         "deep_conv2d.h",
+         "gemm_functors.h",
+         "winograd_transform.h",
+	"conv_ops_fused_impl.h",
+     ] + select({
+         ":xsmm_convolutions": ["xsmm_conv2d.h"],
+         "//conditions:default": [],
+@@ -4687,8 +4646,6 @@ tf_kernel_library(
+     prefix = "conv_ops",
+     deps = [
+         ":conv_grad_shape_utils",
+-        ":conv_ops_3d_headers",
+-        ":bounds_check",
+         ":conv_2d",
+         ":conv_3d",
+         ":eigen_contraction_kernel",
+@@ -6710,7 +6667,6 @@ filegroup(
+         "conv_2d.h",
+         "conv_3d.h",
+         "conv_ops.h",
+-        "conv_ops_3d.h",
+         "conv_ops_gpu.h",
+         "data_format_ops.h",
+         "depthtospace_op.h",
+@@ -7160,7 +7116,6 @@ filegroup(
+         "stateful_random_ops_cpu_gpu.h",
+         # Allows conv_3d ops for android but excluded from *_3d* rule above.
+         "conv_3d.h",
+-        "conv_ops_3d.h",
+         "conv_ops_3d.cc",
+         "conv_ops_gpu.h",
+     ],
+diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
+index 289a083a..52356443 100644
+--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
+@@ -16,8 +16,7 @@ limitations under the License.
+ #define USE_EIGEN_TENSOR
+ #define EIGEN_USE_THREADS
+ 
+-#include "tensorflow/core/kernels/conv_ops_3d.h"
+-
+#include "tensorflow/core/framework/kernel_shape_util.h"
+ #include "tensorflow/core/framework/numeric_op.h"
+ #include "tensorflow/core/framework/op_kernel.h"
+ #include "tensorflow/core/framework/register_types.h"
+@@ -51,11 +50,146 @@ namespace tensorflow {
+ typedef Eigen::ThreadPoolDevice CPUDevice;
+ typedef Eigen::GpuDevice GPUDevice;
+ 
+template <typename Device, typename T>
+	struct LaunchConvOp;
+template <typename T>
+struct LaunchConvOp<CPUDevice, T> {
+  static void launch(OpKernelContext* context, bool cudnn_use_autotune,
+		     const Tensor& input, const Tensor& filter,
+		     const std::array<int64, 3>& dilations,
+		     const std::array<int64, 3>& strides, const Padding padding,
+		     TensorFormat data_format, Tensor* output) {
+    OP_REQUIRES(context, data_format == FORMAT_NHWC,
+		errors::InvalidArgument("CPU implementation of Conv3D "
+					"currently only supports the NHWC "
+					"tensor format."));
+    OP_REQUIRES(context,
+		dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1,
+		errors::InvalidArgument("CPU implementation of Conv3D "
+					"currently only supports dilated rates "
+					"of 1."));
+    functor::CuboidConvolution<CPUDevice, T>()(
+	context->eigen_device<CPUDevice>(), output->tensor<T, 5>(),
+	input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
+	strides[0], BrainPadding2EigenPadding(padding));
+  }
+};
+
+template <typename Device, typename T>
+class Conv3DOp : public BinaryOp<T> {
+ public:
+  explicit Conv3DOp(OpKernelConstruction* context) : BinaryOp<T>(context) {
+    string data_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+		errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+    OP_REQUIRES(context, stride_.size() == 5,
+		errors::InvalidArgument("Sliding window strides field must "
+					"specify 5 dimensions"));
+    OP_REQUIRES(
+	context,
+	(GetTensorDim(stride_, data_format_, 'N') == 1 &&
+	 GetTensorDim(stride_, data_format_, 'C') == 1),
+	errors::InvalidArgument("Current implementation does not yet support "
+				"strides in the batch and depth dimensions."));
+    OP_REQUIRES(
+	context,
+	(GetTensorDim(stride_, data_format_, '0') > 0 &&
+	 GetTensorDim(stride_, data_format_, '1') > 0 &&
+	 GetTensorDim(stride_, data_format_, '2') > 0),
+	errors::InvalidArgument("Spatial strides should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_));
+    OP_REQUIRES(context, dilation_.size() == 5,
+		errors::InvalidArgument("Dilation rates field must "
+					"specify 5 dimensions"));
+    OP_REQUIRES(context,
+		(GetTensorDim(dilation_, data_format_, 'N') == 1 &&
+		 GetTensorDim(dilation_, data_format_, 'C') == 1),
+		errors::InvalidArgument(
+		    "Current implementation does not yet support "
+		    "dilation rates in the batch and depth dimensions."));
+    OP_REQUIRES(
+	context,
+	(GetTensorDim(dilation_, data_format_, '0') > 0 &&
+	 GetTensorDim(dilation_, data_format_, '1') > 0 &&
+	 GetTensorDim(dilation_, data_format_, '2') > 0),
+	errors::InvalidArgument("Dilated rates should be larger than 0."));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+    cudnn_use_autotune_ = CudnnUseAutotune();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // Input tensor is of the following dimensions:
+    // [ batch, in_z, in_y, in_x, in_channels ]
+    const Tensor& input = context->input(0);
+
+    // Input filter is of the following dimensions:
+    // [ filter_z, filter_y, filter_x, in_channels, out_channels]
+    const Tensor& filter = context->input(1);
+
+    // NOTE: The ordering of the spatial dimensions is arbitrary, but has to be
+    // kept consistent between input/filter/output.
+    OP_REQUIRES(context, input.dims() == 5,
+		errors::InvalidArgument("input must be 5-dimensional"));
+    OP_REQUIRES(context, filter.dims() == 5,
+		errors::InvalidArgument("filter must be 5-dimensional"));
+
+    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
+    const int64 in_batch = GetTensorDim(input, data_format_, 'N');
+
+    const int64 filter_depth = filter.dim_size(3);
+    const int64 out_depth = filter.dim_size(4);
+
+    OP_REQUIRES(context, in_depth % filter_depth == 0,
+		errors::InvalidArgument(
+		    "Input depth must be evenly divisible by filter depth: ",
+		    in_depth, " vs ", filter_depth));
+
+    // Dimension order for these arrays is: z, y, x.
+    std::array<int64, 3> input_size = {
+	{GetTensorDim(input, data_format_, '0'),
+	 GetTensorDim(input, data_format_, '1'),
+	 GetTensorDim(input, data_format_, '2')}};
+    std::array<int64, 3> filter_size = {
+	{filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}};
+    std::array<int64, 3> dilations = {
+	{GetTensorDim(dilation_, data_format_, '0'),
+	 GetTensorDim(dilation_, data_format_, '1'),
+	 GetTensorDim(dilation_, data_format_, '2')}};
+    std::array<int64, 3> strides = {{GetTensorDim(stride_, data_format_, '0'),
+	    			     GetTensorDim(stride_, data_format_, '1'),
+				     GetTensorDim(stride_, data_format_, '2')}};
+    std::array<int64, 3> out, padding;
+
+    OP_REQUIRES_OK(
+	context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides,
+				   padding_, &out, &padding));
+    TensorShape out_shape = ShapeFromFormat(
+	data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth);
+    Tensor* output;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+    // Return early if nothing to do.
+    if (out_shape.num_elements() == 0) return;
+
+    LaunchConvOp<Device, T>::launch(context, cudnn_use_autotune_, input, filter,
+		    		    dilations, strides, padding_, data_format_,
+				    output);
+  }
+
+ private:
+  std::vector<int32> dilation_;
+  std::vector<int32> stride_;
+  Padding padding_;
+  TensorFormat data_format_;
+  bool cudnn_use_autotune_;
+};
+
+ #define REGISTER_CPU_KERNEL(T)                                  \
+   REGISTER_KERNEL_BUILDER(                                      \
+       Name("Conv3D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+-      Conv3DOp<CPUDevice, T, OpKernel, OpKernelConstruction,    \
+-               OpKernelContext>);
+      Conv3DOp<CPUDevice, T>);
+ TF_CALL_half(REGISTER_CPU_KERNEL);
+ TF_CALL_float(REGISTER_CPU_KERNEL);
+ TF_CALL_double(REGISTER_CPU_KERNEL);
+@@ -73,7 +207,7 @@ typedef AutoTuneSingleton<Conv3dAutoTuneGroup, ConvParameters,
+ 
+ // TODO(mjanusz): Share logic with 2d implementation as much as possible.
+ template <typename T>
+-struct LaunchConvOp<GPUDevice, T, OpKernelContext> {
+struct LaunchConvOp<GPUDevice, T> {
+   static void launch(OpKernelContext* ctx, bool cudnn_use_autotune,
+                      const Tensor& input_param, const Tensor& filter,
+                      const std::array<int64, 3>& dilations,
+@@ -559,16 +693,13 @@ DECLARE_GPU_SPEC(double);
+ // Registration of the GPU implementations.
+ REGISTER_KERNEL_BUILDER(
+     Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
+-    Conv3DOp<GPUDevice, Eigen::half, OpKernel, OpKernelConstruction,
+-             OpKernelContext>);
+    Conv3DOp<GPUDevice, Eigen::half>);
+ REGISTER_KERNEL_BUILDER(
+     Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
+-    Conv3DOp<GPUDevice, float, OpKernel, OpKernelConstruction,
+-             OpKernelContext>);
+    Conv3DOp<GPUDevice, float>);
+ REGISTER_KERNEL_BUILDER(
+     Name("Conv3D").Device(DEVICE_GPU).TypeConstraint<double>("T"),
+-    Conv3DOp<GPUDevice, double, OpKernel, OpKernelConstruction,
+-             OpKernelContext>);
+    Conv3DOp<GPUDevice, double>);
+ #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+ 
+ }  // namespace tensorflow
+diff --git a/tensorflow/core/kernels/conv_ops_3d.h b/tensorflow/core/kernels/conv_ops_3d.h
+index 9dcdea5b..e69de29b 100644
+--- a/tensorflow/core/kernels/conv_ops_3d.h
+++ b/tensorflow/core/kernels/conv_ops_3d.h
+@@ -1,187 +0,0 @@
+-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+-
+-Licensed under the Apache License, Version 2.0 (the "License");
+-you may not use this file except in compliance with the License.
+-You may obtain a copy of the License at
+-
+-    http://www.apache.org/licenses/LICENSE-2.0
+-
+-Unless required by applicable law or agreed to in writing, software
+-distributed under the License is distributed on an "AS IS" BASIS,
+-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-See the License for the specific language governing permissions and
+-limitations under the License.
+-==============================================================================*/
+-#ifndef TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_
+-#define TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_
+-
+-#include <vector>
+-
+-#define USE_EIGEN_TENSOR
+-#define EIGEN_USE_THREADS
+-
+-#include "tensorflow/core/framework/numeric_op_base.h"
+-#include "tensorflow/core/framework/kernel_shape_util.h"
+-#include "tensorflow/core/framework/op_requires.h"
+-#include "tensorflow/core/framework/ops_util.h"
+-#include "tensorflow/core/framework/tensor.h"
+-#include "tensorflow/core/framework/tensor_shape.h"
+-#include "tensorflow/core/kernels/conv_3d.h"
+-#include "tensorflow/core/platform/errors.h"
+-#include "tensorflow/core/util/padding.h"
+-#include "tensorflow/core/util/tensor_format.h"
+-#if GOOGLE_CUDA
+-#include "tensorflow/core/util/use_cudnn.h"
+-#endif
+-
+-namespace tensorflow {
+-typedef Eigen::ThreadPoolDevice CPUDevice;
+-
+-template <typename Device, typename T, class OpKernelContextT>
+-struct LaunchConvOp;
+-
+-template <typename T, class OpKernelContextT>
+-struct LaunchConvOp<CPUDevice, T, OpKernelContextT> {
+-  static void launch(OpKernelContextT* context, bool cudnn_use_autotune,
+-                     const Tensor& input, const Tensor& filter,
+-                     const std::array<int64, 3>& dilations,
+-                     const std::array<int64, 3>& strides, const Padding padding,
+-                     TensorFormat data_format, Tensor* output) {
+-    OP_REQUIRES(context, data_format == FORMAT_NHWC,
+-                errors::InvalidArgument("CPU implementation of Conv3D "
+-                                        "currently only supports the NHWC "
+-                                        "tensor format."));
+-    OP_REQUIRES(context,
+-                dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1,
+-                errors::InvalidArgument("CPU implementation of Conv3D "
+-                                        "currently only supports dilated rates "
+-                                        "of 1."));
+-    functor::CuboidConvolution<CPUDevice, T>()(
+-        context->template eigen_device<CPUDevice>(), output->tensor<T, 5>(),
+-        input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
+-        strides[0], BrainPadding2EigenPadding(padding));
+-  }
+-};
+-
+-template <typename Device, typename T, class OpKernelT,
+-          class OpKernelConstructionT, class OpKernelContextT>
+-class Conv3DOp : public BinaryOpBase<T, OpKernelT, OpKernelConstructionT> {
+- public:
+-  explicit Conv3DOp(OpKernelConstructionT* context) :
+-      BinaryOpBase<T, OpKernelT, OpKernelConstructionT>(context) {
+-    string data_format;
+-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+-                errors::InvalidArgument("Invalid data format"));
+-    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
+-    OP_REQUIRES(context, stride_.size() == 5,
+-                errors::InvalidArgument("Sliding window strides field must "
+-                                        "specify 5 dimensions"));
+-    OP_REQUIRES(
+-        context,
+-        (GetTensorDim(stride_, data_format_, 'N') == 1 &&
+-         GetTensorDim(stride_, data_format_, 'C') == 1),
+-        errors::InvalidArgument("Current implementation does not yet support "
+-                                "strides in the batch and depth dimensions."));
+-    OP_REQUIRES(
+-        context,
+-        (GetTensorDim(stride_, data_format_, '0') > 0 &&
+-         GetTensorDim(stride_, data_format_, '1') > 0 &&
+-         GetTensorDim(stride_, data_format_, '2') > 0),
+-        errors::InvalidArgument("Spatial strides should be larger than 0."));
+-    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilation_));
+-    OP_REQUIRES(context, dilation_.size() == 5,
+-                errors::InvalidArgument("Dilation rates field must "
+-                                        "specify 5 dimensions"));
+-    OP_REQUIRES(context,
+-                (GetTensorDim(dilation_, data_format_, 'N') == 1 &&
+-                 GetTensorDim(dilation_, data_format_, 'C') == 1),
+-                errors::InvalidArgument(
+-                    "Current implementation does not yet support "
+-                    "dilation rates in the batch and depth dimensions."));
+-    OP_REQUIRES(
+-        context,
+-        (GetTensorDim(dilation_, data_format_, '0') > 0 &&
+-         GetTensorDim(dilation_, data_format_, '1') > 0 &&
+-         GetTensorDim(dilation_, data_format_, '2') > 0),
+-        errors::InvalidArgument("Dilated rates should be larger than 0."));
+-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+-#if GOOGLE_CUDA
+-    cudnn_use_autotune_ = CudnnUseAutotune();
+-#else
+-    cudnn_use_autotune_ = false;
+-#endif
+-  }
+-
+-  void Compute(OpKernelContextT* context) override {
+-    // Input tensor is of the following dimensions:
+-    // [ batch, in_z, in_y, in_x, in_channels ]
+-    const Tensor& input = context->input(0);
+-
+-    // Input filter is of the following dimensions:
+-    // [ filter_z, filter_y, filter_x, in_channels, out_channels]
+-    const Tensor& filter = context->input(1);
+-
+-    // NOTE: The ordering of the spatial dimensions is arbitrary, but has to be
+-    // kept consistent between input/filter/output.
+-    OP_REQUIRES(context, input.dims() == 5,
+-                errors::InvalidArgument("input must be 5-dimensional"));
+-    OP_REQUIRES(context, filter.dims() == 5,
+-                errors::InvalidArgument("filter must be 5-dimensional"));
+-
+-    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
+-    const int64 in_batch = GetTensorDim(input, data_format_, 'N');
+-
+-    const int64 filter_depth = filter.dim_size(3);
+-    const int64 out_depth = filter.dim_size(4);
+-
+-    OP_REQUIRES(context, in_depth % filter_depth == 0,
+-                errors::InvalidArgument(
+-                    "Input depth must be evenly divisible by filter depth: ",
+-                    in_depth, " vs ", filter_depth));
+-
+-    // Dimension order for these arrays is: z, y, x.
+-    std::array<int64, 3> input_size = {
+-        {GetTensorDim(input, data_format_, '0'),
+-         GetTensorDim(input, data_format_, '1'),
+-         GetTensorDim(input, data_format_, '2')}};
+-    std::array<int64, 3> filter_size = {
+-        {filter.dim_size(0), filter.dim_size(1), filter.dim_size(2)}};
+-    std::array<int64, 3> dilations = {
+-        {GetTensorDim(dilation_, data_format_, '0'),
+-         GetTensorDim(dilation_, data_format_, '1'),
+-         GetTensorDim(dilation_, data_format_, '2')}};
+-    std::array<int64, 3> strides = {{GetTensorDim(stride_, data_format_, '0'),
+-                                     GetTensorDim(stride_, data_format_, '1'),
+-                                     GetTensorDim(stride_, data_format_, '2')}};
+-    std::array<int64, 3> out, padding;
+-
+-    OP_REQUIRES_OK(
+-        context, Get3dOutputSizeV2(input_size, filter_size, dilations, strides,
+-                                   padding_, &out, &padding));
+-    TensorShape out_shape = ShapeFromFormat(
+-        data_format_, in_batch, {{out[0], out[1], out[2]}}, out_depth);
+-    Tensor* output;
+-    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+-
+-    // Return early if nothing to do.
+-    if (out_shape.num_elements() == 0) return;
+-
+-    LaunchConvOp<Device, T, OpKernelContextT>::launch(
+-        context, cudnn_use_autotune_, input, filter,
+-        dilations, strides, padding_, data_format_,
+-        output);
+-  }
+-
+- private:
+-  std::vector<int32> dilation_;
+-  std::vector<int32> stride_;
+-  Padding padding_;
+-  TensorFormat data_format_;
+-  bool cudnn_use_autotune_;
+-};
+-
+-}  // namespace tensorflow
+-
+-
+-#endif  // TENSORFLOW_CORE_KERNELS_CONV_OPS_3D_H_
+-- 
+2.23.0
+
--- a/CVE-2021-29517-2.patch
+++ b/CVE-2021-29517-2.patch
@ -0,0 +1,37 @@
+From 799f835a3dfa00a4d852defa29b15841eea9d64f Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Mon, 19 Apr 2021 09:56:46 -0700
+Subject: [PATCH] Fix 2 issues with `Conv3D`.
+
+---
+ tensorflow/core/kernels/conv_ops_3d.cc | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
+index 52356443..75a0a043 100644
+--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
+@@ -68,6 +68,11 @@ struct LaunchConvOp<CPUDevice, T> {
+ 		errors::InvalidArgument("CPU implementation of Conv3D "
+ 					"currently only supports dilated rates "
+ 					"of 1."));
+    OP_REQUIRES(context, filter.dim_size(3) == input.dim_size(input.dims() - 1),
+		errors::InvalidArgument(
+		    "Number of channels in filter (", filter.dim_size(3),
+		    ") must match last dimension of input (",
+		    input.dim_size(input.dims() - 1), ")"));
+     functor::CuboidConvolution<CPUDevice, T>()(
+ 	context->eigen_device<CPUDevice>(), output->tensor<T, 5>(),
+ 	input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
+@@ -141,6 +146,8 @@ class Conv3DOp : public BinaryOp<T> {
+     const int64 filter_depth = filter.dim_size(3);
+     const int64 out_depth = filter.dim_size(4);
+ 
+    OP_REQUIRES(context, filter_depth != 0,
+		errors::InvalidArgument("filter_depth must be non-zero"));
+     OP_REQUIRES(context, in_depth % filter_depth == 0,
+ 		errors::InvalidArgument(
+ 		    "Input depth must be evenly divisible by filter depth: ",
+-- 
+2.23.0
+
--- a/CVE-2021-29518.patch
+++ b/CVE-2021-29518.patch
@ -0,0 +1,42 @@
+From ff70c47a396ef1e3cb73c90513da4f5cb71bebba Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Tue, 13 Apr 2021 14:24:00 -0700
+Subject: [PATCH] Fix `tf.raw_ops.GetSessionTensor` and
+ `tf.raw_ops.DeleteSessionTensor` null pointer dereferences.
+
+---
+ tensorflow/core/kernels/session_ops.cc | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/tensorflow/core/kernels/session_ops.cc b/tensorflow/core/kernels/session_ops.cc
+index e7e73549..dab59e70 100644
+--- a/tensorflow/core/kernels/session_ops.cc
+++ b/tensorflow/core/kernels/session_ops.cc
+@@ -119,6 +119,10 @@ class GetSessionTensorOp : public OpKernel {
+     const string& name = handle.scalar<tstring>()();
+     Tensor val;
+-    OP_REQUIRES_OK(ctx, ctx->session_state()->GetTensor(name, &val));
+    auto session_state = ctx->session_state();
+    OP_REQUIRES(ctx, session_state != nullptr,
+		errors::FailedPrecondition(
+		    "GetSessionTensor called on null session state"));
+    OP_REQUIRES_OK(ctx, session_state->GetTensor(name, &val));
+     ctx->set_output(0, val);
+   }
+ 
+@@ -160,7 +164,11 @@ class DeleteSessionTensorOp : public OpKernel {
+   void Compute(OpKernelContext* ctx) override {
+     const Tensor& handle = ctx->input(0);
+     const string& name = handle.scalar<tstring>()();
+-    OP_REQUIRES_OK(ctx, ctx->session_state()->DeleteTensor(name));
+    auto session_state = ctx->session_state();
+    OP_REQUIRES(ctx, session_state != nullptr,
+		errors::FailedPrecondition(
+	            "DeleteSessionTensor called on null session state"));
+    OP_REQUIRES_OK(ctx, session_state->DeleteTensor(name));
+   }
+ 
+   TF_DISALLOW_COPY_AND_ASSIGN(DeleteSessionTensorOp);
+-- 
+2.23.0
+
--- a/CVE-2021-29521.patch
+++ b/CVE-2021-29521.patch
@ -0,0 +1,35 @@
+From c57c0b9f3a4f8684f3489dd9a9ec627ad8b599f5 Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Mon, 19 Apr 2021 11:33:50 -0700
+Subject: [PATCH] Fix the segfault in `tf.raw_ops.SparseCountSparseOutput`.
+
+---
+ tensorflow/core/kernels/count_ops.cc | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/core/kernels/count_ops.cc b/tensorflow/core/kernels/count_ops.cc
+index b7bb3ed9..67aafebe 100644
+--- a/tensorflow/core/kernels/count_ops.cc
+++ b/tensorflow/core/kernels/count_ops.cc
+@@ -200,9 +200,17 @@ class SparseCount : public OpKernel {
+                     "The shape argument requires at least one element."));
+ 
+     bool is_1d = shape.NumElements() == 1;
+-    int num_batches = is_1d ? 1 : shape.flat<int64>()(0);
+    auto shape_vector = shape.flat<int64>();
+    int num_batches = is_1d ? 1 : shape_vector(0);
+     int num_values = values.NumElements();
+ 
+    for (int b = 0; b < shape_vector.size(); b++) {
+      OP_REQUIRES(context, shape_vector(b) >= 0,
+		  errors::InvalidArgument(
+		      "Elements in dense_shape must be >= 0. Instead got:",
+		      shape.DebugString()));
+    }
+
+     OP_REQUIRES(context, num_values == indices.shape().dim_size(0),
+                 errors::InvalidArgument(
+                     "Number of values must match first dimension of indices.",
+-- 
+2.23.0
+
--- a/CVE-2021-29526-1.patch
+++ b/CVE-2021-29526-1.patch
@ -0,0 +1,322 @@
+From 7b8db6083b34520688dbc71f341f7aeaf156bf17 Mon Sep 17 00:00:00 2001
+From: Eugene Zhulenev <ezhulenev@google.com>
+Date: Fri, 19 Mar 2021 16:16:41 -0700
+Subject: [PATCH] Implement grouped convolution on CPU
+
+To get better compute resources utilization group-compute loop has to be parallelized, but it involves a lot of changes in Conv2D primitives. Will address that later if it will be critical for some of the users.
+
+Fix for: https://github.com/tensorflow/tensorflow/issues/29005
+
+PiperOrigin-RevId: 363991782
+Change-Id: I97f375b1133833c4de5181199316be7cbf4ebee0
+---
+ tensorflow/core/kernels/BUILD                 |   1 +
+ tensorflow/core/kernels/conv_2d.h             |  54 +++++++
+ tensorflow/core/kernels/conv_ops.cc           | 133 ++++++++++++++++--
+ .../python/kernel_tests/conv_ops_test.py      |  20 +--
+ 4 files changed, 189 insertions(+), 19 deletions(-)
+
+diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
+index 8e49f1e0a5caf..bc455626f4322 100644
+--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
+@@ -3818,6 +3818,7 @@ tf_kernel_library(
+         ":ops_util",
+         "@com_google_absl//absl/base:dynamic_annotations",
+         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/synchronization",
+         "//third_party/eigen3",
+         "//tensorflow/core:core_cpu",
+         "//tensorflow/core:framework",
+diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h
+index b9a8c977e11ee..87df4a848dd56 100644
+--- a/tensorflow/core/kernels/conv_2d.h
+++ b/tensorflow/core/kernels/conv_2d.h
+@@ -43,6 +43,9 @@ void SpatialConvolutionFunc(const Device& d, Output output, Input input,
+       padding_bottom);
+ }
+ 
+// TODO(ezhulenev): Non-templated `operator()` are required by explicit template
+// instantiations for the GPU device. However they are almost certainly not used
+// in any of the kernel implementation. Check if they can be removed.
+ template <typename Device, typename T,
+           typename OutputKernel = const Eigen::NoOpOutputKernel>
+ struct SpatialConvolution {
+@@ -55,6 +58,16 @@ struct SpatialConvolution {
+     SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride,
+                            row_dilation, col_dilation, padding, output_kernel);
+   }
+
+  template <typename Input, typename Filter, typename Output>
+  void operator()(const Device& d, Output output, Input input, Filter filter,
+                  int row_stride, int col_stride, int row_dilation,
+                  int col_dilation, const Eigen::PaddingType& padding,
+                  const OutputKernel& output_kernel = OutputKernel()) {
+    SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride,
+                           row_dilation, col_dilation, padding, output_kernel);
+  }
+
+   void operator()(const Device& d, typename TTypes<T, 4>::Tensor output,
+                   typename TTypes<T, 4>::ConstTensor input,
+                   typename TTypes<T, 4>::ConstTensor filter, int row_stride,
+@@ -67,6 +80,18 @@ struct SpatialConvolution {
+         col_dilation, Eigen::PaddingType::PADDING_VALID, output_kernel,
+         padding_top, padding_bottom, padding_left, padding_right);
+   }
+
+  template <typename Input, typename Filter, typename Output>
+  void operator()(const Device& d, Output output, Input input, Filter filter,
+                  int row_stride, int col_stride, int row_dilation,
+                  int col_dilation, int padding_top, int padding_bottom,
+                  int padding_left, int padding_right,
+                  const OutputKernel& output_kernel = OutputKernel()) {
+    SpatialConvolutionFunc(
+        d, output, input, filter, row_stride, col_stride, row_dilation,
+        col_dilation, Eigen::PaddingType::PADDING_VALID, output_kernel,
+        padding_top, padding_bottom, padding_left, padding_right);
+  }
+ };
+ 
+ template <typename Device, typename OutputKernel>
+@@ -84,6 +109,20 @@ struct SpatialConvolution<Device, Eigen::half, OutputKernel> {
+                                   row_dilation, output_kernel)
+             .template cast<Eigen::half>();
+   }
+
+  template <typename Input, typename Filter, typename Output>
+  void operator()(const Device& d, Output output, Input input, Filter filter,
+                  int row_stride, int col_stride, int row_dilation,
+                  int col_dilation, const Eigen::PaddingType& padding,
+                  const OutputKernel& output_kernel = OutputKernel()) {
+    output.device(d) =
+        Eigen::SpatialConvolution(input.template cast<float>(),
+                                  filter.template cast<float>(), col_stride,
+                                  row_stride, padding, col_dilation,
+                                  row_dilation, output_kernel)
+            .template cast<Eigen::half>();
+  }
+
+   void operator()(const Device& d,
+                   typename TTypes<Eigen::half, 4>::Tensor output,
+                   typename TTypes<Eigen::half, 4>::ConstTensor input,
+@@ -100,6 +139,21 @@ struct SpatialConvolution<Device, Eigen::half, OutputKernel> {
+             padding_bottom)
+             .template cast<Eigen::half>();
+   }
+
+  template <typename Input, typename Filter, typename Output>
+  void operator()(const Device& d, Output output, Input input, Filter filter,
+                  int row_stride, int col_stride, int row_dilation,
+                  int col_dilation, int padding_top, int padding_bottom,
+                  int padding_left, int padding_right,
+                  const OutputKernel& output_kernel = OutputKernel()) {
+    output.device(d) =
+        Eigen::SpatialConvolution(
+            input.template cast<float>(), filter.template cast<float>(),
+            col_stride, row_stride, Eigen::PaddingType::PADDING_VALID,
+            col_dilation, row_dilation, output_kernel, padding_left,
+            padding_right, padding_top, padding_bottom)
+            .template cast<Eigen::half>();
+  }
+ };
+ 
+ template <typename Device, typename T>
+diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
+index 025a8e37a94e9..8fdfe04bd1c67 100644
+--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
+@@ -30,6 +30,7 @@ limitations under the License.
+ #include <map>
+ #include <vector>
+ 
+#include "absl/synchronization/blocking_counter.h"
+ #include "tensorflow/core/framework/allocator.h"
+ #include "tensorflow/core/framework/bounds_check.h"
+ #include "tensorflow/core/framework/kernel_shape_util.h"
+@@ -138,6 +139,98 @@ struct LaunchGeneric {
+     }
+   }
+ };
+
+// Compute grouped 2D convolutions on CPU. Unlike grouped convolution
+// implementation in cuDNN this is faaaaaar from optimal and needs more work
+// to deliver competitive performance. Currently it exists to close the feature
+// parity gap between convolution operations on different devices.
+template <typename T>
+struct LaunchGrouped {
+  void operator()(OpKernelContext* ctx, const Tensor& input,
+                  const Tensor& filter, int row_stride, int col_stride,
+                  int row_dilation, int col_dilation, const Padding& padding,
+                  const std::vector<int64>& explicit_paddings, Tensor* output,
+                  TensorFormat data_format) {
+    DCHECK(data_format == FORMAT_NHWC)
+        << "Grouped conv implementation only "
+           "supports NHWC tensor format for now.";
+
+    const int64 in_depth = input.dim_size(3);
+    const int64 patch_depth = filter.dim_size(2);
+    const int64 num_groups = in_depth / patch_depth;
+
+    // Shuffle input/filter tensors to have group as a leading dimension.
+    std::array<int64, 5> shuffle({3, 0, 1, 2, 4});
+
+    // Compute pre shuffle dimemnsions.
+    auto pre_shuffle = [&](const Tensor& tensor) -> std::array<int64, 5> {
+      return {tensor.dim_size(0), tensor.dim_size(1), tensor.dim_size(2),
+              num_groups, tensor.dim_size(3) / num_groups};
+    };
+
+    // Compute post shuffle dimemnsions.
+    auto post_shuffle = [&](const Tensor& tensor) -> std::array<int64, 5> {
+      return {num_groups, tensor.dim_size(0), tensor.dim_size(1),
+              tensor.dim_size(2), tensor.dim_size(3) / num_groups};
+    };
+
+    auto& device = ctx->eigen_device<CPUDevice>();
+
+    absl::BlockingCounter shuffles_completed(2);
+    auto on_shuffled = [&]() { shuffles_completed.DecrementCount(); };
+
+    // Shuffle input into temporary tensor.
+    Tensor input_shuffled(input.dtype(), TensorShape(post_shuffle(input)));
+    input_shuffled.tensor<T, 5>().device(device, on_shuffled) =
+        input.shaped<T, 5>(pre_shuffle(input)).shuffle(shuffle);
+
+    // Shuffle filter into temporary tensor.
+    Tensor filter_shuffled(filter.dtype(), TensorShape(post_shuffle(filter)));
+    filter_shuffled.tensor<T, 5>().device(device, on_shuffled) =
+        filter.shaped<T, 5>(pre_shuffle(filter)).shuffle(shuffle);
+
+    // Wait for the completion of input/filter shuffles.
+    shuffles_completed.Wait();
+
+    // Write group convolution results into temporary output tensor.
+    Tensor output_shuffled(output->dtype(), TensorShape(post_shuffle(*output)));
+
+    for (int64 i = 0; i < num_groups; ++i) {
+      // TODO(ezhulenev): Run this loop using `parallelFor` (regular parallelFor
+      // will lead to deadlock, SpatialConvolution has to use async Eigen
+      // assignment). This requires small changes to Eigen to support async
+      // exeuction for tensor chipping operation.
+
+      // TODO(ezhulenev): Grouped convolution should also support 1x1 filter
+      // optimization.
+
+      auto input_slice = input_shuffled.tensor<T, 5>().template chip<0>(i);
+      auto filter_slice = filter_shuffled.tensor<T, 5>().template chip<0>(i);
+      auto output_slice = output_shuffled.tensor<T, 5>().template chip<0>(i);
+
+      if (padding == EXPLICIT) {
+        functor::SpatialConvolution<CPUDevice, T>()(
+            ctx->eigen_device<CPUDevice>(), output_slice, input_slice,
+            filter_slice, row_stride, col_stride, row_dilation, col_dilation,
+            static_cast<int>(explicit_paddings[2]),
+            static_cast<int>(explicit_paddings[3]),
+            static_cast<int>(explicit_paddings[4]),
+            static_cast<int>(explicit_paddings[5]));
+      } else {
+        functor::SpatialConvolution<CPUDevice, T>()(
+            ctx->eigen_device<CPUDevice>(), output_slice, input_slice,
+            filter_slice, row_stride, col_stride, row_dilation, col_dilation,
+            BrainPadding2EigenPadding(padding));
+      }
+    }
+
+    // Shuffle temporary output back into pre-shuffled shape.
+    std::array<int64, 5> rev_shuffle({1, 2, 3, 0, 4});
+    output->shaped<T, 5>(pre_shuffle(*output)).device(device) =
+        output_shuffled.tensor<T, 5>().shuffle(rev_shuffle);
+  }
+};
+
+ }  // namespace
+ 
+ template <typename T>
+@@ -155,14 +248,6 @@ struct LaunchConv2DOp<CPUDevice, T> {
+           ToString(data_format)));
+       return;
+     }
+-    const int64 in_depth = GetTensorDim(input, data_format, 'C');
+-    OP_REQUIRES(ctx, in_depth == filter.dim_size(2),
+-                errors::Unimplemented(
+-                    "The Conv2D op currently does not support grouped "
+-                    "convolutions on the CPU. A grouped convolution was "
+-                    "attempted to be run because the input depth of ",
+-                    in_depth, " does not match the filter input depth of ",
+-                    filter.dim_size(2)));
+ 
+     for (int64 explicit_padding : explicit_paddings) {
+       if (!FastBoundsCheck(explicit_padding, std::numeric_limits<int>::max())) {
+@@ -170,9 +255,35 @@ struct LaunchConv2DOp<CPUDevice, T> {
+         return;
+       }
+     }
+-    LaunchGeneric<CPUDevice, T>()(ctx, input, filter, row_stride, col_stride,
+-                                  row_dilation, col_dilation, padding,
+-                                  explicit_paddings, output, data_format);
+
+    const int64 in_depth = input.dim_size(3);
+    const int64 out_depth = output->dim_size(3);
+    const int64 patch_depth = filter.dim_size(2);
+
+    if (in_depth % patch_depth != 0) {
+      ctx->SetStatus(errors::InvalidArgument(
+          "input depth must be evenly divisible by filter depth: ", in_depth,
+          " vs ", patch_depth));
+      return;
+    }
+
+    const int64 num_groups = in_depth / patch_depth;
+    if (out_depth % num_groups != 0 || out_depth < num_groups) {
+      ctx->SetStatus(errors::InvalidArgument(
+          "output depth must be evenly divisible by number of groups: ",
+          out_depth, " vs ", num_groups));
+      return;
+    }
+
+    if (in_depth != patch_depth) {
+      LaunchGrouped<T>()(ctx, input, filter, row_stride, col_stride,
+                         row_dilation, col_dilation, padding, explicit_paddings,
+                         output, data_format);
+    } else {
+      LaunchGeneric<CPUDevice, T>()(ctx, input, filter, row_stride, col_stride,
+                                    row_dilation, col_dilation, padding,
+                                    explicit_paddings, output, data_format);
+    }
+   }
+ };
+ 
+diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
+index 44a67ccc55f0a..92af04359caa9 100644
+--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
+@@ -834,17 +834,21 @@ def MakeConv2d(inputs, filters):
+           results[0], results[1], atol=tol_to_use, rtol=tol_to_use)
+ 
+   @test_util.run_in_graph_and_eager_modes
+-  @test_util.run_cuda_only
+   def testConv2DGroupConvFwd(self):
+-    for data_format in ["NHWC", "NCHW"]:
+    if test.is_gpu_available(cuda_only=True):
+      data_formats = ["NHWC", "NCHW"]
+    else:
+      data_formats = ["NHWC"]
+    for data_format in data_formats:
+       for dilation in [1, 2]:
+         for stride in [1, 2]:
+-          self._VerifyGroupConvFwd([10, 32, 32, 16], [3, 3, 4, 8],
+-                                   dilations=[dilation, dilation],
+-                                   strides=[stride, stride],
+-                                   padding="SAME",
+-                                   data_format=data_format,
+-                                   dtype=dtypes.float32)
+          for filter_dims in [[3, 3, 4, 8], [1, 1, 2, 16]]:
+            self._VerifyGroupConvFwd([10, 32, 32, 16], filter_dims,
+                                     dilations=[dilation, dilation],
+                                     strides=[stride, stride],
+                                     padding="SAME",
+                                     data_format=data_format,
+                                     dtype=dtypes.float32)
+ 
+   @test_util.deprecated_graph_mode_only
+   @test_util.run_cuda_only
--- a/CVE-2021-29526-2.patch
+++ b/CVE-2021-29526-2.patch
@ -0,0 +1,50 @@
+From b12aa1d44352de21d1a6faaf04172d8c2508b42b Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Mon, 19 Apr 2021 18:32:56 -0700
+Subject: [PATCH] Fix one more FPE.
+
+---
+ tensorflow/core/kernels/conv_ops.cc | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
+index ef13eb3f..2d357710 100644
+--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
+@@ -260,6 +260,11 @@ struct LaunchConv2DOp<CPUDevice, T> {
+     const int64 out_depth = output->dim_size(3);
+     const int64 patch_depth = filter.dim_size(2);
+ 
+    if (patch_depth <= 0) {
+      ctx->SetStatus(errors::InvalidArgument(
+	  "filter depth must be stricly positive, got ", patch_depth));
+      return;
+    }
+     if (in_depth % patch_depth != 0) {
+       ctx->SetStatus(errors::InvalidArgument(
+           "input depth must be evenly divisible by filter depth: ", in_depth,
+@@ -268,6 +273,11 @@ struct LaunchConv2DOp<CPUDevice, T> {
+     }
+ 
+     const int64 num_groups = in_depth / patch_depth;
+    if (num_groups <= 0) {
+      ctx->SetStatus(errors::InvalidArgument(
+	  "number of groups must be stricly positive, got ", num_groups));
+      return;
+    }
+     if (out_depth % num_groups != 0 || out_depth < num_groups) {
+       ctx->SetStatus(errors::InvalidArgument(
+           "output depth must be evenly divisible by number of groups: ",
+@@ -536,6 +546,9 @@ Status ComputeConv2DDimension(const Conv2DParameters& params,
+               errors::InvalidArgument("Patch depth too large"));
+   const int in_depth = static_cast<int>(in_depth_raw);
+   const int patch_depth = static_cast<int>(patch_depth_raw);
+  TF_REQUIRES(patch_depth > 0,
+	      errors::InvalidArgument(
+		  "filter depth must be stricly positive, got", patch_depth));
+   TF_REQUIRES(in_depth % patch_depth == 0,
+               errors::InvalidArgument(
+                   "input depth must be evenly divisible by filter depth: ",
+-- 
+2.23.0
+
--- a/CVE-2021-29533.patch
+++ b/CVE-2021-29533.patch
@ -0,0 +1,76 @@
+From b432a38fe0e1b4b904a6c222cbce794c39703e87 Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Wed, 21 Apr 2021 15:57:36 -0700
+Subject: [PATCH] Fix overflow CHECK issue with `tf.raw_ops.DrawBoundingBoxes`.
+
+---
+ .../core/kernels/draw_bounding_box_op.cc      | 49 ++++++++++++++-----
+ 1 file changed, 37 insertions(+), 12 deletions(-)
+
+diff --git a/tensorflow/core/kernels/draw_bounding_box_op.cc b/tensorflow/core/kernels/draw_bounding_box_op.cc
+index 30de99b7..39519523 100644
+--- a/tensorflow/core/kernels/draw_bounding_box_op.cc
+++ b/tensorflow/core/kernels/draw_bounding_box_op.cc
+@@ -147,22 +147,47 @@ class DrawBoundingBoxesOp : public OpKernel {
+ 
+         // At this point, {min,max}_box_{row,col}_clamp are inside the
+         // image.
+-        CHECK_GE(min_box_row_clamp, 0);
+-        CHECK_GE(max_box_row_clamp, 0);
+-        CHECK_LT(min_box_row_clamp, height);
+-        CHECK_LT(max_box_row_clamp, height);
+-        CHECK_GE(min_box_col_clamp, 0);
+-        CHECK_GE(max_box_col_clamp, 0);
+-        CHECK_LT(min_box_col_clamp, width);
+-        CHECK_LT(max_box_col_clamp, width);
+
+	OP_REQUIRES(
+	    context, min_box_row_clamp >= 0,
+	    errors::InvalidArgument("Min box row clamp is less than 0."));
+	OP_REQUIRES(
+	    context, max_box_row_clamp >= 0,
+	    errors::InvalidArgument("Max box row clamp is less than 0."));
+	OP_REQUIRES(context, min_box_row_clamp <= height,
+		    errors::InvalidArgument(
+			"Min box row clamp is greater than height."));
+	OP_REQUIRES(context, max_box_row_clamp <= height,
+		    errors::InvalidArgument(
+			"Max box row clamp is greater than height."));
+
+	OP_REQUIRES(
+	    context, min_box_col_clamp >= 0,
+	    errors::InvalidArgument("Min box col clamp is less than 0."));
+	OP_REQUIRES(
+	    context, max_box_col_clamp >= 0,
+	    errors::InvalidArgument("Max box col clamp is less than 0."));
+	OP_REQUIRES(context, min_box_col_clamp <= width,
+		    errors::InvalidArgument(
+			"Min box col clamp is greater than width."));
+	OP_REQUIRES(context, max_box_col_clamp <= width,
+		    errors::InvalidArgument(
+			"Max box col clamp is greater than width."));
+ 
+         // At this point, the min_box_row and min_box_col are either
+         // in the image or above/left of it, and max_box_row and
+         // max_box_col are either in the image or below/right or it.
+-        CHECK_LT(min_box_row, height);
+-        CHECK_GE(max_box_row, 0);
+-        CHECK_LT(min_box_col, width);
+-        CHECK_GE(max_box_col, 0);
+	
+	OP_REQUIRES(
+	    context, min_box_row <= height,
+	    errors::InvalidArgument("Min box row is greater than height."));
+	OP_REQUIRES(context, max_box_row >= 0,
+		    errors::InvalidArgument("Max box row is less than 0."));
+	OP_REQUIRES(
+	    context, min_box_col <= width,
+	    errors::InvalidArgument("Min box col is greater than width."));
+	OP_REQUIRES(context, max_box_col >= 0,
+		    errors::InvalidArgument("Max box col is less than 0."));
+ 
+         // Draw top line.
+         if (min_box_row >= 0) {
+-- 
+2.23.0
+
--- a/CVE-2021-29537.patch
+++ b/CVE-2021-29537.patch
@ -0,0 +1,33 @@
+From f6c40f0c6cbf00d46c7717a26419f2062f2f8694 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Wed, 21 Apr 2021 17:00:39 -0700
+Subject: [PATCH] Validate min and max arguments to `QuantizedResizeBilinear`.
+
+---
+ .../core/kernels/quantized_resize_bilinear_op.cc       | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc
+index 8270fc11..a94f56a5 100644
+--- a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc
+++ b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc
+@@ -703,8 +703,14 @@ class QuantizedResizeBilinearOp : public OpKernel {
+ 
+   void Compute(OpKernelContext* context) override {
+     const Tensor& input = context->input(0);
+-    const float in_min = context->input(2).flat<float>()(0);
+-    const float in_max = context->input(3).flat<float>()(0);
+    const auto& in_min_tensor = context->input(2);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_min_tensor.shape()),
+		errors::InvalidArgument("min must be a scalar"));
+    const float in_min = in_min_tensor.flat<float>()(0);
+    const auto& in_max_tensor = context->input(3);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_max_tensor.shape()),
+		errors::InvalidArgument("max must be a scalar"));
+    const float in_max = in_max_tensor.flat<float>()(0);
+ 
+     ImageResizerState st(align_corners_, false);
+     st.ValidateAndCreateOutput(context, input);
+-- 
+2.23.0
+
--- a/CVE-2021-29544-1.patch
+++ b/CVE-2021-29544-1.patch
@ -0,0 +1,919 @@
+From 52df91c5634e6c666843849a1c6ff29b3d2676be Mon Sep 17 00:00:00 2001
+From: Pankaj Kanwar <pkanwar@google.com>
+Date: Mon, 12 Oct 2020 10:30:20 -0700
+Subject: [PATCH] Create a V2 Op to stop the gradient when the input is out of
+ range.
+
+PiperOrigin-RevId: 336692325
+Change-Id: I36fd3fcfc58a30d5218beca512fbfc7c24b8b5cb
+---
+ tensorflow/cc/gradients/array_grad.cc         |  29 ++--
+ tensorflow/compiler/tests/unary_ops_test.py   |   6 +-
+ .../api_def_QuantizeAndDequantizeV4.pbtxt     |   8 ++
+ .../api_def_QuantizeAndDequantizeV4Grad.pbtxt |   8 ++
+ .../api_def_QuantizeAndDequantizeV4.pbtxt     |   3 +
+ .../api_def_QuantizeAndDequantizeV4Grad.pbtxt |   3 +
+ .../api_def_QuantizeAndDequantizeV4.pbtxt     |   4 +
+ .../api_def_QuantizeAndDequantizeV4Grad.pbtxt |   4 +
+ .../kernels/quantize_and_dequantize_op.cc     | 126 ++++++++++++++++++
+ .../core/kernels/quantize_and_dequantize_op.h |  71 ++++++++++
+ .../quantize_and_dequantize_op_gpu.cu.cc      |  40 ++++++
+ .../quantize_and_dequantize_op_test.cc        |  48 +++++++
+ tensorflow/core/ops/array_ops.cc              |  64 +++++++++
+ .../python/kernel_tests/array_ops_test.py     |  21 ++-
+ tensorflow/python/ops/array_ops.py            | 113 +++++++++++++++-
+ .../tools/api/golden/v1/tensorflow.pbtxt      |   4 +
+ .../golden/v1/tensorflow.quantization.pbtxt   |   4 +
+ .../api/golden/v1/tensorflow.raw_ops.pbtxt    |   8 ++
+ .../tools/api/golden/v2/tensorflow.pbtxt      |   4 +
+ .../golden/v2/tensorflow.quantization.pbtxt   |   4 +
+ .../api/golden/v2/tensorflow.raw_ops.pbtxt    |   8 ++
+ 21 files changed, 564 insertions(+), 16 deletions(-)
+ create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt
+ create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+ create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4.pbtxt
+ create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+ create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4.pbtxt
+ create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+
+diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc
+index e9173227..480243a2 100644
+--- a/tensorflow/cc/gradients/array_grad.cc
+++ b/tensorflow/cc/gradients/array_grad.cc
+@@ -15,13 +15,12 @@ limitations under the License.
+ 
+ #include <vector>
+ 
+#include "tensorflow/cc/framework/grad_op_registry.h"
+#include "tensorflow/cc/framework/gradients.h"
+ #include "tensorflow/cc/ops/array_ops_internal.h"
+ #include "tensorflow/cc/ops/standard_ops.h"
+ #include "tensorflow/core/lib/strings/strcat.h"
+ 
+-#include "tensorflow/cc/framework/grad_op_registry.h"
+-#include "tensorflow/cc/framework/gradients.h"
+-
+ namespace tensorflow {
+ namespace ops {
+ namespace {
+@@ -90,15 +89,25 @@ Status QuantizeAndDequantizeGrad(const Scope& scope, const Operation& op,
+ }
+ REGISTER_GRADIENT_OP("QuantizeAndDequantize", QuantizeAndDequantizeGrad);
+ 
+-Status QuantizeAndDequantizeV2Grad(const Scope& scope, const Operation& op,
+-                                   const std::vector<Output>& grad_inputs,
+-                                   std::vector<Output>* grad_outputs) {
+-  grad_outputs->push_back(Identity(scope, grad_inputs[0]));
+-  grad_outputs->push_back(NoGradient());
+-  grad_outputs->push_back(NoGradient());
+Status QuantizeAndDequantizeV4GradHelper(const Scope& scope,
+                                         const Operation& op,
+                                         const std::vector<Output>& grad_inputs,
+                                         std::vector<Output>* grad_outputs) {
+  Input input = Shape(scope, op.input(0));
+  Input input_min = op.input(1);
+  Input input_max = op.input(2);
+  int64 axis;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "axis", &axis));
+  auto qdq_v4_grad = QuantizeAndDequantizeV4Grad(
+      scope, grad_inputs[0], input, input_min, input_max,
+      QuantizeAndDequantizeV4Grad::Axis(axis));
+  grad_outputs->push_back(qdq_v4_grad.input_backprop);
+  grad_outputs->push_back(qdq_v4_grad.input_min_backprop);
+  grad_outputs->push_back(qdq_v4_grad.input_max_backprop);
+   return scope.status();
+ }
+-REGISTER_GRADIENT_OP("QuantizeAndDequantizeV2", QuantizeAndDequantizeV2Grad);
+REGISTER_GRADIENT_OP("QuantizeAndDequantizeV4",
+                     QuantizeAndDequantizeV4GradHelper);
+ 
+ Status QuantizeAndDequantizeV3Grad(const Scope& scope, const Operation& op,
+                                    const std::vector<Output>& grad_inputs,
+diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
+index 162693a9..dacd7232 100644
+--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
+@@ -535,7 +535,7 @@ class UnaryOpsTest(xla_test.XLATestCase):
+     for dtype in self.float_types:
+ 
+       def quantize_and_dequantize_v2(x):
+-        return array_ops.quantize_and_dequantize_v2(
+        return array_ops.quantize_and_dequantize(
+             x, -127, 127, signed_input=True, num_bits=8)
+ 
+       self._assertOpOutputMatchesExpected(
+@@ -544,7 +544,7 @@ class UnaryOpsTest(xla_test.XLATestCase):
+           expected=np.array([-1., -0.5, 0., 0.296875], dtype=dtype))
+ 
+       def quantize_and_dequantize_v2_round_half_up(x):
+-        return array_ops.quantize_and_dequantize_v2(
+        return array_ops.quantize_and_dequantize(
+             x,
+             -1,
+             1.0,
+@@ -568,7 +568,7 @@ class UnaryOpsTest(xla_test.XLATestCase):
+                             dtype=dtype))
+ 
+       def quantize_and_dequantize_v2_round_half_to_even(x):
+-        return array_ops.quantize_and_dequantize_v2(
+        return array_ops.quantize_and_dequantize(
+             x,
+             -1.0,
+             1.0,
+diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt
+new file mode 100644
+index 00000000..a84ccb78
+--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4.pbtxt
+@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV4"
+  summary: "Quantizes then dequantizes a tensor."
+  description: <<END
+This is almost identical to QuantizeAndDequantizeV2, except that it returns a
+gradient of 1 for inputs that are within the quantization range, or 0 otherwise.
+END
+}
+diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+new file mode 100644
+index 00000000..88ba0ea8
+--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+@@ -0,0 +1,8 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV4Grad"
+  summary: "Returns the gradient of `QuantizeAndDequantizeV4`."
+  description: <<END
+Returns a gradient of 1 for inputs that are within the quantization range,
+or 0 otherwise.
+END
+}
+diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4.pbtxt
+new file mode 100644
+index 00000000..80544053
+--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4.pbtxt
+@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV4Grad"
+}
+diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+new file mode 100644
+index 00000000..80544053
+--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV4Grad"
+}
+diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4.pbtxt
+new file mode 100644
+index 00000000..0ed576f0
+--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4.pbtxt
+@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV4Grad"
+  visibility: HIDDEN
+}
+diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+new file mode 100644
+index 00000000..0ed576f0
+--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV4Grad.pbtxt
+@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV4Grad"
+  visibility: HIDDEN
+}
+diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+index 408196a2..a2e0fe33 100644
+--- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+@@ -139,6 +139,75 @@ class QuantizeAndDequantizeV2Op : public OpKernel {
+   bool narrow_range_;
+ };
+ 
+// OpKernel behind the "QuantizeAndDequantizeV4Grad" registrations in this file.
+// When back-propagating the error through a quantized layer, the following
+// paper gives evidence that clipped-ReLU is better than non-clipped:
+// "Deep Learning with Low Precision by Half-wave Gaussian Quantization"
+// http://zpascal.net/cvpr2017/Cai_Deep_Learning_With_CVPR_2017_paper.pdf
+template <typename Device, typename T>
+class QuantizeAndDequantizeV4GradientOp : public OpKernel {
+ public:
+  explicit QuantizeAndDequantizeV4GradientOp(OpKernelConstruction* ctx)
+      : OpKernel::OpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &axis_));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& gradient = ctx->input(0);
+    const Tensor& input = ctx->input(1);
+    Tensor* input_backprop = nullptr;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(0, input.shape(), &input_backprop));
+
+    OP_REQUIRES(
+        ctx, input.IsSameSize(gradient),
+        errors::InvalidArgument("gradient and input must be the same size"));
+    const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_);
+    const Tensor& input_min_tensor = ctx->input(2);
+    const Tensor& input_max_tensor = ctx->input(3);
+    if (axis_ != -1) {
+      OP_REQUIRES(
+          ctx, input_min_tensor.dim_size(0) == depth,
+          errors::InvalidArgument("min has incorrect size, expected ", depth,
+                                  " was ", input_min_tensor.dim_size(0)));
+      OP_REQUIRES(
+          ctx, input_max_tensor.dim_size(0) == depth,
+          errors::InvalidArgument("max has incorrect size, expected ", depth,
+                                  " was ", input_max_tensor.dim_size(0)));
+    }
+
+    TensorShape min_max_shape(input_min_tensor.shape());  // both range backprops reuse input_min's shape
+    Tensor* input_min_backprop;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(1, min_max_shape, &input_min_backprop));
+
+    Tensor* input_max_backprop;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output(2, min_max_shape, &input_max_backprop));
+
+    if (axis_ == -1) {  // one-scale path: min/max are read as scalars
+      functor::QuantizeAndDequantizeOneScaleGradientFunctor<Device, T> f;
+      f(ctx->eigen_device<Device>(), gradient.template flat<T>(),
+        input.template flat<T>(), input_min_tensor.scalar<T>(),
+        input_max_tensor.scalar<T>(), input_backprop->template flat<T>(),
+        input_min_backprop->template scalar<T>(),
+        input_max_backprop->template scalar<T>());
+    } else {  // per-channel path: tensors are viewed as 3-D around axis_
+      functor::QuantizeAndDequantizePerChannelGradientFunctor<Device, T> f;
+      f(ctx->eigen_device<Device>(),
+        gradient.template flat_inner_outer_dims<T, 3>(axis_ - 1),
+        input.template flat_inner_outer_dims<T, 3>(axis_ - 1),
+        &input_min_tensor, &input_max_tensor,
+        input_backprop->template flat_inner_outer_dims<T, 3>(axis_ - 1),
+        input_min_backprop->template flat<T>(),
+        input_max_backprop->template flat<T>());
+    }
+  }
+
+ private:
+  int axis_;  // value of the "axis" attr; -1 selects the one-scale path
+};
+
+ // Simulate quantization precision loss in a float tensor by:
+ // 1. Quantize the tensor to fixed point numbers, which should match the target
+ //    quantization method when it is used in inference.
+@@ -307,6 +376,43 @@ struct QuantizeAndDequantizePerChannelFunctor<CPUDevice, T> {
+         input_max_tensor, round_mode, narrow_range, out);
+   }
+ };
+
+template <typename T>
+struct QuantizeAndDequantizeOneScaleGradientFunctor<CPUDevice, T> {  // CPU specialization
+  void operator()(const CPUDevice& d, typename TTypes<T>::ConstFlat gradient,
+                  typename TTypes<T>::ConstFlat input,
+                  typename TTypes<T>::ConstScalar input_min_tensor,
+                  typename TTypes<T>::ConstScalar input_max_tensor,
+                  typename TTypes<T>::Flat input_backprop,
+                  typename TTypes<T>::Scalar input_min_backprop,
+                  typename TTypes<T>::Scalar input_max_backprop) {
+    QuantizeAndDequantizeOneScaleGradientImpl<CPUDevice, T>::Compute(
+        d, gradient, input, input_min_tensor, input_max_tensor, input_backprop,
+        input_min_backprop, input_max_backprop);  // pure forwarding; no logic lives here
+  }
+};
+
+template <typename T>
+struct QuantizeAndDequantizePerChannelGradientFunctor<CPUDevice, T> {  // CPU specialization
+  void operator()(const CPUDevice& d,
+                  typename TTypes<T, 3>::ConstTensor gradient,
+                  typename TTypes<T, 3>::ConstTensor input,
+                  const Tensor* input_min_tensor,
+                  const Tensor* input_max_tensor,
+                  typename TTypes<T, 3>::Tensor input_backprop,
+                  typename TTypes<T>::Flat input_min_backprop,
+                  typename TTypes<T>::Flat input_max_backprop) {
+    QuantizeAndDequantizePerChannelGradientImpl<CPUDevice, T>::Compute(
+        d, gradient, input, input_min_tensor, input_max_tensor, input_backprop,
+        input_min_backprop, input_max_backprop);  // pure forwarding; no logic lives here
+  }
+};
+
+template struct functor::QuantizeAndDequantizeOneScaleGradientFunctor<CPUDevice,
+                                                                      float>;
+template struct functor::QuantizeAndDequantizePerChannelGradientFunctor<
+    CPUDevice, double>;
+
+ }  // namespace functor
+ 
+ #define REGISTER_CPU_KERNEL(T)                                                 \
+@@ -318,6 +424,14 @@ struct QuantizeAndDequantizePerChannelFunctor<CPUDevice, T> {
+                               .Device(DEVICE_CPU)                              \
+                               .TypeConstraint<T>("T"),                         \
+                           QuantizeAndDequantizeV3Op<CPUDevice, T>);            \
+  REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4")                      \
+                              .Device(DEVICE_CPU)                              \
+                              .TypeConstraint<T>("T"),                         \
+                          QuantizeAndDequantizeV2Op<CPUDevice, T>);            \
+  REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4Grad")                  \
+                              .Device(DEVICE_CPU)                              \
+                              .TypeConstraint<T>("T"),                         \
+                          QuantizeAndDequantizeV4GradientOp<CPUDevice, T>);    \
+   REGISTER_KERNEL_BUILDER(                                                     \
+       Name("QuantizeAndDequantize").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+       QuantizeAndDequantizeOp<CPUDevice, T>);
+@@ -341,6 +455,18 @@ TF_CALL_double(REGISTER_CPU_KERNEL);
+                               .HostMemory("num_bits")                          \
+                               .TypeConstraint<T>("T"),                         \
+                           QuantizeAndDequantizeV3Op<GPUDevice, T>);            \
+  REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4")                      \
+                              .Device(DEVICE_GPU)                              \
+                              .HostMemory("input_min")                         \
+                              .HostMemory("input_max")                         \
+                              .TypeConstraint<T>("T"),                         \
+                          QuantizeAndDequantizeV2Op<GPUDevice, T>);            \
+  REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4Grad")                  \
+                              .Device(DEVICE_GPU)                              \
+                              .HostMemory("input_min")                         \
+                              .HostMemory("input_max")                         \
+                              .TypeConstraint<T>("T"),                         \
+                          QuantizeAndDequantizeV4GradientOp<GPUDevice, T>);    \
+   REGISTER_KERNEL_BUILDER(                                                     \
+       Name("QuantizeAndDequantize").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+       QuantizeAndDequantizeOp<GPUDevice, T>);
+diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.h b/tensorflow/core/kernels/quantize_and_dequantize_op.h
+index 4dd6e5c8..c286a10a 100644
+--- a/tensorflow/core/kernels/quantize_and_dequantize_op.h
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op.h
+@@ -60,6 +60,28 @@ struct QuantizeAndDequantizePerChannelFunctor {
+                   typename TTypes<T, 3>::Tensor output);
+ };
+ 
+template <typename Device, typename T>
+struct QuantizeAndDequantizeOneScaleGradientFunctor {  // declaration only; one scalar min/max for the whole tensor
+  void operator()(const Device& d, typename TTypes<T>::ConstFlat gradient,
+                  typename TTypes<T>::ConstFlat input,
+                  typename TTypes<T>::ConstScalar input_min,
+                  typename TTypes<T>::ConstScalar input_max,
+                  typename TTypes<T>::Flat input_backprop,
+                  typename TTypes<T>::Scalar input_min_backprop,
+                  typename TTypes<T>::Scalar input_max_backprop);
+};
+
+template <typename Device, typename T>
+struct QuantizeAndDequantizePerChannelGradientFunctor {  // declaration only; min/max passed as whole Tensors
+  void operator()(const Device& d, typename TTypes<T, 3>::ConstTensor gradient,
+                  typename TTypes<T, 3>::ConstTensor input,
+                  const Tensor* input_min_tensor,
+                  const Tensor* input_max_tensor,
+                  typename TTypes<T, 3>::Tensor input_backprop,
+                  typename TTypes<T>::Flat input_min_backprop,
+                  typename TTypes<T>::Flat input_max_backprop);
+};
+
+ // The implementation below runs on both CPU and GPU.
+ template <typename Device, typename T, typename Func,
+           typename Vec = typename TTypes<T>::Vec,
+@@ -249,6 +271,55 @@ struct QuantizeAndDequantizePerChannelImpl {
+   }
+ };
+ 
+template <typename Device, typename T>
+struct QuantizeAndDequantizeOneScaleGradientImpl {  // gradient for a single [min, max] range
+  static void Compute(const Device& d, typename TTypes<T>::ConstFlat gradient,
+                      typename TTypes<T>::ConstFlat input,
+                      typename TTypes<T>::ConstScalar input_min,
+                      typename TTypes<T>::ConstScalar input_max,
+                      typename TTypes<T>::Flat input_backprop,
+                      typename TTypes<T>::Scalar input_min_backprop,
+                      typename TTypes<T>::Scalar input_max_backprop) {
+    const T min_val = input_min();
+    const T max_val = input_max();
+    const auto in_range =  // element-wise mask: 1 where min_val <= input <= max_val, else 0
+        (input >= min_val && input <= max_val)
+            .select(input.constant(1.0f), input.constant(0.0f));
+    input_backprop.device(d) = gradient * in_range;  // out-of-range elements get zero gradient
+    input_min_backprop.device(d) = input_min_backprop.constant(0.0f);  // range bounds always get zero
+    input_max_backprop.device(d) = input_max_backprop.constant(0.0f);
+  }
+};
+
+template <typename Device, typename T>
+struct QuantizeAndDequantizePerChannelGradientImpl {  // gradient with one [min, max] range per channel
+  static void Compute(const Device& d,
+                      typename TTypes<T, 3>::ConstTensor gradient,
+                      typename TTypes<T, 3>::ConstTensor input,
+                      const Tensor* input_min_tensor,
+                      const Tensor* input_max_tensor,
+                      typename TTypes<T, 3>::Tensor input_backprop,
+                      typename TTypes<T>::Flat input_min_backprop,
+                      typename TTypes<T>::Flat input_max_backprop) {
+    using Index = typename tensorflow::TTypes<T>::ConstTensor::Index;
+    auto input_min = input_min_tensor->vec<T>();
+    auto input_max = input_max_tensor->vec<T>();
+    int num_channels = input.dimension(1);  // dimension 1 of the 3-D view is the one chipped per channel below
+    for (Index i = 0; i < num_channels; ++i) {
+      const auto gradient_chip = gradient.template chip<1>(i);
+      const auto input_chip = input.template chip<1>(i);
+      const T min_val = input_min(i);
+      const T max_val = input_max(i);
+      const auto in_range =  // mask for this channel: 1 inside [min_val, max_val], else 0
+          (input_chip >= min_val && input_chip <= max_val)
+              .select(input_chip.constant(1.0f), input_chip.constant(0.0f));
+      input_backprop.template chip<1>(i).device(d) = gradient_chip * in_range;
+    }
+    input_min_backprop.device(d) = input_min_backprop.constant(0.0f);  // range bounds always get zero
+    input_max_backprop.device(d) = input_max_backprop.constant(0.0f);
+  }
+};
+
+ }  // end of namespace functor
+ }  // end of namespace tensorflow
+ 
+diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc b/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc
+index f3bb4107..9f074535 100644
+--- a/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op_gpu.cu.cc
+@@ -53,6 +53,37 @@ struct QuantizeAndDequantizePerChannelFunctor<GPUDevice, T> {
+   }
+ };
+ 
+template <typename T>
+struct QuantizeAndDequantizeOneScaleGradientFunctor<GPUDevice, T> {  // GPU specialization
+  void operator()(const GPUDevice& d, typename TTypes<T>::ConstFlat gradient,
+                  typename TTypes<T>::ConstFlat input,
+                  typename TTypes<T>::ConstScalar input_min_tensor,
+                  typename TTypes<T>::ConstScalar input_max_tensor,
+                  typename TTypes<T>::Flat input_backprop,
+                  typename TTypes<T>::Scalar input_min_backprop,
+                  typename TTypes<T>::Scalar input_max_backprop) {
+    QuantizeAndDequantizeOneScaleGradientImpl<GPUDevice, T>::Compute(
+        d, gradient, input, input_min_tensor, input_max_tensor, input_backprop,
+        input_min_backprop, input_max_backprop);  // pure forwarding; no logic lives here
+  }
+};
+
+template <typename T>
+struct QuantizeAndDequantizePerChannelGradientFunctor<GPUDevice, T> {  // GPU specialization
+  void operator()(const GPUDevice& d,
+                  typename TTypes<T, 3>::ConstTensor gradient,
+                  typename TTypes<T, 3>::ConstTensor input,
+                  const Tensor* input_min_tensor,
+                  const Tensor* input_max_tensor,
+                  typename TTypes<T, 3>::Tensor input_backprop,
+                  typename TTypes<T>::Flat input_min_backprop,
+                  typename TTypes<T>::Flat input_max_backprop) {
+    QuantizeAndDequantizePerChannelGradientImpl<GPUDevice, T>::Compute(
+        d, gradient, input, input_min_tensor, input_max_tensor, input_backprop,
+        input_min_backprop, input_max_backprop);  // pure forwarding; no logic lives here
+  }
+};
+
+ }  // end namespace functor
+ 
+ // Instantiate the GPU implementation for float and double.
+@@ -65,6 +96,15 @@ template struct functor::QuantizeAndDequantizePerChannelFunctor<GPUDevice,
+ template struct functor::QuantizeAndDequantizePerChannelFunctor<GPUDevice,
+                                                                 double>;
+ 
+template struct functor::QuantizeAndDequantizeOneScaleGradientFunctor<GPUDevice,
+                                                                      float>;
+template struct functor::QuantizeAndDequantizeOneScaleGradientFunctor<GPUDevice,
+                                                                      double>;
+template struct functor::QuantizeAndDequantizePerChannelGradientFunctor<
+    GPUDevice, float>;
+template struct functor::QuantizeAndDequantizePerChannelGradientFunctor<
+    GPUDevice, double>;
+
+ }  // end namespace tensorflow
+ 
+ #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
+index 90764b0f..596ab135 100644
+--- a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
+@@ -362,6 +362,54 @@ TEST_P(ParameterizedQuantizeAndDequantizeTest,
+   }
+ }
+ 
+// Checks that QuantizeAndDequantizeV4Grad passes a gradient only where the input is in range.
+TEST_P(ParameterizedQuantizeAndDequantizeTest, GradientV4_op) {
+  const int axis = GetParam();
+  TF_ASSERT_OK(NodeDefBuilder("qdq_v4_grad_op", "QuantizeAndDequantizeV4Grad")
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Input(FakeInput(DT_FLOAT))
+                   .Attr("axis", axis)
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+  const std::vector<int64> dims = {2, 3, 4, 5};
+  // Input gradient. (repeating 11 values multiplied by (slice_idx + 1))
+  auto gradients = ScalePerSliceAlongAxis<float>(
+      dims, axis, {1, -2, -3, 4, 5, 6, -7, -8, -9, -10, 11});
+  AddInputFromArray<float>(TensorShape(dims), gradients);
+  // Forward op inputs. (repeating 7 values multiplied by (slice_idx + 1)).
+  auto inputs = ScalePerSliceAlongAxis<float>(
+      dims, axis, {-1, -0.5, 0, 0.3, 0.8, 0.55, 0.6});
+  AddInputFromArray<float>(TensorShape(dims), inputs);
+  const int num_slices = (axis == -1) ? 1 : dims[axis];  // one range for axis == -1, else one per slice
+  const TensorShape range_shape =
+      (axis == -1) ? TensorShape({}) : TensorShape({num_slices});
+  std::vector<float> input_min_values(num_slices), input_max_values(num_slices);
+  for (int i = 0; i < num_slices; ++i) {
+    input_max_values[i] = 0.8f + i * 0.4f;
+    input_min_values[i] = -input_max_values[i];  // ranges are symmetric around zero
+  }
+  AddInputFromArray<float>(range_shape, input_min_values);
+  AddInputFromArray<float>(range_shape, input_max_values);
+  std::vector<float> expected_vals(inputs.size());
+  int minor_size = 1;  // product of the dims that follow `axis` (all dims when axis == -1)
+  for (int i = axis + 1; i < dims.size(); ++i) {
+    minor_size *= dims[i];
+  }
+  for (int i = 0; i < inputs.size(); ++i) {
+    int slice_idx = (i / minor_size) % num_slices;  // which min/max pair applies to flat element i
+    expected_vals[i] = ((inputs[i] >= input_min_values[slice_idx]) &&
+                        (inputs[i] <= input_max_values[slice_idx]))
+                           ? gradients[i]
+                           : 0;
+  }
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_FLOAT, TensorShape(dims));
+  test::FillValues<float>(&expected, expected_vals);
+  test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);  // only output 0 (input_backprop) is checked
+}
+
+ // Instantiate parameterized tests for axis = -1, 1, 3.
+ INSTANTIATE_TEST_SUITE_P(All, ParameterizedQuantizeAndDequantizeTest,
+                          ::testing::Values(-1, 1, 3));
+diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
+index ad11e0b7..17258bc1 100644
+--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
+@@ -2802,6 +2802,70 @@ REGISTER_OP("QuantizeAndDequantizeV2")
+       return Status::OK();
+     });
+ 
+REGISTER_OP("QuantizeAndDequantizeV4")
+    .Input("input: T")
+    .Input("input_min: T")
+    .Input("input_max: T")
+    .Attr("signed_input: bool = true")
+    .Attr("num_bits: int = 8")
+    .Attr("range_given: bool = false")
+    .Output("output: T")
+    .Attr("T: {bfloat16, half, float, double}")
+    .Attr(
+        "round_mode: {'HALF_TO_EVEN', 'HALF_UP'} = "
+        "'HALF_TO_EVEN'")
+    .Attr("narrow_range: bool = false")
+    .Attr("axis: int = -1")
+    .SetShapeFn([](InferenceContext* c) {
+      int axis;
+      TF_RETURN_IF_ERROR(c->GetAttr("axis", &axis));
+      const int minmax_rank = (axis == -1) ? 0 : 1;  // scalar range, or a vector with one entry per slice
+      ShapeHandle minmax;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax));
+      TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax));  // input_max must agree with input_min
+      if (axis != -1) {
+        ShapeHandle input;
+        TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input));
+        DimensionHandle depth;
+        TF_RETURN_IF_ERROR(
+            c->Merge(c->Dim(minmax, 0), c->Dim(input, axis), &depth));  // range length must match input's size along axis
+      }
+      c->set_output(0, c->input(0));  // output takes the shape of `input`
+      return Status::OK();
+    });
+
+REGISTER_OP("QuantizeAndDequantizeV4Grad")
+    .Input("gradients: T")
+    .Input("input: T")
+    .Input("input_min: T")
+    .Input("input_max: T")
+    .Output("input_backprop: T")
+    .Output("input_min_backprop: T")
+    .Output("input_max_backprop: T")
+    .Attr("T: {bfloat16, half, float, double}")
+    .Attr("axis: int = -1")
+    .SetShapeFn([](InferenceContext* c) {
+      int axis;
+      TF_RETURN_IF_ERROR(c->GetAttr("axis", &axis));
+      const int minmax_rank = (axis == -1) ? 0 : 1;  // scalar range, or a vector with one entry per slice
+      ShapeHandle minmax;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), minmax_rank, &minmax));
+      TF_RETURN_IF_ERROR(c->Merge(c->input(3), minmax, &minmax));  // input_max must agree with input_min
+      if (axis != -1) {
+        ShapeHandle input;
+        TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input));  // rank check uses `gradients` (input 0)
+        DimensionHandle depth;
+        TF_RETURN_IF_ERROR(
+            c->Merge(c->Dim(minmax, 0), c->Dim(input, axis), &depth));
+      }
+      ShapeHandle inputs;
+      TF_RETURN_IF_ERROR(c->Merge(c->input(0), c->input(1), &inputs));  // `gradients` and `input` shapes must be compatible
+      c->set_output(0, inputs);
+      c->set_output(1, minmax);  // both range backprops take the min/max shape
+      c->set_output(2, minmax);
+      return Status::OK();
+    });
+
+ REGISTER_OP("QuantizeAndDequantizeV3")
+     .Input("input: T")
+     .Input("input_min: T")
+diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
+index 97a1184f..a3097fe2 100644
+--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
+@@ -1523,7 +1523,7 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase):
+         expected = self._scale_per_slice(shape, axis, quant_values)
+         unused_minmax_value = 0 if axis is None else [0] * shape[axis]
+         fake_quantized = self.evaluate(
+-            array_ops.quantize_and_dequantize(
+            array_ops.quantize_and_dequantize_v2(
+                 inputs,
+                 unused_minmax_value,
+                 unused_minmax_value,
+@@ -1533,7 +1533,7 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase):
+         self.assertAllEqual(fake_quantized, expected)
+         if axis is not None:
+           fake_quantized = self.evaluate(
+-              array_ops.quantize_and_dequantize(
+              array_ops.quantize_and_dequantize_v2(
+                   inputs,
+                   unused_minmax_value,
+                   unused_minmax_value,
+@@ -1541,6 +1541,23 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase):
+                   axis=(axis - 4)))
+           self.assertAllClose(fake_quantized, expected)
+ 
+  def testQuantizeDequantizeGrad(self):
+    shape = (2, 2)
+    max_threshold = 0
+    min_threshold = -10
+    input_value = np.random.rand(2, 2) * 40.0 - 20.0  # uniform samples in [-20, 20)
+    input_tensor = constant_op.constant(input_value, shape=shape,
+                                        name="input_tensor")
+    with self.cached_session():
+      def f(a):
+        return array_ops.quantize_and_dequantize_v2(
+            a,
+            input_min=min_threshold,
+            input_max=max_threshold,
+            range_given=True)
+      output_grad = gradient_checker_v2.compute_gradient(f, [input_tensor])
+      self.assertAllClose(output_grad[0], np.zeros([1, 4, 4]))  # NOTE(review): expects an all-zero gradient for random input; confirm intent
+
+   def testBadAxis(self):
+     input_tensor = [2.5, 2.5]
+     input_min = [0, 0]
+diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
+index cb97b8f9..19a2a0c2 100644
+--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
+@@ -3654,6 +3654,23 @@ def _FakeQuantWithMinMaxVarsPerChannelGradient(op, grad):
+       narrow_range=op.get_attr("narrow_range"))
+ 
+ 
+@ops.RegisterGradient("QuantizeAndDequantizeV4")
+def _QuantizeAndDequantizeV4Grad(op, grad):
+  """Gradient for QuantizeAndDequantizeV4 op, from its inputs and axis attr."""
+  return quantize_and_dequantize_v4_grad(
+      grad,
+      op.inputs[0],
+      op.inputs[1],
+      op.inputs[2],
+      axis=op.get_attr("axis"))
+
+
+@ops.RegisterGradient("QuantizeAndDequantizeV4Grad")
+def _QuantizeAndDequantizeV4GradGrad(op, grad):
+  """Gradient for QuantizeAndDequantizeV4Grad op; reuses the V4 gradient."""
+  return _QuantizeAndDequantizeV4Grad(op, grad)
+
+
+ @tf_export("required_space_to_batch_paddings")
+ def required_space_to_batch_paddings(input_shape,
+                                      block_shape,
+@@ -5223,6 +5240,13 @@ dequantize.__doc__ = gen_array_ops.dequantize.__doc__
+ 
+ @tf_export("quantization.quantize_and_dequantize")
+ @dispatch.add_dispatch_support
+@deprecation.deprecated(None,
+                        "This Op has been deprecated, use " +
+                        "`quantize_and_dequantize_v2` instead. " +
+                        "To simulate the V1 behavior of " +
+                        "tf.quantization.quantize_and_dequantize(...) use " +
+                        "tf.grad_pass_through(" +
+                        "tf.quantization.quantize_and_dequantize_v2)(...).")
+ def quantize_and_dequantize(
+     input,  # pylint: disable=redefined-builtin
+     input_min,
+@@ -5281,6 +5305,93 @@ def quantize_and_dequantize(
+       name=name)
+ 
+ 
+@tf_export("quantization.quantize_and_dequantize_v2")
+@dispatch.add_dispatch_support
+def quantize_and_dequantize_v2(
+    input,  # pylint: disable=redefined-builtin
+    input_min,
+    input_max,
+    signed_input=True,
+    num_bits=8,
+    range_given=False,
+    round_mode="HALF_TO_EVEN",
+    name=None,
+    narrow_range=False,
+    axis=None):
+  """Quantizes then dequantizes a tensor.
+
+  Updates the gradient definition for quantization that is outside the range to
+  be 0. To simulate the V1 behavior of
+  tf.quantization.quantize_and_dequantize(...) use
+  tf.grad_pass_through(tf.quantization.quantize_and_dequantize_v2)(...).
+
+  Example usage:
+
+  ```python
+  def getQuantizeOp(input):
+      input_tensor = tf.placeholder(tf.float32, shape=[4, 4])
+      net = tf.quantization.quantize_and_dequantize_v2(input,
+                                                       input_min=min_threshold,
+                                                       input_max=max_threshold,
+                                                       range_given=True)
+
+  # To simulate v1 behavior:
+
+  def testDecomposeQuantizeDequantize(self):
+      def f(input_tensor):
+        return tf.quantization.quantize_and_dequantize_v2(input_tensor,
+                                                          input_min=5.0,
+                                                          input_max=-10.0,
+                                                          range_given=True)
+      input_tensor = tf.placeholder(tf.float32, shape=[4, 4])
+      net = tf.grad_pass_through(f)(input_tensor)
+  ```
+
+  Args:
+    input: A `Tensor` to quantize and dequantize.
+    input_min: If range_given=True, the minimum input value that needs to be
+      represented in the quantized representation. If axis is specified, this
+      should be a vector of minimum values for each slice along axis.
+    input_max: If range_given=True, the maximum input value that needs to be
+      represented in the quantized representation. If axis is specified, this
+      should be a vector of maximum values for each slice along axis.
+    signed_input: True if the quantization is signed, False if unsigned.
+    num_bits: The bitwidth of the quantization.
+    range_given: If true use `input_min` and `input_max` for the range of the
+      input, otherwise determine min and max from the input `Tensor`.
+    round_mode: Rounding mode when rounding from float values to quantized ones.
+      One of ['HALF_TO_EVEN', 'HALF_UP'].
+    name: Optional name for the operation.
+    narrow_range: If true, then the absolute value of the quantized minimum
+      value is the same as the quantized maximum value, instead of 1 greater.
+      i.e. for 8 bit quantization, the minimum value is -127 instead of -128.
+    axis: Integer. If specified, refers to a dimension of the input tensor, such
+      that quantization will be per slice along that dimension.
+
+  Returns:
+    A `Tensor`. Each element is the result of quantizing and dequantizing the
+    corresponding element of `input`.
+  """
+  if axis is None:
+    axis = -1
+  elif axis < 0:
+    if input.shape.ndims is None:
+      raise ValueError("input should have known rank to use negative axis.")
+    axis %= input.shape.ndims
+
+  return gen_array_ops.quantize_and_dequantize_v4(
+      input,
+      input_min=input_min,
+      input_max=input_max,
+      signed_input=signed_input,
+      num_bits=num_bits,
+      range_given=range_given,
+      round_mode=round_mode,
+      narrow_range=narrow_range,
+      axis=axis,
+      name=name)
+
+
+ @tf_export("searchsorted")
+ @dispatch.add_dispatch_support
+ def searchsorted(sorted_sequence,
+@@ -5768,7 +5879,7 @@ def _with_nonzero_rank(data):
+ @dispatch.add_dispatch_support
+ def repeat(input, repeats, axis=None, name=None):  # pylint: disable=redefined-builtin
+   """Repeat elements of `input`.
+-  
+
+   See also `tf.concat`, `tf.stack`, `tf.tile`.
+ 
+   Args:
+diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+index 1fe3c40d..0b944f56 100644
+--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+@@ -1820,6 +1820,10 @@ tf_module {
+     name: "quantize"
+     argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\', \'narrow_range\', \'axis\', \'ensure_minimum_range\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\', \'False\', \'None\', \'0.01\'], "
+   }
+  member_method {
+    name: "quantize_and_dequantize_v4"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'narrow_range\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'False\', \'-1\', \'None\'], "
+  }
+   member_method {
+     name: "quantize_v2"
+     argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'name\', \'round_mode\', \'narrow_range\', \'axis\', \'ensure_minimum_range\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'None\', \'HALF_AWAY_FROM_ZERO\', \'False\', \'None\', \'0.01\'], "
+diff --git a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
+index 047fb4de..269873c6 100644
+--- a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
+@@ -36,6 +36,10 @@ tf_module {
+     name: "quantize_and_dequantize"
+     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'name\', \'narrow_range\', \'axis\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'None\', \'False\', \'None\'], "
+   }
+  member_method {
+    name: "quantize_and_dequantize_v2"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'name\', \'narrow_range\', \'axis\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'None\', \'False\', \'None\'], "
+  }
+   member_method {
+     name: "quantized_concat"
+     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
+index 8e5303cb..136d86f7 100644
+--- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
+@@ -2900,6 +2900,14 @@ tf_module {
+     name: "QuantizeAndDequantizeV3"
+     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'num_bits\', \'signed_input\', \'range_given\', \'narrow_range\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'False\', \'-1\', \'None\'], "
+   }
+  member_method {
+    name: "QuantizeAndDequantizeV4"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'narrow_range\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'False\', \'-1\', \'None\'], "
+  }
+  member_method {
+    name: "QuantizeAndDequantizeV4Grad"
+    argspec: "args=[\'gradients\', \'input\', \'input_min\', \'input_max\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
+  }
+   member_method {
+     name: "QuantizeDownAndShrinkRange"
+     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+index 1d56969b..5f358a7b 100644
+--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+@@ -872,6 +872,10 @@ tf_module {
+     name: "py_function"
+     argspec: "args=[\'func\', \'inp\', \'Tout\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+   }
+  member_method {
+    name: "quantize_and_dequantize_v4"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'narrow_range\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'False\', \'-1\', \'None\'], "
+  }
+   member_method {
+     name: "range"
+     argspec: "args=[\'start\', \'limit\', \'delta\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'range\'], "
+diff --git a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
+index 047fb4de..269873c6 100644
+--- a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
+@@ -36,6 +36,10 @@ tf_module {
+     name: "quantize_and_dequantize"
+     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'name\', \'narrow_range\', \'axis\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'None\', \'False\', \'None\'], "
+   }
+  member_method {
+    name: "quantize_and_dequantize_v2"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'name\', \'narrow_range\', \'axis\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'None\', \'False\', \'None\'], "
+  }
+   member_method {
+     name: "quantized_concat"
+     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
+index 8e5303cb..136d86f7 100644
+--- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
+@@ -2900,6 +2900,14 @@ tf_module {
+     name: "QuantizeAndDequantizeV3"
+     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'num_bits\', \'signed_input\', \'range_given\', \'narrow_range\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'False\', \'-1\', \'None\'], "
+   }
+  member_method {
+    name: "QuantizeAndDequantizeV4"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'round_mode\', \'narrow_range\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'HALF_TO_EVEN\', \'False\', \'-1\', \'None\'], "
+  }
+  member_method {
+    name: "QuantizeAndDequantizeV4Grad"
+    argspec: "args=[\'gradients\', \'input\', \'input_min\', \'input_max\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'None\'], "
+  }
+   member_method {
+     name: "QuantizeDownAndShrinkRange"
+     argspec: "args=[\'input\', \'input_min\', \'input_max\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+-- 
+2.27.0
+
--- a/CVE-2021-29544-2.patch
+++ b/CVE-2021-29544-2.patch
@ -0,0 +1,33 @@
+From 20431e9044cf2ad3c0323c34888b192f3289af6b Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Mon, 26 Apr 2021 13:43:59 -0700
+Subject: [PATCH] Fix `tf.raw_ops.QuantizeAndDequantizeV4Grad` CHECK failure.
+
+PiperOrigin-RevId: 370532425
+Change-Id: I767721be266851b63d8fe55e7ac6be0af6017f6c
+---
+ tensorflow/core/kernels/quantize_and_dequantize_op.cc | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+index 675bdaec225bd..c2a7a90d8713d 100644
+--- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+@@ -164,7 +164,17 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel {
+         errors::InvalidArgument("gradient and input must be the same size"));
+     const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_);
+     const Tensor& input_min_tensor = ctx->input(2);
+    OP_REQUIRES(ctx,
+                input_min_tensor.dims() == 0 || input_min_tensor.dims() == 1,
+                errors::InvalidArgument(
+                    "Input min tensor must have dimension 0 or 1. Received ",
+                    input_min_tensor.dims(), "."));
+     const Tensor& input_max_tensor = ctx->input(3);
+    OP_REQUIRES(ctx,
+                input_max_tensor.dims() == 0 || input_max_tensor.dims() == 1,
+                errors::InvalidArgument(
+                    "Input max tensor must have dimension 0 or 1. Received ",
+                    input_max_tensor.dims(), "."));
+     if (axis_ != -1) {
+       OP_REQUIRES(
+           ctx, input_min_tensor.dim_size(0) == depth,
--- a/CVE-2021-29560.patch
+++ b/CVE-2021-29560.patch
@ -0,0 +1,30 @@
+From a84358aa12f0b1518e606095ab9cfddbf597c121 Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Tue, 4 May 2021 13:45:57 -0700
+Subject: [PATCH] Fix heap-buffer-overflow issue with
+ `tf.raw_ops.RaggedTensorToTensor`.
+
+---
+ tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc
+index 988a3333..38cb4257 100644
+--- a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc
+++ b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc
+@@ -313,6 +313,12 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
+             output_index_multiplier, output_size, result);
+         return tensorflow::Status::OK();
+       case RowPartitionType::ROW_SPLITS:
+	if (row_partition_tensor.size() - 1 > parent_output_index.size()) {
+	  return errors::InvalidArgument(
+	      "Row partition size is greater than output size: ",
+	      row_partition_tensor.size() - 1, " > ",
+	      parent_output_index.size());
+	 }
+         CalculateOutputIndexRowSplit(
+             context, row_partition_tensor, parent_output_index,
+             output_index_multiplier, output_size, result);
+-- 
+2.23.0
+
--- a/CVE-2021-29571.patch
+++ b/CVE-2021-29571.patch
@ -0,0 +1,30 @@
+From 79865b542f9ffdc9caeb255631f7c56f1d4b6517 Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Tue, 4 May 2021 18:05:46 -0700
+Subject: [PATCH] Fix memory corruption issue with
+ `tf.raw_ops.DrawBoundingBoxesV2`.
+
+---
+ tensorflow/core/kernels/draw_bounding_box_op.cc | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tensorflow/core/kernels/draw_bounding_box_op.cc b/tensorflow/core/kernels/draw_bounding_box_op.cc
+index 30de99b7..1e866325 100644
+--- a/tensorflow/core/kernels/draw_bounding_box_op.cc
+++ b/tensorflow/core/kernels/draw_bounding_box_op.cc
+@@ -73,6 +73,12 @@ class DrawBoundingBoxesOp : public OpKernel {
+         errors::InvalidArgument("Channel depth should be either 1 (GRY), "
+                                 "3 (RGB), or 4 (RGBA)"));
+ 
+    OP_REQUIRES(
+        context, boxes.dim_size(2) == 4,
+        errors::InvalidArgument(
+            "The size of the third dimension of the box must be 4. Received: ",
+            boxes.dim_size(2)));
+
+     const int64 batch_size = images.dim_size(0);
+     const int64 height = images.dim_size(1);
+     const int64 width = images.dim_size(2);
+-- 
+2.23.0
+
--- a/CVE-2021-29583.patch
+++ b/CVE-2021-29583.patch
@ -0,0 +1,59 @@
+From 6972f9dfe325636b3db4e0bc517ee22a159365c0 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Thu, 6 May 2021 17:45:51 -0700
+Subject: [PATCH] Add missing validation to FusedBatchNorm.
+
+---
+ .../core/kernels/fused_batch_norm_op.cc       | 29 ++++++++++++++++++-
+ 1 file changed, 28 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc
+index 59470c8a..bd5dab36 100644
+--- a/tensorflow/core/kernels/fused_batch_norm_op.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op.cc
+@@ -1267,6 +1267,33 @@ class FusedBatchNormOpBase : public OpKernel {
+         context, estimated_variance.dims() == 1,
+         errors::InvalidArgument("estimated_variance must be 1-dimensional",
+                                 estimated_variance.shape().DebugString()));
+
+    const auto num_channels = GetTensorDim(x, tensor_format_, 'C');
+    OP_REQUIRES(
+	context, scale.NumElements() == num_channels,
+	errors::InvalidArgument("scale must have the same number of elements "
+				"as the channels of x, got ",
+				scale.NumElements(), " and ", num_channels));
+    OP_REQUIRES(
+	context, offset.NumElements() == num_channels,
+	errors::InvalidArgument("offset must have the same number of elements "
+				"as the channels of x, got ",
+				offset.NumElements(), " and ", num_channels));
+    if (estimated_mean.NumElements() != 0) {
+      OP_REQUIRES(context, estimated_mean.NumElements() == num_channels,
+		  errors::InvalidArgument(
+		      "mean must be empty or have the same number of "
+		      "elements as the channels of x, got ",
+		      estimated_mean.NumElements(), " and ",num_channels));
+    }
+    if (estimated_variance.NumElements() != 0) {
+      OP_REQUIRES(context, estimated_variance.NumElements() == num_channels,
+		  errors::InvalidArgument(
+		      "variance must be empty or have the same number of "
+		      "elements as the channels of x, got ",
+		      estimated_variance.NumElements(), " and ", num_channels));
+    }
+
+     if (has_side_input_) {
+       OP_REQUIRES(context, side_input->shape() == x.shape(),
+                   errors::InvalidArgument(
+@@ -1279,7 +1306,7 @@ class FusedBatchNormOpBase : public OpKernel {
+       // NOTE(ezhulenev): This requirement is coming from implementation
+       // details of cudnnBatchNormalizationForwardTrainingEx.
+       OP_REQUIRES(
+-          context, !is_training_ || x.dim_size(3) % 4 == 0,
+	  context, !is_training_ || num_channels % 4 == 0,
+           errors::InvalidArgument("FusedBatchNorm with activation requires "
+                                   "channel dimension to be a multiple of 4."));
+     }
+-- 
+2.23.0
+
--- a/CVE-2021-29589.patch
+++ b/CVE-2021-29589.patch
@ -0,0 +1,26 @@
+From 8e45822aa0b9f5df4b4c64f221e64dc930a70a9d Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Tue, 27 Apr 2021 17:46:10 -0700
+Subject: [PATCH] Handle one more division by 0 in TFLite.
+
+---
+ tensorflow/lite/kernels/gather_nd.cc | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tensorflow/lite/kernels/gather_nd.cc b/tensorflow/lite/kernels/gather_nd.cc
+index 288f7deb..5c9990a0 100644
+--- a/tensorflow/lite/kernels/gather_nd.cc
+++ b/tensorflow/lite/kernels/gather_nd.cc
+@@ -155,6 +155,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+   const TfLiteTensor* indices = GetInput(context, node, kIndices);
+   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+ 
+  // Prevent division by 0 in the helper
+  TF_LITE_ENSURE(context, NumElements(params) > 0);
+  
+   switch (indices->type) {
+     case kTfLiteInt32:
+       return EvalGatherNd<int32_t>(context, params, indices, output);
+-- 
+2.23.0
+
--- a/CVE-2021-29595.patch
+++ b/CVE-2021-29595.patch
@ -0,0 +1,41 @@
+From 106d8f4fb89335a2c52d7c895b7a7485465ca8d9 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Tue, 27 Apr 2021 17:47:36 -0700
+Subject: [PATCH] Prevent division by 0 in TFLite
+
+---
+ tensorflow/lite/kernels/depth_to_space.cc      | 1 +
+ tensorflow/lite/kernels/depth_to_space_test.cc | 5 +++++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/tensorflow/lite/kernels/depth_to_space.cc b/tensorflow/lite/kernels/depth_to_space.cc
+index 1637ad43..c2047f10 100644
+--- a/tensorflow/lite/kernels/depth_to_space.cc
+++ b/tensorflow/lite/kernels/depth_to_space.cc
+@@ -58,6 +58,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+ 
+   const int block_size = params->block_size;
+  TF_LITE_ENSURE(context, block_size > 0);
+   const int input_height = input->dims->data[1];
+   const int input_width = input->dims->data[2];
+   const int input_channels = input->dims->data[3];
+diff --git a/tensorflow/lite/kernels/depth_to_space_test.cc b/tensorflow/lite/kernels/depth_to_space_test.cc
+index 4429faf9..e0de01a9 100644
+--- a/tensorflow/lite/kernels/depth_to_space_test.cc
+++ b/tensorflow/lite/kernels/depth_to_space_test.cc
+@@ -60,6 +60,11 @@ TEST(DepthToSpaceOpModel, BadBlockSize) {
+   EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 4),
+                "Cannot allocate tensors");
+ }
+
+TEST(DepthToSpaceOpModel, NoBlockSize) {
+  EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 0),
+	       "Cannot allocate tensors");
+}
+ #endif
+ 
+ TEST(DepthToSpaceOpModel, Float32) {
+-- 
+2.23.0
+
--- a/CVE-2021-29602.patch
+++ b/CVE-2021-29602.patch
@ -0,0 +1,35 @@
+From cbda3c6b2dbbd3fbdc482ff8c0170a78ec2e97d0 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Wed, 28 Apr 2021 15:53:48 -0700
+Subject: [PATCH] Prevent divisions by 0
+
+---
+ tensorflow/lite/kernels/depthwise_conv.cc | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/lite/kernels/depthwise_conv.cc b/tensorflow/lite/kernels/depthwise_conv.cc
+index 3f75287e..fb031926 100644
+--- a/tensorflow/lite/kernels/depthwise_conv.cc
+++ b/tensorflow/lite/kernels/depthwise_conv.cc
+@@ -276,6 +276,7 @@ TfLiteStatus ComputeDepthMultiplier(TfLiteContext* context,
+                                     int16* depth_multiplier) {
+   int num_filter_channels = SizeOfDimension(filter, 3);
+   int num_input_channels = SizeOfDimension(input, 3);
+  TF_LITE_ENSURE(context, num_input_channels != 0);
+   TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0);
+ 
+   *depth_multiplier = num_filter_channels / num_input_channels;
+@@ -446,8 +447,9 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
+   float output_activation_min, output_activation_max;
+   CalculateActivationRange(params->activation, &output_activation_min,
+                            &output_activation_max);
+-  const int input_size = NumElements(input) / SizeOfDimension(input, 0);
+   const int batch_size = SizeOfDimension(input, 0);
+  TF_LITE_ENSURE(context, batch_size != 0);
+  const int input_size = NumElements(input) / batch_size;
+   const TfLiteTensor* input_quantized =
+       GetTemporary(context, node, data->input_quantized_index);
+   int8_t* quantized_input_ptr_batch = input_quantized->data.int8;
+-- 
+2.23.0
+
--- a/CVE-2021-29604.patch
+++ b/CVE-2021-29604.patch
@ -0,0 +1,24 @@
+From 5117e0851348065ed59c991562c0ec80d9193db2 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Wed, 28 Apr 2021 16:16:56 -0700
+Subject: [PATCH] Prevent a division by 0
+
+---
+ tensorflow/lite/kernels/hashtable_lookup.cc | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tensorflow/lite/kernels/hashtable_lookup.cc b/tensorflow/lite/kernels/hashtable_lookup.cc
+index 65e50fe4..9d947107 100644
+--- a/tensorflow/lite/kernels/hashtable_lookup.cc
+++ b/tensorflow/lite/kernels/hashtable_lookup.cc
+@@ -101,6 +101,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+   const TfLiteTensor* value = GetInput(context, node, 2);
+ 
+   const int num_rows = SizeOfDimension(value, 0);
+  TF_LITE_ENSURE(context, num_rows != 0);
+   const int row_bytes = value->bytes / num_rows;
+   void* pointer = nullptr;
+   DynamicBuffer buf;
+-- 
+2.23.0
+
--- a/CVE-2021-29610.patch
+++ b/CVE-2021-29610.patch
@ -0,0 +1,26 @@
+From c5b0d5f8ac19888e46ca14b0e27562e7fbbee9a9 Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Fri, 30 Apr 2021 10:39:05 -0700
+Subject: [PATCH] Fix the CHECK failure in tf.raw_ops.QuantizeAndDequantizeV2.
+
+---
+ tensorflow/core/kernels/quantize_and_dequantize_op.cc | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+index ae905e22..408196a2 100644
+--- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc
+@@ -72,6 +72,9 @@ class QuantizeAndDequantizeV2Op : public OpKernel {
+ 
+   void Compute(OpKernelContext* ctx) override {
+     const Tensor& input = ctx->input(0);
+    OP_REQUIRES(
+	ctx, axis_ >= -1,
+	errors::InvalidArgument("Axis must be at least -1. Found ", axis_));
+     OP_REQUIRES(
+ 	ctx, (axis_ == -1 || axis_ < input.shape().dims()),
+ 	errors::InvalidArgument("Shape must be at least rank", axis_ + 1,
+-- 
+2.27.0
+
--- a/CVE-2021-29611.patch
+++ b/CVE-2021-29611.patch
@ -0,0 +1,45 @@
+From 1d04d7d93f4ed3854abf75d6b712d72c3f70d6b6 Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Thu, 29 Apr 2021 15:30:30 -0700
+Subject: [PATCH] Fix heap-buffer-overflow issue with
+ `tf.raw_ops.SparseReshape`.
+
+PiperOrigin-RevId: 371218558
+Change-Id: I6a6dc5bf15b50a1d05bdd95e9ba347cb39f40f45
+---
+ tensorflow/core/kernels/sparse_reshape_op.cc | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/tensorflow/core/kernels/sparse_reshape_op.cc b/tensorflow/core/kernels/sparse_reshape_op.cc
+index 6eb5f0af..3896c959 100644
+--- a/tensorflow/core/kernels/sparse_reshape_op.cc
+++ b/tensorflow/core/kernels/sparse_reshape_op.cc
+@@ -26,6 +26,7 @@ limitations under the License.
+ #include "tensorflow/core/framework/types.h"
+ #include "tensorflow/core/kernels/reshape_util.h"
+ #include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/errors.h"
+ 
+ namespace tensorflow {
+ 
+@@ -34,6 +35,17 @@ class SparseReshapeOp : public OpKernel {
+   explicit SparseReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
+ 
+   void Compute(OpKernelContext* context) override {
+    const Tensor& input_indices_in = context->input(0);
+    const Tensor& input_shape_in = context->input(1);
+
+    OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_indices_in.shape()),
+		errors::InvalidArgument("Input must be a matrix."));
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()),
+		errors::InvalidArgument("Input shape must be a vector."));
+    OP_REQUIRES(context,
+		input_indices_in.dim_size(1) == input_shape_in.dim_size(0),
+		errors::InvalidArgument(
+		    "Input tensor rank must match input shape length."));
+     ReshapeSparseTensor(context, context->input(0), context->input(1),
+                         context->input(2), 0 /* output indices index */,
+                         1 /* output shape index */);
+-- 
+2.27.0
+
--- a/CVE-2021-29612-1.patch
+++ b/CVE-2021-29612-1.patch
@ -0,0 +1,23 @@
+From 0ab290774f91a23bebe30a358fde4e53ab4876a0 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Tue, 11 May 2021 18:36:43 -0700
+Subject: [PATCH] Ensure validation sticks in banded_triangular_solve_op
+
+PiperOrigin-RevId: 373275480
+Change-Id: Id7717cf275b2d6fdb9441fbbe166d555182d2e79
+---
+ tensorflow/core/kernels/banded_triangular_solve_op.cc | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tensorflow/core/kernels/banded_triangular_solve_op.cc b/tensorflow/core/kernels/banded_triangular_solve_op.cc
+index c079c63b778ac..b719f55b507b0 100644
+--- a/tensorflow/core/kernels/banded_triangular_solve_op.cc
+++ b/tensorflow/core/kernels/banded_triangular_solve_op.cc
+@@ -217,6 +217,7 @@ class BandedTriangularSolveOpCpu : public OpKernel {
+     const Tensor& in1 = ctx->input(1);
+ 
+     ValidateInputTensors(ctx, in0, in1);
+    if (!ctx->status().ok()) return;
+ 
+     MatMulBCast bcast(in0.shape().dim_sizes(), in1.shape().dim_sizes());
+     OP_REQUIRES(
--- a/CVE-2021-29612-2.patch
+++ b/CVE-2021-29612-2.patch
@ -0,0 +1,30 @@
+From ba6822bd7b7324ba201a28b2f278c29a98edbef2 Mon Sep 17 00:00:00 2001
+From: Amit Patankar <amitpatankar@google.com>
+Date: Wed, 28 Apr 2021 16:06:54 -0700
+Subject: [PATCH] Fix OOB issue with `tf.raw_ops.SparseSparseMinimum`.
+
+PiperOrigin-RevId: 371005787
+Change-Id: Ib686ccc077836e8b980b8b5a03936d36a8ecaf71
+---
+ tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc b/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc
+index eb993a59..6c190814 100644
+--- a/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc
+++ b/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc
+@@ -189,6 +189,11 @@ class SparseSparseBinaryOpShared : public OpKernel {
+                                           " for dimension ", i));
+     }
+ 
+    OP_REQUIRES(
+        ctx, a_indices_t->dim_size(1) == b_indices_t->dim_size(1),
+	errors::InvalidArgument(
+            "Indices' dimensions do not match: got ", a_indices_t->dim_size(1),
+	    " and ", b_indices_t->dim_size(1), " for the second dimension."));
+     const auto a_indices_mat = a_indices_t->matrix<int64>();
+     const auto b_indices_mat = b_indices_t->matrix<int64>();
+     std::vector<T> a_augmented_values, b_augmented_values;
+-- 
+2.27.0
+
--- a/CVE-2021-29614.patch
+++ b/CVE-2021-29614.patch
@ -0,0 +1,66 @@
+From 698e01511f62a3c185754db78ebce0eee1f0184d Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Apr 2021 06:36:59 -0700
+Subject: [PATCH] Fix `tf.io.decode_raw` bugs and update documentation.
+
+Fixes cases where specifying `fixed_length` resulted in data loss and even segfault and corruption of the Python interpreter. The fix is subtle but needed due to pointer arithmetic rules.
+
+Makes sure that `fixed_length` does not change the output when present but not needed.
+
+Eliminates needless copy and cast in the main codepath.
+
+PiperOrigin-RevId: 371322725
+Change-Id: I514ef67a2961c86422f69d05122d31615e87896c
+---
+ .../core/kernels/decode_padded_raw_op.cc      | 21 +++++++++++--------
+ 1 file changed, 12 insertions(+), 9 deletions(-)
+
+diff --git a/tensorflow/core/kernels/decode_padded_raw_op.cc b/tensorflow/core/kernels/decode_padded_raw_op.cc
+index 12e8ec6a..d3e830c0 100644
+--- a/tensorflow/core/kernels/decode_padded_raw_op.cc
+++ b/tensorflow/core/kernels/decode_padded_raw_op.cc
+@@ -19,6 +19,7 @@ limitations under the License.
+ #include "tensorflow/core/framework/common_shape_fns.h"
+ #include "tensorflow/core/framework/op.h"
+ #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/op_requires.h"
+ #include "tensorflow/core/framework/shape_inference.h"
+ 
+ namespace tensorflow {
+@@ -83,14 +84,13 @@ class DecodePaddedRawOp : public OpKernel {
+     // can copy the memory directly.
+     if (!convert_data_endianness_ || sizeof(T) == 1) {
+       for (int64 i = 0; i < flat_in.size(); ++i) {
+-        const T* in_data = reinterpret_cast<const T*>(flat_in(i).data());
+-
+-        if (flat_in(i).size() > fixed_length) {
+-          memcpy(out_data, in_data, fixed_length);
+-        } else {
+-          memcpy(out_data, in_data, flat_in(i).size());
+-        }
+-        out_data += fixed_length;
+        const auto to_copy =
+            std::min(flat_in(i).size(), static_cast<size_t>(fixed_length));
+        memcpy(out_data, flat_in(i).data(), to_copy);
+        // Note: increase out_data by width since it's already of type T* so
+        // each shift amount is implicitly multiplied by sizeof(T) according to
+        // pointer arithmetic rules.
+        out_data += width;
+       }
+     } else {
+       // Otherwise, the data is not in the host's byte order, and rather than a
+@@ -105,7 +105,10 @@ class DecodePaddedRawOp : public OpKernel {
+              p_in += sizeof(T), p_out += sizeof(T)) {
+           std::reverse_copy(p_in, p_in + sizeof(T), p_out);
+         }
+-        out_data += fixed_length;
+        // Note: increase out_data by width since it's already of type T* so
+        // each shift amount is implicitly multiplied by sizeof(T) according to
+        // pointer arithmetic rules.
+        out_data += width;
+       }
+     }
+   }
+-- 
+2.27.0
+
--- a/CVE-2021-29618.patch
+++ b/CVE-2021-29618.patch
@ -0,0 +1,60 @@
+From 63333b967844327856352f484aeddd1509b10604 Mon Sep 17 00:00:00 2001
+From: Yong Tang <yong.tang.github@outlook.com>
+Date: Sat, 6 Feb 2021 18:53:33 +0000
+Subject: [PATCH 1/2] Fix crash with tf.transpose when a is complex and
+ conjugate is True
+
+This PR tries to address the issue raised in 46891 where
+tf.transpose will crash when a is complex and conjugate is True.
+The issue comes from:
+https://github.com/tensorflow/tensorflow/blob/57bbc5e0d4b93483b8ae853352173516f1c08018/tensorflow/core/kernels/transpose_functor.h#L169
+
+However, as ndims < 2 has already been handled properly:
+https://github.com/tensorflow/tensorflow/blob/57bbc5e0d4b93483b8ae853352173516f1c08018/tensorflow/core/kernels/transpose_functor_cpu.cc#L103-L105
+The check could be removed.
+
+This PR fixes 46891.
+
+Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
+---
+ tensorflow/core/kernels/transpose_functor.h         | 1 -
+ tensorflow/python/kernel_tests/transpose_op_test.py | 4 ++++
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/core/kernels/transpose_functor.h b/tensorflow/core/kernels/transpose_functor.h
+index 0c22b11b..479ad7af 100644
+--- a/tensorflow/core/kernels/transpose_functor.h
+++ b/tensorflow/core/kernels/transpose_functor.h
+@@ -166,7 +166,6 @@ template <typename Device>
+ Status DoTransposeImpl(const Device& d, const Tensor& in,
+                        const gtl::ArraySlice<int32> perm, bool conjugate,
+                        Tensor* out) {
+-  CHECK_GE(in.dims(), 2);
+   CHECK_EQ(in.dims(), out->dims());
+   CHECK_EQ(in.dims(), perm.size());
+   CHECK_EQ(in.dtype(), out->dtype());
+diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py
+index 87096211..ed634ae7 100644
+--- a/tensorflow/python/kernel_tests/transpose_op_test.py
+++ b/tensorflow/python/kernel_tests/transpose_op_test.py
+@@ -387,6 +387,8 @@ class TransposeTest(test.TestCase):
+ 
+   @test_util.run_v1_only("b/120545219")
+   def testComplex64(self):
+    self._testBoth(np.array(np.complex(1, 2)).astype(np.complex64))
+    self._testBoth(np.complex(1, 2) * np.arange(0, 21).astype(np.complex64))
+     self._testBoth(
+         np.complex(1, 2) *
+         np.arange(0, 21).reshape([3, 7]).astype(np.complex64))
+@@ -399,6 +401,8 @@ class TransposeTest(test.TestCase):
+ 
+   @test_util.run_v1_only("b/120545219")
+   def testComplex128(self):
+    self._testBoth(np.array(np.complex(1, 2)).astype(np.complex128))
+    self._testBoth(np.complex(1, 2) * np.arange(0, 21).astype(np.complex128))
+     self._testBoth(
+         np.complex(1, 2) *
+         np.arange(0, 21).reshape([3, 7]).astype(np.complex128))
+-- 
+2.27.0
+
--- a/CVE-2021-37635.patch
+++ b/CVE-2021-37635.patch
@ -0,0 +1,39 @@
+From 87158f43f05f2720a374f3e6d22a7aaa3a33f750 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 21:11:18 -0700
+Subject: [PATCH] Prevent heap OOB in sparse reduction ops.
+
+PiperOrigin-RevId: 387934524
+Change-Id: I894aa30f1e454f09b471d565b4a325da49322c1a
+---
+ tensorflow/core/kernels/sparse_reduce_op.cc | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/tensorflow/core/kernels/sparse_reduce_op.cc b/tensorflow/core/kernels/sparse_reduce_op.cc
+index b65f31e5..2bfa3299 100644
+--- a/tensorflow/core/kernels/sparse_reduce_op.cc
+++ b/tensorflow/core/kernels/sparse_reduce_op.cc
+@@ -219,7 +219,20 @@ class SparseReduceOp : public OpKernel {
+     sp.Reorder<T>(reduction.reorder_dims);
+     for (const auto &g : sp.group(reduction.group_by_dims)) {
+       Op::template Run<T>(ctx, reduced_val, g.template values<T>());
+      OP_REQUIRES(ctx,
+		  output_strides.empty() ||
+		  (g.group().size() == output_strides.size()),
+		  errors::Internal(
+		      "Expected group size and output_strides size to match",
+		      ", but got ", g.group().size(), " and ",
+		      output_strides.size()));
+       const int64 idx = CoordinatesToFlatIndex(g.group(), output_strides);
+      OP_REQUIRES(ctx,
+		  idx >= 0 && idx < out_flat.size(),
+		  errors::Internal(
+		      "Obtained a write index of ", idx,
+		      " which is outside of bounds of [0, ",
+		      out_flat.size(), ")"));
+       out_flat(idx) = reduced_val();
+       VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",")
+               << "; idx: " << idx << "; group " << Op::Name() << ": "
+-- 
+2.27.0
+
--- a/CVE-2021-37640-1.patch
+++ b/CVE-2021-37640-1.patch
@ -0,0 +1,242 @@
+From 1d8218f155c1d22c21afda8bf28e36e4094d9e88 Mon Sep 17 00:00:00 2001
+From: Ben Barsdell <bbarsdell@nvidia.com>
+Date: Fri, 8 Jan 2021 11:04:37 +1100
+Subject: [PATCH 1/2] Refactor ReshapeSparseTensor into a template+class
+
+- This is in preparation for adding a GPU implementation.
+- No functional change.
+---
+ .../kernels/deserialize_sparse_string_op.cc   |   8 +-
+ tensorflow/core/kernels/reshape_util.cc       | 101 ++++++++++++------
+ tensorflow/core/kernels/reshape_util.h        |  18 ++++
+ tensorflow/core/kernels/sparse_reshape_op.cc  |  12 ++-
+ 4 files changed, 98 insertions(+), 41 deletions(-)
+
+diff --git a/tensorflow/core/kernels/deserialize_sparse_string_op.cc b/tensorflow/core/kernels/deserialize_sparse_string_op.cc
+index 2e151078..3acd86ef 100644
+--- a/tensorflow/core/kernels/deserialize_sparse_string_op.cc
+++ b/tensorflow/core/kernels/deserialize_sparse_string_op.cc
+@@ -35,6 +35,8 @@ limitations under the License.
+ 
+ namespace tensorflow {
+ 
+using CPUDevice = Eigen::ThreadPoolDevice;
+
+ namespace {
+ 
+ using sparse::SparseTensor;
+@@ -204,9 +206,9 @@ class DeserializeSparseOp : public OpKernel {
+       target_shape.vec<int64>()(i + ndims - 1) = output.shape().data()[i + 1];
+     }
+ 
+-    ReshapeSparseTensor(context, output.indices(), input_shape, target_shape,
+-                        0 /* output indices index */,
+-                        2 /* output shape index */);
+    ReshapeSparseTensor<CPUDevice>(context, output.indices(), input_shape,
+                                   target_shape, 0 /* output indices index */,
+                                   2 /* output shape index */);
+     context->set_output(1, output.values());
+   }
+ 
+diff --git a/tensorflow/core/kernels/reshape_util.cc b/tensorflow/core/kernels/reshape_util.cc
+index 1fce80f7..d0d54738 100644
+--- a/tensorflow/core/kernels/reshape_util.cc
+++ b/tensorflow/core/kernels/reshape_util.cc
+@@ -31,6 +31,53 @@ limitations under the License.
+ 
+ namespace tensorflow {
+ 
+using CPUDevice = Eigen::ThreadPoolDevice;
+
+namespace functor {
+
+template <>
+struct ReshapeSparseTensorFunctor<CPUDevice> {
+  Status operator()(const TensorShape &input_shape,
+                    const TensorShape &output_shape,
+                    typename TTypes<int64>::ConstMatrix input_indices,
+                    typename TTypes<int64>::Matrix output_indices) const {
+    const int64 input_rank = input_shape.dims();
+    const int64 output_rank = output_shape.dims();
+    const int64 nnz = input_indices.dimension(0);
+    gtl::InlinedVector<int64, 8> input_strides(input_rank);
+    if (input_rank > 0) {
+      input_strides[input_rank - 1] = 1;
+      for (int d = input_rank - 2; d >= 0; --d) {
+        input_strides[d] = input_strides[d + 1] * input_shape.dim_size(d + 1);
+      }
+    }
+
+    gtl::InlinedVector<int64, 8> output_strides(output_rank);
+    if (output_rank > 0) {
+      output_strides[output_rank - 1] = 1;
+      for (int d = output_rank - 2; d >= 0; --d) {
+        output_strides[d] =
+            output_strides[d + 1] * output_shape.dim_size(d + 1);
+      }
+    }
+
+    for (int i = 0; i < nnz; ++i) {
+      int64 id = 0;
+      for (int j = 0; j < input_rank; ++j) {
+        id += input_indices(i, j) * input_strides[j];
+      }
+      for (int j = 0; j < output_rank; ++j) {
+        output_indices(i, j) = id / output_strides[j];
+        id %= output_strides[j];
+      }
+    }
+    return Status::OK();
+  }
+};
+
+}  // namespace functor
+
+template <typename Device>
+ void ReshapeSparseTensor(OpKernelContext *context,
+                          const Tensor &input_indices_in,
+                          const Tensor &input_shape_in,
+@@ -111,40 +158,6 @@ void ReshapeSparseTensor(OpKernelContext *context,
+     return;
+   }
+ 
+-  gtl::InlinedVector<int64, 8> input_strides(input_rank);
+-  if (input_rank > 0) {
+-    input_strides[input_rank - 1] = 1;
+-    for (int d = input_rank - 2; d >= 0; --d) {
+-      input_strides[d] = input_strides[d + 1] * input_shape.dim_size(d + 1);
+-    }
+-  }
+-
+-  gtl::InlinedVector<int64, 8> output_strides(output_rank);
+-  if (output_rank > 0) {
+-    output_strides[output_rank - 1] = 1;
+-    for (int d = output_rank - 2; d >= 0; --d) {
+-      output_strides[d] = output_strides[d + 1] * output_shape.dim_size(d + 1);
+-    }
+-  }
+-
+-  Tensor *result_indices = nullptr;
+-  OP_REQUIRES_OK(context,
+-                 context->allocate_output(output_indices_idx,
+-                                          TensorShape({nnz, output_rank}),
+-                                          &result_indices));
+-  auto input_ind = input_indices_in.matrix<int64>();
+-  auto output_ind = result_indices->matrix<int64>();
+-  for (int i = 0; i < nnz; ++i) {
+-    int64 id = 0;
+-    for (int j = 0; j < input_rank; ++j) {
+-      id += input_ind(i, j) * input_strides[j];
+-    }
+-    for (int j = 0; j < output_rank; ++j) {
+-      output_ind(i, j) = id / output_strides[j];
+-      id %= output_strides[j];
+-    }
+-  }
+-
+   Tensor *result_shape = nullptr;
+   OP_REQUIRES_OK(context, context->allocate_output(output_shape_idx,
+                                                    TensorShape({output_rank}),
+@@ -153,6 +166,26 @@ void ReshapeSparseTensor(OpKernelContext *context,
+   for (int j = 0; j < output_shape.dims(); ++j) {
+     output_shape_vec(j) = output_shape.dim_size(j);
+   }
+
+  Tensor *result_indices = nullptr;
+  OP_REQUIRES_OK(context,
+                 context->allocate_output(output_indices_idx,
+                                          TensorShape({nnz, output_rank}),
+                                          &result_indices));
+  if (nnz > 0) {
+    OP_REQUIRES_OK(context, functor::ReshapeSparseTensorFunctor<Device>()(
+                                input_shape, output_shape,
+                                input_indices_in.matrix<int64>(),
+                                result_indices->matrix<int64>()));
+  }
+ }
+ 
+#define EXPLICITLY_INSTANTIATE_FUNCTION(Device)                    \
+  template void ReshapeSparseTensor<Device>(                       \
+      OpKernelContext *context, const Tensor &input_indices_in,    \
+      const Tensor &input_shape_in, const Tensor &target_shape_in, \
+      int output_indices_idx, int output_shape_idx)
+EXPLICITLY_INSTANTIATE_FUNCTION(CPUDevice);
+#undef EXPLICITLY_INSTANTIATE_FUNCTION
+
+ }  // namespace tensorflow
+diff --git a/tensorflow/core/kernels/reshape_util.h b/tensorflow/core/kernels/reshape_util.h
+index 7e1809e8..b3a35651 100644
+--- a/tensorflow/core/kernels/reshape_util.h
+++ b/tensorflow/core/kernels/reshape_util.h
+@@ -16,18 +16,36 @@ limitations under the License.
+ #ifndef TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_
+ #define TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_
+ 
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/lib/core/status.h"
+
+ namespace tensorflow {
+ 
+ class OpKernelContext;
+ class Tensor;
+ 
+ // Reshapes the input indices and input shape to the target shape.
+// Note: This template is explicitly instantiated for CPU device only.
+template <typename Device>
+ void ReshapeSparseTensor(OpKernelContext *context,
+                          const Tensor &input_indices_in,
+                          const Tensor &input_shape_in,
+                          const Tensor &target_shape_in, int output_indices_idx,
+                          int output_shape_idx);
+ 
+namespace functor {
+
+template <typename Device>
+struct ReshapeSparseTensorFunctor {
+  Status operator()(const TensorShape &input_shape,
+                    const TensorShape &output_shape,
+                    typename TTypes<int64>::ConstMatrix input_indices,
+                    typename TTypes<int64>::Matrix output_indices) const;
+};
+
+}  // namespace functor
+
+ }  // namespace tensorflow
+ 
+ #endif  // TENSORFLOW_CORE_KERNELS_RESHAPE_UTIL_H_
+diff --git a/tensorflow/core/kernels/sparse_reshape_op.cc b/tensorflow/core/kernels/sparse_reshape_op.cc
+index 3896c959..490d9ffd 100644
+--- a/tensorflow/core/kernels/sparse_reshape_op.cc
+++ b/tensorflow/core/kernels/sparse_reshape_op.cc
+@@ -30,6 +30,9 @@ limitations under the License.
+ 
+ namespace tensorflow {
+ 
+using CPUDevice = Eigen::ThreadPoolDevice;
+
+template <typename Device>
+ class SparseReshapeOp : public OpKernel {
+  public:
+   explicit SparseReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
+@@ -46,12 +49,13 @@ class SparseReshapeOp : public OpKernel {
+ 		input_indices_in.dim_size(1) == input_shape_in.dim_size(0),
+ 		errors::InvalidArgument(
+ 		    "Input tensor rank must match input shape length."));
+-    ReshapeSparseTensor(context, context->input(0), context->input(1),
+-                        context->input(2), 0 /* output indices index */,
+-                        1 /* output shape index */);
+    ReshapeSparseTensor<Device>(
+        context, context->input(0), context->input(1), context->input(2),
+	0 /* output indices index */, 1 /* output shape index */);
+   }
+ };
+ 
+ REGISTER_KERNEL_BUILDER(Name("SparseReshape").Device(DEVICE_CPU),
+-                        SparseReshapeOp)
+			SparseReshapeOp<CPUDevice>)
+
+ }  // namespace tensorflow
+-- 
+2.27.0
+
--- a/CVE-2021-37640-2.patch
+++ b/CVE-2021-37640-2.patch
@ -0,0 +1,34 @@
+From 4923de56ec94fff7770df259ab7f2288a74feb41 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Mon, 2 Aug 2021 13:52:28 -0700
+Subject: [PATCH] Don't do any work when reshaping 0 elements sparse tensor.
+
+If reshaping to 0 elements tensor, check that input has no elements.
+If reshaping no elements input, check that output has no elements.
+
+PiperOrigin-RevId: 388296986
+Change-Id: Iadc9fe7252e14313ca987e69bf0d7042fd10232a
+---
+ tensorflow/core/kernels/reshape_util.cc | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tensorflow/core/kernels/reshape_util.cc b/tensorflow/core/kernels/reshape_util.cc
+index d0d54738..9b520334 100644
+--- a/tensorflow/core/kernels/reshape_util.cc
+++ b/tensorflow/core/kernels/reshape_util.cc
+@@ -173,6 +173,12 @@ void ReshapeSparseTensor(OpKernelContext *context,
+                                           TensorShape({nnz, output_rank}),
+                                           &result_indices));
+   if (nnz > 0) {
+    OP_REQUIRES(
+        context, dense_size > 0 && product > 0,
+	errors::InvalidArgument(
+            "Input tensor has ", nnz, " non zero elements but input shape (",
+	    input_shape.DebugString(), ") or output shape (",
+	    output_shape.DebugString(), ") is empty"));
+     OP_REQUIRES_OK(context, functor::ReshapeSparseTensorFunctor<Device>()(
+                                 input_shape, output_shape,
+                                 input_indices_in.matrix<int64>(),
+-- 
+2.27.0
+
--- a/CVE-2021-37642.patch
+++ b/CVE-2021-37642.patch
@ -0,0 +1,70 @@
+From 4aacb30888638da75023e6601149415b39763d76 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Tue, 3 Aug 2021 12:28:58 -0700
+Subject: [PATCH] Disallow division by zero FPE in
+ `tf.raw_ops.ResourceScatterDiv`
+
+Had to update a test that was broken.
+
+PiperOrigin-RevId: 388516976
+Change-Id: Ic358e6bf0559e011539974d453fc7aa18b427e9c
+---
+ .../core/kernels/resource_variable_ops.cc     | 35 +++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
+index b9c883c7..e056d9cb 100644
+--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
+@@ -844,6 +844,35 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU);
+ #undef REGISTER_GATHER_ND_ALL_INDICES
+ #undef REGISTER_GATHER_ND_FULL
+ 
+namespace {
+
+template <typename Device>
+bool isCPUDevice() {
+  return false;
+}
+
+template <>
+bool isCPUDevice<CPUDevice>() {
+  return true;
+}
+
+template <typename T>
+bool ValidateInput(const Tensor& updates) {
+  const auto updates_flat = updates.flat<T>();
+  const T zero(0);
+  for (int i = 0; i < updates.NumElements(); i++) {
+    if (updates_flat(i) == zero) return false;
+  }
+  return true;
+}
+
+template <>
+bool ValidateInput<Variant>(const Tensor& updates) {
+  return true;
+}
+
+}  // namespace
+
+ template <typename Device, typename T, typename Index, scatter_op::UpdateOp op>
+ class ResourceScatterUpdateOp : public OpKernel {
+  public:
+@@ -910,6 +939,12 @@ class ResourceScatterUpdateOp : public OpKernel {
+                                 " indexing: ", params->dim_size(0), " > ",
+                                 std::numeric_limits<Index>::max()));
+ 
+    // Prevent division by 0
+    if (isCPUDevice<Device>() && op == tensorflow::scatter_op::UpdateOp::DIV) {
+      OP_REQUIRES(c, ValidateInput<T>(updates),
+                  errors::InvalidArgument("updates must not contain 0"));
+    }
+
+     if (N > 0) {
+       auto indices_flat = indices.flat<Index>();
+       auto params_flat = params->flat_outer_dims<T>();
+-- 
+2.27.0
+
--- a/CVE-2021-37643.patch
+++ b/CVE-2021-37643.patch
@ -0,0 +1,28 @@
+From 482da92095c4d48f8784b1f00dda4f81c28d2988 Mon Sep 17 00:00:00 2001
+From: Laura Pak <lpak@google.com>
+Date: Mon, 2 Aug 2021 15:07:31 -0700
+Subject: [PATCH] Ensure non-empty padding_value input to
+ tf.raw_ops.MatrixDiagPartV2, if a padding_value is input
+
+PiperOrigin-RevId: 388314614
+Change-Id: If0b51ad58d5d8543a6be6ce8f42ae4755c80d55f
+---
+ tensorflow/core/kernels/matrix_diag_op.cc | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/core/kernels/matrix_diag_op.cc b/tensorflow/core/kernels/matrix_diag_op.cc
+index b61dbe96a5d6a..1506df1712138 100644
+--- a/tensorflow/core/kernels/matrix_diag_op.cc
+++ b/tensorflow/core/kernels/matrix_diag_op.cc
+@@ -86,7 +86,10 @@ class MatrixDiagPartOp : public OpKernel {
+           upper_diag_index = diag_index.flat<int32>()(1);
+         }
+       }
+-      padding_value = context->input(2).flat<T>()(0);
+      const Tensor& padding_in = context->input(2);
+      OP_REQUIRES(context, padding_in.NumElements() == 1,
+                  errors::InvalidArgument("Padding must be scalar."));
+      padding_value = padding_in.flat<T>()(0);
+     }
+     const TensorShape& input_shape = input.shape();
+ 
--- a/CVE-2021-37651.patch
+++ b/CVE-2021-37651.patch
@ -0,0 +1,38 @@
+From 0f931751fb20f565c4e94aa6df58d54a003cdb30 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Mon, 2 Aug 2021 13:03:44 -0700
+Subject: [PATCH] Validate dimensions of input tensor in
+ `FractionalAvgPoolGrad`
+
+PiperOrigin-RevId: 388286227
+Change-Id: Ieb7566155e92acc8993a2212c76deacadc0edc8a
+---
+ tensorflow/core/kernels/fractional_avg_pool_op.cc | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/tensorflow/core/kernels/fractional_avg_pool_op.cc b/tensorflow/core/kernels/fractional_avg_pool_op.cc
+index 0452638a..a338dd95 100644
+--- a/tensorflow/core/kernels/fractional_avg_pool_op.cc
+++ b/tensorflow/core/kernels/fractional_avg_pool_op.cc
+@@ -271,6 +271,18 @@ class FractionalAvgPoolGradOp : public OpKernel {
+     const int64 in_rows = orig_input_tensor_shape_flat(1);
+     const int64 in_cols = orig_input_tensor_shape_flat(2);
+     const int64 in_depth = orig_input_tensor_shape_flat(3);
+    OP_REQUIRES(
+        context, in_batch != 0,
+	errors::InvalidArgument("Batch dimension of input must not be 0"));
+    OP_REQUIRES(
+        context, in_rows != 0,
+	errors::InvalidArgument("Rows dimension of input must not be 0"));
+    OP_REQUIRES(
+        context, in_cols != 0,
+	errors::InvalidArgument("Columns dimension of input must not be 0"));
+    OP_REQUIRES(
+        context, in_depth != 0,
+	errors::InvalidArgument("Depth dimension of input must not be 0"));
+ 
+     constexpr int tensor_in_and_out_dims = 4;
+     // Transform orig_input_tensor_shape into TensorShape
+-- 
+2.27.0
+
--- a/CVE-2021-37653.patch
+++ b/CVE-2021-37653.patch
@ -0,0 +1,47 @@
+From ac117ee8a8ea57b73d34665cdf00ef3303bc0b11 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 22:23:28 -0700
+Subject: [PATCH] Prevent division by 0 in `resource_variable_ops.cc`
+
+PiperOrigin-RevId: 387939939
+Change-Id: Ib04902d63756633999959a70613f2eaa30c2c151
+---
+ tensorflow/core/kernels/resource_variable_ops.cc | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
+index b9c883c7..f6d114a8 100644
+--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
+@@ -688,7 +688,8 @@ class ResourceGatherOp : public OpKernel {
+         copy_functor(c->eigen_device<Device>(), tmp_indices.flat<Index>(),
+                      indices.flat<Index>());
+ 
+-        AddBatchOffsets(&tmp_indices, params);
+        AddBatchOffsets(c, &tmp_indices, params);
+        if (!c->status().ok()) return;
+         op_indices = &tmp_indices;
+       }
+ 
+@@ -720,11 +721,17 @@ class ResourceGatherOp : public OpKernel {
+   // Example: batch_dims = 1, indices = [[0, 1, 2], [0, 1, 2]]
+   // If indexing into a params dimension of size 4, then the indices will become
+   // [0, 1, 2, 4, 5, 6]
+-  void AddBatchOffsets(Tensor* indices, const Tensor& params) {
+  void AddBatchOffsets(OpKernelContext* ctx, Tensor* indices,
+                       const Tensor& params) {
+     int64 batch_size = 1;  // The size of all batch dimensions.
+     for (int idx = 0; idx < batch_dims_; ++idx) {
+       batch_size *= params.dim_size(idx);
+     }
+    OP_REQUIRES(
+        ctx, batch_size != 0,
+	 errors::InvalidArgument(
+            "Inner size of indices would result in batch_size of 0 and a ",
+	     "division by 0 in the implementation. This is illegal"));
+ 
+     auto indices_flat = indices->flat<Index>();
+     int64 const index_inner_size = indices->NumElements() / batch_size;
+-- 
+2.27.0
+
--- a/CVE-2021-37654.patch
+++ b/CVE-2021-37654.patch
@ -0,0 +1,30 @@
+From bc9c546ce7015c57c2f15c168b3d9201de679a1d Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 21:37:59 -0700
+Subject: [PATCH] Prevent heap oob access in `resource_variable_ops.cc`
+
+PiperOrigin-RevId: 387936433
+Change-Id: I9e71ddaa8dbd51ec6afbf163a6b3b591f193b4f6
+---
+ tensorflow/core/kernels/resource_variable_ops.cc | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
+index b9c883c7..1d7e7c3c 100644
+--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
+@@ -643,6 +643,11 @@ class ResourceGatherOp : public OpKernel {
+     OP_REQUIRES(
+         c, TensorShapeUtils::IsVectorOrHigher(params.shape()),
+         errors::InvalidArgument("params must be at least 1 dimensional"));
+    OP_REQUIRES(
+        c, params.shape().dims() >= batch_dims_,
+	 errors::InvalidArgument("params must have at least ", batch_dims_,
+                                " (batch_dims) dimensions but it has shape ",
+				 params.shape().DebugString()));
+ 
+     // Check that we have enough index space
+     const int64 N = indices.NumElements();
+-- 
+2.27.0
+
--- a/CVE-2021-37655.patch
+++ b/CVE-2021-37655.patch
@ -0,0 +1,37 @@
+From 01cff3f986259d661103412a20745928c727326f Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Mon, 2 Aug 2021 13:33:05 -0700
+Subject: [PATCH] Fix heap OOB due to dimension mismatch in
+ `ResourceScatterUpdate`
+
+PiperOrigin-RevId: 388292801
+Change-Id: Id9bd7244d98d41b1517d4771850b32782c0cc949
+---
+ tensorflow/core/kernels/resource_variable_ops.cc | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
+index b9c883c7..1c4ebb46 100644
+--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
+@@ -926,11 +926,12 @@ class ResourceScatterUpdateOp : public OpKernel {
+                         params->dim_size(0), ")"));
+       } else {
+         int64 num_updates = updates.NumElements();
+-        OP_REQUIRES(c, num_updates % N == 0,
+-                    errors::InvalidArgument(
+-                        "shape of indices (", indices.shape().DebugString(),
+-                        ") is not compatible with the shape of updates (",
+-                        updates.shape().DebugString(), ")"));
+	OP_REQUIRES(
+            c, TensorShapeUtils::StartsWith(updates.shape(), indices.shape()),
+	     errors::InvalidArgument(
+		 "The shape of indices (", indices.shape().DebugString(),
+		 ") must be a prefix of the shape of updates (",
+		 updates.shape().DebugString(), ")"));
+         auto updates_flat = updates.shaped<T, 2>({N, num_updates / N});
+ 
+         functor::ScatterFunctor<Device, T, Index, op> functor;
+-- 
+2.27.0
+
--- a/CVE-2021-37657.patch
+++ b/CVE-2021-37657.patch
@ -0,0 +1,35 @@
+From f2a673bd34f0d64b8e40a551ac78989d16daad09 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 19:00:00 -0700
+Subject: [PATCH] Add missing validation to `matrix_diag_op.cc`
+
+PiperOrigin-RevId: 387923533
+Change-Id: Idfffeb328d5f9c6748d992d28a56d6e9e45103a0
+---
+ tensorflow/core/kernels/matrix_diag_op.cc | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tensorflow/core/kernels/matrix_diag_op.cc b/tensorflow/core/kernels/matrix_diag_op.cc
+index 2fd361e715b9b..b61dbe96a5d6a 100644
+--- a/tensorflow/core/kernels/matrix_diag_op.cc
+++ b/tensorflow/core/kernels/matrix_diag_op.cc
+@@ -73,6 +73,9 @@ class MatrixDiagPartOp : public OpKernel {
+                   errors::InvalidArgument(
+                       "diag_index must be a scalar or vector, received shape: ",
+                       diag_index.shape().DebugString()));
+      OP_REQUIRES(context, diag_index.NumElements() > 0,
+                  errors::InvalidArgument(
+                      "Expected diag_index to have at least 1 element"));
+       lower_diag_index = diag_index.flat<int32>()(0);
+       upper_diag_index = lower_diag_index;
+       if (TensorShapeUtils::IsVector(diag_index.shape())) {
+@@ -179,6 +182,9 @@ class MatrixDiagOp : public OpKernel {
+                   errors::InvalidArgument(
+                       "diag_index must be a scalar or vector, received shape: ",
+                       diag_index.shape().DebugString()));
+      OP_REQUIRES(context, diag_index.NumElements() > 0,
+                  errors::InvalidArgument(
+                      "Expected diag_index to have at least 1 element"));
+       lower_diag_index = diag_index.flat<int32>()(0);
+       upper_diag_index = lower_diag_index;
+       if (TensorShapeUtils::IsVector(diag_index.shape())) {
--- a/CVE-2021-37658.patch
+++ b/CVE-2021-37658.patch
@ -0,0 +1,25 @@
+From ff8894044dfae5568ecbf2ed514c1a37dc394f1b Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 18:58:29 -0700
+Subject: [PATCH] Add one missing validation to `matrix_set_diag_op.cc`
+
+PiperOrigin-RevId: 387923408
+Change-Id: If6a97b9098c13879400f56c22f91555cdf0ce5d7
+---
+ tensorflow/core/kernels/matrix_set_diag_op.cc | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tensorflow/core/kernels/matrix_set_diag_op.cc b/tensorflow/core/kernels/matrix_set_diag_op.cc
+index 07b6e69de67dc..4e89433718b46 100644
+--- a/tensorflow/core/kernels/matrix_set_diag_op.cc
+++ b/tensorflow/core/kernels/matrix_set_diag_op.cc
+@@ -70,6 +70,9 @@ class MatrixSetDiagOp : public OpKernel {
+                   errors::InvalidArgument(
+                       "diag_index must be a scalar or vector, received shape: ",
+                       diag_index.shape().DebugString()));
+      OP_REQUIRES(
+          context, diag_index.NumElements() > 0,
+         errors::InvalidArgument("diag_index must have at least one element"));
+       lower_diag_index = diag_index.flat<int32>()(0);
+       upper_diag_index = lower_diag_index;
+       if (TensorShapeUtils::IsVector(diag_index.shape())) {
--- a/CVE-2021-37661.patch
+++ b/CVE-2021-37661.patch
@ -0,0 +1,29 @@
+From 8a84f7a2b5a2b27ecf88d25bad9ac777cd2f7992 Mon Sep 17 00:00:00 2001
+From: Laura Pak <lpak@google.com>
+Date: Wed, 28 Jul 2021 15:34:04 -0700
+Subject: [PATCH] Ensure num_streams >= 0 in
+ tf.raw_ops.BoostedTreesCreateQuantileStreamResource
+
+PiperOrigin-RevId: 387452765
+Change-Id: I9990c760e177fabca6a3b9b4612ceeaeeba51495
+---
+ tensorflow/core/kernels/boosted_trees/quantile_ops.cc | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc
+index 0065bdd6..2a0ca6ae 100644
+--- a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc
+@@ -116,6 +116,9 @@ class BoostedTreesCreateQuantileStreamResourceOp : public OpKernel {
+     const Tensor* num_streams_t;
+     OP_REQUIRES_OK(context, context->input(kNumStreamsName, &num_streams_t));
+     int64 num_streams = num_streams_t->scalar<int64>()();
+    OP_REQUIRES(context, num_streams >= 0,
+                errors::InvalidArgument(
+		     "Num_streams input cannot be a negative integer"));
+ 
+     auto result =
+         new QuantileStreamResource(epsilon, max_elements_, num_streams);
+-- 
+2.27.0
+
--- a/CVE-2021-37662-1.patch
+++ b/CVE-2021-37662-1.patch
@ -0,0 +1,98 @@
+From 429f009d2b2c09028647dd4bb7b3f6f414bbaad7 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Wed, 28 Jul 2021 13:25:18 -0700
+Subject: [PATCH] Add remaining missing validation to
+ `BoostedTreesCalculateBestFeatureSplit`
+
+PiperOrigin-RevId: 387423006
+Change-Id: I8eaf30efb223011519e60707bfa751b275d3a443
+---
+ .../core/kernels/boosted_trees/stats_ops.cc   | 20 ++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+index 851e5b78..b26ab49e 100644
+--- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+@@ -14,6 +14,7 @@ limitations under the License.
+ ==============================================================================*/
+ 
+ #include <limits>
+#include <string>
+ #include <vector>
+ 
+ #include "third_party/eigen3/Eigen/Core"
+@@ -22,6 +23,7 @@ limitations under the License.
+ #include "tensorflow/core/framework/tensor_shape.h"
+ #include "tensorflow/core/kernels/boosted_trees/boosted_trees.pb.h"
+ #include "tensorflow/core/kernels/boosted_trees/tree_helper.h"
+#include "tensorflow/core/platform/errors.h"
+ #include "tensorflow/core/platform/logging.h"
+ 
+ namespace tensorflow {
+@@ -244,12 +246,18 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
+     // node_id_range
+     const Tensor* node_id_range_t;
+     OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t));
+    OP_REQUIRES(
+        context, node_id_range_t->NumElements() == 2,
+	errors::InvalidArgument("node_id_range argument must have shape [2]"));
+     const auto node_id_range = node_id_range_t->vec<int32>();
+     const int32 node_id_first = node_id_range(0);  // inclusive
+     const int32 node_id_last = node_id_range(1);   // exclusive
+ 
+     const Tensor* stats_summary_t;
+     OP_REQUIRES_OK(context, context->input("stats_summary", &stats_summary_t));
+    OP_REQUIRES(
+        context, stats_summary_t->shape().dims() == 4,
+	errors::InvalidArgument("stats_summary argument must have rank 4"));
+     TTypes<float, 4>::ConstTensor stats_summary =
+         stats_summary_t->tensor<float, 4>();
+     const int32 feature_dims = stats_summary_t->dim_size(1);
+@@ -262,6 +270,8 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
+ 
+     const Tensor* l1_t;
+     OP_REQUIRES_OK(context, context->input("l1", &l1_t));
+    OP_REQUIRES(context, l1_t->NumElements() == 1,
+                errors::InvalidArgument("l1 argument must be a scalar"));
+     const auto l1 = l1_t->scalar<float>()();
+     DCHECK_GE(l1, 0);
+     if (logits_dim_ > 1) {
+@@ -271,17 +281,25 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
+ 
+     const Tensor* l2_t;
+     OP_REQUIRES_OK(context, context->input("l2", &l2_t));
+    OP_REQUIRES(context, l2_t->NumElements() == 1,
+                errors::InvalidArgument("l2 argument must be a scalar"));
+     const auto l2 = l2_t->scalar<float>()();
+     DCHECK_GE(l2, 0);
+ 
+     const Tensor* tree_complexity_t;
+     OP_REQUIRES_OK(context,
+                    context->input("tree_complexity", &tree_complexity_t));
+    OP_REQUIRES(
+        context, tree_complexity_t->NumElements() == 1,
+       errors::InvalidArgument("tree_complexity argument must be a scalar"));
+     const auto tree_complexity = tree_complexity_t->scalar<float>()();
+ 
+     const Tensor* min_node_weight_t;
+     OP_REQUIRES_OK(context,
+                    context->input("min_node_weight", &min_node_weight_t));
+    OP_REQUIRES(
+        context, min_node_weight_t->NumElements() == 1,
+       errors::InvalidArgument("min_node_weight argument must be a scalar"));
+     const auto min_node_weight = min_node_weight_t->scalar<float>()();
+ 
+     std::vector<int32> output_node_ids;
+@@ -290,7 +308,7 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
+     std::vector<int32> output_thresholds;
+     std::vector<Eigen::VectorXf> output_left_node_contribs;
+     std::vector<Eigen::VectorXf> output_right_node_contribs;
+-    std::vector<string> output_split_types;
+    std::vector<std::string> output_split_types;
+ 
+     // TODO(tanzheny) parallelize the computation.
+     // Iterate each node and find the best gain per node.
+-- 
+2.27.0
+
--- a/CVE-2021-37662-2.patch
+++ b/CVE-2021-37662-2.patch
@ -0,0 +1,54 @@
+From 9c87c32c710d0b5b53dc6fd3bfde4046e1f7a5ad Mon Sep 17 00:00:00 2001
+From: Laura Pak <lpak@google.com>
+Date: Tue, 27 Jul 2021 12:11:33 -0700
+Subject: [PATCH] Disallow empty node_id_range in
+ tf.raw_ops.BoostedTreesCalculateBestFeatureSplitV2 and
+ tf.raw_ops.BoostedTreesCalculateBestGainsPerFeature
+
+PiperOrigin-RevId: 387165936
+Change-Id: I2f70341af96236b2776c2a592c917d549c1fc1e2
+---
+ .../core/kernels/boosted_trees/stats_ops.cc   | 20 +++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+index 73e5b85f..48ace868 100644
+--- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+@@ -53,6 +53,16 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel {
+     // node_id_range
+     const Tensor* node_id_range_t;
+     OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t));
+    OP_REQUIRES(
+        context, node_id_range_t->dims() == 1,
+	errors::InvalidArgument("node_id_range must be a rank 1 tensor, but "
+                                "given node_id_range has dims of ",
+				node_id_range_t->dims()));
+    OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2,
+                errors::InvalidArgument(
+                    "node_id_range must be a rank 1 tensor with shape=[2], but "
+		    "given node_id_range has shape ",
+		    node_id_range_t->dim_size(0), " on its first dim"));
+     const auto node_id_range = node_id_range_t->vec<int32>();
+     const int32 node_id_first = node_id_range(0);  // inclusive
+     const int32 node_id_last = node_id_range(1);   // exclusive
+@@ -586,6 +596,16 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel {
+     const Tensor* node_id_range_t;
+     OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t));
+     const auto node_id_range = node_id_range_t->vec<int32>();
+    OP_REQUIRES(
+        context, node_id_range_t->dims() == 1,
+        errors::InvalidArgument("node_id_range must be a rank 1 tensor, but "
+                                "given node_id_range has dim of ",
+			        node_id_range_t->dims()));
+    OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2,
+                errors::InvalidArgument(
+		    "node_id_range must be a rank 1 tensor with shape=[2], but "
+	            "given node_id_range has shape ",
+	            node_id_range_t->dim_size(0), " on its first dim"));	    
+     const int32 node_id_first = node_id_range(0);  // Inclusive.
+     const int32 node_id_last = node_id_range(1);   // Exclusive.
+ 
+-- 
+2.27.0
+
--- a/CVE-2021-37664.patch
+++ b/CVE-2021-37664.patch
@ -0,0 +1,34 @@
+From e84c975313e8e8e38bb2ea118196369c45c51378 Mon Sep 17 00:00:00 2001
+From: Laura Pak <lpak@google.com>
+Date: Tue, 27 Jul 2021 12:35:03 -0700
+Subject: [PATCH] In tf.raw_ops.BoostedTreesSparseCalculateBestFeatureSplit,
+ limit stat_dim in stats_summary_indices to under stats_dims in
+ stats_summary_shape
+
+PiperOrigin-RevId: 387171191
+Change-Id: I83ca8a75b22aa78c037e8b98779da6cced16bfaa
+---
+ tensorflow/core/kernels/boosted_trees/stats_ops.cc | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+index 851e5b78..032bbfdf 100644
+--- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc
+@@ -1025,6 +1025,13 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
+       const int32 feature_dim = stats_summary_indices(idx, 1);
+       const int32 bucket_id = stats_summary_indices(idx, 2);
+       const int32 stat_dim = stats_summary_indices(idx, 3);
+      OP_REQUIRES(context, stat_dim < stats_dims,
+		  errors::InvalidArgument(
+		      "Stat dim, the sum of logits dim and hessian dim in "
+		      "stats_summary_indices, cannot be greater than stats "
+		      "dims, the last value in stats_summary_shape, which was ",
+		      stats_dims, ". At index (", idx,
+		      ", 4), stats_summary_indices contains value ", stat_dim));
+       std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert(
+           FeatureMapIterator::value_type(feature_dim, BucketMap()));
+       auto& b_map = f_insert_result.first->second;
+-- 
+2.27.0
+
--- a/CVE-2021-37665-1.patch
+++ b/CVE-2021-37665-1.patch
@ -0,0 +1,76 @@
+From 203214568f5bc237603dbab6e1fd389f1572f5c9 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 16:06:23 -0700
+Subject: [PATCH] Reorganize and add more validation to MKL requantization
+
+PiperOrigin-RevId: 387901341
+Change-Id: I2515b9034c64e113db0bcec8337d30643ab0a0f1
+---
+ .../mkl_requantize_per_channel_op.cc      | 40 ++++++++++++-------
+ 1 file changed, 25 insertions(+), 15 deletions(-)
+
+diff --git a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc
+index c0f9845cd4b08..6ffbd09b44f54 100644
+--- a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc
+++ b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc
+@@ -49,35 +49,45 @@ class MklRequantizePerChannelOp : public OpKernel {
+   void Compute(OpKernelContext* ctx) override {
+     try {
+       const Tensor& input = ctx->input(kInputTensorIndex);
+      OP_REQUIRES(
+          ctx, input.dims() == 4,
+          errors::InvalidArgument("Current RequantizePerChannel operator"
+                                  "supports 4D tensors only."));
+
+       const Tensor& input_min_vec = ctx->input(kInputMinVecIndex);
+      size_t depth = input_min_vec.NumElements();
+       float* input_min_vec_data = (float*)const_cast<void*>(
+           static_cast<const void*>(input_min_vec.flat<float>().data()));
+
+       const Tensor& input_max_vec = ctx->input(kInputMaxVecIndex);
+      OP_REQUIRES(
+          ctx, input_max_vec.NumElements() == depth,
+          errors::InvalidArgument("input_max has incorrect size, expected ",
+                                  depth, " was ", input_max_vec.NumElements()));
+       float* input_max_vec_data = (float*)const_cast<void*>(
+           static_cast<const void*>(input_max_vec.flat<float>().data()));
+ 
+       const Tensor& input_requested_min = ctx->input(this->kRequestMinIndex);
+      OP_REQUIRES(
+          ctx, input_requested_min.NumElements() == 1,
+          errors::InvalidArgument("requested_output_min must be a scalar"));
+       const float input_requested_min_float =
+           input_requested_min.flat<float>()(0);
+
+       const Tensor& input_requested_max = ctx->input(this->kRequestMaxIndex);
+      OP_REQUIRES(
+          ctx, input_requested_min.NumElements() == 1,
+          errors::InvalidArgument("requested_output_max must be a scalar"));
+       const float input_requested_max_float =
+           input_requested_max.flat<float>()(0);
+ 
+-      size_t depth = input_min_vec.NumElements();
+-      OP_REQUIRES(
+-          ctx, input.dims() == 4,
+-          errors::InvalidArgument("Current RequantizePerChannel operator"
+-                                  "supports 4D tensors only."));
+-      OP_REQUIRES(
+-          ctx, input_min_vec.dim_size(0) == depth,
+-          errors::InvalidArgument("input_min has incorrect size, expected ",
+-                                  depth, " was ", input_min_vec.dim_size(0)));
+-      OP_REQUIRES(
+-          ctx, input_max_vec.dim_size(0) == depth,
+-          errors::InvalidArgument("input_max has incorrect size, expected ",
+-                                  depth, " was ", input_max_vec.dim_size(0)));
+-
+-      if (out_type_ == DT_QINT8) DCHECK(input_requested_min_float < 0.0f);
+      if (out_type_ == DT_QINT8) {
+        OP_REQUIRES(ctx, input_requested_min_float < 0.0f,
+                    errors::InvalidArgument(
+                        "If out_type is QINT8, requested_output_max must be "
+                        "non negative, got ",
+                        input_requested_min_float));
+      }
+ 
+       const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f;
+       const float requested_min_max =
--- a/CVE-2021-37665-2.patch
+++ b/CVE-2021-37665-2.patch
@ -0,0 +1,36 @@
+From 9e62869465573cb2d9b5053f1fa02a81fce21d69 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Thu, 29 Jul 2021 16:29:20 -0700
+Subject: [PATCH] Add more validation to `RequantizationRangePerChannel`.
+
+PiperOrigin-RevId: 387693946
+Change-Id: Ife8dcbdb021bec4787eef6a4361dd08f17c14bd6
+---
+ .../mkl_requantization_range_per_channel_op.cc | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
+index 24dabb07ca067..a38df2450d194 100644
+--- a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
+++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
+@@ -57,6 +57,20 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
+         ctx, input_max.dim_size(0) == depth,
+         errors::InvalidArgument("input_max has incorrect size, expected ",
+                                 depth, " was ", input_max.dim_size(0)));
+    OP_REQUIRES(
+        ctx, input_min.NumElements() == depth,
+        errors::InvalidArgument("input_min must have the same number of "
+                                "elements as input_max, got ",
+                                input_min.NumElements(), " and ", depth));
+    OP_REQUIRES(ctx, input.NumElements() > 0,
+                errors::InvalidArgument("input must not be empty"));
+    OP_REQUIRES(ctx, input.dims() == 4,
+                errors::InvalidArgument("input must be in NHWC format"));
+    OP_REQUIRES(
+        ctx, input.dim_size(3) == depth,
+        errors::InvalidArgument(
+            "input must have same number of channels as length of input_min: ",
+            input.dim_size(3), " vs ", depth));
+ 
+     const float* input_min_data = input_min.flat<float>().data();
+     const float* input_max_data = input_max.flat<float>().data();
--- a/CVE-2021-37666.patch
+++ b/CVE-2021-37666.patch
@ -0,0 +1,32 @@
+From be7a4de6adfbd303ce08be4332554dff70362612 Mon Sep 17 00:00:00 2001
+From: Laura Pak <lpak@google.com>
+Date: Thu, 29 Jul 2021 14:05:34 -0700
+Subject: [PATCH] Ensure non-empty rt_nested_splits in
+ tf.raw_ops.RaggedTensorToVariant
+
+PiperOrigin-RevId: 387664237
+Change-Id: Ia1700c34b5610873d63561abc86e23b46ead93b3
+---
+ tensorflow/core/kernels/ragged_tensor_to_variant_op.cc | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc
+index 7a5ae1c6..3190534b 100644
+--- a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc
+++ b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc
+@@ -173,6 +173,12 @@ class RaggedTensorToVariantOp : public OpKernel {
+       return;
+     }
+ 
+    // Checked here instead of at input in case batched_input_ is false
+    OP_REQUIRES(context, ragged_nested_splits_len > 0,
+		errors::InvalidArgument(
+		    "rt_nested_splits must be a list of one or more, but "
+		    "received rt_nested_splits of length 0."));
+
+     // Unbatch the Ragged Tensor and encode the components.
+     std::vector<RaggedTensor> ragged_components;
+     OP_REQUIRES_OK(context, UnbatchRaggedZerothDim<VALUE_TYPE, SPLIT_TYPE>(
+-- 
+2.27.0
+
--- a/CVE-2021-37668.patch
+++ b/CVE-2021-37668.patch
@ -0,0 +1,48 @@
+From a776040a5e7ebf76eeb7eb923bf1ae417dd4d233 Mon Sep 17 00:00:00 2001
+From: Laura Pak <lpak@google.com>
+Date: Mon, 12 Jul 2021 11:55:27 -0700
+Subject: [PATCH] Disallow dims input of 0 in tf.raw_ops.UnravelIndex
+
+PiperOrigin-RevId: 384284198
+Change-Id: Ia1804ef1aec57b4d857ea507e6891bcccde18e9b
+---
+ tensorflow/core/kernels/unravel_index_op.cc      | 9 +++++++++
+ tensorflow/python/kernel_tests/array_ops_test.py | 2 +-
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/core/kernels/unravel_index_op.cc b/tensorflow/core/kernels/unravel_index_op.cc
+index b45ff5e5..fff4527d 100644
+--- a/tensorflow/core/kernels/unravel_index_op.cc
+++ b/tensorflow/core/kernels/unravel_index_op.cc
+@@ -53,6 +53,15 @@ class UnravelIndexOp : public OpKernel {
+                                 dims_tensor.shape().DebugString(), "\""));
+ 
+     auto dims = dims_tensor.vec<Tidx>();
+    // Make sure dims does not contain a zero
+    for (int i = 0; i < dims.size(); i++) {
+      OP_REQUIRES(
+	  ctx, dims(i) != 0,
+	  errors::InvalidArgument("Input dims cannot contain a dim of zero, "
+		                  "but dims contains zero at index ",
+				  i));
+
+    } 
+ 
+     // Chek to make sure indices is not out of boundary
+     Eigen::Tensor<Tidx, 0, Eigen::RowMajor> dims_prod_eigen = dims.prod();
+diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
+index dbff3a1b..6ce6a9e6 100644
+--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
+@@ -1441,7 +1441,7 @@ class UnravelIndexTest(test_util.TensorFlowTestCase):
+     with self.cached_session():
+       for dtype in [dtypes.int32, dtypes.int64]:
+         with self.assertRaisesRegexp(errors.InvalidArgumentError,
+-                                     "index is out of bound as with dims"):
+                                     "dims cannot contain a dim of zero"):
+           indices = constant_op.constant([2, 5, 7], dtype=dtype)
+           dims = constant_op.constant([3, 0], dtype=dtype)
+           self.evaluate(array_ops.unravel_index(indices=indices, dims=dims))
+-- 
+2.27.0
+
--- a/CVE-2021-37669-1.patch
+++ b/CVE-2021-37669-1.patch
@ -0,0 +1,24 @@
+From b5cdbf12ffcaaffecf98f22a6be5a64bb96e4f58 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Thu, 29 Jul 2021 22:24:52 -0700
+Subject: [PATCH] Prevent overflow due to integer conversion to unsigned.
+
+PiperOrigin-RevId: 387738045
+Change-Id: Id7e95bc07e02df1c66b72bd09f389608c87bdebe
+---
+ tensorflow/core/kernels/non_max_suppression_op.cc | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tensorflow/core/kernels/non_max_suppression_op.cc b/tensorflow/core/kernels/non_max_suppression_op.cc
+index 5cb721ed7105f..69b05cc9d84f8 100644
+--- a/tensorflow/core/kernels/non_max_suppression_op.cc
+++ b/tensorflow/core/kernels/non_max_suppression_op.cc
+@@ -921,6 +921,8 @@ class CombinedNonMaxSuppressionOp : public OpKernel {
+         errors::InvalidArgument("max_size_per_class must be 0-D, got shape ",
+                                 max_output_size.shape().DebugString()));
+     const int max_size_per_class = max_output_size.scalar<int>()();
+    OP_REQUIRES(context, max_size_per_class > 0,
+                errors::InvalidArgument("max_size_per_class must be positive"));
+     // max_total_size: scalar
+     const Tensor& max_total_size = context->input(3);
+     OP_REQUIRES(
--- a/CVE-2021-37669-2.patch
+++ b/CVE-2021-37669-2.patch
@ -0,0 +1,45 @@
+From 3a7362750d5c372420aa8f0caf7bf5b5c3d0f52d Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 22:02:22 -0700
+Subject: [PATCH] Prevent crash/heap OOB due to integer conversion to unsigned
+ in NMS kernels
+
+PiperOrigin-RevId: 387938262
+Change-Id: Id361a715307e7179977cf5c64391c199a966f2ad
+---
+ tensorflow/core/kernels/non_max_suppression_op.cc | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/tensorflow/core/kernels/non_max_suppression_op.cc b/tensorflow/core/kernels/non_max_suppression_op.cc
+index 69b05cc9d84f8..1ec4c853f5f5b 100644
+--- a/tensorflow/core/kernels/non_max_suppression_op.cc
+++ b/tensorflow/core/kernels/non_max_suppression_op.cc
+@@ -161,6 +161,8 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores,
+                            bool pad_to_max_output_size = false,
+                            int* ptr_num_valid_outputs = nullptr) {
+   const int output_size = max_output_size.scalar<int>()();
+  OP_REQUIRES(context, output_size >= 0,
+              errors::InvalidArgument("output size must be non-negative"));
+ 
+   std::vector<T> scores_data(num_boxes);
+   std::copy_n(scores.flat<T>().data(), num_boxes, scores_data.begin());
+@@ -759,6 +761,9 @@ class NonMaxSuppressionV4Op : public OpKernel {
+         context, scores, num_boxes, max_output_size, iou_threshold_val,
+         score_threshold_val, dummy_soft_nms_sigma, similarity_fn,
+         return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
+    if (!context->status().ok()) {
+      return;
+    }
+ 
+     // Allocate scalar output tensor for number of indices computed.
+     Tensor* num_outputs_t = nullptr;
+@@ -836,6 +841,9 @@ class NonMaxSuppressionV5Op : public OpKernel {
+         context, scores, num_boxes, max_output_size, iou_threshold_val,
+         score_threshold_val, soft_nms_sigma_val, similarity_fn,
+         return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
+    if (!context->status().ok()) {
+      return;
+    }
+ 
+     // Allocate scalar output tensor for number of indices computed.
+     Tensor* num_outputs_t = nullptr;
--- a/CVE-2021-37674.patch
+++ b/CVE-2021-37674.patch
@ -0,0 +1,51 @@
+From 136b51f10903e044308cf77117c0ed9871350475 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 30 Jul 2021 20:50:00 -0700
+Subject: [PATCH] Add missing validation to `maxpooling_op.cc`
+
+PiperOrigin-RevId: 387932441
+Change-Id: I43a0b24e6a12cc965611144ba035accd384594b9
+---
+ tensorflow/core/kernels/maxpooling_op.cc      | 5 +++++
+ tensorflow/core/kernels/pooling_ops_common.cc | 2 ++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
+index 7accd1a8..27ff9005 100644
+--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
+@@ -68,6 +68,7 @@ static void SpatialMaxPoolWithArgMaxHelper(
+             "SpatialMaxPoolWithArgMaxHelper requires include_batch_in_index "
+             "to be True when when input_backprop != nullptr"));
+   }
+  if (tensor_in.NumElements() == 0 || output->NumElements() == 0) return;
+ 
+   typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
+       ConstEigenMatrixMap;
+@@ -783,6 +784,10 @@ class MaxPoolingNoMaskOp : public OpKernel {
+ 
+   void Compute(OpKernelContext* context) override {
+     const Tensor& tensor_in = context->input(0);
+    OP_REQUIRES(context, tensor_in.dims() == 4,
+		errors::InvalidArgument("tensor_in must be 4-dimensional (2)"));
+    OP_REQUIRES(context, tensor_in.NumElements() > 0,
+		errors::InvalidArgument("tensor_in must not be empty (2)"));
+ 
+     PoolParameters params{context,  ksize_,       stride_,
+                           padding_, data_format_, tensor_in.shape()};
+diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc
+index 4bd71054..2af93960 100644
+--- a/tensorflow/core/kernels/pooling_ops_common.cc
+++ b/tensorflow/core/kernels/pooling_ops_common.cc
+@@ -96,6 +96,8 @@ PoolParameters::PoolParameters(OpKernelContext* context,
+     pad_depth = 0;
+     out_depth = depth;
+   } else {
+    OP_REQUIRES(context, depth_window > 0,
+		errors::InvalidArgument("depth_window must not be 0"));
+     // Our current version of depthwise max pooling does not support
+     // any padding, and expects the depth_window to equal the
+     // depth_stride (no overlapping).
+-- 
+2.27.0
+
--- a/CVE-2021-37675.patch
+++ b/CVE-2021-37675.patch
@ -0,0 +1,64 @@
+From 8a793b5d7f59e37ac7f3cd0954a750a2fe76bad4 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Thu, 29 Jul 2021 18:23:45 -0700
+Subject: [PATCH] Prevent division by 0 in common shape functions.
+
+PiperOrigin-RevId: 387712197
+Change-Id: Id25c7460e35b68aeeeac23b9a88e455b443ee149
+---
+ tensorflow/core/framework/common_shape_fns.cc | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
+index b9efddf4..e25d5581 100644
+--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
+@@ -659,6 +659,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c,
+   if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) {
+     int64 input_depth_value = c->Value(input_depth_dim),
+           filter_input_depth_value = c->Value(filter_input_depth_dim);
+    if (filter_input_depth_value == 0)
+      return errors::InvalidArgument("Depth of filter must not be 0");
+     if (input_depth_value % filter_input_depth_value != 0)
+       return errors::InvalidArgument(
+           "Depth of input (", input_depth_value,
+@@ -668,6 +670,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c,
+       int64 num_groups = input_depth_value / filter_input_depth_value;
+       if (c->ValueKnown(output_depth_dim)) {
+         int64 output_depth_value = c->Value(output_depth_dim);
+	if (num_groups == 0)
+	  return errors::InvalidArgument("Number of groups must not be 0");
+         if (output_depth_value % num_groups != 0)
+           return errors::InvalidArgument(
+               "Depth of output (", output_depth_value,
+@@ -798,6 +802,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) {
+   if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) {
+     int64 input_depth_value = c->Value(input_depth_dim),
+           filter_input_depth_value = c->Value(filter_input_depth_dim);
+    if (filter_input_depth_value == 0)
+      return errors::InvalidArgument("Depth of filter must not be 0");
+     if (input_depth_value % filter_input_depth_value != 0)
+       return errors::InvalidArgument(
+           "Depth of input (", input_depth_value,
+@@ -807,6 +813,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) {
+       int64 num_groups = input_depth_value / filter_input_depth_value;
+       if (c->ValueKnown(output_depth_dim)) {
+         int64 output_depth_value = c->Value(output_depth_dim);
+	if (num_groups == 0)
+          return errors::InvalidArgument("Number of groups must not be 0");
+         if (output_depth_value % num_groups != 0)
+           return errors::InvalidArgument(
+               "Depth of output (", output_depth_value,
+@@ -2364,6 +2372,9 @@ Status SparseReduceShapeFn(InferenceContext* c) {
+ 
+     int64 ndims = shape_vec.size();
+     absl::flat_hash_set<int64> axes;
+    if (ndims == 0)
+      return errors::InvalidArgument(
+	  "Number of dims in shape tensor must not be 0");
+     for (int i = 0; i < axes_vec.size(); i++) {
+       axes.insert((axes_vec(i) + ndims) % ndims);
+     }
+-- 
+2.27.0
+
--- a/CVE-2021-37678.patch
+++ b/CVE-2021-37678.patch
@ -0,0 +1,179 @@
+From 23d6383eb6c14084a8fc3bdf164043b974818012 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Tue, 3 Aug 2021 11:20:20 -0700
+Subject: [PATCH] Use the safer `safe_load` function instead of `unsafe_load`
+ when possible
+
+There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety.
+
+PiperOrigin-RevId: 388501098
+Change-Id: I3434318a5e07a798490533b554f46752397837e5
+---
+ tensorflow/python/keras/engine/functional.py  |  2 +-
+ .../python/keras/engine/functional_test.py    | 13 -------
+ tensorflow/python/keras/engine/training.py    | 18 ++++-----
+ .../python/keras/saving/model_config.py       | 38 ++++---------------
+ 4 files changed, 17 insertions(+), 54 deletions(-)
+
+diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py
+index fd80e7f8..b0cf778a 100644
+--- a/tensorflow/python/keras/engine/functional.py
+++ b/tensorflow/python/keras/engine/functional.py
+@@ -58,7 +58,7 @@ class Functional(training_lib.Model):
+   than with subclassed `Model`s, specifically:
+ 
+   - Model cloning (`keras.models.clone`)
+-  - Serialization (`model.get_config()/from_config`, `model.to_json()/to_yaml()`
+  - Serialization (`model.get_config()/from_config`, `model.to_json()`)
+   - Whole-model saving (`model.save()`)
+ 
+   A `Functional` model can be instantiated by passing two arguments to
+diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py
+index b60373e8..c91026a6 100644
+--- a/tensorflow/python/keras/engine/functional_test.py
+++ b/tensorflow/python/keras/engine/functional_test.py
+@@ -52,11 +52,6 @@ from tensorflow.python.ops.ragged import ragged_factory_ops
+ from tensorflow.python.platform import test
+ from tensorflow.python.training.tracking.util import Checkpoint
+ 
+-try:
+-  import yaml  # pylint:disable=g-import-not-at-top
+-except ImportError:
+-  yaml = None
+-
+ 
+ class NetworkConstructionTest(keras_parameterized.TestCase):
+ 
+@@ -620,10 +615,6 @@ class NetworkConstructionTest(keras_parameterized.TestCase):
+       json_str = model.to_json()
+       models.model_from_json(json_str)
+ 
+-      if yaml is not None:
+-        yaml_str = model.to_yaml()
+-        models.model_from_yaml(yaml_str)
+-
+   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
+   def test_invalid_graphs(self):
+     a = layers.Input(shape=(32,), name='input_a')
+@@ -1261,10 +1252,6 @@ class NetworkConstructionTest(keras_parameterized.TestCase):
+     json_str = model.to_json()
+     models.model_from_json(json_str)
+ 
+-    if yaml is not None:
+-      yaml_str = model.to_yaml()
+-      models.model_from_yaml(yaml_str)
+-
+   def test_subclassed_error_if_init_not_called(self):
+ 
+     class MyNetwork(training_lib.Model):
+diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
+index a0ebec4f..e000e62f 100644
+--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
+@@ -88,11 +88,6 @@ try:
+   import h5py
+ except ImportError:
+   h5py = None
+-
+-try:
+-  import yaml
+-except ImportError:
+-  yaml = None
+ # pylint: enable=g-import-not-at-top
+ 
+ 
+@@ -2258,6 +2253,9 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector):
+   def to_yaml(self, **kwargs):
+     """Returns a yaml string containing the network configuration.
+ 
+    Note: Since TF 2.6, this method is no longer supported and will raise a
+    RuntimeError.
+
+     To load a network from a yaml save file, use
+     `keras.models.model_from_yaml(yaml_string, custom_objects={})`.
+ 
+@@ -2273,12 +2271,12 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector):
+         A YAML string.
+ 
+     Raises:
+-        ImportError: if yaml module is not found.
+        RuntimeError: announces that the method poses a security risk
+     """
+-    if yaml is None:
+-      raise ImportError(
+-          'Requires yaml module installed (`pip install pyyaml`).')
+-    return yaml.dump(self._updated_config(), **kwargs)
+    raise RuntimeError(
+        'Method `model.to_yaml()` has been removed due to security risk of '
+        'arbitrary code execution. Please use `model.to_json()` instead.'
+    )
+ 
+   def reset_states(self):
+     for layer in self.layers:
+diff --git a/tensorflow/python/keras/saving/model_config.py b/tensorflow/python/keras/saving/model_config.py
+index 63f82b40..344e543f 100644
+--- a/tensorflow/python/keras/saving/model_config.py
+++ b/tensorflow/python/keras/saving/model_config.py
+@@ -23,13 +23,6 @@ import json
+ 
+ from tensorflow.python.util.tf_export import keras_export
+ 
+-# pylint: disable=g-import-not-at-top
+-try:
+-  import yaml
+-except ImportError:
+-  yaml = None
+-# pylint: enable=g-import-not-at-top
+-
+ 
+ @keras_export('keras.models.model_from_config')
+ def model_from_config(config, custom_objects=None):
+@@ -59,17 +52,8 @@ def model_from_config(config, custom_objects=None):
+ def model_from_yaml(yaml_string, custom_objects=None):
+   """Parses a yaml model configuration file and returns a model instance.
+ 
+-  Usage:
+-
+-  >>> model = tf.keras.Sequential([
+-  ...     tf.keras.layers.Dense(5, input_shape=(3,)),
+-  ...     tf.keras.layers.Softmax()])
+-  >>> try:
+-  ...   import yaml
+-  ...   config = model.to_yaml()
+-  ...   loaded_model = tf.keras.models.model_from_yaml(config)
+-  ... except ImportError:
+-  ...   pass
+  Note: Since TF 2.6, this method is no longer supported and will raise a
+  RuntimeError.
+ 
+   Arguments:
+       yaml_string: YAML string or open file encoding a model configuration.
+@@ -81,19 +65,13 @@ def model_from_yaml(yaml_string, custom_objects=None):
+       A Keras model instance (uncompiled).
+ 
+   Raises:
+-      ImportError: if yaml module is not found.
+      RuntimeError: announces that the method poses a security risk
+   """
+-  if yaml is None:
+-    raise ImportError('Requires yaml module installed (`pip install pyyaml`).')
+-  # The method unsafe_load only exists in PyYAML 5.x+, so which branch of the
+-  # try block is covered by tests depends on the installed version of PyYAML.
+-  try:
+-    # PyYAML 5.x+
+-    config = yaml.unsafe_load(yaml_string)
+-  except AttributeError:
+-    config = yaml.load(yaml_string)
+-  from tensorflow.python.keras.layers import deserialize  # pylint: disable=g-import-not-at-top
+-  return deserialize(config, custom_objects=custom_objects)
+  raise RuntimeError(
+      'Method `model_from_yaml()` has been removed due to security risk of '
+      'arbitrary code execution. Please use `Model.to_json()` and '
+      '`model_from_json()` instead.'
+  )
+ 
+ 
+ @keras_export('keras.models.model_from_json')
+-- 
+2.27.0
+
--- a/CVE-2021-37683.patch
+++ b/CVE-2021-37683.patch
@ -0,0 +1,51 @@
+From 1e206baedf8bef0334cca3eb92bab134ef525a28 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Fri, 16 Jul 2021 14:23:21 -0700
+Subject: [PATCH] Prevent a division by 0 in division ops.
+
+PiperOrigin-RevId: 385223169
+Change-Id: Ia4228960b5d2aa44480385f74bdd70d21a3613c3
+---
+ tensorflow/lite/kernels/div.cc | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/lite/kernels/div.cc b/tensorflow/lite/kernels/div.cc
+index c9eb1db5..aafe00f0 100644
+--- a/tensorflow/lite/kernels/div.cc
+++ b/tensorflow/lite/kernels/div.cc
+@@ -204,9 +204,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+ 
+-  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+  // TODO(b/193904910): This can be written with C++ templates
+#define TF_LITE_CHECK_DIV_NON_ZERO(data_type)                       \
+  const auto* input2_data = GetTensorData<data_type>(input2);       \
+  const size_t input2_elements = input2->bytes / sizeof(data_type); \
+  for (size_t i = 0; i < input2_elements; i++) {                    \
+    TF_LITE_ENSURE(context, input2_data[i] != 0);                   \
+  }
+
+  if (output->type == kTfLiteFloat32) {
+    // Div by zero seems ok in this case, just like in TF case infinities are
+    // returned. So we don't do a check at this point.
+    EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteInt32) {
+    TF_LITE_CHECK_DIV_NON_ZERO(int32_t);
+     EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
+   } else if (output->type == kTfLiteUInt8) {
+    TF_LITE_CHECK_DIV_NON_ZERO(uint8_t);
+     TF_LITE_ENSURE_OK(
+         context, EvalQuantized<kernel_type>(context, node, params, data, input1,
+                                             input2, output));
+@@ -217,6 +231,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+         output->type);
+     return kTfLiteError;
+   }
+#undef TF_LITE_CHECK_DIV_NON_ZERO
+ 
+   return kTfLiteOk;
+ }
+-- 
+2.27.0
+
--- a/CVE-2021-37691.patch
+++ b/CVE-2021-37691.patch
@ -0,0 +1,35 @@
+From 0575b640091680cfb70f4dd93e70658de43b94f9 Mon Sep 17 00:00:00 2001
+From: Mihai Maruseac <mihaimaruseac@google.com>
+Date: Tue, 27 Jul 2021 16:45:20 -0700
+Subject: [PATCH] Prevent division by 0 in LSH projection.
+
+PiperOrigin-RevId: 387225857
+Change-Id: Iaeb572a763618c64f503e0026f6dd9fd769bf50c
+---
+ tensorflow/lite/kernels/lsh_projection.cc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tensorflow/lite/kernels/lsh_projection.cc b/tensorflow/lite/kernels/lsh_projection.cc
+index b809748c..34cbf6e5 100644
+--- a/tensorflow/lite/kernels/lsh_projection.cc
+++ b/tensorflow/lite/kernels/lsh_projection.cc
+@@ -28,7 +28,7 @@ limitations under the License.
+ //
+ // Input:
+ //   Tensor[0]: Hash functions. Dim.size == 2, DataType: Float.
+-//              Tensor[0].Dim[0]: Num of hash functions.
+//              Tensor[0].Dim[0]: Num of hash functions. Must be at least 1.
+ //              Tensor[0].Dim[1]: Num of projected output bits generated by
+ //                                each hash function.
+ //   In sparse case, Tensor[0].Dim[1] + ceil( log2(Tensor[0].Dim[0] )) <= 32.
+@@ -80,6 +80,7 @@ TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) {
+ 
+   const TfLiteTensor* input = GetInput(context, node, 1);
+   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
+  TF_LITE_ENSURE(context, SizeOfDimension(input, 0) >= 1);
+ 
+   if (NumInputs(node) == 3) {
+     const TfLiteTensor* weight = GetInput(context, node, 2);
+-- 
+2.27.0
+
--- a/tensorflow.spec
+++ b/tensorflow.spec
@ -1,7 +1,7 @@
 %global _empty_manifest_terminate_build 0
 Name:		tensorflow
 Version:	2.3.1
-Release:	8
+Release:	9
 Summary:	An Open Source Machine Learning Framework for Everyone
 License:	Apache License 2.0
 URL:		https://www.tensorflow.org/
@ -122,6 +122,57 @@ Patch0110:      CVE-2021-37685.patch
 Patch0111:      CVE-2021-37687.patch
 Patch0112:      CVE-2021-37688.patch
 Patch0113:      CVE-2021-37689.patch
+Patch0114:      CVE-2020-15265.patch
+Patch0115:      CVE-2020-15266.patch
+Patch0116:      CVE-2021-37666.patch
+Patch0117:      CVE-2021-29517-1.patch
+Patch0118:      CVE-2021-29517-2.patch
+Patch0119:      CVE-2021-29518.patch
+Patch0120:      CVE-2021-29521.patch
+Patch0121:      CVE-2021-29533.patch
+Patch0122:      CVE-2021-29537.patch
+Patch0123:      CVE-2021-29560.patch
+Patch0124:      CVE-2021-29571.patch
+Patch0125:      CVE-2021-29583.patch
+Patch0126:      CVE-2021-29589.patch
+Patch0127:      CVE-2021-29595.patch
+Patch0128:      CVE-2021-29602.patch
+Patch0129:      CVE-2021-29604.patch
+Patch0130:      CVE-2021-29610.patch
+Patch0131:      CVE-2021-29611.patch
+Patch0132:      CVE-2021-29612-1.patch
+Patch0133:      CVE-2021-29612-2.patch
+Patch0134:      CVE-2021-29614.patch
+Patch0135:      CVE-2021-29618.patch
+Patch0136:      CVE-2021-37635.patch
+Patch0137:      CVE-2021-37640-1.patch
+Patch0138:      CVE-2021-37640-2.patch
+Patch0139:      CVE-2021-37642.patch
+Patch0140:      CVE-2021-37643.patch
+Patch0141:      CVE-2021-37651.patch
+Patch0142:      CVE-2021-37653.patch
+Patch0143:      CVE-2021-37654.patch
+Patch0144:      CVE-2021-37655.patch
+Patch0145:      CVE-2021-37657.patch
+Patch0146:      CVE-2021-37658.patch
+Patch0147:      CVE-2021-37661.patch
+Patch0148:      CVE-2021-37662-1.patch
+Patch0149:      CVE-2021-37662-2.patch
+Patch0150:      CVE-2021-37664.patch
+Patch0151:      CVE-2021-37665-1.patch
+Patch0152:      CVE-2021-37665-2.patch
+Patch0153:      CVE-2021-37668.patch
+Patch0154:      CVE-2021-37669-1.patch
+Patch0155:      CVE-2021-37669-2.patch
+Patch0156:      CVE-2021-37674.patch
+Patch0157:      CVE-2021-37675.patch
+Patch0158:      CVE-2021-37678.patch
+Patch0159:      CVE-2021-37683.patch
+Patch0160:      CVE-2021-37691.patch
+Patch0161:      CVE-2021-29526-1.patch
+Patch0162:      CVE-2021-29526-2.patch
+Patch0163:      CVE-2021-29544-1.patch
+Patch0164:      CVE-2021-29544-2.patch
 Requires:	python3-future
 Requires:	python3-numpy

@ -168,6 +219,9 @@ bazel --output_user_root=`pwd`/../output_user_root build --host_copt=-Wno-string
 %{_bindir}/*

 %changelog
+* Tue Aug 31 2021 yaoxin <yaoxin30@huawei.com> - 2.3.1-9
+- Fix CVE-2020-15265 CVE-2020-15266 CVE-2021-37666 CVE-2021-29517 CVE-2021-29518 CVE-2021-29521 CVE-2021-29533 CVE-2021-29537 CVE-2021-29560 CVE-2021-29571 CVE-2021-29583 CVE-2021-29589 CVE-2021-29595 CVE-2021-29602 CVE-2021-29604 CVE-2021-29610-to-CVE-2021-29612 CVE-2021-29614 CVE-2021-29618 CVE-2021-37635 CVE-2021-37640 CVE-2021-37642 CVE-2021-37643 CVE-2021-37651 CVE-2021-37653-to-CVE-2021-37655 CVE-2021-37657 CVE-2021-37658 CVE-2021-37661 CVE-2021-37662 CVE-2021-37664 CVE-2021-37665 CVE-2021-37668 CVE-2021-37669 CVE-2021-37674 CVE-2021-37675 CVE-2021-37678 CVE-2021-37683 CVE-2021-37691 CVE-2021-29526 CVE-2021-29544
+
 * Thu Aug 26 2021 yaoxin <yaoxin30@huawei.com> - 2.3.1-8
 - Fix CVE-2021-29512 CVE-2021-29514 CVE-2021-29519 CVE-2021-29520 CVE-2021-29522 CVE-2021-29524 CVE-2021-29527-to-CVE-2021-29530 CVE-2021-29532 CVE-2021-29536 CVE-2021-29539 CVE-2021-29541-to-CVE-2021-29543 CVE-2021-29545-to-CVE-2021-29547 CVE-2021-29549 CVE-2021-29550 CVE-2021-29552-to-CVE-2021-29559 CVE-2021-29561-to-CVE-2021-29564 CVE-2021-29567-to-CVE-2021-29570 CVE-2021-29572-to-CVE-2021-29582 CVE-2021-29585-to-CVE-2021-29588 CVE-2021-29590-to-CVE-2021-29593 CVE-2021-29596-to-CVE-2021-29601 CVE-2021-29603 CVE-2021-29605-to-CVE-2021-29609 CVE-2021-29613 CVE-2021-29615-to-CVE-2021-29617 CVE-2021-29619 CVE-2021-37637-to-CVE-2021-37639 CVE-2021-37641 CVE-2021-37644 CVE-2021-37646-to-CVE-2021-37649 CVE-2021-37652 CVE-2021-37656 CVE-2021-37659 CVE-2021-37660 CVE-2021-37663 CVE-2021-37667 CVE-2021-37670-to-CVE-2021-37673 CVE-2021-37676 CVE-2021-37677 CVE-2021-37680 CVE-2021-37682 CVE-2021-37685 CVE-2021-37687-to-CVE-2021-37689