docker/patch/0033-devmapper-Fix-devicemapper-issue-power-off-th.patch
2019-09-30 10:37:25 -04:00

225 lines
7.7 KiB
Diff

From 72f9e0c4d0d907a036d25c9dfeea9c0baddddc3e Mon Sep 17 00:00:00 2001
From: jingrui <jingrui@huawei.com>
Date: Thu, 3 Jan 2019 10:37:50 +0800
Subject: [PATCH 033/111] devmapper: Fix devicemapper issue: power off
the VM while loading a image, couldn't load it after VM bootup
reason: cherry-pick commits to docker-18.09
cherry-pick from 28991928c4 | * Fix devicemapper issue: power off the VM while
loading a image, couldn't load it after VM bootup
Issue Description:
If the VM is powered off or restarted while an image is being loaded,
there is a chance that the image can no longer be loaded after the VM
boots up again, and "Error running
deviceCreate (createSnapDevice) dm_task_run failed" is reported.
Reproduce Steps:
> 1. run `docker load -i xxx.tar`
> 2. virsh restart VM; # restart the VM.
> 3. After startup, run `docker load -i xxx.tar` again; it fails to import
> the image.
Analysis:
From syslog, we found that docker was executing "Umount Device" when the VM
was restarted. We identified
two reasons for the failure:
> 1. The rollback operation only removes the device from the DM thin pool; it
> does not remove the device from memory.
> 2. TransactionData or metadata was not flushed to disk.
Solution:
> 1. The rollback operation for DM should also remove the device from the
> devices cache in the DM driver.
> 2. When restoring layers and images, check whether the device exists in
> the graphdriver.
If it doesn't, remove the layer (and its metadata) and do not load the
image.
Issue link:
#203
Signed-off-by: Wentao Zhang <zhangwentao234@huawei.com>
Conflicts:
image/store.go
layer/layer_store.go
Change-Id: If3bfbf0d0ed8f950cfd5934fc25fac892481275c
Signed-off-by: jingrui <jingrui@huawei.com>
---
.../daemon/graphdriver/devmapper/deviceset.go | 38 +++++++------------
components/engine/image/store.go | 3 +-
components/engine/layer/layer_store.go | 15 +++++++-
3 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/components/engine/daemon/graphdriver/devmapper/deviceset.go b/components/engine/daemon/graphdriver/devmapper/deviceset.go
index af53cf83e6..0675b2eacd 100644
--- a/components/engine/daemon/graphdriver/devmapper/deviceset.go
+++ b/components/engine/daemon/graphdriver/devmapper/deviceset.go
@@ -6,7 +6,6 @@ import (
"bufio"
"encoding/json"
"fmt"
- "io"
"io/ioutil"
"os"
"os/exec"
@@ -23,6 +22,7 @@ import (
"github.com/docker/docker/pkg/devicemapper"
"github.com/docker/docker/pkg/dmesg"
"github.com/docker/docker/pkg/idtools"
+ "github.com/docker/docker/pkg/ioutils"
"github.com/docker/docker/pkg/loopback"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/parsers"
@@ -306,6 +306,10 @@ func (devices *DeviceSet) ensureImage(name string, size int64) (string, error) {
return filename, nil
}
+func (devices *DeviceSet) removeDeviceMap(hash string) {
+ delete(devices.Devices, hash)
+}
+
func (devices *DeviceSet) allocateTransactionID() uint64 {
devices.OpenTransactionID = devices.TransactionID + 1
return devices.OpenTransactionID
@@ -328,28 +332,9 @@ func (devices *DeviceSet) removeMetadata(info *devInfo) error {
// Given json data and file path, write it to disk
func (devices *DeviceSet) writeMetaFile(jsonData []byte, filePath string) error {
- tmpFile, err := ioutil.TempFile(devices.metadataDir(), ".tmp")
- if err != nil {
- return fmt.Errorf("devmapper: Error creating metadata file: %s", err)
- }
-
- n, err := tmpFile.Write(jsonData)
- if err != nil {
- return fmt.Errorf("devmapper: Error writing metadata to %s: %s", tmpFile.Name(), err)
- }
- if n < len(jsonData) {
- return io.ErrShortWrite
+ if err := ioutils.AtomicWriteFile(filePath, jsonData, 0600); err != nil {
+ return fmt.Errorf("devmapper: Error writing metadata to %s: %s", filePath, err)
}
- if err := tmpFile.Sync(); err != nil {
- return fmt.Errorf("devmapper: Error syncing metadata file %s: %s", tmpFile.Name(), err)
- }
- if err := tmpFile.Close(); err != nil {
- return fmt.Errorf("devmapper: Error closing metadata file %s: %s", tmpFile.Name(), err)
- }
- if err := os.Rename(tmpFile.Name(), filePath); err != nil {
- return fmt.Errorf("devmapper: Error committing metadata file %s: %s", tmpFile.Name(), err)
- }
-
return nil
}
@@ -483,7 +468,7 @@ func (devices *DeviceSet) unregisterDevice(hash string) error {
Hash: hash,
}
- delete(devices.Devices, hash)
+ devices.removeDeviceMap(hash)
if err := devices.removeMetadata(info); err != nil {
logrus.WithField("storage-driver", "devicemapper").Debugf("Error removing metadata: %s", err)
@@ -509,7 +494,7 @@ func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, trans
if err := devices.saveMetadata(info); err != nil {
// Try to remove unused device
- delete(devices.Devices, hash)
+ devices.removeDeviceMap(hash)
return nil, err
}
@@ -1173,7 +1158,7 @@ func (devices *DeviceSet) checkGrowBaseDeviceFS(info *devInfo) error {
if err := devices.saveMetadata(info); err != nil {
// Try to remove unused device
- delete(devices.Devices, info.Hash)
+ devices.removeDeviceMap(info.Hash)
return err
}
@@ -1401,6 +1386,8 @@ func (devices *DeviceSet) rollbackTransaction() error {
devices.markDeviceIDFree(devices.DeviceID)
}
+ devices.removeDeviceMap(dinfo.Hash)
+
if err := devices.removeTransactionMetaData(); err != nil {
logger.Errorf("Unable to remove transaction meta file %s: %s", devices.transactionMetaFile(), err)
}
@@ -1482,6 +1469,7 @@ func (devices *DeviceSet) closeTransaction() error {
logrus.WithField("storage-driver", "devicemapper").Debug("Failed to close Transaction")
return err
}
+
return nil
}
diff --git a/components/engine/image/store.go b/components/engine/image/store.go
index 1a8a8a2451..b078a2627a 100644
--- a/components/engine/image/store.go
+++ b/components/engine/image/store.go
@@ -81,8 +81,9 @@ func (is *store) restore() error {
}
l, err = is.lss[img.OperatingSystem()].Get(chainID)
if err != nil {
+ logrus.Errorf("layer does not exist, not restoring image %v, %v, %s", dgst, chainID, img.OperatingSystem())
+ // If the layer doesn't exist, return nil to ignore this image.
if err == layer.ErrLayerDoesNotExist {
- logrus.Errorf("layer does not exist, not restoring image %v, %v, %s", dgst, chainID, img.OperatingSystem())
return nil
}
return err
diff --git a/components/engine/layer/layer_store.go b/components/engine/layer/layer_store.go
index 6a568e9d9b..351f787b87 100644
--- a/components/engine/layer/layer_store.go
+++ b/components/engine/layer/layer_store.go
@@ -105,7 +105,7 @@ func newStoreFromGraphDriver(root string, driver graphdriver.Driver, os string)
for _, id := range ids {
l, err := ls.loadLayer(id)
if err != nil {
- logrus.Debugf("Failed to load layer %s: %s", id, err)
+ logrus.Warnf("Failed to load layer %s: %s", id, err)
continue
}
if l.parent != nil {
@@ -126,11 +126,17 @@ func (ls *layerStore) Driver() graphdriver.Driver {
return ls.driver
}
-func (ls *layerStore) loadLayer(layer ChainID) (*roLayer, error) {
+func (ls *layerStore) loadLayer(layer ChainID) (l *roLayer, err error) {
cl, ok := ls.layerMap[layer]
if ok {
return cl, nil
}
+ defer func() {
+ // If failed to load the layer, remove the layer metadata.
+ if err != nil {
+ ls.store.Remove(layer)
+ }
+ }()
diff, err := ls.store.GetDiffID(layer)
if err != nil {
@@ -147,6 +153,11 @@ func (ls *layerStore) loadLayer(layer ChainID) (*roLayer, error) {
return nil, fmt.Errorf("failed to get cache id for %s: %s", layer, err)
}
+ // Check whether the layer exists in graphdriver here.
+ if exist := ls.driver.Exists(cacheID); !exist {
+ return nil, fmt.Errorf("cacheID %s for layer %s does not exists in graphdriver", cacheID, layer)
+ }
+
parent, err := ls.store.GetParent(layer)
if err != nil {
return nil, fmt.Errorf("failed to get parent for %s: %s", layer, err)
--
2.17.1