From 29ec95f1a0ad2d27cf5262aab96ad8f60722a246 Mon Sep 17 00:00:00 2001
From: Vincent Batts
Date: Wed, 7 Jan 2015 17:15:17 -0500
Subject: [PATCH] devicemapper: trim and resize from alexl

Largely bringing in the Trim and resize logic by @alexlarsson and
https://github.com/docker/docker/pull/4202

Signed-off-by: Vincent Batts
---
 daemon/graphdriver/devmapper/deviceset.go | 217 ++++++++++++++++++++++
 pkg/devicemapper/devmapper.go             |   8 +-
 pkg/devicemapper/metadata.go              | 185 +++++++++++++++++++
 pkg/devicemapper/ranges.go                | 118 ++++++++++++
 pkg/devicemapper/ranges_test.go           |  58 ++++++
 5 files changed, 585 insertions(+), 1 deletion(-)
 create mode 100644 pkg/devicemapper/metadata.go
 create mode 100644 pkg/devicemapper/ranges.go
 create mode 100644 pkg/devicemapper/ranges_test.go

diff --git a/daemon/graphdriver/devmapper/deviceset.go b/daemon/graphdriver/devmapper/deviceset.go
index 078e31a1ecbb2..b9452c57db914 100644
--- a/daemon/graphdriver/devmapper/deviceset.go
+++ b/daemon/graphdriver/devmapper/deviceset.go
@@ -728,7 +728,121 @@ func minor(device uint64) uint64 {
 	return (device & 0xff) | ((device >> 12) & 0xfff00)
 }
 
+// getBlockDevice opens the file called name inside the loopback directory and
+// returns the loop device that devicemapper.FindLoopDeviceFor reports for it.
+// The backing file is closed again before returning; the caller is expected to
+// close the returned loop device.
+//
+// TODO(vbatts) copied from https://github.com/docker/docker/pull/4202/files but need to ensure the logic stays sane when path is a block, not loopdevice
+func (devices *DeviceSet) getBlockDevice(name string) (*os.File, error) {
+	dirname := devices.loopbackDir()
+	filename := path.Join(dirname, name)
+
+	file, err := os.OpenFile(filename, os.O_RDWR, 0)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	loopback := devicemapper.FindLoopDeviceFor(file)
+	if loopback == nil {
+		return nil, fmt.Errorf("Unable to find loopback mount for: %s", filename)
+	}
+	return loopback, nil
+}
+
+// TrimPool discards the parts of the pool's data device that the thin-pool
+// metadata does not list as mapped. The pool is suspended while the metadata
+// is read and the discards are issued, and resumed afterwards.
+//
+// NOTE(review): when a discard fails the pool is intentionally left suspended
+// (see the error text below); confirm that this is still the desired policy.
+func (devices *DeviceSet) TrimPool() error {
+	devices.Lock()
+	defer devices.Unlock()
+
+	totalSizeInSectors, _, _, dataTotal, _, _, err := devices.poolStatus()
+	if err != nil {
+		return err
+	}
+	if dataTotal == 0 {
+		// Avoid a division by zero panic on a pool that reports no data blocks.
+		return fmt.Errorf("Unable to trim pool: pool reports zero data blocks")
+	}
+	blockSizeInSectors := totalSizeInSectors / dataTotal
+	blockSizeInBytes := blockSizeInSectors * 512
+
+	data, err := devices.getBlockDevice("data")
+	if err != nil {
+		return err
+	}
+	defer data.Close()
+
+	dataSize, err := devicemapper.GetBlockDeviceSize(data)
+	if err != nil {
+		return err
+	}
+
+	metadata, err := devices.getBlockDevice("metadata")
+	if err != nil {
+		return err
+	}
+	defer metadata.Close()
+
+	// Suspend the pool so the metadata doesn't change and new blocks
+	// are not loaded
+	if err := devicemapper.SuspendDevice(devices.getPoolName()); err != nil {
+		return fmt.Errorf("Unable to suspend pool: %s", err)
+	}
+
+	// Just in case, make sure everything is on disk
+	syscall.Sync()
+
+	ranges, err := devicemapper.ReadMetadataRanges(metadata.Name())
+	if err != nil {
+		// Nothing was discarded yet, so bring the pool back before bailing out.
+		if resumeErr := devicemapper.ResumeDevice(devices.getPoolName()); resumeErr != nil {
+			return fmt.Errorf("%v (also unable to resume pool: %v)", err, resumeErr)
+		}
+		return err
+	}
+
+	lastEnd := uint64(0)
+
+	for e := ranges.Front(); e != nil; e = e.Next() {
+		r := e.Value.(*devicemapper.Range)
+		// Convert from pool blocks to bytes
+		rBegin := r.Begin() * blockSizeInBytes
+		rEnd := r.End() * blockSizeInBytes
+
+		// Everything between the previous mapped range and this one is unused
+		if rBegin > lastEnd {
+			if err := devicemapper.BlockDeviceDiscardFile(data, lastEnd, rBegin-lastEnd); err != nil {
+				return fmt.Errorf("Failing to discard block, leaving pool suspended: %v", err)
+			}
+		}
+		lastEnd = rEnd
+	}
+
+	// Discard the tail of the data device after the last mapped range
+	if dataSize > lastEnd {
+		if err := devicemapper.BlockDeviceDiscardFile(data, lastEnd, dataSize-lastEnd); err != nil {
+			return fmt.Errorf("Failing to discard block, leaving pool suspended: %v", err)
+		}
+	}
+
+	// Resume the pool
+	if err := devicemapper.ResumeDevice(devices.getPoolName()); err != nil {
+		return fmt.Errorf("Unable to resume pool: %s", err)
+	}
+
+	return nil
+}
+
 func (devices *DeviceSet) ResizePool(size int64) error {
+	devices.Lock()
+	defer devices.Unlock()
+
 	dirname := devices.loopbackDir()
 	datafilename := path.Join(dirname, "data")
 	if len(devices.dataDevice) > 0 {
@@ -800,6 +914,109 @@ func (devices *DeviceSet) ResizePool(size int64) error {
 	return nil
 }
 
+// ResizeDevice grows the device identified by hash to size and then grows
+// the filesystem on it: xfs is mounted on a scratch directory for
+// xfs_growfs, ext4 goes through e2fsck and resize2fs. Shrinking and resizing
+// a device that is still open are refused.
+func (devices *DeviceSet) ResizeDevice(hash string, size int64) error {
+	info, err := devices.lookupDevice(hash)
+	if err != nil {
+		return err
+	}
+
+	info.lock.Lock()
+	defer info.lock.Unlock()
+
+	if size < 0 || info.Size > uint64(size) {
+		return fmt.Errorf("Can't shrink devices")
+	}
+
+	devices.Lock()
+	defer devices.Unlock()
+
+	devinfo, err := devicemapper.GetInfo(info.Name())
+	if err != nil {
+		return err
+	}
+
+	if devinfo.OpenCount != 0 {
+		return fmt.Errorf("Device in use")
+	}
+
+	// Take an existing mapping down so it can be recreated with the new size
+	if devinfo.Exists != 0 {
+		if err := devices.deactivateDevice(info); err != nil {
+			return err
+		}
+	}
+	oldSize := info.Size
+	info.Size = uint64(size)
+
+	// Roll the in-memory size back if the new value cannot be persisted
+	if err := devices.saveMetadata(info); err != nil {
+		info.Size = oldSize
+		return err
+	}
+
+	// Activate with new size
+	if err := devices.activateDeviceIfNeeded(info); err != nil {
+		return err
+	}
+
+	fstype, err := ProbeFsType(info.DevName())
+	if err != nil {
+		return err
+	}
+
+	switch fstype {
+	case "xfs":
+		dir, err := ioutil.TempDir(devices.root, "resizemnt")
+		if err != nil {
+			return err
+		}
+
+		defer os.Remove(dir)
+
+		err = syscall.Mount(info.DevName(), dir, "xfs", syscall.MS_MGC_VAL, "nouuid")
+		if err != nil {
+			return err
+		}
+
+		err = exec.Command("xfs_growfs", dir).Run()
+		if err != nil {
+			// Best effort cleanup; the xfs_growfs failure is what gets reported
+			syscall.Unmount(dir, 0)
+			return fmt.Errorf("xfs_growfs failed: %v", err)
+		}
+
+		err = syscall.Unmount(dir, 0)
+		if err != nil {
+			return err
+		}
+
+	case "ext4":
+		// Force a filesystem check before resizing
+		err = exec.Command("e2fsck", "-f", "-y", info.DevName()).Run()
+		if err != nil {
+			return fmt.Errorf("e2fsck failed: %v", err)
+		}
+
+		err = exec.Command("resize2fs", info.DevName()).Run()
+		if err != nil {
+			return fmt.Errorf("resize2fs failed: %v", err)
+		}
+
+	default:
+		return fmt.Errorf("Unsupported filesystem %s", fstype)
+	}
+
+	if err := devices.deactivateDevice(info); err != nil {
+		return err
+	}
+
+	return nil
+}
+
 func (devices *DeviceSet) loadTransactionMetaData() error {
 	jsonData, err := ioutil.ReadFile(devices.transactionMetaFile())
 	if err != nil {
diff --git a/pkg/devicemapper/devmapper.go b/pkg/devicemapper/devmapper.go
index c23a3624db828..5babccd9f24ba 100644
--- a/pkg/devicemapper/devmapper.go
+++ b/pkg/devicemapper/devmapper.go
@@ -354,6 +354,7 @@ func GetBlockDeviceSize(file *os.File) (uint64, error) {
 	return uint64(size), nil
 }
 
+// BlockDeviceDiscard will discard _all_ blocks from the device at path
 func BlockDeviceDiscard(path string) error {
 	file, err := os.OpenFile(path, os.O_RDWR, 0)
 	if err != nil {
@@ -366,7 +367,7 @@ func BlockDeviceDiscard(path string) error {
 		return err
 	}
 
-	if err := ioctlBlkDiscard(file.Fd(), 0, size); err != nil {
+	if err := BlockDeviceDiscardFile(file, 0, size); err != nil {
 		return err
 	}
 
@@ -377,6 +378,11 @@ func BlockDeviceDiscard(path string) error {
 	return nil
 }
 
+// BlockDeviceDiscardFile allows specifying the offset and length of blocks to discard from file
+func BlockDeviceDiscardFile(file *os.File, offset, length uint64) error {
+	return ioctlBlkDiscard(file.Fd(), offset, length)
+}
+
 // This is the programmatic example of "dmsetup create"
 func CreatePool(poolName string, dataFile, metadataFile *os.File, poolBlockSize uint32) error {
 	task, err := TaskCreateNamed(DeviceCreate, poolName)
diff --git a/pkg/devicemapper/metadata.go b/pkg/devicemapper/metadata.go
new file mode 100644
index 0000000000000..2434cb775dd90
--- /dev/null
+++ b/pkg/devicemapper/metadata.go
@@ -0,0 +1,185 @@
+// +build linux,amd64
+
+package devicemapper
+
+import (
+	"encoding/xml"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os/exec"
+	"strconv"
+)
+
+// MetadataDecoder collects the mapped data block ranges found in the XML
+// that thin_dump prints for a thin-pool metadata device.
+type MetadataDecoder struct {
+	d      *xml.Decoder
+	ranges *Ranges
+}
+
+// NewMetadataDecoder returns a MetadataDecoder that reads XML from reader.
+func NewMetadataDecoder(reader io.Reader) *MetadataDecoder {
+	m := &MetadataDecoder{
+		d:      xml.NewDecoder(reader),
+		ranges: NewRanges(),
+	}
+
+	return m
+}
+
+// parseRange records the blocks [data_begin, data_begin+length) of a
+// range_mapping element and skips to the end of that element.
+func (m *MetadataDecoder) parseRange(start *xml.StartElement) error {
+	var begin, length uint64
+	var err error
+	for _, attr := range start.Attr {
+		switch attr.Name.Local {
+		case "data_begin":
+			begin, err = strconv.ParseUint(attr.Value, 10, 64)
+			if err != nil {
+				return err
+			}
+		case "length":
+			length, err = strconv.ParseUint(attr.Value, 10, 64)
+			if err != nil {
+				return err
+			}
+		}
+	}
+
+	m.ranges.Add(begin, begin+length)
+
+	return m.d.Skip()
+}
+
+// parseSingle records the single block named by the data_block attribute of a
+// single_mapping element and skips to the end of that element.
+func (m *MetadataDecoder) parseSingle(start *xml.StartElement) error {
+	for _, attr := range start.Attr {
+		switch attr.Name.Local {
+		case "data_block":
+			block, err := strconv.ParseUint(attr.Value, 10, 64)
+			if err != nil {
+				return err
+			}
+			m.ranges.Add(block, block+1)
+		}
+	}
+
+	return m.d.Skip()
+}
+
+// parseDevice consumes the children of a device element up to its end tag.
+// Only range_mapping and single_mapping children are accepted.
+func (m *MetadataDecoder) parseDevice(start *xml.StartElement) error {
+	for {
+		tok, err := m.d.Token()
+		if err != nil {
+			return err
+		}
+		switch tok := tok.(type) {
+		case xml.StartElement:
+			switch tok.Name.Local {
+			case "range_mapping":
+				if err := m.parseRange(&tok); err != nil {
+					return err
+				}
+			case "single_mapping":
+				if err := m.parseSingle(&tok); err != nil {
+					return err
+				}
+			default:
+				return fmt.Errorf("Unknown tag type %s\n", tok.Name)
+			}
+		case xml.EndElement:
+			return nil
+		}
+	}
+}
+
+// readStart returns the next start element, ignoring other tokens. An end
+// element encountered first is reported as an error.
+func (m *MetadataDecoder) readStart() (*xml.StartElement, error) {
+	for {
+		tok, err := m.d.Token()
+		if err != nil {
+			return nil, err
+		}
+
+		switch tok := tok.(type) {
+		case xml.StartElement:
+			return &tok, nil
+
+		case xml.EndElement:
+			return nil, fmt.Errorf("Unbalanced tags")
+		}
+	}
+}
+
+// parseMetadata parses a superblock element and every device element inside
+// it, adding the mappings that are found to m.ranges.
+func (m *MetadataDecoder) parseMetadata() error {
+	start, err := m.readStart()
+	if err != nil {
+		return err
+	}
+	if start.Name.Local != "superblock" {
+		return fmt.Errorf("Unexpected tag type %s", start.Name)
+	}
+
+	for {
+		tok, err := m.d.Token()
+		if err != nil {
+			return err
+		}
+		switch tok := tok.(type) {
+		case xml.StartElement:
+			switch tok.Name.Local {
+			case "device":
+				if err := m.parseDevice(&tok); err != nil {
+					return err
+				}
+			default:
+				return fmt.Errorf("Unknown tag type %s\n", tok.Name)
+			}
+		case xml.EndElement:
+			return nil
+		}
+	}
+}
+
+// ReadMetadataRanges runs `thin_dump` on file and returns the data block
+// ranges that its XML output lists as mapped (range_mapping and
+// single_mapping entries).
+func ReadMetadataRanges(file string) (*Ranges, error) {
+	cmd := exec.Command("thin_dump", file)
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return nil, err
+	}
+
+	// Start/Wait instead of Run: Wait closes the pipe, so it must not be
+	// called until all of the output has been read.
+	if err := cmd.Start(); err != nil {
+		return nil, err
+	}
+
+	m := NewMetadataDecoder(stdout)
+	parseErr := m.parseMetadata()
+
+	// Drain whatever the parser did not consume so that thin_dump cannot
+	// block on a full pipe; a read error here is irrelevant because the
+	// outcome is decided by Wait and parseErr below.
+	io.Copy(ioutil.Discard, stdout)
+
+	if err := cmd.Wait(); err != nil {
+		return nil, err
+	}
+	if parseErr != nil {
+		return nil, parseErr
+	}
+
+	return m.ranges, nil
+}
diff --git a/pkg/devicemapper/ranges.go b/pkg/devicemapper/ranges.go
new file mode 100644
index 0000000000000..dcb0d9aa9300f
--- /dev/null
+++ b/pkg/devicemapper/ranges.go
@@ -0,0 +1,118 @@
+// +build linux,amd64
+
+package devicemapper
+
+import (
+	"container/list"
+	"fmt"
+)
+
+// Range is the half-open interval [begin, end).
+type Range struct {
+	begin uint64
+	end   uint64
+}
+
+// Begin returns the first value covered by r.
+func (r Range) Begin() uint64 { return r.begin }
+
+// End returns the value just past the last one covered by r.
+func (r Range) End() uint64 { return r.end }
+
+// Ranges is a list of *Range values kept sorted by Add, with overlapping or
+// touching ranges merged into one.
+type Ranges struct {
+	*list.List
+}
+
+// NewRanges returns an empty Ranges.
+func NewRanges() *Ranges {
+	return &Ranges{list.New()}
+}
+
+// ToString formats the ranges as comma separated "begin-end" pairs.
+func (r *Ranges) ToString() string {
+	s := ""
+	for e := r.Front(); e != nil; e = e.Next() {
+		rng := e.Value.(*Range)
+		if s != "" {
+			s = s + ","
+		}
+		s = fmt.Sprintf("%s%d-%d", s, rng.begin, rng.end)
+	}
+	return s
+}
+
+// Clear removes all ranges.
+func (r *Ranges) Clear() {
+	r.Init()
+}
+
+// Add inserts [begin, end) into the list, merging it with every existing
+// range it overlaps or touches. Empty and inverted ranges (begin >= end) are
+// ignored.
+func (r *Ranges) Add(begin, end uint64) {
+	if begin >= end {
+		// Nothing to record; inserting it would leave an empty element behind.
+		return
+	}
+
+	var next *list.Element
+	for e := r.Front(); e != nil; e = next {
+		next = e.Next()
+
+		existing := e.Value.(*Range)
+
+		// If existing range is fully to the left, skip
+		if existing.end < begin {
+			continue
+		}
+
+		// If new range is fully to the left, just insert
+		if end < existing.begin {
+			r.InsertBefore(&Range{begin, end}, e)
+			return
+		}
+
+		// Now we know the two ranges somehow intersect (or at least touch)
+
+		// Extend existing range with the new range
+		if begin < existing.begin {
+			existing.begin = begin
+		}
+
+		// If the new range is completely covered by existing range, we're done
+		if end <= existing.end {
+			return
+		}
+
+		// Otherwise strip r from new range
+		begin = existing.end
+
+		// We're now touching r at the end, and so we need to either extend r
+		// or merge with next
+
+		if next == nil {
+			// Nothing after, extend
+			existing.end = end
+			return
+		}
+
+		nextR := next.Value.(*Range)
+		if end < nextR.begin {
+			// Fits, Just extend
+			existing.end = end
+			return
+		}
+
+		// The new region overlaps the next, merge the two
+		nextR.begin = existing.begin
+		r.Remove(e)
+	}
+
+	// nothing in list or everything to the left, just append the rest
+	if begin < end {
+		r.PushBack(&Range{begin, end})
+		return
+	}
+}
diff --git a/pkg/devicemapper/ranges_test.go b/pkg/devicemapper/ranges_test.go
new file mode 100644
index 0000000000000..ea20c8138cd23
--- /dev/null
+++ b/pkg/devicemapper/ranges_test.go
@@ -0,0 +1,58 @@
+// +build linux,amd64
+
+package devicemapper
+
+import "testing"
+
+func assert(t *testing.T, r *Ranges, res string) {
+	s := r.ToString()
+	if s != res {
+		t.Fatalf("error: got %s, expecting %s", s, res)
+	}
+}
+
+func TestRanges(t *testing.T) {
+	r := NewRanges()
+	assert(t, r, "")
+	r.Clear()
+	assert(t, r, "")
+	r.Add(5, 6)
+	assert(t, r, "5-6")
+	r.Add(5, 6)
+	assert(t, r, "5-6")
+	r.Add(5, 7)
+	assert(t, r, "5-7")
+	r.Add(7, 8)
+	assert(t, r, "5-8")
+	r.Add(4, 6)
+	assert(t, r, "4-8")
+	r.Add(5, 6)
+	assert(t, r, "4-8")
+	r.Add(3, 4)
+	assert(t, r, "3-8")
+	r.Add(1, 2)
+	assert(t, r, "1-2,3-8")
+	r.Add(15, 20)
+	assert(t, r, "1-2,3-8,15-20")
+	r.Add(30, 40)
+	assert(t, r, "1-2,3-8,15-20,30-40")
+	r.Add(8, 9)
+	assert(t, r, "1-2,3-9,15-20,30-40")
+	r.Add(8, 10)
+	assert(t, r, "1-2,3-10,15-20,30-40")
+	r.Add(8, 25)
+	assert(t, r, "1-2,3-25,30-40")
+	r.Add(0, 27)
+	assert(t, r, "0-27,30-40")
+	r.Add(29, 41)
+	assert(t, r, "0-27,29-41")
+	r.Add(27, 29)
+	assert(t, r, "0-41")
+	r.Add(60, 70)
+	assert(t, r, "0-41,60-70")
+	// Empty and inverted ranges must not leave an element behind
+	r.Add(50, 50)
+	assert(t, r, "0-41,60-70")
+	r.Add(55, 45)
+	assert(t, r, "0-41,60-70")
+}