Codebase list golang-procfs / HEAD blockdevice / stats.go
HEAD

Tree @HEAD (Download .tar.gz)

stats.go @HEADraw · history · blame

// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package blockdevice

import (
	"bufio"
	"fmt"
	"github.com/prometheus/procfs/internal/util"
	"io"
	"io/ioutil"
	"os"
	"strings"

	"github.com/prometheus/procfs/internal/fs"
)

// Info contains identifying information for a block device such as a disk drive
type Info struct {
	MajorNumber uint32
	MinorNumber uint32
	DeviceName  string
}

// IOStats models the iostats data described in the kernel documentation
// https://www.kernel.org/doc/Documentation/iostats.txt,
// https://www.kernel.org/doc/Documentation/block/stat.txt,
// and https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
type IOStats struct {
	// ReadIOs is the number of reads completed successfully.
	ReadIOs uint64
	// ReadMerges is the number of reads merged.  Reads and writes
	// which are adjacent to each other may be merged for efficiency.
	ReadMerges uint64
	// ReadSectors is the total number of sectors read successfully.
	ReadSectors uint64
	// ReadTicks is the total number of milliseconds spent by all reads.
	ReadTicks uint64
	// WriteIOs is the total number of writes completed successfully.
	WriteIOs uint64
	// WriteMerges is the number of reads merged.
	WriteMerges uint64
	// WriteSectors is the total number of sectors written successfully.
	WriteSectors uint64
	// WriteTicks is the total number of milliseconds spent by all writes.
	WriteTicks uint64
	// IOsInProgress is number of I/Os currently in progress.
	IOsInProgress uint64
	// IOsTotalTicks is the number of milliseconds spent doing I/Os.
	// This field increases so long as IosInProgress is nonzero.
	IOsTotalTicks uint64
	// WeightedIOTicks is the weighted number of milliseconds spent doing I/Os.
	// This can also be used to estimate average queue wait time for requests.
	WeightedIOTicks uint64
	// DiscardIOs is the total number of discards completed successfully.
	DiscardIOs uint64
	// DiscardMerges is the number of discards merged.
	DiscardMerges uint64
	// DiscardSectors is the total number of sectors discarded successfully.
	DiscardSectors uint64
	// DiscardTicks is the total number of milliseconds spent by all discards.
	DiscardTicks uint64
	// FlushRequestsCompleted is the total number of flush request completed successfully.
	FlushRequestsCompleted uint64
	// TimeSpentFlushing is the total number of milliseconds spent flushing.
	TimeSpentFlushing uint64
}

// Diskstats combines the device Info and IOStats
type Diskstats struct {
	Info
	IOStats
	// IoStatsCount contains the number of io stats read. For kernel versions 5.5+,
	// there should be 20 fields read. For kernel versions 4.18+,
	// there should be 18 fields read. For earlier kernel versions this
	// will be 14 because the discard values are not available.
	IoStatsCount int
}

// BlockQueueStats models the queue files that are located in the sysfs tree for each block device
// and described in the kernel documentation:
// https://www.kernel.org/doc/Documentation/block/queue-sysfs.txt
// https://www.kernel.org/doc/html/latest/block/queue-sysfs.html
type BlockQueueStats struct {
	// AddRandom is the status of a disk entropy (1 is on, 0 is off).
	AddRandom uint64
	// Dax indicates whether the device supports Direct Access (DAX) (1 is on, 0 is off).
	DAX uint64
	// DiscardGranularity is the size of internal allocation of the device in bytes, 0 means device
	// does not support the discard functionality.
	DiscardGranularity uint64
	// DiscardMaxHWBytes is the hardware maximum number of bytes that can be discarded in a single operation,
	// 0 means device does not support the discard functionality.
	DiscardMaxHWBytes uint64
	// DiscardMaxBytes is the software maximum number of bytes that can be discarded in a single operation.
	DiscardMaxBytes uint64
	// HWSectorSize is the sector size of the device, in bytes.
	HWSectorSize uint64
	// IOPoll indicates if polling is enabled (1 is on, 0 is off).
	IOPoll uint64
	// IOPollDelay indicates how polling will be performed, -1 for classic polling, 0 for hybrid polling,
	// with greater than 0 the kernel will put process issuing IO to sleep for this amount of time in
	// microseconds before entering classic polling.
	IOPollDelay int64
	// IOTimeout is the request timeout in milliseconds.
	IOTimeout uint64
	// IOStats indicates if iostats accounting is used for the disk (1 is on, 0 is off).
	IOStats uint64
	// LogicalBlockSize is the logical block size of the device, in bytes.
	LogicalBlockSize uint64
	// MaxHWSectorsKB is the maximum number of kilobytes supported in a single data transfer.
	MaxHWSectorsKB uint64
	// MaxIntegritySegments is the max limit of integrity segments as set by block layer which a hardware controller
	// can handle.
	MaxIntegritySegments uint64
	// MaxSectorsKB is the maximum number of kilobytes that the block layer will allow for a filesystem request.
	MaxSectorsKB uint64
	// MaxSegments is the number of segments on the device.
	MaxSegments uint64
	// MaxSegmentsSize is the maximum segment size of the device.
	MaxSegmentSize uint64
	// MinimumIOSize is the smallest preferred IO size reported by the device.
	MinimumIOSize uint64
	// NoMerges shows the lookup logic involved with IO merging requests in the block layer. 0 all merges are
	// enabled, 1 only simple one hit merges are tried, 2 no merge algorithms will be tried.
	NoMerges uint64
	// NRRequests is the number of how many requests may be allocated in the block layer for read or write requests.
	NRRequests uint64
	// OptimalIOSize is the optimal IO size reported by the device.
	OptimalIOSize uint64
	// PhysicalBlockSize is the physical block size of device, in bytes.
	PhysicalBlockSize uint64
	// ReadAHeadKB is the maximum number of kilobytes to read-ahead for filesystems on this block device.
	ReadAHeadKB uint64
	// Rotational indicates if the device is of rotational type or non-rotational type.
	Rotational uint64
	// RQAffinity indicates affinity policy of device, if 1 the block layer will migrate request completions to the
	// cpu “group” that originally submitted the request, if 2 forces the completion to run on the requesting cpu.
	RQAffinity uint64
	// SchedulerList contains list of available schedulers for this block device.
	SchedulerList []string
	// SchedulerCurrent is the current scheduler for this block device.
	SchedulerCurrent string
	// WriteCache shows the type of cache for block device, "write back" or "write through".
	WriteCache string
	// WriteSameMaxBytes is the number of bytes the device can write in a single write-same command.
	// A value of ‘0’ means write-same is not supported by this device.
	WriteSameMaxBytes uint64
	// WBTLatUSec is the target minimum read latency, 0 means feature is disables.
	WBTLatUSec int64
	// ThrottleSampleTime is the time window that blk-throttle samples data, in millisecond. Optional
	// exists only if CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
	ThrottleSampleTime *uint64
	// Zoned indicates if the device is a zoned block device and the zone model of the device if it is indeed zoned.
	// Possible values are: none, host-aware, host-managed for zoned block devices.
	Zoned string
	// NRZones indicates the total number of zones of the device, always zero for regular block devices.
	NRZones uint64
	// ChunksSectors for RAID is the size in 512B sectors of the RAID volume stripe segment,
	// for zoned host device is the size in 512B sectors.
	ChunkSectors uint64
	// FUA indicates whether the device supports Force Unit Access for write requests.
	FUA uint64
	// MaxDiscardSegments is the maximum number of DMA entries in a discard request.
	MaxDiscardSegments uint64
	// WriteZeroesMaxBytes the maximum number of bytes that can be zeroed at once.
	// The value 0 means that REQ_OP_WRITE_ZEROES is not supported.
	WriteZeroesMaxBytes uint64
}

const (
	procDiskstatsPath   = "diskstats"
	procDiskstatsFormat = "%d %d %s %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d"
	sysBlockPath        = "block"
	sysBlockStatFormat  = "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d"
	sysBlockQueue       = "queue"
)

// FS represents the pseudo-filesystems proc and sys, which provides an
// interface to kernel data structures.
type FS struct {
	proc *fs.FS
	sys  *fs.FS
}

// NewDefaultFS returns a new blockdevice fs using the default mountPoints for proc and sys.
// It will error if either of these mount points can't be read.
func NewDefaultFS() (FS, error) {
	return NewFS(fs.DefaultProcMountPoint, fs.DefaultSysMountPoint)
}

// NewFS returns a new blockdevice fs using the given mountPoints for proc and sys.
// It will error if either of these mount points can't be read.
func NewFS(procMountPoint string, sysMountPoint string) (FS, error) {
	if strings.TrimSpace(procMountPoint) == "" {
		procMountPoint = fs.DefaultProcMountPoint
	}
	procfs, err := fs.NewFS(procMountPoint)
	if err != nil {
		return FS{}, err
	}
	if strings.TrimSpace(sysMountPoint) == "" {
		sysMountPoint = fs.DefaultSysMountPoint
	}
	sysfs, err := fs.NewFS(sysMountPoint)
	if err != nil {
		return FS{}, err
	}
	return FS{&procfs, &sysfs}, nil
}

// ProcDiskstats reads the diskstats file and returns
// an array of Diskstats (one per line/device)
func (fs FS) ProcDiskstats() ([]Diskstats, error) {
	file, err := os.Open(fs.proc.Path(procDiskstatsPath))
	if err != nil {
		return nil, err
	}
	defer file.Close()

	diskstats := []Diskstats{}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		d := &Diskstats{}
		d.IoStatsCount, err = fmt.Sscanf(scanner.Text(), procDiskstatsFormat,
			&d.MajorNumber,
			&d.MinorNumber,
			&d.DeviceName,
			&d.ReadIOs,
			&d.ReadMerges,
			&d.ReadSectors,
			&d.ReadTicks,
			&d.WriteIOs,
			&d.WriteMerges,
			&d.WriteSectors,
			&d.WriteTicks,
			&d.IOsInProgress,
			&d.IOsTotalTicks,
			&d.WeightedIOTicks,
			&d.DiscardIOs,
			&d.DiscardMerges,
			&d.DiscardSectors,
			&d.DiscardTicks,
			&d.FlushRequestsCompleted,
			&d.TimeSpentFlushing,
		)
		// The io.EOF error can be safely ignored because it just means we read fewer than
		// the full 20 fields.
		if err != nil && err != io.EOF {
			return diskstats, err
		}
		if d.IoStatsCount >= 14 {
			diskstats = append(diskstats, *d)
		}
	}
	return diskstats, scanner.Err()
}

// SysBlockDevices lists the device names from /sys/block/<dev>
func (fs FS) SysBlockDevices() ([]string, error) {
	deviceDirs, err := ioutil.ReadDir(fs.sys.Path(sysBlockPath))
	if err != nil {
		return nil, err
	}
	devices := []string{}
	for _, deviceDir := range deviceDirs {
		if deviceDir.IsDir() {
			devices = append(devices, deviceDir.Name())
		}
	}
	return devices, nil
}

// SysBlockDeviceStat returns stats for the block device read from /sys/block/<device>/stat.
// The number of stats read will be 15 if the discard stats are available (kernel 4.18+)
// and 11 if they are not available.
func (fs FS) SysBlockDeviceStat(device string) (IOStats, int, error) {
	stat := IOStats{}
	bytes, err := ioutil.ReadFile(fs.sys.Path(sysBlockPath, device, "stat"))
	if err != nil {
		return stat, 0, err
	}
	count, err := fmt.Sscanf(strings.TrimSpace(string(bytes)), sysBlockStatFormat,
		&stat.ReadIOs,
		&stat.ReadMerges,
		&stat.ReadSectors,
		&stat.ReadTicks,
		&stat.WriteIOs,
		&stat.WriteMerges,
		&stat.WriteSectors,
		&stat.WriteTicks,
		&stat.IOsInProgress,
		&stat.IOsTotalTicks,
		&stat.WeightedIOTicks,
		&stat.DiscardIOs,
		&stat.DiscardMerges,
		&stat.DiscardSectors,
		&stat.DiscardTicks,
	)
	// An io.EOF error is ignored because it just means we read fewer than the full 15 fields.
	if err == io.EOF {
		return stat, count, nil
	}
	return stat, count, err
}

// SysBlockDeviceQueueStats returns stats for /sys/block/xxx/queue where xxx is a device name.
func (fs FS) SysBlockDeviceQueueStats(device string) (BlockQueueStats, error) {
	stat := BlockQueueStats{}
	// files with uint64 fields
	for file, p := range map[string]*uint64{
		"add_random":             &stat.AddRandom,
		"dax":                    &stat.DAX,
		"discard_granularity":    &stat.DiscardGranularity,
		"discard_max_hw_bytes":   &stat.DiscardMaxHWBytes,
		"discard_max_bytes":      &stat.DiscardMaxBytes,
		"hw_sector_size":         &stat.HWSectorSize,
		"io_poll":                &stat.IOPoll,
		"io_timeout":             &stat.IOTimeout,
		"iostats":                &stat.IOStats,
		"logical_block_size":     &stat.LogicalBlockSize,
		"max_hw_sectors_kb":      &stat.MaxHWSectorsKB,
		"max_integrity_segments": &stat.MaxIntegritySegments,
		"max_sectors_kb":         &stat.MaxSectorsKB,
		"max_segments":           &stat.MaxSegments,
		"max_segment_size":       &stat.MaxSegmentSize,
		"minimum_io_size":        &stat.MinimumIOSize,
		"nomerges":               &stat.NoMerges,
		"nr_requests":            &stat.NRRequests,
		"optimal_io_size":        &stat.OptimalIOSize,
		"physical_block_size":    &stat.PhysicalBlockSize,
		"read_ahead_kb":          &stat.ReadAHeadKB,
		"rotational":             &stat.Rotational,
		"rq_affinity":            &stat.RQAffinity,
		"write_same_max_bytes":   &stat.WriteSameMaxBytes,
		"nr_zones":               &stat.NRZones,
		"chunk_sectors":          &stat.ChunkSectors,
		"fua":                    &stat.FUA,
		"max_discard_segments":   &stat.MaxDiscardSegments,
		"write_zeroes_max_bytes": &stat.WriteZeroesMaxBytes,
	} {
		val, err := util.ReadUintFromFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, file))
		if err != nil {
			return BlockQueueStats{}, err
		}
		*p = val
	}
	// files with int64 fields
	for file, p := range map[string]*int64{
		"io_poll_delay": &stat.IOPollDelay,
		"wbt_lat_usec":  &stat.WBTLatUSec,
	} {
		val, err := util.ReadIntFromFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, file))
		if err != nil {
			return BlockQueueStats{}, err
		}
		*p = val
	}
	// files with string fields
	for file, p := range map[string]*string{
		"write_cache": &stat.WriteCache,
		"zoned":       &stat.Zoned,
	} {
		val, err := util.SysReadFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, file))
		if err != nil {
			return BlockQueueStats{}, err
		}
		*p = val
	}
	scheduler, err := util.SysReadFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, "scheduler"))
	if err != nil {
		return BlockQueueStats{}, err
	}
	var schedulers []string
	xs := strings.Split(scheduler, " ")
	for _, s := range xs {
		if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") {
			s = s[1 : len(s)-1]
			stat.SchedulerCurrent = s
		}
		schedulers = append(schedulers, s)
	}
	stat.SchedulerList = schedulers
	// optional
	throttleSampleTime, err := util.ReadUintFromFile(fs.sys.Path(sysBlockPath, device, sysBlockQueue, "throttle_sample_time"))
	if err == nil {
		stat.ThrottleSampleTime = &throttleSampleTime
	}
	return stat, nil
}