#!/usr/bin/python3 # -*- coding: utf-8 -*- # Copyright (c) 2019 Huawei Technologies Co., Ltd. # A-Tune is licensed under the Mulan PSL v1. # You can use this software according to the terms and conditions of the Mulan PSL v1. # You may obtain a copy of Mulan PSL v1 at: # http://license.coscl.org.cn/MulanPSL # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR # PURPOSE. # See the Mulan PSL v1 for more details. # Create: 2019-10-29 """ Parse the string of CPUs to a list of CPUs. """ from __future__ import print_function import re from . import base _CPU_PATTERNS = {"single": re.compile(r"^\s*(\d+)\s*$"), "range": re.compile(r"^\s*(\d+)\s*-\s*(\d+)\s*$"), "exclude": re.compile(r"^\s*\^(\d+)\s*$")} def _parse_cpu_str(cpu_str): """Parse the string of CPUs to a list of CPUs. @param cpu_str: the string of CPUs to parse @return: a list of string which represents CPU. """ matches = [{key: pattern.match(sub_list) for key, pattern in _CPU_PATTERNS.items()} for sub_list in cpu_str.split(',')] includes = set() excludes = set() for match in matches: if match['single'] is not None: includes.add(int(match['single'].group(1))) elif match['range'] is not None: # in "start-end" format, both `start` and `end` are included includes.update(range(int(match['range'].group(1)), int(match['range'].group(2)) + 1)) elif match['exclude'] is not None: excludes.add(int(match['exclude'].group(1))) else: raise ValueError("Unknown cpu str format `{}`".format(cpu_str)) cpu_list = list(includes - excludes) cpu_list.sort() return map(str, cpu_list) def _get_available_cpus(): """Get the avaiable CPUs. @return: a list of string which represents CPU. """ with open('/sys/devices/system/cpu/possible', 'r') as possible_fd: possible = possible_fd.read() return _parse_cpu_str(possible) class MpstatParser(base.Parser): """The parser to parse the output of mpstat""" def __init__(self, raw_data_file, data_to_collect, **kwargs): """Initialize a mpstat parser. @param raw_data_file: the path of raw data @param data_to_collect: list of str which represents the metrics to parse @param dev_list: list of devices of which metrics whille be collectted @param alias: alias name of output fields (default: "mpstat") """ base.Parser.__init__(self, raw_data_file, data_to_collect, **kwargs) self._dev_list = kwargs.get("dev_list", None) if self._dev_list == "all": self._dev_list = ["all"] elif self._dev_list == "ALL": self._dev_list = ["all"] self._dev_list.extend(_get_available_cpus()) else: self._dev_list = _parse_cpu_str(self._dev_list) self._check_dev() def _check_data_to_collect(self): """Read the first batch output of mpstat and check whether or not all metrics in data_to_collect are in the output. If not, it will raise ValueError. """ with open(self._raw_data_file, 'r') as raw_data_fd: raw_data_fd.readline() raw_data_fd.readline() line_part = raw_data_fd.readline().split() cpu_idx = line_part.index("CPU") datas = line_part[cpu_idx + 1:] diff_set = set(self._data_to_collect) - set(datas) if diff_set: raise ValueError("`{}`: Unknown data name `{}`".format(self._raw_data_file, ','.join(diff_set))) def _check_dev(self): """Read the first batch output of mpstat and check whether or not all devices in dev_list are in the output. If not, it will raise ValueError. """ if not self._dev_list: raise ValueError("You must assigned at least one device") devs = set() with open(self._raw_data_file, 'r') as raw_data_fd: raw_data_fd.readline() raw_data_fd.readline() cpu_idx = raw_data_fd.readline().split().index("CPU") for line in raw_data_fd: if not line.strip(): continue dev = line.split()[cpu_idx] if dev in devs: break else: devs.add(dev) if "CPU" in devs: devs.remove("CPU") diff_set = set(self._dev_list) - devs if diff_set: raise ValueError("Can not find block device `{}`".format(','.join(diff_set))) def _get_iter(self): """Get the iteration of the mpstat parser. @return: the iteration of the mpstat parser """ data = {} attrs = [] with open(self._raw_data_file, 'r') as raw_data_fd: raw_data_fd.readline() for row_num, line in enumerate(raw_data_fd, 2): if not line.strip(): continue line_part = line.split() if "CPU" in line_part: cpu_idx = line_part.index("CPU") attrs = line_part[cpu_idx + 1:] else: if len(attrs) != len(line_part) - cpu_idx - 1: print("WARNING: {}: Line {}: The number of columns may be wrong." .format(self._raw_data_file, row_num)) return line_data = [float(d) for d in line_part[cpu_idx + 1:]] if line_part[cpu_idx] not in data: data[line_part[cpu_idx]] = dict(zip(attrs, line_data)) else: yield [data[dev][attr] for dev in self._dev_list for attr in self._data_to_collect] data = {} data[line_part[cpu_idx]] = dict(zip(attrs, line_data)) if all(dev in data for dev in self._dev_list): yield [data[dev][attr] for dev in self._dev_list for attr in self._data_to_collect]