`
ejacky
  • 浏览: 31955 次
  • 性别: Icon_minigender_1
  • 来自: 抚顺
社区版块
存档分类
最新评论

阿里云监控服务器信息

阅读更多
#!/usr/bin/python
#########################################
# Function:    sample linux performance indices
# Usage:       python sampler.py
# Author:      CMS DEV TEAM
# Company:     Aliyun Inc.
# Version:     1.1
#########################################

import os
import os.path
import sys
import time
import operator
import httplib
import logging
import socket
import random
from shutil import copyfile
from subprocess import Popen, PIPE
from logging.handlers import RotatingFileHandler

# Module-level settings. All of them are None at import time and are assigned
# in the __main__ block at the bottom of the script before collector() runs.
logger = None  # logging.Logger used by the sampling and reporting functions
REMOTE_HOST = None  # host name the metric lines are POSTed to
REMOTE_PORT = None  # port of the reporting endpoint
REMOTE_MONITOR_URI = None  # request path of the reporting endpoint
UUID = None  # optional instance id; when set it is sent as instanceId=... on every line

def get_mem_usage_percent():
    """Return (physical_percent, swap_percent) computed from /proc/meminfo.

    Physical usage is MemTotal minus MemFree, Buffers and Cached, expressed
    as a percentage of MemTotal.  Swap usage is (SwapTotal - SwapFree) as a
    percentage of SwapTotal, or 0 when SwapTotal is 0.

    Returns None when /proc/meminfo cannot be read or parsed, when one of
    the six required fields is missing, or when MemTotal is not positive.
    """
    wanted = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree')
    values = {}
    try:
        f = open('/proc/meminfo', 'r')
        try:
            for line in f:
                fields = line.split()
                # Lines look like "MemTotal:  16384 kB"; ignore anything else.
                if len(fields) < 2 or not fields[0].endswith(':'):
                    continue
                name = fields[0][:-1]
                if name in wanted:
                    values[name] = int(fields[1])
        finally:
            # Close the file even when a value fails to parse.
            f.close()
    except (IOError, OSError, ValueError):
        return None

    # A missing field would otherwise surface as a NameError/KeyError in the
    # arithmetic below, outside of any error handling.
    for name in wanted:
        if name not in values:
            return None
    mem_total = values['MemTotal']
    if mem_total <= 0:
        return None

    mem_available = values['MemFree'] + values['Buffers'] + values['Cached']
    physical_percent = usage_percent(mem_total - mem_available, mem_total)
    virtual_percent = 0
    vmem_total = values['SwapTotal']
    if vmem_total > 0:
        virtual_percent = usage_percent(vmem_total - values['SwapFree'], vmem_total)
    return physical_percent, virtual_percent

# Filesystem types that get_disk_partition() leaves out when it collects
# mount points.
black_list = ('iso9660',)

def usage_percent(use, total):
    """Return `use` expressed as a percentage of `total` (a float).

    A zero `total` is reported by raising a plain Exception carrying the
    message "ERROR - zero division error".
    """
    try:
        fraction = float(use) / total
    except ZeroDivisionError:
        raise Exception("ERROR - zero division error")
    else:
        return fraction * 100

def get_disk_partition():
    """Return the mount points of device-backed filesystems from /etc/mtab.

    The accepted filesystem types are the entries of /proc/filesystems that
    are not flagged "nodev", minus the types listed in `black_list`.  An
    /etc/mtab entry is kept when its third column (the filesystem type) is
    one of those types; its second column (the mount point) is returned.
    Entries whose line starts with "none" are ignored.

    Returns a list of mount point strings, or None when either file cannot
    be read.
    """
    try:
        disk_fs_types = []
        f = open("/proc/filesystems", "r")
        try:
            for line in f:
                if line.startswith("nodev"):
                    continue
                fs_type = line.strip()
                if fs_type not in black_list:
                    disk_fs_types.append(fs_type)
        finally:
            f.close()

        mount_points = []
        f = open('/etc/mtab', "r")
        try:
            for line in f:
                if line.startswith('none'):
                    continue
                fields = line.split()
                # Skip blank or truncated lines instead of discarding the
                # whole result because of one malformed entry.
                if len(fields) < 3:
                    continue
                if fields[2] in disk_fs_types:
                    mount_points.append(fields[1])
        finally:
            f.close()
    except (IOError, OSError):
        return None
    return mount_points

def check_disk():
    """Return disk usage for every mount point from get_disk_partition().

    The result maps mount point -> (used_percent, size), both formatted as
    '%.2f' strings.  used_percent is (f_blocks - f_bfree) relative to
    f_blocks; size is the total byte count divided by 1024 * 1000000.

    Returns None when the mount point list is unavailable.  A mount point
    whose statistics cannot be read, or that reports zero blocks, is left
    out so that it does not discard the data of the other mount points.
    """
    mount_points = get_disk_partition()
    if mount_points is None:
        return None

    return_dict = {}
    for mount_point in mount_points:
        try:
            stats = os.statvfs(mount_point)
        except OSError:
            continue
        total_bytes = stats.f_blocks * stats.f_frsize
        if total_bytes <= 0:
            # usage_percent() would raise on a zero total.
            continue
        used_bytes = (stats.f_blocks - stats.f_bfree) * stats.f_frsize
        return_dict[mount_point] = (
            '%.2f' % (usage_percent(used_bytes, total_bytes),),
            '%.2f' % (total_bytes * 1.0 / (1024 * 1000000)),
        )
    return return_dict

# Value of the SC_CLK_TCK system configuration variable, read once at import.
# NOTE(review): no function in the visible part of this file references it.
_CLOCK_TICKS = os.sysconf("SC_CLK_TCK")

def _read_cpu_times(path):
    """Return (total, idle, iowait) counters from the first line of `path`.

    `path` must be in /proc/stat format: total is the sum of every numeric
    column of the first line, idle is column 4 and iowait is column 5.
    """
    f = open(path, 'r')
    try:
        values = f.readline().split()
    finally:
        f.close()
    total = sum([int(value) for value in values[1:]])
    return total, int(values[4]), int(values[5])


def _cpu_deltas(before, after):
    """Return the (total, idle, iowait) differences between two samples."""
    return (after[0] - before[0], after[1] - before[1], after[2] - before[2])


def _busy_percent(deltas):
    """Return the non-idle, non-iowait share of `deltas` as an int percent."""
    total_time, idle_time, iowait_time = deltas
    return int(100.0 * (total_time - idle_time - iowait_time) / total_time)


def get_cpu_time():
    """Return CPU utilisation (int percent) since the previous invocation.

    The previous sample is kept in /tmp/cpu_stat, a copy of /proc/stat that
    is refreshed at the end of every call.  When that snapshot is missing,
    empty or unparsable it is re-created and the function waits one second
    so that there is an interval to measure.

    If the first measurement is unusable (zero or negative deltas, or a
    percentage outside 0..100) the function sleeps one second and measures
    once more against a fresh /proc/stat reading; that second result is
    returned as computed.
    """
    # NOTE(review): /tmp/cpu_stat is a fixed, world-writable location.
    snapshot = '/tmp/cpu_stat'
    need_sleep = False
    if not os.path.isfile(snapshot) or os.path.getsize(snapshot) == 0:
        copyfile('/proc/stat', snapshot)
        need_sleep = True

    try:
        previous = _read_cpu_times(snapshot)
    except (ValueError, IndexError):
        # A corrupt snapshot would otherwise fail on every run because the
        # refresh at the end of this function is never reached.
        copyfile('/proc/stat', snapshot)
        need_sleep = True
        previous = _read_cpu_times(snapshot)

    if need_sleep:
        time.sleep(1)

    current = _read_cpu_times('/proc/stat')
    deltas = _cpu_deltas(previous, current)

    cpu_percentage = None
    # A zero total delta must be caught before dividing by it.
    if deltas[0] > 0 and deltas[1] >= 0 and deltas[2] >= 0:
        cpu_percentage = _busy_percent(deltas)

    # compensate logic
    if cpu_percentage is None or cpu_percentage < 0 or cpu_percentage > 100:
        time.sleep(1)
        latest = _read_cpu_times('/proc/stat')
        cpu_percentage = _busy_percent(_cpu_deltas(current, latest))

    copyfile('/proc/stat', snapshot)
    return cpu_percentage

def _read_net_counters():
    """Return {interface: (received, sent)} read from /proc/net/dev.

    The first and ninth columns after the interface name (received and
    transmitted bytes) are multiplied by 0.0078125 (8 / 1024) and rounded
    to four decimals.

    Raises ValueError for a line that has no "name:" prefix.
    """
    f = open("/proc/net/dev", "r")
    try:
        lines = f.readlines()
    finally:
        f.close()

    counters = {}
    # The first two lines of /proc/net/dev are column headers.
    for line in lines[2:]:
        colon = line.find(':')
        if colon <= 0:
            # Explicit check instead of assert, which is removed under -O.
            raise ValueError("unexpected line in /proc/net/dev: %r" % line)
        name = line[:colon].strip()
        fields = line[colon + 1:].strip().split()
        received = float('%.4f' % (float(fields[0]) * 0.0078125))
        sent = float('%.4f' % (float(fields[8]) * 0.0078125))
        counters[name] = (received, sent)
    return counters


def network_io_kbitps():
    """Return per-interface network throughput measured over one second.

    /proc/net/dev is sampled twice, one second apart, and the two samples
    are combined with merge_with(), which subtracts the first sample from
    the second.  The result maps interface name -> (received, sent) in the
    scaled units produced by _read_net_counters().

    An interface present in only one of the two samples keeps the counter
    value of that sample rather than a difference.
    """
    first_sample = _read_net_counters()
    time.sleep(1)
    second_sample = _read_net_counters()
    return merge_with(second_sample, first_sample)

def disk_io_Kbps():
    """Return per-device disk throughput as reported by iostat.

    Runs "iostat -d -k 1 2", filters header and blank lines with sed and
    stores the remainder in /tmp/disk_io, which is then parsed.  The result
    maps device name -> (read, write) taken from the third and fourth
    columns of each line; when a device appears more than once the last
    line wins.

    Returns None (after logging) when the command writes anything to
    stderr or when /tmp/disk_io cannot be opened.  Lines with fewer than
    four columns or with non-numeric rates are skipped.
    """
    iostat = Popen("iostat -d -k 1 2 | sed '/Device\\|Linux\\|^$/d' > /tmp/disk_io", shell=True, stdout=PIPE, stderr=PIPE)
    iostat_error = iostat.communicate()[1].strip()
    if iostat_error:
        logger.error("iostat not exists, %s" % iostat_error)
        return None

    try:
        f = open('/tmp/disk_io', 'r')
    except (IOError, OSError):
        logger.error(sys.exc_info()[1])
        return None
    # The file is only closed once it has actually been opened.
    try:
        lines = f.readlines()
    finally:
        f.close()

    retdict = {}
    for line in lines:
        fields = line.split()
        # Index the columns instead of unpacking exactly six of them, so
        # iostat output with additional trailing columns still parses.
        if len(fields) < 4:
            continue
        try:
            retdict[fields[0]] = (float(fields[2]), float(fields[3]))
        except ValueError:
            continue
    return retdict
def merge_with(d1, d2, fn=lambda x, y: tuple(map(operator.sub, x, y))):
    """Return a new dict combining `d1` and `d2` key by key.

    For a key present in both dicts the value is fn(d1[key], d2[key]); the
    default `fn` subtracts two equally long tuples element-wise.  A key
    present in only one dict keeps that dict's value.  Neither input is
    modified.
    """
    res = d1.copy()
    # Plain key iteration works on both Python 2 and Python 3 dicts.
    for key in d2:
        val = d2[key]
        # Test membership explicitly so a KeyError raised inside fn is not
        # mistaken for a missing key.
        if key in res:
            res[key] = fn(res[key], val)
        else:
            res[key] = val
    return res

def get_load():
    """Return the three load averages from /proc/loadavg as floats.

    The tuple is (1 minute, 5 minutes, 15 minutes), i.e. the first three
    columns of the file.  Returns None when the file cannot be read or
    does not contain three numeric columns.
    """
    try:
        f = open('/proc/loadavg', 'r')
        try:
            fields = f.readline().split()
        finally:
            # Close the file even when reading fails.
            f.close()
        return float(fields[0]), float(fields[1]), float(fields[2])
    except (IOError, OSError, ValueError, IndexError):
        return None

def get_tcp_status():
    """Return TCP connection counts per state as a "STATE=count ..." string.

    Uses `ss` when "command -v ss" finds it and `netstat` otherwise.  The
    `ss` output is post-processed with sed: '-' becomes '_', "ESTAB=" becomes
    "ESTABLISHED=" and "FIN_WAIT_" becomes "FIN_WAIT".
    """
    # awk stage shared by both pipelines: tallies the values of the first
    # column and prints them as "value=count " pairs.
    tally = "awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}'"

    probe = Popen("command -v ss", shell=True, stdout=PIPE)
    ss_path = probe.communicate()[0].rstrip('\n')
    if not ss_path:
        pipeline = "netstat -anp | grep tcp | awk '{print $6}' | " + tally + " | tail -n 1"
    else:
        pipeline = ("ss -ant | awk '{if(NR != 1) print $1}' | " + tally +
                    " | sed 's/-/_/g' | sed 's/ESTAB=/ESTABLISHED=/g' | sed 's/FIN_WAIT_/FIN_WAIT/g'")

    output = Popen(pipeline, shell=True, stdout=PIPE).communicate()[0]
    return output.rstrip('\n')

def get_proc_number():
    """Return the line count of `ps axu` as a string.

    The value is the raw output of "ps axu | wc -l | tail -n 1" with
    trailing newlines removed.
    """
    pipeline = Popen("ps axu | wc -l | tail -n 1", shell=True, stdout=PIPE)
    output = pipeline.communicate()[0]
    return output.rstrip('\n')

def all_index():
    """Take one sample of every metric and return them as a 9-tuple.

    The order is: timestamp in milliseconds, CPU, memory, disk usage, disk
    I/O, network I/O, load averages, TCP status and process count.  The
    samplers run in exactly that order.
    """
    timestamp_ms = int(time.time() * 1000)
    cpu = get_cpu_time()
    memory = get_mem_usage_percent()
    disk_usage = check_disk()
    disk_io = disk_io_Kbps()
    network_io = network_io_kbitps()
    load = get_load()
    tcp_status = get_tcp_status()
    process_number = get_proc_number()
    return (timestamp_ms, cpu, memory, disk_usage, disk_io, network_io,
            load, tcp_status, process_number)

def _metric_line(metric, timestamp, value, unit, uuid, tag=None):
    """Format one metric line of the report body.

    Layout: "<metric> <timestamp> <value> ns=ACS/ECS unit=<unit>", followed
    by " instanceId=<uuid>" when `uuid` is set and " <tag>" when `tag` is
    given, terminated by a newline.
    """
    fields = [metric, str(timestamp), str(value), 'ns=ACS/ECS', 'unit=' + unit]
    if uuid:
        fields.append('instanceId=%s' % uuid)
    if tag:
        fields.append(tag)
    return ' '.join(fields) + '\n'


def collector():
    """Sample all metrics once, print them and POST them to the report server.

    The metrics come from all_index().  Each one becomes one or more text
    lines (see _metric_line); metrics whose sampler returned nothing are
    left out.  The assembled body is written to stdout and, after a random
    delay of 0-5 seconds, sent with an HTTP POST to REMOTE_HOST:REMOTE_PORT
    at REMOTE_MONITOR_URI.  A non-200 response is logged as a warning and
    any exception raised while sending is logged as an error; neither is
    propagated.
    """
    timestamp, cpu, mem, disk, disk_io, net, load, tcp_status, process_number = all_index()

    # The id is read from a file with readline(); a trailing newline would
    # split every metric line in two.
    uuid = UUID
    if uuid:
        uuid = uuid.strip()

    lines = [_metric_line('vm.CPUUtilization', timestamp, cpu, 'Percent', uuid)]

    # get_mem_usage_percent() returns None on failure; skip the metric
    # instead of failing the whole report.
    if mem:
        lines.append(_metric_line('vm.MemoryUtilization', timestamp, mem[0], 'Percent', uuid))

    if load:
        for value, period in zip(load, ('1min', '5min', '15min')):
            lines.append(_metric_line('vm.LoadAverage', timestamp, value, 'count', uuid, 'period=' + period))

    if disk:
        for name, value in disk.items():
            lines.append(_metric_line('vm.DiskUtilization', timestamp, value[0], 'Percent', uuid, 'mountpoint=%s' % name))

    if disk_io:
        # All read lines are emitted before all write lines.
        reads = []
        writes = []
        for name, value in disk_io.items():
            tag = 'diskname=%s' % name
            reads.append(_metric_line('vm.DiskIORead', timestamp, value[0], 'Kilobytes/Second', uuid, tag))
            writes.append(_metric_line('vm.DiskIOWrite', timestamp, value[1], 'Kilobytes/Second', uuid, tag))
        lines.extend(reads)
        lines.extend(writes)

    if net:
        # All RX lines are emitted before all TX lines.
        received = []
        sent = []
        for name, value in net.items():
            tag = 'netname=%s' % name
            received.append(_metric_line('vm.InternetNetworkRX', timestamp, value[0], 'Kilobits/Second', uuid, tag))
            sent.append(_metric_line('vm.InternetNetworkTX', timestamp, value[1], 'Kilobits/Second', uuid, tag))
        lines.extend(received)
        lines.extend(sent)

    if tcp_status:
        for element in tcp_status.split():
            key_value = element.split('=')
            if len(key_value) < 2:
                # Not a "state=count" token.
                continue
            lines.append(_metric_line('vm.TcpCount', timestamp, key_value[1], 'Count', uuid, 'state=%s' % key_value[0]))

    lines.append(_metric_line('vm.ProcessCount', timestamp, process_number, 'Count', uuid))

    data_post = ''.join(lines)
    sys.stdout.write(data_post + '\n')

    # Random delay of up to five seconds before the report is sent.
    interval = random.randint(0, 5000)
    time.sleep(interval / 1000.0)

    headers = {"Content-Type": "text/plain", "Accept": "text/plain"}
    exception = None
    http_client = None
    try:
        try:
            http_client = httplib.HTTPConnection(REMOTE_HOST, REMOTE_PORT)
            http_client.request(method="POST", url=REMOTE_MONITOR_URI, body=data_post, headers=headers)
            response = http_client.getresponse()
            if response.status != 200:
                logger.warning("response code %d" % response.status)
                logger.warning("response code %s" % response.read())
        except Exception:
            exception = sys.exc_info()[1]
    finally:
        if http_client:
            http_client.close()
        if exception:
            logger.error(exception)

# Script entry point: load the optional configuration, set up logging and run
# one collection cycle.  The process exits with status 1 when collector()
# raises.
if __name__ == '__main__':
    REMOTE_HOST = 'open.cms.aliyun.com'
    REMOTE_PORT = 80

    # get report address: optional "key = value" overrides in ../cmscfg
    if os.path.isfile("../cmscfg"):
        props = {}
        prop_file = open("../cmscfg", 'r')
        try:
            for line in prop_file.readlines():
                kv = line.split('=', 1)
                if len(kv) != 2:
                    # Blank line or a line without '='.
                    continue
                props[kv[0].strip()] = kv[1].strip()
        finally:
            prop_file.close()
        if props.get('report_domain'):
            REMOTE_HOST = props.get('report_domain')
        if props.get('report_port'):
            REMOTE_PORT = props.get('report_port')
            try:
                REMOTE_PORT = int(REMOTE_PORT)
            except ValueError:
                # Keep the configured text unchanged if it is not a number.
                pass

    # get uuid: optional instance id, first line of the uuid file
    if os.path.isfile("../aegis_quartz/conf/uuid"):
        uuid_file = open("../aegis_quartz/conf/uuid", 'r')
        try:
            # strip() removes the line terminator, which would otherwise be
            # embedded in every reported metric line.
            UUID = uuid_file.readline().strip().lower()
        finally:
            uuid_file.close()

    REMOTE_MONITOR_URI = "/metrics/putLines"
    MONITOR_DATA_FILE_DIR = "/tmp"
    LOG_FILE = "/tmp/" + "vm.log"
    LOG_LEVEL = logging.INFO
    LOG_FILE_MAX_BYTES = 1024 * 1024
    LOG_FILE_MAX_COUNT = 3
    logger = logging.getLogger('sampler')
    logger.setLevel(LOG_LEVEL)
    handler = RotatingFileHandler(filename=LOG_FILE, mode='a', maxBytes=LOG_FILE_MAX_BYTES,
                                  backupCount=LOG_FILE_MAX_COUNT)
    formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # Applies to sockets created afterwards, including the report connection.
    socket.setdefaulttimeout(10)

    try:
        collector()
    except Exception:
        logger.error(sys.exc_info()[1])
        sys.exit(1)
分享到:
评论

相关推荐

    阿里云云监控对接grafana插件

    阿里云云监控对接Grafana插件是一种强大的可视化工具,它允许用户将阿里云平台上的监控数据集成到Grafana面板中,以便于更直观、更有效地管理和分析这些数据。Grafana是一款开源的度量分析与可视化套件,常用于展示...

    关于阿里云监控(部署)

    阿里云监控是一个功能强大且广泛使用的云计算监控平台,它可以实时监控服务器和应用程序的性能和状态,提供实时警报和自动化故障恢复等功能。本文将详细介绍阿里云监控的部署过程,包括 Linux 和 Windows 平台的部署...

    阿里云服务器ECS介绍.pptx

    云服务器ECS介绍 阿里云服务器ECS介绍全文共12页,当前为第1页。 打造公共、开放的以数据为中心的云计算服务平台,借助技术创新,不断提升计算能力与规模效益,将云计算变成真正意义上的5A的公共服务 阿里云打造数据...

    阿里云服务器建网的JDK与Tomcat

    在构建基于阿里云服务器的网络应用时,JDK(Java Development Kit)和Tomcat是两个至关重要的组件。本文将深入探讨这两个技术以及如何在阿里云轻量应用服务器上进行安装和配置,以便支持各种网站的搭建。 JDK,全称...

    Python3编程实现获取阿里云ECS实例及监控的方法

    本文实例讲述了Python3编程实现获取阿里云ECS实例及监控的方法。分享给大家供大家参考,具体如下: #!/usr/bin/env python3.5 # -*- coding:utf8 -*- try: import httplib except ImportError: import ...

    阿里云负载均衡服务器集群架构图:云盾、云监控、SLB、ECS集群、主从数据库.zip

    阿里云负载均衡服务器集群架构图:云盾、云监控、SLB、ECS集群、主从数据库.zip 1.购买负载均衡 2.购买ECS云主机 3.管理后端服务器

    “阿里云监控Agent”软件使用许可协议.docx

    “阿里云监控Agent”是一款由阿里云自主研发、具有自主知识产权的计算机软件,用于帮助用户实现对服务器各项监控功能。软件名称和功能可能根据产品优化而变化,无需提前通知用户。 2. **使用方式**: 用户可以...

    STM32+ESP8266通过AT指令WIFI连接阿里云MQTT服务器

    本项目聚焦于使用STM32微控制器通过AT指令控制ESP8266模块,以实现WiFi连接到阿里云MQTT服务器。以下是对这一主题的详细阐述: 首先,STM32是意法半导体推出的基于ARM Cortex-M内核的微控制器系列,具有高性能、低...

    Nginx 阿里云服务器提示504

    7. **监控和报警**:设置适当的监控机制,如通过阿里云监控服务,一旦发现504错误频繁出现,能够及时收到报警并处理。 通过以上步骤,大多数情况下可以解决504 Gateway Time-out问题。在实际操作中,可能需要结合...

    阿里云服务器用户管理

    ### 阿里云服务器用户管理相关知识点 #### 一、概述 云服务器作为阿里云计算在基础设施应用上不可或缺的一部分,不仅是阿里云的核心产品之一,也是众多企业和开发者构建互联网应用程序和服务的基础。阿里云提供了...

    2020阿里云产品图标.pptx

    这些服务包括消息队列服务mq、API网关apigateway、云命令行cloudshell、云监控cms等。这些服务为用户提供了丰富的云计算能力和应用开发能力。 阿里云产品图标大全展示了阿里云的强大产品和服务能力,为用户提供了...

    阿里云监控使用手册-33

    阿里云监控使用手册-33 阿里云监控是阿里云提供的一种云服务监控管理入口,能够让用户快速了解各产品实例的状态和性能。云监控从站点监控、云服务监控、自定义监控三个方面来为用户提供服务。 站点监控 站点监控...

    jmeter第7章 高级篇之阿里云Linux服务器压测接口实战

    通过JMeter的监听器(如聚合报告、响应时间图等)收集这些数据,并结合阿里云服务器上的系统监控工具(如云监控、top、iostat等),分析服务器资源使用情况。 7. **结果分析与优化**:基于测试结果,分析系统的瓶颈...

    阿里云专有云企业版云服务器ECS运维指南产品版本:V3.12.0

    《阿里云专有云企业版云服务器ECS运维指南》V3.12.0版本主要针对使用阿里云专有云企业版云服务器ECS的企业用户提供了一份详尽的操作和维护手册。此文档旨在帮助用户更好地理解和管理他们的云服务器资源,确保服务的...

    阿里监控工具阿里监控工具

    此外,阿里监控工具还支持与其他阿里云服务集成,如云服务器ECS、负载均衡SLB、数据库RDS等,实现全栈式云上监控。对于混合云或本地环境,阿里监控工具也有相应的解决方案,确保全面覆盖企业的IT环境。 总的来说,...

    ESP8266 连接阿里云 MQTT 服务器 EMQ

    总结来说,连接ESP8266到阿里云MQTT服务器EMQ涉及到ESP8266的配置、MQTT库的使用、阿里云账号信息的集成、主题的设定、心跳维持以及异常处理等多个步骤。正确执行这些步骤,就能实现ESP8266与阿里云之间的稳定通信,...

    阿里云 专有云企业版 V3.8.1 云服务器 ECS 产品简介 20190910

    阿里云专有云企业版V3.8.1云服务器ECS产品简介是阿里云发布的一份关于云服务器ECS的产品介绍文档,该文档详细介绍了云服务器ECS的产品版本、法律声明、通用约定、云服务器ECS的概念、产品优势、产品架构等内容。...

    阿里云服务器实例web工程

    综上所述,阿里云服务器实例Web工程是一个涵盖了服务器配置、Web应用开发、数据库连接、安全策略、监控运维等多个方面的综合项目。通过合理设计和优化,可以在阿里云平台上搭建出高效、稳定的Web服务环境。

    Usart.rar_串口传输 云服务器 无线控制_云 上位机_阿里云_阿里云传感器_阿里云服务器

    在本文中,我们将深入探讨如何使用串口通信技术将MCU(微控制器)上的传感器数据传输到阿里云服务器,并通过云上位机实现无线控制。首先,我们要理解串口传输的基本原理,然后介绍如何与阿里云平台进行对接,以及...

Global site tag (gtag.js) - Google Analytics