描述统计

lingzhi007

浏览: 126157 次
性别:
来自: 杭州

最近访客 更多访客>>

morelily

gaojingsong

gaz0301

jiedushi

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

数据-通用数据分析

import org.apache.commons.math3.stat.descriptive.moment._

/**
 * Demo comparing descriptive statistics of a raw data set with the same
 * statistics recovered from a t-digest that was serialized and deserialized.
 *
 * Steps:
 *  1. feed the values 0..ttCnt into an `AVLTreeDigest` and keep them in `orgCollection`;
 *  2. write the digest to a `ByteBuffer` with `asSmallBytes` and read it back with
 *     `AVLTreeDigest.fromBytes`;
 *  3. rebuild a "sample" collection by asking the restored digest for the quantiles
 *     0/ttCnt, 1/ttCnt, ..., ttCnt/ttCnt;
 *  4. print max, min, range, mean, standard error of the mean, variance, standard
 *     deviation, median, mode, kurtosis and skewness for both collections.
 *
 * Variance and standard deviation are the population forms (divided by n).
 * Everything is written to standard output; nothing is returned.
 *
 * Requires `TDigest`, `AVLTreeDigest`, `ByteBuffer`, and commons-math3 `Kurtosis` /
 * `Skewness` to be in scope (this snippet only shows the commons-math3 import).
 */
def vLTreeDigesttest: Unit = {
    val ttCnt = 10000

    // Arithmetic mean of a non-empty sequence.
    def mean(xs: scala.collection.Seq[Double]): Double = xs.sum / xs.size

    // Population variance: mean of the squared deviations from `avg`.
    def variance(xs: scala.collection.Seq[Double], avg: Double): Double =
        xs.map(x => Math.pow(x - avg, 2)).sum / xs.size

    // Median of a non-empty sequence; averages the two middle values for even sizes.
    def median(xs: scala.collection.Seq[Double]): Double = {
        val ordered = xs.sorted
        val mid = ordered.size / 2
        if (ordered.size % 2 == 1) ordered(mid) else (ordered(mid - 1) + ordered(mid)) / 2
    }

    // Most frequent value with its count. Ties are resolved by whatever order the
    // grouped map iterates in, so with all-distinct values the result is arbitrary.
    def mode[A](xs: scala.collection.Seq[A]): (A, Int) =
        xs.groupBy(identity).map { case (value, hits) => (value, hits.size) }.toArray.sortBy(_._2).last

    // Build the original data set (0, 1, ..., ttCnt) and feed it into the digest.
    val myDigestOrg: AVLTreeDigest = TDigest.createAvlTreeDigest(100).asInstanceOf[AVLTreeDigest]
    val orgCollection = (0 to ttCnt).map(_.toDouble) // alternative data: r.nextInt(ttCnt)*r.nextInt(100)*0.01
    orgCollection.foreach(x => myDigestOrg.add(x))

    // Round-trip the digest through its compact byte encoding.
    val buf: ByteBuffer = ByteBuffer.allocate(myDigestOrg.byteSize)
    buf.mark()
    myDigestOrg.asSmallBytes(buf)
    buf.reset() // rewind to the marked start so fromBytes reads what was just written
    // Note: this is the length of the backing array (the allocated byteSize),
    // not the number of bytes asSmallBytes actually wrote.
    println(s"size: ${buf.array().length}" )
    val myDigestFromOrg: AVLTreeDigest = AVLTreeDigest.fromBytes(buf)

    // Reconstruct a sample of ttCnt + 1 points from evenly spaced quantiles.
    val sampleCollection = (0 to ttCnt).map(i => myDigestFromOrg.quantile(i / ttCnt.toDouble))

    // max
    println(s"orgCollection max : ${orgCollection.max}")
    println(s"sampleCollcetion max : ${sampleCollection.max}")
    // min
    println(s"orgCollection min : ${orgCollection.min}")
    println(s"sampleCollcetion min : ${sampleCollection.min}")

    // Range
    println(s"orgCollection Range : ${orgCollection.max - orgCollection.min }")
    println(s"sampleCollcetion Range : ${sampleCollection.max - sampleCollection.min}")

    // Mean
    val orgAvg = mean(orgCollection)
    val sampleAvg = mean(sampleCollection)
    println(s"orgCollection avg :$orgAvg")
    println(s"sampleCollcetion avg : $sampleAvg")
    // Difference between the reconstructed mean and the original mean
    // (this is the value that used to be printed under the "S.E. Mean" label).
    println(s"avg diff (sample - org) : ${sampleAvg - orgAvg}")

    // Variance (population form: sum of squared deviations divided by n)
    val orgVariance = variance(orgCollection, orgAvg)
    val sampleVariance = variance(sampleCollection, sampleAvg)
    println(s"orgCollection Variance : $orgVariance")
    println(s"sampleCollcetion Variance : $sampleVariance")

    // Standard Deviation
    val orgStdDev = Math.sqrt(orgVariance)
    val sampleStdDev = Math.sqrt(sampleVariance)
    println(s"orgCollection Standard Deviation : $orgStdDev")
    println(s"sampleCollcetion Standard Deviation : $sampleStdDev")

    // Standard Error of the Mean (S.E. Mean): standard deviation / sqrt(n),
    // using the population standard deviation printed above.
    println(s"orgCollection S.E. Mean : ${orgStdDev / Math.sqrt(orgCollection.size)}")
    println(s"sampleCollcetion S.E. Mean : ${sampleStdDev / Math.sqrt(sampleCollection.size)}")

    // Median: computed directly for the original data, taken from the digest for the sample.
    println(s"orgCollection Median : ${ median(orgCollection) }")
    println(s"sampleCollcetion Median : ${ myDigestFromOrg.quantile(0.5)}")

    // Mode: sample values are truncated Double -> Int before counting. Values shift
    // slightly after going through the digest, so the mode is not accurate.
    // Example output from a run with random data:
    //    orgCollection Mode : (504671,1)
    //    sampleCollectionMode Mode : (145013,1)
    println(s"orgCollection Mode : ${ mode(orgCollection) }")
    println(s"sampleCollectionMode Mode : ${ mode(sampleCollection.map(_.toInt)) }")

    // Kurtosis
    val kurtosis = new Kurtosis()
    println(s"orgCollection Kurtosis : ${ kurtosis.evaluate(orgCollection.toArray) }")
    println(s"sampleCollection Kurtosis : ${ kurtosis.evaluate(sampleCollection.toArray) }")

    // Skewness
    val skewness = new Skewness()
    println(s"orgCollection Skewness : ${ skewness.evaluate(orgCollection.toArray) }")
    println(s"sampleCollection Skewness : ${ skewness.evaluate(sampleCollection.toArray) }")

}

<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-math3</artifactId>
    <version>3.6.1</version>
</dependency>

分享到：