- 浏览: 127060 次
- 性别:
- 来自: 杭州
-
最新评论
文章列表
THREAD TEST
- 博客分类:
- scala
// Number of worker threads backing the shared scheduler.
val THREAD_POOL_SIZE = 10

// Scheduled executor used to drive the periodic OTS queue processing.
val THREAD_POOL = Executors.newScheduledThreadPool(THREAD_POOL_SIZE)

// Kick off otsQueueProcess immediately (initial delay 0), then re-run it
// 60 000 ms after each previous run completes (fixed delay, not fixed rate).
THREAD_POOL.scheduleWithFixedDelay(
  new Runnable {
    override def run(): Unit = otsQueueProcess
  },
  0L,
  60000L,
  TimeUnit.MILLISECONDS
)
./sbin/start-thriftserver.sh --hiveconf hive.server2.thrift.port=9998 --hiveconf hive.server2.thrift.bind.host=ip --master yarn --deploy-mode client --conf spark.shuffle.service.enabled=true --conf spark.shuffle.service.port=7337 --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAlloc ...
val gson: Gson = new GsonBuilder().create
def jsonToMap(jsonstring: String): java.util.Map[String, String] = {
val typeOfHashMap: Type = new TypeToken[java.util.Map[String, String]]() {
}.getType
val newMap: java.util.Map[String, String] = gson.fromJson(jsonstring, typeOfHashMap)
...
test code2
- 博客分类:
- spark 学习
package org.test.udf
import com.google.gson.{Gson, GsonBuilder}
import org.apache.spark.sql.Row
import org.apache.spark.sql.api.java.UDF2
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._
import scala.collection.mutab ...
def taskcal(data:Array[(String,Long)],rt:Array[String],wd:Int):Array[Boolean]={
val result = Array.fill[Boolean](rt.length)(false)
val sortData = data.sortBy(_._2)
val indexArrayLength = rt.length - 1
var startTimeArray = Array.fill[Long](rt.length)(0l)
val indexMap = rt.map(item ...
spark aggregator
class HllcdistinctByte extends Aggregator[Row, HLLCounter, Array[Byte]] {
// A zero value for this aggregation. Should satisfy the property that any b + zero = b
def zero: HLLCounter = new HLLCounter(14)
// Combine two values to produce a new value. For performance ...
pipeline tf token
- 博客分类:
- spark 学习
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.sql.Row
// Prepare training documents from a list of (id, text, ...
object StructuredNetworkWordCount {
def main(args: Array[String]) {
if (args.length < 2) {
System.err.println("Usage: StructuredNetworkWordCount <hostname> <port>")
System.exit(1)
}
val host = args(0)
val port = args(1).toInt
val spark = Sp ...
spark , jar
- 博客分类:
- spark 学习
cat conf/spark-defaults.conf
spark.yarn.jars hdfs:/app/jars/*.jar
currying function
- 博客分类:
- scala
benchmark2("hllc")(10000000)(hcclcodeanddecode2)
benchmark("hllc")(10000000)(hcclcodeanddecode)
def hcclcodeanddecode() :Unit = {
val hllc = new HLLCounter(14)
hllc.add("adsfasdfawerwfadfs")
val bytes1 = ByteBuffer.allocate(hllc.maxLength())
h ...
import org.apache.commons.math3.stat.descriptive.moment._
def vLTreeDigesttest = {
val ttCnt = 10000
val myDigestOrg: AVLTreeDigest = TDigest.createAvlTreeDigest(100).asInstanceOf[AVLTreeDigest]
val orgCollection = new mutable.ArrayBuffer[Double]()
for(i <- 0 to ttCnt){
...
class HllcdistinctByte extends Aggregator[Row, HLLCounter, Array[Byte]] {
// A zero value for this aggregation. Should satisfy the property that any b + zero = b
def zero: HLLCounter = new HLLCounter(14)
// Combine two values to produce a new value. For performance, the function may mod ...
pip
146 sudo python ez_setup.py
147 python setup.py intall
148 python setup.py install
149 pip list
tensorflow
150 pip install --ignore-installed --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.0-cp27-none-linux_x86_64.whl
notebook
158 pip i ...
Windows 下通过 Source 读文件会遇到各种问题,改用 BufferedReader
用于删除文件中中文
object ChineseDrop extends App {
// val stArray = Array("胜多负少","abadsf","13123123")
// stArray.foreach( word => println(s" $word is ${isChinese(word)} "))
//G:\\fromHD\\勇敢的心\\勇敢的心.srt
...
$M2_HOME/conf/settings.xml
尼玛 ,这个库真的快 阿里云做了件好事
<mirror>
<id>alimaven</id>
<name>aliyun maven</name>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
<mirrorOf>central</mirrorOf>
...