ES中如何使用逗号来分词

yangshangchuan

浏览: 2477851 次
性别:
来自: 北京

最近访客更多访客>>

wangyy

akingde

feilafei123

wf_chn

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

搜索

es elastic elasticsearch analyzer

使用软件版本：elasticsearch-2.2.0

1. setting:

curl -XPOST 'http://localhost:9200/data' -d '{

    "settings": {

        "analysis": {

            "analyzer": {

                "comma": {

                     "type": "pattern",

                     "pattern":","

                }

            }

        }

    }

}
'

return:

1	`{"acknowledged":true}`

2. view index:

curl -XGET 'http://localhost:9200/data'

return:

{

   "data": {

      "aliases": {},

      "mappings": {},

      "settings": {

         "index": {

            "creation_date": "1456931889151",

            "analysis": {

               "analyzer": {

                  "comma": {

                     "pattern": ",",

                     "type": "pattern"

                  }

               }

            },

            "number_of_shards": "5",

            "number_of_replicas": "1",

            "uuid": "aXyFMRzKQ0m_Ex8N2yJeSA",

            "version": {

               "created": "2020099"

            }

         }

      },

      "warmers": {}

   }
}

3. mapping:

1

2

3

4

5

6

7

curl -XPOST 'http://localhost:9200/data/_mapping/record' -d '{

    "properties": {

        "id": { "type": "string", "index": "not_analyzed" },

        "number": { "type": "string", "analyzer": "comma", "search_analyzer": "comma" }

    }
}
'

return:

1	`{"acknowledged":true}`

4. view index:

curl -XGET 'http://localhost:9200/data'

return:

{

   "data": {

      "aliases": {},

      "mappings": {

         "record": {

            "properties": {

               "id": {

                  "type": "string",

                  "index": "not_analyzed"

               },

               "number": {

                  "type": "string",

                  "analyzer": "comma"

               }

            }

         }

      },

      "settings": {

         "index": {

            "creation_date": "1456972030705",

            "analysis": {

               "analyzer": {

                  "comma": {

                     "pattern": ",",

                     "type": "pattern"

                  }

               }

            },

            "number_of_shards": "5",

            "number_of_replicas": "1",

            "uuid": "A9Z76U9DR0OBqn29smtq8w",

            "version": {

               "created": "2020099"

            }

         }

      },

      "warmers": {}

   }
}

5. verify analyze:

curl -XGET 'http://127.0.0.1:9200/data/_analyze?analyzer=comma&text=2,3,4,5,100-100'

return:

{

   "tokens": [

      {

         "token": "2",

         "start_offset": 0,

         "end_offset": 1,

         "type": "word",

         "position": 0

      },

      {

         "token": "3",

         "start_offset": 2,

         "end_offset": 3,

         "type": "word",

         "position": 101

      },

      {

         "token": "4",

         "start_offset": 4,

         "end_offset": 5,

         "type": "word",

         "position": 202

      },

      {

         "token": "5",

         "start_offset": 6,

         "end_offset": 7,

         "type": "word",

         "position": 303

      },

      {

         "token": "100-100",

         "start_offset": 8,

         "end_offset": 15,

         "type": "word",

         "position": 404

      }

   ]
}

6. post data:

1

2

3

4

5

curl -XPOST 'http://localhost:9200/data/record' -d '{

    "id" : "001CV",

    "number" : "2,3,4,5,100-100,1010"
}
'

return:

{

   "_index": "data",

   "_type": "record",

   "_id": "AVM3kt-GiEDWd2i_MREb",

   "_version": 1,

   "_shards": {

      "total": 2,

      "successful": 1,

      "failed": 0

   },

   "created": true
}

7. post data:

1

2

3

4

5

curl -XPOST 'http://localhost:9200/data/record' -d '{

    "id" : "002CV",

    "number" : "9999,8888"
}
'

return:

{

   "_index": "data",

   "_type": "record",

   "_id": "AVM3k7vIiEDWd2i_MREc",

   "_version": 1,

   "_shards": {

      "total": 2,

      "successful": 1,

      "failed": 0

   },

   "created": true
}

8. post data:

1

2

3

4

5

curl -XPOST 'http://localhost:9200/data/record' -d '{

    "id" : "002CV",

    "number" : "2,8888"
}
'

return:

{

   "_index": "data",

   "_type": "record",

   "_id": "AVM3mCGMiEDWd2i_MREh",

   "_version": 1,

   "_shards": {

      "total": 2,

      "successful": 1,

      "failed": 0

   },

   "created": true
}

9. search data:

curl -XGET 'http://localhost:9200/data/record/_search?q=number:9999'

return:

{

   "took": 41,

   "timed_out": false,

   "_shards": {

      "total": 5,

      "successful": 5,

      "failed": 0

   },

   "hits": {

      "total": 1,

      "max_score": 0.19178301,

      "hits": [

         {

            "_index": "data",

            "_type": "record",

            "_id": "AVM3k7vIiEDWd2i_MREc",

            "_score": 0.19178301,

            "_source": {

               "id": "002CV",

               "number": "9999,8888"

            }

         }

      ]

   }
}

10. search data:

curl -XGET 'http://localhost:9200/data/record/_search?q=number:2'

return:

{

   "took": 2,

   "timed_out": false,

   "_shards": {

      "total": 5,

      "successful": 5,

      "failed": 0

   },

   "hits": {

      "total": 2,

      "max_score": 0.37158427,

      "hits": [

         {

            "_index": "data",

            "_type": "record",

            "_id": "AVM3mCGMiEDWd2i_MREh",

            "_score": 0.37158427,

            "_source": {

               "id": "002CV",

               "number": "2,8888"

            }

         },

         {

            "_index": "data",

            "_type": "record",

            "_id": "AVM3kt-GiEDWd2i_MREb",

            "_score": 0.22295055,

            "_source": {

               "id": "001CV",

               "number": "2,3,4,5,100-100,1010"

            }

         }

      ]

   }
}

11. search data:

curl -XGET 'http://localhost:9200/data/record/_search?q=number:8888,100-100'

return:

{

   "took": 3,

   "timed_out": false,

   "_shards": {

      "total": 5,

      "successful": 5,

      "failed": 0

   },

   "hits": {

      "total": 3,

      "max_score": 0.22097087,

      "hits": [

         {

            "_index": "data",

            "_type": "record",

            "_id": "AVM3mCGMiEDWd2i_MREh",

            "_score": 0.22097087,

            "_source": {

               "id": "002CV",

               "number": "2,8888"

            }

         },

         {

            "_index": "data",

            "_type": "record",

            "_id": "AVM3kt-GiEDWd2i_MREb",

            "_score": 0.13258252,

            "_source": {

               "id": "001CV",

               "number": "2,3,4,5,100-100,1010"

            }

         },

         {

            "_index": "data",

            "_type": "record",

            "_id": "AVM3k7vIiEDWd2i_MREc",

            "_score": 0.028130025,

            "_source": {

               "id": "002CV",

               "number": "9999,8888"

            }

         }

      ]

   }
}

12. search data:

curl -XPOST 'http://localhost:9200/data/record/_search' -d '{

  "query": {

    "bool": {

      "must": [

        {

          "term": {

            "number": "2"

          }

        }

      ],

      "must_not": [

        {

          "term": {

            "number": "8888"

          }

        }

      ]

    }

  }
}'

return:

{

   "took": 3,

   "timed_out": false,

   "_shards": {

      "total": 5,

      "successful": 5,

      "failed": 0

   },

   "hits": {

      "total": 1,

      "max_score": 0.22295055,

      "hits": [

         {

            "_index": "data",

            "_type": "record",

            "_id": "AVM3kt-GiEDWd2i_MREb",

            "_score": 0.22295055,

            "_source": {

               "id": "001CV",

               "number": "2,3,4,5,100-100,1010"

            }

         }

      ]

   }
}

2
顶

4
踩

分享到：

互联网时代下的合作治理机制及其应用：以开 ... | 一种使用随机抽样梯度下降算法来预估词汇量 ...

2016-03-04 22:49
浏览 12686
评论(1)
分类:开源软件
查看更多

1 楼 ronin47 2016-03-05

es的api很棒

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

ES中如何使用逗号来分词

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

ES中如何使用逗号来分词

评论

发表评论

相关推荐

The Future of Compass & ElasticSearch

分布式搜索算法

最近访客更多访客>>