Elasticsearch 第3.4章 HTTP操作-高级查询

Elasticsearch 提供了基于 JSON 的完整查询 DSL 来定义查询。

定义数据：

# POST /student/_doc/1001
{
  "name":"zhangsan",
  "nickname":"zhangsan",
  "sex":"男",
  "age":30
}

# POST /student/_doc/1002
{
  "name":"lisi",
  "nickname":"lisi",
  "sex":"男",
  "age":20
}

# POST /student/_doc/1003
{
  "name":"wangwu",
  "nickname":"wangwu",
  "sex":"女",
  "age":40
}

# POST /student/_doc/1004
{
  "name":"zhangsan1",
  "nickname":"zhangsan1",
  "sex":"女",
  "age":50
}

# POST /student/_doc/1005
{
  "name":"zhangsan2",
  "nickname":"zhangsan2",
  "sex":"女",
  "age":30
}

向 ES 服务器发 GET 请求：

http://127.0.0.1:9200/student/_search

一、查询所有文档

请求体内容为：

{
 "query": {
  "match_all": {}
 }
}

"query"：这里的 query 代表一个查询对象，里面可以有不同的查询属性
"match_all"：查询类型，例如：match_all（代表查询所有）、match、term、range 等等
{查询条件}：查询条件会根据类型的不同，写法也有差异

服务器响应结果如下：

{
    "took【查询花费时间，单位毫秒】": 189,
    "timed_out【是否超时】": false,
    "_shards【分片信息】": {
        "total【总数】": 1,
        "successful【成功】": 1,
        "skipped【忽略】": 0,
        "failed【失败】": 0
    },
    "hits【搜索命中结果】": {
        "total【搜索条件匹配的文档总数】": {
            "value【总命中计数的值】": 5,
            "relation【计数规则】": "eq" # eq 表示计数准确，gte 表示实际命中数大于或等于 value（value 只是下限，计数不精确）
        },
        "max_score【匹配度分值】": 1,
        "hits【命中结果集合】": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                }
            },
            {
                "_index": "student",
                "_id": "1002",
                "_score": 1,
                "_source": {
                    "name": "lisi",
                    "nickname": "lisi",
                    "sex": "男",
                    "age": 20
                }
            },
            {
                "_index": "student",
                "_id": "1003",
                "_score": 1,
                "_source": {
                    "name": "wangwu",
                    "nickname": "wangwu",
                    "sex": "女",
                    "age": 40
                }
            },
            {
                "_index": "student",
                "_id": "1004",
                "_score": 1,
                "_source": {
                    "name": "zhangsan1",
                    "nickname": "zhangsan1",
                    "sex": "女",
                    "age": 50
                }
            },
            {
                "_index": "student",
                "_id": "1005",
                "_score": 1,
                "_source": {
                    "name": "zhangsan2",
                    "nickname": "zhangsan2",
                    "sex": "女",
                    "age": 30
                }
            }
        ]
    }
}

二、匹配查询

match 匹配类型查询，会把查询条件进行分词，然后进行查询，多个词条之间是 or 的关系

请求体内容为：

{
 "query": {
  "match": {
    "name":"zhangsan"
  }
 }
}

服务器响应结果为：

{
    "took": 26,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1.3862942,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1.3862942,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                }
            }
        ]
    }
}

三、字段匹配查询

multi_match 与 match 类似，不同的是它可以在多个字段中查询。

请求体内容为：

{
 "query": {
  "multi_match": {
    "query": "zhangsan",
    "fields": ["name","nickname"]
  }
 }
}

服务器响应结果为：

{
    "took": 8,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1.3862942,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1.3862942,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                }
            }
        ]
    }
}

四、关键字精确查询

term 查询，精确的关键词匹配查询，不对查询条件进行分词。

请求体内容为：

{
 "query": {
  "term": {
   "name": {
    "value": "zhangsan"
   }
  }
 }
}

服务器响应结果为：

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1.3862942,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1.3862942,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                }
            }
        ]
    }
}

五、多关键字精确查询

terms 查询和 term 查询一样，但它允许你指定多值进行匹配。

如果这个字段包含了指定值中的任何一个值，那么这个文档满足条件，类似于 mysql 的 in

请求体内容为：

{
 "query": {
  "terms": {
   "name": ["zhangsan","lisi"]
  }
 }
}

服务器响应结果为：

{
  "took": 15,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "student",
        "_id": "1001",
        "_score": 1,
        "_source": {
          "name": "zhangsan",
          "nickname": "zhangsan",
          "sex": "男",
          "age": 30
        }
      },
      {
        "_index": "student",
        "_id": "1002",
        "_score": 1,
        "_source": {
          "name": "lisi",
          "nickname": "lisi",
          "sex": "男",
          "age": 20
        }
      }
    ]
  }
}

六、指定查询字段

默认情况下，Elasticsearch 在搜索的结果中，会把文档中保存在_source 的所有字段都返回。

如果我们只想获取其中的部分字段，我们可以添加_source 的过滤

请求体内容为：

{
 "_source": ["name","nickname"],  
 "query": {
  "terms": {
   "nickname": ["zhangsan"]
  }
 }
}

服务器响应结果为：

{
    "took": 8,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan"
                }
            }
        ]
    }
}

七、过滤字段

我们也可以通过：

includes：来指定想要显示的字段
excludes：来指定不想要显示的字段

请求体内容为：

{
 "_source": {
  "includes": ["name","nickname"]
 },  
 "query": {
  "terms": {
   "nickname": ["zhangsan"]
  }
 }
}

服务器响应结果为：

{
    "took": 8,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan"
                }
            }
        ]
    }
}

请求体内容为：

{
 "_source": {
  "excludes": ["name","nickname"]
 },  
 "query": {
  "terms": {
   "nickname": ["zhangsan"]
  }
 }
}

服务器响应结果为：

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1,
                "_source": {
                    "sex": "男",
                    "age": 30
                }
            }
        ]
    }
}

八、组合查询

bool 把各种其它查询通过 must（必须）、must_not（必须不）、should（应该）的方式进行组合

请求体内容为：

{
 "query": {
  "bool": {
   "must": [
    {
     "match": {
      "name": "zhangsan"
     }
    }
   ],
   "must_not": [
    {
     "match": {
      "age": "40"
     }
    }
   ]
  }
 }
}

服务器响应结果为：

{
    "took": 9,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1.3862942,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1.3862942,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                }
            }
        ]
    }
}

九、范围查询

range 查询找出那些落在指定区间内的数字或者时间。

range 查询允许以下字符：

gt：大于 >
gte：大于等于 >=
lt：小于 <
lte：小于等于 <=

请求体内容为：

{
 "query": {
  "range": {
   "age": {
    "gte": 30,
    "lte": 35
   }
  }
 }
}

服务器响应结果为：

{
    "took": 7,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": 1,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                }
            },
            {
                "_index": "student",
                "_id": "1005",
                "_score": 1,
                "_source": {
                    "name": "zhangsan2",
                    "nickname": "zhangsan2",
                    "sex": "女",
                    "age": 30
                }
            }
        ]
    }
}

十、模糊查询

返回包含与搜索字词相似的字词的文档。

编辑距离是指将一个术语转换为另一个术语所需的单字符更改次数。

这些更改可以包括：

更改字符（box → fox）
删除字符（black → lack）
插入字符（sic → sick）
转置两个相邻字符（act → cat）

为了找到相似的术语，fuzzy 查询会在指定的编辑距离内创建一组搜索词的所有可能的变体或扩展。然后查询返回每个扩展的完全匹配。

通过 fuzziness 修改编辑距离。一般使用默认值 AUTO，根据术语的长度生成编辑距离。

请求体内容为：

{
 "query": {
  "fuzzy": {
   "title": {
    "value": "zhangsan"
   }
  }
 }
}

服务器响应结果为（注意：示例数据中并没有 title 字段，所以该查询没有命中任何文档；若要对姓名做模糊匹配，应将字段改为 name）：

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 0,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    }
}

请求体内容为：

{
 "query": {
  "fuzzy": {
   "title": {
    "value": "zhangsan",
      "fuzziness": 2
   }
  }
 }
}

服务器响应结果为：

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 0,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    }
}

十一、单字段排序

sort 可以让我们按照不同的字段进行排序，并且通过 order 指定排序的方式。desc 降序，asc 升序。

请求体内容为：

{
 "query": {
  "match": {
    "name":"zhangsan"
  }
 },
 "sort": [{
  "age": {
    "order":"desc"
  }
 }]
}

服务器响应结果为：

{
    "took": 23,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": null,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": null,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                },
                "sort": [
                    30
                ]
            }
        ]
    }
}

十二、多字段排序

假定我们想要结合使用 age 和 _score 进行查询，并且匹配的结果首先按照年龄排序，然后按照相关性得分排序。

请求体内容为：

{
 "query": {
  "match_all": {}
 },
 "sort": [
  {
   "age": {
    "order": "desc"
   }
  },
  {
   "_score":{
    "order": "desc"
   }
  }
 ]
}

服务器响应结果为：

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": [
            {
                "_index": "student",
                "_id": "1004",
                "_score": 1,
                "_source": {
                    "name": "zhangsan1",
                    "nickname": "zhangsan1",
                    "sex": "女",
                    "age": 50
                },
                "sort": [
                    50,
                    1
                ]
            },
            {
                "_index": "student",
                "_id": "1003",
                "_score": 1,
                "_source": {
                    "name": "wangwu",
                    "nickname": "wangwu",
                    "sex": "女",
                    "age": 40
                },
                "sort": [
                    40,
                    1
                ]
            },
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                },
                "sort": [
                    30,
                    1
                ]
            },
            {
                "_index": "student",
                "_id": "1005",
                "_score": 1,
                "_source": {
                    "name": "zhangsan2",
                    "nickname": "zhangsan2",
                    "sex": "女",
                    "age": 30
                },
                "sort": [
                    30,
                    1
                ]
            },
            {
                "_index": "student",
                "_id": "1002",
                "_score": 1,
                "_source": {
                    "name": "lisi",
                    "nickname": "lisi",
                    "sex": "男",
                    "age": 20
                },
                "sort": [
                    20,
                    1
                ]
            }
        ]
    }
}

十三、高亮查询

在进行关键字搜索时，搜索出的内容中的关键字会显示不同的颜色，称之为高亮。

Elasticsearch 可以对查询内容中的关键字部分，进行标签和样式（高亮）的设置。

在使用 match 查询的同时，加上一个 highlight 属性：

pre_tags：前置标签
post_tags：后置标签
fields：需要高亮的字段
name：这里声明 name 字段需要高亮，后面可以为这个字段设置特有配置，也可以为空

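请求体内容为（pre_tags 和 post_tags 应填写用于包裹关键字的 HTML 标签，例如 "<font color='red'>" 和 "</font>"；下面示例中二者为空字符串，因此响应里的高亮片段不带任何标签）：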

{
 "query": {
  "match": {
   "name": "zhangsan"
  }
 },
 "highlight": {
  "pre_tags": "",
  "post_tags": "",
  "fields": {
   "name": {}
  }
 }
}

服务器响应结果为：

{
    "took": 46,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 1.3862942,
        "hits": [
            {
                "_index": "student",
                "_id": "1001",
                "_score": 1.3862942,
                "_source": {
                    "name": "zhangsan",
                    "nickname": "zhangsan",
                    "sex": "男",
                    "age": 30
                },
                "highlight": {
                    "name": [
                        "zhangsan"
                    ]
                }
            }
        ]
    }
}

十四、分页查询

from：当前页的起始索引，默认从 0 开始。 from = (pageNum - 1) * size

size：每页显示多少条

请求体内容为：

{
 "query": {
  "match_all": {}
 },
 "sort": [
  {
   "age": {
    "order": "desc"
   }
  }
 ],
 "from": 0,
 "size": 2
}

服务器响应结果为：

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": [
            {
                "_index": "student",
                "_id": "1004",
                "_score": null,
                "_source": {
                    "name": "zhangsan1",
                    "nickname": "zhangsan1",
                    "sex": "女",
                    "age": 50
                },
                "sort": [
                    50
                ]
            },
            {
                "_index": "student",
                "_id": "1003",
                "_score": null,
                "_source": {
                    "name": "wangwu",
                    "nickname": "wangwu",
                    "sex": "女",
                    "age": 40
                },
                "sort": [
                    40
                ]
            }
        ]
    }
}

十五、聚合查询

聚合允许使用者对 ES 文档进行统计分析，类似于关系型数据库中的 group by，当然还有很多其他的聚合，例如取最大值、平均值等等。

①、对某个字段取最大值 max

请求体内容为：

{
  "aggs":{
   "max_age":{
    "max":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 33,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "max_age": {
            "value": 50
        }
    }
}

②、对某个字段取最小值 min

请求体内容为：

{
  "aggs":{
   "min_age":{
    "min":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "min_age": {
            "value": 20
        }
    }
}

③、对某个字段求和 sum

请求体内容为：

{
  "aggs":{
   "sum_age":{
    "sum":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "sum_age": {
            "value": 170
        }
    }
}

④、对某个字段取平均值 avg

请求体内容为：

{
  "aggs":{
   "avg_age":{
    "avg":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "avg_age": {
            "value": 34
        }
    }
}

⑤、对某个字段的值进行去重之后再取总数

请求体内容为：

{
  "aggs":{
   "distinct_age":{
    "cardinality":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 16,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "distinct_age": {
            "value": 4
        }
    }
}

⑥、Stats 聚合：对某个字段一次性返回 count、max、min、avg 和 sum 五个指标。

请求体内容为：

{
  "aggs":{
   "stats_age":{
    "stats":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "stats_age": {
            "count": 5,
            "min": 20,
            "max": 50,
            "avg": 34,
            "sum": 170
        }
    }
}

十六、桶聚合查询

桶聚合相当于 SQL 中的 group by 语句。

①、terms 聚合，分组统计

请求体内容为：

{
  "aggs":{
   "age_groupby":{
    "terms":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 17,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "age_groupby": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": 30,
                    "doc_count": 2
                },
                {
                    "key": 20,
                    "doc_count": 1
                },
                {
                    "key": 40,
                    "doc_count": 1
                },
                {
                    "key": 50,
                    "doc_count": 1
                }
            ]
        }
    }
}

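②、在 terms 分组下再进行聚合（做法是在 "terms" 的同级再嵌套一个 "aggs"，例如 "aggs":{"sum_age":{"sum":{"field":"age"}}}，这样每个分组桶内都会返回对应的子聚合结果；注意：下面的示例请求体未包含该子聚合，因此其响应与 ① 相同）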

请求体内容为：

{
  "aggs":{
   "age_groupby":{
    "terms":{"field":"age"}
   }
  },
  "size":0
}

服务器响应结果为：

{
    "took": 15,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 5,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "age_groupby": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": 30,
                    "doc_count": 2
                },
                {
                    "key": 20,
                    "doc_count": 1
                },
                {
                    "key": 40,
                    "doc_count": 1
                },
                {
                    "key": 50,
                    "doc_count": 1
                }
            ]
        }
    }
}

Elasticsearch 第3.4章 HTTP操作-高级查询

一、查询所有文档

二、匹配查询

三、字段匹配查询

四、关键字精确查询

五、多关键字精确查询

六、指定查询字段

七、过滤字段

八、组合查询

九、范围查询

十、模糊查询

十一、单字段排序

十二、多字段排序

十三、高亮查询

十四、分页查询

十五、聚合查询

十六、桶聚合查询

Elasticsearch文章

带到手机上看