Multi-index queries in Elasticsearch

Posted by brodwilkinson on Mon, 03 Jan 2022 14:09:11 +0100

1, The origin of the problem

When querying Elasticsearch, we usually specify the index to search directly in the URL. If we need to query many indexes whose names follow no common pattern, we run into an awkward problem: the request can exceed the URL length limit;

2, Test environment

elasticsearch 6.8.12

test data

Add three test indexes, one document in each index;

PUT test1/_doc/1
{
  "id":1,
  "name":"test1-1"
}


# {
#   "_index" : "test1",
#   "_type" : "_doc",
#   "_id" : "1",
#   "_version" : 1,
#   "result" : "created",
#   "_shards" : {
#     "total" : 2,
#     "successful" : 1,
#     "failed" : 0
#   },
#   "_seq_no" : 0,
#   "_primary_term" : 1
# }

PUT test2/_doc/1
{
  "id":1,
  "name":"test2-1"
}


# {
#   "_index" : "test2",
#   "_type" : "_doc",
#   "_id" : "1",
#   "_version" : 1,
#   "result" : "created",
#   "_shards" : {
#     "total" : 2,
#     "successful" : 1,
#     "failed" : 0
#   },
#   "_seq_no" : 0,
#   "_primary_term" : 1
# }

PUT test3/_doc/1
{
  "id":1,
  "name":"test3-1"
}

# {
#   "_index" : "test3",
#   "_type" : "_doc",
#   "_id" : "1",
#   "_version" : 1,
#   "result" : "created",
#   "_shards" : {
#     "total" : 2,
#     "successful" : 1,
#     "failed" : 0
#   },
#   "_seq_no" : 0,
#   "_primary_term" : 1
# }

3, Specify multi index in URL

Specify a single index to search directly in the URL;

POST test1/_search 
{
    "query": {
        "match_all": {}
    }
}


# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 5,
#     "successful" : 5,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 1,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       }
#     ]
#   }
# }

Multiple indexes can be searched at the same time by separating their names with commas;

POST test1,test2/_search
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 1,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 10,
#     "successful" : 10,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 2,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       },
#       {
#         "_index" : "test2",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test2-1"
#         }
#       }
#     ]
#   }
# }

We can use the keyword _all to search all indexes;

POST _all/_search 
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 15,
#     "successful" : 15,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 3,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       },
#       {
#         "_index" : "test2",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test2-1"
#         }
#       },
#       {
#         "_index" : "test3",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test3-1"
#         }
#       }
#     ]
#   }
# }

You can also use the wildcard * to match indexes whose names share a common pattern;

POST test*/_search
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 1,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 15,
#     "successful" : 15,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 3,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       },
#       {
#         "_index" : "test2",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test2-1"
#         }
#       },
#       {
#         "_index" : "test3",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test3-1"
#         }
#       }
#     ]
#   }
# }

You can also use - to exclude an index;

POST test*,-test2/_search
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 10,
#     "successful" : 10,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 2,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       },
#       {
#         "_index" : "test3",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test3-1"
#         }
#       }
#     ]
#   }
# }

4, Some control options of multi index in URL

If we explicitly search a nonexistent or closed index, an error will be reported;

POST test4/_search
{
    "query": {
        "match_all": {}
    }
}


# {
#   "error" : {
#     "root_cause" : [
#       {
#         "type" : "index_not_found_exception",
#         "reason" : "no such index",
#         "resource.type" : "index_or_alias",
#         "resource.id" : "test4",
#         "index_uuid" : "_na_",
#         "index" : "test4"
#       }
#     ],
#     "type" : "index_not_found_exception",
#     "reason" : "no such index",
#     "resource.type" : "index_or_alias",
#     "resource.id" : "test4",
#     "index_uuid" : "_na_",
#     "index" : "test4"
#   },
#   "status" : 404
# }

POST test3/_close
# 
# {
#   "acknowledged" : true
# }

POST test3/_search
{
    "query": {
        "match_all": {}
    }
}


# {
#   "error": {
#     "root_cause": [
#       {
#         "type": "index_closed_exception",
#         "reason": "closed",
#         "index_uuid": "KI7Iv4eGRIOk6MsycXokNQ",
#         "index": "test3"
#       }
#     ],
#     "type": "index_closed_exception",
#     "reason": "closed",
#     "index_uuid": "KI7Iv4eGRIOk6MsycXokNQ",
#     "index": "test3"
#   },
#   "status": 400
# }

We can use ignore_unavailable=true to ignore nonexistent or closed indexes;

POST test4/_search?ignore_unavailable=true
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 0,
#     "successful" : 0,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 0,
#     "max_score" : 0.0,
#     "hits" : [ ]
#   }
# }


POST test3/_search?ignore_unavailable=true
{
    "query": {
        "match_all": {}
    }
}


# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 0,
#     "successful" : 0,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 0,
#     "max_score" : 0.0,
#     "hits" : [ ]
#   }
# }

If you use a wildcard or _all to specify the indexes to search implicitly and no index matches, no error is reported by default, but you can use allow_no_indices=false to make Elasticsearch report an error;

POST noexist*/_search
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 0,
#     "successful" : 0,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 0,
#     "max_score" : 0.0,
#     "hits" : [ ]
#   }
# }


POST noexist*/_search?allow_no_indices=false
{
    "query": {
        "match_all": {}
    }
}

# {
#   "error" : {
#     "root_cause" : [
#       {
#         "type" : "index_not_found_exception",
#         "reason" : "no such index",
#         "resource.type" : "index_or_alias",
#         "resource.id" : "noexist*",
#         "index_uuid" : "_na_",
#         "index" : "noexist*"
#       }
#     ],
#     "type" : "index_not_found_exception",
#     "reason" : "no such index",
#     "resource.type" : "index_or_alias",
#     "resource.id" : "noexist*",
#     "index_uuid" : "_na_",
#     "index" : "noexist*"
#   },
#   "status" : 404
# }



POST test3*/_search
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 0,
#     "successful" : 0,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 0,
#     "max_score" : 0.0,
#     "hits" : [ ]
#   }
# }

POST test3*/_search?allow_no_indices=false
{
    "query": {
        "match_all": {}
    }
}

# {
#   "error" : {
#     "root_cause" : [
#       {
#         "type" : "index_not_found_exception",
#         "reason" : "no such index",
#         "resource.type" : "index_or_alias",
#         "resource.id" : "test3*"
#       }
#     ],
#     "type" : "index_not_found_exception",
#     "reason" : "no such index",
#     "resource.type" : "index_or_alias",
#     "resource.id" : "test3*"
#   },
#   "status" : 404
# }


We can also use expand_wildcards to control which kinds of indexes a wildcard expands to. The possible values are open, closed, none and all;

By default, wildcards expand only to open indexes;

POST test*/_search
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 10,
#     "successful" : 10,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 2,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       },
#       {
#         "_index" : "test2",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test2-1"
#         }
#       }
#     ]
#   }
# }


POST test*/_search?expand_wildcards=all
{
    "query": {
        "match_all": {}
    }
}

# {
#   "error": {
#     "root_cause": [
#       {
#         "type": "index_closed_exception",
#         "reason": "closed",
#         "index_uuid": "KI7Iv4eGRIOk6MsycXokNQ",
#         "index": "test3"
#       }
#     ],
#     "type": "index_closed_exception",
#     "reason": "closed",
#     "index_uuid": "KI7Iv4eGRIOk6MsycXokNQ",
#     "index": "test3"
#   },
#   "status": 400
# }

POST test*/_search?expand_wildcards=all&ignore_unavailable=true
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 10,
#     "successful" : 10,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 2,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       },
#       {
#         "_index" : "test2",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test2-1"
#         }
#       }
#     ]
#   }
# }

5, Encapsulating physical indexes with index aliases

An alias is an alternative name for one or more physical indexes. When an API request uses an alias, Elasticsearch automatically resolves it to the corresponding physical index names;

Aliases can be mapped to either a specific index or multiple indexes;

An alias can also carry a filter, so that searches through the alias see only a subset of the index's data;

POST /_aliases
{
    "actions" : [
        { "add" : { "index" : "test*", "alias" : "all_test_indices" } }
    ]
}

# {
#   "acknowledged" : true
# }

POST all_test_indices/_search
{
    "query": {
        "match_all": {}
    }
}

# {
#   "took" : 0,
#   "timed_out" : false,
#   "_shards" : {
#     "total" : 10,
#     "successful" : 10,
#     "skipped" : 0,
#     "failed" : 0
#   },
#   "hits" : {
#     "total" : 2,
#     "max_score" : 1.0,
#     "hits" : [
#       {
#         "_index" : "test1",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test1-1"
#         }
#       },
#       {
#         "_index" : "test2",
#         "_type" : "_doc",
#         "_id" : "1",
#         "_score" : 1.0,
#         "_source" : {
#           "id" : 1,
#           "name" : "test2-1"
#         }
#       }
#     ]
#   }
# }

6, multi search – specify the index through the body

The main purpose of the Multi Search API is to realize multiple search requests in one API. It specifies the index through the header and the query statement through the body in the following format;

header\n
body\n
header\n
body\n

Besides offering the same ways of specifying index names as the previous two approaches, the biggest advantage of the Multi Search API is that index names are passed in the request body, which easily gets around the URL length limit;

This also means the Multi Search API can handle a large number of index names that follow no particular pattern, such as an arbitrary selection of time-series indexes;

GET _msearch
{"index":"test*"}
{"query" : {"match_all" : {}}}

# {
#   "responses" : [
#     {
#       "took" : 0,
#       "timed_out" : false,
#       "_shards" : {
#         "total" : 10,
#         "successful" : 10,
#         "skipped" : 0,
#         "failed" : 0
#       },
#       "hits" : {
#         "total" : 2,
#         "max_score" : 1.0,
#         "hits" : [
#           {
#             "_index" : "test1",
#             "_type" : "_doc",
#             "_id" : "1",
#             "_score" : 1.0,
#             "_source" : {
#               "id" : 1,
#               "name" : "test1-1"
#             }
#           },
#           {
#             "_index" : "test2",
#             "_type" : "_doc",
#             "_id" : "1",
#             "_score" : 1.0,
#             "_source" : {
#               "id" : 1,
#               "name" : "test2-1"
#             }
#           }
#         ]
#       },
#       "status" : 200
#     }
#   ]
# }

Topics: Big Data ElasticSearch search engine