From a9d36e4d27d038f7736b4a9a8f3bb644ef1819e0 Mon Sep 17 00:00:00 2001 From: Jan Semmelink <jan@uafrica.com> Date: Thu, 4 Nov 2021 13:47:20 +0200 Subject: [PATCH] Update search to support flexible queries --- search/opensearch_types.go | 125 +++++++++++++++++++++++++++++++++++++ search/search_test.go | 9 ++- search/time_series.go | 104 ++---------------------------- 3 files changed, 139 insertions(+), 99 deletions(-) diff --git a/search/opensearch_types.go b/search/opensearch_types.go index dba91cd..b46a9ff 100644 --- a/search/opensearch_types.go +++ b/search/opensearch_types.go @@ -1,5 +1,7 @@ package search +import "time" + //Mapping configures an index in OpenSearch type Index struct { Settings Settings `json:"settings"` @@ -34,3 +36,126 @@ type MappingKeyword struct { Type string `json:"type"` //="keyword" IgnoreAbove int `json:"ignore_above"` //e.g. 256 } + +type SearchRequestBody struct { + Size int64 `json:"size,omitempty"` + Query Query `json:"query"` +} + +type Query struct { + //one of: + Match *QueryNameValue `json:"match,omitempty" doc:"<field>:<value>"` + Term *QueryNameValue `json:"term,omitempty"` + Range *QueryRange `json:"range,omitempty"` + MultiMatch *QueryMultiMatch `json:"multi_match,omitempty"` + Bool *QueryBool `json:"bool,omitempty"` +} + +type QueryMultiMatch struct { + Query string `json:"query" doc:"Text search in below fields"` + Fields []string `json:"fields" doc:"List of fields"` +} + +//https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html +type QueryBool struct { + Must []Query `json:"must,omitempty" docs:"List of things that must appear in matching documents and will contribute to the score."` + Filter []Query `json:"filter,omitempty" doc:"List of things that must appear in matching documents. However unlike must the score of the query will be ignored. Filter clauses are executed in filter context, meaning that scoring is ignored and clauses are considered for caching."` + Should []Query `json:"should,omitempty" doc:"List of things that should appear in the matching document."` + MustNot []Query `json:"must_not,omitempty" doc:"List of things that must not appear in the matching documents. Clauses are executed in filter context meaning that scoring is ignored and clauses are considered for caching. Because scoring is ignored, a score of 0 for all documents is returned."` +} + +//<name>:<value> can be shorthanded to just a text value "...", but for sake of go type def, we always use an object meaning the same, allowing more options +//https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html#query-dsl-match-query-short-ex +type QueryNameValue map[string]QueryValue + +type QueryValue struct { + Query string `json:"query"` + Operator string `json:"operator,omitempty"` //defaults to "or", accepted values: or|and + Fuzziness string `json:"fuzziness,omitempty"` //https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness + ZeroTermsQuery string `json:"zero_terms_query,omitempty"` +} + +func QueryValueText(text string) QueryValue { + return QueryValue{Query: text, Operator: "and"} +} + +func QueryValueTime(t time.Time) QueryValue { + return QueryValue{Query: t.String(), Operator: "and"} +} + +type QueryRange map[string]QueryExpr + +type QueryExpr map[string]QueryValue //<oper>:<value> e.g. "gte":"10" + +//example of search response body: +// { +// "took":872, +// "timed_out":false, +// "_shards":{ +// "total":38, +// "successful":38, +// "skipped":0, +// "failed":0 +// }, +// "hits":{ +// "total":{ +// "value":0, +// "relation":"eq" +// }, +// "max_score":null, +// "hits":[ +// { +// "_index": "go-utils-audit-test-20211030", +// "_type": "_doc", +// "_id": "Tj9l5XwBWRiAneoYazic", +// "_score": 1.2039728, +// "_source": { +// "@timestamp": "2021-10-30T15:03:20.679481+02:00", +// "@end_time": "2021-10-30T15:03:20.469481+02:00", +// "@duration_ms": -210, +// "test1": "6", +// "test2": "ACC_00098", +// "test3": 10, +// "http": { +// "method": "GET", +// "path": "/accounts" +// }, +// "http_method": "GET", +// "http_path": "/accounts" +// } +// }, +// ] +// } +// } +type SearchResponseBody struct { + Took int `json:"took"` //milliseconds + TimedOut bool `json:"timed_out"` + Shards SearchResponseShards `json:"_shards"` + Hits SearchResponseHits `json:"hits"` +} + +type SearchResponseShards struct { + Total int `json:"total"` + Successful int `json:"successful"` + Skipped int `json:"skipped"` + Failed int `json:"failed"` +} + +type SearchResponseHits struct { + Total SearchResponseHitsTotal `json:"total"` + MaxScore *float64 `json:"max_score,omitempty"` + Hits []HitDoc `json:"hits"` +} + +type SearchResponseHitsTotal struct { + Value int `json:"value"` //e.g. 0 when no docs matched + Relation string `json:"relation"` //e.g. "eq" +} + +type HitDoc struct { + Index string `json:"_index"` //name of index + Type string `json:"_type"` //_doc + ID string `json:"_id"` + Score float64 `json:"_score"` // + Source map[string]interface{} `json:"_source"` //the document of itemType +} diff --git a/search/search_test.go b/search/search_test.go index 068cb1a..0deef6b 100644 --- a/search/search_test.go +++ b/search/search_test.go @@ -67,7 +67,14 @@ func test(t *testing.T, c search.Config) { } } - docs, totalCount, err := ts.Search(10) + query := search.Query{ + MultiMatch: &search.QueryMultiMatch{ + Query: "GET", + Fields: []string{"http_method"}, + }, + } + + docs, totalCount, err := ts.Search(query, 10) if err != nil { t.Errorf("failed to search: %+v", err) } else { diff --git a/search/time_series.go b/search/time_series.go index 9201356..46cc049 100644 --- a/search/time_series.go +++ b/search/time_series.go @@ -24,7 +24,7 @@ type TimeSeriesHeader struct { type TimeSeries interface { Write(StartTime time.Time, EndTime time.Time, data interface{}) error - Search(limit int) (docs interface{}, totalCount int, err error) + Search(query Query, limit int64) (docs interface{}, totalCount int, err error) } type timeSeries struct { @@ -305,9 +305,9 @@ type IndexSettings struct { //Search //Return: // docs will be a slice of the TimeSeries data type -func (ts *timeSeries) Search(limit int) (docs interface{}, totalCount int, err error) { - if limit > 1000 { - err = errors.Errorf("limit=%d > 1000", limit) +func (ts *timeSeries) Search(query Query, limit int64) (docs interface{}, totalCount int, err error) { + if limit < 0 || limit > 1000 { + err = errors.Errorf("limit=%d not 0..1000", limit) return } @@ -322,13 +322,8 @@ func (ts *timeSeries) Search(limit int) (docs interface{}, totalCount int, err e // } // } body := SearchRequestBody{ - Size: limit, - Query: &SearchQuery{ - MultiMatch: &QueryMultiMatch{ - Query: "GET", //keyword to find - Fields: []string{"http_method"}, //, "title^2", "directory"}, - }, - }, + Size: limit, + Query: query, } jsonBody, _ := json.Marshal(body) search := opensearchapi.SearchRequest{ @@ -370,90 +365,3 @@ func (ts *timeSeries) Search(limit int) (docs interface{}, totalCount int, err e } return items.Interface(), hitsTotalValue.Interface().(int), nil } - -type SearchRequestBody struct { - Size int `json:"size,omitempty"` - Query *SearchQuery `json:"query,omitempty"` -} - -type SearchQuery struct { - MultiMatch *QueryMultiMatch `json:"multi_match,omitempty"` -} - -type QueryMultiMatch struct { - Query string `json:"query"` - Fields []string `json:"fields"` -} - -//example of search response body: -// { -// "took":872, -// "timed_out":false, -// "_shards":{ -// "total":38, -// "successful":38, -// "skipped":0, -// "failed":0 -// }, -// "hits":{ -// "total":{ -// "value":0, -// "relation":"eq" -// }, -// "max_score":null, -// "hits":[ -// { -// "_index": "go-utils-audit-test-20211030", -// "_type": "_doc", -// "_id": "Tj9l5XwBWRiAneoYazic", -// "_score": 1.2039728, -// "_source": { -// "@timestamp": "2021-10-30T15:03:20.679481+02:00", -// "@end_time": "2021-10-30T15:03:20.469481+02:00", -// "@duration_ms": -210, -// "test1": "6", -// "test2": "ACC_00098", -// "test3": 10, -// "http": { -// "method": "GET", -// "path": "/accounts" -// }, -// "http_method": "GET", -// "http_path": "/accounts" -// } -// }, -// ] -// } -// } -type SearchResponseBody struct { - Took int `json:"took"` //milliseconds - TimedOut bool `json:"timed_out"` - Shards SearchResponseShards `json:"_shards"` - Hits SearchResponseHits `json:"hits"` -} - -type SearchResponseShards struct { - Total int `json:"total"` - Successful int `json:"successful"` - Skipped int `json:"skipped"` - Failed int `json:"failed"` -} - -type SearchResponseHits struct { - Total SearchResponseHitsTotal `json:"total"` - MaxScore *float64 `json:"max_score,omitempty"` - Hits []HitDoc `json:"hits"` -} - -type SearchResponseHitsTotal struct { - Value int `json:"value"` //e.g. 0 when no docs matched - Relation string `json:"relation"` //e.g. "eq" -} - -type HitDoc struct { - Index string `json:"_index"` //name of index - Type string `json:"_type"` //_doc - ID string `json:"_id"` - Score float64 `json:"_score"` // - Source map[string]interface{} `json:"_source"` //the document of itemType -} -- GitLab