summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2019-03-16 12:15:37 +0300
committerAlexander Korotkov <akorotkov@postgresql.org>2019-03-16 12:16:48 +0300
commit72b6460336e86ad5cafd3426af6013c7d8457367 (patch)
tree8c590577fcfac501f24d7a282b529bea9d45c6d9 /src/include
parent893d6f8a1f9b43da805124e93cbf0f7aea890ad4 (diff)
Partial implementation of SQL/JSON path language
SQL 2016 standards among other things contains set of SQL/JSON features for JSON processing inside of relational database. The core of SQL/JSON is JSON path language, allowing access parts of JSON documents and make computations over them. This commit implements partial support JSON path language as separate datatype called "jsonpath". The implementation is partial because it's lacking datetime support and suppression of numeric errors. Missing features will be added later by separate commits. Support of SQL/JSON features requires implementation of separate nodes, and it will be considered in subsequent patches. This commit includes following set of plain functions, allowing to execute jsonpath over jsonb values: * jsonb_path_exists(jsonb, jsonpath[, jsonb, bool]), * jsonb_path_match(jsonb, jsonpath[, jsonb, bool]), * jsonb_path_query(jsonb, jsonpath[, jsonb, bool]), * jsonb_path_query_array(jsonb, jsonpath[, jsonb, bool]). * jsonb_path_query_first(jsonb, jsonpath[, jsonb, bool]). This commit also implements "jsonb @? jsonpath" and "jsonb @@ jsonpath", which are wrappers over jsonpath_exists(jsonb, jsonpath) and jsonpath_predicate(jsonb, jsonpath) correspondingly. These operators will have an index support (implemented in subsequent patches). Catversion bumped, to add new functions and operators. Code was written by Nikita Glukhov and Teodor Sigaev, revised by me. Documentation was written by Oleg Bartunov and Liudmila Mantrova. The work was inspired by Oleg Bartunov. Discussion: https://postgr.es/m/fcc6fc6a-b497-f39a-923d-aa34d0c588e8%402ndQuadrant.com Author: Nikita Glukhov, Teodor Sigaev, Alexander Korotkov, Oleg Bartunov, Liudmila Mantrova Reviewed-by: Tomas Vondra, Andrew Dunstan, Pavel Stehule, Alexander Korotkov
Diffstat (limited to 'src/include')
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_operator.dat8
-rw-r--r--src/include/catalog/pg_proc.dat39
-rw-r--r--src/include/catalog/pg_type.dat5
-rw-r--r--src/include/regex/regex.h8
-rw-r--r--src/include/utils/.gitignore1
-rw-r--r--src/include/utils/jsonb.h4
-rw-r--r--src/include/utils/jsonpath.h245
-rw-r--r--src/include/utils/jsonpath_scanner.h32
9 files changed, 343 insertions, 1 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 8132e28c307..f896653f4d4 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201903121
+#define CATALOG_VERSION_NO 201903161
#endif
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 06aec0780b9..ad8c5bb30e9 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -3255,5 +3255,13 @@
{ oid => '3287', descr => 'delete path',
oprname => '#-', oprleft => 'jsonb', oprright => '_text',
oprresult => 'jsonb', oprcode => 'jsonb_delete_path' },
+{ oid => '4012', descr => 'jsonpath exists',
+ oprname => '@?', oprleft => 'jsonb', oprright => 'jsonpath',
+ oprresult => 'bool', oprcode => 'jsonb_path_exists(jsonb,jsonpath)',
+ oprrest => 'contsel', oprjoin => 'contjoinsel' },
+{ oid => '4013', descr => 'jsonpath match',
+ oprname => '@@', oprleft => 'jsonb', oprright => 'jsonpath',
+ oprresult => 'bool', oprcode => 'jsonb_path_match(jsonb,jsonpath)',
+ oprrest => 'contsel', oprjoin => 'contjoinsel' },
]
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index c4b012cf4c1..84120de3620 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9216,6 +9216,45 @@
proname => 'jsonb_insert', prorettype => 'jsonb',
proargtypes => 'jsonb _text jsonb bool', prosrc => 'jsonb_insert' },
+# jsonpath
+{ oid => '4001', descr => 'I/O',
+ proname => 'jsonpath_in', prorettype => 'jsonpath', proargtypes => 'cstring',
+ prosrc => 'jsonpath_in' },
+{ oid => '4002', descr => 'I/O',
+ proname => 'jsonpath_recv', prorettype => 'jsonpath', proargtypes => 'internal',
+ prosrc => 'jsonpath_recv' },
+{ oid => '4003', descr => 'I/O',
+ proname => 'jsonpath_out', prorettype => 'cstring', proargtypes => 'jsonpath',
+ prosrc => 'jsonpath_out' },
+{ oid => '4004', descr => 'I/O',
+ proname => 'jsonpath_send', prorettype => 'bytea', proargtypes => 'jsonpath',
+ prosrc => 'jsonpath_send' },
+
+{ oid => '4005', descr => 'jsonpath exists test',
+ proname => 'jsonb_path_exists', prorettype => 'bool',
+ proargtypes => 'jsonb jsonpath jsonb bool', prosrc => 'jsonb_path_exists' },
+{ oid => '4006', descr => 'jsonpath query',
+ proname => 'jsonb_path_query', prorows => '1000', proretset => 't',
+ prorettype => 'jsonb', proargtypes => 'jsonb jsonpath jsonb bool',
+ prosrc => 'jsonb_path_query' },
+{ oid => '4007', descr => 'jsonpath query wrapped into array',
+ proname => 'jsonb_path_query_array', prorettype => 'jsonb',
+ proargtypes => 'jsonb jsonpath jsonb bool',
+ prosrc => 'jsonb_path_query_array' },
+{ oid => '4008', descr => 'jsonpath query first item',
+ proname => 'jsonb_path_query_first', prorettype => 'jsonb',
+ proargtypes => 'jsonb jsonpath jsonb bool', prosrc => 'jsonb_path_query_first' },
+{ oid => '4009', descr => 'jsonpath match', proname => 'jsonb_path_match',
+ prorettype => 'bool', proargtypes => 'jsonb jsonpath jsonb bool',
+ prosrc => 'jsonb_path_match' },
+
+{ oid => '4010', descr => 'implementation of @? operator',
+ proname => 'jsonb_path_exists', prorettype => 'bool',
+ proargtypes => 'jsonb jsonpath', prosrc => 'jsonb_path_exists_opr' },
+{ oid => '4011', descr => 'implementation of @@ operator',
+ proname => 'jsonb_path_match', prorettype => 'bool',
+ proargtypes => 'jsonb jsonpath', prosrc => 'jsonb_path_match_opr' },
+
# txid
{ oid => '2939', descr => 'I/O',
proname => 'txid_snapshot_in', prorettype => 'txid_snapshot',
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index 2495ed6b179..8f5ea9332ad 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -434,6 +434,11 @@
typname => 'jsonb', typlen => '-1', typbyval => 'f', typcategory => 'U',
typinput => 'jsonb_in', typoutput => 'jsonb_out', typreceive => 'jsonb_recv',
typsend => 'jsonb_send', typalign => 'i', typstorage => 'x' },
+{ oid => '4072', array_type_oid => '4073', descr => 'JSON path',
+ typname => 'jsonpath', typlen => '-1', typbyval => 'f', typcategory => 'U',
+ typarray => '_jsonpath', typinput => 'jsonpath_in',
+ typoutput => 'jsonpath_out', typreceive => '-', typsend => '-',
+ typalign => 'i', typstorage => 'x' },
{ oid => '2970', array_type_oid => '2949', descr => 'txid snapshot',
typname => 'txid_snapshot', typlen => '-1', typbyval => 'f',
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index 27fdc090409..23ad03d9304 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -167,10 +167,18 @@ typedef struct
/*
* the prototypes for exported functions
*/
+
+/* regcomp.c */
extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);
extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
extern int pg_regprefix(regex_t *, pg_wchar **, size_t *);
extern void pg_regfree(regex_t *);
extern size_t pg_regerror(int, const regex_t *, char *, size_t);
+/* regexp.c */
+extern regex_t *RE_compile_and_cache(text *text_re, int cflags, Oid collation);
+extern bool RE_compile_and_execute(text *text_re, char *dat, int dat_len,
+ int cflags, Oid collation,
+ int nmatch, regmatch_t *pmatch);
+
#endif /* _REGEX_H_ */
diff --git a/src/include/utils/.gitignore b/src/include/utils/.gitignore
index 05cfa7a8d6c..e0705e1aa70 100644
--- a/src/include/utils/.gitignore
+++ b/src/include/utils/.gitignore
@@ -3,3 +3,4 @@
/probes.h
/errcodes.h
/header-stamp
+/jsonpath_gram.h
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 6ccacf545ce..ec0355f13c2 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -66,8 +66,10 @@ typedef enum
/* Convenience macros */
#define DatumGetJsonbP(d) ((Jsonb *) PG_DETOAST_DATUM(d))
+#define DatumGetJsonbPCopy(d) ((Jsonb *) PG_DETOAST_DATUM_COPY(d))
#define JsonbPGetDatum(p) PointerGetDatum(p)
#define PG_GETARG_JSONB_P(x) DatumGetJsonbP(PG_GETARG_DATUM(x))
+#define PG_GETARG_JSONB_P_COPY(x) DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
#define PG_RETURN_JSONB_P(x) PG_RETURN_POINTER(x)
typedef struct JsonbPair JsonbPair;
@@ -378,6 +380,8 @@ extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
int estimated_len);
extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
int estimated_len);
+extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
+extern const char *JsonbTypeName(JsonbValue *jb);
#endif /* __JSONB_H__ */
diff --git a/src/include/utils/jsonpath.h b/src/include/utils/jsonpath.h
new file mode 100644
index 00000000000..14f837e00d5
--- /dev/null
+++ b/src/include/utils/jsonpath.h
@@ -0,0 +1,245 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath.h
+ * Definitions for jsonpath datatype
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/include/utils/jsonpath.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef JSONPATH_H
+#define JSONPATH_H
+
+#include "fmgr.h"
+#include "utils/jsonb.h"
+#include "nodes/pg_list.h"
+
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ uint32 header; /* version and flags (see below) */
+ char data[FLEXIBLE_ARRAY_MEMBER];
+} JsonPath;
+
+#define JSONPATH_VERSION (0x01)
+#define JSONPATH_LAX (0x80000000)
+#define JSONPATH_HDRSZ (offsetof(JsonPath, data))
+
+#define DatumGetJsonPathP(d) ((JsonPath *) DatumGetPointer(PG_DETOAST_DATUM(d)))
+#define DatumGetJsonPathPCopy(d) ((JsonPath *) DatumGetPointer(PG_DETOAST_DATUM_COPY(d)))
+#define PG_GETARG_JSONPATH_P(x) DatumGetJsonPathP(PG_GETARG_DATUM(x))
+#define PG_GETARG_JSONPATH_P_COPY(x) DatumGetJsonPathPCopy(PG_GETARG_DATUM(x))
+#define PG_RETURN_JSONPATH_P(p) PG_RETURN_POINTER(p)
+
+/*
+ * All node's type of jsonpath expression
+ */
+typedef enum JsonPathItemType
+{
+ jpiNull = jbvNull, /* NULL literal */
+ jpiString = jbvString, /* string literal */
+ jpiNumeric = jbvNumeric, /* numeric literal */
+ jpiBool = jbvBool, /* boolean literal: TRUE or FALSE */
+ jpiAnd, /* predicate && predicate */
+ jpiOr, /* predicate || predicate */
+ jpiNot, /* ! predicate */
+ jpiIsUnknown, /* (predicate) IS UNKNOWN */
+ jpiEqual, /* expr == expr */
+ jpiNotEqual, /* expr != expr */
+ jpiLess, /* expr < expr */
+ jpiGreater, /* expr > expr */
+ jpiLessOrEqual, /* expr <= expr */
+ jpiGreaterOrEqual, /* expr >= expr */
+ jpiAdd, /* expr + expr */
+ jpiSub, /* expr - expr */
+ jpiMul, /* expr * expr */
+ jpiDiv, /* expr / expr */
+ jpiMod, /* expr % expr */
+ jpiPlus, /* + expr */
+ jpiMinus, /* - expr */
+ jpiAnyArray, /* [*] */
+ jpiAnyKey, /* .* */
+ jpiIndexArray, /* [subscript, ...] */
+ jpiAny, /* .** */
+ jpiKey, /* .key */
+ jpiCurrent, /* @ */
+ jpiRoot, /* $ */
+ jpiVariable, /* $variable */
+ jpiFilter, /* ? (predicate) */
+ jpiExists, /* EXISTS (expr) predicate */
+ jpiType, /* .type() item method */
+ jpiSize, /* .size() item method */
+ jpiAbs, /* .abs() item method */
+ jpiFloor, /* .floor() item method */
+ jpiCeiling, /* .ceiling() item method */
+ jpiDouble, /* .double() item method */
+ jpiKeyValue, /* .keyvalue() item method */
+ jpiSubscript, /* array subscript: 'expr' or 'expr TO expr' */
+ jpiLast, /* LAST array subscript */
+ jpiStartsWith, /* STARTS WITH predicate */
+ jpiLikeRegex, /* LIKE_REGEX predicate */
+} JsonPathItemType;
+
+/* XQuery regex mode flags for LIKE_REGEX predicate */
+#define JSP_REGEX_ICASE 0x01 /* i flag, case insensitive */
+#define JSP_REGEX_SLINE 0x02 /* s flag, single-line mode */
+#define JSP_REGEX_MLINE 0x04 /* m flag, multi-line mode */
+#define JSP_REGEX_WSPACE 0x08 /* x flag, expanded syntax */
+
+/*
+ * Support functions to parse/construct binary value.
+ * Unlike many other representation of expression the first/main
+ * node is not an operation but left operand of expression. That
+ * allows to implement cheep follow-path descending in jsonb
+ * structure and then execute operator with right operand
+ */
+
+typedef struct JsonPathItem
+{
+ JsonPathItemType type;
+
+ /* position form base to next node */
+ int32 nextPos;
+
+ /*
+ * pointer into JsonPath value to current node, all positions of current
+ * are relative to this base
+ */
+ char *base;
+
+ union
+ {
+ /* classic operator with two operands: and, or etc */
+ struct
+ {
+ int32 left;
+ int32 right;
+ } args;
+
+ /* any unary operation */
+ int32 arg;
+
+ /* storage for jpiIndexArray: indexes of array */
+ struct
+ {
+ int32 nelems;
+ struct
+ {
+ int32 from;
+ int32 to;
+ } *elems;
+ } array;
+
+ /* jpiAny: levels */
+ struct
+ {
+ uint32 first;
+ uint32 last;
+ } anybounds;
+
+ struct
+ {
+ char *data; /* for bool, numeric and string/key */
+ int32 datalen; /* filled only for string/key */
+ } value;
+
+ struct
+ {
+ int32 expr;
+ char *pattern;
+ int32 patternlen;
+ uint32 flags;
+ } like_regex;
+ } content;
+} JsonPathItem;
+
+#define jspHasNext(jsp) ((jsp)->nextPos > 0)
+
+extern void jspInit(JsonPathItem *v, JsonPath *js);
+extern void jspInitByBuffer(JsonPathItem *v, char *base, int32 pos);
+extern bool jspGetNext(JsonPathItem *v, JsonPathItem *a);
+extern void jspGetArg(JsonPathItem *v, JsonPathItem *a);
+extern void jspGetLeftArg(JsonPathItem *v, JsonPathItem *a);
+extern void jspGetRightArg(JsonPathItem *v, JsonPathItem *a);
+extern Numeric jspGetNumeric(JsonPathItem *v);
+extern bool jspGetBool(JsonPathItem *v);
+extern char *jspGetString(JsonPathItem *v, int32 *len);
+extern bool jspGetArraySubscript(JsonPathItem *v, JsonPathItem *from,
+ JsonPathItem *to, int i);
+
+extern const char *jspOperationName(JsonPathItemType type);
+
+/*
+ * Parsing support data structures.
+ */
+
+typedef struct JsonPathParseItem JsonPathParseItem;
+
+struct JsonPathParseItem
+{
+ JsonPathItemType type;
+ JsonPathParseItem *next; /* next in path */
+
+ union
+ {
+
+ /* classic operator with two operands: and, or etc */
+ struct
+ {
+ JsonPathParseItem *left;
+ JsonPathParseItem *right;
+ } args;
+
+ /* any unary operation */
+ JsonPathParseItem *arg;
+
+ /* storage for jpiIndexArray: indexes of array */
+ struct
+ {
+ int nelems;
+ struct
+ {
+ JsonPathParseItem *from;
+ JsonPathParseItem *to;
+ } *elems;
+ } array;
+
+ /* jpiAny: levels */
+ struct
+ {
+ uint32 first;
+ uint32 last;
+ } anybounds;
+
+ struct
+ {
+ JsonPathParseItem *expr;
+ char *pattern; /* could not be not null-terminated */
+ uint32 patternlen;
+ uint32 flags;
+ } like_regex;
+
+ /* scalars */
+ Numeric numeric;
+ bool boolean;
+ struct
+ {
+ uint32 len;
+ char *val; /* could not be not null-terminated */
+ } string;
+ } value;
+};
+
+typedef struct JsonPathParseResult
+{
+ JsonPathParseItem *expr;
+ bool lax;
+} JsonPathParseResult;
+
+extern JsonPathParseResult *parsejsonpath(const char *str, int len);
+
+#endif
diff --git a/src/include/utils/jsonpath_scanner.h b/src/include/utils/jsonpath_scanner.h
new file mode 100644
index 00000000000..bbdd984dab5
--- /dev/null
+++ b/src/include/utils/jsonpath_scanner.h
@@ -0,0 +1,32 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_scanner.h
+ * Definitions for jsonpath scanner & parser
+ *
+ * Portions Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * src/include/utils/jsonpath_scanner.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef JSONPATH_SCANNER_H
+#define JSONPATH_SCANNER_H
+
+/* struct string is shared between scan and gram */
+typedef struct string
+{
+ char *val;
+ int len;
+ int total;
+} string;
+
+#include "utils/jsonpath.h"
+#include "utils/jsonpath_gram.h"
+
+/* flex 2.5.4 doesn't bother with a decl for this */
+extern int jsonpath_yylex(YYSTYPE *yylval_param);
+extern int jsonpath_yyparse(JsonPathParseResult **result);
+extern void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
+
+#endif