所有文章是按github的markdown格式书写的,如此页面显示不正常,请跳转到github版

关于gin的部分匹配

1. gin索引的部分匹配实现

gin除了可以做某个Key的精确匹配查询,但也能支持匹配一个Key范围的查询。
手册的说明如下:

http://www.postgres.cn/docs/9.3/gin-implementation.html#GIN-PARTIAL-MATCH

===========================================================================================

57.3.2. 部分匹配算法

GIN可以支持”部分匹配”查询。即:查询并不决定单个或多个键的一个精确的匹配, 而是, 可能的匹配落在一个合理的狭窄键值范围内(根据compare支持函数决定的键值排序顺序)。 此时,extractQuery方法并不返回一个用于精确匹配的键值,取而代之的是, 返回一个 要被搜索的键值范围的下边界,并且设置pmatch为true。 然后,这个键值范围被使用 comparePartial进行扫描。 comparePartial必须为一个相匹配的索引键返回0, 不匹配但依然在被搜索范围内时返回小于0的值,对超过可以匹配的范围的索引键则返回大于0的值。

===========================================================================================

2. btree-gin的例子

手册中的描述结合例子会更好理解。下面就看看btree-gin扩展中如何通过部分匹配支持比较操作符的。
相关代码如下:

contrib/btree_gin/btree_gin–1.0.sql

CREATE OPERATOR CLASS int4_ops
DEFAULT FOR TYPE int4 USING gin
AS
    OPERATOR        1       <,
    OPERATOR        2       <=,
    OPERATOR        3       =,
    OPERATOR        4       >=,
    OPERATOR        5       >,
    FUNCTION        1       btint4cmp(int4,int4),
    FUNCTION        2       gin_extract_value_int4(int4, internal),
    FUNCTION        3       gin_extract_query_int4(int4, internal, int2, internal, internal),
    FUNCTION        4       gin_btree_consistent(internal, int2, anyelement, int4, internal, internal),
    FUNCTION        5       gin_compare_prefix_int4(int4,int4,int2, internal),
STORAGE         int4;

以”<”操作符为例,实现”<”部分匹配的关键是gin_extract_query_int4()和gin_compare_prefix_int4()函数。下面是这两个函数的实现。

contrib/btree_gin/btree_gin.c

GIN_SUPPORT(int4)

这是个复杂的宏,展开后就是下面的东西(注意里面的中文注释)。

...
Datum																		\
gin_extract_query_int4(FunctionCallInfo fcinfo)									\
{																			\
	Datum		datum = (fcinfo->arg[0]);									\
	int32	   *nentries = (int32 *) ((Pointer) ((fcinfo->arg[1])));					\
	StrategyNumber strategy = ((uint16) (((Datum) ((fcinfo->arg[2]))) & 0x0000ffff));							\
	bool	  **partialmatch = (bool **) ((Pointer) ((fcinfo->arg[3])));				\
	Pointer   **extra_data = (Pointer **) ((Pointer) ((fcinfo->arg[4])));				\
	Datum	   *entries = (Datum *) palloc(sizeof(Datum));					\
	QueryInfo  *data = (QueryInfo *) palloc(sizeof(QueryInfo));				\
	bool	   *ptr_partialmatch;											\
																			\
	*nentries = 1;															\
	ptr_partialmatch = *partialmatch = (bool *) palloc(sizeof(bool));		\
	*ptr_partialmatch = ((bool) 0);												\
	if ( TypeInfo_int4.is_varlena )										\
		datum = ((Datum) (pg_detoast_datum((struct varlena *) ((Pointer) (datum)))));					\
	data->strategy = strategy;												\
	data->datum = datum;													\
	*extra_data = (Pointer *) palloc(sizeof(Pointer));						\
	**extra_data = (Pointer) data;											\
																			\
	switch (strategy)														\
	{																		\
		case 1:											\
		case 2:										\
			/* 在处理"<"和"<="操作符时,设置扫描的起始范围是int4的最小值,并在前面将比较值作为extra_data保存。 */
			entries[0] = TypeInfo_int4.leftmostvalue();					\
			*ptr_partialmatch = ((bool) 1);										\
			break;															\
		case 4:									\
		case 5:										\
			*ptr_partialmatch = ((bool) 1);										\
		case 3:											\
			entries[0] = datum;												\
			break;															\
		default:															\
			elog_start("E:\\OSSSrc\\postgresql-9.4.2\\postgresql-9.4.2\\contrib\\btree_gin\\btree_gin.c", 207, ((void *) 0)), \
	elog_finish(20, "unrecognized strategy number: %d", strategy);		\
	}																		\
																			\
	return ((Datum) (entries));												\
}			\
...
Datum																		\
gin_compare_prefix_int4(FunctionCallInfo fcinfo)									\
{																			\
	Datum		a = (fcinfo->arg[0]);										\
	Datum		b = (fcinfo->arg[1]);										\
	QueryInfo  *data = (QueryInfo *) ((Pointer) ((fcinfo->arg[3])));					\
	int32		res,														\
				cmp;														\
																			\
	cmp = ((int32) (((Datum) (DirectFunctionCall2Coll(							\
				TypeInfo_int4.typecmp,									\
				(fcinfo->fncollation),											\
				(data->strategy == 1 ||					\
				 data->strategy == 2)				\
				 ? data->datum : a,											\
				b))) & 0xffffffff));														\
																			\
	switch (data->strategy)													\
	{																		\
		case 1:											\
			/* If original datum > indexed one then return match */			\
            /* 在处理"<"操作符时,当extra_data中保存的比较值大于索引中的key时,返回0(匹配)并继续扫描,否则返回1(不匹配)退出扫描。 */
			if (cmp > 0)													\
				res = 0;													\
			else															\
				res = 1;													\
			break;															\
		case 2:										\
			/* The same except equality */									\
			if (cmp >= 0)													\
				res = 0;													\
			else															\
				res = 1;													\
			break;															\
		case 3:											\
			if (cmp != 0)													\
				res = 1;													\
			else															\
				res = 0;													\
			break;															\
		case 4:									\
			/* If original datum <= indexed one then return match */		\
			if (cmp <= 0)													\
				res = 0;													\
			else															\
				res = 1;													\
			break;															\
		case 5:										\
			/* If original datum <= indexed one then return match */		\
			/* If original datum == indexed one then continue scan */		\
			if (cmp < 0)													\
				res = 0;													\
			else if (cmp == 0)												\
				res = -1;													\
			else															\
				res = 1;													\
			break;															\
		default:															\
			elog_start("E:\\OSSSrc\\postgresql-9.4.2\\postgresql-9.4.2\\contrib\\btree_gin\\btree_gin.c", 207, ((void *) 0)), \
	elog_finish(20, "unrecognized strategy number: %d",					\
				 data->strategy);											\
			res = 0;														\
	}																		\
																			\
	return ((Datum) (((Datum) (res)) & 0xffffffff));
}
...
June 27, 2015