本文只是一个山寨试验品
思路仅供参考
原理介绍:索引建立
目录结构划分方案也只是很简易的实现了一下通过unicode把任意连续的两个字符(中文或英文)分为个字节来做四层目录把索引的内容对应的主关键字(主要为了使用sql索引和唯一性)作为文件名两个字符在索引内容中的位置作为文件后缀来存储文件本身为字节不保存任何信息
比如一条数据 pk山寨索引 山寨索引 四个字的unicode为
[]:
[]:
[]:
[]:
[]:
[]:
[]:
[]:
那么对应的文件结构为
/////pk
/////pk
/////pk
索引使用 比如搜索寨索引
则搜索 ///// 目录下的所有文件然后根据 pk 的文件后缀名去看 /////pk文件是否存在依次类推最后返回一个结果集
实用性具体的实用性还有待验证这只是实现了精确的like搜索而不能做常见搜索引擎的分词效果另外海量数据重建索引的性能也是面临很严峻的问题比如cpu负载和磁盘io负载关于windows一个目录下可以保持多少个文件而不会对文件搜索造成大的性能损失也有待评估不过这个可以考虑根据主键的文件名hash来增加文件目录深度降低单一目录下的文件数量
演示效果实现了针对test标的name和caption两个字段作索引搜索
设置和获取索引文件根目录
select dboxfn_SetMyIndexFileRoot(d:/MyIndex)
select dboxfn_GetMyIndexFileRoot()
建立测试环境 go
create table test( id uniqueidentifier name nvarchar ( ) caption nvarchar ( ))
insert into test select top newid () 我的索引 测试 from sysobjects
insert into test select top newid () 我的测试 索引 from sysobjects
insert into test select top newid () 测试索引 测试索引 from sysobjects
insert into test select top newid () 我的索引 索引 from sysobjects
create index i_testid on test( id)
建立索引文件 declare @t int
select @t=
dbo xfn_SetKeyForMyIndex( id testIndex name + + caption)
from test
查询数据 select a* from test a dbo xfn_GetKeyFromMyIndex( 测试 索引 我的 testIndex ) b
where a id= b pk
/*
CEADFAAEBDFEED 我的索引 测试
DDBCDFFBBAF 我的索引 测试
CACFCBACEAEE 我的索引 测试
CBFFBFFBAECEADE 我的测试 索引
BBFCCDBBFCBBEBDEDDF 我的测试 索引
BDEACDFFAA 我的测试 索引
*/
drop table test
clr代码如下:编译为MyFullIndexdll
复制代码 代码如下:
using System;
using SystemDataSqlTypes;
using MicrosoftSqlServerServer;
using SystemCollections;
using SystemCollectionsGeneric;
public partial class UserDefinedFunctions
{
/// <summary>
/// 设置索引目录
/// </summary>
/// <param name=value></param>
/// <returns></returns>
[MicrosoftSqlServerServerSqlFunction ]
public static SqlBoolean SetRoot(SqlString value)
{
if (valueIsNull) return false ;
if (SystemIODirectory Exists(valueValue))
{
root = valueValue;
return true ;
}
else
{
return false ;
}
}
/// <summary>
/// 获取索引目录
/// </summary>
/// <returns></returns>
[MicrosoftSqlServerServerSqlFunction ]
public static SqlString GetRoot()
{
return new SqlString (root);
}
/// <summary>
/// 建立索引
/// </summary>
/// <param name=key> 主键 </param>
/// <param name=indexName> 索引名称 </param>
/// <param name=content> 索引内容 </param>
/// <returns></returns>
[MicrosoftSqlServerServerSqlFunction ]
public static SqlInt SetIndex(SqlString keySqlString indexNameSqlString content)
{
if (keyIsNull || contentIsNull||indexNameIsNull) return ;
return _setIndex(keyValueindexNameValue contentValue);
}
/// <summary>
/// 查询索引
/// </summary>
/// <param name=word> 关键字(空格区分) </param>
/// <param name=indexName> 索引名称 </param>
/// <returns></returns>
[SqlFunction (TableDefinition = pk nvarchar() Name = GetIndex FillRowMethodName = FillRow )]
public static IEnumerable GetIndex(SqlString wordSqlString indexName)
{
SystemCollectionsGenericList <string > ret = new List <string >();
if (wordIsNull || indexNameIsNull) return ret;
return _getIndex(wordValue indexNameValue);
}
public static void FillRow(Object obj out SqlString pk)
{
string key = objToString();
pk = key;
}
static string root = @d:/index ;
/// <summary>
/// 获取有空格分隔的索引信息
/// </summary>
/// <param name=word></param>
/// <param name=indexName></param>
/// <returns></returns>
static SystemCollectionsGenericList <string > _getIndex(string word string indexName)
{
string [] arrWord = wordSplit(new char [] { } StringSplitOptions RemoveEmptyEntries);
SystemCollectionsGenericList <string > key_ = _getIndex(arrWord[] indexName);
if (arrWordLength == ) return key_;
SystemCollectionsGenericList <string > [] key_list=new List <string >[arrWordLength];
for (int i = ; i < arrWordLength; i++)
{
SystemCollectionsGenericList <string > key_i = _getIndex(arrWord[i+]indexName);
key_list[i] = key_i;
}
for (int i=key_Count;i>=;i)
{
foreach (SystemCollectionsGenericList <string > key_i in key_list)
{
if (key_iContains(key_[i]) == false )
{
key_RemoveAt(i);
continue ;
}
}
}
return key_;
}
/// <summary>
/// 获取单个词的索引信息
/// </summary>
/// <param name=word></param>
/// <param name=indexName></param>
/// <returns></returns>
static SystemCollectionsGenericList <string > _getIndex(string word string indexName)
{
SystemCollectionsGenericList <string > ret = new List <string >();
byte [] bWord = SystemTextEncoding UnicodeGetBytes(word);
if (bWordLength < ) return ret;
string path = string Format(@{}/{}/{}/{}/{}/{}/ rootindexName bWord[] bWord[] bWord[] bWord[]);
if (SystemIODirectory Exists(path) == false )
{
return ret;
}
string [] arrFiles = SystemIODirectory GetFiles(path);
foreach (string file in arrFiles)
{
string key = SystemIOPath GetFileNameWithoutExtension(file);
string index = SystemIOPath GetExtension(file)TrimStart(new char [] { });
int cIndex = int Parse(index);
bool bHas = true ;
for (int i = ; i < bWordLength ; i = i + )
{
string nextFile = string Format(@{}/{}/{}/{}/{}/{}/{}{}
root indexName bWord[i + ] bWord[i + ] bWord[i + ] bWord[i + ] key ++cIndex);
if (SystemIOFile Exists(nextFile) == false )
{
bHas = false ;
break ;
}
}
if (bHas == true &&retContains(key)==false )
retAdd(key);
}
return ret;
}
/// <summary>
/// 建立索引文件
/// </summary>
/// <param name=key></param>
/// <param name=indexName></param>
/// <param name=content></param>
/// <returns></returns>
static int _setIndex(string keystring indexName string content)
{
byte [] bContent = SystemTextEncoding UnicodeGetBytes(content);
if (bContentLength <= ) return ;
for (int i = ; i < bContentLength ; i = i + )
{
string path = string Format(@{}/{}/{}/{}/{}/{}/ rootindexName bContent[i + ] bContent[i + ] bContent[i + ] bContent[i + ]);
if (SystemIODirectory Exists(path) == false )
{
SystemIODirectory CreateDirectory(path);
}
string file = string Format(@{}/{}{} path key i / );
if (SystemIOFile Exists(file) == false )
{
SystemIOFile Create(file)Close();
}
}
return contentLength;
}
};
部署的sql脚本如下drop function dboxfn_SetMyIndexFileRoot
drop function dboxfn_GetMyIndexFileRoot
drop function dboxfn_GetKeyFromMyIndex
drop function dboxfn_SetKeyForMyIndex
drop assembly MyFullIndex
go
CREATE ASSEMBLY MyFullIndex FROM d:/SQLCLR/MyFullIndexdll WITH PERMISSION_SET = UnSAFE;
go
索引搜索 CREATE FUNCTION dbo xfn_GetKeyFromMyIndex ( @word nvarchar ( max ) @indexName nvarchar ( ))
RETURNS table ( pk nvarchar ( ))
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions GetIndex
go
索引建立 CREATE FUNCTION dbo xfn_SetKeyForMyIndex ( @pk nvarchar ( ) @indexName nvarchar ( ) @word nvarchar ( max ))
RETURNS int
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions SetIndex
go
获取索引文件根目录 CREATE FUNCTION dbo xfn_GetMyIndexFileRoot ()
RETURNS nvarchar ( max )
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions GetRoot
go
设置索引文件根目录(默认目录为 d:/myindex ) CREATE FUNCTION dbo xfn_SetMyIndexFileRoot ( @FileRoot nvarchar ( max ))
RETURNS bit
AS EXTERNAL NAME MyFullIndex UserDefinedFunctions SetRoot
go