定时任务(Job)建立索引:
/**
 * Scheduled job that rebuilds the soft index into a temporary directory and,
 * only when the rebuild succeeded, swaps it in as the live "softIndex" directory.
 */
public class SoftIndexJob {
    private Log log = LogFactory.getLog(SoftIndexJob.class);
    /** Root directory under which the temporary and live index directories are created. */
    private String indexpath = SearchEngineCore.getIndexpath("VSOYOU_SOFT_INDEX_PATH");
    /** Last-modified marker of the data that the most recent successful build was made from. */
    private String lastDate;

    /**
     * Rebuilds the index when the data's last-modified marker differs from the one
     * recorded by the previous successful build.
     *
     * <p>The live index is replaced only after every document was added and committed.
     * A failed or aborted build leaves the live index untouched and does not update
     * {@code lastDate}, so the next invocation tries again.
     */
    public void doUpdateIndexData() {
        SoftService softService = ServiceFactory.getBean(SoftService.class);
        String lastModify = softService.getSoftLastModify();
        if (StringUtils.isNotBlank(lastDate) && StringUtils.isNotBlank(lastModify)
                && lastModify.equals(lastDate)) {
            return; // nothing changed since the last successful build
        }

        final int rows = 20000; // page size used when reading from the service
        String tmpIndexPath = indexpath + File.separator + "softTmp"; // temporary index directory
        IndexWriter writer = null;
        boolean built = false;
        try {
            FileUtil.deleteFile(tmpIndexPath); // clear leftovers of a previous run
            writer = SearchEngineCore.getIndexWriter(tmpIndexPath);
            if (null == writer) {
                log.error("Could not open an IndexWriter on " + tmpIndexPath + "; live index left unchanged");
                return;
            }
            System.out.println("索引SoftIndex更新start");
            int start = 0;
            while (true) {
                List<Soft> list = softService.findSoftIndexRes(start, rows);
                if (null == list || list.isEmpty()) {
                    break;
                }
                for (Soft soft : list) {
                    writer.addDocument(toDocument(soft));
                }
                start += rows;
            }
            writer.forceMerge(1);
            writer.commit();
            built = true;
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        } finally {
            closeWriter(writer, tmpIndexPath);
        }

        if (!built) {
            // Never publish a missing or partial temporary index over the live one.
            return;
        }
        String indexDir = indexpath + File.separator + "softIndex";
        SearchEngineCore.updateIndex(tmpIndexPath, indexDir);
        // Record the marker only now, so that a failed build is retried next time.
        lastDate = lastModify;
        System.out.println("索引SoftIndex更新end");
    }

    /** Maps one Soft record to the Lucene document stored in the index. */
    private Document toDocument(Soft soft) {
        Document doc = new Document();
        doc.add(new LongField("softId", soft.getSoftId(), Field.Store.YES));
        doc.add(new LongField("releaseId", soft.getReleaseId(), Field.Store.YES));
        doc.add(new TextField("softName", StringUtils.isNotBlank(soft.getSoftName()) ? soft.getSoftName() : "", Field.Store.YES));
        doc.add(new StringField("iconPath", StringUtils.isNotBlank(soft.getIconPath()) ? soft.getIconPath() : "", Field.Store.YES));
        doc.add(new IntField("stars", soft.getStars(), Field.Store.YES));
        doc.add(new LongField("fileSize", soft.getFileSize(), Field.Store.YES));
        // A field value must not be null; fall back to an empty string like the other string fields.
        doc.add(new StringField("releaseDate", soft.getReleaseDate() != null ? soft.getReleaseDate() : "", Field.Store.YES));
        doc.add(new StringField("versionName", StringUtils.isNotBlank(soft.getVersionName()) ? soft.getVersionName() : "", Field.Store.YES));
        doc.add(new IntField("totalDownloads", soft.getTotalDownloads(), Field.Store.YES));
        doc.add(new TextField("runType", soft.getRunType() + "", Field.Store.YES));
        doc.add(new IntField("totalComemntCount", soft.getTotalComemntCount(), Field.Store.YES));
        doc.add(new IntField("freeUse", soft.isFreeUse() ? 1 : 0, Field.Store.YES));
        doc.add(new IntField("freeDownload", soft.isFreeDownload() ? 1 : 0, Field.Store.YES));
        doc.add(new IntField("softCurrency", soft.getSoftCurrency(), Field.Store.YES));
        doc.add(new LongField("versionCode", soft.getVersionCode(), Field.Store.YES));
        doc.add(new StringField("packageName", StringUtils.isNotBlank(soft.getPackageName()) ? soft.getPackageName() : "", Field.Store.YES));
        return doc;
    }

    /** Closes the writer (if one was opened) and clears any lock still held on the temporary directory. */
    private void closeWriter(IndexWriter writer, String tmpIndexPath) {
        if (null == writer) {
            return;
        }
        try {
            writer.close();
            Directory dir = SearchEngineCore.getWriteDirectory(tmpIndexPath);
            if (null != dir && IndexWriter.isLocked(dir)) {
                IndexWriter.unlock(dir);
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
    }
}
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
/**
 * Simple holder that carries a Lucene query together with a sort order.
 * Both members are public and may be assigned directly after construction.
 */
public class QuerySort {

    /** The query part of the pair; stays {@code null} until assigned. */
    public Query query;

    /** The sort part of the pair; stays {@code null} until assigned. */
    public Sort sort;

    /** Creates an empty holder with neither query nor sort set. */
    public QuerySort() {
    }

    /**
     * Creates a holder with both members set.
     *
     * @param query the query to store
     * @param sort  the sort order to store
     */
    public QuerySort(Query query, Sort sort) {
        this.sort = sort;
        this.query = query;
    }
}
/**
 * Static helpers for opening Lucene index writers/searchers on a file-system
 * directory and for swapping a freshly built index directory into place.
 */
public class SearchEngineCore {
    protected static Log log = LogFactory.getLog(SearchEngineCore.class);
    /** Index path; not used by the static helpers in this class. */
    protected String indexpath = null;
    private static final Object lock_r = new Object();
    private static final Object lock_w = new Object();

    public SearchEngineCore() {}

    /**
     * Opens an IndexWriter in CREATE mode (any existing index in the directory is
     * overwritten) using an IKAnalyzer.
     *
     * <p>NOTE: a write lock found on the directory is removed forcibly before the
     * writer is opened.
     *
     * @param indexDir path of the index directory
     * @return the writer, or {@code null} when it could not be opened (the cause is logged)
     * @throws IOException declared for compatibility; failures are currently reported as {@code null}
     */
    public static IndexWriter getIndexWriter(String indexDir) throws IOException {
        IndexWriter indexWriter = null;
        try {
            synchronized (lock_w) {
                Directory indexDirectory = getWriteDirectory(indexDir);
                if (indexDirectory == null) {
                    // getWriteDirectory already logged the cause.
                    return null;
                }
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, new IKAnalyzer());
                if (IndexWriter.isLocked(indexDirectory)) {
                    IndexWriter.unlock(indexDirectory);
                }
                indexWriterConfig.setOpenMode(OpenMode.CREATE);
                indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
        return indexWriter;
    }

    /**
     * Opens a Lucene Directory on the given path, creating the directory
     * (including missing parents) when it does not exist yet.
     *
     * @param indexDir path of the index directory
     * @return the directory, or {@code null} when it could not be opened (the cause is logged)
     */
    public static Directory getWriteDirectory(String indexDir) {
        Directory indexDirectory = null;
        try {
            File indexFile = new File(indexDir);
            // mkdirs rather than mkdir: the parent of the index directory may not exist yet.
            if (!indexFile.exists() && !indexFile.mkdirs()) {
                log.warn("Could not create index directory " + indexDir);
            }
            indexDirectory = FSDirectory.open(indexFile);
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
        return indexDirectory;
    }

    /**
     * Opens a new reader on the index directory and wraps it in an IndexSearcher.
     * Every call opens a fresh reader; the caller is responsible for closing it
     * (via {@code searcher.getIndexReader().close()}) when done.
     *
     * @param indexDir path of the index directory
     * @return the searcher, or {@code null} when the directory does not exist or cannot be read
     */
    public static IndexSearcher getIndexSearcher(String indexDir) {
        IndexSearcher indexSearcher = null;
        try {
            synchronized (lock_r) {
                File indexFile = new File(indexDir);
                if (!indexFile.exists()) {
                    return null;
                }
                IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile));
                indexSearcher = new IndexSearcher(reader);
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
        return indexSearcher;
    }

    /**
     * Replaces the index in {@code dstDir} with the one in {@code srcDir}. The previous
     * content of {@code dstDir} is kept as {@code dstDir + ".bak"} (an older backup is
     * deleted first).
     *
     * @param srcDir directory holding the new index
     * @param dstDir directory to be replaced
     */
    public static void updateIndex(String srcDir, String dstDir) {
        File srcDirFile = new File(srcDir);
        if (!srcDirFile.exists()) {
            // Without a source there is nothing to publish; moving dstDir away would
            // leave no live index at all.
            log.error("update skipped, source index " + srcDir + " does not exist");
            return;
        }
        File dstDirFile = new File(dstDir);
        if (dstDirFile.exists()) {
            String dstBakDir = dstDir + ".bak";
            File dstBakDirFile = new File(dstBakDir);
            if (dstBakDirFile.exists()) {
                boolean flag = FileUtil.deleteFile(dstBakDirFile);
                log.info("delete " + dstBakDir + "====" + flag);
            }
            IndexUtil.renameFile(dstDir, dstBakDir);
        }
        IndexUtil.renameFile(srcDir, dstDir);
    }

    /**
     * Resolves the index root: takes the classpath root location, substitutes the
     * configured value for its "WEB-INF/classes" part and returns the result as a
     * forward-slash file-system path.
     *
     * @param idxPath name of the configuration property holding the index sub-path
     * @return the resolved path
     */
    public static String getIndexpath(String idxPath) {
        String indexPath = Config.getProperty(idxPath);
        // Literal replace(): the configured value must not be interpreted as a regex
        // replacement string (where '$' and '\' are special).
        String location = SearchEngineCore.class.getResource("/").toString()
                .replace("WEB-INF/classes", indexPath)
                .replace('\\', '/');
        if (location.startsWith("file:")) {
            location = location.substring("file:".length());
        }
        // "file:/C:/x" leaves "/C:/x": drop the slash in front of a drive letter.
        // A Unix path such as "/opt/x" keeps its leading slash and stays absolute.
        if (location.length() > 2 && location.charAt(0) == '/' && location.charAt(2) == ':') {
            location = location.substring(1);
        }
        return location.trim();
    }
}
/**
 * Serializable bean holding a user's profile data and counters.
 */
public class UserInfo implements Serializable {

    private static final long serialVersionUID = -1341713350583127283L;

    private Long userId;
    private String headImg;
    private String nickName;

    /** Login name; excluded from JSON serialization by the annotation below. */
    @JSONField(serialize = false)
    private String loginName;

    /** Gender: 0 = male, 1 = female; initialised to -1. */
    private int sex = -1;

    /** Number of check-ins. */
    private int checkinCount = 0;

    /** Number of favorites. */
    private int favoriteCount = 0;

    public UserInfo() {
    }

    // --- userId ---

    public Long getUserId() {
        return this.userId;
    }

    public void setUserId(Long userId) {
        this.userId = userId;
    }

    // --- headImg ---

    public String getHeadImg() {
        return this.headImg;
    }

    public void setHeadImg(String headImg) {
        this.headImg = headImg;
    }

    // --- nickName ---

    public String getNickName() {
        return this.nickName;
    }

    public void setNickName(String nickName) {
        this.nickName = nickName;
    }

    // --- loginName ---

    public String getLoginName() {
        return this.loginName;
    }

    public void setLoginName(String loginName) {
        this.loginName = loginName;
    }

    // --- sex ---

    public int getSex() {
        return this.sex;
    }

    public void setSex(int sex) {
        this.sex = sex;
    }

    // --- checkinCount ---

    public int getCheckinCount() {
        return this.checkinCount;
    }

    public void setCheckinCount(int checkinCount) {
        this.checkinCount = checkinCount;
    }

    // --- favoriteCount ---

    public int getFavoriteCount() {
        return this.favoriteCount;
    }

    public void setFavoriteCount(int favoriteCount) {
        this.favoriteCount = favoriteCount;
    }
}
基于索引的搜索:
/**
 * Keyword search over the "softIndex" Lucene index.
 *
 * <p>Each search opens its own IndexSearcher, passes it explicitly to the helper
 * methods and closes the underlying reader afterwards, so no searcher is shared
 * between calls.
 */
public class SoftSearch extends BaseServiceImpl {
    /** Root directory under which the "softIndex" directory is located. */
    private static final String indexpath = SearchEngineCore.getIndexpath("VSOYOU_SOFT_INDEX_PATH");

    /** Manual smoke test: runs one search and prints the names of the hits. */
    public static void main(String[] args) throws ParseException {
        long startTime = System.currentTimeMillis();
        String searchWord = "QQ欢乐斗地主";
        searchWord = SearchUtil.wmlEncode(searchWord);
        searchWord = SearchUtil.traditionalToSimple(searchWord).trim(); // traditional -> simplified Chinese
        int page = 1;
        int pageSize = 100;
        IndexSearcher searcher = openSearcher();
        try {
            TopDocs topDocs = search(searcher, searchWord, page, pageSize);
            // search() returns null when the index is missing; report that as zero hits.
            System.out.println("总共命中数:" + (topDocs == null ? 0 : topDocs.totalHits));
            if (topDocs != null && topDocs.totalHits != 0) {
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                int begin = (page - 1) * pageSize;
                int end = Math.min(begin + pageSize, scoreDocs.length);
                SoftSearch search = new SoftSearch();
                List<Soft> softs = search.addHits2List(searcher, scoreDocs, begin, end);
                for (int i = 0; i < softs.size(); i++) {
                    System.out.println("i: \t " + softs.get(i).getSoftName());
                }
            }
        } finally {
            closeSearcher(searcher);
        }
        System.out.println("检索完成用时:" + (System.currentTimeMillis() - startTime) + "毫秒");
    }

    /** Opens a searcher on the live index; returns null when the index cannot be opened. */
    private static IndexSearcher openSearcher() {
        String indexDir = indexpath + File.separator + "softIndex";
        return SearchEngineCore.getIndexSearcher(indexDir);
    }

    /** Closes the reader behind the searcher so that its resources are released. */
    private static void closeSearcher(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.getIndexReader().close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the keyword query and returns the top {@code page * pageSize} hits, sorted
     * as defined by {@link #getKeywordQuerySort()}.
     *
     * @return the hits, or {@code null} when no searcher is available or reading the index failed
     * @throws ParseException when the search word is not a valid query string
     */
    private static TopDocs search(IndexSearcher searcher, String searchWord, int page, int pageSize) throws ParseException {
        if (null == searcher) {
            return null;
        }
        TopDocs topDocs = null;
        try {
            BooleanQuery allQuery = new BooleanQuery();
            // Match the search word against the analyzed softName field.
            QueryParser parser = new QueryParser(Version.LUCENE_43, "softName", new IKAnalyzer());
            Query query = parser.parse(searchWord);
            allQuery.add(query, BooleanClause.Occur.MUST);
            QuerySort keywordQuerySort = getKeywordQuerySort();
            allQuery.add(keywordQuerySort.query, BooleanClause.Occur.MUST);
            topDocs = searcher.search(allQuery, page * pageSize, keywordQuerySort.sort);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return topDocs;
    }

    /**
     * Converts the hits in {@code [begin, end)} to Soft objects, reading the stored
     * fields through the same searcher that produced the hits. The comment count is
     * taken from the DAO when it yields a non-negative value, otherwise from the index.
     */
    private List<Soft> addHits2List(IndexSearcher searcher, ScoreDoc[] scoreDocs, int begin, int end) {
        List<Soft> softs = new ArrayList<Soft>();
        try {
            this.commentsInfoDao = ServiceFactory.getBean(CommentsInfoDao.class);
            for (int i = begin; i < end; i++) {
                int docID = scoreDocs[i].doc;
                Soft soft = new Soft();
                Document doc = searcher.doc(docID);
                if (StringUtils.isNotBlank(doc.get("fileSize"))) {
                    soft.setFileSize(Long.valueOf(doc.get("fileSize")));
                }
                if (StringUtils.isNotBlank(doc.get("freeDownload"))) {
                    soft.setFreeDownload(doc.get("freeDownload").equals("1"));
                }
                if (StringUtils.isNotBlank(doc.get("freeUse"))) {
                    soft.setFreeUse(doc.get("freeUse").equals("1"));
                }
                soft.setIconPath(doc.get("iconPath"));
                soft.setReleaseDate(doc.get("releaseDate"));
                Long releaseId = Long.valueOf(doc.get("releaseId")); // parsed once, used twice below
                soft.setReleaseId(releaseId);
                soft.setSoftName(doc.get("softName"));
                if (StringUtils.isNotBlank(doc.get("stars"))) {
                    soft.setStars(Integer.valueOf(doc.get("stars")));
                }
                int commentRowCount = NumberUtils.strToInt(commentsInfoDao.getSoftCommentRowCount(releaseId));
                if (commentRowCount >= 0) {
                    soft.setTotalComemntCount(commentRowCount);
                } else if (StringUtils.isNotBlank(doc.get("totalComemntCount"))) {
                    soft.setTotalComemntCount(Integer.valueOf(doc.get("totalComemntCount")));
                }
                if (StringUtils.isNotBlank(doc.get("totalDownloads"))) {
                    soft.setTotalDownloads(Integer.valueOf(doc.get("totalDownloads")));
                }
                soft.setVersionName(doc.get("versionName"));
                if (StringUtils.isNotBlank(doc.get("softCurrency"))) {
                    soft.setSoftCurrency(Integer.valueOf(doc.get("softCurrency")));
                }
                if (StringUtils.isNotBlank(doc.get("versionCode"))) {
                    soft.setVersionCode(Long.valueOf(doc.get("versionCode")));
                }
                if (StringUtils.isNotBlank(doc.get("packageName"))) {
                    soft.setPackageName(doc.get("packageName"));
                }
                softs.add(soft);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return softs;
    }

    /**
     * Builds the fixed filter and ordering used by keyword searches: only documents
     * whose runType term is "100", sorted descending by totalDownloads,
     * totalComemntCount, releaseDate and freeDownload (in that priority).
     */
    private static QuerySort getKeywordQuerySort() {
        QuerySort querySort = new QuerySort();
        querySort.query = new TermQuery(new Term("runType", "100"));
        querySort.sort = new Sort(new SortField[] {
            new SortField("totalDownloads", SortField.Type.INT, true),
            new SortField("totalComemntCount", SortField.Type.INT, true),
            new SortField("releaseDate", SortField.Type.STRING, true),
            new SortField("freeDownload", SortField.Type.INT, true)
        });
        return querySort;
    }

    /**
     * Searches the index for the given keyword and returns one page of results.
     *
     * @param searchWord keyword entered by the user
     * @param page       1-based page number
     * @param pageSize   number of results per page
     * @return a map that always contains the image-domain entry; on success also
     *         "pageCount" and "list" (the page of Soft objects). "list" is {@code null}
     *         when nothing matched, the index is unavailable or the paging arguments
     *         are not positive. When the search fails with an exception the map
     *         contains neither key.
     */
    public Map<String, Object> searchKeyWord(String searchWord, int page, int pageSize) {
        Map<String, Object> map = new HashMap<String, Object>();
        map.put(Const.IMG_DOMAIN_KEY, Const.IMG_DOMAIN_VALUE);
        if (page < 1 || pageSize < 1) {
            // Such values cannot address any result page.
            map.put("list", null);
            return map;
        }
        IndexSearcher searcher = null;
        try {
            searchWord = SearchUtil.wmlEncode(searchWord);
            searchWord = SearchUtil.traditionalToSimple(searchWord).trim(); // traditional -> simplified Chinese
            searcher = openSearcher();
            TopDocs topDocs = search(searcher, searchWord, page, pageSize);
            if (topDocs == null || topDocs.totalHits == 0) {
                map.put("list", null);
                return map;
            }
            map.put("pageCount", getPageCount(topDocs.totalHits, pageSize));
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            int begin = (page - 1) * pageSize; // first hit of the requested page
            int end = Math.min(begin + pageSize, scoreDocs.length); // one past its last hit
            List<Soft> softs = addHits2List(searcher, scoreDocs, begin, end);
            map.put("list", softs);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeSearcher(searcher);
        }
        return map;
    }

    /** Number of pages needed for {@code rowCount} rows; never less than 1. */
    private int getPageCount(int rowCount, int pageSize) {
        int pageCount = rowCount / pageSize;
        if (rowCount % pageSize != 0) {
            pageCount++;
        }
        return Math.max(pageCount, 1);
    }
}
需要jar包:
相关推荐
lucene.NET 中文分词 高亮
Lucene是一套用于全文检索和搜寻的开源程式库,由Apache软件基金会支持和提供。Lucene提供了一个简单却强大的应用程式接口,能够做全文索引和搜寻。在Java开发环境里Lucene是一个成熟的免费开源工具。就其本身而言,...
lucene4.3增删改查的的一个工具类,对新手来说是一份不可多得的入门资料。
全文检索lucene 4.3 所用到的3个jar包,包含lucene-queryparser-4.3.0.jar、 lucene-core-4.3.0.jar、lucene-analyzers-common-4.3.0.jar。
lucene4.3 按坐标距离排序,里面写了个简单的例子。运行就行
lucene4.3源代码 Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information ...
来自“猎图网 www.richmap.cn”基于IKAnalyzer分词算法的准商业化Lucene中文分词器。 1. 正向全切分算法,42万汉字字符/每秒的处理能力(IBM ThinkPad 酷睿I 1.6G 1G内存 WinXP) 2. 对数量词、地名、路名的...
1.XunTa是在lucene4.3上创建的通过“知识点”来找人的搜人引擎。 输入一个关键词(或组合),XunTa返回一个排名列表,排在前面的人是与该关键词(组合)最相关的“达人”。 可访问 http://www.xunta.so立即体验...
Lucene与中文分词技术的研究及应用
使用lucene需要检索时,需要导入jar包,下载全资源文件,进去找就可以
NULL 博文链接:https://gznofeng.iteye.com/blog/1129902
lucene全文检索需要的三个jar包:lucene-analyzers-3.6.1.jar lucene-core-3.6.1.jar lucene-highlighter-3.6.1.jar
Lucene全文检索案例
本文设计实现了一个中文分词模块,其主要研究目的在于寻找更为有效的中文词汇 处理方法,提高全文检索系统的中文处理能力.整个模块基于当前最流行的搜索引擎架构 Lucene,实现了带有歧义消除功能的正向最大匹配算法...
lucene全文检索全面教程,基于JAVA的lucene全文检索全面教程。www.288158.com
使用lucene.net盘古分词实现站内搜索demo
使用visual studio 开发的lucene.net和盘古分词实现全文检索。并按照lucene的得分算法进行多条件检索并按照得分算法计算匹配度排序。 可以输入一句话进行检索。 lucene.net的版本为2.9.2 盘古分词的版本为2.3.1 并...
基于Lucene的中文分词方法设计与实现
Lucene 与中文分词的结合
视频详细讲解,需要的小伙伴自行网盘下载,链接见附件,永久有效。 目前业界流行的ElasticSearch和Solr搜索...系统的学习Lucene全文检索技术,全面掌握搜索原理和底层知识,为学习其他应用层面搜索技术打下坚实的基础。