Springboot集成ElasticSearch实现minio文件内容全文检索_minio 全文检索
一、docker安装Elasticsearch
(1)springboot和Elasticsearch的版本对应关系如下,请看版本对应:本文使用的 Elasticsearch 7.17.x 对应 Spring Data Elasticsearch 4.4.x(即 Spring Boot 2.7.x)。
注意安装对应版本,否则可能会出现一些未知的错误。
(2)拉取镜像
docker pull elasticsearch:7.17.6
(3)运行容器
docker run -it -d --name elasticsearch -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms512m -Xmx1024m" -p 9200:9200 -p 9300:9300 elasticsearch:7.17.6
访问http://localhost:9200/,出现如下内容表示安装成功。
(4)安装中文分词器
进入容器:
docker exec -it elasticsearch bash
然后进入bin目录执行下载安装ik分词器命令:
elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.17.6/elasticsearch-analysis-ik-7.17.6.zip
退出bash并重启容器:
docker restart elasticsearch
二、安装kibana
Kibana 是为 Elasticsearch设计的开源分析和可视化平台。你可以使用 Kibana 来搜索,查看存储在 Elasticsearch 索引中的数据并与之交互。你可以很容易实现高级的数据分析和可视化,以图表的形式展现出来。
(1)拉取镜像
docker pull kibana:7.17.6
(2)运行容器
docker run --name kibana -p 5601:5601 --link elasticsearch:es -e "ELASTICSEARCH_HOSTS=http://es:9200" -d kibana:7.17.6
--link elasticsearch:es表示容器互联,即容器kibana连接到elasticsearch。
(3)使用kibana dev_tools发送http请求操作Elasticsearch
三、后端代码
(1)引入maven依赖
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
(2)application.yml配置
spring:
  elasticsearch:
    uris: http://localhost:9200
(3)实体类
import lombok.AllArgsConstructor;import lombok.Data;import lombok.NoArgsConstructor;import org.springframework.data.annotation.Id;import org.springframework.data.elasticsearch.annotations.Document;import org.springframework.data.elasticsearch.annotations.Field;import org.springframework.data.elasticsearch.annotations.FieldType;import java.util.Date;/** * @author yangfeng */@Data@NoArgsConstructor@AllArgsConstructor@Document(indexName = \"file\")public class File { @Id private String id; /** * 文件名称 */ @Field(type = FieldType.Text, analyzer = \"ik_max_word\") private String fileName; /** * 文件分类 */ @Field(type = FieldType.Keyword) private String fileCategory; /** * 文件内容 */ @Field(type = FieldType.Text, analyzer = \"ik_max_word\") private String fileContent; /** * 文件存储路径 */ @Field(type = FieldType.Keyword, index = false) private String filePath; /** * 文件大小 */ @Field(type = FieldType.Keyword, index = false) private Long fileSize; /** * 文件类型 */ @Field(type = FieldType.Keyword, index = false) private String fileType; /** * 创建人 */ @Field(type = FieldType.Keyword, index = false) private String createBy; /** * 创建日期 */ @Field(type = FieldType.Keyword, index = false) private Date createTime; /** * 更新人 */ @Field(type = FieldType.Keyword, index = false) private String updateBy; /** * 更新日期 */ @Field(type = FieldType.Keyword, index = false) private Date updateTime;}
(4)repository接口,继承ElasticsearchRepository
import org.springframework.data.domain.Page;import org.springframework.data.domain.Pageable;import org.springframework.data.elasticsearch.annotations.Highlight;import org.springframework.data.elasticsearch.annotations.HighlightField;import org.springframework.data.elasticsearch.annotations.HighlightParameters;import org.springframework.data.elasticsearch.core.SearchHit;import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;import org.springframework.stereotype.Repository;import java.util.List;/** * @author yangfeng * @date: 2024年11月9日 15:29 */@Repositorypublic interface FileRepository extends ElasticsearchRepository { /** * 关键字查询 * * @return */ @Highlight(fields = {@HighlightField(name = \"fileName\"), @HighlightField(name = \"fileContent\")}, parameters = @HighlightParameters(preTags = {\"\"}, postTags = {\"\"}, numberOfFragments = 0)) List<SearchHit> findByFileNameOrFileContent(String fileName, String fileContent, Pageable pageable);}
(5)service接口
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.core.SearchHits;

import java.util.List;

/**
 * Service contract for indexing files into Elasticsearch and searching them.
 *
 * @author yangfeng
 * @version V1.0
 * @date 2023-02-21
 */
public interface IFileService {

    /**
     * Saves a file to the search index.
     *
     * @param filePath     storage path of the file
     * @param fileCategory category to store with the file
     * @throws Exception if the file cannot be saved
     */
    void saveFile(String filePath, String fileCategory) throws Exception;

    /**
     * Keyword search returning the hits as a list.
     *
     * @param dto keyword and paging parameters
     * @return the matching hits
     */
    List<SearchHit> search(FileDTO dto);

    /**
     * Keyword search returning the hits wrapped in a {@link SearchHits} result.
     *
     * @param dto keyword and paging parameters
     * @return the search result
     */
    SearchHits searchPage(FileDTO dto);
}
(6)service实现类
import cn.hutool.core.util.IdUtil;import lombok.extern.slf4j.Slf4j;import org.apache.commons.lang3.StringUtils;import org.apache.shiro.SecurityUtils;import org.elasticsearch.index.query.QueryBuilders;import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;import org.elasticsearch.search.sort.SortBuilders;import org.elasticsearch.search.sort.SortOrder;import org.jeecg.common.exception.JeecgBootException;import org.jeecg.common.system.vo.LoginUser;import org.jeecg.common.util.CommonUtils;import org.jeecg.common.util.MinioUtil;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.data.domain.PageRequest;import org.springframework.data.domain.Pageable;import org.springframework.data.domain.Sort;import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;import org.springframework.data.elasticsearch.core.SearchHit;import org.springframework.data.elasticsearch.core.SearchHits;import org.springframework.data.elasticsearch.core.query.NativeSearchQuery;import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;import org.springframework.stereotype.Service;import java.io.InputStream;import java.util.Date;import java.util.List;import java.util.Objects;/** * description: ES文件服务 * * @author yangfeng * @version V1.0 * @date 2023-02-21 */@Slf4j@Servicepublic class FileServiceImpl implements IFileService { @Autowired private FileRepository fileRepository; @Autowired private ElasticsearchRestTemplate elasticsearchRestTemplate; /** * 保存文件 */ @Override public void saveFile(String filePath, String fileCategory) throws Exception { if (Objects.isNull(filePath)) { throw new JeecgBootException(\"文件不存在\"); } LoginUser user = (LoginUser) SecurityUtils.getSubject().getPrincipal(); String fileName = CommonUtils.getFileNameByUrl(filePath); String fileType = StringUtils.isNotBlank(fileName) ? 
fileName.substring(fileName.lastIndexOf(\".\") + 1) : null; InputStream inputStream = MinioUtil.getMinioFile(filePath); // 读取文件内容,上传到es,方便后续的检索 String fileContent = FileUtils.readFileContent(inputStream, fileType); File file = new File(); file.setId(IdUtil.getSnowflake(1, 1).nextIdStr()); file.setFileContent(fileContent); file.setFileName(fileName); file.setFilePath(filePath); file.setFileType(fileType); file.setFileCategory(fileCategory); file.setCreateBy(user.getUsername()); file.setCreateTime(new Date()); fileRepository.save(file); } /** * 关键字查询 * * @return */ @Override public List<SearchHit> search(FileDTO dto) { Pageable pageable = PageRequest.of(dto.getPageNo() - 1, dto.getPageSize(), Sort.Direction.DESC, \"createTime\"); return fileRepository.findByFileNameOrFileContent(dto.getKeyword(), dto.getKeyword(), pageable); } @Override public SearchHits searchPage(FileDTO dto) { NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder(); queryBuilder.withQuery(QueryBuilders.multiMatchQuery(dto.getKeyword(), \"fileName\", \"fileContent\")); // 设置高亮 HighlightBuilder highlightBuilder = new HighlightBuilder(); String[] fieldNames = {\"fileName\", \"fileContent\"}; for (String fieldName : fieldNames) { highlightBuilder.field(fieldName); } highlightBuilder.preTags(\"\"); highlightBuilder.postTags(\"\"); highlightBuilder.order(); queryBuilder.withHighlightBuilder(highlightBuilder); // 也可以添加分页和排序 queryBuilder.withSorts(SortBuilders.fieldSort(\"createTime\").order(SortOrder.DESC)) .withPageable(PageRequest.of(dto.getPageNo() - 1, dto.getPageSize())); NativeSearchQuery nativeSearchQuery = queryBuilder.build(); return elasticsearchRestTemplate.search(nativeSearchQuery, File.class); }}
(7)controller
import lombok.extern.slf4j.Slf4j;import org.jeecg.common.api.vo.Result;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.web.bind.annotation.PostMapping;import org.springframework.web.bind.annotation.RequestBody;import org.springframework.web.bind.annotation.RequestMapping;import org.springframework.web.bind.annotation.RestController;/** * 文件es操作 * * @author yangfeng * @since 2024-11-09 */@Slf4j@RestController@RequestMapping(\"/elasticsearch/file\")public class FileController { @Autowired private IFileService fileService; /** * 保存文件 * * @return */ @PostMapping(value = \"/saveFile\") public Result saveFile(@RequestBody File file) throws Exception { fileService.saveFile(file.getFilePath(), file.getFileCategory()); return Result.OK(); } /** * 关键字查询-repository * * @throws Exception */ @PostMapping(value = \"/search\") public Result search(@RequestBody FileDTO dto) { return Result.OK(fileService.search(dto)); } /** * 关键字查询-原生方法 * * @throws Exception */ @PostMapping(value = \"/searchPage\") public Result searchPage(@RequestBody FileDTO dto) { return Result.OK(fileService.searchPage(dto)); }}
(8)工具类
import lombok.extern.slf4j.Slf4j;import org.apache.pdfbox.pdmodel.PDDocument;import org.apache.pdfbox.text.PDFTextStripper;import org.apache.poi.xwpf.extractor.XWPFWordExtractor;import org.apache.poi.xwpf.usermodel.XWPFDocument;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.nio.charset.StandardCharsets;import java.util.Arrays;import java.util.List;@Slf4jpublic class FileUtils { private static final List FILE_TYPE; static { FILE_TYPE = Arrays.asList(\"pdf\", \"doc\", \"docx\", \"text\"); } public static String readFileContent(InputStream inputStream, String fileType) throws Exception{ if (!FILE_TYPE.contains(fileType)) { return null; } // 使用PdfBox读取pdf文件内容 if (\"pdf\".equalsIgnoreCase(fileType)) { return readPdfContent(inputStream); } else if (\"doc\".equalsIgnoreCase(fileType) || \"docx\".equalsIgnoreCase(fileType)) { return readDocOrDocxContent(inputStream); } else if (\"text\".equalsIgnoreCase(fileType)) { return readTextContent(inputStream); } return null; } private static String readPdfContent(InputStream inputStream) throws Exception { // 加载PDF文档 PDDocument pdDocument = PDDocument.load(inputStream); // 创建PDFTextStripper对象, 提取文本 PDFTextStripper textStripper = new PDFTextStripper(); // 提取文本 String content = textStripper.getText(pdDocument); // 关闭PDF文档 pdDocument.close(); return content; } private static String readDocOrDocxContent(InputStream inputStream) { try { // 加载DOC文档 XWPFDocument document = new XWPFDocument(inputStream); // 2. 
提取文本内容 XWPFWordExtractor extractor = new XWPFWordExtractor(document); return extractor.getText(); } catch (IOException e) { e.printStackTrace(); return null; } } private static String readTextContent(InputStream inputStream) { StringBuilder content = new StringBuilder(); try (InputStreamReader isr = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) { int ch; while ((ch = isr.read()) != -1) { content.append((char) ch); } } catch (IOException e) { e.printStackTrace(); return null; } return content.toString(); }}
(9)dto
import lombok.Data;

/**
 * Request parameters for the keyword search endpoints.
 */
@Data
public class FileDTO {

    /** Keyword to search for. */
    private String keyword;

    /** 1-based page number; defaults to the first page when the request omits it. */
    private Integer pageNo = 1;

    /** Number of records per page; defaults to 10 when the request omits it. */
    private Integer pageSize = 10;
}
四、前端代码
(1)查询组件封装
`共 ${total} 条`"/>
import {ref} from 'vue'
import {Pagination} from "ant-design-vue";
import SearchContent from "@/components/ElasticSearch/SearchContent.vue"
import {searchPage} from "@/api/sys/elasticsearch"

// True while a search request is in flight.
const loading = ref(false)
// Becomes true once the first search has been triggered.
const showSearch = ref(false)
// Hits of the current result page.
const searchItems = ref();
const pageInfo = ref({
  // Current page number (1-based)
  pageNo: 1,
  // Number of records shown per page
  pageSize: 10,
  keyword: '',
  total: 0,
});

/** Starts a new search from the first page; does nothing when the keyword is empty. */
async function handleSearch() {
  if (!pageInfo.value.keyword) {
    return;
  }
  pageInfo.value.pageNo = 1
  showSearch.value = true
  await getSearchItems();
}

/** Loads the given page of the current search. */
function changePage(pageNo) {
  pageInfo.value.pageNo = pageNo
  getSearchItems();
}

/** Calls the search API and stores the hits and the total count. */
async function getSearchItems() {
  loading.value = true
  try {
    const res: any = await searchPage(pageInfo.value);
    // Fall back to an empty result so the list and pager never receive undefined.
    searchItems.value = res?.searchHits ?? [];
    pageInfo.value.total = res?.totalHits ?? 0
  } finally {
    loading.value = false
  }
}
(2)接口elasticsearch.ts
import {defHttp} from '/@/utils/http/axios';

enum Api {
  saveFile = '/elasticsearch/file/saveFile',
  searchPage = '/elasticsearch/file/searchPage',
}

/**
 * Save a file to Elasticsearch.
 * @param params request payload
 */
export function saveFile(params) {
  return defHttp.post({url: Api.saveFile, params});
}

/**
 * Keyword search (native query endpoint).
 * @param params request payload
 */
export function searchPage(params) {
  return defHttp.post({url: Api.searchPage, params});
}
(3)搜索内容组件SearchContent.vue
0\">
{{ item?.content?.fileName }}
{{ item?.content?.fileContent?.length > 150 ? item.content.fileContent.substring(0, 150) + '......' : item?.content?.fileContent }}

import {useGlobSetting} from "@/hooks/setting";
import EmptyData from "/@/components/ElasticSearch/EmptyData.vue";
import {ref} from "vue";

const glob = useGlobSetting();

const props = defineProps({
  loading: {
    type: Boolean,
    default: false
  },
  items: {
    type: Array,
    // Array/Object prop defaults must come from a factory function.
    default: () => []
  },
})

/** Opens the preview page for the given search item in a new tab. */
function detailSearch(searchItem) {
  const url = ref(`${glob.domainUrl}/sys/common/pdf/preview/`);
  window.open(url.value + searchItem.filePath + '#scrollbars=0&toolbar=0&statusbar=0', '_blank');
}

.searchContent {
  min-height: 500px;
  overflow-y: auto;
}

.contentCard {
  margin: 10px 20px;
}

a {
  color: black;
}

a:hover {
  color: #3370ff;
}

:deep(.ant-card-body) {
  padding: 13px;
}
五、效果展示