Java批量写入Elasticsearch
记一次es批量导入数据的惨痛经历(分享给缺少团队协作的苦逼人儿):
1、确认ES服务端版本与Maven依赖的ES客户端版本是否一致;
2、确认es启动内存、索引刷新规则、默认批量写入数据量大小;
3、分批次导入,每批数量1000-5000;
4、如使用多线程操作,计数器使用ThreadLocal
package com.config;

import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpHost;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Spring component that bulk-indexes documents into a single Elasticsearch index
 * through the high-level REST client.
 *
 * <p>Host, port and index name are injected from configuration properties. A new
 * client is created for every call to {@link #addElasticsearchData(List)} and is
 * closed before that call returns.
 */
@Slf4j
@Component
public class ElasticsearchDataHandle {

    @Value("${spring.elasticsearch.rest.nodes:127.0.0.1}")
    private String host;

    @Value("${spring.elasticsearch.rest.port:9200}")
    private String port;

    /** Name of the index that documents are written to. */
    @Value("${spring.elasticsearch.rest.indexName:indexName}")
    private String indexName;

    /**
     * Creates a new high-level REST client that talks plain HTTP to the configured
     * host and port.
     *
     * @return a new client; the caller is responsible for closing it
     * @throws NumberFormatException if the configured port is not a valid integer
     */
    public RestHighLevelClient restHighLevelClient() {
        return new RestHighLevelClient(
                RestClient.builder(new HttpHost(host, Integer.parseInt(port), "http")));
    }

    /**
     * Indexes the given documents into the configured index with one bulk request.
     *
     * <p>Each map is indexed under the document id taken from its {@code "id"} entry.
     * Null maps and maps without an {@code "id"} are skipped with a warning so that a
     * single malformed record does not abort the whole batch. Failures while building
     * or executing the request are logged and not rethrown. The whole list is sent in
     * a single request; this method does not split it into smaller batches.
     *
     * @param addEsDataMapList documents to index; {@code null} or empty is a no-op
     * @throws NumberFormatException if the configured port is not a valid integer
     */
    public void addElasticsearchData(List<Map<String, Object>> addEsDataMapList) {
        if (addEsDataMapList == null || addEsDataMapList.isEmpty()) {
            log.info("es同步数据数量:{}", 0);
            return;
        }
        // The client is created outside submitBulk's catch-all so that a misconfigured
        // port still propagates to the caller instead of being logged as a sync failure.
        try (RestHighLevelClient client = restHighLevelClient()) {
            submitBulk(client, addEsDataMapList);
        } catch (IOException e) {
            // submitBulk handles its own exceptions, so only close() can end up here.
            log.warn("es客户端关闭失败", e);
        }
    }

    /** Builds and executes the bulk request, logging (not rethrowing) any failure. */
    private void submitBulk(RestHighLevelClient client, List<Map<String, Object>> documents) {
        try {
            BulkRequest bulkRequest = buildBulkRequest(documents);
            log.info("es同步数据数量:{}", bulkRequest.numberOfActions());
            if (bulkRequest.numberOfActions() < 1) {
                return;
            }
            BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
            log.info("es同步数据结果:{}", bulkResponse.hasFailures());
            if (bulkResponse.hasFailures()) {
                // A bulk call can succeed as a whole while individual items fail.
                log.error("es同步数据存在失败项:{}", bulkResponse.buildFailureMessage());
            }
        } catch (Exception e) {
            // Passing the exception as the last argument keeps the stack trace in the log.
            log.error("es同步数据执行失败:{}", documents, e);
        }
    }

    /** Converts the documents into one bulk request, skipping records that have no id. */
    private BulkRequest buildBulkRequest(List<Map<String, Object>> documents) {
        BulkRequest bulkRequest = new BulkRequest();
        for (Map<String, Object> document : documents) {
            Object id = document == null ? null : document.get("id");
            if (id == null) {
                log.warn("es同步数据缺少id,已跳过:{}", document);
                continue;
            }
            // NOTE(review): the (index, type, id) constructor is tied to the client
            // version in use; confirm it against the Elasticsearch dependency version.
            bulkRequest.add(new IndexRequest(indexName, "_doc", id.toString())
                    .source(document, XContentType.JSON));
        }
        // Refresh the index as part of the request rather than waiting for the
        // scheduled refresh. NOTE(review): this may be costly for large imports —
        // confirm it is required.
        bulkRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
        return bulkRequest;
    }
}
上一篇:
JS实现多线程数据分片下载
下一篇:
Spark 累加器与广播变量