Elasticsearch 使用 Python 导入数据
Elasticsearch 使用 Python 导入数据
from elasticsearch import Elasticsearch #引入 es 模块
import csv #引入 python 自带的 csv 模块
#from elasticsearch import helpers
# Connection settings. The address below is a placeholder -- replace it with
# the host of your Elasticsearch node before running the script.
ip = "xxx.xxx.xxx.xxx"
# TODO: the original listing had the placeholder `xxxx` here; 9200 is
# Elasticsearch's default HTTP port -- change it to match your cluster.
port = 9200
es = Elasticsearch([ip], port=port)

# Raw string: in a normal literal the "\U" in this Windows path would be
# parsed as the start of a unicode escape and raise a SyntaxError.
file = r"C:\Users\Administrator\Desktop\data.csv"

print("start to import....................")
def parse_csv(datafile):
    """Read a UTF-8 CSV file and return its rows as a list of dicts.

    Args:
        datafile: Path of the CSV file. Its first row is used as the header.

    Returns:
        A list with one dict per data row; each dict maps the header names
        to that row's field values (all values are strings). An input that
        has no data rows yields an empty list.
    """
    # newline="" hands line-ending handling to the csv module, so quoted
    # fields that contain embedded line breaks are read back unchanged.
    with open(datafile, "rt", encoding="utf8", newline="") as sd:
        # DictReader builds one dict per row, keyed by the header fields.
        return list(csv.DictReader(sd))
data = parse_csv(file)

# Names of the document fields to index. Each one is read from the CSV column
# of the same name carrying a "_source." prefix (e.g. "_source.title").
_FIELDS = (
    "createDateTime",
    "post_time_str",
    "rowkey",
    "cate",
    "updateDateTime_str",
    "source_type",
    "type",
    "version",
    "cate_type",
    "title",
    "length",
    "dedupFlag",
    "simHashCode",
    "stock_name",
    "updateDateTime",
    "source_num",
    "datasource",
    "abstract",
    "createDateTime_str",
    "keywords",
    "charset",
    "attr",
    "stock_num",
    "tasks",
    "cid",
    "url",
    "content",
    "emotion_i",
    "category",
    "source",
    "quality_score",
    "stock_company",
    "emotion_f",
    "contentimgs",
    "emotionEva",
    "tmpl_id",
    "post_time",
    "reply_cnt",
    "rel_type",
    "author",
    "visit",
    "all_source",
    # Disabled in the original listing; uncomment to index these as well:
    # "collection",
    # "support",
    # "concept",
    # "event",
    # "share_num",
)

for d in data:
    # Build the document body: explicit key -> value mapping with the
    # "_source." column prefix stripped from the key.
    o = {name: d["_source." + name] for name in _FIELDS}
    # One index request per CSV row.
    es.index(index="your_index", doc_type="your_type", body=o)
下一篇:
Idea工具创建javaweb项目例子
