Elasticsearch 使用 Python 导入数据
Elasticsearch 使用 Python 导入数据
"""Import rows from a CSV export into Elasticsearch, one document per row.

The CSV is expected to have a header row whose column names carry a
``_source.`` prefix (e.g. ``_source.title``). Each row is converted into a
document whose keys are the un-prefixed field names and is then indexed.
"""
import csv  # Python's built-in CSV module

# from elasticsearch import helpers

# Connection settings -- placeholders, replace with your own host and port.
ip = "xxx.xxx.xxx.xxx"
port = "xxxx"

# Raw string: "\U" in a normal literal would be parsed as a unicode escape.
file = r"C:\Users\Administrator\Desktop\data.csv"

# Fields copied from CSV column "_source.<name>" to document key "<name>".
# The order here is the key order of the indexed document.
SOURCE_FIELDS = (
    "createDateTime",
    "post_time_str",
    "rowkey",
    "cate",
    "updateDateTime_str",
    "source_type",
    "type",
    "version",
    "cate_type",
    "title",
    "length",
    "dedupFlag",
    "simHashCode",
    "stock_name",
    "updateDateTime",
    "source_num",
    "datasource",
    "abstract",
    "createDateTime_str",
    "keywords",
    "charset",
    "attr",
    "stock_num",
    "tasks",
    "cid",
    "url",
    "content",
    "emotion_i",
    "category",
    "source",
    "quality_score",
    "stock_company",
    "emotion_f",
    "contentimgs",
    "emotionEva",
    "tmpl_id",
    "post_time",
    "reply_cnt",
    "rel_type",
    "author",
    "visit",
    "all_source",
    # Disabled in the original script; uncomment to index them as well:
    # "collection",
    # "support",
    # "concept",
    # "event",
    # "share_num",
)


def parse_csv(datafile):
    """Read *datafile* and return its rows as a list of dicts.

    ``csv.DictReader`` builds one dict per data row, keyed by the column
    names taken from the header row.

    Args:
        datafile: Path of a UTF-8 encoded CSV file with a header row.

    Returns:
        A list with one dict per data row, in file order.
    """
    # newline="" lets the csv module do its own newline handling, so quoted
    # fields that contain line breaks are read back unchanged.
    with open(datafile, "rt", encoding="utf8", newline="") as sd:
        return list(csv.DictReader(sd))


def build_document(row):
    """Return the document to index for one CSV *row*.

    Args:
        row: Mapping from ``_source.<field>`` column names to values.

    Returns:
        A dict mapping every name in ``SOURCE_FIELDS`` to the value of the
        matching ``_source.<name>`` column.

    Raises:
        KeyError: If *row* lacks one of the expected columns.
    """
    return {name: row["_source." + name] for name in SOURCE_FIELDS}


def main():
    """Index every row of the configured CSV file into Elasticsearch."""
    # Imported here so parse_csv/build_document stay importable on machines
    # that do not have the Elasticsearch client installed.
    from elasticsearch import Elasticsearch  # Elasticsearch client module

    es = Elasticsearch([ip], port=port)
    print("start to import....................")
    for row in parse_csv(file):
        es.index(index="your_index", doc_type="your_type", body=build_document(row))


if __name__ == "__main__":
    main()
下一篇:
Idea工具创建javaweb项目例子