Elasticsearch 使用 Python 导入数据

Elasticsearch 使用 Python 导入数据

from elasticsearch import Elasticsearch    #引入 es 模块
import csv #引入 python 自带的 csv 模块
#from elasticsearch import helpers 

# Address of the Elasticsearch node to import into.
# NOTE: both values are placeholders -- set them for your cluster before running.
ip = "xxx.xxx.xxx.xxx"
port = 9200  # Elasticsearch's default HTTP port; change if your node differs
es = Elasticsearch([ip], port=port)

# CSV file to import. Raw string: in a normal literal the "\U" of "\Users"
# would be parsed as a Unicode escape and the script would not compile.
file = r"C:\Users\Administrator\Desktop\data.csv"

print("start to import....................")

def parse_csv(datafile):
    """Read a CSV file and return its rows as a list of dicts.

    The first line of the file is used as the header; every following row
    becomes one dict mapping the header names to that row's string values.

    Args:
        datafile: Path of the CSV file to read. The file is decoded as UTF-8.

    Returns:
        A list with one dict per data row, in file order. An empty file, or
        a file that contains only the header line, yields an empty list.
    """
    # newline="" leaves line splitting to the csv module, so line breaks
    # embedded inside quoted fields are kept exactly as written in the file.
    with open(datafile, "rt", encoding="utf-8", newline="") as sd:
        # DictReader consumes the header row and maps every later row onto it.
        return list(csv.DictReader(sd))

# Load every CSV row up front; parse_csv returns one dict per row.
data = parse_csv(file)

# Fields copied into each indexed document. Every field is read from the CSV
# column named "_source.<field>" and stored under the bare field name.
# The columns collection, support, concept, event and share_num are
# currently not imported.
SOURCE_FIELDS = (
    "createDateTime",
    "post_time_str",
    "rowkey",
    "cate",
    "updateDateTime_str",
    "source_type",
    "type",
    "version",
    "cate_type",
    "title",
    "length",
    "dedupFlag",
    "simHashCode",
    "stock_name",
    "updateDateTime",
    "source_num",
    "datasource",
    "abstract",
    "createDateTime_str",
    "keywords",
    "charset",
    "attr",
    "stock_num",
    "tasks",
    "cid",
    "url",
    "content",
    "emotion_i",
    "category",
    "source",
    "quality_score",
    "stock_company",
    "emotion_f",
    "contentimgs",
    "emotionEva",
    "tmpl_id",
    "post_time",
    "reply_cnt",
    "rel_type",
    "author",
    "visit",
    "all_source",
)

for row in data:
    # Build the document body: same values, keys without the "_source." prefix.
    # A row lacking any of these columns raises KeyError.
    doc = {name: row["_source." + name] for name in SOURCE_FIELDS}

    # One index request per CSV row.
    es.index(index="your_index", doc_type="your_type", body=doc)
经验分享 程序员 微信小程序 职场和发展