tkNag
1/27/2019 - 11:00 AM

Elasticsearch bulkデータ生成

Elasticsearch bulkデータ生成

# coding:utf-8

import codecs
import json
import uuid
import random
import collections as cl
 
# Pools the random account fields are drawn from.
_FIRST_NAMES = (
    "adam",
    "beth",
    "curl",
    "denym",
    "elizabeth",
    "flora",
    "genome",
    "hyne",
    "iila",
    "john",
    "kate",
    "lamberd",
    "mike",
)
_LAST_NAMES = (
    "aliandra",
    "bonum",
    "cata",
    "delis",
    "endo",
    "fobus",
    "goldbaum",
    "hinelich",
    "ilunums",
    "joshua",
    "kasperski",
    "lindbirg",
    "mizur",
)
_GENDERS = ("M", "F")


def _random_account():
    """Build one randomly populated account document, keeping field order."""
    account = cl.OrderedDict()
    account["account_number"] = str(uuid.uuid4())
    account["firstname"] = random.choice(_FIRST_NAMES)
    account["lastname"] = random.choice(_LAST_NAMES)
    account["age"] = random.randint(15, 100)
    account["gender"] = random.choice(_GENDERS)
    return account


def main(path='./test.json', count=50):
    """Write random account documents as an Elasticsearch bulk file.

    The file is newline-delimited JSON: each document takes two lines, an
    ``index`` action line followed by the document source line, and every
    line (including the last) ends with a newline.

    Args:
        path: Destination file; it is created or overwritten, UTF-8 encoded.
        count: Number of documents to generate. Zero or a negative value
            produces an empty file.
    """
    # The context manager guarantees the file is flushed and closed even if
    # serialisation fails part-way through.
    with codecs.open(path, 'w', 'utf-8') as fw:
        for i in range(count):
            meta = cl.OrderedDict()
            # Elasticsearch only accepts lowercase index names.
            meta["_index"] = "testindex"
            # NOTE: mapping types are deprecated in Elasticsearch 7 and
            # removed in 8; drop "_type" when targeting those versions.
            meta["_type"] = "testType"
            # Each document needs its own id; a shared id would make every
            # bulk action overwrite the previous document.
            meta["_id"] = str(i)

            # A fresh action wrapper per document avoids state shared
            # between iterations.
            action = cl.OrderedDict()
            action["index"] = meta

            fw.write(json.dumps(action) + '\n' + json.dumps(_random_account()) + '\n')

 
# Generate the bulk file only when executed as a script, not when imported.
if __name__ == '__main__':
    main()