yanivk1984
10/4/2019 - 7:54 PM

Python: convert a CSV file into JSON

def csv_to_json(csv_file):
    """Parse a comma-separated file into a list of row dictionaries.

    The first line of the file is the header and supplies the dictionary
    keys; every following line becomes one dictionary. A space directly
    before or after a comma is dropped from the header and from the data
    lines before parsing.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list with one ``dict`` per data row, keyed by the header fields.

    Raises:
        SystemExit: With status 1 (after logging the reason) when the file
            cannot be opened, when it has fewer than two lines, or when a
            data line does not split into as many comma-separated fields
            as the header.
    """

    def strip_padding(line):
        # "a , b" -> "a,b": drop the space that hugs a comma on either side.
        return line.replace(', ', ',').replace(' ,', ',')

    # Only the open() call is guarded, so read errors still propagate as-is.
    try:
        source = open(csv_file, "r")
    except Exception as err:
        logger.error(f"Got an error while trying to open {str(csv_file)}")
        logger.error(f"ERROR: {str(err)}")
        raise SystemExit(1)
    with source:
        file_list = source.readlines()

    # The first line is the header, so fewer than two lines means there is no
    # data to convert. Abort here instead of failing later on file_list[0].
    if len(file_list) < 2:
        logger.error("The file should have at least 2 lines (header and data)")
        logger.error(f"This file has only {len(file_list)}")
        raise SystemExit(1)

    header = strip_padding(file_list[0]).rstrip('\n').split(',')
    logger.debug("The header of the file is: " + str(header))

    data_lines = [strip_padding(line) for line in file_list[1:]]

    # Every data line must split into exactly as many fields as the header.
    # NOTE: this is a plain split on ',', so a quoted field that contains a
    # comma is counted as several columns and rejected.
    header_size = len(header)
    for line in data_lines:
        row = line.split(',')
        row_size = len(row)
        if row_size != header_size:
            logger.error("One of the rows has a different number of columns than the header, please fix it!")
            logger.error(f'The header has {header_size} and the row column size is {row_size}')
            logger.error(f"Header: {header}")
            logger.error(f"Row: {row}")
            raise SystemExit(1)

    # DictReader accepts any iterable of lines, so the cleaned lines are parsed
    # in memory; no scratch file is created next to the input.
    reader = csv.DictReader(data_lines, fieldnames=header)
    device_json = [dict(row) for row in reader]
    logger.debug("Devices parsed from CSV file: " + str(device_json))

    logger.info("Devices found:" + str(len(device_json)))

    return device_json