[XML to CSV for VOC] Transform VOC labels from XML format to CSV format #python #csv #xml
作者:pengfei
邮箱:pengfeidip@qq.com
时间:2019年4月15日08:31:38
The following code converts VOC dataset labels from XML format to CSV format.
The resulting .csv looks like the following; each row lists all objects belonging to one image.
image_name_1, xmin, ymin, xmax, ymax, class_index, xmin, ymin, xmax, ymax, class_index, .......
image_name_2, xmin, ymin, xmax, ymax, class_index, xmin, ymin, xmax, ymax, class_index, .......
import csv
import xmltodict
import os
def xml2csv(xml_path, saved_name):
    """Convert a folder of PASCAL VOC XML annotations into one CSV file.

    Every ``.xml`` file in ``xml_path`` becomes one CSV row holding the image
    file name followed by five values per annotated object::

        filename, xmin, ymin, xmax, ymax, class_index, xmin, ymin, ...

    ``class_index`` is the zero-based position of the object's class name in
    ``VOC_CLASSES`` ('aeroplane' is 0, 'tvmonitor' is 19). An image without
    any ``<object>`` element yields a row containing only its file name.

    Args:
        xml_path: Folder containing the VOC ``.xml`` annotation files.
        saved_name: Path of the CSV file to write; overwritten if it exists.

    Raises:
        ValueError: If an object's class name is not one of ``VOC_CLASSES``.
        KeyError: If an annotation lacks ``filename``, ``name`` or a
            bounding-box coordinate.
    """
    VOC_CLASSES = (  # position in this tuple is the class index written out
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor')
    class_to_index = {name: index for index, name in enumerate(VOC_CLASSES)}

    # Keep only XML files (the folder may hold other files, e.g. a previously
    # written CSV) and sort them so the row order does not depend on the
    # arbitrary order returned by os.listdir.
    xml_names = sorted(
        name for name in os.listdir(xml_path)
        if name.lower().endswith(".xml")
    )

    with open(saved_name, "w", newline="") as csvfile:
        csv_writer = csv.writer(csvfile)
        # One CSV row per annotation file / image.
        for i_xml_name in xml_names:
            # Read bytes so the XML parser applies the encoding declared in
            # the file instead of the platform's default text encoding.
            with open(os.path.join(xml_path, i_xml_name), "rb") as xmlfile:
                annotation = xmltodict.parse(xmlfile.read())["annotation"]

            # xmltodict yields a list for repeated <object> elements, a single
            # dict for exactly one, and no key at all when there are none.
            objects = annotation.get("object")
            if objects is None:
                objects = []
            elif not isinstance(objects, list):
                objects = [objects]

            info = [annotation["filename"]]
            for i_object in objects:
                class_name = i_object["name"]
                try:
                    class_index = class_to_index[class_name]
                except KeyError:
                    raise ValueError(
                        "unknown VOC class %r in %s" % (class_name, i_xml_name)
                    ) from None
                bndbox = i_object["bndbox"]
                info.extend((
                    bndbox["xmin"],
                    bndbox["ymin"],
                    bndbox["xmax"],
                    bndbox["ymax"],
                    class_index,
                ))
            csv_writer.writerow(info)
    print("conversion is finished !!! ")
if __name__ == "__main__":
    # Script entry point: convert the VOC2007 trainval annotations found at
    # the hard-coded folder below into a CSV in the current directory.
    xml2csv(
        xml_path="I:/PASCAL_VOC/2007/VOC2007_trainval/Annotations",
        saved_name="VOC2007_trainval.csv",
    )