## Before parsing, clean the XML export by hand:
##   - Change RECORDX\xc3\x91_5\xc3\xaf\x03\x12 to RECORD
##   - Change & to and
##   - Fix any remaining problems manually.

import libxml2

# One dict per <RECORD> element, mapping child element name -> text content.
record = []

# Placeholder written to the output for any field a record does not have.
BLANK = '_B_L_A_N_K_'


def Record(i, node):
    """Copy the element children of ``node`` into ``record[i]``.

    ``i`` is the index of an already-appended dict in the module-level
    ``record`` list; ``node`` is the libxml2 node whose children are read.
    Each child of type "element" is stored as name -> ``get_content()``.
    Non-element children are skipped. If two children share a name, the
    later one overwrites the earlier one.
    """
    fields = record[i]
    sub_node = node.children
    while sub_node is not None:
        if sub_node.type == "element":
            fields[sub_node.name] = sub_node.get_content()
        sub_node = sub_node.next


# Memory debug specific
libxml2.debugMemory(1)

isis_data = libxml2.parseFile('/root/fctvet.xml')
root = isis_data.getRootElement()
node = root.children
i = 0
while node is not None:
    if node.type == "element":
        if node.name == "RECORD":
            # Debug aid: print(node.get_content())
            record.append({})
            Record(i, node)
            i = i + 1
        else:
            print("unhandled node in : " + node.name)
    # Advance on every path. Previously the "unhandled node" branch did not
    # move to the next sibling, so one unexpected element looped forever.
    node = node.next

# Only plain strings were copied into ``record``, so the document can go.
isis_data.freeDoc()

# Memory debug specific
libxml2.cleanupParser()
leaked = libxml2.debugMemory(1)
if leaked == 0:
    print("OK")
else:
    print("Memory leak %d bytes" % leaked)
    libxml2.dumpMemory()

####################################################################

# Columns written after the leading Tag_691 column, in output order.
tag = ['Tag_4', 'Tag_7', 'Tag_11', 'Tag_12', 'Tag_13', 'Tag_25', 'Tag_60',
       'Tag_70', 'Tag_80', 'Tag_18', 'Tag_19', 'Tag_20', 'Tag_21', 'Tag_24',
       'Tag_38', 'Tag_10', 'Tag_114', 'Tag_115', 'Tag_110', 'Tag_5',
       'Tag_122', 'Tag_97', 'Tag_151', 'Tag_153', 'Tag_123']

# ``with`` guarantees the file is flushed and closed even if a write fails.
with open('/root/fctveout.txt', 'w') as record_file:
    for j in record:
        # Debug aid, to list documents lacking tag 114:
        ##   if 'Tag_114' not in j and 'Tag_691' in j:
        ##       print(j['Tag_691'])
        row = [j.get('Tag_691', BLANK)]
        for h in tag:
            row.append(j.get(h, BLANK))
        # One tab-separated line per record; missing fields become BLANK.
        record_file.write('\t'.join(row) + '\n')