我不斷收到錯誤:AttributeError: 'NodeList' object has no attribute 'data'
,但我只是想檢查該節點是否爲空,如果是,只需傳遞-1而不是值。我的理解是temp_pub.getElementsByTagName("pages").data
應該返回None
。我該如何解決?檢查ElementTree節點是否爲空失敗
(PS-我試過!= None
和is None
)
xmldoc = minidom.parse('pubsClean.xml')
#loop through <pub> tags to find number of pubs to grab
root = xmldoc.getElementsByTagName("root")[0]
pubs = [a.firstChild.data for a in root.getElementsByTagName("pub")]
num_pubs = len(pubs)
count = 0
while(count < num_pubs):
temp_pages = 0
#get data from each <pub> tag
temp_pub = root.getElementsByTagName("pub")[count]
temp_ID = temp_pub.getElementsByTagName("ID")[0].firstChild.data
temp_title = temp_pub.getElementsByTagName("title")[0].firstChild.data
temp_year = temp_pub.getElementsByTagName("year")[0].firstChild.data
temp_booktitle = temp_pub.getElementsByTagName("booktitle")[0].firstChild.data
#handling no value
if temp_pub.getElementsByTagName("pages").data != None:
temp_pages = temp_pub.getElementsByTagName("pages")[0].firstChild.data
else:
temp_pages = -1
temp_authors = temp_pub.getElementsByTagName("authors")[0]
temp_author_array = [a.firstChild.data for a in temp_authors.getElementsByTagName("author")]
num_authors = len(temp_author_array)
count = count + 1
XML正在處理
<pub>
<ID>5010</ID>
<title>Model-Checking for L<sub>2</sub</title>
<year>1997</year>
<booktitle>Universität Trier, Mathematik/Informatik, Forschungsbericht</booktitle>
<pages></pages>
<authors>
<author>Helmut Seidl</author>
</authors>
</pub>
<pub>
<ID>5011</ID>
<title>Locating Matches of Tree Patterns in Forest</title>
<year>1998</year>
<booktitle>Universität Trier, Mathematik/Informatik, Forschungsbericht</booktitle>
<pages></pages>
<authors>
<author>Andreas Neumann</author>
<author>Helmut Seidl</author>
</authors>
</pub>
從編輯的完整代碼(有將ElementTree)
#for execute command to work
import sqlite3
import xml.etree.ElementTree as ET
con = sqlite3.connect("publications.db")
cur = con.cursor()
from xml.dom import minidom
#use this to clean the foreign characters
import re
def anglicise(matchobj):
if matchobj.group(0) == '&':
return matchobj.group(0)
else:
return matchobj.group(0)[1]
outputFilename = 'pubsClean.xml'
with open('test.xml') as inXML, open(outputFilename, 'w') as outXML:
outXML.write('<root>\n')
for line in inXML.readlines():
if (line.find("<sub>") or line.find("</sub>")):
newline = line.replace("<sub>", "")
newLine = newline.replace("</sub>", "")
outXML.write(re.sub('&[a-zA-Z]+;',anglicise,newLine))
outXML.write('\n</root>')
tree = ET.parse('pubsClean.xml')
root = tree.getroot()
xmldoc = minidom.parse('pubsClean.xml')
#loop through <pub> tags to find number of pubs to grab
root2 = xmldoc.getElementsByTagName("root")[0]
pubs = [a.firstChild.data for a in root2.getElementsByTagName("pub")]
num_pubs = len(pubs)
count = 0
while(count < num_pubs):
temp_pages = 0
#get data from each <pub> tag
temp_ID = root.find(".//ID").text
temp_title = root.find(".//title").text
temp_year = root.find(".//year").text
temp_booktitle = root.find(".//booktitle").text
#handling no value
if root.find(".//pages").text:
temp_pages = root.find(".//pages").text
else:
temp_pages = -1
temp_authors = root.find(".//authors")
temp_author_array = [a.text for a in temp_authors.findall(".//author")]
num_authors = len(temp_author_array)
count = count + 1
#process results into sqlite
pub_params = (temp_ID, temp_title)
cur.execute("INSERT OR IGNORE INTO publication (id, ptitle) VALUES (?, ?)", pub_params)
cur.execute("INSERT OR IGNORE INTO journal (jtitle, pages, year, pub_id, pub_title) VALUES (?, ?, ?, ?, ?)", (temp_booktitle, temp_pages, temp_year, temp_ID, temp_title))
x = 0
while(x < num_authors):
cur.execute("INSERT OR IGNORE INTO authors (name, pub_id, pub_title) VALUES (?, ?, ?)", (temp_author_array[x],temp_ID, temp_title))
cur.execute("INSERT OR IGNORE INTO wrote (name, jtitle) VALUES (?, ?)", (temp_author_array[x], temp_booktitle))
x = x + 1
con.commit()
con.close()
print("\nNumber of entries processed: ", count)
如果你給我們完成它會更容易幫助,複製pastable代碼+在XML正在處理中。總之,提供一個[mcve]。 – mzjn
@mzjn好的,抱歉,網站是新的,我不希望它太長。我現在就添加它。 – douglasrcjames
對不起,這是一個嘮叨,但你應該付出更多的努力,使例子最小和完成。例如,sqlite的東西似乎不相關。並請確定您是否在詢問有關miniidom或ElementTree。 「正在處理的XML」不是真正的XML,因爲沒有根元素。我注意到你在你的代碼中添加了根元素,但是這與你得到的錯誤似乎無關。 – mzjn