0
所以在這裏我有以下的列表和字典:字典映射在不同的列表
{'linked': {'instructors.v1':
[{'id': '3219339', 'fullName': 'Lisa Mazzola'},
{'id': '6407572', 'fullName': 'Alan S. Miller '},
{'id': '226710', 'fullName': 'Kevin Werbach'},
{'id': '8054217', 'fullName': '許 肖瀟'},
{'id': '20696355', 'fullName': 'Варшалович Дмитрий Александрович'},
{'id': '15622422', 'fullName': 'Prof. James Evans'}}],
'elements':
[{ 'id': '69Bku0KoEeWZtA4u62x6lQ', 'name': 'Gamification','instructorIds': '226710'}]
}
我試圖從「要素獲得與「instructorIds」關聯「instructors.v1」的「全名」 '通過匹配兩者。 我的方法 - >創建了另一個解釋如下:
{'3219339': 'Lisa Mazzola'}
{'6407572': 'Alan S. Miller'}
{'226710': 'Kevin Werbach'}
這給了我一個KeyError異常:「‘226710’」即使226710在list.Please確實存在提出另一種方法,否則,我要去哪裏錯誤?
這裏是供您參考Python代碼:
import imp
import importlib
import requests
import json
import re
from bs4 import BeautifulSoup
import csv
import sys
import urllib.request
from importlib import reload
if __name__ == "__main__":
headers = ({
"x-user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/53.0.2785.92 Safari/537.36
FKUA/website/41/website/Desktop"})
url = "https://api.coursera.org/api/courses.v1?start=0&limit=20&includes=instructorIds,partnerIds,specializations,s12nlds,v1Details,v2Details&fields=instructorIds,partnerIds,specializations,s12nlds,description"
data = requests.get(url).json()
# print(len(data['elements']))
print(data)
with open("courserarough1.csv", 'a') as f:
##### Header #####
header = f.write(
'instructors' + ',' + 'courseURL' + ',' + 'courseType' + ',' +
'CourseName' + ',' + 'partnerName' + ',' +
'slug' + ',' + 'specializations' + ',' + 'course_id' + ',' +
'description' + ',' + "\n")
for n in range(len(data['linked']['instructors.v1'])):
instructors = data['linked']['instructors.v1'][n]['fullName']
instructors = str(instructors)
instructors = instructors.strip().replace(',', '')
instructorsid = data['linked']['instructors.v1'][n]['id']
instructorsid = str(instructorsid)
instructorsid = instructorsid.strip().replace(',', '')
newdict = dict([(instructorsid,instructors)])
print(newdict)
#print(data['linked']['instructors.v1'])
partnerlist = []
for m in range(len(data['linked']['partners.v1'])):
partnerName = data['linked']['partners.v1'][m]['name']
partnerName = str(partnerName)
partnerid = data['linked']['partners.v1'][m]['id']
partnerid = str(partnerid)
partnerlist.append(partnerid)
#print(partnerlist)
for i in range(len(data['elements'])):
partnerIds = data['elements'][i]['partnerIds']
#filtered = data[(np.where(partnerlist.__contains__(partnerIds)))]
#print(filtered)
courseType = data['elements'][i]['courseType']
courseType = str(courseType)
if courseType:
courseType = courseType.rstrip().replace('v2.', '')
else:
courseType = ' '
# print(courseType)
CourseName = data['elements'][i]['name']
CourseName = str(CourseName)
CourseName = CourseName.strip().replace(',', '')
partnerIds = data['elements'][i]['partnerIds']
partnerIds = str(partnerIds)
if partnerIds:
partnerIds = partnerIds.rstrip().replace(',', '')
partnerIds = partnerIds.rstrip().replace('\n', '')
partnerIds = partnerIds.rstrip().replace('u', '')
partnerIds = partnerIds.rstrip().replace('[', '')
partnerIds = partnerIds.rstrip().replace(']', '')
else:
partnerIds = ' '
slug = data['elements'][i]['slug']
slug = str(slug)
# print(slug)
specializations = data['elements'][i]['specializations']
specializations = str(specializations)
if specializations:
specializations = specializations.rstrip().replace(',', '')
specializations = specializations.rstrip().replace('\n', '')
specializations = specializations.rstrip().replace('u', '')
specializations = specializations.rstrip().replace('[', '')
specializations = specializations.rstrip().replace(']', '')
else:
specializations = ' '
course_id = data['elements'][i]['id']
course_id = str(course_id)
description = data['elements'][i]['description']
description = str(description)
if description:
description = description.strip().replace(',', '')
description = description.strip().replace('\n', '')
else:
description = ' '
courseURL = "https://www.coursera.org/learn/" + slug
courseURL = str(courseURL)
instructorIds = data['elements'][i]['instructorIds']
instructorIds = str(instructorIds)
if instructorIds:
instructorIds = instructorIds.rstrip().replace(',', '')
instructorIds = instructorIds.rstrip().replace('\n', '')
instructorIds = instructorIds.rstrip().replace('u', '')
instructorIds = instructorIds.rstrip().replace('[', '')
instructorIds = instructorIds.rstrip().replace(']', '')
instructorIds = re.sub(r'^"|"$', '', instructorIds)
else:
instructorIds = ' '
#print(instructorIds)
instructors = newdict[instructorIds]
print(instructors)
##writing the
attributes in a csv file##
f.write(instructors + ',' + courseURL + ',' + courseType + ',' +
CourseName + ',' + partnerName + ',' + slug +
',' + specializations + ',' + course_id + ',' + description +
"\n")
請告訴我們什麼ü試圖 – DineshKumar
這樣會更如果您還添加了用於抽取數據的代碼,則會很有幫助。還要注意「instructor.v1」是一個字典數組,在'226710'中是一個值不是關鍵,關鍵是'id'。 –
@AnisH_GuptA是的正是..所以這就是我創建另一個名爲'newdict'的字典,我可以使用'226710'作爲關鍵字,這樣我就可以得到這個名字。 –