2017-08-16 47 views
0
import json
from pprint import pprint

import requests
from bs4 import BeautifulSoup

# Replays the dashboard "export" postback of an ASP.NET WebForms page:
#   1. GET the page to obtain the hidden WebForms state fields.
#   2. POST those fields back together with a JSON-encoded export command.
#   3. Print the body of the response.
url = 'http://estadistico.ut.com.sv/OperacionDiaria.aspx'

# A session is used so cookies set by the GET are sent with the POST.
s = requests.Session()

pagereq = s.get(url)
# Fail early with a clear HTTP error instead of a confusing "NoneType is not
# subscriptable" below when the page could not be loaded.
pagereq.raise_for_status()
soup = BeautifulSoup(pagereq.content, 'lxml')

# Hidden state fields rendered into the form; they are read fresh from the
# page on every run and echoed back unchanged in the POST.
viewstategenerator = soup.find("input", attrs={'id': '__VIEWSTATEGENERATOR'})['value']
viewstate = soup.find("input", attrs={'id': '__VIEWSTATE'})['value']
eventvalidation = soup.find("input", attrs={'id': '__EVENTVALIDATION'})['value']

eventtarget = 'ASPxDashboardViewer1'
DXCss = '1_33,1_4,1_9,1_5,15_2,15_4'
DXScript = '1_232,1_134,1_225,1_169,1_187,15_1,1_183,1_182,1_140,1_147,1_148,1_142,1_141,1_143,1_144,1_145,1_146,15_0,15_6,15_7'

# Export command for the dashboard viewer, kept as a Python structure and
# serialised to JSON when the form is built.
# - JSON null/true are written as None/True so that json.dumps emits real
#   null/true rather than the quoted strings "null"/"true".
# - Spaces are written as spaces: the '+' characters in the captured request
#   were form-encoded spaces, and requests applies that encoding itself.
# NOTE(review): "Context" and "RequestMarker" were copied from a captured
# browser request; they may be session-specific - confirm against a fresh
# capture if the server still rejects the request.
eventargument = {
    "Task": "Export",
    "ExportInfo": {
        "Mode": "SingleItem",
        "GroupName": "pivotDashboardItem1",
        "FileName": "Generación por tipo de tecnología (MWh)",
        "ClientState": {
            "clientSize": {"width": 509, "height": 385},
            "titleHeight": 48,
            "itemsState": [
                {
                    "name": "pivotDashboardItem1",
                    "headerHeight": 34,
                    "position": {"left": 11, "top": 146},
                    "width": 227,
                    "height": 108,
                    "virtualSize": None,
                    "scroll": {"horizontal": True, "vertical": True},
                },
            ],
        },
        "Format": "Excel",
        "DocumentOptions": {
            "paperKind": "Letter",
            "pageLayout": "Portrait",
            "scaleMode": "AutoFitWithinOnePage",
            "scaleFactor": 1,
            "autoFitPageCount": 1,
            "showTitle": True,
            "title": "Operación Diaria",
            "imageFormatOptions": {"format": "Png", "resolution": 96},
            "excelFormatOptions": {"format": "Csv", "csvValueSeparator": ","},
            "commonOptions": {
                "filterStatePresentation": "None",
                "includeCaption": True,
                "caption": "Generación por tipo de tecnología (MWh)",
            },
            "pivotOptions": {"printHeadersOnEveryPage": True},
            "gridOptions": {"fitToPageWidth": True, "printHeadersOnEveryPage": True},
            "chartOptions": {"automaticPageLayout": True, "sizeMode": "Zoom"},
            "pieOptions": {"autoArrangeContent": True},
            "gaugeOptions": {"autoArrangeContent": True},
            "cardOptions": {"autoArrangeContent": True},
            "mapOptions": {"automaticPageLayout": True, "sizeMode": "Zoom"},
            "rangeFilterOptions": {"automaticPageLayout": True, "sizeMode": "Stretch"},
            "imageOptions": {},
            "fileName": "Generación por tipo de tecnología (MWh)",
        },
        "ItemType": "PIVOT",
    },
    "Context": "BwAHAAIkY2NkNWRiYzItYzIwNS00MDIyLTkzZjUtYWQ0NzVhYTM5Y2E3Ag9PcGVyYWNpb25EaWFyaWECAAIAAAAAAMByQA==",
    "RequestMarker": 1,
    "ClientState": {},
}

postdata = {
    '__EVENTTARGET': eventtarget,
    # Form fields must be flat strings. requests does not JSON-encode a nested
    # dict given as a form value, so the command is serialised explicitly
    # (compact separators, matching the shape of the captured request).
    '__EVENTARGUMENT': json.dumps(eventargument, separators=(',', ':')),
    '__VIEWSTATE': viewstate,
    '__VIEWSTATEGENERATOR': viewstategenerator,
    '__EVENTVALIDATION': eventvalidation,
    'DXScript': DXScript,
    'DXCss': DXCss,
}

datareq = s.post(url, data=postdata)

# The body is printed even on an error status so the server's error page
# remains visible for debugging. Single-argument print() behaves the same on
# Python 2 and Python 3.
print(datareq.text)

(標題:從 .aspx 頁面觸發數據響應)我試圖從這個 .aspx 網頁上抓取數據。該頁面通過 JavaScript 動態加載數據,因此直接使用 requests / BeautifulSoup 將無法工作。

通過查看網絡流量,我可以看到,當您單擊某個元素的導出(Exportar a)按鈕、選擇一種導出類型(excel、csv)並確認之後,瀏覽器會向頁面發出一個POST請求。該請求返回一個base64編碼的字符串,其中包含我需要的數據。據我所知,無法直接對文件進行GET請求,因爲它僅在請求時才生成。

我想要做的是複製觸發csv響應的POST請求。因此,我首先要抓住__VIEWSTATE,__VIEWSTATEGENERATOR和__EVENTVALIDATION。 __EVENTTARGET,DXCSS和DXScript看起來是固定的。直接從POST請求複製__EVENTARGUMENT。

我的代碼返回一個服務器應用程序錯誤。我認爲這個問題或者是a)錯誤的__EVENTARGUMENT(可能是動態的而不是固定的),b)不能真正理解.aspx頁面是如何工作的,或者c)我想要做的是使用這些工具無法實現的。

我沒有深入研究過使用 Selenium 觸發數據導出,但我看不出有什麼方法可以用它捕獲服務器的響應。

回答