我試圖用C#進行屏幕抓取。它成功運行了幾次,之後我就收到「會話過期」的錯誤。任何幫助都將不勝感激。.Net屏幕抓取和會話
0
A
回答
5
這是我用於屏幕抓取的一組類。 (這些類是我寫的,你可以隨意使用。)
它可能有一些錯誤,但在我用過的每一種場景中,它都運行得非常完美。
它也能很好地處理SSL網站,支持重定向,並在WebPage類中保留導致重定向的原始頁面。
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.IO.Compression;
using System.Threading;
using System.Web;
using System.Security.Cryptography.X509Certificates;
/// <summary>
/// A mutable key/value pair. <c>WebSession.RequestPage</c> joins a list of
/// these into a <c>key=value&amp;key=value</c> request body.
/// </summary>
public class PostValue
{
    /// <summary>Name part of the pair.</summary>
    public string Key { get; set; }

    /// <summary>Value part of the pair.</summary>
    public string Value { get; set; }

    /// <summary>Creates a pair from the given key and value.</summary>
    /// <param name="key">Stored in <see cref="Key"/> as-is.</param>
    /// <param name="value">Stored in <see cref="Value"/> as-is.</param>
    public PostValue(string key, string value)
    {
        this.Key = key;
        this.Value = value;
    }
}
/// <summary>
/// Holds the HTML text of a fetched page and, optionally, a reference to
/// another <see cref="WebPage"/> recorded as its parent.
/// </summary>
[Serializable]
public class WebPage
{
    /// <summary>HTML text of the page.</summary>
    public string Html { get; set; }

    /// <summary>Parent page, or <c>null</c> when none was supplied.</summary>
    public WebPage Parent { get; set; }

    /// <summary>Creates a page with no parent.</summary>
    /// <param name="html">HTML text of the page.</param>
    public WebPage(string html)
        : this(html, null)
    {
    }

    /// <summary>Creates a page linked to a parent page.</summary>
    /// <param name="html">HTML text of the page.</param>
    /// <param name="parent">Page to record as <see cref="Parent"/>.</param>
    public WebPage(string html, WebPage parent)
    {
        this.Html = html;
        this.Parent = parent;
    }
}
/// <summary>
/// Certificate policy that approves every server certificate.
/// </summary>
/// <remarks>
/// SECURITY: this policy performs no validation at all, so any HTTPS
/// connection made while it is installed accepts invalid, expired or
/// forged certificates.
/// </remarks>
internal class AcceptAllCertificatePolicy : ICertificatePolicy
{
    /// <summary>Reports the certificate as acceptable, ignoring every argument.</summary>
    /// <param name="sPoint">Ignored.</param>
    /// <param name="cert">Ignored.</param>
    /// <param name="wRequest">Ignored.</param>
    /// <param name="certProb">Ignored.</param>
    /// <returns>Always <c>true</c>.</returns>
    public bool CheckValidationResult(
        ServicePoint sPoint,
        X509Certificate cert,
        WebRequest wRequest,
        int certProb)
    {
        const bool accepted = true;
        return accepted;
    }
}
/// <summary>
/// Minimal HTTP client for screen scraping. It keeps cookies between
/// requests, sends the previously fetched URL as the Referer, retries failed
/// requests, follows redirects manually and decompresses gzip/deflate bodies.
/// </summary>
public class WebSession
{
    /// <summary>Prefix prepended to the URL by every string-based <c>RequestPage</c> overload.</summary>
    public String BaseUrl { get; set; }

    /// <summary>URL of the last page fetched without a redirect; sent as the Referer header.</summary>
    public String LastUrl { get; set; }

    /// <summary>User-Agent header value sent with every request.</summary>
    public String UserAgent { get; set; }

    /// <summary>Number of retries after the first failed attempt before the WebException is rethrown.</summary>
    public int PageReattempts { get; set; }

    /// <summary>Proxy assigned to every request; may be null.</summary>
    public WebProxy Proxy { get; set; }

    /// <summary>Initialised to an empty string; not otherwise used by this class.</summary>
    public String CookieString { get; set; }

    /// <summary>Cookie name/value pairs collected from Set-Cookie headers and sent back on each request.</summary>
    public Dictionary<String, String> Cookies { get; set; }

    private static WebSession instance { get; set; }

    /// <summary>Lazily created shared session. Creation is not synchronised.</summary>
    public static WebSession Instance { get { if (instance == null) instance = new WebSession(); return instance; } }

    /// <summary>User-Agent used when none is supplied.</summary>
    public const String DefaultAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8";

    /// <summary>Creates a session with the default user agent and no proxy.</summary>
    public WebSession()
        : this(DefaultAgent, null)
    {
    }

    /// <summary>Creates a session with the default user agent, no proxy and the given base URL.</summary>
    /// <param name="baseUrl">Prefix for the string-based <c>RequestPage</c> overloads.</param>
    public WebSession(String baseUrl)
        : this(DefaultAgent, null)
    {
        BaseUrl = baseUrl;
    }

    /// <summary>Creates a session with an explicit user agent and proxy.</summary>
    /// <param name="userAgent">User-Agent header value; null or empty selects <see cref="DefaultAgent"/>.</param>
    /// <param name="proxy">Proxy for all requests; may be null.</param>
    public WebSession(String userAgent, WebProxy proxy)
    {
        // SECURITY: this switches off certificate validation for the whole
        // process (ServicePointManager is global), not just for this session.
        // Kept for backward compatibility; remove it if the target sites have
        // valid certificates.
        ServicePointManager.CertificatePolicy = new AcceptAllCertificatePolicy();
        CookieString = "";
        Cookies = new Dictionary<string, string>();
        // Null is treated like "" so a missing agent never produces a request without one.
        UserAgent = String.IsNullOrEmpty(userAgent) ? DefaultAgent : userAgent;
        Proxy = proxy;
        LastUrl = "";
        PageReattempts = 4;
        // Global setting: do not send "Expect: 100-continue" on POSTs.
        ServicePointManager.Expect100Continue = false;
    }

    /// <summary>GETs <see cref="BaseUrl"/> + <paramref name="URL"/>.</summary>
    public WebPage RequestPage(string URL)
    {
        return RequestPage(new Uri(BaseUrl + URL));
    }

    /// <summary>Requests <see cref="BaseUrl"/> + <paramref name="URL"/> with the given body and method.</summary>
    public WebPage RequestPage(string URL, string Values, string Method)
    {
        return RequestPage(new Uri(BaseUrl + URL), Values, Method);
    }

    /// <summary>Requests <see cref="BaseUrl"/> + <paramref name="URL"/> with the given body, method and content type.</summary>
    public WebPage RequestPage(string URL, string Values, string Method, string ContentType)
    {
        // Pass the caller's content type through; it used to be silently
        // replaced by the form-urlencoded default.
        return RequestPage(new Uri(BaseUrl + URL), Values, Method, ContentType);
    }

    /// <summary>GETs the given URL.</summary>
    public WebPage RequestPage(Uri URL)
    {
        return RequestPage(URL, "", "GET");
    }

    /// <summary>
    /// POSTs the given pairs, form-urlencoded, to <see cref="BaseUrl"/> + <paramref name="URL"/>.
    /// </summary>
    /// <param name="URL">URL relative to <see cref="BaseUrl"/>.</param>
    /// <param name="postValues">Pairs to send; null or empty sends an empty body.</param>
    public WebPage RequestPage(String URL, params PostValue[] postValues)
    {
        StringBuilder body = new StringBuilder();
        if (postValues != null)
        {
            for (int index = 0; index < postValues.Length; index++)
            {
                if (index > 0)
                    body.Append('&');
                // Keys are encoded as well as values, so a key containing
                // '&', '=' or a space cannot corrupt the body.
                body.Append(HttpUtility.UrlEncode(postValues[index].Key));
                body.Append('=');
                body.Append(HttpUtility.UrlEncode(postValues[index].Value));
            }
        }
        return RequestPage(URL, body.ToString());
    }

    /// <summary>POSTs <paramref name="Values"/> to <see cref="BaseUrl"/> + <paramref name="URL"/>.</summary>
    public WebPage RequestPage(string URL, string Values)
    {
        return RequestPage(new Uri(BaseUrl + URL), Values);
    }

    /// <summary>POSTs <paramref name="Values"/> to the given URL.</summary>
    public WebPage RequestPage(Uri URL, string Values)
    {
        return RequestPage(URL, Values, "POST");
    }

    /// <summary>Requests the given URL with a form-urlencoded body.</summary>
    public WebPage RequestPage(Uri URL, string Values, string Method)
    {
        return RequestPage(URL, Values, Method, "application/x-www-form-urlencoded");
    }

    /// <summary>
    /// Performs the request, stores any cookies from the response, and follows
    /// a <c>Location</c> redirect with a GET.
    /// </summary>
    /// <param name="url">Absolute URL to request.</param>
    /// <param name="content">Request body; only sent when <paramref name="method"/> is "POST".</param>
    /// <param name="method">HTTP method.</param>
    /// <param name="contentType">Content-Type header for a POST body.</param>
    /// <returns>
    /// The fetched page. After a redirect, the result holds the redirect
    /// target's HTML and its <c>Parent</c> holds the body of the redirecting response.
    /// </returns>
    /// <exception cref="WebException">The request still fails after <see cref="PageReattempts"/> retries.</exception>
    public WebPage RequestPage(Uri url, string content, string method, string contentType)
    {
        // UTF-8 is identical to ASCII for ASCII input but does not turn other
        // characters into '?'.
        byte[] contentData = Encoding.UTF8.GetBytes(content ?? "");
        HttpWebResponse response = null;
        int attempts = 0;
        while (true)
        {
            try
            {
                // An HttpWebRequest cannot be sent twice, so each attempt
                // builds a new request and rewrites the body.
                HttpWebRequest request = CreateRequest(url, method);
                if (method == "POST")
                {
                    request.ContentLength = contentData.Length;
                    request.ContentType = contentType;
                    using (Stream contentWriter = request.GetRequestStream())
                    {
                        contentWriter.Write(contentData, 0, contentData.Length);
                    }
                }
                response = (HttpWebResponse)request.GetResponse();
                break;
            }
            catch (WebException)
            {
                if (attempts == PageReattempts)
                    throw;
                // Wait three seconds before trying again
                Thread.Sleep(3000);
            }
            attempts += 1;
        }

        string htmlResult;
        string newLocation;
        try
        {
            // Read every header before the response is closed.
            StoreCookies(response.Headers["Set-Cookie"]);
            newLocation = response.Headers["Location"];
            htmlResult = ReadResponseStream(response);
        }
        finally
        {
            response.Close();
        }

        if (newLocation != null)
        {
            Thread.Sleep(1500);
            // Resolve against the current URL so both absolute and relative
            // Location values work, regardless of BaseUrl.
            WebPage result = RequestPage(new Uri(url, newLocation));
            return new WebPage(result.Html, new WebPage(htmlResult));
        }
        LastUrl = url.ToString();
        return new WebPage(htmlResult);
    }

    /// <summary>
    /// Reads the whole response body as text, decompressing it when the
    /// Content-Encoding header mentions gzip or deflate.
    /// </summary>
    /// <param name="response">Response whose body is read; its stream is closed afterwards.</param>
    /// <returns>The body text, or an empty string when the response has no stream.</returns>
    public string ReadResponseStream(HttpWebResponse response)
    {
        Stream responseStream = response.GetResponseStream();
        if (responseStream == null)
            return String.Empty;
        try
        {
            responseStream.ReadTimeout = 5000;
            // Ordinal, case-insensitive match: ToLower() is culture-sensitive
            // and fails to match "GZIP" under e.g. a Turkish culture.
            string contentEncoding = response.ContentEncoding ?? "";
            if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
                responseStream = new GZipStream(responseStream, CompressionMode.Decompress);
            else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
                responseStream = new DeflateStream(responseStream, CompressionMode.Decompress);
            using (StreamReader reader = new StreamReader(responseStream))
            {
                return reader.ReadToEnd();
            }
        }
        finally
        {
            // Closing the outermost stream also closes the wrapped network stream.
            responseStream.Close();
        }
    }

    // Builds a fresh request carrying the session's headers and cookies.
    private HttpWebRequest CreateRequest(Uri url, string method)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Proxy = Proxy;
        request.Timeout = 60000;
        request.Method = method;
        // Redirects are followed by RequestPage so cookies set on the
        // redirecting response are captured.
        request.AllowAutoRedirect = false;
        request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        request.Referer = LastUrl;
        request.KeepAlive = false;
        request.UserAgent = UserAgent;
        request.Headers.Add("Accept-Language", "en-us,en;q=0.5");
        request.Headers.Add("Cache-Control", "no-cache");
        request.Headers.Add("Accept-Encoding", "gzip,deflate");

        StringBuilder cookieHeader = new StringBuilder();
        foreach (KeyValuePair<String, String> cookiePair in Cookies)
        {
            if (cookieHeader.Length > 0)
                cookieHeader.Append(';');
            cookieHeader.Append(cookiePair.Key).Append('=').Append(cookiePair.Value);
        }
        // Same threshold as before: a header is only sent when it holds more
        // than a bare "=".
        if (cookieHeader.Length > 1)
            request.Headers.Add("Cookie", cookieHeader.ToString());
        return request;
    }

    // Splits a Set-Cookie header value into name/value tokens and stores them in Cookies.
    // NOTE(review): cookie attributes (path, expires, domain, ...) are stored
    // like cookies, as the original tokenizer did; a full RFC 6265 parser
    // would drop them.
    private void StoreCookies(string setCookieHeader)
    {
        if (setCookieHeader == null)
            return;
        String headers = setCookieHeader.Replace("path=/,", ";").Replace("HttpOnly,", "");
        foreach (String rawToken in headers.Split(';'))
        {
            String token = rawToken.Trim();
            if (token.Length == 0)
                continue;
            // Split on the FIRST '=' only: values such as Base64 session ids
            // contain '=' themselves and must not be truncated.
            int separator = token.IndexOf('=');
            String cookieKey = separator < 0 ? token : token.Substring(0, separator).Trim();
            String cookieValue = separator < 0 ? "" : token.Substring(separator + 1).Trim();
            if (cookieKey.Length == 0)
                continue;
            Cookies[cookieKey] = cookieValue;
        }
    }
}
0
這是我前段時間寫的一個擴展方法,用來從幾個招聘網站抓取遠程工作職位,並在每週五通過電子郵件發給我。我將代碼放在控制檯應用程序中,並用Windows任務計劃程序定時運行該應用程序。
// Downloads the page at the given Uri and returns its body as text.
// The request is made to the Uri without its fragment (GetLeftPart(Query)).
// Throws System.ArgumentNullException when uri is null.
// Usage: this.Literal1.Text = new System.Uri("https://www.amazon.com/gp/goldbox/ref=nav_cs_gb").Scrape();
public static string Scrape(this System.Uri uri)
{
    if (uri == null)
    {
        throw new System.ArgumentNullException("uri");
    }

    System.Net.WebRequest request = System.Net.WebRequest.Create(uri.GetLeftPart(System.UriPartial.Query));

    // Dispose the response itself, not only the reader, so the connection is
    // released even if creating the stream or the reader fails.
    using (System.Net.WebResponse response = request.GetResponse())
    using (System.IO.Stream body = response.GetResponseStream())
    using (var sr = new System.IO.StreamReader(body))
    {
        return sr.ReadToEnd();
    }
}
相關問題
- 1. PHP的屏幕抓取和會話
- 2. 使用.NET通過SSL抓取屏幕
- 3. 屏幕抓取Asp.net
- 4. 在.NET中簡單的屏幕抓取和分析
- 5. PyQt5抓取和保存屏幕部分
- 6. 使用Java和X3270屏幕抓取
- 7. 屏幕抓取PHP和/或Javascript?
- 8. 屏幕抓取和SQL數據庫
- 9. 通過AJAX和javascript屏幕抓取
- 10. PHP屏幕抓取方法
- 11. 屏幕抓取效率
- 12. 想與屏幕抓取
- 13. 屏幕抓取窗口
- 14. HTMLAgility幫助屏幕抓取
- 15. 使用Python屏幕抓取
- 16. 殺屏幕會話腳本
- 17. 使用.net託管代碼屏幕抓取命令窗口
- 18. 需要建議:使用.NET屏幕抓取網頁
- 19. 使用C#和.NET Framework進行屏幕抓取,網頁抓取,網頁收集,Web數據提取等工作
- 20. 從插入抓取會話 - C#和MySQL
- 21. 屏幕抓取java中的主機屏幕?
- 22. 屏幕抓取:自動化vim腳本
- 23. 如何抓取屏幕NSWindow的圖像?
- 24. 使用XPath的屏幕抓取
- 25. 使用SimpleHTMLDom的屏幕抓取PHP
- 26. 屏幕抓取 - 但不能模擬javascript
- 27. 延遲後的屏幕抓取網頁
- 28. iOS 11沒有從MKMapView抓取屏幕
- 29. 在PHP問題屏幕抓取
- 30. 屏幕從網站抓取數據
你的意思是網頁抓取?也許是用HttpWebRequest對象來做。基本上,如果沒有更多的信息(最好附上一些代碼),我們無法提供幫助。 – 2009-12-05 16:58:15
你目前是怎麼做的? – Shoban 2009-12-05 17:00:09
是的,我的意思是使用WebRequest進行網頁抓取。 – Jignesh 2009-12-05 17:02:02