-4
我試圖抓取某個網站上的學校信息,並希望將它保存爲 Excel 表格,在每一列中填寫對應的詳細信息。以下是我目前寫出的代碼,希望能在此基礎上進一步學習。欄目標題包括:學校名稱、吉祥物、地址、類型、電話、傳真等等。下面的代碼以其中一個鏈接爲例。請問應如何改進這段代碼,使網頁抓取做得更好、更正確?
Imports System.IO.StreamReader
Imports System.Text.RegularExpressions
''' <summary>
''' Form that downloads one school page and lists selected header fields
''' (name, mascot, colors, type, athletic director, address, phone, fax)
''' in ListBox1 as alternating label / value items.
''' </summary>
Public Class Form1
    ''' <summary>Page that is downloaded when Button1 is clicked.</summary>
    Private Const SchoolUrl As String = "http://www.maxpreps.com/high-schools/abbeville-yellowjackets-(abbeville,al)/home.htm"

    ''' <summary>Prefix shared by the ids of all header elements that are extracted.</summary>
    Private Const HeaderIdPrefix As String = "ctl00_NavigationWithContentOverRelated_ContentOverRelated_Header_"

    ''' <summary>
    ''' Downloads the school page and adds every recognised field to ListBox1.
    ''' </summary>
    ''' <param name="sender">Control that raised the event (Button1).</param>
    ''' <param name="e">Event data; not used.</param>
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        Dim html As String
        Try
            html = DownloadPage(SchoolUrl)
        Catch ex As System.Net.WebException
            ' A network failure is reported to the user instead of surfacing
            ' as an unhandled exception from the click handler.
            System.Windows.Forms.MessageBox.Show("Could not download the school page: " & ex.Message)
            Return
        End Try

        ' The label of each id-based field is the last segment of its element id.
        AddField(html, "Header", IdElementPattern("h1", "Header"))
        AddField(html, "Mascot", IdElementPattern("span", "Mascot"))
        AddField(html, "Colors", IdElementPattern("span", "Colors"))
        AddField(html, "GenderType", IdElementPattern("span", "GenderType"))
        AddField(html, "AthleteDirectorGenericControl", IdElementPattern("span", "AthleteDirectorGenericControl"))
        ' The address element carries no id, so it is matched by tag name only.
        AddField(html, "Address", "<address>(.*?)</address>")
        AddField(html, "Phone", IdElementPattern("span", "Phone"))
        AddField(html, "Fax", IdElementPattern("span", "Fax"))
    End Sub

    ''' <summary>
    ''' Downloads <paramref name="url"/> and returns the response body as text.
    ''' </summary>
    ''' <param name="url">Absolute HTTP address of the page.</param>
    ''' <returns>The complete response body.</returns>
    ''' <remarks>
    ''' The response and the reader are wrapped in Using blocks so the
    ''' underlying connection is released even when reading fails.
    ''' </remarks>
    Private Shared Function DownloadPage(ByVal url As String) As String
        Dim request As System.Net.HttpWebRequest = DirectCast(System.Net.WebRequest.Create(url), System.Net.HttpWebRequest)
        Using response As System.Net.WebResponse = request.GetResponse()
            Using reader As New System.IO.StreamReader(response.GetResponseStream())
                Return reader.ReadToEnd()
            End Using
        End Using
    End Function

    ''' <summary>
    ''' Builds a pattern that matches &lt;tag id="prefix + idSuffix"&gt;...&lt;/tag&gt;
    ''' and captures the inner markup in group 1.
    ''' </summary>
    ''' <param name="tag">Element name, for example "span".</param>
    ''' <param name="idSuffix">Last segment of the element id, for example "Mascot".</param>
    ''' <returns>The regular expression pattern text.</returns>
    Private Shared Function IdElementPattern(ByVal tag As String, ByVal idSuffix As String) As String
        ' The quantifier is lazy so the match stops at the first closing tag
        ' instead of running on to the last one in the document.
        Return "<" & tag & " id=""" & Regex.Escape(HeaderIdPrefix & idSuffix) & """>(.*?)</" & tag & ">"
    End Function

    ''' <summary>
    ''' Adds a label item followed by a value item to ListBox1 for every match
    ''' of <paramref name="pattern"/> in <paramref name="html"/>.
    ''' Nothing is added when the pattern does not match.
    ''' </summary>
    ''' <param name="html">Page source to search.</param>
    ''' <param name="label">Text shown in the item preceding the value.</param>
    ''' <param name="pattern">Pattern whose group 1 captures the field's inner markup.</param>
    Private Sub AddField(ByVal html As String, ByVal label As String, ByVal pattern As String)
        ' Singleline lets "." cross line breaks, so elements whose content
        ' spans several lines are still matched.
        For Each found As Match In Regex.Matches(html, pattern, RegexOptions.Singleline)
            ListBox1.Items.Add(label)
            ListBox1.Items.Add(CleanValue(found.Groups(1).Value))
        Next
    End Sub

    ''' <summary>
    ''' Reduces captured inner markup to plain text: nested tags are replaced
    ''' by a space, runs of whitespace are collapsed and the result is trimmed.
    ''' HTML entities (such as &amp;amp;) are left as they appear in the page.
    ''' </summary>
    ''' <param name="innerHtml">Markup captured between an opening and a closing tag.</param>
    ''' <returns>The text content on a single line.</returns>
    Private Shared Function CleanValue(ByVal innerHtml As String) As String
        Dim withoutTags As String = Regex.Replace(innerHtml, "<[^>]+>", " ")
        Return Regex.Replace(withoutTags, "\s+", " ").Trim()
    End Function
End Class