GetUrl.aspx 代碼:
<%-- GetUrl.aspx: a URL input box plus a button that triggers scrapeButton_Click in the
     code-behind, followed by a summary label and a multi-line box for the result. --%>
<%-- NOTE(review): the Inherits namespace was reconstructed from a mangled paste;
     confirm it matches the project's root namespace. --%>
<%@ Page Language="VB" CodeBehind="GetUrl.aspx.vb" AutoEventWireup="false" Inherits="aspxWeb.GetUrl" %>
<html>
<head>
    <meta http-equiv="content-type" content="text/html; charset=gb2312">
</head>
<body>
    <form id="Form1" method="post" runat="server">
        <p>
            <asp:Label id="Label1" runat="server"></asp:Label>
            <%-- Default URL kept on one line so no surrounding whitespace ends up in Text. --%>
            <asp:TextBox id="urlTextBox" runat="server" Width="336px">http://lucky_elove.www1.dotnetplayground.com/</asp:TextBox>
            <asp:Button OnClick="scrapeButton_Click" id="scrapeButton" runat="server"></asp:Button>
        </p>
        <hr size="1">
        <p>
            <asp:Label id="TipResult" runat="server"></asp:Label>
            <asp:TextBox id="resultLabel" runat="server" TextMode="MultiLine"
                Width="100%" Height="400"></asp:TextBox>
        </p>
    </form>
</body>
</html>
後置代碼(code-behind)GetUrl.aspx.vb:
Imports System.IO
Imports System.Net
Imports System.Text
Imports System.Text.RegularExpressions
Imports System
Public Class GetUrl
Inherits System.Web.UI.Page
' Server controls whose ids match the controls declared in GetUrl.aspx.
' Each declaration needs the As keyword between the field name and its type.
Protected WithEvents Label1 As System.Web.UI.WebControls.Label
Protected WithEvents urlTextBox As System.Web.UI.WebControls.TextBox
Protected WithEvents scrapeButton As System.Web.UI.WebControls.Button
Protected WithEvents TipResult As System.Web.UI.WebControls.Label
Protected WithEvents resultLabel As System.Web.UI.WebControls.TextBox
#Region " Web 窗體設計器生成的代碼 "
' This call is required by the Web Form Designer.
' The body is empty; Page_Init invokes it on every page initialization.
<System.Diagnostics.DebuggerStepThrough()> Private Sub InitializeComponent()
End Sub
' Handles the page's Init event and forwards to InitializeComponent().
Private Sub Page_Init(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Init
    ' CODEGEN: this method call is required by the Web Form Designer.
    ' Do not modify it using the code editor.
    InitializeComponent()
End Sub
#End Region
' Handles the page's Load event: sets the prompt label text and the button caption.
Private Sub Page_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
    ' Put user code to initialize the page here.
    Label1.Text = "請輸入一個URL地址:"
    scrapeButton.Text = "分離Href"
End Sub
' Text that scrapeButton_Click appends to the summary shown in TipResult.
' NOTE(review): presumably filled in by MatchHandler - confirm against its full body.
Private report As New StringBuilder()

' Page source being processed; assigned from GrabUrl() in scrapeButton_Click and
' read by MatchHandler.
Private webPage As String

' Value displayed as the number of links found.
' NOTE(review): no visible code increments it; presumably MatchHandler does - confirm.
Private countOfMatches As Int32
' Click handler for scrapeButton (wired through OnClick in the .aspx markup).
' Stores the result of GrabUrl() in webPage, passes every matching anchor tag
' through MatchHandler, then writes a summary to TipResult and the rewritten
' page source to resultLabel.
' NOTE(review): GrabUrl() is defined outside this view; presumably it downloads
' the address typed into urlTextBox - confirm.
Public Sub scrapeButton_Click(ByVal sender As System.Object, ByVal e As System.EventArgs)
    webPage = GrabUrl()

    Dim myDelegate As New MatchEvaluator(AddressOf MatchHandler)

    ' Matches an <a ...> tag whose quoted href value does not start with
    ' http:// or mailto:, capturing that value in the named group
    ' "foundAnchor" (the group name MatchHandler reads).
    Dim linksExpression As New Regex( _
        "\<a.+?href=['""](?!http\:\/\/)(?!mailto\:)(?<foundAnchor>[^'""]+?)[^>]*?\>", _
        RegexOptions.Multiline Or RegexOptions.IgnoreCase Or RegexOptions.IgnorePatternWhitespace)

    Dim newWebPage As String = linksExpression.Replace(webPage, myDelegate)

    ' The typed URL is user input echoed into HTML, so it is HTML-encoded first.
    TipResult.Text = "<h2>從 " & Server.HtmlEncode(urlTextBox.Text) & "分離出的Href</h2>" & _
        "<b>找到並整理" & countOfMatches.ToString() & " 個</b><br><br>" & _
        report.ToString().Replace(Environment.NewLine, "<br>")

    ' Append (&=) rather than assign, so the summary built above is not discarded.
    TipResult.Text &= "<h2>整理過的頁面</h2><script>window.document.title='抓取中的'</script>"

    resultLabel.Text = newWebPage
End Sub
Public Function MatchHandler(ByVal mMatch)String
Dim linkString = m.Groups("foundAnchor").Value
Dim rToLNew Regex("^", RegexOptions.Multiline Or RegexOptions.RightToLeft)
Dim col, rowInt32
Dim lineBeginInt32 = rToL.Match(webPage, m.Index).Index
row = rToL.Matches(webPage, m.Index).Count