Imports System.Xml
Imports System.Text.RegularExpressions
''' <summary>
''' Sample form that downloads a city index page and pulls the first quoted
''' five-digit city code out of it with a regular expression.
''' </summary>
Public Class Form1

    ' Matches five digits wrapped in double quotes. The "citycode" group
    ' deliberately keeps the same text as before, i.e. it INCLUDES the quotes.
    ' NOTE(review): if only the digits are wanted, move the quotes outside the group.
    Private Shared ReadOnly CityCodeRegex As New Regex("(?<citycode>""[0-9]{5}"")", RegexOptions.Compiled Or RegexOptions.IgnoreCase)

    ''' <summary>
    ''' Downloads the city list page, looks for the first city code and shows it
    ''' in a message box (an empty box is shown when nothing matches).
    ''' </summary>
    ''' <param name="sender">The button that raised the event.</param>
    ''' <param name="e">Event data (unused).</param>
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        Try
            ' XmlDocument is sufficient here; the obsolete XmlDataDocument added nothing.
            Dim objXml As New XmlDocument
            objXml.LoadXml("<root />")

            ' Fetch the full city list page.
            Dim pageContent As String = GetContent("http://localhost/CMA/index.htm", "gb2312")

            ' Container element for the parsed cities, stamped with today's date.
            ' It is prepared but not yet populated or attached to the document.
            Dim objXmlCityList As XmlElement = objXml.CreateElement("citylist")
            objXmlCityList.SetAttribute("vdatetime", DateTime.Now.ToShortDateString())

            ' Once the content is fetched, start parsing the data.
            ' Only the first match is reported; iterate with Match.NextMatch()
            ' (using its return value) to walk every city code.
            Dim mc As Match = CityCodeRegex.Match(pageContent)
            MsgBox(mc.Groups("citycode").Value)
        Catch ex As System.Exception
            ' Keep the handler from crashing the UI, but leave a trace instead of
            ' silently discarding the failure.
            System.Diagnostics.Debug.WriteLine(ex.ToString())
        End Try
    End Sub

    ''' <summary>
    ''' Downloads <paramref name="url"/> and decodes the response body as text.
    ''' </summary>
    ''' <param name="url">Address of the page to download.</param>
    ''' <param name="encoding">
    ''' Name of the text encoding to decode with (for example "utf-8" or "gb2312").
    ''' An empty or unrecognised name falls back to gb2312.
    ''' </param>
    ''' <returns>The decoded page text, or an empty string if the download or decoding fails.</returns>
    Private Function GetContent(ByVal url As String, ByVal encoding As String) As String
        Try
            ' Using guarantees the client is disposed even when the download throws.
            Using client As New Net.WebClient
                ' Mimic an Internet Explorer 6 request.
                client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*")
                client.Headers.Add("Accept-Language", "zh-cn")
                client.Headers.Add("UA-CPU", "x86")
                client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)")

                Dim buffer As Byte() = client.DownloadData(url)
                Return ResolveEncoding(encoding).GetString(buffer, 0, buffer.Length)
            End Using
        Catch ex As System.Exception
            ' Best-effort contract: callers receive "" on any failure.
            System.Diagnostics.Debug.WriteLine(ex.ToString())
            Return ""
        End Try
    End Function

    ''' <summary>
    ''' Maps an encoding name to an <see cref="System.Text.Encoding"/>, using
    ''' gb2312 when the name is empty or not recognised.
    ''' </summary>
    ''' <param name="name">Encoding name such as "utf-8"; may be Nothing.</param>
    ''' <returns>The requested encoding, or gb2312 as the fallback.</returns>
    Private Shared Function ResolveEncoding(ByVal name As String) As System.Text.Encoding
        If Not String.IsNullOrEmpty(name) Then
            Try
                Return System.Text.Encoding.GetEncoding(name)
            Catch ex As System.ArgumentException
                ' Unknown encoding name: fall through to the historical default.
            End Try
        End If
        Return System.Text.Encoding.GetEncoding("gb2312")
    End Function
End Class
本文介绍了一种利用正则表达式从HTML页面中抓取并解析城市列表的方法,通过实例展示了如何创建XML文档并使用正则表达式提取特定格式的城市代码。

585

被折叠的 条评论
为什么被折叠?



