|
主题:[反馈BUG]采集图片分页重复了,如何解决? [收藏主题] | 转到: |
qq360060316 发表于:2011/8/24 21:13:40 | 只看该作者 查看该作者主题
楼主
遇到有下一页的图片分页,采集是把下一页也采集了。这里的下一页就是第二页的内容,结果就重复了一页。 看了采集网址代码,发现没有重复过滤功能。请高手帮忙加个判断,感谢!!! '================================================== '函数名:GetArray '作 用:提取链接地址,以$Array$分隔 '参 数:ConStr ------提取地址的原字符 '参 数:StartStr ------开始字符串 '参 数:OverStr ------结束字符串 '参 数:IncluL ------是否包含StartStr '参 数:IncluR ------是否包含OverStr '================================================== Function GetArray(Byval Constr, StartStr, OverStr, IncluL, IncluR) If Constr = "Error" Or Constr = "" Or IsNull(Constr) = True Or StartStr = "" Or OverStr = "" Or IsNull(StartStr) = True Or IsNull(OverStr) = True Then GetArray = "Error" Exit Function End If Dim TempStr, TempStr2, objRegExp, Matches, Match TempStr = "" Set objRegExp = New RegExp objRegExp.IgnoreCase = True objRegExp.Global = True objRegExp.Pattern = "(" & StartStr & ").+?(" & OverStr & ")" Set Matches = objRegExp.Execute(Constr) For Each Match In Matches TempStr = TempStr & "$Array$" & Match.value Next Set Matches = Nothing If TempStr = "" Then GetArray = "Error" Exit Function End If TempStr = Right(TempStr, Len(TempStr) - 7) If IncluL = False Then objRegExp.Pattern = StartStr TempStr = objRegExp.Replace(TempStr, "") End If If IncluR = False Then objRegExp.Pattern = OverStr TempStr = objRegExp.Replace(TempStr, "") End If Set objRegExp = Nothing Set Matches = Nothing 'TempStr = Replace(TempStr, """", "") 'TempStr = Replace(TempStr, "'", "") ' TempStr = Replace(TempStr, " ", "") 'TempStr = Replace(TempStr, "(", "") 'TempStr = Replace(TempStr, ")", "") If TempStr = "" Then GetArray = "Error" Else GetArray = TempStr End If End Function '================================================== '函数名:DefiniteUrl '作 用:将相对地址转换为绝对地址*** '参 数:PrimitiveUrlStr ------要转换的相对地址 '参 数:ConsultUrlStr ------当前网页地址 '================================================== 'Function DefiniteUrl(ByVal PrimitiveUrlStr, ByVal ConsultUrlStr) Function DefiniteUrl(ByVal URL,ByVal CurrentUrl) Dim strUrl If Len(URL) < 2 Or Len(URL) > 255 Or Len(CurrentUrl) < 2 Then DefiniteUrl = vbNullString Exit Function End If CurrentUrl = Trim(Replace(Replace(Replace(Replace(CurrentUrl, "'", vbNullString), """", vbNullString), vbNewLine, vbNullString), "\", "/")) URL = Trim(Replace(Replace(Replace(Replace(URL, "'", vbNullString), """", vbNullString), vbNewLine, vbNullString), "\", "/")) If InStr(9, CurrentUrl, "/") = 0 Then strUrl = CurrentUrl Else strUrl = Left(CurrentUrl, InStr(9, CurrentUrl, "/") - 1) End If If strUrl = vbNullString Then strUrl = CurrentUrl Select Case Left(LCase(URL), 6) Case "http:/", "https:", "ftp://", "rtsp:/", "mms://" DefiniteUrl = URL Exit Function End Select If Left(URL, 1) = "/" Then DefiniteUrl = strUrl & URL Exit Function End If If Left(URL, 3) = "../" Then Dim ArrayUrl Dim ArrayCurrentUrl Dim ArrayTemp() Dim strTemp Dim i, n Dim c, l n = 0 ArrayCurrentUrl = Split(CurrentUrl, "/") ArrayUrl = Split(URL, "../") c = UBound(ArrayCurrentUrl) l = UBound(ArrayUrl) + 1 If c > l + 2 Then For i = 0 To c - l ReDim Preserve ArrayTemp(n) ArrayTemp(n) = ArrayCurrentUrl(i) n = n + 1 Next strTemp = Join(ArrayTemp, "/") Else strTemp = strUrl End If URL = Replace(URL, "../", vbNullString) DefiniteUrl = strTemp & "/" & URL Exit Function End If strUrl = Left(CurrentUrl, InStrRev(CurrentUrl, "/")) DefiniteUrl = strUrl & Replace(URL, "./", vbNullString) Exit Function End Function '================================================== '函数名:GetPage '作 用:获取分页 '================================================== Function GetPage(ByVal Constr, StartStr, OverStr, IncluL, IncluR) If Constr = "Error" Or Constr = "" Or StartStr = "" Or OverStr = "" Or IsNull(Constr) = True Or IsNull(StartStr) = True Or IsNull(OverStr) = True Then GetPage = "Error" Exit Function End If Dim Start, Over, ConTemp, TempStr TempStr = LCase(Constr) StartStr = LCase(StartStr) OverStr = LCase(OverStr) Over = InStr(1, TempStr, OverStr) If Over <= 0 Then GetPage = "Error" Exit Function Else If IncluR = True Then Over = Over + Len(OverStr) End If End If TempStr = Mid(TempStr, 1, Over) Start = InStrRev(TempStr, StartStr) If IncluL = False Then Start = Start + Len(StartStr) End If If Start <= 0 Or Start >= Over Then GetPage = "Error" Exit Function End If ConTemp = Mid(Constr, Start, Over - Start) ConTemp = Trim(ConTemp) ConTemp = Replace(ConTemp, " ", "") ConTemp = Replace(ConTemp, ",", "") ConTemp = Replace(ConTemp, "'", "") ConTemp = Replace(ConTemp, """", "") ConTemp = Replace(ConTemp, ">", "") ConTemp = Replace(ConTemp, "<", "") ConTemp = Replace(ConTemp, " ", "") GetPage = ConTemp End Function Function CheckDir(ByVal FolderPath) Dim fso Set fso = KS.InitialObject(KS.Setting(99)) If fso.FolderExists(Server.MapPath(FolderPath)) Then '存在 CheckDir = True Else '不存在 CheckDir = False End If Set fso = Nothing End Function Function MakeNewsDir(ByVal foldername) Dim fso Set fso = KS.InitialObject(KS.Setting(99)) fso.CreateFolder (Server.MapPath(foldername)) If fso.FolderExists(Server.MapPath(foldername)) Then MakeNewsDir = True Else MakeNewsDir = False End If Set fso = Nothing End Function |
|
支持(9) | 反对(8) 顶端 底部 |
kesion开发论坛了[/u] [url]http://www.kesion.com[/url] <a href="http://www.kesion.com">支持你,不是我的错!</a> | |
支持(10) | 反对(10) 顶端 底部 |
支持(6) | 反对(4) 顶端 底部 |
支持(2) | 反对(0) 顶端 底部 |
支持(0) | 反对(0) 顶端 底部 |
<上一主题 | 下一主题 > |