电脑故障

位置:IT落伍者 >> 电脑故障 >> 浏览文章

采集网页图片代码


发布日期:2020/12/17
 
采集网页上图片的关键,在于怎么解析出页面代码里那些 img 标签的 src 属性。在网上找了一下,大多都是通过字符串操作找出 img 标签,这种方式操作起来比较麻烦,而且代码看起来比较累。这里我用的方法是:通过 WebBrowser 来加载一个页面,然后用 HtmlDocument 类来操作,省去了字符串操作的步骤,直接调用 GetElementsByTagName 把所有 img 元素返回到一个 HtmlElementCollection 对象里,再逐个读取它们的 src 属性得到图片地址。

代码如下

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace WindowsFormsApplication
{
    /// <summary>
    /// Loads a web page in a <see cref="WebBrowser"/> control and saves every image
    /// referenced by an <c>img</c> element of that page into a local directory.
    /// </summary>
    public class GatherPic
    {
        // NOTE(review): the numeric and string literals below were missing from the
        // published listing (punctuation and digits were stripped). The values are a
        // reconstruction - confirm against the original source if it is available.
        private const int SaveSucceeded = 1;
        private const int SaveFailed = 0;
        private const int RequestTimeoutMs = 10000;
        private const int BufferSize = 1024;
        private const string UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";

        private readonly string savePath;
        private readonly string getUrl;
        private WebBrowser wb;

        // Number of img elements found by the most recent SearchImgList() call.
        private int iImgCount;

        /// <summary>Stores the page address and the target directory.</summary>
        /// <param name="sWebUrl">Address of the page whose images are collected.</param>
        /// <param name="sSavePath">Directory the images are written to.</param>
        public GatherPic(string sWebUrl, string sSavePath)
        {
            this.getUrl = sWebUrl;
            this.savePath = sSavePath;
        }

        /// <summary>
        /// Starts loading the page. The images are collected later, when the browser
        /// control raises <c>DocumentCompleted</c> for the top-level document.
        /// </summary>
        /// <returns>
        /// <c>false</c> when no page address was supplied (a message box is shown);
        /// <c>true</c> once navigation has been requested.
        /// </returns>
        public bool start()
        {
            if (getUrl == null || getUrl.Trim().Equals(""))
            {
                MessageBox.Show("哪来的虾米连网址都没输!");
                return false;
            }

            this.wb = new WebBrowser();

            // Subscribe before navigating so the completion event cannot be missed.
            this.wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
            this.wb.Navigate(getUrl);
            return true;
        }

        /// <summary>Handles <c>WebBrowser.DocumentCompleted</c>.</summary>
        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            // The event is also raised when a frame/iframe inside the page finishes
            // loading; only the top-level document should trigger the image search.
            if (wb.Document == null || e.Url != wb.Document.Url)
            {
                return;
            }

            SearchImgList();
        }

        /// <summary>
        /// Finds every <c>img</c> element of the loaded document and downloads the
        /// image named by its <c>src</c> attribute into the save directory.
        /// Does nothing when no document has been loaded yet.
        /// </summary>
        public void SearchImgList()
        {
            if (this.wb == null || this.wb.Document == null)
            {
                return;
            }

            HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");
            this.iImgCount = elemColl.Count;

            foreach (HtmlElement elem in elemColl)
            {
                // Failures of individual images are ignored so one bad address does
                // not stop the rest of the page from being collected.
                SaveImageFromWeb(elem.GetAttribute("src"), this.savePath);
            }
        }

        /// <summary>Downloads one remote image into a directory.</summary>
        /// <param name="imgUrl">Absolute http/https address of the image.</param>
        /// <param name="path">Existing directory the file is written to.</param>
        /// <returns>
        /// 1 when the image was saved; 0 when the address is unusable, the server
        /// did not return an <c>image/*</c> content type, or the download or the
        /// file write failed.
        /// </returns>
        public int SaveImageFromWeb(string imgUrl, string path)
        {
            if (string.IsNullOrEmpty(imgUrl) || string.IsNullOrEmpty(path))
            {
                return SaveFailed;
            }

            Uri imgUri;
            if (!Uri.TryCreate(imgUrl, UriKind.Absolute, out imgUri))
            {
                return SaveFailed;
            }

            string imgName = GetFileNameFromUri(imgUri);
            if (imgName == null)
            {
                return SaveFailed;
            }

            string filePath = Path.Combine(path, imgName);

            try
            {
                // "as" instead of a hard cast: non-HTTP schemes (file:, ftp:) yield a
                // different WebRequest type and must not raise InvalidCastException.
                HttpWebRequest request = WebRequest.Create(imgUri) as HttpWebRequest;
                if (request == null)
                {
                    return SaveFailed;
                }

                request.UserAgent = UserAgent;
                request.Timeout = RequestTimeoutMs;

                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType;
                    if (contentType == null
                        || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                    {
                        return SaveFailed;
                    }

                    // CreateDirectory(path);
                    using (Stream stream = response.GetResponseStream())
                    using (FileStream fso = new FileStream(filePath, FileMode.Create))
                    {
                        // Copy until the stream ends rather than trusting ContentLength,
                        // which is -1 when the server does not send the header.
                        byte[] buffer = new byte[BufferSize];
                        int read;
                        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fso.Write(buffer, 0, read);
                        }
                    }

                    return SaveSucceeded;
                }
            }
            catch (WebException)
            {
                return SaveFailed;
            }
            catch (UriFormatException)
            {
                return SaveFailed;
            }
            catch (NotSupportedException)
            {
                // Raised for schemes with no registered request handler (e.g. data:).
                return SaveFailed;
            }
            catch (IOException)
            {
                return SaveFailed;
            }
            catch (UnauthorizedAccessException)
            {
                return SaveFailed;
            }
        }

        /// <summary>
        /// Returns the last path segment of <paramref name="imgUri"/> (query string
        /// and fragment excluded), or <c>null</c> when that segment is empty or
        /// contains characters that are not valid in a file name.
        /// </summary>
        private static string GetFileNameFromUri(Uri imgUri)
        {
            string urlPath = Uri.UnescapeDataString(imgUri.AbsolutePath);
            string name = urlPath.Substring(urlPath.LastIndexOf('/') + 1);

            if (name.Length == 0 || name.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
            {
                return null;
            }

            return name;
        }
    }
}

// Usage example. Place these statements in calling code that runs on a
// Windows Forms UI thread (for example a button click handler).
// NOTE(review): the page URL argument was missing from the published listing;
// the address below is a placeholder - replace it with the page to collect from.
GatherPic gatherpic = new GatherPic("http://www.example.com/", @"C:\test");

// Make sure the directory C:\test already exists.
gatherpic.start();

上一篇:用VB制作三维字体

下一篇:遍历主机的所有IP地址