java 如何下载网络图片
CreateTime--2017年9月30日11:18:19
Author:Marydon
说明:根据网络URL获取该网页上面所有的img标签并下载符合要求的所有图片
所需jar包:jsoup.jar
import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.List; import java.util.UUID; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements;/*** 图片批量下载工具类* @author Marydon* @create time 2016-9-3下午2:01:03 * @update time 2017年9月30日11:07:02* @E-mail:dellshouji@163.com*/ public class ImgDownloadUtil {/*** 根据URL获取网页DOM对象* @param url* 网址* @return DOM对象*/public static Document getHtmlDocument(String url) {Document document = null;URL urlObj = null;try {// 1.建立网络连接urlObj = new URL(url);// 2.根据url获取Document对象document = Jsoup.parse(urlObj, 5000);// 单位:毫秒超时时间 } catch (MalformedURLException e) {System.out.println("世界上最遥远的距离就是没有网,检查设置!");e.printStackTrace();} catch (IOException e) {System.out.println("您的网络连接打开失败,请稍后重试!");e.printStackTrace();}return document;}/*** 根据URL获取网页源码* @param url* 网址* @return 网页源码*/public static String getHtmlText(String url) {String htmlText = "";Document document = null;URL urlObj = null;try {// 1.建立网络连接urlObj = new URL(url);// 2.根据url获取Document对象document = Jsoup.parse(urlObj, 5000);// 单位:毫秒超时时间// 3.根据dom对象获取网页源码htmlText = document.html();} catch (MalformedURLException e) {System.out.println("世界上最遥远的距离就是没有网,检查设置!");e.printStackTrace();} catch (IOException e) {System.out.println("您的网络连接打开失败,请稍后重试!");e.printStackTrace();}return htmlText;}/*** 操作Dom对象获取图片地址* @param document* Dom对象* @return 图片地址集合*/public static List<String> getImgAddressByDom(Document document) {// 用于存储图片地址List<String> imgAddress = new ArrayList<String>();if (null != document) {// <img src="" alt="" width="" height=""/>// 获取页面上所有的图片元素Elements elements = document.getElementsByTag("img");String imgSrc = "";// 迭代获取图片地址for (Element el : 
elements) {imgSrc = el.attr("src");// imgSrc的内容不为空,并且以http://开头if ((!imgSrc.isEmpty()) && imgSrc.startsWith("http://")) {// 将有效图片地址添加到List中 imgAddress.add(imgSrc);}}}return imgAddress;}/*** 根据网络URL下载文件* @param url* 文件所在地址* @param fileName* 指定下载后该文件的名字* @param savePath* 文件保存根路径*/public static void downloadFileByUrl(String url, String fileName, String savePath) {URL urlObj = null;URLConnection conn = null;InputStream inputStream = null;BufferedInputStream bis = null;OutputStream outputStream = null;BufferedOutputStream bos = null;try {// 1.建立网络连接urlObj = new URL(url);// 2.打开网络连接conn = urlObj.openConnection();// 设置超时间为3秒conn.setConnectTimeout(3 * 1000);// 防止屏蔽程序抓取而返回403错误conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");// 3.得到输入流inputStream = conn.getInputStream();bis = new BufferedInputStream(inputStream);// 文件保存位置File saveDir = new File(savePath);if (!saveDir.exists()) {saveDir.mkdirs();}// 文件的绝对路径String filePath = savePath + File.separator + fileName;File file = new File(filePath);// 4.outputStream = new FileOutputStream(file);bos = new BufferedOutputStream(outputStream);byte[] b = new byte[1024];int len = 0;while ((len = bis.read(b)) != -1) {bos.write(b, 0, len);}System.out.println("info:" + url + " download success,fileRename=" + fileName);} catch (MalformedURLException e) {System.out.println("世界上最遥远的距离就是没有网,检查设置");System.out.println("info:" + url + " download failure");e.printStackTrace();} catch (IOException e) {System.out.println("您的网络连接打开失败,请稍后重试!");System.out.println("info:" + url + " download failure");e.printStackTrace();} finally {// 关闭流try {if (bis != null) {// 关闭字节缓冲输入流 bis.close();}if (inputStream != null) {// 关闭字节输入流 inputStream.close();}if (bos != null) {// 关闭字节缓冲输出流 bos.close();}if (outputStream != null) {// 关闭字节输出流 outputStream.close();}} catch (IOException e) {e.printStackTrace();}}}}
测试(将下面的main方法放入ImgDownloadUtil类中运行)
public static void main(String[] args) {// 1.确定网址String url = "http://www.cnblogs.com/Marydon20170307/p/7402871.html";// 2.获取该网页的Dom对象Document document = getHtmlDocument(url);// 3.获取该网页所有符合要求的图片地址List<String> imgAddresses = getImgAddressByDom(document);String imgName = "";String imgType = "";// 4.设置图片保存路径String savePath = "C:/Users/Marydon/Desktop";// 5.批量下载图片for (String imgSrc : imgAddresses) {// 5.1图片命名:图片名用32位字符组成的唯一标识imgName = UUID.randomUUID().toString().replace("-", "");// 5.2图片格式(类型)imgType = imgSrc.substring(imgSrc.lastIndexOf("."));imgName += imgType;// 5.3下载该图片 downloadFileByUrl(imgSrc, imgName, savePath);} }
相关推荐:
- Java读取并下载网络文件
- javaWeb 批量下载图片