java

位置:IT落伍者 >> java >> 浏览文章

Java获取网络文件并插入数据库


发布日期:2023年08月05日
 
Java获取网络文件并插入数据库

抓取各大网站的数据并插入数据库,这样就不用为没有数据而烦恼了。

获取百度的歌曲名、歌手和链接:

package webTools;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStreamReader;

import java.io.UnsupportedEncodingException;

import java.net.MalformedURLException;

import java.net.URL;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.List;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import dbTools.DBTools;

/**
 * Scraping helper from the article: downloads a page, narrows it down with
 * regular expressions and passes (song name, performer, link) rows to
 * DBTools.update.
 *
 * NOTE(review): this listing was damaged when the article was published. The
 * member-access dots, commas, string literals and numeric literals are missing
 * throughout (e.g. "String rowContent = ;"), so it does not compile as shown.
 * The code is left untouched because several of the lost literals (regular
 * expressions, list indices) cannot be recovered from the page.
 */
public class IOTOWeb {

/**
 * Reads the resource at the given URL line by line and returns the lines
 * appended to each other with no separator.
 *
 * Exceptions are caught and printed, after which whatever was accumulated so
 * far is returned.
 *
 * NOTE(review): the close call sits inside the try body after the loop, so it
 * is skipped when reading throws. The charset argument is damaged (only "gb"
 * is left) - presumably a GB-family encoding; confirm.
 *
 * @param htmlURL address of the page to download
 * @return the accumulated page text
 */
public String getHtmlContent(String htmlURL) {

URL url = null;

String rowContent = ;

StringBuffer htmlContent = new StringBuffer();

try {

url = new URL(htmlURL);

BufferedReader in = new BufferedReader(new InputStreamReader(url

openStream() gb));

while ((rowContent = inreadLine()) != null) {

htmlContentappend(rowContent);

}

inclose();

} catch (MalformedURLException e) {

// Report the failure and fall through to the return below.

eprintStackTrace();

} catch (UnsupportedEncodingException e) {

// Report the failure and fall through to the return below.

eprintStackTrace();

} catch (IOException e) {

// Report the failure and fall through to the return below.

eprintStackTrace();

}

return htmlContenttoString();

}

/**
 * Collects every match of the table-cell pattern found in the given text.
 * Each list element is the full matched text.
 *
 * @param htmlContent text to search
 * @return the matches in the order they were found
 */
public List getLink(String htmlContent) {

ArrayList listLink = new ArrayList();

// NOTE(review): the quote characters and the '.' before "*?" are missing from this pattern.
String regex = <td[^>]*>[\\(]*<a[^>]*href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))[^>]*>(*?)[\\)]*[\\s]*</td>;

Pattern pattern = pile(regex PatternDOTALL);

Matcher matcher = patternmatcher(htmlContent);

while (matcherfind()) {

listLinkadd(matchergroup());

}

return listLink;

}

/**
 * Collects one entry per href match in the given text. Each match is passed
 * through replaceFirst and replace before being stored.
 *
 * NOTE(review): the arguments of replaceFirst/replace are damaged; presumably
 * they strip the leading href= and the surrounding quotes - confirm.
 *
 * @param htmlContent text to search
 * @return the processed matches in the order they were found
 */
public List<String> getHref(String htmlContent) {

String regex;

List listtHref = new ArrayList();

// NOTE(review): pattern is damaged (quotes lost; the tail after the last group is unclear).
regex = href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))\;

Pattern pa = pile(regex PatternDOTALL);

Matcher ma = pamatcher(htmlContent);

while (mafind()) {

listtHrefadd(magroup()replaceFirst(href=\ )replace(\

));

}

return listtHref;

}

/**
 * Collects one entry per match of the performer pattern in the given text,
 * applying the same replaceFirst/replace post-processing as getHref.
 *
 * NOTE(review): this pattern is the most heavily damaged one in the listing
 * (its beginning is cut off), so what exactly it matched cannot be told from
 * here.
 *
 * @param htmlContent text to search
 * @return the processed matches in the order they were found
 */
public List<String> getPerson(String htmlContent) {

String regex;

List list = new ArrayList();

regex = ]*href=(\"([^\"]*)\"|\([^\]*)\|([^\\s>]*))[^>]*>(*?)\\>\\(<a[^>]*href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))[^>]*>(*?)\\);

Pattern pa = pile(regex PatternDOTALL);

Matcher ma = pamatcher(htmlContent);

while (mafind()) {

listadd(magroup()replaceFirst(href=\ )replace(\ ));

}

return list;

}

/**
 * Collects every match of the anchor pattern found in the given text. Each
 * element is the full matched text.
 *
 * @param htmlContent text to search
 * @return the matches in the order they were found
 */
public List<String> getSongName(String htmlContent) {

String regex;

// Despite its name this list holds the anchor matches returned by this method.
List listPerson = new ArrayList();

// NOTE(review): the quote characters and the '.' before "*?" are missing from this pattern.
regex = <a[^>]*href=(\([^\]*)\|\([^\]*)\|([^\\s>]*))[^>]*>(*?)</a>\\s;

Pattern pa = pile(regex PatternDOTALL);

Matcher ma = pamatcher(htmlContent);

while (mafind()) {

listPersonadd(magroup());

}

return listPerson;

}

/**
 * Concatenates every match of the table pattern found in the given text.
 *
 * @param htmlContent text to search
 * @return all matches appended to each other; empty when nothing matched
 */
public String getMainContent(String htmlContent) {

// NOTE(review): attribute values in this pattern lost their quotes and digits (e.g. the width percentage).
String regex = <table width=\%\ align=\center\ cellpadding=\\ cellspacing=\\ class=\list\>(*?)</table>;

StringBuffer mainContent = new StringBuffer();

Pattern pattern = pile(regex PatternDOTALL);

Matcher matcher = patternmatcher(htmlContent);

while (matcherfind()) {

mainContentappend(matchergroup());

}

return mainContenttoString();

}

/**
 * Returns the given text with every match of the pattern replaced.
 *
 * NOTE(review): both arguments are damaged; presumably this removes markup
 * tags by replacing them with an empty string - confirm.
 *
 * @param s text to clean
 * @return the text after replacement
 */
public String outTag(final String s) {

return sreplaceAll(<*?> );

}

// Database helper used by getFromBaiduMap; created together with this object.
DBTools dbTools = new DBTools();

/**
 * Downloads the page, extracts its main table, and for every cell found by
 * getLink inserts one row (song name, performer, link) through DBTools.update.
 *
 * NOTE(review): the loop start values, the list indices passed to get() and
 * the empty-list comparison value are missing from the listing (presumably 0).
 *
 * @param htmlURL address of the chart page
 * @throws Throwable whatever the helper calls or DBTools.update throw
 */
public void getFromBaiduMap(String htmlURL) throws Throwable {

// NOTE(review): this map is never read or written below.
HashMap htmlContentMap = new HashMap();

String htmlContent = getHtmlContent(htmlURL);

String mainContent = getMainContent(htmlContent);

List listLink = getLink(mainContent);

for (int j = ; j < listLinksize(); j++) {

String tdTag = listLinkget(j)toString();

List songNameList = getSongName(tdTag);

// NOTE(review): no size check precedes this get(); if the cell yields no anchor match this presumably throws.
String songName = outTag(songNameListget()toString());

List personList = getPerson(tdTag);

String songPerson = ;

if (personListsize() != ) {

// Each iteration overwrites songPerson, so only the last entry is kept.
for (int n = ; n < personListsize(); n++) {

// Systemoutprintln(personListget(n)toString());

songPerson = outTag(personListget(n)toString());

}

} else {

songPerson = ;

}

// Systemoutprint(songNameListget()toString());

List hrefList = getHref(songNameListget()toString());

String songHref = hrefListget()toString();

Systemoutprintln();

// Values are bound as statement parameters rather than concatenated into the SQL text.
String sql = insert into song(songNamesongPersonsongHref) values(???);

ArrayList list_values = new ArrayList();

list_valuesadd(songName);

list_valuesadd(songPerson);

list_valuesadd(songHref);

dbToolsupdate(sql list_values);

}

}

}

DBTools数据库链接类

package dbTools;

import java.util.ArrayList;

import java.sql.*;

/**
 * Small JDBC helper: opens one MySQL connection when constructed and runs
 * parameterised statements against it.
 *
 * <p>All calls share the single connection held by this object, so an instance
 * should not be used from several threads at once.
 *
 * <p>NOTE(review): the connection settings are hard-coded, including the
 * password; consider moving them to configuration.
 */
public class DBTools {

    private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";

    // NOTE(review): the published listing lost the port digits ("localhost:/TestURL").
    // The port is left out here so the driver uses its default; put the explicit
    // port back if the server listens somewhere else.
    private static final String JDBC_URL = "jdbc:mysql://localhost/TestURL";

    private static final String DB_USER = "root";

    private static final String DB_PASSWORD = "zhuyi";

    /** Connection opened by the constructor; stays {@code null} when connecting failed. */
    private Connection connection;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>Failures are printed and do not abort construction (callers create this
     * object in field initialisers); {@link #query} and {@link #update} then
     * report the missing connection with a {@link SQLException}.
     */
    public DBTools() {
        try {
            Class.forName(DRIVER_CLASS);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        try {
            connection = DriverManager.getConnection(JDBC_URL, DB_USER, DB_PASSWORD);
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a parameterised SELECT and returns every row as an array of strings.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order; {@code null} means none
     * @return one {@code String[]} per row, one element per column
     * @throws Throwable {@link SQLException} when no connection is available or the database
     *                   rejects the statement
     */
    public ArrayList<String[]> query(String sql, ArrayList<?> list_values) throws Throwable {
        ArrayList<String[]> listRows = new ArrayList<String[]>();
        PreparedStatement statement = requireConnection().prepareStatement(sql);
        try {
            bindParameters(statement, list_values);
            ResultSet rows = statement.executeQuery();
            try {
                // The column count is the same for every row, so read it once.
                int columnCount = rows.getMetaData().getColumnCount();
                while (rows.next()) {
                    String[] rowinfo = new String[columnCount];
                    for (int i = 0; i < columnCount; i++) {
                        rowinfo[i] = rows.getString(i + 1); // JDBC columns are 1-based
                    }
                    listRows.add(rowinfo);
                }
            } finally {
                rows.close();
            }
        } finally {
            // Always release the statement, including when execution fails.
            statement.close();
        }
        return listRows;
    }

    /**
     * Runs a parameterised INSERT/UPDATE/DELETE.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders in order; {@code null} means none
     * @throws Throwable {@link SQLException} when no connection is available or the database
     *                   rejects the statement
     */
    public void update(String sql, ArrayList<?> list_values) throws Throwable {
        PreparedStatement statement = requireConnection().prepareStatement(sql);
        try {
            bindParameters(statement, list_values);
            statement.executeUpdate();
        } finally {
            statement.close();
        }
    }

    /** Returns the open connection or fails with a clear error instead of a NullPointerException. */
    private Connection requireConnection() throws SQLException {
        if (connection == null) {
            throw new SQLException(
                    "No database connection is available; see the error printed when DBTools was created");
        }
        return connection;
    }

    /** Binds the values to the statement's placeholders in list order. */
    private static void bindParameters(PreparedStatement statement, ArrayList<?> values)
            throws SQLException {
        if (values == null) {
            return;
        }
        for (int i = 0; i < values.size(); i++) {
            statement.setObject(i + 1, values.get(i)); // JDBC parameters are 1-based
        }
    }
}

Servlet调用

代码

package controller;

import java.io.IOException;

import java.io.PrintWriter;

import java.util.List;

import javax.servlet.ServletException;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import webTools.IOTOWeb;

/**
 * Servlet that triggers the song scraper on GET and prints a fixed HTML page on POST.
 *
 * NOTE(review): this listing was damaged in publication - dots, commas and
 * string literals are missing - so it does not compile as shown.
 */
public class TestURL extends HttpServlet {

/**
 * Creates the servlet; only calls the superclass constructor.
 */

public TestURL() {

super();

}

/**
 * Called when the servlet is taken out of service; only delegates to the
 * superclass.
 */

public void destroy() {

superdestroy(); // Just puts destroy string in log

// No additional cleanup is performed here.

}

/**
 * Handles GET: creates an IOTOWeb and runs getFromBaiduMap once. Nothing is
 * written to the response.
 *
 * NOTE(review): the URL argument is damaged in this listing (only "?id=?top"
 * is left), so the page that was scraped cannot be told from here.
 *
 * @param request
 *            the request sent by the client to the server
 * @param response
 *            the response sent by the server to the client
 * @throws ServletException
 *             declared by the servlet API; not thrown by this body
 * @throws IOException
 *             declared by the servlet API; not thrown by this body
 */

public void doGet(HttpServletRequest request HttpServletResponse response)

throws ServletException IOException {

try {

IOTOWeb iotoWeb = new IOTOWeb();

iotoWebgetFromBaiduMap(?id=?top);

} catch (Throwable e) {

// Any failure of the scraper is printed and the request still completes.

eprintStackTrace();

}

}

/**
 * Handles POST: writes a small fixed HTML page that names this class.
 *
 * @param request
 *            the request sent by the client to the server
 * @param response
 *            the response sent by the server to the client
 * @throws ServletException
 *             declared by the servlet API; not thrown by this body
 * @throws IOException
 *             if obtaining the response writer fails
 */

public void doPost(HttpServletRequest request HttpServletResponse response)

throws ServletException IOException {

responsesetContentType(text/html);

PrintWriter out = responsegetWriter();

// NOTE(review): the DOCTYPE text below lost its quotes, dots and digits.
out

println(<!DOCTYPE HTML PUBLIC \//WC//DTD HTML Transitional//EN\>);

outprintln(<HTML>);

outprintln( <HEAD><TITLE>A Servlet</TITLE></HEAD>);

outprintln( <BODY>);

outprint( This is );

outprint(thisgetClass());

outprintln( using the POST method);

outprintln( </BODY>);

outprintln(</HTML>);

outflush();

outclose();

}

/**
 * Servlet initialisation hook; intentionally empty.
 *
 * @throws ServletException
 *             declared by the servlet API; not thrown by this body
 */

public void init() throws ServletException {

// Nothing to initialise.

}

}

获取金书网的图书名

代码

package webTools;

import java.io.BufferedReader;

import java.io.InputStreamReader;

import java.net.URL;

import java.util.ArrayList;

import java.util.List;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import dbTools.DBTools;

public class GetBook {

public String getHtmlContent(String htmlURL) throws Throwable {

URL url = null;

String rowContent = ;

StringBuffer htmlContent = new StringBuffer();

url = new URL(htmlURL);

BufferedReader in = new BufferedReader(new InputStreamReader(url

openStream() gb));

while ((rowContent = inreadLine()) != null) {

htmlContentappend(rowContent);

}

inclose();

return htmlContenttoString();

}

public String getBookName(String htmlContent) {

String bookName = ;

String regex = <span class=\style\>[^>]*</span>;

Pattern pattern = pile(regex PatternDOTALL);

Matcher matcher = patternmatcher(htmlContent);

if (matcherfind()) {

bookName = matchergroup();

}

return bookName;

}

public String outTag(final String s) {

return sreplaceAll(<*?> );

}

DBTools dbtools = new DBTools();

public void getFromJINSHU(String htmlURL) throws Throwable {

String htmlContent = getHtmlContent(htmlURL);

String bookName = outTag(getBookName(htmlContent));

if (bookName != null && !equals(bookName)) {

Systemoutprintln(bookName);

String sql = insert into bookinfo(bookName) values(?);

ArrayList list_values = new ArrayList();

list_valuesadd(bookName);

dbtoolsupdate(sql list_values);

}

}

}

调用Servlet

代码

package controller;

import java.io.IOException;

import java.io.PrintWriter;

import javax.servlet.ServletException;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import webTools.GetBook;

/**
 * Servlet that walks a range of book pages and feeds each one to
 * GetBook.getFromJINSHU. doGet and doPost contain the same loop.
 *
 * NOTE(review): this listing was damaged in publication - dots, commas, string
 * literals and numbers are missing - so it does not compile as shown.
 */
public class TestBook extends HttpServlet {

/**
 * Creates the servlet; only calls the superclass constructor.
 */

public TestBook() {

super();

}

/**
 * Called when the servlet is taken out of service; only delegates to the
 * superclass.
 */

public void destroy() {

superdestroy(); // Just puts destroy string in log

// No additional cleanup is performed here.

}

/**
 * Page counter used by the loops in both doGet and doPost.
 *
 * It is an instance field, so its value is kept between calls on this object
 * and neither loop resets it. Its initial value is missing from this listing.
 * NOTE(review): presumably one servlet instance serves many requests, in which
 * case concurrent requests would share this counter - confirm.
 */

int i = ;

/**
 * Handles GET: for each counter value up to a bound, builds a page URL from
 * the counter and passes it to GetBook.getFromJINSHU.
 *
 * NOTE(review): the URL prefix and suffix and the loop bound are damaged in
 * this listing. On any failure the counter is incremented and doPost is
 * called; doPost runs the same loop and calls doGet on failure, so every
 * failing page nests the calls one level deeper. When the nested call returns,
 * the loop's own increment runs as well.
 *
 * @param request
 *            the request sent by the client to the server
 * @param response
 *            the response sent by the server to the client
 * @throws ServletException
 *             propagated from the nested doPost call
 * @throws IOException
 *             propagated from the nested doPost call
 */
public void doGet(HttpServletRequest request HttpServletResponse response)

throws ServletException IOException {

GetBook bookinfo = new GetBook();

for (; i < ; i++) {

String bookURL = /booksinfo// + i

+ l;

try {

bookinfogetFromJINSHU(bookURL);

} catch (Throwable e) {

// Skip the failing page, then continue in doPost (see the note above).
i++;

doPost(request response);

}

}

}

/**
 * Handles POST: same loop as doGet, except that a failure continues in doGet.
 *
 * NOTE(review): see doGet for the damaged literals and the mutual recursion.
 *
 * @param request
 *            the request sent by the client to the server
 * @param response
 *            the response sent by the server to the client
 * @throws ServletException
 *             propagated from the nested doGet call
 * @throws IOException
 *             propagated from the nested doGet call
 */

public void doPost(HttpServletRequest request HttpServletResponse response)

throws ServletException IOException {

GetBook bookinfo = new GetBook();

for (; i < ; i++) {

String bookURL = /booksinfo// + i

+ l;

try {

bookinfogetFromJINSHU(bookURL);

} catch (Throwable e) {

// Skip the failing page, then continue in doGet.
i++;

doGet(request response);

}

}

}

/**
 * Servlet initialisation hook; intentionally empty.
 *
 * @throws ServletException
 *             declared by the servlet API; not thrown by this body
 */

public void init() throws ServletException {

// Nothing to initialise.

}

}

每种功能的实现方法有很多,希望各位可以交流不同的思想和方法。

               

上一篇:JAVA实现图片的任意方向反转

下一篇:Java Socket多线程服务端、客户端