自动解析页面编码,并转换为相应编码的字符
/**
 * Performs an HTTP GET on {@code url} and returns the response body as text.
 *
 * <p>A new {@code DefaultHttpClient} is created for the call and its connection
 * manager is shut down before the method returns, whether or not the request
 * succeeded. The body is decoded using the charset reported by
 * {@code getContentCharSet} for the response entity.
 *
 * @param url the address to fetch
 * @return the decoded response body, or an empty string when the response
 *         carries no entity
 * @throws ClientProtocolException declared for HTTP protocol errors raised while executing the request
 * @throws IOException declared for I/O failures while executing the request or reading the body
 * @throws URISyntaxException if {@code new java.net.URI(url)} rejects the string
 */
public static String httpGet(String url) throws ClientProtocolException, IOException, URISyntaxException {
    final HttpClient client = new DefaultHttpClient();
    try {
        // Connection timeout: 5000 (milliseconds, per the HttpCore parameter docs).
        client.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 5000);
        // Read (socket) timeout: 5000.
        client.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, 5000);

        final HttpGet request = new HttpGet(url);
        // Present a desktop browser User-Agent to the server.
        request.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31");
        // The request URI is assigned again from the same string through java.net.URI.
        request.setURI(new java.net.URI(url));

        final HttpEntity body = client.execute(request).getEntity();
        if (body == null) {
            // No entity on the response: nothing to decode.
            return "";
        }
        // Decode the body with the charset resolved for this entity.
        return EntityUtils.toString(body, getContentCharSet(body));
    } finally {
        // Always release the client's connections, even when the request failed.
        client.getConnectionManager().shutdown();
    }
}
/**
 * Resolves the character set name to use when decoding an entity.
 *
 * <p>The name is taken from the {@code charset} parameter of the first element
 * of the entity's Content-Type header. When the header is missing, has no
 * elements, has no {@code charset} parameter, or {@code StringUtils.isEmpty}
 * reports the declared value as empty, the result is {@code "UTF-8"}.
 *
 * @param entity must not be null
 * @return the declared character set name, or {@code "UTF-8"} when none is usable
 * @throws ParseException if header elements cannot be parsed
 * @throws IllegalArgumentException if entity is null
 */
public static String getContentCharSet(final HttpEntity entity)
        throws ParseException {
    if (entity == null) {
        throw new IllegalArgumentException("HTTP entity may not be null");
    }

    String declared = null;
    if (entity.getContentType() != null) {
        final HeaderElement[] elements = entity.getContentType().getElements();
        // Only the first header element is inspected for a charset parameter.
        final NameValuePair charsetParam =
                elements.length > 0 ? elements[0].getParameterByName("charset") : null;
        if (charsetParam != null) {
            declared = charsetParam.getValue();
        }
    }

    // Fall back to UTF-8 whenever no usable charset was declared.
    return StringUtils.isEmpty(declared) ? "UTF-8" : declared;
}