/*
|
* Copyright (c) 2018-2028, DreamLu All rights reserved.
|
*
|
* Redistribution and use in source and binary forms, with or without
|
* modification, are permitted provided that the following conditions are met:
|
*
|
* Redistributions of source code must retain the above copyright notice,
|
* this list of conditions and the following disclaimer.
|
* Redistributions in binary form must reproduce the above copyright
|
* notice, this list of conditions and the following disclaimer in the
|
* documentation and/or other materials provided with the distribution.
|
* Neither the name of the dreamlu.net developer nor the names of its
|
* contributors may be used to endorse or promote products derived from
|
* this software without specific prior written permission.
|
* Author: DreamLu 卢春梦 (596392912@qq.com)
|
*/
|
package org.springblade.core.http;
|
|
import org.jsoup.helper.DataUtil;
|
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Element;
|
import org.jsoup.parser.Parser;
|
import org.jsoup.select.Elements;
|
import org.springblade.core.tool.utils.Exceptions;
|
import org.springframework.cglib.proxy.Enhancer;
|
|
import java.io.IOException;
|
import java.io.InputStream;
|
import java.nio.charset.StandardCharsets;
|
import java.util.ArrayList;
|
import java.util.List;
|
|
/**
|
* 爬虫 xml 转 bean 基于 jsoup
|
*
|
* @author L.cm
|
*/
|
public class DomMapper {
|
|
/**
|
* Returns body to jsoup Document.
|
*
|
* @return Document
|
*/
|
public static Document asDocument(ResponseSpec response) {
|
return readDocument(response.asString());
|
}
|
|
/**
|
* 将流读取为 jsoup Document
|
*
|
* @param inputStream InputStream
|
* @return Document
|
*/
|
public static Document readDocument(InputStream inputStream) {
|
try {
|
return DataUtil.load(inputStream, StandardCharsets.UTF_8.name(), "");
|
} catch (IOException e) {
|
throw Exceptions.unchecked(e);
|
}
|
}
|
|
/**
|
* 将 html 字符串读取为 jsoup Document
|
*
|
* @param html String
|
* @return Document
|
*/
|
public static Document readDocument(String html) {
|
return Parser.parse(html, "");
|
}
|
|
/**
|
* 读取 xml 信息为 java Bean
|
*
|
* @param inputStream InputStream
|
* @param clazz bean Class
|
* @param <T> 泛型
|
* @return 对象
|
*/
|
public static <T> T readValue(InputStream inputStream, final Class<T> clazz) {
|
return readValue(readDocument(inputStream), clazz);
|
}
|
|
/**
|
* 读取 xml 信息为 java Bean
|
*
|
* @param html html String
|
* @param clazz bean Class
|
* @param <T> 泛型
|
* @return 对象
|
*/
|
public static <T> T readValue(String html, final Class<T> clazz) {
|
return readValue(readDocument(html), clazz);
|
}
|
|
/**
|
* 读取 xml 信息为 java Bean
|
*
|
* @param doc xml element
|
* @param clazz bean Class
|
* @param <T> 泛型
|
* @return 对象
|
*/
|
@SuppressWarnings("unchecked")
|
public static <T> T readValue(final Element doc, final Class<T> clazz) {
|
Enhancer enhancer = new Enhancer();
|
enhancer.setSuperclass(clazz);
|
enhancer.setUseCache(true);
|
enhancer.setCallback(new CssQueryMethodInterceptor(clazz, doc));
|
return (T) enhancer.create();
|
}
|
|
/**
|
* 读取 xml 信息为 java Bean
|
*
|
* @param <T> 泛型
|
* @param inputStream InputStream
|
* @param clazz bean Class
|
* @return 对象
|
*/
|
public static <T> List<T> readList(InputStream inputStream, final Class<T> clazz) {
|
return readList(readDocument(inputStream), clazz);
|
}
|
|
/**
|
* 读取 xml 信息为 java Bean
|
*
|
* @param <T> 泛型
|
* @param html html String
|
* @param clazz bean Class
|
* @return 对象
|
*/
|
public static <T> List<T> readList(String html, final Class<T> clazz) {
|
return readList(readDocument(html), clazz);
|
}
|
|
/**
|
* 读取 xml 信息为 java Bean
|
*
|
* @param doc xml element
|
* @param clazz bean Class
|
* @param <T> 泛型
|
* @return 对象列表
|
*/
|
public static <T> List<T> readList(Element doc, Class<T> clazz) {
|
CssQuery annotation = clazz.getAnnotation(CssQuery.class);
|
if (annotation == null) {
|
throw new IllegalArgumentException("DomMapper readList " + clazz + " mast has annotation @CssQuery.");
|
}
|
String cssQueryValue = annotation.value();
|
Elements elements = doc.select(cssQueryValue);
|
List<T> valueList = new ArrayList<>();
|
for (Element element : elements) {
|
valueList.add(readValue(element, clazz));
|
}
|
return valueList;
|
}
|
|
}
|