diff --git a/defaults/web.xml b/defaults/web.xml index 01fb786c6..ef48d8318 100644 --- a/defaults/web.xml +++ b/defaults/web.xml @@ -56,13 +56,9 @@ net.yacy.http.servlets.GSAsearchServlet - + URLProxyServlet - net.yacy.http.servlets.UrlProxyServlet Link text on top of proxied page to stop use of proxy diff --git a/source/net/yacy/http/servlets/YaCyProxyServlet.java b/source/net/yacy/http/servlets/YaCyProxyServlet.java deleted file mode 100644 index bcea16faa..000000000 --- a/source/net/yacy/http/servlets/YaCyProxyServlet.java +++ /dev/null @@ -1,326 +0,0 @@ -package net.yacy.http.servlets; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; -import java.net.MalformedURLException; -import java.net.URLDecoder; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.StringTokenizer; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.servlet.Servlet; -import javax.servlet.ServletException; -import javax.servlet.ServletRequest; -import javax.servlet.ServletResponse; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import net.yacy.cora.document.encoding.UTF8; -import net.yacy.cora.document.id.DigestURL; -import net.yacy.cora.document.id.MultiProtocolURL; -import net.yacy.cora.protocol.ClientIdentification; -import net.yacy.cora.protocol.Domains; -import net.yacy.cora.protocol.HeaderFramework; -import net.yacy.cora.protocol.RequestHeader; -import net.yacy.cora.protocol.ResponseHeader; -import net.yacy.cora.util.ConcurrentLog; -import net.yacy.http.ProxyHandler; -import net.yacy.kelondro.util.FileUtils; -import net.yacy.search.Switchboard; -import net.yacy.server.http.ChunkedInputStream; -import net.yacy.server.http.HTTPDProxyHandler; - -import org.eclipse.jetty.continuation.Continuation; -import org.eclipse.jetty.continuation.ContinuationSupport; - -/** - * Servlet to implement proxy via url parameter "/proxy.html?url=xyz_urltoproxy" - * this implementation uses the existing proxy functions from YaCy HTTPDProxyHandler - * - * functionality - * - get parameters - * - convert headers to YaCy style headers and parameters - * - call existing HTTPDProxy - * - revert response headers back from YaCy style to servlet specification - * - handle rewrite of link (to point to proxy) - * - send to client - * - * later improvemnts should/could use implementation to avoid back and forth converting - * between YaCy and Servlet header/parameter style and use proxy implementation within - * servlet specification or a existing reverse-proxy library. - * - * @deprecated since 1.81 use {@link UrlProxyServlet} instead. - */ -@Deprecated //use UrlProxyServlet instead -public class YaCyProxyServlet extends HttpServlet implements Servlet { - private static final long serialVersionUID = 4900000000000001120L; - - @Override - public void service (ServletRequest req, ServletResponse res) throws ServletException, IOException { - - final HttpServletRequest request = (HttpServletRequest) req; - final HttpServletResponse response = (HttpServletResponse) res; - - if (!Switchboard.getSwitchboard().getConfigBool("proxyURL", false)) { - response.sendError(HttpServletResponse.SC_FORBIDDEN,"proxy use not allowed. URL proxy globally switched off (see: System Administration -> Advanced Settings -> URL proxy)"); - return; - } - - final String remoteHost = req.getRemoteHost(); - if (!Domains.isThisHostIP(remoteHost)) { - if (!proxyippatternmatch(remoteHost)) { - response.sendError(HttpServletResponse.SC_FORBIDDEN, - "proxy use not granted for IP " + remoteHost + " (see: System Administration -> Advanced Settings -> Restrict URL proxy use filter)"); - return; - } - } - - if ("CONNECT".equalsIgnoreCase(request.getMethod())) { - return; - } - final Continuation continuation = ContinuationSupport.getContinuation(request); - - if (!continuation.isInitial()) { - response.sendError(HttpServletResponse.SC_GATEWAY_TIMEOUT); // Need better test that isInitial - return; - } - DigestURL proxyurl = null; - String strARGS = request.getQueryString(); - if (strARGS == null) { - response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing"); - return; - } - - if (strARGS.startsWith("url=")) { - final String strUrl = strARGS.substring(4); // strip "url=" - - try { - proxyurl = new DigestURL(strUrl); - } catch (final MalformedURLException e) { - proxyurl = new DigestURL(URLDecoder.decode(strUrl, StandardCharsets.UTF_8.name())); - } - } - if (proxyurl == null) { - response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing"); - return; - } - - String hostwithport = proxyurl.getHost(); - if (proxyurl.getPort() != -1) { - hostwithport += ":" + proxyurl.getPort(); - } - RequestHeader yacyRequestHeader = ProxyHandler.convertHeaderFromJetty(request); - yacyRequestHeader.remove(RequestHeader.KEEP_ALIVE); - yacyRequestHeader.remove(HeaderFramework.CONTENT_LENGTH); - - final HashMap prop = new HashMap(); - prop.put(HeaderFramework.CONNECTION_PROP_HTTP_VER, HeaderFramework.HTTP_VERSION_1_1); - prop.put(HeaderFramework.CONNECTION_PROP_DIGESTURL, proxyurl); - prop.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, Domains.LOCALHOST); - prop.put(HeaderFramework.CONNECTION_PROP_CLIENT_HTTPSERVLETREQUEST, request); - - final ByteArrayOutputStream tmpproxyout = new ByteArrayOutputStream(); - HTTPDProxyHandler.doGet(prop, yacyRequestHeader, tmpproxyout, ClientIdentification.yacyProxyAgent); - - // reparse header to extract content-length and mimetype - final ResponseHeader proxyResponseHeader = new ResponseHeader(200); // - final InputStream proxyout = new ByteArrayInputStream(tmpproxyout.toByteArray()); - String line = readLine(proxyout); - while (line != null && !line.equals("")) { - int p; - if ((p = line.indexOf(':')) >= 0) { - // store a property - proxyResponseHeader.add(line.substring(0, p).trim(), line.substring(p + 1).trim()); - } - line = readLine(proxyout); - } - if (line == null) { - response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy Header missing"); - return; - } - - final int httpStatus = Integer.parseInt((String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS)); - - String directory = ""; - if (proxyurl.getPath().lastIndexOf('/') > 0) { - directory = proxyurl.getPath().substring(0, proxyurl.getPath().lastIndexOf('/')); - } - - if (response.getHeader(HeaderFramework.LOCATION) != null) { - // rewrite location header - String location = response.getHeader(HeaderFramework.LOCATION); - if (location.startsWith("http")) { - location = request.getServletPath() + "?url=" + location; - } else { - location = request.getServletPath() + "?url=" + proxyurl.getProtocol() + "://" + hostwithport + "/" + location; - } - response.addHeader(HeaderFramework.LOCATION, location); - } - - final String mimeType = proxyResponseHeader.getContentType(); - response.setContentType(mimeType); - response.setStatus(httpStatus); - - if ((mimeType != null) && (mimeType.startsWith("text"))) { - final StringWriter buffer = new StringWriter(); - - if (proxyResponseHeader.containsKey(HeaderFramework.TRANSFER_ENCODING) && proxyResponseHeader.get(HeaderFramework.TRANSFER_ENCODING).contains("chunked")) { - FileUtils.copy(new ChunkedInputStream(proxyout), buffer, StandardCharsets.UTF_8); - } else { - FileUtils.copy(proxyout, buffer, StandardCharsets.UTF_8); - } - final String sbuffer = buffer.toString(); - - final Pattern p = Pattern.compile("(href=\"|src=\")([^\"]+)|(href='|src=')([^']+)|(url\\(')([^']+)|(url\\(\")([^\"]+)|(url\\()([^\\)]+)"); - final Matcher m = p.matcher(sbuffer); - final StringBuffer result = new StringBuffer(80); - final Switchboard sb = Switchboard.getSwitchboard(); - final String servletstub = request.getServletPath()+"?url="; - while (m.find()) { - String init = null; - if (m.group(1) != null) { init = m.group(1); } - if (m.group(3) != null) { init = m.group(3); } - if (m.group(5) != null) { init = m.group(5); } - if (m.group(7) != null) { init = m.group(7); } - if (m.group(9) != null) { init = m.group(9); } - String url = null; - if (m.group(2) != null) { url = m.group(2); } - if (m.group(4) != null) { url = m.group(4); } - if (m.group(6) != null) { url = m.group(6); } - if (m.group(8) != null) { url = m.group(8); } - if (m.group(10) != null) { url = m.group(10); } - if (url.startsWith("data:") || url.startsWith("#") || url.startsWith("mailto:") || url.startsWith("javascript:")) { - String newurl = init + url; - newurl = newurl.replaceAll("\\$", "\\\\\\$"); - m.appendReplacement(result, newurl); - - } else if (url.startsWith("http")) { - // absoulte url of form href="http://domain.com/path" - if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) { - try { - if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(url)) != null) { - continue; - } - } catch (final MalformedURLException e) { - ConcurrentLog.fine("PROXY","ProxyServlet: malformed url for url-rewirte " + url); - continue; - } - } - - String newurl = init + servletstub + url; - newurl = newurl.replaceAll("\\$", "\\\\\\$"); - m.appendReplacement(result, newurl); - - } else if (url.startsWith("//")) { - // absoulte url but same protocol of form href="//domain.com/path" - final String complete_url = proxyurl.getProtocol() + ":" + url; - if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) { - try { - if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(complete_url)) != null) { - continue; - } - } catch (MalformedURLException ex) { - ConcurrentLog.fine("PROXY","ProxyServlet: malformed url for url-rewirte " + complete_url); - continue; - } - } - - String newurl = init + servletstub + complete_url; - newurl = newurl.replaceAll("\\$", "\\\\\\$"); - m.appendReplacement(result, newurl); - - } else if (url.startsWith("/")) { - // absolute path of form href="/absolute/path/to/linked/page" - String newurl = init + servletstub + "http://" + hostwithport + url; - newurl = newurl.replaceAll("\\$", "\\\\\\$"); - m.appendReplacement(result, newurl); - - } else { - // relative path of form href="relative/path" - try { - MultiProtocolURL target = new MultiProtocolURL("http://" + hostwithport + directory + "/" + url); - String newurl = init + servletstub + target.toString(); - newurl = newurl.replaceAll("\\$", "\\\\\\$"); - m.appendReplacement(result, newurl); - } catch (final MalformedURLException e) {} - } - } - m.appendTail(result); - - byte[] sbb = UTF8.getBytes(result.toString()); - - // add some proxy-headers to response header - response.setContentType(proxyResponseHeader.getContentType()); - if (proxyResponseHeader.containsKey(HeaderFramework.SERVER)) { - response.addHeader(HeaderFramework.SERVER, proxyResponseHeader.get(HeaderFramework.SERVER)); - } - if (proxyResponseHeader.containsKey(HeaderFramework.DATE)) { - response.addHeader(HeaderFramework.DATE, proxyResponseHeader.get(HeaderFramework.DATE)); - } - if (proxyResponseHeader.containsKey(HeaderFramework.LAST_MODIFIED)) { - response.addHeader(HeaderFramework.LAST_MODIFIED, proxyResponseHeader.get(HeaderFramework.LAST_MODIFIED)); - } - if (proxyResponseHeader.containsKey(HeaderFramework.EXPIRES)) { - response.addHeader(HeaderFramework.EXPIRES, proxyResponseHeader.get(HeaderFramework.EXPIRES)); - } - - response.setIntHeader(HeaderFramework.CONTENT_LENGTH, sbb.length); - response.getOutputStream().write(sbb); - - } else { - if ((response.getHeader(HeaderFramework.CONTENT_LENGTH) == null) && prop.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)) { - response.addHeader(HeaderFramework.CONTENT_LENGTH, (String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)); - } - FileUtils.copy(proxyout, response.getOutputStream()); - } - } - - private String readLine(final InputStream in) throws IOException { - final ByteArrayOutputStream buf = new ByteArrayOutputStream(); - int b; - while ((b = in.read()) != '\r' && b != -1) { - buf.write(b); - } - if (b == -1) { - return null; - } - b = in.read(); // read \n - if (b == -1) { - return null; - } - return buf.toString(StandardCharsets.UTF_8.name()); - } - - /** - * helper for proxy IP config pattern check - */ - private boolean proxyippatternmatch(final String key) { - // the cfgippattern is a comma-separated list of patterns - // each pattern may contain one wildcard-character '*' which matches anything - final String cfgippattern = Switchboard.getSwitchboard().getConfig("proxyURL.access", "*"); - if (cfgippattern.equals("*")) { - return true; - } - final StringTokenizer st = new StringTokenizer(cfgippattern, ","); - String pattern; - while (st.hasMoreTokens()) { - pattern = st.nextToken(); - if (key.matches(pattern)) { - return true; - } - } - return false; - } - - @Override - public String getServletInfo() { - return "YaCy Proxy Servlet"; - } - -}