From 5afb0cbce8fde3334ec1baeb76fff7c27e09216c Mon Sep 17 00:00:00 2001 From: theli Date: Mon, 18 Sep 2006 11:39:06 +0000 Subject: [PATCH] *) setting default charset (for unknown documents) to iso-8859-1 *) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2620 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/http/httpHeader.java | 13 +++++++++---- source/de/anomic/http/httpc.java | 4 ++-- source/de/anomic/http/httpdProxyHandler.java | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/source/de/anomic/http/httpHeader.java b/source/de/anomic/http/httpHeader.java index 7f4321084..bc5aa145b 100644 --- a/source/de/anomic/http/httpHeader.java +++ b/source/de/anomic/http/httpHeader.java @@ -82,6 +82,8 @@ public final class httpHeader extends TreeMap implements Map { private static final long serialVersionUID = 17L; + + public static final String DEFAULT_CHARSET = "ISO-8859-1"; /* ============================================================= * Constants defining http versions @@ -432,15 +434,18 @@ public final class httpHeader extends TreeMap implements Map { public String getCharacterEncoding() { String mimeType = mime(); - + return extractCharsetFromMimetyeHeader(mimeType); + } + + public static String extractCharsetFromMimetyeHeader(String mimeType) { int idx = mimeType.indexOf(";"); if (idx == -1) return null; - String encoding = mimeType.substring(idx + 1); + String encoding = mimeType.substring(idx + 1).trim(); if (!encoding.startsWith("charset=")) return null; - return encoding.substring("charset=".length()).trim(); - } + return encoding.substring("charset=".length()).trim(); + } public Date date() { return headerDate(httpHeader.DATE); diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index e9c7a4bbe..8d17808e5 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -1854,7 +1854,7 @@ do upload serverFileUtils.writeX(this.getContentInputStream(), 
(OutputStream)procOS, sbb); } else if (procOS instanceof Writer) { String charSet = this.responseHeader.getCharacterEncoding(); - if (charSet == null) charSet = "UTF-8"; + if (charSet == null) charSet = httpHeader.DEFAULT_CHARSET; serverFileUtils.writeX(this.getContentInputStream(), charSet, (Writer)procOS, sbb, charSet); } else { throw new IllegalArgumentException("Invalid procOS object type '" + procOS.getClass().getName() + "'"); @@ -1882,7 +1882,7 @@ do upload //writeContentX(httpc.this.clientInput, this.gzip, this.responseHeader.contentLength(), procOS, bufferOS); } else if (procOS instanceof Writer) { String charSet = this.responseHeader.getCharacterEncoding(); - if (charSet == null) charSet = "UTF-8"; + if (charSet == null) charSet = httpHeader.DEFAULT_CHARSET; serverFileUtils.writeX(this.getContentInputStream(), charSet, (Writer)procOS, bufferOS, charSet); } else { throw new IllegalArgumentException("Invalid procOS object type '" + procOS.getClass().getName() + "'"); diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 3c7bcc3ed..ea7edfbcb 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -613,7 +613,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt this.theLogger.logFine("create transformer for URL " + url); //hfos = new htmlFilterOutputStream((gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond), null, transformer, (ext.length() == 0)); String charSet = res.responseHeader.getCharacterEncoding(); - if (charSet == null) charSet = "UTF-8"; + if (charSet == null) charSet = httpHeader.DEFAULT_CHARSET; hfos = new htmlFilterWriter((gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? 
chunkedOut : respond),charSet, null, transformer, (ext.length() == 0)); } else { // simply pass through without parsing @@ -816,7 +816,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt // determine the content charset String charSet = cachedResponseHeader.getCharacterEncoding(); - if (charSet == null) charSet = "UTF-8"; + if (charSet == null) charSet = httpHeader.DEFAULT_CHARSET; // make a transformer if ((!(transformer.isIdentityTransformer())) &&