From bb51555830df627c1725378683cf00b9a3754194 Mon Sep 17 00:00:00 2001 From: luccioman Date: Mon, 2 Jul 2018 10:00:40 +0200 Subject: [PATCH] Removed remaining unsafe accesses to SimpleDateFormat instances. SimpleDateFormat must not be used by concurrent threads without synchronization for parsing or formatting dates as it is not thread-safe (internally holds a calendar instance that is not synchronized). Now prefer DateTimeFormatter when possible as it is thread-safe without concurrent access performance bottleneck (does not internally use synchronization locks). --- .../responsewriter/GSAResponseWriter.java | 27 +++--- source/net/yacy/data/wiki/WikiBoard.java | 2 +- source/net/yacy/document/DateDetection.java | 85 +++++++++++++------ .../yacy/http/servlets/GSAsearchServlet.java | 46 +++++----- .../net/yacy/kelondro/blob/MapDataMining.java | 19 +++-- 5 files changed, 109 insertions(+), 70 deletions(-) diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java index 7d4ac1842..af6e50c72 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java @@ -23,6 +23,7 @@ package net.yacy.cora.federate.solr.responsewriter; import java.io.IOException; import java.io.Writer; import java.nio.charset.StandardCharsets; +import java.time.DateTimeException; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -33,13 +34,6 @@ import java.util.Map; import java.util.Set; import java.util.regex.Pattern; -import net.yacy.cora.protocol.HeaderFramework; -import net.yacy.cora.util.CommonPattern; -import net.yacy.http.servlets.GSAsearchServlet; -import net.yacy.peers.operation.yacyVersion; -import net.yacy.search.Switchboard; -import net.yacy.search.schema.CollectionSchema; - import org.apache.lucene.document.Document; import 
org.apache.lucene.index.IndexableField; import org.apache.solr.common.params.CommonParams; @@ -54,6 +48,14 @@ import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; import org.apache.solr.search.SolrIndexSearcher; +import net.yacy.cora.date.ISO8601Formatter; +import net.yacy.cora.protocol.HeaderFramework; +import net.yacy.cora.util.CommonPattern; +import net.yacy.http.servlets.GSAsearchServlet; +import net.yacy.peers.operation.yacyVersion; +import net.yacy.search.Switchboard; +import net.yacy.search.schema.CollectionSchema; + /** * implementation of a GSA search result. * example: GET /gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1 @@ -359,11 +361,14 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo * @see ISO8601Formatter */ public final String formatGSAFS(final Date date) { - if (date == null) return ""; - synchronized (GSAsearchServlet.FORMAT_GSAFS) { - final String s = GSAsearchServlet.FORMAT_GSAFS.format(date); - return s; + if (date == null) { + return ""; } + try { + return GSAsearchServlet.FORMAT_GSAFS.format(date.toInstant()); + } catch (final DateTimeException e) { + return ""; + } } } \ No newline at end of file diff --git a/source/net/yacy/data/wiki/WikiBoard.java b/source/net/yacy/data/wiki/WikiBoard.java index 37998529a..ebd3f4793 100644 --- a/source/net/yacy/data/wiki/WikiBoard.java +++ b/source/net/yacy/data/wiki/WikiBoard.java @@ -54,7 +54,7 @@ public class WikiBoard { private static final String DATE_FORMAT = "yyyyMMddHHmmss"; private static final String ANONYMOUS = "anonymous"; - protected static final SimpleDateFormat SimpleFormatter = new SimpleDateFormat(DATE_FORMAT, Locale.US); + private static final SimpleDateFormat SimpleFormatter = new SimpleDateFormat(DATE_FORMAT, Locale.US); static { SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT")); diff --git a/source/net/yacy/document/DateDetection.java 
b/source/net/yacy/document/DateDetection.java index b7ec754ad..3d048ce69 100644 --- a/source/net/yacy/document/DateDetection.java +++ b/source/net/yacy/document/DateDetection.java @@ -20,12 +20,12 @@ package net.yacy.document; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.time.DayOfWeek; import java.time.LocalDate; import java.time.LocalTime; +import java.time.ZoneOffset; import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; import java.time.temporal.TemporalAdjuster; import java.time.temporal.TemporalAdjusters; import java.util.ArrayList; @@ -63,8 +63,10 @@ import net.yacy.cora.date.GenericFormatter; public class DateDetection { private static final TimeZone UTC_TIMEZONE = TimeZone.getTimeZone("UTC"); - private static final String CONPATT = "yyyy/MM/dd"; - private static final SimpleDateFormat CONFORM = new SimpleDateFormat(CONPATT, Locale.US); + private static final String CONPATT = "uuuu/MM/dd"; + + private static final DateTimeFormatter CONFORM = DateTimeFormatter.ofPattern(CONPATT).withLocale(Locale.US) + .withZone(ZoneOffset.UTC); private static final LinkedHashMap Weekdays = new LinkedHashMap<>(); private static final LinkedHashMap Months = new LinkedHashMap<>(); private static final int[] MaxDaysInMonth = new int[]{31,29,31,30,31,30,31,31,30,31,30,31}; @@ -75,7 +77,6 @@ public class DateDetection { } static { - CONFORM.setTimeZone(UTC_TIMEZONE); // all names must be lowercase because compared strings are made to lowercase as well Weekdays.put(Language.GERMAN, new String[]{"montag", "dienstag", "mittwoch", "donnerstag", "freitag", "samstag" /*oder: "sonnabend"*/, "sonntag"}); Weekdays.put(Language.ENGLISH, new String[]{"monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"}); @@ -133,8 +134,7 @@ public class DateDetection { } } - private final static Date TODAY = new Date(); - private final static int CURRENT_YEAR = Integer.parseInt(CONFORM.format(TODAY).substring(0, 4)); // we 
need that to parse dates without given years, see the ShortStyle class + private final static int CURRENT_YEAR = LocalDate.now().getYear(); // we need that to parse dates without given years, see the ShortStyle class private final static String BODNCG = "(?:\\s|^)"; // begin of date non-capturing group private final static String EODNCG = "(?:[).:;! ]|$)"; // end of date non-capturing group @@ -312,7 +312,7 @@ public class DateDetection { */ private static Date[] sameDayEveryYear(final int month, final int day, final int currentYear) { final Date[] r = new Date[4]; - final Calendar cal = CONFORM.getCalendar(); + final Calendar cal = new GregorianCalendar(UTC_TIMEZONE); cal.clear(); cal.set(currentYear - 1, month, day); // set start in previous year r[0] = cal.getTime(); @@ -336,7 +336,7 @@ public class DateDetection { january1Calendar.clear(); /* Calendar using UTC time zone to produce date results */ - final Calendar utcCalendar = CONFORM.getCalendar(); + final Calendar utcCalendar = new GregorianCalendar(UTC_TIMEZONE); /* Calendar using the same time zone as in the holidayrule to extract year,month, and day fields */ final Calendar ruleCalendar = new GregorianCalendar(ruleTimeZone); @@ -552,11 +552,11 @@ public class DateDetection { int month = this.firstEntity == EntityType.MONTH ? i1 : this.secondEntity == EntityType.MONTH ? i2 : i3; if (day > MaxDaysInMonth[month - 1]) continue; // validity check of the day number int year = this.firstEntity == EntityType.YEAR ? i1 : this.secondEntity == EntityType.YEAR ? i2 : i3; - synchronized (CONFORM) {try { - dates.add(CONFORM.parse(year + "/" + (month < 10 ? "0" : "") + month + "/" + (day < 10 ? "0" : "") + day)); - } catch (ParseException e) { - continue; - }} + final Date parsed = parseDateSafely( + year + "/" + (month < 10 ? "0" : "") + month + "/" + (day < 10 ? 
"0" : "") + day, CONFORM); + if(parsed != null) { + dates.add(parsed); + } if (dates.size() > 100) {dates.clear(); break;} // that does not make sense } return dates; @@ -564,6 +564,30 @@ public class DateDetection { } + /** + * Safely parse the given string to a date using the given formatter. Return + * null when the format can not be applied to the given string or when any + * parsing error occurred. + * + * @param str + * the string to parse + * @param formatter + * the formatter to use + * @return a Date instance (start of the parsed day, UTC) or null + */ + protected static Date parseDateSafely(final String str, final DateTimeFormatter formatter) { + Date res = null; + if (str != null && !str.isEmpty()) { + try { + if (formatter != null) { + res = Date.from(LocalDate.parse(str, formatter).atStartOfDay().toInstant(ZoneOffset.UTC)); + } + } catch (final RuntimeException ignored) { + } + } + return res; + } + public static enum ShortStyle implements StyleParser { MD_ENGLISH(EntityType.MONTH, EntityType.DAY, // Big-endian (month, day), e.g. "from october 1st to september 13th" ENGLISH_LANGUAGE, @@ -619,16 +643,18 @@ public class DateDetection { if (day > MaxDaysInMonth[month - 1]) continue; // validity check of the day number int thisyear = CURRENT_YEAR; int nextyear = CURRENT_YEAR + 1; - synchronized (CONFORM) {try { - String datestub = "/" + (month < 10 ? "0" : "") + month + "/" + (day < 10 ? "0" : "") + day; - Date atThisYear = CONFORM.parse(thisyear + datestub); - Date atNextYear = CONFORM.parse(nextyear + datestub); - dates.add(atThisYear); - dates.add(atNextYear); - //dates.add(atThisYear.after(TODAY) ? atThisYear : atNextYear); // we consider these kind of dates as given for the future - } catch (ParseException e) { - continue; - }} + String datestub = "/" + (month < 10 ? "0" : "") + month + "/" + (day < 10 ? 
"0" : "") + day; + + final Date atThisYear = parseDateSafely(thisyear + datestub, CONFORM); + if(atThisYear != null) { + dates.add(atThisYear); + } + + final Date atNextYear = parseDateSafely(nextyear + datestub, CONFORM); + if(atNextYear != null) { + dates.add(atNextYear); + } + //dates.add(atThisYear.after(TODAY) ? atThisYear : atNextYear); // we consider these kind of dates as given for the future if (dates.size() > 100) {dates.clear(); break;} // that does not make sense } return dates; @@ -670,12 +696,15 @@ public class DateDetection { * @return determined date or null */ public static Date parseLine(final String text, final int timezoneOffset) { - Date d = null; // check standard date formats - try {d = CONFORM.parse(text);} catch (ParseException e) {} + Date d = parseDateSafely(text, CONFORM); //if (d == null) try {d = GenericFormatter.FORMAT_SHORT_DAY.parse(text);} catch (ParseException e) {} // did not work well and fired for wrong formats; do not use - if (d == null) try {d = GenericFormatter.newRfc1123ShortFormat().parse(text);} catch (ParseException e) {} - if (d == null) try {d = GenericFormatter.newAnsicFormat().parse(text);} catch (ParseException e) {} + if (d == null) { + d = parseDateSafely(text, GenericFormatter.FORMAT_RFC1123_SHORT); + } + if (d == null) { + d = parseDateSafely(text, GenericFormatter.FORMAT_ANSIC); + } if (d == null) { // check other date formats diff --git a/source/net/yacy/http/servlets/GSAsearchServlet.java b/source/net/yacy/http/servlets/GSAsearchServlet.java index b47f42a57..096d55093 100644 --- a/source/net/yacy/http/servlets/GSAsearchServlet.java +++ b/source/net/yacy/http/servlets/GSAsearchServlet.java @@ -24,8 +24,9 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; import java.nio.charset.StandardCharsets; -import java.text.ParseException; -import java.text.SimpleDateFormat; +import java.time.LocalDate; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; import 
java.util.Date; import java.util.Iterator; import java.util.List; @@ -37,6 +38,17 @@ import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.DisMaxParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.SolrRequestInfo; +import org.apache.solr.response.QueryResponseWriter; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.util.FastWriter; + import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; @@ -53,17 +65,6 @@ import net.yacy.search.query.SearchEvent; import net.yacy.search.schema.CollectionSchema; import net.yacy.server.serverObjects; -import org.apache.solr.common.SolrDocumentList; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.DisMaxParams; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.request.SolrRequestInfo; -import org.apache.solr.response.QueryResponseWriter; -import org.apache.solr.response.ResultContext; -import org.apache.solr.response.SolrQueryResponse; -import org.apache.solr.util.FastWriter; - /** * This is a gsa result formatter for solr search results. 
@@ -74,9 +75,11 @@ public class GSAsearchServlet extends HttpServlet { private static final long serialVersionUID = 7835985518515673885L; - // GSA date formatter (short form of ISO8601 date format) - private static final String PATTERN_GSAFS = "yyyy-MM-dd"; - public static final SimpleDateFormat FORMAT_GSAFS = new SimpleDateFormat(PATTERN_GSAFS, Locale.US); + /** GSA date formatter (short form of ISO8601 date format) */ + private static final String PATTERN_GSAFS = "uuuu-MM-dd"; + + public static final DateTimeFormatter FORMAT_GSAFS = DateTimeFormatter.ofPattern(PATTERN_GSAFS) + .withLocale(Locale.US).withZone(ZoneId.systemDefault()); private final static GSAResponseWriter responseWriter = new GSAResponseWriter(); @@ -273,10 +276,11 @@ public class GSAsearchServlet extends HttpServlet { * @see ISO8601Formatter */ public final Date parseGSAFS(final String datestring) { - try { - return FORMAT_GSAFS.parse(datestring); - } catch (final ParseException e) { - return null; - } + try { + return Date + .from(LocalDate.parse(datestring, FORMAT_GSAFS).atStartOfDay(ZoneId.systemDefault()).toInstant()); + } catch (final RuntimeException e) { + return null; + } } } diff --git a/source/net/yacy/kelondro/blob/MapDataMining.java b/source/net/yacy/kelondro/blob/MapDataMining.java index 1c62f9754..a05307a6b 100644 --- a/source/net/yacy/kelondro/blob/MapDataMining.java +++ b/source/net/yacy/kelondro/blob/MapDataMining.java @@ -30,15 +30,17 @@ package net.yacy.kelondro.blob; import java.io.File; import java.io.IOException; import java.lang.reflect.Array; -import java.text.ParseException; -import java.text.SimpleDateFormat; +import java.time.DateTimeException; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; -import java.util.Locale; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import net.yacy.cora.date.GenericFormatter; import 
net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.ByteOrder; @@ -411,15 +413,13 @@ public class MapDataMining extends MapHeap { super.close(); } - private static final String shortDateFormatString = "yyyyMMddHHmmss"; - private static final SimpleDateFormat shortFormatter = new SimpleDateFormat(shortDateFormatString, Locale.US); private static final long minutemillis = 60000; private static long date2000 = 0; static { try { - date2000 = shortFormatter.parse("20000101000000").getTime(); - } catch (final ParseException e) {} + date2000 = ZonedDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); + } catch (final DateTimeException e) {} } private static final byte[] plainByteArray = new byte[256]; @@ -453,9 +453,10 @@ public class MapDataMining extends MapHeap { if (s == null || s.isEmpty() || s.charAt(0) == '-') return 0; try { long l = 0; - if (s.length() == shortDateFormatString.length()) { + if (s.length() == GenericFormatter.PATTERN_SHORT_SECOND.length()) { // try a date - l = ((shortFormatter.parse(s).getTime() - date2000) / minutemillis); + l = ((LocalDateTime.parse(s, GenericFormatter.FORMAT_SHORT_SECOND).toInstant(ZoneOffset.UTC) + .toEpochMilli() - date2000) / minutemillis); if (l < 0) l = 0; } else { // try a number