View Javadoc
1   /**
2    * This file is part of Indicators.
3    *
4    * Indicators is free software: you can redistribute it and/or modify
5    * it under the terms of the GNU General Public License as published by
6    * the Free Software Foundation, either version 3 of the License, or
7    * (at your option) any later version.
8    *
9    * Indicators is distributed in the hope that it will be useful,
10   * but WITHOUT ANY WARRANTY; without even the implied warranty of
11   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   * GNU General Public License for more details.
13   *
14   * You should have received a copy of the GNU General Public License
15   * along with Indicators. If not, see <https://www.gnu.org/licenses/>.
16   */
17  package fr.inrae.agroclim.indicators.model.data.climate;
18  
19  import java.io.IOException;
20  import java.text.DateFormat;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collection;
24  import java.util.Date;
25  import java.util.EnumMap;
26  import java.util.HashMap;
27  import java.util.HashSet;
28  import java.util.List;
29  import java.util.Locale;
30  import java.util.Map;
31  import java.util.Objects;
32  import java.util.Set;
33  
34  import com.fasterxml.jackson.databind.MappingIterator;
35  import com.fasterxml.jackson.databind.ObjectReader;
36  import com.fasterxml.jackson.dataformat.csv.CsvMapper;
37  import com.fasterxml.jackson.dataformat.csv.CsvParser;
38  import com.fasterxml.jackson.dataformat.csv.CsvSchema;
39  
40  import fr.inrae.agroclim.indicators.model.TimeScale;
41  import fr.inrae.agroclim.indicators.model.data.DataLoadingListener;
42  import fr.inrae.agroclim.indicators.model.data.FileLoader;
43  import fr.inrae.agroclim.indicators.model.data.Resource;
44  import fr.inrae.agroclim.indicators.model.data.Variable;
45  import fr.inrae.agroclim.indicators.resources.I18n;
46  import fr.inrae.agroclim.indicators.resources.Messages;
47  import fr.inrae.agroclim.indicators.util.DateUtils;
48  import fr.inrae.agroclim.indicators.util.StringUtils;
49  import jakarta.xml.bind.annotation.XmlAccessType;
50  import jakarta.xml.bind.annotation.XmlAccessorType;
51  import jakarta.xml.bind.annotation.XmlElement;
52  import jakarta.xml.bind.annotation.XmlTransient;
53  import jakarta.xml.bind.annotation.XmlType;
54  import lombok.Getter;
55  import lombok.Setter;
56  import lombok.extern.log4j.Log4j2;
57  
58  /**
59   * Load climate data from file.
60   *
61   * Last changed : $Date$
62   *
63   * @author $Author$
64   * @version $Revision$
65   */
66  @XmlAccessorType(XmlAccessType.FIELD)
67  @XmlType(propOrder = {"separator", "headers", "midnight", "endYear", "startYear"})
68  @Log4j2
69  public final class ClimateFileLoader extends FileLoader implements ClimateLoader {
70      /**
71       * UUID for Serializable.
72       */
73      private static final long serialVersionUID = 1913730755957817418L;
74  
75      /**
76       * Localized date format for log message.
77       */
78      @XmlTransient
79      private DateFormat dateFormat = DateFormat.getDateInstance(DateFormat.SHORT);
80  
81      /**
82       * Localized datetime format for log message.
83       */
84      @XmlTransient
85      private DateFormat dateTimeFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT);
86  
87      /**
88       * Hour of midnight (0 for 0-23 or 24 for 1-24).
89       */
90      @Getter
91      @Setter
92      @XmlElement
93      private Integer midnight = 0;
94  
95      /**
96       * Headers of CSV file.
97       */
98      @Getter
99      @Setter
100     @XmlElement(name = "header")
101     private String[] headers;
102 
103     /**
104      * CSV separator.
105      */
106     @Getter
107     @Setter
108     @XmlElement
109     private String separator = Resource.DEFAULT_SEP;
110 
111     /**
112      * Calculator to compute ETP from climatic daily data.
113      */
114     @Setter
115     @XmlTransient
116     private EtpCalculator etpCalculator;
117 
118     /**
119      * End year of data filtering (included).
120      */
121     @Getter
122     @Setter
123     private Integer endYear;
124 
125     /**
126      * Start year of data filtering (included).
127      */
128     @Getter
129     @Setter
130     private Integer startYear;
131 
132     /**
133      * Related time scales.
134      */
135     @Getter
136     @Setter
137     @XmlTransient
138     private TimeScale timeScale = TimeScale.DAILY;
139 
140     /**
141      * Constructor.
142      */
143     public ClimateFileLoader() {
144         setDataFile(DataLoadingListener.DataFile.CLIMATIC);
145     }
146 
147     /**
148      * Constructor.
149      *
150      * @param csvFile
151      *            relative path of CSV file
152      * @param csvHeaders
153      *            CSV headers
154      * @param csvSeparator
155      *            CSV separator
156      */
157     public ClimateFileLoader(final String csvFile, final String[] csvHeaders,
158             final String csvSeparator) {
159         this();
160         setPath(csvFile);
161         this.headers = csvHeaders;
162         this.separator = csvSeparator;
163     }
164 
165     /**
166      * Ensure climatic data are ordered and there is not any missing.
167      *
168      * @param previous previous data
169      * @param current current data to check
170      * @param line line number
171      * @param path file path
172      */
173     void checkDate(final ClimaticDailyData previous, final ClimaticDailyData current, final int line,
174             final String path) {
175         if (previous == null || current == null) {
176             return;
177         }
178         final DateFormat df;
179         if (timeScale == null) {
180             throw new IllegalStateException("timeScale must not be null!");
181         }
182         final long delta;
183         switch (timeScale) {
184         case DAILY -> {
185             df = dateFormat;
186             delta = DateUtils.NB_OF_MS_IN_DAY;
187         }
188         case HOURLY -> {
189             df = dateTimeFormat;
190             delta = DateUtils.NB_OF_MS_IN_HOUR;
191         }
192         default -> throw new IllegalStateException("TimeScale not handled: " + timeScale);
193         }
194         if (previous.getDate() != null && current.getDate() != null) {
195             final long previousTime = previous.getDate().getTime();
196             final long currentTime = current.getDate().getTime();
197             final long interval = currentTime - previousTime;
198             if (interval < 0) {
199                 current.getErrors().add(
200                         Messages.format("error.day.succession", path, line,
201                                 df.format(current.getDate()),
202                                 df.format(previous.getDate())
203                                 )
204                         );
205                 return;
206             }
207             if (interval == 0) {
208                 current.getErrors().add(Messages.format("error.day.duplicate", path, line,
209                         df.format(previous.getDate())));
210                 return;
211             }
212             if (interval > delta) {
213                 current.getErrors().add(Messages.format("error.day.missing", path, line, df.format(current.getDate())));
214             }
215         } else {
216             current.getErrors().add(Messages.format("error.date.notread"));
217         }
218     }
219 
220     @Override
221     public ClimateFileLoader clone() {
222         final ClimateFileLoader clone = new ClimateFileLoader();
223         clone.etpCalculator = etpCalculator.clone();
224         clone.setPath(getPath());
225         clone.headers = headers;
226         clone.separator = separator;
227         return clone;
228     }
229 
230     @Override
231     public boolean equals(final Object obj) {
232         if (this == obj) {
233             return true;
234         }
235         if (obj == null) {
236             return false;
237         }
238         if (getClass() != obj.getClass()) {
239             return false;
240         }
241         final ClimateFileLoader other = (ClimateFileLoader) obj;
242         if (!Objects.equals(this.separator, other.separator)) {
243             return false;
244         }
245         if (!Objects.equals(this.getPath(), other.getPath())) {
246             return false;
247         }
248         if (!Arrays.deepEquals(this.headers, other.headers)) {
249             return false;
250         }
251         if (!Objects.equals(this.etpCalculator, other.etpCalculator)) {
252             return false;
253         }
254         if (!Objects.equals(this.endYear, other.endYear)) {
255             return false;
256         }
257         return Objects.equals(this.startYear, other.startYear);
258     }
259 
260     @Override
261     public Map<String, String> getConfigurationErrors() {
262         final Map<String, String> errors = new HashMap<>();
263         if (getPath() == null) {
264             errors.put("climate.file", "error.evaluation.climate.file.missing");
265         }
266         if (!getFile().exists()) {
267             errors.put("climate.file", "error.evaluation.climate.file.doesnotexist");
268         } else if (getFile().length() == 0) {
269             errors.put("climate.file", "error.evaluation.climate.file.empty");
270         }
271         if (separator == null) {
272             errors.put("climate.separator", "error.evaluation.climate.separator.missing");
273         } else if (separator.isEmpty()) {
274             errors.put("climate.separator", "error.evaluation.climate.separator.empty");
275         }
276         if (headers == null) {
277             errors.put("climate.header", "error.evaluation.climate.header.missing");
278         }
279         if (timeScale == TimeScale.DAILY && etpCalculator == null) {
280             errors.put("climate.etpCalculator", "error.evaluation.climate.etpCalculator.missing");
281         }
282         if (errors.isEmpty()) {
283             return null;
284         }
285         return errors;
286     }
287 
288     /**
289      * @return Calculator to compute ETP from climatic daily data.
290      */
291     private EtpCalculator getEtpCalculator() {
292         if (timeScale != TimeScale.DAILY) {
293             throw new UnsupportedOperationException("Only daily data should have ETP!");
294         }
295         if (etpCalculator == null) {
296             throw new RuntimeException("EtpCalculator not set!");
297         }
298         return etpCalculator;
299     }
300 
301     /**
302      * @return Missing climatic variables, to check in aggregation indicators.
303      */
304     @Override
305     public Collection<String> getMissingVariables() {
306         final List<String> all = new ArrayList<>(ClimaticDailyData.getAllColumnNames(timeScale));
307         if (headers != null) {
308             for (final String header : headers) {
309                 all.remove(header.toLowerCase());
310             }
311         }
312         return all;
313     }
314 
315     @Override
316     public Set<Variable> getProvidedVariables() {
317         return super.getProvidedVariables(headers);
318     }
319 
320     @Override
321     public Set<Variable> getVariables() {
322         if (etpCalculator == null) {
323             return new HashSet<>();
324         }
325         return etpCalculator.getVariables();
326     }
327 
328     @Override
329     public int hashCode() {
330         final int prime1 = 7;
331         final int prime = 71;
332         int hash = prime1;
333         hash = prime * hash + Objects.hashCode(this.getPath());
334         hash = prime * hash + Arrays.deepHashCode(this.headers);
335         hash = prime * hash + Objects.hashCode(this.separator);
336         hash = prime * hash + Objects.hashCode(this.etpCalculator);
337         hash = prime * hash + Objects.hashCode(this.endYear);
338         hash = prime * hash + Objects.hashCode(this.startYear);
339         return hash;
340     }
341 
342     @Override
343     public List<ClimaticDailyData> load() {
344         LOGGER.trace("start");
345         if (getPath() == null || getFile() == null) {
346             throw new RuntimeException("no file defined for climate.");
347         }
348         if (separator == null) {
349             throw new RuntimeException("no separator defined for climate.");
350         }
351         LOGGER.trace("headers: {}", StringUtils.join(headers, ","));
352         final List<ClimaticDailyData> data = new ArrayList<>();
353         final List<String> headerFiltered = new ArrayList<>();
354         final Map<Variable, Integer> valuesCol = new EnumMap<>(Variable.class);
355         int yearCol = -1;
356         int monthCol = -1;
357         int dayCol = -1;
358         int hourCol = -1;
359         final String[] headersFromFile = getHeaders(getFile(), separator.charAt(0));
360         String[] usedHeaders;
361 
362         if (headers == null) {
363             usedHeaders = headersFromFile;
364         } else {
365             usedHeaders = headers;
366         }
367         for (int i = 0; i < usedHeaders.length; i++) {
368             final String header = usedHeaders[i];
369             final String lcHeader = header.toLowerCase();
370             final int index = ClimaticDailyData.getAllColumnNames(timeScale).indexOf(lcHeader);
371             if (index != -1) {
372                 headerFiltered.add(header.substring(0, 1).toUpperCase() + lcHeader.substring(1));
373                 if (header.equals("year")) {
374                     yearCol = i;
375                     continue;
376                 }
377                 if (header.equals("month")) {
378                     monthCol = i;
379                     continue;
380                 }
381                 if (header.equals("day")) {
382                     dayCol = i;
383                     continue;
384                 }
385                 if (header.equals("hour")) {
386                     hourCol = i;
387                     continue;
388                 }
389                 valuesCol.put(Variable.getByName(header), i);
390             } else {
391                 headerFiltered.add(null);
392             }
393         }
394 
395         LOGGER.trace("userHeadersArray: {}", StringUtils.join(headerFiltered, ","));
396 
397         LOGGER.trace("year: {}, month: {}, day: {}", yearCol, monthCol, dayCol);
398         LOGGER.trace("variables: {}", valuesCol);
399         if (usedHeaders.length != headersFromFile.length) {
400             final I18n i18n = new I18n("fr.inrae.agroclim.indicators.resources.messages", Locale.getDefault());
401             final String msg = i18n.format("error.climate.wrong.headers", headersFromFile.length,
402                     StringUtils.join(headersFromFile, ", "), usedHeaders.length, StringUtils.join(usedHeaders, ", "));
403             throw new RuntimeException(msg);
404         }
405         fireDataLoadingStartEvent("Start of reading file: " + getFile().getName());
406 
407         final CsvSchema schema = CsvSchema.emptySchema().withSkipFirstDataRow(true)//
408                 .withColumnSeparator(separator.charAt(0));
409         final CsvMapper mapper = new CsvMapper();
410         // important: we need "array wrapping" (see next section) here:
411         mapper.enable(CsvParser.Feature.WRAP_AS_ARRAY);
412         final ObjectReader objReader = mapper.readerFor(String[].class).with(schema);
413         try (MappingIterator<String[]> it = objReader.readValues(getFile())) {
414             ClimaticDailyData previous = null;
415             while (it.hasNext()) {
416                 final int lineNumber = it.getCurrentLocation().getLineNr();
417                 final String[] row = it.next();
418                 final Integer year = this.parseInt(row[yearCol], null);
419                 if (startYear != null && year < startYear || endYear != null && year > endYear) {
420                     continue;
421                 }
422                 final ClimaticDailyData dailyData = new ClimaticDailyData();
423                 dailyData.setTimescale(timeScale);
424                 dailyData.setYear(year);
425                 dailyData.setMonth(this.parseInt(row[monthCol], null));
426                 dailyData.setDay(this.parseInt(row[dayCol], null));
427                 if (timeScale == TimeScale.HOURLY) {
428                     final int hour = Integer.parseInt(row[hourCol]);
429                     if (midnight == DateUtils.NB_OF_HOURS_IN_DAY && hour == DateUtils.NB_OF_HOURS_IN_DAY) {
430                         final long newTime = dailyData.getDate().getTime() + DateUtils.NB_OF_MS_IN_DAY;
431                         final Date newDate = new Date(newTime);
432                         dailyData.setYear(DateUtils.getYear(newDate));
433                         dailyData.setMonth(DateUtils.getMonth(newDate));
434                         dailyData.setDay(DateUtils.getDom(newDate));
435                         dailyData.setHour(0);
436                     } else {
437                         dailyData.setHour(hour);
438                     }
439                 }
440                 valuesCol.forEach((variable, index) -> {
441                     if (row[index] != null && !row[index].isEmpty()) {
442                         dailyData.setValue(variable, Double.parseDouble(row[index]));
443                     }
444                 });
445                 if (timeScale == TimeScale.DAILY) {
446                     dailyData.setEtpCalcultator(getEtpCalculator());
447                 }
448                 dailyData.check(lineNumber, getFile().getName());
449                 checkDate(previous, dailyData, lineNumber, getFile().getName());
450                 fireDataLoadingAddEvent(dailyData);
451                 data.add(dailyData);
452                 previous = dailyData;
453             }
454             fireDataLoadingEndEvent("End of reading " + getFile().getName());
455         } catch (final IOException e) {
456             throw new RuntimeException(Messages.format("error.reading", getFile().getAbsolutePath()), e);
457         }
458         return data;
459     }
460 
461     /**
462      * Parse String to Integer, like {@code Integer.parseInt} method.
463      * @param value the string value to parsing
464      * @param defaultValue if not possible to parse, this value will be returned
465      * @return value parsed or defaultValue
466      */
467     private Integer parseInt(final String value, final Integer defaultValue) {
468         Integer ret;
469         try {
470             ret = Integer.parseInt(value);
471         } catch (final NumberFormatException e) {
472             ret = defaultValue;
473         }
474         return ret;
475     }
476     /**
477      * @param locale locale for date formatter
478      */
479     public void setLocale(final Locale locale) {
480         dateFormat = DateFormat.getDateInstance(DateFormat.SHORT, locale);
481         dateTimeFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT, locale);
482     }
483 }