/*
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * first author: Nicolas SALATGE
 */
package conversion.machine_learning.tweets.california_earthquake_2014;

import java.io.File;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

import javax.xml.namespace.QName;

import org.apache.commons.codec.binary.StringUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import fr.emac.gind.commons.utils.io.FileUtil;
import fr.emac.gind.commons.utils.xml.DOMUtil;
import fr.emac.gind.commons.utils.xml.XMLPrettyPrinter;
import fr.emac.gind.dataset.GJaxbDataset;
import fr.emac.gind.dataset.GJaxbDataset.Data;
import fr.emac.gind.dataset.GJaxbDatasetConfiguration;
import fr.emac.gind.dataset.GJaxbDatasetConfiguration.Emissions;
import fr.emac.gind.marshaller.JSONJAXBContext;
import fr.emac.gind.marshaller.XMLJAXBContext;
import fr.emac.gind.modeler.genericmodel.GJaxbGenericModel;
import fr.emac.gind.modeler.genericmodel.GJaxbNode;
import fr.emac.gind.modeler.genericmodel.GJaxbProperty;
import fr.emac.gind.modeler.genericmodel.GJaxbStatusType;
import fr.emac.gind.models.generic.modeler.generic_model.GenericModelHelper;
import fr.emac.gind.tweet.GJaxbEntities;
import fr.emac.gind.tweet.GJaxbHashtagObject;
import fr.emac.gind.tweet.GJaxbTweetObject;

public class ConversionTest {
	
	private static final Logger LOG = LoggerFactory.getLogger(ConversionTest.class.getName());


  private static int RECORD_INDEX = 0;

  @Test
  public void generateTrainTest() throws Exception {

    String trainBuffer = FileUtil.getContents(Thread.currentThread().getContextClassLoader().getResourceAsStream("conversion/machine_learning/tweets/2014_California_Earthquake/2014_California_Earthquake_CF_labeled_data.tsv"));

    String[] lines = trainBuffer.split("\n");
    boolean header = true;

    List<Record> records = new ArrayList<Record>();
    int unknownLabelIndice = 0;
    for(String line: lines) {
      LOG.debug(line);
      if(!header) {
        String tweet_id = "";
        String tweet_text = "";
        String label = "";

        String[] tabs = line.split("\t");
        LOG.debug("" + tabs);
        tweet_id = tabs[0].replace("'", "");
        tweet_text = tabs[1]; //.replace("'", " ").
        //       replace(",", " ").replace("…", " ");
        //   byte[] bytes = tweet_text.getBytes(StandardCharsets.UTF_8);
        tweet_text = StringUtils.newStringUtf8((StringUtils.getBytesUtf8(tweet_text)));



        if(tabs.length >= 2) {
          label = tabs[2].trim();
        }
        if(label.contains("infrastructure_and_utilities_damage") ||
            label.contains("missing_trapped_or_found_people") ||
            label.contains("injured_or_dead_people"))
          records.add(new Record(tweet_id, tweet_text, label));
        else {
          if(unknownLabelIndice < 5) {
            label = null;
            records.add(new Record(tweet_id, tweet_text, label));
          } else {
            label = "trash";
            records.add(new Record(tweet_id, tweet_text, label));
          }
          unknownLabelIndice = unknownLabelIndice + 1;
        }
      }
      header = false;
    }

    LOG.debug("number of records: " + records.size());
    GJaxbGenericModel model = this.createModel(records);
    GJaxbDataset dataset = this.createDataset(records);


    new File("./target/freezed_objectives_DL_Labels_Tweets.xml").createNewFile();
    FileUtil.setContents(new File("./target/freezed_objectives_DL_Labels_Tweets.xml"), XMLPrettyPrinter.print(XMLJAXBContext.getInstance().marshallAnyElement(model)));

    new File("./target/" + dataset.getName() + ".xml").createNewFile();

    Document datasetXML = XMLJAXBContext.getInstance().marshallAnyElement(dataset);
    String dsBuffer = XMLPrettyPrinter.print(datasetXML);
    // dsBuffer = dsBuffer.replace("&lt;![CDATA[[", "<![CDATA[[").replace("]]&gt;", "]]>");

    FileUtil.setContents(new File("./target/" + dataset.getName() + ".xml"), dsBuffer);

  }



  private GJaxbDataset createDataset(List<Record> records) throws Exception {
    GJaxbDataset dataset = new GJaxbDataset();
    dataset.setName("2014_California_Earthquake_CF_dataset");

    dataset.setTopicToSubscribe(new QName("http://www.mines-albi.fr/tweets/CrisisNLP_Tweets_Topic", "CrisisNLP_Tweets_Topic"));
    dataset.setDatasetConfiguration(new GJaxbDatasetConfiguration());
    dataset.getDatasetConfiguration().setEmissions(new Emissions());
    dataset.getDatasetConfiguration().getEmissions().setPeriod(50);

    dataset.setData(new Data());

    JSONArray tweetArray = new JSONArray();
    for(Record r: records) {
      tweetArray.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(r.getTweet())));
    }


    Document doc = DOMUtil.getInstance().newDocument();
    Element tweets = doc.createElement("tweets");
    CDATASection cdata = doc.createCDATASection(tweetArray.toString());
    //   CDATASection cdata = doc.createCDATASection("<![CDATA[" + tweetArray.toString() + "]]>");

    tweets.appendChild(cdata);
    doc.appendChild(tweets);

    //doc.appendChild(cdata);
    dataset.getData().getRaw().getAny().add(doc.getDocumentElement());
    return dataset;
  }



  private GJaxbGenericModel createModel(List<Record> records) {
    GJaxbGenericModel model = new GJaxbGenericModel();
    for(Record r: records) {
      if(r.getNode() != null) {
        model.getNode().add(r.getNode());
      }
    }
    return model;
  }



  public class Record {


    private GJaxbTweetObject tweet = null;
    private GJaxbNode node = null;


    public Record(String tweet_id, String tweet_txt, String label) throws Exception {
      this.tweet = new GJaxbTweetObject();
      this.tweet.setIdStr(tweet_id);
      this.tweet.setId(BigInteger.valueOf(Long.parseLong(tweet_id)));
      this.tweet.setText(tweet_txt);
      if(label != null) {
        this.tweet.setEntities(new GJaxbEntities());
        this.tweet.getEntities().getHashtags().add(new GJaxbHashtagObject());
        this.tweet.getEntities().getHashtags().get(0).setIndices("rio_ml_label");
        JSONArray labelsArray = new JSONArray();
        JSONObject labelJson = new JSONObject();
        labelsArray.put(labelJson);
        if(label.equals("trash")) {
          labelJson.put("name", label);
        } else {
          labelJson.put("name", "Actuality::" + label);
        }
        this.tweet.getEntities().getHashtags().get(0).setText(labelsArray.toString());
      }
      

      if(label != null && !label.equals("trash")) {
        this.node = new GJaxbNode();
        this.node.setId(UUID.randomUUID().toString());
        this.node.getStatus().add(GJaxbStatusType.FREEZE);
        this.node.setType(new QName("http://fr.emac.gind/collaborative-model", "Actuality"));
        this.node.getExtends().add(QName.valueOf("{http://fr.emac.gind/core-model}Semantic_Concept"));
        this.node.setSpecificExportPackage("freezed_objectives_DL_Labels_Tweets");
        GenericModelHelper.findProperty("name", this.node.getProperty(), true).setValue(label + " n" + ++ConversionTest.RECORD_INDEX);
        GenericModelHelper.findProperty("type", this.node.getProperty(), true).setValue("Damage");
        
        JSONArray inferByValue = new JSONArray();
        JSONArray primitiveEventValue = new JSONArray();
        GJaxbProperty propType = GenericModelHelper.createProperty("type", "PRIMITIVE_EVENT");

        JSONArray refsValue = new JSONArray();
        JSONArray refsRowValue = new JSONArray();
        GJaxbProperty propFieldName = GenericModelHelper.createProperty("field_name", "tweet_id");
        GJaxbProperty propFieldValue = GenericModelHelper.createProperty("field_value", tweet_id);
        refsRowValue.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(propFieldName)).get("property"));
        refsRowValue.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(propFieldValue)).get("property"));
        refsValue.put(refsRowValue);
        GJaxbProperty propRefs = GenericModelHelper.createProperty("refs", refsValue.toString());

        GJaxbProperty propCreatedBy = GenericModelHelper.createProperty("created by", "USER");
        GJaxbProperty propAddInfos = GenericModelHelper.createProperty("additional information", "[]");
        
        primitiveEventValue.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(propType)).get("property"));
        primitiveEventValue.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(propRefs)).get("property"));
        primitiveEventValue.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(propCreatedBy)).get("property"));
        primitiveEventValue.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(propAddInfos)).get("property"));
        inferByValue.put(primitiveEventValue);
        GenericModelHelper.findProperty("infer by", this.node.getProperty(), true).setValue(inferByValue.toString());
        
        
        JSONArray labels = new JSONArray();
        JSONArray labelNode = new JSONArray();
        GJaxbProperty propLabel = GenericModelHelper.createProperty("label", label);
        labelNode.put(new JSONObject(JSONJAXBContext.getInstance().marshallAnyElement(propLabel)).get("property"));
        labels.put(labelNode);
        GenericModelHelper.findProperty("labels", this.node.getProperty(), true).setValue(labels.toString());
      }
    }


    public GJaxbTweetObject getTweet() {
      return tweet;
    }


    public GJaxbNode getNode() {
      return node;
    }

  }

}