/*
 * Prepare tag data for the bar chart.
 */

import * as dfd from "danfojs";
import * as tokenizer from "wink-tokenizer";

import { removeStopwords, eng } from "stopword";

// Maximum number of tags reported to the chart.
const MAX_BAR_CHART_TAGS = 10;

// Token types (as labelled by the tokenizer) that are kept as tag candidates.
const KEPT_TOKEN_TYPES = ["word", "mention", "hashtag"];

// Extra stopwords on top of the English list: stray punctuation, "not", and
// every single letter a-z. The original list had "tempTags" where "d" belongs
// (apparently a variable rename that also rewrote the string literal), so the
// letter "d" was never filtered; generating the letters avoids that slip.
const CUSTOM_STOPWORDS = [",", "not", ..."abcdefghijklmnopqrstuvwxyz", "'"];

/**
 * Build the data for the tag bar chart: tokenize the given tags, drop
 * stopwords, count how often each remaining term occurs and keep the most
 * frequent ones (at most 10).
 *
 * @param {*} tempTags - Tag data; it is converted with `toString()` before
 *   tokenizing (presumably an array of tag strings or one string - confirm
 *   against the caller). Must not be null/undefined.
 * @returns {*} The result of `dfd.toJSON` on a DataFrame with the columns
 *   `name` (the term) and `count` (its number of occurrences), sorted by
 *   count in descending order.
 */
export default function prepareTagDataForBarChart(tempTags) {
  const myTokenizer = tokenizer();
  const tokens = myTokenizer.tokenize(tempTags.toString());

  // Keep only word-like tokens. The "@" / "#" markers are stripped because a
  // top-10 tag containing them causes the app to crash.
  const words = [];
  for (const token of tokens) {
    if (KEPT_TOKEN_TYPES.includes(token["tag"])) {
      words.push(token["value"].replace(/[@#]/g, ""));
    }
  }

  const terms = removeStopwords(words, [...eng, ...CUSTOM_STOPWORDS]);

  // Note: stemming was tried here previously and gave poor results, so the
  // terms are counted as-is.
  const counts = new dfd.Series(terms).valueCounts();
  counts.sortValues({ inplace: true, ascending: false });

  // Take the leading rows of the sorted counts, capped at the chart limit.
  const noTags = Math.min(counts.shape[0], MAX_BAR_CHART_TAGS);
  const topCounts = counts.iloc([`0:${noTags}`]);

  const dfChartBarTags = new dfd.DataFrame({
    name: topCounts.index,
    count: topCounts.values,
  });

  return dfd.toJSON(dfChartBarTags);
}
