import PizZip from "pizzip";
import { DOMParser } from "@xmldom/xmldom";
import * as XLSX from "xlsx";
import pdfToText from "react-pdftotext";
import { auth } from "../helper/firebaseClient";
import { api } from "../utils/axios-instance";

/**
 * Parses an XML string into a DOM document.
 *
 * A leading byte-order mark (U+FEFF) is dropped before parsing.
 *
 * @param {string} str - Raw XML text.
 * @returns {Document} Result of `DOMParser#parseFromString` with the
 *   "text/xml" MIME type.
 */
const str2xml = (str) => {
  const BOM_CHAR_CODE = 0xfeff;
  const startsWithBom = str.charCodeAt(0) === BOM_CHAR_CODE;
  const xmlSource = startsWithBom ? str.slice(1) : str;

  const parser = new DOMParser();
  return parser.parseFromString(xmlSource, "text/xml");
};

/**
 * Extracts the text of every non-empty paragraph of a .docx archive.
 *
 * The archive is opened with PizZip, "word/document.xml" is parsed, and for
 * each `w:p` element the values of all child nodes of its `w:t` descendants
 * are concatenated. Paragraphs whose concatenated text is empty are skipped.
 *
 * @param {*} content - Archive data handed to the `PizZip` constructor (the
 *   caller in this file passes an ArrayBuffer).
 * @returns {string[]} Paragraph texts in document order.
 * @throws {Error} If the archive has no "word/document.xml" entry.
 */
const getParagraphs = (content) => {
  const zip = new PizZip(content);
  const documentEntry = zip.files["word/document.xml"];
  if (!documentEntry) {
    throw new Error('Invalid .docx file: "word/document.xml" is missing');
  }

  const xml = str2xml(documentEntry.asText());
  const paragraphsXml = xml.getElementsByTagName("w:p");
  const paragraphs = [];

  for (let i = 0, len = paragraphsXml.length; i < len; i++) {
    let fullText = "";
    const textsXml = paragraphsXml[i].getElementsByTagName("w:t");
    for (let j = 0, len2 = textsXml.length; j < len2; j++) {
      // `childNodes` is a (possibly empty) list, so its truthiness says
      // nothing about whether a text node exists; an empty <w:t/> has none.
      const children = textsXml[j].childNodes;
      const childCount = children ? children.length : 0;
      for (let k = 0; k < childCount; k++) {
        const value = children[k].nodeValue;
        // Skip nodes without a value so "null" is never appended.
        if (value !== null && value !== undefined) {
          fullText += value;
        }
      }
    }
    if (fullText) {
      paragraphs.push(fullText);
    }
  }
  return paragraphs;
};

/**
 * Reads a file into memory using a `FileReader`.
 *
 * @param {Blob} file - File or Blob passed to `FileReader#readAsArrayBuffer`.
 * @returns {Promise<ArrayBuffer>} Resolves with the reader's result once the
 *   load completes.
 *   Rejects with the reader's `error` (or a generic Error when the reader
 *   exposes none) so callers receive an Error rather than an event object.
 */
const readFileAsArrayBuffer = (file) => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () =>
      reject(reader.error || new Error("Failed to read file"));
    reader.readAsArrayBuffer(file);
  });
};

/**
 * Reads a spreadsheet file and returns the rows of all its sheets as one
 * flat array.
 *
 * Each sheet listed in `workbook.SheetNames` is converted with
 * `XLSX.utils.sheet_to_json`; the resulting rows are appended in sheet order.
 *
 * @param {Blob} file - File whose `arrayBuffer()` yields the workbook bytes.
 * @returns {Promise<Array>} Rows of every sheet, concatenated.
 */
const parseExcel = async (file) => {
  const rows = [];
  const data = await file.arrayBuffer();
  const workbook = XLSX.read(data);
  for (const sheetName of workbook.SheetNames) {
    const worksheet = workbook.Sheets[sheetName];
    const sheetRows = XLSX.utils.sheet_to_json(worksheet);
    // Append one row at a time: passing every row as a call argument
    // (push.apply / spread) can exceed the engine's argument limit on very
    // large sheets and throw a RangeError.
    for (const row of sheetRows) {
      rows.push(row);
    }
  }
  return rows;
};

/**
 * Extracts the textual content of a file based on its MIME type.
 *
 * - PDF: result of `pdfToText(file)`.
 * - DOCX: array of paragraph strings from `getParagraphs`.
 * - XLSX: JSON string of the rows returned by `parseExcel`.
 *
 * @param {File} file - File to extract text from; dispatch is on `file.type`.
 * @returns {Promise<string|string[]|null>} Extracted content, or `null` when
 *   the MIME type is not one of the three supported types.
 */
const extractDocument = async (file) => {
  switch (file.type) {
    case "application/pdf":
      return pdfToText(file);
    case "application/vnd.openxmlformats-officedocument.wordprocessingml.document": {
      const content = await readFileAsArrayBuffer(file);
      return getParagraphs(content);
    }
    case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": {
      const rows = await parseExcel(file);
      return JSON.stringify(rows);
    }
    default:
      return null;
  }
};

/**
 * Extracts text from a PDF, DOCX or XLSX file and sends it to the backend.
 *
 * Files of any other MIME type are ignored without contacting the auth
 * provider or the backend. Any error raised during extraction, token
 * retrieval or the request is logged and NOT rethrown, so the returned
 * promise always resolves.
 *
 * @param {File} file - File to process; `type` and `size` are read.
 * @param {*} id - Sent as `id` in the request body.
 * @param {*} name - Sent as `fileData.name`.
 * @param {*} deptColor - Sent as `fileData.bg`.
 * @param {Object} profileData - `org` is sent as `org_id` and
 *   `profilePictureUrl` as `fileData.profilePic`.
 * @param {boolean} isUpdate - Truthy: PUT to `/file/updateFileOnOpenSearch/`;
 *   falsy: POST to `/file/uploadToOpenSearch/`.
 * @returns {Promise<void>}
 */
const handleTextExtraction = async (
  file,
  id,
  name,
  deptColor,
  profileData,
  isUpdate
) => {
  try {
    const extracted = await extractDocument(file);
    if (extracted === null) {
      // Unsupported file type: nothing to upload.
      return;
    }

    // Fetch the token only once there is something to send.
    const user = auth.currentUser;
    if (!user) {
      throw new Error("Cannot upload extracted text: no authenticated user");
    }
    const token = await user.getIdToken();

    const payload = {
      idToken: token,
      id: id,
      document: extracted,
      org_id: profileData.org,
      fileData: {
        name: name,
        bg: deptColor,
        size: file.size,
        mimetype: file.type,
        profilePic: profileData.profilePictureUrl,
      },
    };

    if (isUpdate) {
      await api.put("/file/updateFileOnOpenSearch/", payload);
    } else {
      // Upload extracted text to OpenSearch
      await api.post("/file/uploadToOpenSearch/", payload);
    }
  } catch (e) {
    console.error("Text extraction/upload failed:", e);
  }
};

export default handleTextExtraction;
