/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.language;

import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.RemoveMinorityScriptsTextFilter;
import com.optimaize.langdetect.text.TextFilter;
import com.optimaize.langdetect.text.TextObjectFactory;
import com.optimaize.langdetect.text.TextObjectFactoryBuilder;
import com.optimaize.langdetect.text.UrlTextFilter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.jetbrains.annotations.Nullable;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LanguageIdentifier {
    private static final Logger logger = LoggerFactory.getLogger(LanguageIdentifier.class);
    private static final double MINIMAL_CONFIDENCE = 0.9;
    private static final int K_HIGHEST_SCORES = 5;
    private static final int SHORT_ALGO_THRESHOLD = 50;
    private static final Pattern SIGNATURE = Pattern.compile("\n-- \n.*", 32);
    private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl");
    private static final List<String> externalLangCodes = Arrays.asList("eo");
    private final LanguageDetector languageDetector;
    private final TextObjectFactory textObjectFactory;
    private final int maxLength;
    private boolean fasttextEnabled = false;
    private Process fasttextProcess;
    private BufferedReader fasttextIn;
    private BufferedWriter fasttextOut;

    public LanguageIdentifier() {
        this(1000);
    }

    public LanguageIdentifier(int maxLength) {
        try {
            List<LanguageProfile> profiles = this.loadProfiles(LanguageIdentifier.getLanguageCodes());
            this.languageDetector = LanguageDetectorBuilder.create((NgramExtractor)NgramExtractors.standard()).minimalConfidence(0.9).shortTextAlgorithm(50).withProfiles(profiles).build();
            this.textObjectFactory = new TextObjectFactoryBuilder().maxTextLength(10000).withTextFilter((TextFilter)UrlTextFilter.getInstance()).withTextFilter((TextFilter)RemoveMinorityScriptsTextFilter.forThreshold((double)0.3)).withTextFilter((TextFilter)new RemoveEMailSignatureFilter()).build();
        }
        catch (IOException e) {
            throw new RuntimeException("Could not set up language identifier", e);
        }
        if (maxLength < 10) {
            throw new IllegalArgumentException("maxLength must be >= 10 (but values > 100 are recommended): " + maxLength);
        }
        this.maxLength = maxLength;
    }

    public void enableFasttext(File fasttextBinary, File fasttextModel) {
        if (fasttextBinary != null && fasttextModel != null) {
            try {
                this.startFasttext(fasttextModel, fasttextBinary);
                logger.info("Started fasttext process for language identification: Binary " + fasttextBinary + " with model @ " + fasttextModel);
                this.fasttextEnabled = true;
            }
            catch (IOException e) {
                this.fasttextEnabled = false;
                throw new RuntimeException("Could not start fasttext process for language identification @ " + fasttextBinary + " with model @ " + fasttextModel, e);
            }
        }
    }

    private static List<String> getLanguageCodes() {
        ArrayList<String> langCodes = new ArrayList<String>();
        for (Language lang : Languages.get()) {
            String langCode = lang.getShortCode();
            boolean ignore = lang.isVariant() || ignoreLangCodes.contains(langCode) || externalLangCodes.contains(langCode);
            if (ignore) continue;
            if ("zh".equals(langCode)) {
                langCodes.add("zh-CN");
                langCodes.add("zh-TW");
                continue;
            }
            langCodes.add(langCode);
        }
        return langCodes;
    }

    private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
        LanguageProfileReader profileReader = new LanguageProfileReader();
        List profiles = profileReader.read(langCodes);
        for (String externalLangCode : externalLangCodes) {
            String profilePath = "/" + externalLangCode + "/" + externalLangCode + ".profile";
            if (!JLanguageTool.getDataBroker().resourceExists(profilePath)) continue;
            InputStream profile = JLanguageTool.getDataBroker().getFromResourceDirAsStream(profilePath);
            Throwable throwable = null;
            try {
                profiles.add(new LanguageProfileReader().read(profile));
            }
            catch (Throwable throwable2) {
                throwable = throwable2;
                throw throwable2;
            }
            finally {
                if (profile == null) continue;
                if (throwable != null) {
                    try {
                        profile.close();
                    }
                    catch (Throwable throwable3) {
                        throwable.addSuppressed(throwable3);
                    }
                    continue;
                }
                profile.close();
            }
        }
        return profiles;
    }

    @Nullable
    public Language detectLanguage(String text) {
        String shortText = text.length() > this.maxLength ? text.substring(0, this.maxLength) : text;
        shortText = this.textObjectFactory.forText((CharSequence)shortText).toString();
        String languageCode = null;
        if (this.fasttextEnabled) {
            try {
                languageCode = this.getHighestScoringResult(this.runFasttext(shortText));
            }
            catch (Exception e) {
                this.fasttextEnabled = false;
                logger.error("Disabling fasttext language identification, got error for text: " + text, (Throwable)e);
                this.fasttextProcess.destroy();
            }
        }
        if (!this.fasttextEnabled) {
            languageCode = this.detectLanguageCode(shortText);
        }
        if (languageCode != null && Languages.isLanguageSupported(languageCode)) {
            return Languages.getLanguageForShortCode(languageCode);
        }
        return null;
    }

    private void startFasttext(File modelPath, File binaryPath) throws IOException {
        this.fasttextProcess = new ProcessBuilder(binaryPath.getPath(), "predict-prob", modelPath.getPath(), "-", "5").start();
        this.fasttextIn = new BufferedReader(new InputStreamReader(this.fasttextProcess.getInputStream(), StandardCharsets.UTF_8));
        this.fasttextOut = new BufferedWriter(new OutputStreamWriter(this.fasttextProcess.getOutputStream(), StandardCharsets.UTF_8));
    }

    private String getHighestScoringResult(Map<String, Double> probs) {
        String result = null;
        double max = -1.0;
        for (Map.Entry<String, Double> entry : probs.entrySet()) {
            if (!(entry.getValue() > max)) continue;
            max = entry.getValue();
            result = entry.getKey();
        }
        return result;
    }

    private synchronized Map<String, Double> runFasttext(String text) throws IOException {
        HashMap<String, Double> probabilities = new HashMap<String, Double>();
        String joined = text.replace("\n", " ");
        this.fasttextOut.write(joined);
        this.fasttextOut.newLine();
        this.fasttextOut.flush();
        String buffer = this.fasttextIn.readLine();
        String[] values = buffer.split(" ");
        if (values.length % 2 != 0) {
            throw new RuntimeException("Error while parsing fasttext output: " + buffer);
        }
        for (int i = 0; i < values.length; i += 2) {
            String lang = values[i];
            String langCode = lang.substring(lang.lastIndexOf("__") + 2);
            String prob = values[i + 1];
            Double probValue = Double.parseDouble(prob);
            if (!Languages.isLanguageSupported(langCode)) continue;
            probabilities.put(langCode, probValue);
        }
        return probabilities;
    }

    @Nullable
    private String detectLanguageCode(String text) {
        Optional lang = this.languageDetector.detect((CharSequence)text);
        if (lang.isPresent()) {
            return ((LdLocale)lang.get()).getLanguage();
        }
        return null;
    }

    class RemoveEMailSignatureFilter
    implements TextFilter {
        RemoveEMailSignatureFilter() {
        }

        public String filter(CharSequence text) {
            return SIGNATURE.matcher(text.toString()).replaceFirst("");
        }
    }
}

