package uk.ac.cam.ch.wwmm.oscarMEMM.memm.rescorer;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.collections.set.UnmodifiableSet;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import uk.ac.cam.ch.wwmm.oscar.document.NamedEntity;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;
import uk.ac.cam.ch.wwmm.oscar.tools.StringTools;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarMEMM/memm/rescorer/FeatureExtractor.class */
/**
 * Builds the string-valued features for a list of named entities.
 *
 * <p>The constructor makes one pass over the supplied entities and caches,
 * per surface string, the mean and maximum confidence, plus a list of
 * abbreviation-related features for entities that appear between round
 * brackets. {@link #getFeatures(NamedEntity, UnmodifiableSet)} then combines
 * those cached values with the confidence of a single entity.
 */
final class FeatureExtractor {
    /** Surface ending in an upper-case letter followed by a lower-case "s". */
    private static final Pattern UPPER_THEN_S = Pattern.compile(".*[A-Z]s");
    /** Surface containing at least one whitespace character. */
    private static final Pattern CONTAINS_WHITESPACE = Pattern.compile(".*\\s.*");

    List<NamedEntity> entities;
    Map<String, Double> averageScores;
    Map<String, Double> maxScores;
    Pattern allCaps = Pattern.compile("[A-Z]+");
    Map<String, List<String>> abbrevFeatures = new HashMap<String, List<String>>();

    /**
     * Pre-computes the per-surface statistics and abbreviation features.
     *
     * @param list the entities to analyse; the reference is stored as-is, not copied
     */
    public FeatureExtractor(List<NamedEntity> list) {
        this.entities = list;
        collectScoreStatistics(list);
        collectAbbreviationFeatures(list);
    }

    /** Fills {@link #averageScores} and {@link #maxScores} from the confidences of all entities. */
    private void collectScoreStatistics(List<NamedEntity> list) {
        // Blocked entities are deliberately included here: the original code
        // evaluated isBlocked() but did nothing with the result.
        Map<String, List<Double>> scoresBySurface = new HashMap<String, List<Double>>();
        for (NamedEntity entity : list) {
            String surface = entity.getSurface();
            List<Double> scores = scoresBySurface.get(surface);
            if (scores == null) {
                scores = new ArrayList<Double>();
                scoresBySurface.put(surface, scores);
            }
            scores.add(Double.valueOf(entity.getConfidence()));
        }

        this.averageScores = new HashMap<String, Double>();
        this.maxScores = new HashMap<String, Double>();
        for (Map.Entry<String, List<Double>> entry : scoresBySurface.entrySet()) {
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (Double score : entry.getValue()) {
                stats.addValue(score.doubleValue());
            }
            this.maxScores.put(entry.getKey(), Double.valueOf(stats.getMax()));
            this.averageScores.put(entry.getKey(), Double.valueOf(stats.getMean()));
        }
    }

    /**
     * Fills {@link #abbrevFeatures} for every entity that is directly enclosed
     * in "(" and ")" and has at least one token before the opening bracket.
     */
    private void collectAbbreviationFeatures(List<NamedEntity> list) {
        // Index the non-blocked entities by their last token so that the entity
        // ending right before an opening bracket can be looked up.
        Map<Token, NamedEntity> entityByLastToken = new HashMap<Token, NamedEntity>();
        for (NamedEntity entity : list) {
            if (!entity.isBlocked()) {
                entityByLastToken.put(entity.getLastToken(), entity);
            }
        }

        for (NamedEntity entity : list) {
            Token before = entity.getFirstToken().getNAfter(-1);
            Token after = entity.getLastToken().getNAfter(1);
            if (before == null || after == null) {
                continue;
            }
            if (!before.getSurface().equals("(") || !after.getSurface().equals(")")) {
                continue;
            }
            Token beforeBracket = entity.getFirstToken().getNAfter(-2);
            if (beforeBracket == null) {
                continue;
            }

            // Drop the final character when either the bracketed surface or the
            // token before the bracket looks like a plural.
            String surface = entity.getSurface();
            if (UPPER_THEN_S.matcher(surface).matches() || beforeBracket.getSurface().endsWith("s")) {
                surface = surface.substring(0, surface.length() - 1);
            }

            // NOTE(review): the lookup uses the possibly shortened surface while a
            // new list is registered under the unmodified surface. Kept as in the
            // original because changing the key would change the emitted features;
            // confirm whether this asymmetry is intended.
            List<String> features = this.abbrevFeatures.get(surface);
            if (features == null) {
                features = new ArrayList<String>();
                this.abbrevFeatures.put(entity.getSurface(), features);
            }

            NamedEntity expansion = entityByLastToken.get(beforeBracket);
            if (expansion != null) {
                addExpansionFeatures(features, surface, expansion);
            } else {
                addBracketOnlyFeatures(features, surface, entity);
            }
        }
    }

    /**
     * Adds one thresholded feature per logit step in [-5, 5] describing how the
     * confidence of the entity preceding the bracket compares to that threshold.
     */
    private void addExpansionFeatures(List<String> features, String surface, NamedEntity expansion) {
        String prefix = StringTools.testForAcronym(surface, expansion.getSurface()) ? "abbr2:" : "abbr1:";
        if (CONTAINS_WHITESPACE.matcher(surface).matches()) {
            prefix = prefix + "wws:";
        }
        // Bounded loop: the previous while(true) form had no exit once the
        // logit reached 5.05 and therefore never terminated.
        for (double logit = -5.0d; logit < 5.05d; logit += 0.5d) {
            double threshold = ProbabilityConvertor.logitToProb(logit);
            if (expansion.getConfidence() > threshold) {
                features.add(prefix + "abbr>" + threshold);
            } else {
                features.add(prefix + "abbr<" + threshold);
            }
        }
    }

    /**
     * Adds "allUpperAbbrev" when the surface is all upper-case and each of its
     * letters starts the corresponding token before the bracket; otherwise adds
     * "seenInBrackets".
     */
    private void addBracketOnlyFeatures(List<String> features, String surface, NamedEntity entity) {
        int firstIndex = entity.getFirstToken().getIndex();
        TokenSequence sequence = entity.getFirstToken().getTokenSequence();
        int length = surface.length();
        boolean initialsMatch = false;
        // length <= firstIndex - 1 guarantees that the tokens inspected below
        // (the 'length' tokens that end just before the "(") have indices >= 0.
        if (this.allCaps.matcher(surface).matches() && length <= firstIndex - 1) {
            initialsMatch = true;
            for (int i = 0; i < length; i++) {
                String tokenSurface = sequence.getToken(((firstIndex - length) - 1) + i).getSurface();
                // Fixed locale so the comparison does not depend on the JVM default.
                String upper = tokenSurface.toUpperCase(java.util.Locale.ENGLISH);
                if (!upper.startsWith(surface.substring(i, i + 1))) {
                    initialsMatch = false;
                    break;
                }
            }
            if (initialsMatch) {
                features.add("allUpperAbbrev");
            }
        }
        if (!initialsMatch) {
            features.add("seenInBrackets");
        }
    }

    /**
     * Returns the features for a single entity.
     *
     * <p>The result contains, in order: "LongInCND" if the entity has at least
     * one token and its surface is in {@code unmodifiableSet}; the cached
     * abbreviation features for the surface, or "noabbrev"; for non-blocked
     * entities with cached statistics, markers comparing the entity confidence
     * with the mean ("avg...") and maximum ("...below") confidence of its
     * surface; and finally one "conf+" or "conf-" entry per 0.05 step of the
     * log-odds of the confidence, clamped to [-15, 15].
     *
     * @param namedEntity the entity to describe
     * @param unmodifiableSet set of surfaces checked for the "LongInCND" feature
     * @return a new mutable list of feature strings
     */
    public List<String> getFeatures(NamedEntity namedEntity, UnmodifiableSet unmodifiableSet) {
        double confidence = namedEntity.getConfidence();
        double logOdds = Math.log(confidence) - Math.log(1.0d - confidence);
        List<String> features = new ArrayList<String>();
        int tokenCount = namedEntity.getTokens().size();
        String surface = namedEntity.getSurface();

        // NOTE(review): tokenCount > 0 holds for every entity that has tokens;
        // the feature name hints at a multi-token test - confirm the threshold.
        if (tokenCount > 0 && unmodifiableSet.contains(surface)) {
            features.add("LongInCND");
        }

        if (this.abbrevFeatures.containsKey(surface)) {
            features.addAll(this.abbrevFeatures.get(surface));
        } else {
            features.add("noabbrev");
        }

        if (!namedEntity.isBlocked() && this.averageScores.containsKey(surface)) {
            double average = this.averageScores.get(surface).doubleValue();
            if (average > confidence) {
                features.add("avg+");
            }
            if (average > confidence + 0.05d) {
                features.add("avg++");
            }
            if (average > confidence + 0.1d) {
                features.add("avg+++");
            }
            if (average < confidence) {
                features.add("avg-");
            }
            if (average < confidence - 0.05d) {
                features.add("avg--");
            }
            if (average < confidence - 0.1d) {
                features.add("avg---");
            }
            double gapToMax = this.maxScores.get(surface).doubleValue() - confidence;
            if (gapToMax > 0.05d) {
                features.add("0.05below");
            }
            if (gapToMax > 0.1d) {
                features.add("0.1below");
            }
            if (gapToMax > 0.15d) {
                features.add("0.15below");
            }
        }

        // Both loops use "continue while" conditions so that a NaN log-odds
        // (confidence outside [0, 1]) ends them immediately instead of looping
        // forever, which the former inverted break conditions did.
        double upperLimit = Math.min(logOdds, 15.0d);
        for (double step = 0.0d; step < upperLimit; step += 0.05d) {
            features.add("conf+");
        }
        double lowerLimit = Math.max(logOdds, -15.0d);
        for (double step = 0.0d; step > lowerLimit; step -= 0.05d) {
            features.add("conf-");
        }
        return features;
    }
}
