package uk.ac.cam.ch.wwmm.oscarrecogniser.regex;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import uk.ac.cam.ch.wwmm.oscar.document.NamedEntity;
import uk.ac.cam.ch.wwmm.oscar.document.Token;
import uk.ac.cam.ch.wwmm.oscar.document.TokenSequence;
import uk.ac.cam.ch.wwmm.oscar.types.NamedEntityType;
import uk.ac.cam.ch.wwmm.oscarrecogniser.interfaces.ChemicalEntityRecogniser;
import uk.ac.cam.ch.wwmm.oscarrecogniser.saf.StandoffResolver;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/oscarrecogniser/regex/RegexRecogniser.class */
/**
 * A {@link ChemicalEntityRecogniser} that finds named entities by running a
 * regular expression over the surface string of each {@link TokenSequence}.
 *
 * <p>A regex match only produces a {@link NamedEntity} when both of its ends
 * can be mapped to tokens of the sequence (via
 * {@code getTokenByStartIndex(matcher.start())} and
 * {@code getTokenByEndIndex(matcher.end())}); other matches are ignored.
 * Entities are created with the type set through
 * {@link #setNamedEntityType(NamedEntityType)}, which defaults to
 * {@code NamedEntityType.COMPOUND}.
 */
public class RegexRecogniser implements ChemicalEntityRecogniser {
    /** Compiled form of the regex supplied at construction; never reassigned. */
    private final Pattern pattern;
    /** Type assigned to every entity this recogniser creates. */
    private NamedEntityType neType = NamedEntityType.COMPOUND;

    /**
     * Creates a recogniser for the given regular expression.
     *
     * @param str the regular expression, in {@link Pattern} syntax
     * @throws NullPointerException if {@code str} is null
     * @throws IllegalArgumentException if {@code str} is the empty string
     * @throws java.util.regex.PatternSyntaxException if {@code str} is not a valid regex
     */
    public RegexRecogniser(String str) {
        if (str == null) {
            // Pattern.compile would throw NPE anyway; fail early with a message.
            throw new NullPointerException("regex must not be null");
        }
        if ("".equals(str)) {
            throw new IllegalArgumentException("regex must not be empty");
        }
        this.pattern = Pattern.compile(str);
    }

    /**
     * Finds named entities using {@code ResolutionMode.REMOVE_BLOCKED}.
     *
     * @param list the token sequences to search
     * @return the entities found, after standoff resolution
     */
    @Override
    public List<NamedEntity> findNamedEntities(List<TokenSequence> list) {
        return findNamedEntities(list, StandoffResolver.ResolutionMode.REMOVE_BLOCKED);
    }

    /**
     * Finds named entities in the given token sequences and then hands the
     * result to {@link StandoffResolver} according to {@code resolutionMode}.
     *
     * @param list the token sequences to search
     * @param resolutionMode {@code REMOVE_BLOCKED} passes the entities to
     *        {@code StandoffResolver.resolveStandoffs}; {@code MARK_BLOCKED}
     *        passes them to {@code StandoffResolver.markBlockedStandoffs}
     * @return the list of entities, as left by the chosen resolver call
     * @throws RuntimeException if {@code resolutionMode} is neither of the
     *         two supported modes (including {@code null})
     */
    @Override
    public List<NamedEntity> findNamedEntities(List<TokenSequence> list, StandoffResolver.ResolutionMode resolutionMode) {
        List<NamedEntity> entities = new ArrayList<NamedEntity>();
        for (TokenSequence tokenSequence : list) {
            List<Token> tokens = tokenSequence.getTokens();
            Matcher matcher = this.pattern.matcher(tokenSequence.getSurface());
            while (matcher.find()) {
                Token firstToken = tokenSequence.getTokenByStartIndex(matcher.start());
                Token lastToken = tokenSequence.getTokenByEndIndex(matcher.end());
                if (firstToken == null || lastToken == null) {
                    // The match does not line up with token boundaries.
                    continue;
                }
                int from = indexOfSame(tokens, firstToken, true);
                int to = indexOfSame(tokens, lastToken, false);
                if (from < 0 || to < from) {
                    // Either token is missing from the list, or the end token
                    // precedes the start token (e.g. a zero-length match sitting
                    // between two adjacent tokens). There is no token span to
                    // report, so skip instead of running off the list.
                    continue;
                }
                // Copy the slice so the entity gets its own independent list.
                List<Token> span = new ArrayList<Token>(tokens.subList(from, to + 1));
                entities.add(new NamedEntity(span, matcher.group(), this.neType));
            }
        }
        if (resolutionMode == StandoffResolver.ResolutionMode.REMOVE_BLOCKED) {
            StandoffResolver.resolveStandoffs(entities);
        } else if (resolutionMode == StandoffResolver.ResolutionMode.MARK_BLOCKED) {
            StandoffResolver.markBlockedStandoffs(entities);
        } else {
            throw new RuntimeException(resolutionMode + " not yet implemented");
        }
        return entities;
    }

    /**
     * Returns the position of {@code target} in {@code tokens}, comparing by
     * reference identity, or -1 if it is not present.
     *
     * @param forward scan from the front when true, from the back when false
     */
    private static int indexOfSame(List<Token> tokens, Token target, boolean forward) {
        int size = tokens.size();
        for (int step = 0; step < size; step++) {
            int i = forward ? step : size - 1 - step;
            if (tokens.get(i) == target) {
                return i;
            }
        }
        return -1;
    }

    /**
     * @return the compiled pattern this recogniser matches with
     */
    public Pattern getPattern() {
        return this.pattern;
    }

    /**
     * Sets the type given to entities created by subsequent calls to
     * {@code findNamedEntities}.
     *
     * @param namedEntityType the type to assign to new entities
     */
    public void setNamedEntityType(NamedEntityType namedEntityType) {
        this.neType = namedEntityType;
    }
}
