package org.cleartk.util.cr;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AndFileFilter;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.filefilter.NameFileFilter;
import org.apache.commons.io.filefilter.OrFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.component.ViewCreatorAnnotator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.SofaCapability;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.pear.util.FileUtil;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.cleartk.util.ViewUriUtil;

/**
 * Collection reader that reads files from disk, one file per CAS.
 *
 * <p>
 * {@link #PARAM_ROOT_FILE} names either a single file, which is then the only document, or a
 * directory that is traversed. During a directory traversal the optional suffix, pattern and
 * file-name parameters are combined with logical AND to select the files that are read. For every
 * selected file the reader stores the file content as the text of the configured view and records
 * the file's URI through {@link ViewUriUtil#setURI}.
 */
@SofaCapability(outputSofas = {ViewUriUtil.URI})
public class FilesCollectionReader extends JCasCollectionReader_ImplBase {

    /** Regular expression accepting only names that do not start with a period. */
    private static final String NON_DOT_NAME_REGEX = "^[^\\.].*$";

    public static final String PARAM_ROOT_FILE = "rootFile";

    @ConfigurationParameter(name = PARAM_ROOT_FILE, mandatory = true, description = "takes either the name of a single file or the root directory containing all the files to be processed.")
    protected File rootFile;

    public static final String PARAM_VIEW_NAME = "viewName";

    @ConfigurationParameter(name = PARAM_VIEW_NAME, mandatory = false, description = "takes the the name that should be given to the JCas view that the document texts should be set to.", defaultValue = {"_InitialView"})
    private String viewName;

    public static final String PARAM_LANGUAGE = "language";

    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false, description = "takes the language code corresponding to the language of the documents being examined.  The value of this parameter is simply passed on to JCas.setDocumentLanguage(String).")
    private String language;

    public static final String PARAM_ENCODING = "encoding";

    @ConfigurationParameter(name = PARAM_ENCODING, mandatory = false, description = "takes the encoding of the text files (e.g. \"UTF-8\").  See javadoc for java.nio.charset.Charset for a list of encoding names.")
    private String encoding;

    public static final String PARAM_SUFFIXES = "suffixes";

    @ConfigurationParameter(name = PARAM_SUFFIXES, mandatory = false, description = "takes suffixes (e.g. .txt) of the files that should be read in.")
    private String[] suffixes;

    public static final String PARAM_PATTERNS = "patterns";

    // NOTE(review): the description promises Matcher.find semantics; whether that holds depends on
    // the RegexFileFilter implementation, which is not visible in this file - confirm.
    @ConfigurationParameter(name = PARAM_PATTERNS, mandatory = false, description = "\ttakes regular expressions for matching the files that should be read in. Note that these will be searched for using java.util. regex.Matcher.find, so if you want to make sure the entire file name matches a pattern, you should start the string with ^ and end the string with $.")
    private String[] patterns;

    public static final String PARAM_NAME_FILES_FILE_NAMES = "nameFilesFileNames";

    @ConfigurationParameter(name = PARAM_NAME_FILES_FILE_NAMES, mandatory = false, description = "names files which contain lists of file names. For example, if the value 'mydata/mylist.txt' is provided, then the file 'mylist.txt' should contain a line delimited list of file names.  The file names in the list should not have directory information but should just be the names of the files. The directory is determined by 'rootFile' and the files that are processed result from traversing the directory structure provided and looking for files with a name found in the lists of file names. That is, no exception will be thrown if a file name in the list does not actually correspond to a file.")
    private String[] nameFilesFileNames;

    public static final String PARAM_FILE_NAMES = "fileNames";

    @ConfigurationParameter(name = PARAM_FILE_NAMES, mandatory = false, description = "provides a list of file names that should be read in. The directory of the file names is determined by 'rootFile' and the files that are processed result from traversing the directory structure provided and looking for files with a name found in the list of file names. That is, no exception will be thrown if a file name in the list does not actually correspond to a file.")
    private String[] fileNames;

    public static final String PARAM_IGNORE_SYSTEM_FILES = "ignoreSystemFiles";

    // NOTE(review): the description says only directories are affected, but createFileIterator()
    // also applies the same name restriction to files, so dot-prefixed files are skipped as well.
    @ConfigurationParameter(name = PARAM_IGNORE_SYSTEM_FILES, mandatory = false, description = "This parameter provides a flag that determines whether file iteration will traverse into directories that begin with a period '.' - to loosely correspond to 'system' files.  Setting this parameter to true will not cause file names that begin with a period to be ignored - just directories. ")
    private boolean ignoreSystemFiles = true;

    /** Remaining candidates to read; set up by {@link #initialize(UimaContext)}. */
    protected Iterator<File> files;

    /** File located by {@link #hasNext()} and not yet consumed by {@link #getNext(JCas)}. */
    protected File currentFile;

    /** Number of files that have been read successfully so far. */
    protected int completed = 0;

    /** Total number of files counted during initialization. */
    protected int filesCount = 0;

    /**
     * Creates a reader description that only sets the root file; no type system is supplied.
     *
     * @param rootFile path of the file or directory to read
     * @return the reader description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static CollectionReaderDescription getDescription(String rootFile) throws ResourceInitializationException {
        return CollectionReaderFactory.createReaderDescription(
                FilesCollectionReader.class,
                (TypeSystemDescription) null,
                new Object[]{PARAM_ROOT_FILE, rootFile});
    }

    /**
     * Instantiates a reader from {@link #getDescription(String)}.
     *
     * @param rootFile path of the file or directory to read
     * @return the instantiated reader
     * @throws ResourceInitializationException if the reader cannot be created
     */
    public static CollectionReader getCollectionReader(String rootFile) throws ResourceInitializationException {
        return CollectionReaderFactory.createReader(getDescription(rootFile), new Object[0]);
    }

    /**
     * Creates a reader description that sets the root file and the target view name.
     *
     * @param rootFile path of the file or directory to read
     * @param viewName name of the view that receives the document text
     * @return the reader description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static CollectionReaderDescription getDescriptionWithView(String rootFile, String viewName) throws ResourceInitializationException {
        return CollectionReaderFactory.createReaderDescription(
                FilesCollectionReader.class,
                new Object[]{PARAM_ROOT_FILE, rootFile, PARAM_VIEW_NAME, viewName});
    }

    /**
     * Instantiates a reader from {@link #getDescriptionWithView(String, String)}.
     *
     * @param rootFile path of the file or directory to read
     * @param viewName name of the view that receives the document text
     * @return the instantiated reader
     * @throws ResourceInitializationException if the reader cannot be created
     */
    public static CollectionReader getCollectionReaderWithView(String rootFile, String viewName) throws ResourceInitializationException {
        return CollectionReaderFactory.createReader(getDescriptionWithView(rootFile, viewName), new Object[0]);
    }

    /**
     * Creates a reader description that sets the root file, the view name and file-name patterns.
     *
     * @param rootFile path of the file or directory to read
     * @param viewName name of the view that receives the document text
     * @param patterns regular expressions used to select files
     * @return the reader description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static CollectionReaderDescription getDescriptionWithPatterns(String rootFile, String viewName, String... patterns) throws ResourceInitializationException {
        return CollectionReaderFactory.createReaderDescription(
                FilesCollectionReader.class,
                new Object[]{PARAM_ROOT_FILE, rootFile, PARAM_VIEW_NAME, viewName, PARAM_PATTERNS, patterns});
    }

    /**
     * Instantiates a reader from {@link #getDescriptionWithPatterns(String, String, String...)}.
     *
     * @param rootFile path of the file or directory to read
     * @param viewName name of the view that receives the document text
     * @param patterns regular expressions used to select files
     * @return the instantiated reader
     * @throws ResourceInitializationException if the reader cannot be created
     */
    public static CollectionReader getCollectionReaderWithPatterns(String rootFile, String viewName, String... patterns) throws ResourceInitializationException {
        return CollectionReaderFactory.createReader(getDescriptionWithPatterns(rootFile, viewName, patterns), new Object[0]);
    }

    /**
     * Creates a reader description that sets the root file, the view name and file suffixes.
     *
     * @param rootFile path of the file or directory to read
     * @param viewName name of the view that receives the document text
     * @param suffixes file-name suffixes used to select files
     * @return the reader description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static CollectionReaderDescription getDescriptionWithSuffixes(String rootFile, String viewName, String... suffixes) throws ResourceInitializationException {
        return CollectionReaderFactory.createReaderDescription(
                FilesCollectionReader.class,
                new Object[]{PARAM_ROOT_FILE, rootFile, PARAM_VIEW_NAME, viewName, PARAM_SUFFIXES, suffixes});
    }

    /**
     * Instantiates a reader from {@link #getDescriptionWithSuffixes(String, String, String...)}.
     *
     * @param rootFile path of the file or directory to read
     * @param viewName name of the view that receives the document text
     * @param suffixes file-name suffixes used to select files
     * @return the instantiated reader
     * @throws ResourceInitializationException if the reader cannot be created
     */
    public static CollectionReader getCollectionReaderWithSuffixes(String rootFile, String viewName, String... suffixes) throws ResourceInitializationException {
        return CollectionReaderFactory.createReader(getDescriptionWithSuffixes(rootFile, viewName, suffixes), new Object[0]);
    }

    /**
     * Validates the root file and prepares the file iterator and the total file count.
     *
     * @param uimaContext the UIMA context (not used by this implementation)
     * @throws ResourceInitializationException if the root file does not exist or the file iterator
     *         cannot be created
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        if (!this.rootFile.exists()) {
            String message = String.format("file or directory %s does not exist", this.rootFile.getPath());
            throw new ResourceInitializationException(new IOException(message));
        }

        if (this.rootFile.isFile()) {
            // A plain file is the one and only document; the filter parameters are not applied.
            this.files = Arrays.asList(this.rootFile).iterator();
            this.filesCount = 1;
        } else {
            this.files = createFileIterator();
            // Counting consumes an iterator, so a second, independent one is created for it.
            this.filesCount = countFiles(createFileIterator());
        }
    }

    /**
     * Builds an iterator over the files below {@link #rootFile} that pass all configured filters.
     *
     * <p>
     * The suffix, pattern and file-name restrictions are ANDed together; several patterns are ORed
     * with each other. A {@code null} or empty {@code suffixes} or {@code patterns} array imposes
     * no restriction. When {@code ignoreSystemFiles} is set, directories whose name starts with a
     * period are not descended into and files whose name starts with a period are excluded too.
     *
     * @return iterator over the matching files
     * @throws ResourceInitializationException if one of the name-list files cannot be read
     */
    protected Iterator<File> createFileIterator() throws ResourceInitializationException {
        IOFileFilter fileFilter = TrueFileFilter.INSTANCE;

        // An empty suffix array would reject every file; treat it like "not configured",
        // consistent with the handling of an empty pattern array below.
        if (this.suffixes != null && this.suffixes.length > 0) {
            fileFilter = new AndFileFilter(fileFilter, new SuffixFileFilter(this.suffixes));
        }

        if (this.patterns != null && this.patterns.length > 0) {
            IOFileFilter anyPattern = new RegexFileFilter(Pattern.compile(this.patterns[0]));
            for (int i = 1; i < this.patterns.length; i++) {
                anyPattern = new OrFileFilter(anyPattern, new RegexFileFilter(this.patterns[i]));
            }
            fileFilter = new AndFileFilter(fileFilter, anyPattern);
        }

        if (this.nameFilesFileNames != null) {
            ArrayList<String> listedNames = new ArrayList<String>();
            try {
                for (String nameFile : this.nameFilesFileNames) {
                    listedNames.addAll(Arrays.asList(FileUtil.loadListOfStrings(new File(nameFile))));
                }
            } catch (IOException e) {
                throw new ResourceInitializationException(e);
            }
            fileFilter = new AndFileFilter(fileFilter, new NameFileFilter(listedNames));
        }

        if (this.fileNames != null) {
            fileFilter = new AndFileFilter(fileFilter, new NameFileFilter(this.fileNames));
        }

        // Declared as the common filter interface: it holds either the accept-all filter or a
        // regex filter, depending on ignoreSystemFiles.
        IOFileFilter directoryFilter = TrueFileFilter.INSTANCE;
        if (this.ignoreSystemFiles) {
            directoryFilter = new RegexFileFilter(NON_DOT_NAME_REGEX);
            fileFilter = new AndFileFilter(fileFilter, new RegexFileFilter(NON_DOT_NAME_REGEX));
        }

        return FileUtils.iterateFiles(this.rootFile, fileFilter, directoryFilter);
    }

    /**
     * Loads the next file into the given CAS: sets the view text, the optional document language
     * and the document URI, then advances the reader.
     *
     * @param jCas the CAS to populate
     * @throws IOException if the file cannot be read
     * @throws CollectionException if the target view cannot be created
     * @throws RuntimeException if there is no further file to read
     */
    @Override
    public void getNext(JCas jCas) throws IOException, CollectionException {
        if (!hasNext()) {
            throw new RuntimeException("getNext(jCas) was called but hasNext() returns false");
        }
        try {
            JCas view = ViewCreatorAnnotator.createViewSafely(jCas, this.viewName);
            String text = org.apache.uima.util.FileUtils.file2String(this.currentFile, this.encoding);
            view.setSofaDataString(text, "text/plain");
            if (this.language != null) {
                view.setDocumentLanguage(this.language);
            }
            ViewUriUtil.setURI(jCas, this.currentFile.toURI());
            this.completed++;
            // Clearing the pending file makes the next hasNext() call advance the iterator.
            this.currentFile = null;
        } catch (AnalysisEngineProcessException e) {
            throw new CollectionException(e);
        }
    }

    /**
     * Counts the regular files produced by the given iterator, consuming it completely.
     *
     * @param it iterator over candidate files
     * @return number of elements for which {@link File#isFile()} is true
     */
    protected int countFiles(Iterator<File> it) {
        int count = 0;
        while (it.hasNext()) {
            if (it.next().isFile()) {
                count++;
            }
        }
        return count;
    }

    /**
     * Reports progress as the number of files read out of the number of files counted at
     * initialization.
     *
     * @return a single-element progress array
     */
    @Override
    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.completed, this.filesCount, "entities")};
    }

    /**
     * Tells whether another file is available, advancing the underlying iterator to the next
     * regular file if none is pending yet. Repeated calls without an intervening successful
     * {@link #getNext(JCas)} keep returning the same pending file.
     *
     * @return {@code true} if a file is pending, {@code false} once the iterator is exhausted
     */
    @Override
    public boolean hasNext() throws IOException, CollectionException {
        if (this.currentFile != null) {
            return true;
        }
        while (this.files.hasNext()) {
            this.currentFile = this.files.next();
            if (this.currentFile.isFile()) {
                return true;
            }
        }
        return false;
    }

    /** Does nothing. */
    @Override
    public void close() throws IOException {
    }
}
