Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 29
0.00% covered (danger)
0.00%
0 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
QtiItemContentTokenizer
0.00% covered (danger)
0.00%
0 / 29
0.00% covered (danger)
0.00%
0 / 3
110
0.00% covered (danger)
0.00%
0 / 1
 getStrings
0.00% covered (danger)
0.00%
0 / 25
0.00% covered (danger)
0.00%
0 / 1
56
 getFileReferenceSerializer
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 applyFilters
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3/**
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; under version 2
7 * of the License (non-upgradable).
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 *
18 * Copyright (c) 2014-2021 (original work) Open Assessment Technologies SA (under the project TAO-PRODUCT);
19 *
20 */
21
22namespace oat\taoQtiItem\model\search;
23
24use core_kernel_classes_Resource;
25use oat\generis\model\fileReference\FileReferenceSerializer;
26use oat\generis\model\OntologyAwareTrait;
27use oat\oatbox\service\ConfigurableService;
28use oat\tao\model\search\tokenizer\ResourceTokenizer;
29use oat\taoQtiItem\model\qti\Service;
30use oat\taoQtiItem\model\search\Tokenizer\Filter\TokenFilterInterface;
31use taoItems_models_classes_ItemsService;
32
/**
 * Resource tokenizer that extracts the text content of a QTI item's XML file.
 *
 * The option self::OPTION_FILTERS may hold a list of TokenFilterInterface
 * instances; every text node is passed through them before being accepted.
 */
class QtiItemContentTokenizer extends ConfigurableService implements ResourceTokenizer
{
    use OntologyAwareTrait;

    public const SERVICE_ID = 'taoQtiItem/QtiItemContentTokenizer';
    public const OPTION_FILTERS = 'data_filters';

    /**
     * Get tokens as string[] extracted from a QTI file.
     *
     * The item content directory is resolved from the resource's item content
     * property, the QTI item file inside it is parsed as XML and every text
     * node is collected. An empty array is returned when the resource has no
     * item content, when the file is missing or empty, or when its content
     * is not well-formed XML.
     *
     * @param core_kernel_classes_Resource $resource item to tokenize
     *
     * @return string[] trimmed, non-empty text node values
     */
    public function getStrings(core_kernel_classes_Resource $resource)
    {
        try {
            $ontologyFiles = $resource->getPropertyValues(
                $this->getProperty(taoItems_models_classes_ItemsService::PROPERTY_ITEM_CONTENT)
            );
        } catch (\core_kernel_classes_EmptyProperty $e) {
            return [];
        }

        if (empty($ontologyFiles)) {
            return [];
        }

        $file = $this->getFileReferenceSerializer()
            ->unserializeDirectory(reset($ontologyFiles))
            ->getFile(Service::QTI_ITEM_FILE);

        if (!$file->exists()) {
            return [];
        }

        $content = $file->read();
        if (empty($content)) {
            return [];
        }

        $dom = new \DOMDocument();

        // Collect libxml parse errors internally instead of letting them surface
        // as PHP warnings, and always restore the previous error mode afterwards.
        $previousErrorMode = libxml_use_internal_errors(true);
        try {
            $loaded = $dom->loadXML($content);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }

        if ($loaded === false) {
            // Malformed XML: nothing can be tokenized.
            return [];
        }

        $xpath = new \DOMXPath($dom);

        $contentStrings = [];
        foreach ($xpath->query('//text()') as $textNode) {
            $text = $textNode->wholeText;

            // The filter chain only decides whether the node is kept; an explicit
            // comparison is used so that a value such as "0" is not discarded.
            if ($this->applyFilters($text) === '') {
                continue;
            }

            // The stored token is the trimmed original text, not the filter output.
            $token = trim($text);
            if ($token !== '') {
                // Whitespace-only nodes (indentation between elements) are skipped.
                $contentStrings[] = $token;
            }
        }

        return $contentStrings;
    }

    /**
     * Fetch the file reference serializer from the service manager.
     */
    protected function getFileReferenceSerializer(): FileReferenceSerializer
    {
        return $this->getServiceManager()->get(FileReferenceSerializer::SERVICE_ID);
    }

    /**
     * Run $data through every filter configured under self::OPTION_FILTERS.
     *
     * Filters are applied in configuration order, each one receiving the
     * output of the previous one. Without configured filters the input is
     * returned unchanged.
     *
     * @param string $data raw text to filter
     *
     * @return string output of the last filter, or $data when none is configured
     */
    protected function applyFilters(string $data): string
    {
        $output = $data;

        /** @var TokenFilterInterface $filter */
        foreach ($this->getOption(self::OPTION_FILTERS, []) as $filter) {
            $output = $filter->filter($output);
        }

        return $output;
    }
}