| 1 | /* |
| 2 | SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de> |
| 3 | |
| 4 | SPDX-License-Identifier: LGPL-2.1-or-later |
| 5 | */ |
| 6 | |
| 7 | |
| 8 | #include "xmlextractor.h" |
| 9 | #include "kfilemetadata_debug.h" |
| 10 | #include "dublincoreextractor.h" |
| 11 | |
| 12 | #include <QDomDocument> |
| 13 | #include <QFile> |
| 14 | #include <QXmlStreamReader> |
| 15 | |
| 16 | #ifdef SVG_XML_COMPRESSED_SUPPORT |
| 17 | #include <KCompressionDevice> |
| 18 | #endif |
| 19 | |
| 20 | namespace { |
| 21 | |
| 22 | //inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); } |
| 23 | inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg" ); } |
| 24 | inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#" ); } |
| 25 | inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#" ); } |
| 26 | |
| 27 | void (KFileMetaData::ExtractionResult* result, const QDomElement &node) |
| 28 | { |
| 29 | if (node.namespaceURI() != svgNS()) { |
| 30 | return; |
| 31 | } |
| 32 | |
| 33 | if ((node.localName() == QLatin1String("g" )) || |
| 34 | (node.localName() == QLatin1String("a" ))) { |
| 35 | QDomElement e = node.firstChildElement(); |
| 36 | for (; !e.isNull(); e = e.nextSiblingElement()) { |
| 37 | extractSvgText(result, node: e); |
| 38 | } |
| 39 | } else if (node.localName() == QLatin1String("text" )) { |
| 40 | qCDebug(KFILEMETADATA_LOG) << node.text(); |
| 41 | result->append(text: node.text()); |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | static const QStringList supportedMimeTypes = { |
| 46 | QStringLiteral("application/xml" ), |
| 47 | QStringLiteral("image/svg+xml" ), |
| 48 | QStringLiteral("image/svg+xml-compressed" ), |
| 49 | QStringLiteral("image/svg" ), |
| 50 | }; |
| 51 | |
| 52 | } |
| 53 | |
| 54 | namespace KFileMetaData |
| 55 | { |
| 56 | |
| 57 | XmlExtractor::(QObject* parent) |
| 58 | : ExtractorPlugin(parent) |
| 59 | { |
| 60 | |
| 61 | } |
| 62 | |
| 63 | QStringList XmlExtractor::() const |
| 64 | { |
| 65 | return supportedMimeTypes; |
| 66 | } |
| 67 | |
| 68 | void XmlExtractor::(ExtractionResult* result) |
| 69 | { |
| 70 | auto flags = result->inputFlags(); |
| 71 | |
| 72 | QFile file(result->inputUrl()); |
| 73 | if (!file.open(flags: QIODevice::ReadOnly)) { |
| 74 | qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file" ; |
| 75 | return; |
| 76 | } |
| 77 | |
| 78 | |
| 79 | if ((result->inputMimetype() == QLatin1String("image/svg" )) || |
| 80 | (result->inputMimetype() == QLatin1String("image/svg+xml-compressed" )) || |
| 81 | (result->inputMimetype() == QLatin1String("image/svg+xml" ))) { |
| 82 | |
| 83 | result->addType(type: Type::Image); |
| 84 | |
| 85 | QIODevice *ioDevice = &file; |
| 86 | #ifdef SVG_XML_COMPRESSED_SUPPORT |
| 87 | std::unique_ptr<KCompressionDevice> gzReader; |
| 88 | if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed" )) { |
| 89 | gzReader.reset(p: new KCompressionDevice(&file, false, KCompressionDevice::CompressionType::GZip)); |
| 90 | if (!gzReader->open(mode: QIODevice::ReadOnly)) { |
| 91 | qCDebug(KFILEMETADATA_LOG) << "Failed to open" << result->inputUrl() << "-" << gzReader->errorString(); |
| 92 | return; |
| 93 | } |
| 94 | ioDevice = gzReader.get(); |
| 95 | } |
| 96 | #else |
| 97 | if (result->inputMimetype() == QLatin1String("image/svg+xml-compressed" )) { |
| 98 | return; |
| 99 | } |
| 100 | #endif |
| 101 | |
| 102 | QDomDocument doc; |
| 103 | doc.setContent(device: ioDevice, options: QDomDocument::ParseOption::UseNamespaceProcessing); |
| 104 | QDomElement svg = doc.firstChildElement(); |
| 105 | |
| 106 | if (!svg.isNull() |
| 107 | && svg.localName() == QLatin1String("svg" ) |
| 108 | && svg.namespaceURI() == svgNS()) { |
| 109 | |
| 110 | QDomElement e = svg.firstChildElement(); |
| 111 | for (; !e.isNull(); e = e.nextSiblingElement()) { |
| 112 | if (e.namespaceURI() != svgNS()) { |
| 113 | continue; |
| 114 | } |
| 115 | |
| 116 | if (e.localName() == QLatin1String("metadata" )) { |
| 117 | if (!(flags & ExtractionResult::ExtractMetaData)) { |
| 118 | continue; |
| 119 | } |
| 120 | |
| 121 | auto rdf = e.firstChildElement(tagName: QLatin1String("RDF" )); |
| 122 | if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) { |
| 123 | continue; |
| 124 | } |
| 125 | |
| 126 | auto cc = rdf.firstChildElement(tagName: QLatin1String("Work" )); |
| 127 | if (cc.isNull() || cc.namespaceURI() != ccNS()) { |
| 128 | continue; |
| 129 | } |
| 130 | |
| 131 | DublinCoreExtractor::extract(result, fragment: cc); |
| 132 | |
| 133 | } else if (e.localName() == QLatin1String("defs" )) { |
| 134 | // skip |
| 135 | continue; |
| 136 | } else if (flags & ExtractionResult::ExtractPlainText) { |
| 137 | // extract |
| 138 | extractSvgText(result, node: e); |
| 139 | } |
| 140 | } |
| 141 | } |
| 142 | } else { |
| 143 | result->addType(type: Type::Text); |
| 144 | |
| 145 | if (flags & ExtractionResult::ExtractPlainText) { |
| 146 | QXmlStreamReader stream(&file); |
| 147 | while (!stream.atEnd()) { |
| 148 | QXmlStreamReader::TokenType token = stream.readNext(); |
| 149 | |
| 150 | if (token == QXmlStreamReader::Characters) { |
| 151 | QString text = stream.text().trimmed().toString(); |
| 152 | if (!text.isEmpty()) { |
| 153 | result->append(text); |
| 154 | } |
| 155 | } |
| 156 | } |
| 157 | } |
| 158 | } |
| 159 | } |
| 160 | |
| 161 | } // namespace KFileMetaData |
| 162 | |
| 163 | #include "moc_xmlextractor.cpp" |
| 164 | |