/*
    SPDX-FileCopyrightText: 2022 Kai Uwe Broulik <kde@broulik.de>

    SPDX-License-Identifier: LGPL-2.1-or-later
*/
| 6 | |
| 7 | #include "datetimeparser_p.h" |
| 8 | #include "fb2extractor.h" |
| 9 | #include "kfilemetadata_debug.h" |
| 10 | |
| 11 | #include <QDateTime> |
| 12 | #include <QFile> |
| 13 | #include <QXmlStreamReader> |
| 14 | |
| 15 | #include <KZip> |
| 16 | |
| 17 | #include <memory> |
| 18 | |
| 19 | using namespace KFileMetaData; |
| 20 | |
| 21 | Fb2Extractor::(QObject *parent) |
| 22 | : ExtractorPlugin(parent) |
| 23 | { |
| 24 | } |
| 25 | |
| 26 | namespace |
| 27 | { |
| 28 | static const QString regularMimeType() |
| 29 | { |
| 30 | return QStringLiteral("application/x-fictionbook+xml" ); |
| 31 | } |
| 32 | |
| 33 | static const QString compressedMimeType() |
| 34 | { |
| 35 | return QStringLiteral("application/x-zip-compressed-fb2" ); |
| 36 | } |
| 37 | |
| 38 | static const QStringList supportedMimeTypes = {regularMimeType(), compressedMimeType()}; |
| 39 | |
| 40 | } |
| 41 | |
| 42 | QStringList Fb2Extractor::() const |
| 43 | { |
| 44 | return supportedMimeTypes; |
| 45 | } |
| 46 | |
| 47 | void Fb2Extractor::(ExtractionResult *result) |
| 48 | { |
| 49 | std::unique_ptr<QIODevice> device; |
| 50 | std::unique_ptr<KZip> zip; |
| 51 | |
| 52 | if (result->inputMimetype() == regularMimeType()) { |
| 53 | device.reset(p: new QFile(result->inputUrl())); |
| 54 | if (!device->open(mode: QIODevice::ReadOnly | QIODevice::Text)) { |
| 55 | return; |
| 56 | } |
| 57 | |
| 58 | } else if (result->inputMimetype() == compressedMimeType()) { |
| 59 | zip.reset(p: new KZip(result->inputUrl())); |
| 60 | if (!zip->open(mode: QIODevice::ReadOnly)) { |
| 61 | qCDebug(KFILEMETADATA_LOG) << "Failed to open" << zip->fileName() << "-" << zip->errorString(); |
| 62 | return; |
| 63 | } |
| 64 | |
| 65 | const auto entries = zip->directory()->entries(); |
| 66 | if (entries.count() != 1) { |
| 67 | return; |
| 68 | } |
| 69 | |
| 70 | const QString entryPath = entries.first(); |
| 71 | if (!entryPath.endsWith(s: QLatin1String(".fb2" ))) { |
| 72 | return; |
| 73 | } |
| 74 | |
| 75 | const auto *entry = zip->directory()->file(name: entryPath); |
| 76 | if (!entry) { |
| 77 | return; |
| 78 | } |
| 79 | |
| 80 | device.reset(p: entry->createDevice()); |
| 81 | } |
| 82 | |
| 83 | result->addType(type: Type::Document); |
| 84 | |
| 85 | QXmlStreamReader xml(device.get()); |
| 86 | |
| 87 | bool inFictionBook = false; |
| 88 | bool inDescription = false; |
| 89 | bool inTitleInfo = false; |
| 90 | bool inAuthor = false; |
| 91 | bool inDocumentInfo = false; |
| 92 | bool inPublishInfo = false; |
| 93 | bool inBody = false; |
| 94 | |
| 95 | QString authorFirstName; |
| 96 | QString authorMiddleName; |
| 97 | QString authorLastName; |
| 98 | QString authorNickName; |
| 99 | |
| 100 | while (!xml.atEnd() && !xml.hasError()) { |
| 101 | xml.readNext(); |
| 102 | |
| 103 | if (xml.name() == QLatin1String("FictionBook" )) { |
| 104 | if (xml.isStartElement()) { |
| 105 | inFictionBook = true; |
| 106 | } else if (xml.isEndElement()) { |
| 107 | break; |
| 108 | } |
| 109 | } else if (xml.name() == QLatin1String("description" )) { |
| 110 | if (xml.isStartElement()) { |
| 111 | inDescription = true; |
| 112 | } else if (xml.isEndElement()) { |
| 113 | inDescription = false; |
| 114 | } |
| 115 | } else if (xml.name() == QLatin1String("title-info" )) { |
| 116 | if (xml.isStartElement()) { |
| 117 | inTitleInfo = true; |
| 118 | } else if (xml.isEndElement()) { |
| 119 | inTitleInfo = false; |
| 120 | } |
| 121 | } else if (xml.name() == QLatin1String("document-info" )) { |
| 122 | if (xml.isStartElement()) { |
| 123 | inDocumentInfo = true; |
| 124 | } else if (xml.isEndElement()) { |
| 125 | inDocumentInfo = false; |
| 126 | } |
| 127 | } else if (xml.name() == QLatin1String("publish-info" )) { |
| 128 | if (xml.isStartElement()) { |
| 129 | inPublishInfo = true; |
| 130 | } else if (xml.isEndElement()) { |
| 131 | inPublishInfo = false; |
| 132 | } |
| 133 | } else if (xml.name() == QLatin1String("body" )) { |
| 134 | if (xml.isStartElement()) { |
| 135 | inBody = true; |
| 136 | } else if (xml.isEndElement()) { |
| 137 | inBody = false; |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | if (!inFictionBook) { |
| 142 | continue; |
| 143 | } |
| 144 | |
| 145 | if (inDescription && result->inputFlags() & ExtractionResult::ExtractMetaData) { |
| 146 | if (inTitleInfo) { |
| 147 | if (xml.isStartElement()) { |
| 148 | if (xml.name() == QLatin1String("author" )) { |
| 149 | inAuthor = true; |
| 150 | } else if (inAuthor) { |
| 151 | if (xml.name() == QLatin1String("first-name" )) { |
| 152 | authorFirstName = xml.readElementText(); |
| 153 | } else if (xml.name() == QLatin1String("middle-name" )) { |
| 154 | authorMiddleName = xml.readElementText(); |
| 155 | } else if (xml.name() == QLatin1String("last-name" )) { |
| 156 | authorLastName = xml.readElementText(); |
| 157 | } else if (xml.name() == QLatin1String("nickname" )) { |
| 158 | authorNickName = xml.readElementText(); |
| 159 | } |
| 160 | } else if (xml.name() == QLatin1String("book-title" )) { |
| 161 | result->add(property: Property::Title, value: xml.readElementText()); |
| 162 | } else if (xml.name() == QLatin1String("annotation" )) { |
| 163 | result->add(property: Property::Description, value: xml.readElementText(behaviour: QXmlStreamReader::IncludeChildElements).trimmed()); |
| 164 | } else if (xml.name() == QLatin1String("lang" )) { |
| 165 | result->add(property: Property::Language, value: xml.readElementText()); |
| 166 | } else if (xml.name() == QLatin1String("genre" )) { |
| 167 | result->add(property: Property::Genre, value: xml.readElementText()); |
| 168 | } |
| 169 | } else if (xml.isEndElement()) { |
| 170 | inAuthor = false; |
| 171 | |
| 172 | QStringList nameParts = {authorFirstName, authorMiddleName, authorLastName}; |
| 173 | nameParts.removeAll(t: QString()); |
| 174 | |
| 175 | if (!nameParts.isEmpty()) { |
| 176 | result->add(property: Property::Author, value: nameParts.join(sep: QLatin1Char(' '))); |
| 177 | } else if (!authorNickName.isEmpty()) { |
| 178 | result->add(property: Property::Author, value: authorNickName); |
| 179 | } |
| 180 | |
| 181 | authorFirstName.clear(); |
| 182 | authorMiddleName.clear(); |
| 183 | authorLastName.clear(); |
| 184 | authorNickName.clear(); |
| 185 | } |
| 186 | } else if (inDocumentInfo) { |
| 187 | if (xml.name() == QLatin1String("date" )) { |
| 188 | // Date can be "not exact" but date "value", if present, is an xs:date |
| 189 | const auto dateValue = xml.attributes().value(qualifiedName: QLatin1String("value" )); |
| 190 | QDateTime dt = QDateTime::fromString(string: dateValue.toString()); |
| 191 | |
| 192 | if (!dt.isValid()) { |
| 193 | dt = Parser::dateTimeFromString(dateString: xml.readElementText()); |
| 194 | } |
| 195 | |
| 196 | if (dt.isValid()) { |
| 197 | result->add(property: Property::CreationDate, value: dt); |
| 198 | } |
| 199 | } else if (xml.name() == QLatin1String("program-used" )) { |
| 200 | result->add(property: Property::Generator, value: xml.readElementText()); |
| 201 | // "Owner of the fb2 document copyrights" |
| 202 | } else if (xml.name() == QLatin1String("publisher" )) { |
| 203 | result->add(property: Property::Copyright, value: xml.readElementText()); |
| 204 | } |
| 205 | } else if (inPublishInfo) { |
| 206 | if (xml.name() == QLatin1String("publisher" )) { |
| 207 | result->add(property: Property::Publisher, value: xml.readElementText()); |
| 208 | } else if (xml.name() == QLatin1String("year" )) { |
| 209 | bool ok; |
| 210 | const int releaseYear = xml.readElementText().toInt(ok: &ok); |
| 211 | if (ok) { |
| 212 | result->add(property: Property::ReleaseYear, value: releaseYear); |
| 213 | } |
| 214 | } |
| 215 | } |
| 216 | } else if (inBody && result->inputFlags() & ExtractionResult::ExtractPlainText && xml.isCharacters() && !xml.isWhitespace()) { |
| 217 | result->append(text: xml.text().toString()); |
| 218 | } |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | #include "moc_fb2extractor.cpp" |
| 223 | |