/*
    Gettext translation file analyzer

    SPDX-FileCopyrightText: 2007 Montel Laurent <montel@kde.org>
    SPDX-FileCopyrightText: 2009 Jos van den Oever <jos@vandenoever.info>
    SPDX-FileCopyrightText: 2014 Nick Shaforostoff <shaforostoff@gmail.com>

    SPDX-License-Identifier: LGPL-2.1-or-later
*/
| 10 | |
| 11 | |
#include "poextractor.h"

#include <QFile>

#include <cstring>
#include <fstream>
#include <string>

using namespace KFileMetaData;
| 17 | |
| 18 | POExtractor::(QObject* parent) |
| 19 | : ExtractorPlugin(parent) |
| 20 | { |
| 21 | |
| 22 | } |
| 23 | |
| 24 | const QStringList supportedMimeTypes = { |
| 25 | QStringLiteral("text/x-gettext-translation" ), |
| 26 | }; |
| 27 | |
| 28 | QStringList POExtractor::() const |
| 29 | { |
| 30 | return supportedMimeTypes; |
| 31 | } |
| 32 | |
| 33 | void POExtractor::() |
| 34 | { |
| 35 | messages++; |
| 36 | fuzzy+=isFuzzy; |
| 37 | untranslated+=(!isTranslated); |
| 38 | |
| 39 | isFuzzy = false; |
| 40 | isTranslated = false; |
| 41 | state = WHITESPACE; |
| 42 | } |
| 43 | |
| 44 | void POExtractor::handleComment(const char* data, quint32 length) |
| 45 | { |
| 46 | state = COMMENT; |
| 47 | if (length >= 8 && strncmp(s1: data, s2: "#, fuzzy" , n: 8) == 0) { // could be better |
| 48 | isFuzzy = true; |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | void POExtractor::handleLine(const char* data, quint32 length) |
| 53 | { |
| 54 | if (state == ERROR) { |
| 55 | return; |
| 56 | } |
| 57 | if (state == WHITESPACE) { |
| 58 | if (length == 0) { |
| 59 | return; |
| 60 | } |
| 61 | if (data[0] != '#') { |
| 62 | state = COMMENT; //this allows PO files w/o comments |
| 63 | } else { |
| 64 | handleComment(data, length); |
| 65 | return; |
| 66 | } |
| 67 | } |
| 68 | if (state == COMMENT) { |
| 69 | if (length == 0) { |
| 70 | state = WHITESPACE; |
| 71 | } else if (data[0] == '#') { |
| 72 | handleComment(data, length); |
| 73 | } else if (length > 7 && strncmp(s1: "msgctxt" , s2: data, n: 7) == 0) { |
| 74 | state = MSGCTXT; |
| 75 | } else if (length > 7 && strncmp(s1: "msgid \"" , s2: data, n: 7) == 0) { |
| 76 | state = MSGID; |
| 77 | } else { |
| 78 | state = ERROR; |
| 79 | } |
| 80 | return; |
| 81 | } else if (length > 1 && data[0] == '"' && data[length-1] == '"' |
| 82 | && (state == MSGCTXT || state == MSGID || state == MSGSTR |
| 83 | || state == MSGID_PLURAL)) { |
| 84 | // continued text field |
| 85 | isTranslated = state == MSGSTR && length > 2; |
| 86 | } else if (state == MSGCTXT |
| 87 | && length > 7 && strncmp(s1: "msgid \"" , s2: data, n: 7) == 0) { |
| 88 | state = MSGID; |
| 89 | } else if (state == MSGID |
| 90 | && length > 14 && strncmp(s1: "msgid_plural \"" , s2: data, n: 14) == 0) { |
| 91 | state = MSGID_PLURAL; |
| 92 | } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR) |
| 93 | && length > 8 && strncmp(s1: "msgstr" , s2: data, n: 6) == 0) { |
| 94 | state = MSGSTR; |
| 95 | isTranslated = strncmp(s1: data+length-3, s2: " \"\"" , n: 3) != 0; |
| 96 | } else if (state == MSGSTR) { |
| 97 | if (length == 0) { |
| 98 | endMessage(); |
| 99 | } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries |
| 100 | endMessage(); |
| 101 | state = COMMENT; |
| 102 | handleLine(data, length); |
| 103 | } else { |
| 104 | state = ERROR; |
| 105 | } |
| 106 | } else { |
| 107 | state = ERROR; |
| 108 | } |
| 109 | #if 0 |
| 110 | if (messages > 1 || state != MSGSTR) return; |
| 111 | |
| 112 | // handle special values in the first message |
| 113 | // assumption is that value takes up only one line |
| 114 | if (strncmp("\"POT-Creation-Date: " , data, 20) == 0) { |
| 115 | result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21)); |
| 116 | } else if (strncmp("\"PO-Revision-Date: " , data, 19) == 0) { |
| 117 | result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20)); |
| 118 | } else if (strncmp("\"Last-Translator: " , data, 18) == 0) { |
| 119 | result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19)); |
| 120 | } |
| 121 | #endif |
| 122 | } |
| 123 | |
| 124 | void POExtractor::(ExtractionResult* result) |
| 125 | { |
| 126 | std::ifstream fstream(QFile::encodeName(fileName: result->inputUrl()).constData()); |
| 127 | if (!fstream.is_open()) { |
| 128 | return; |
| 129 | } |
| 130 | |
| 131 | result->addType(type: Type::Text); |
| 132 | if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { |
| 133 | return; |
| 134 | } |
| 135 | |
| 136 | state = WHITESPACE; |
| 137 | messages = 0; |
| 138 | untranslated = 0; |
| 139 | fuzzy = 0; |
| 140 | isFuzzy = false; |
| 141 | isTranslated = false; |
| 142 | |
| 143 | std::string line; |
| 144 | int lines = 0; |
| 145 | while (std::getline(is&: fstream, str&: line)) { |
| 146 | //TODO add a parsed text of translation units |
| 147 | //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size()); |
| 148 | //result->append(QString::fromUtf8(arr)); |
| 149 | |
| 150 | handleLine(data: line.c_str(), length: line.size()); |
| 151 | lines++; |
| 152 | |
| 153 | |
| 154 | if (messages <= 1 && state == MSGSTR) |
| 155 | { |
| 156 | // handle special values in the first message |
| 157 | // assumption is that value takes up only one line |
| 158 | if (strncmp(s1: "\"POT-Creation-Date: " , s2: line.c_str(), n: 20) == 0) { |
| 159 | result->add(property: Property::TranslationTemplateDate, value: QByteArray(line.c_str() + 20, line.size() - 21)); |
| 160 | } else if (strncmp(s1: "\"PO-Revision-Date: " , s2: line.c_str(), n: 19) == 0) { |
| 161 | result->add(property: Property::TranslationLastUpDate, value: QByteArray(line.c_str() + 19, line.size() - 20)); |
| 162 | } else if (strncmp(s1: "\"Last-Translator: " , s2: line.c_str(), n: 18) == 0) { |
| 163 | result->add(property: Property::TranslationLastAuthor, value: QString::fromUtf8(ba: QByteArray::fromRawData(data: line.c_str() + 18, size: line.size() - 19))); |
| 164 | } |
| 165 | } |
| 166 | } |
| 167 | handleLine(data: "" , length: 0); //for files with non-empty last line |
| 168 | messages--;//cause header does not count |
| 169 | |
| 170 | result->add(property: Property::TranslationUnitsTotal, value: messages); |
| 171 | result->add(property: Property::TranslationUnitsWithTranslation, value: messages-untranslated); |
| 172 | result->add(property: Property::TranslationUnitsWithDraftTranslation, value: fuzzy); |
| 173 | result->add(property: Property::LineCount, value: lines); |
| 174 | //TODO WordCount |
| 175 | } |
| 176 | |
| 177 | #include "moc_poextractor.cpp" |
| 178 | |