1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/netwerk/streamconv/converters/nsDirIndexParser.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,434 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/* This parsing code originally lived in xpfe/components/directory/ - bbaetz */ 1.10 + 1.11 +#include "mozilla/ArrayUtils.h" 1.12 + 1.13 +#include "prprf.h" 1.14 + 1.15 +#include "nsDirIndexParser.h" 1.16 +#include "nsEscape.h" 1.17 +#include "nsIInputStream.h" 1.18 +#include "nsCRT.h" 1.19 +#include "mozilla/dom/FallbackEncoding.h" 1.20 +#include "nsITextToSubURI.h" 1.21 +#include "nsIDirIndex.h" 1.22 +#include "nsServiceManagerUtils.h" 1.23 + 1.24 +using namespace mozilla; 1.25 + 1.26 +NS_IMPL_ISUPPORTS(nsDirIndexParser, 1.27 + nsIRequestObserver, 1.28 + nsIStreamListener, 1.29 + nsIDirIndexParser) 1.30 + 1.31 +nsDirIndexParser::nsDirIndexParser() { 1.32 +} 1.33 + 1.34 +nsresult 1.35 +nsDirIndexParser::Init() { 1.36 + mLineStart = 0; 1.37 + mHasDescription = false; 1.38 + mFormat = nullptr; 1.39 + mozilla::dom::FallbackEncoding::FromLocale(mEncoding); 1.40 + 1.41 + nsresult rv; 1.42 + // XXX not threadsafe 1.43 + if (gRefCntParser++ == 0) 1.44 + rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI); 1.45 + else 1.46 + rv = NS_OK; 1.47 + 1.48 + return rv; 1.49 +} 1.50 + 1.51 +nsDirIndexParser::~nsDirIndexParser() { 1.52 + delete[] mFormat; 1.53 + // XXX not threadsafe 1.54 + if (--gRefCntParser == 0) { 1.55 + NS_IF_RELEASE(gTextToSubURI); 1.56 + } 1.57 +} 1.58 + 1.59 +NS_IMETHODIMP 1.60 +nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) { 1.61 + mListener = aListener; 1.62 + return NS_OK; 1.63 +} 1.64 + 1.65 +NS_IMETHODIMP 1.66 +nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) { 1.67 + NS_IF_ADDREF(*aListener = mListener.get()); 1.68 + return NS_OK; 1.69 +} 1.70 + 1.71 +NS_IMETHODIMP 1.72 +nsDirIndexParser::GetComment(char** aComment) { 1.73 + *aComment = ToNewCString(mComment); 1.74 + 1.75 + if (!*aComment) 1.76 + return NS_ERROR_OUT_OF_MEMORY; 1.77 + 1.78 + return NS_OK; 1.79 +} 1.80 + 1.81 +NS_IMETHODIMP 1.82 +nsDirIndexParser::SetEncoding(const char* aEncoding) { 1.83 + mEncoding.Assign(aEncoding); 1.84 + return NS_OK; 1.85 +} 1.86 + 1.87 +NS_IMETHODIMP 1.88 +nsDirIndexParser::GetEncoding(char** aEncoding) { 1.89 + *aEncoding = ToNewCString(mEncoding); 1.90 + 1.91 + if (!*aEncoding) 1.92 + return NS_ERROR_OUT_OF_MEMORY; 1.93 + 1.94 + return NS_OK; 1.95 +} 1.96 + 1.97 +NS_IMETHODIMP 1.98 +nsDirIndexParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aCtxt) { 1.99 + return NS_OK; 1.100 +} 1.101 + 1.102 +NS_IMETHODIMP 1.103 +nsDirIndexParser::OnStopRequest(nsIRequest *aRequest, nsISupports *aCtxt, 1.104 + nsresult aStatusCode) { 1.105 + // Finish up 1.106 + if (mBuf.Length() > (uint32_t) mLineStart) { 1.107 + ProcessData(aRequest, aCtxt); 1.108 + } 1.109 + 1.110 + return NS_OK; 1.111 +} 1.112 + 1.113 +nsDirIndexParser::Field 1.114 +nsDirIndexParser::gFieldTable[] = { 1.115 + { "Filename", FIELD_FILENAME }, 1.116 + { "Description", FIELD_DESCRIPTION }, 1.117 + { "Content-Length", FIELD_CONTENTLENGTH }, 1.118 + { "Last-Modified", FIELD_LASTMODIFIED }, 1.119 + { "Content-Type", FIELD_CONTENTTYPE }, 1.120 + { "File-Type", FIELD_FILETYPE }, 1.121 + { nullptr, FIELD_UNKNOWN } 1.122 +}; 1.123 + 1.124 +nsrefcnt nsDirIndexParser::gRefCntParser = 0; 1.125 +nsITextToSubURI *nsDirIndexParser::gTextToSubURI; 1.126 + 1.127 +nsresult 1.128 +nsDirIndexParser::ParseFormat(const char* aFormatStr) { 1.129 + // Parse a "200" format line, and remember the fields and their 1.130 + // ordering in mFormat. Multiple 200 lines stomp on each other. 1.131 + 1.132 + // Lets find out how many elements we have. 1.133 + // easier to do this then realloc 1.134 + const char* pos = aFormatStr; 1.135 + unsigned int num = 0; 1.136 + do { 1.137 + while (*pos && nsCRT::IsAsciiSpace(char16_t(*pos))) 1.138 + ++pos; 1.139 + 1.140 + ++num; 1.141 + // There are a maximum of six allowed header fields (doubled plus 1.142 + // terminator, just in case) -- Bug 443299 1.143 + if (num > (2 * ArrayLength(gFieldTable))) 1.144 + return NS_ERROR_UNEXPECTED; 1.145 + 1.146 + if (! *pos) 1.147 + break; 1.148 + 1.149 + while (*pos && !nsCRT::IsAsciiSpace(char16_t(*pos))) 1.150 + ++pos; 1.151 + 1.152 + } while (*pos); 1.153 + 1.154 + delete[] mFormat; 1.155 + mFormat = new int[num+1]; 1.156 + // Prevent nullptr Deref - Bug 443299 1.157 + if (mFormat == nullptr) 1.158 + return NS_ERROR_OUT_OF_MEMORY; 1.159 + mFormat[num] = -1; 1.160 + 1.161 + int formatNum=0; 1.162 + do { 1.163 + while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr))) 1.164 + ++aFormatStr; 1.165 + 1.166 + if (! *aFormatStr) 1.167 + break; 1.168 + 1.169 + nsAutoCString name; 1.170 + int32_t len = 0; 1.171 + while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len]))) 1.172 + ++len; 1.173 + name.SetCapacity(len + 1); 1.174 + name.Append(aFormatStr, len); 1.175 + aFormatStr += len; 1.176 + 1.177 + // Okay, we're gonna monkey with the nsStr. Bold! 1.178 + name.SetLength(nsUnescapeCount(name.BeginWriting())); 1.179 + 1.180 + // All tokens are case-insensitive - http://www.mozilla.org/projects/netlib/dirindexformat.html 1.181 + if (name.LowerCaseEqualsLiteral("description")) 1.182 + mHasDescription = true; 1.183 + 1.184 + for (Field* i = gFieldTable; i->mName; ++i) { 1.185 + if (name.EqualsIgnoreCase(i->mName)) { 1.186 + mFormat[formatNum] = i->mType; 1.187 + ++formatNum; 1.188 + break; 1.189 + } 1.190 + } 1.191 + 1.192 + } while (*aFormatStr); 1.193 + 1.194 + return NS_OK; 1.195 +} 1.196 + 1.197 +nsresult 1.198 +nsDirIndexParser::ParseData(nsIDirIndex *aIdx, char* aDataStr) { 1.199 + // Parse a "201" data line, using the field ordering specified in 1.200 + // mFormat. 1.201 + 1.202 + if (!mFormat) { 1.203 + // Ignore if we haven't seen a format yet. 1.204 + return NS_OK; 1.205 + } 1.206 + 1.207 + nsresult rv = NS_OK; 1.208 + 1.209 + nsAutoCString filename; 1.210 + 1.211 + for (int32_t i = 0; mFormat[i] != -1; ++i) { 1.212 + // If we've exhausted the data before we run out of fields, just 1.213 + // bail. 1.214 + if (! *aDataStr) 1.215 + break; 1.216 + 1.217 + while (*aDataStr && nsCRT::IsAsciiSpace(*aDataStr)) 1.218 + ++aDataStr; 1.219 + 1.220 + char *value = aDataStr; 1.221 + 1.222 + if (*aDataStr == '"' || *aDataStr == '\'') { 1.223 + // it's a quoted string. snarf everything up to the next quote character 1.224 + const char quotechar = *(aDataStr++); 1.225 + ++value; 1.226 + while (*aDataStr && *aDataStr != quotechar) 1.227 + ++aDataStr; 1.228 + *aDataStr++ = '\0'; 1.229 + 1.230 + if (! aDataStr) { 1.231 + NS_WARNING("quoted value not terminated"); 1.232 + } 1.233 + } else { 1.234 + // it's unquoted. snarf until we see whitespace. 1.235 + value = aDataStr; 1.236 + while (*aDataStr && (!nsCRT::IsAsciiSpace(*aDataStr))) 1.237 + ++aDataStr; 1.238 + *aDataStr++ = '\0'; 1.239 + } 1.240 + 1.241 + fieldType t = fieldType(mFormat[i]); 1.242 + switch (t) { 1.243 + case FIELD_FILENAME: { 1.244 + // don't unescape at this point, so that UnEscapeAndConvert() can 1.245 + filename = value; 1.246 + 1.247 + bool success = false; 1.248 + 1.249 + nsAutoString entryuri; 1.250 + 1.251 + if (gTextToSubURI) { 1.252 + char16_t *result = nullptr; 1.253 + if (NS_SUCCEEDED(rv = gTextToSubURI->UnEscapeAndConvert(mEncoding.get(), filename.get(), 1.254 + &result)) && (result)) { 1.255 + if (*result) { 1.256 + aIdx->SetLocation(filename.get()); 1.257 + if (!mHasDescription) 1.258 + aIdx->SetDescription(result); 1.259 + success = true; 1.260 + } 1.261 + NS_Free(result); 1.262 + } else { 1.263 + NS_WARNING("UnEscapeAndConvert error"); 1.264 + } 1.265 + } 1.266 + 1.267 + if (!success) { 1.268 + // if unsuccessfully at charset conversion, then 1.269 + // just fallback to unescape'ing in-place 1.270 + // XXX - this shouldn't be using UTF8, should it? 1.271 + // when can we fail to get the service, anyway? - bbaetz 1.272 + aIdx->SetLocation(filename.get()); 1.273 + if (!mHasDescription) { 1.274 + aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get()); 1.275 + } 1.276 + } 1.277 + } 1.278 + break; 1.279 + case FIELD_DESCRIPTION: 1.280 + nsUnescape(value); 1.281 + aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get()); 1.282 + break; 1.283 + case FIELD_CONTENTLENGTH: 1.284 + { 1.285 + int64_t len; 1.286 + int32_t status = PR_sscanf(value, "%lld", &len); 1.287 + if (status == 1) 1.288 + aIdx->SetSize(len); 1.289 + else 1.290 + aIdx->SetSize(UINT64_MAX); // UINT64_MAX means unknown 1.291 + } 1.292 + break; 1.293 + case FIELD_LASTMODIFIED: 1.294 + { 1.295 + PRTime tm; 1.296 + nsUnescape(value); 1.297 + if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) { 1.298 + aIdx->SetLastModified(tm); 1.299 + } 1.300 + } 1.301 + break; 1.302 + case FIELD_CONTENTTYPE: 1.303 + aIdx->SetContentType(value); 1.304 + break; 1.305 + case FIELD_FILETYPE: 1.306 + // unescape in-place 1.307 + nsUnescape(value); 1.308 + if (!nsCRT::strcasecmp(value, "directory")) { 1.309 + aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY); 1.310 + } else if (!nsCRT::strcasecmp(value, "file")) { 1.311 + aIdx->SetType(nsIDirIndex::TYPE_FILE); 1.312 + } else if (!nsCRT::strcasecmp(value, "symbolic-link")) { 1.313 + aIdx->SetType(nsIDirIndex::TYPE_SYMLINK); 1.314 + } else { 1.315 + aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN); 1.316 + } 1.317 + break; 1.318 + case FIELD_UNKNOWN: 1.319 + // ignore 1.320 + break; 1.321 + } 1.322 + } 1.323 + 1.324 + return NS_OK; 1.325 +} 1.326 + 1.327 +NS_IMETHODIMP 1.328 +nsDirIndexParser::OnDataAvailable(nsIRequest *aRequest, nsISupports *aCtxt, 1.329 + nsIInputStream *aStream, 1.330 + uint64_t aSourceOffset, 1.331 + uint32_t aCount) { 1.332 + if (aCount < 1) 1.333 + return NS_OK; 1.334 + 1.335 + int32_t len = mBuf.Length(); 1.336 + 1.337 + // Ensure that our mBuf has capacity to hold the data we're about to 1.338 + // read. 1.339 + if (!mBuf.SetLength(len + aCount, fallible_t())) 1.340 + return NS_ERROR_OUT_OF_MEMORY; 1.341 + 1.342 + // Now read the data into our buffer. 1.343 + nsresult rv; 1.344 + uint32_t count; 1.345 + rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count); 1.346 + if (NS_FAILED(rv)) return rv; 1.347 + 1.348 + // Set the string's length according to the amount of data we've read. 1.349 + // Note: we know this to work on nsCString. This isn't guaranteed to 1.350 + // work on other strings. 1.351 + mBuf.SetLength(len + count); 1.352 + 1.353 + return ProcessData(aRequest, aCtxt); 1.354 +} 1.355 + 1.356 +nsresult 1.357 +nsDirIndexParser::ProcessData(nsIRequest *aRequest, nsISupports *aCtxt) { 1.358 + if (!mListener) 1.359 + return NS_ERROR_FAILURE; 1.360 + 1.361 + int32_t numItems = 0; 1.362 + 1.363 + while(true) { 1.364 + ++numItems; 1.365 + 1.366 + int32_t eol = mBuf.FindCharInSet("\n\r", mLineStart); 1.367 + if (eol < 0) break; 1.368 + mBuf.SetCharAt(char16_t('\0'), eol); 1.369 + 1.370 + const char *line = mBuf.get() + mLineStart; 1.371 + 1.372 + int32_t lineLen = eol - mLineStart; 1.373 + mLineStart = eol + 1; 1.374 + 1.375 + if (lineLen >= 4) { 1.376 + nsresult rv; 1.377 + const char *buf = line; 1.378 + 1.379 + if (buf[0] == '1') { 1.380 + if (buf[1] == '0') { 1.381 + if (buf[2] == '0' && buf[3] == ':') { 1.382 + // 100. Human-readable comment line. Ignore 1.383 + } else if (buf[2] == '1' && buf[3] == ':') { 1.384 + // 101. Human-readable information line. 1.385 + mComment.Append(buf + 4); 1.386 + 1.387 + char *value = ((char *)buf) + 4; 1.388 + nsUnescape(value); 1.389 + mListener->OnInformationAvailable(aRequest, aCtxt, NS_ConvertUTF8toUTF16(value)); 1.390 + 1.391 + } else if (buf[2] == '2' && buf[3] == ':') { 1.392 + // 102. Human-readable information line, HTML. 1.393 + mComment.Append(buf + 4); 1.394 + } 1.395 + } 1.396 + } else if (buf[0] == '2') { 1.397 + if (buf[1] == '0') { 1.398 + if (buf[2] == '0' && buf[3] == ':') { 1.399 + // 200. Define field names 1.400 + rv = ParseFormat(buf + 4); 1.401 + if (NS_FAILED(rv)) { 1.402 + return rv; 1.403 + } 1.404 + } else if (buf[2] == '1' && buf[3] == ':') { 1.405 + // 201. Field data 1.406 + nsCOMPtr<nsIDirIndex> idx = do_CreateInstance("@mozilla.org/dirIndex;1",&rv); 1.407 + if (NS_FAILED(rv)) 1.408 + return rv; 1.409 + 1.410 + rv = ParseData(idx, ((char *)buf) + 4); 1.411 + if (NS_FAILED(rv)) { 1.412 + return rv; 1.413 + } 1.414 + 1.415 + mListener->OnIndexAvailable(aRequest, aCtxt, idx); 1.416 + } 1.417 + } 1.418 + } else if (buf[0] == '3') { 1.419 + if (buf[1] == '0') { 1.420 + if (buf[2] == '0' && buf[3] == ':') { 1.421 + // 300. Self-referring URL 1.422 + } else if (buf[2] == '1' && buf[3] == ':') { 1.423 + // 301. OUR EXTENSION - encoding 1.424 + int i = 4; 1.425 + while (buf[i] && nsCRT::IsAsciiSpace(buf[i])) 1.426 + ++i; 1.427 + 1.428 + if (buf[i]) 1.429 + SetEncoding(buf+i); 1.430 + } 1.431 + } 1.432 + } 1.433 + } 1.434 + } 1.435 + 1.436 + return NS_OK; 1.437 +}