michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: /* This parsing code originally lived in xpfe/components/directory/ - bbaetz */
michael@0: 
michael@0: #include "mozilla/ArrayUtils.h"
michael@0: 
michael@0: #include "prprf.h"
michael@0: 
michael@0: #include "nsDirIndexParser.h"
michael@0: #include "nsEscape.h"
michael@0: #include "nsIInputStream.h"
michael@0: #include "nsCRT.h"
michael@0: #include "mozilla/dom/FallbackEncoding.h"
michael@0: #include "nsITextToSubURI.h"
michael@0: #include "nsIDirIndex.h"
michael@0: #include "nsServiceManagerUtils.h"
michael@0: 
michael@0: using namespace mozilla;
michael@0: 
michael@0: NS_IMPL_ISUPPORTS(nsDirIndexParser,
michael@0:                   nsIRequestObserver,
michael@0:                   nsIStreamListener,
michael@0:                   nsIDirIndexParser)
michael@0: 
michael@0: nsDirIndexParser::nsDirIndexParser() {
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: nsDirIndexParser::Init() {
michael@0:   mLineStart = 0;
michael@0:   mHasDescription = false;
michael@0:   mFormat = nullptr;
michael@0:   mozilla::dom::FallbackEncoding::FromLocale(mEncoding);
michael@0:  
michael@0:   nsresult rv;
michael@0:   // XXX not threadsafe
michael@0:   if (gRefCntParser++ == 0)
michael@0:     rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI);
michael@0:   else
michael@0:     rv = NS_OK;
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: nsDirIndexParser::~nsDirIndexParser() {
michael@0:   delete[] mFormat;
michael@0:   // XXX not threadsafe
michael@0:   if (--gRefCntParser == 0) {
michael@0:     NS_IF_RELEASE(gTextToSubURI);
michael@0:   }
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) {
michael@0:   mListener = aListener;
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) {
michael@0:   NS_IF_ADDREF(*aListener = mListener.get());
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::GetComment(char** aComment) {
michael@0:   *aComment = ToNewCString(mComment);
michael@0: 
michael@0:   if (!*aComment)
michael@0:     return NS_ERROR_OUT_OF_MEMORY;
michael@0:   
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::SetEncoding(const char* aEncoding) {
michael@0:   mEncoding.Assign(aEncoding);
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::GetEncoding(char** aEncoding) {
michael@0:   *aEncoding = ToNewCString(mEncoding);
michael@0: 
michael@0:   if (!*aEncoding)
michael@0:     return NS_ERROR_OUT_OF_MEMORY;
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aCtxt) {
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::OnStopRequest(nsIRequest *aRequest, nsISupports *aCtxt,
michael@0:                                 nsresult aStatusCode) {
michael@0:   // Finish up
michael@0:   if (mBuf.Length() > (uint32_t) mLineStart) {
michael@0:     ProcessData(aRequest, aCtxt);
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: nsDirIndexParser::Field
michael@0: nsDirIndexParser::gFieldTable[] = {
michael@0:   { "Filename", FIELD_FILENAME },
michael@0:   { "Description", FIELD_DESCRIPTION },
michael@0:   { "Content-Length", FIELD_CONTENTLENGTH },
michael@0:   { "Last-Modified", FIELD_LASTMODIFIED },
michael@0:   { "Content-Type", FIELD_CONTENTTYPE },
michael@0:   { "File-Type", FIELD_FILETYPE },
michael@0:   { nullptr, FIELD_UNKNOWN }
michael@0: };
michael@0: 
michael@0: nsrefcnt nsDirIndexParser::gRefCntParser = 0;
michael@0: nsITextToSubURI *nsDirIndexParser::gTextToSubURI;
michael@0: 
michael@0: nsresult
michael@0: nsDirIndexParser::ParseFormat(const char* aFormatStr) {
michael@0:   // Parse a "200" format line, and remember the fields and their
michael@0:   // ordering in mFormat. Multiple 200 lines stomp on each other.
michael@0: 
michael@0:   // Lets find out how many elements we have.
michael@0:   // easier to do this then realloc
michael@0:   const char* pos = aFormatStr;
michael@0:   unsigned int num = 0;
michael@0:   do {
michael@0:     while (*pos && nsCRT::IsAsciiSpace(char16_t(*pos)))
michael@0:       ++pos;
michael@0:     
michael@0:     ++num;
michael@0:     // There are a maximum of six allowed header fields (doubled plus
michael@0:     // terminator, just in case) -- Bug 443299
michael@0:     if (num > (2 * ArrayLength(gFieldTable)))
michael@0:       return NS_ERROR_UNEXPECTED;
michael@0: 
michael@0:     if (! *pos)
michael@0:       break;
michael@0: 
michael@0:     while (*pos && !nsCRT::IsAsciiSpace(char16_t(*pos)))
michael@0:       ++pos;
michael@0: 
michael@0:   } while (*pos);
michael@0: 
michael@0:   delete[] mFormat;
michael@0:   mFormat = new int[num+1];
michael@0:   // Prevent nullptr Deref - Bug 443299 
michael@0:   if (mFormat == nullptr)
michael@0:     return NS_ERROR_OUT_OF_MEMORY;
michael@0:   mFormat[num] = -1;
michael@0:   
michael@0:   int formatNum=0;
michael@0:   do {
michael@0:     while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr)))
michael@0:       ++aFormatStr;
michael@0:     
michael@0:     if (! *aFormatStr)
michael@0:       break;
michael@0: 
michael@0:     nsAutoCString name;
michael@0:     int32_t     len = 0;
michael@0:     while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len])))
michael@0:       ++len;
michael@0:     name.SetCapacity(len + 1);
michael@0:     name.Append(aFormatStr, len);
michael@0:     aFormatStr += len;
michael@0:     
michael@0:     // Okay, we're gonna monkey with the nsStr. Bold!
michael@0:     name.SetLength(nsUnescapeCount(name.BeginWriting()));
michael@0: 
michael@0:     // All tokens are case-insensitive - http://www.mozilla.org/projects/netlib/dirindexformat.html
michael@0:     if (name.LowerCaseEqualsLiteral("description"))
michael@0:       mHasDescription = true;
michael@0:     
michael@0:     for (Field* i = gFieldTable; i->mName; ++i) {
michael@0:       if (name.EqualsIgnoreCase(i->mName)) {
michael@0:         mFormat[formatNum] = i->mType;
michael@0:         ++formatNum;
michael@0:         break;
michael@0:       }
michael@0:     }
michael@0: 
michael@0:   } while (*aFormatStr);
michael@0:   
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: nsDirIndexParser::ParseData(nsIDirIndex *aIdx, char* aDataStr) {
michael@0:   // Parse a "201" data line, using the field ordering specified in
michael@0:   // mFormat.
michael@0: 
michael@0:   if (!mFormat) {
michael@0:     // Ignore if we haven't seen a format yet.
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   nsresult rv = NS_OK;
michael@0: 
michael@0:   nsAutoCString filename;
michael@0: 
michael@0:   for (int32_t i = 0; mFormat[i] != -1; ++i) {
michael@0:     // If we've exhausted the data before we run out of fields, just
michael@0:     // bail.
michael@0:     if (! *aDataStr)
michael@0:       break;
michael@0: 
michael@0:     while (*aDataStr && nsCRT::IsAsciiSpace(*aDataStr))
michael@0:       ++aDataStr;
michael@0: 
michael@0:     char    *value = aDataStr;
michael@0: 
michael@0:     if (*aDataStr == '"' || *aDataStr == '\'') {
michael@0:       // it's a quoted string. snarf everything up to the next quote character
michael@0:       const char quotechar = *(aDataStr++);
michael@0:       ++value;
michael@0:       while (*aDataStr && *aDataStr != quotechar)
michael@0:         ++aDataStr;
michael@0:       *aDataStr++ = '\0';
michael@0: 
michael@0:       if (! aDataStr) {
michael@0:         NS_WARNING("quoted value not terminated");
michael@0:       }
michael@0:     } else {
michael@0:       // it's unquoted. snarf until we see whitespace.
michael@0:       value = aDataStr;
michael@0:       while (*aDataStr && (!nsCRT::IsAsciiSpace(*aDataStr)))
michael@0:         ++aDataStr;
michael@0:       *aDataStr++ = '\0';
michael@0:     }
michael@0: 
michael@0:     fieldType t = fieldType(mFormat[i]);
michael@0:     switch (t) {
michael@0:     case FIELD_FILENAME: {
michael@0:       // don't unescape at this point, so that UnEscapeAndConvert() can
michael@0:       filename = value;
michael@0:       
michael@0:       bool    success = false;
michael@0:       
michael@0:       nsAutoString entryuri;
michael@0:       
michael@0:       if (gTextToSubURI) {
michael@0:         char16_t   *result = nullptr;
michael@0:         if (NS_SUCCEEDED(rv = gTextToSubURI->UnEscapeAndConvert(mEncoding.get(), filename.get(),
michael@0:                                                                 &result)) && (result)) {
michael@0:           if (*result) {
michael@0:             aIdx->SetLocation(filename.get());
michael@0:             if (!mHasDescription)
michael@0:               aIdx->SetDescription(result);
michael@0:             success = true;
michael@0:           }
michael@0:           NS_Free(result);
michael@0:         } else {
michael@0:           NS_WARNING("UnEscapeAndConvert error");
michael@0:         }
michael@0:       }
michael@0:       
michael@0:       if (!success) {
michael@0:         // if unsuccessfully at charset conversion, then
michael@0:         // just fallback to unescape'ing in-place
michael@0:         // XXX - this shouldn't be using UTF8, should it?
michael@0:         // when can we fail to get the service, anyway? - bbaetz
michael@0:         aIdx->SetLocation(filename.get());
michael@0:         if (!mHasDescription) {
michael@0:           aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get());
michael@0:         }
michael@0:       }
michael@0:     }
michael@0:       break;
michael@0:     case FIELD_DESCRIPTION:
michael@0:       nsUnescape(value);
michael@0:       aIdx->SetDescription(NS_ConvertUTF8toUTF16(value).get());
michael@0:       break;
michael@0:     case FIELD_CONTENTLENGTH:
michael@0:       {
michael@0:         int64_t len;
michael@0:         int32_t status = PR_sscanf(value, "%lld", &len);
michael@0:         if (status == 1)
michael@0:           aIdx->SetSize(len);
michael@0:         else
michael@0:           aIdx->SetSize(UINT64_MAX); // UINT64_MAX means unknown
michael@0:       }
michael@0:       break;
michael@0:     case FIELD_LASTMODIFIED:
michael@0:       {
michael@0:         PRTime tm;
michael@0:         nsUnescape(value);
michael@0:         if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) {
michael@0:           aIdx->SetLastModified(tm);
michael@0:         }
michael@0:       }
michael@0:       break;
michael@0:     case FIELD_CONTENTTYPE:
michael@0:       aIdx->SetContentType(value);
michael@0:       break;
michael@0:     case FIELD_FILETYPE:
michael@0:       // unescape in-place
michael@0:       nsUnescape(value);
michael@0:       if (!nsCRT::strcasecmp(value, "directory")) {
michael@0:         aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY);
michael@0:       } else if (!nsCRT::strcasecmp(value, "file")) {
michael@0:         aIdx->SetType(nsIDirIndex::TYPE_FILE);
michael@0:       } else if (!nsCRT::strcasecmp(value, "symbolic-link")) {
michael@0:         aIdx->SetType(nsIDirIndex::TYPE_SYMLINK);
michael@0:       } else {
michael@0:         aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN);
michael@0:       }
michael@0:       break;
michael@0:     case FIELD_UNKNOWN:
michael@0:       // ignore
michael@0:       break;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP
michael@0: nsDirIndexParser::OnDataAvailable(nsIRequest *aRequest, nsISupports *aCtxt,
michael@0:                                   nsIInputStream *aStream,
michael@0:                                   uint64_t aSourceOffset,
michael@0:                                   uint32_t aCount) {
michael@0:   if (aCount < 1)
michael@0:     return NS_OK;
michael@0:   
michael@0:   int32_t len = mBuf.Length();
michael@0:   
michael@0:   // Ensure that our mBuf has capacity to hold the data we're about to
michael@0:   // read.
michael@0:   if (!mBuf.SetLength(len + aCount, fallible_t()))
michael@0:     return NS_ERROR_OUT_OF_MEMORY;
michael@0: 
michael@0:   // Now read the data into our buffer.
michael@0:   nsresult rv;
michael@0:   uint32_t count;
michael@0:   rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count);
michael@0:   if (NS_FAILED(rv)) return rv;
michael@0: 
michael@0:   // Set the string's length according to the amount of data we've read.
michael@0:   // Note: we know this to work on nsCString. This isn't guaranteed to
michael@0:   //       work on other strings.
michael@0:   mBuf.SetLength(len + count);
michael@0: 
michael@0:   return ProcessData(aRequest, aCtxt);
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: nsDirIndexParser::ProcessData(nsIRequest *aRequest, nsISupports *aCtxt) {
michael@0:   if (!mListener)
michael@0:     return NS_ERROR_FAILURE;
michael@0:   
michael@0:   int32_t     numItems = 0;
michael@0:   
michael@0:   while(true) {
michael@0:     ++numItems;
michael@0:     
michael@0:     int32_t             eol = mBuf.FindCharInSet("\n\r", mLineStart);
michael@0:     if (eol < 0)        break;
michael@0:     mBuf.SetCharAt(char16_t('\0'), eol);
michael@0:     
michael@0:     const char  *line = mBuf.get() + mLineStart;
michael@0:     
michael@0:     int32_t lineLen = eol - mLineStart;
michael@0:     mLineStart = eol + 1;
michael@0:     
michael@0:     if (lineLen >= 4) {
michael@0:       nsresult  rv;
michael@0:       const char        *buf = line;
michael@0:       
michael@0:       if (buf[0] == '1') {
michael@0:         if (buf[1] == '0') {
michael@0:           if (buf[2] == '0' && buf[3] == ':') {
michael@0:             // 100. Human-readable comment line. Ignore
michael@0:           } else if (buf[2] == '1' && buf[3] == ':') {
michael@0:             // 101. Human-readable information line.
michael@0:             mComment.Append(buf + 4);
michael@0: 
michael@0:             char    *value = ((char *)buf) + 4;
michael@0:             nsUnescape(value);
michael@0:             mListener->OnInformationAvailable(aRequest, aCtxt, NS_ConvertUTF8toUTF16(value));
michael@0: 
michael@0:           } else if (buf[2] == '2' && buf[3] == ':') {
michael@0:             // 102. Human-readable information line, HTML.
michael@0:             mComment.Append(buf + 4);
michael@0:           }
michael@0:         }
michael@0:       } else if (buf[0] == '2') {
michael@0:         if (buf[1] == '0') {
michael@0:           if (buf[2] == '0' && buf[3] == ':') {
michael@0:             // 200. Define field names
michael@0:             rv = ParseFormat(buf + 4);
michael@0:             if (NS_FAILED(rv)) {
michael@0:               return rv;
michael@0:             }
michael@0:           } else if (buf[2] == '1' && buf[3] == ':') {
michael@0:             // 201. Field data
michael@0:             nsCOMPtr<nsIDirIndex> idx = do_CreateInstance("@mozilla.org/dirIndex;1",&rv);
michael@0:             if (NS_FAILED(rv))
michael@0:               return rv;
michael@0:             
michael@0:             rv = ParseData(idx, ((char *)buf) + 4);
michael@0:             if (NS_FAILED(rv)) {
michael@0:               return rv;
michael@0:             }
michael@0: 
michael@0:             mListener->OnIndexAvailable(aRequest, aCtxt, idx);
michael@0:           }
michael@0:         }
michael@0:       } else if (buf[0] == '3') {
michael@0:         if (buf[1] == '0') {
michael@0:           if (buf[2] == '0' && buf[3] == ':') {
michael@0:             // 300. Self-referring URL
michael@0:           } else if (buf[2] == '1' && buf[3] == ':') {
michael@0:             // 301. OUR EXTENSION - encoding
michael@0:             int i = 4;
michael@0:             while (buf[i] && nsCRT::IsAsciiSpace(buf[i]))
michael@0:               ++i;
michael@0:             
michael@0:             if (buf[i])
michael@0:               SetEncoding(buf+i);
michael@0:           }
michael@0:         }
michael@0:       }
michael@0:     }
michael@0:   }
michael@0:   
michael@0:   return NS_OK;
michael@0: }