Logo Search packages:      
Sourcecode: chromium-browser version File versions  Download package

template_url.cc

// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/search_engines/template_url.h"

#include "app/l10n_util.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/i18n/rtl.h"
#include "base/logging.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/google_url_tracker.h"
#include "chrome/browser/search_engines/template_url_model.h"
#include "gfx/favicon_size.h"
#include "net/base/escape.h"

#if defined(OS_WIN)
#include "chrome/browser/rlz/rlz.h"
#endif

// The TemplateURLRef has any number of terms that need to be replaced. Each of
// the terms is enclosed in braces. If the character preceeding the final
// brace is a ?, it indicates the term is optional and can be replaced with
// an empty string.
static const wchar_t kStartParameter = '{';
static const wchar_t kEndParameter = '}';
static const wchar_t kOptional = '?';

// Known parameters found in the URL.
static const wchar_t kSearchTermsParameter[] = L"searchTerms";
static const char kSearchTermsParameterFull[] = "{searchTerms}";
static const wchar_t kCountParameter[] = L"count";
static const wchar_t kStartIndexParameter[] = L"startIndex";
static const wchar_t kStartPageParameter[] = L"startPage";
static const wchar_t kLanguageParameter[] = L"language";
static const wchar_t kInputEncodingParameter[] = L"inputEncoding";
static const wchar_t kOutputEncodingParameter[] = L"outputEncoding";

static const wchar_t kGoogleAcceptedSuggestionParameter[] =
    L"google:acceptedSuggestion";
// Host/Domain Google searches are relative to.
static const wchar_t kGoogleBaseURLParameter[] = L"google:baseURL";
static const char kGoogleBaseURLParameterFull[] = "{google:baseURL}";
// Like google:baseURL, but for the Search Suggest capability.
static const char kGoogleBaseSuggestURLParameter[] =
    "google:baseSuggestURL";
static const char kGoogleBaseSuggestURLParameterFull[] =
    "{google:baseSuggestURL}";
static const wchar_t kGoogleOriginalQueryForSuggestionParameter[] =
    L"google:originalQueryForSuggestion";
static const wchar_t kGoogleRLZParameter[] = L"google:RLZ";
// Same as kSearchTermsParameter, with no escaping.
static const wchar_t kGoogleUnescapedSearchTermsParameter[] =
    L"google:unescapedSearchTerms";
static const char kGoogleUnescapedSearchTermsParameterFull[] =
    "{google:unescapedSearchTerms}";

// Display value for kSearchTermsParameter.
static const char kDisplaySearchTerms[] = "%s";

// Display value for kGoogleUnescapedSearchTermsParameter.
static const char kDisplayUnescapedSearchTerms[] = "%S";

// Used if the count parameter is not optional. Indicates we want 10 search
// results.
static const wchar_t kDefaultCount[] = L"10";

// Used if the parameter kOutputEncodingParameter is required.
static const wchar_t kOutputEncodingType[] = L"UTF-8";

// static
std::wstring* TemplateURLRef::google_base_url_ = NULL;

TemplateURLRef::TemplateURLRef() {
  Set(std::wstring(), 0, 0);
}

void TemplateURLRef::Set(const std::wstring& url,
                         int index_offset,
                         int page_offset) {
  url_ = url;
  index_offset_ = index_offset;
  page_offset_ = page_offset;
  InvalidateCachedValues();
}

bool TemplateURLRef::ParseParameter(size_t start,
                                    size_t end,
                                    std::wstring* url,
                                    Replacements* replacements) const {
  DCHECK(start != std::string::npos &&
         end != std::string::npos && end > start);
  size_t length = end - start - 1;
  bool optional = false;
  if ((*url)[end - 1] == kOptional) {
    optional = true;
    length--;
  }
  std::wstring parameter(url->substr(start + 1, length));
  std::wstring full_parameter(url->substr(start, end - start + 1));
  // Remove the parameter from the string.
  url->erase(start, end - start + 1);
  if (parameter == kSearchTermsParameter) {
    replacements->push_back(Replacement(SEARCH_TERMS, static_cast<int>(start)));
  } else if (parameter == kCountParameter) {
    if (!optional)
      url->insert(start, kDefaultCount);
  } else if (parameter == kStartIndexParameter) {
    if (!optional) {
      url->insert(start, IntToWString(index_offset_));
    }
  } else if (parameter == kStartPageParameter) {
    if (!optional) {
      url->insert(start, IntToWString(page_offset_));
    }
  } else if (parameter == kLanguageParameter) {
    replacements->push_back(Replacement(LANGUAGE, static_cast<int>(start)));
  } else if (parameter == kInputEncodingParameter) {
    replacements->push_back(Replacement(ENCODING, static_cast<int>(start)));
  } else if (parameter == kOutputEncodingParameter) {
    if (!optional)
      url->insert(start, kOutputEncodingType);
  } else if (parameter == kGoogleAcceptedSuggestionParameter) {
    replacements->push_back(Replacement(GOOGLE_ACCEPTED_SUGGESTION,
                                        static_cast<int>(start)));
  } else if (parameter == kGoogleBaseURLParameter) {
    replacements->push_back(Replacement(GOOGLE_BASE_URL,
                                        static_cast<int>(start)));
  } else if (WideToUTF16Hack(parameter) ==
             ASCIIToUTF16(kGoogleBaseSuggestURLParameter)) {
    replacements->push_back(Replacement(GOOGLE_BASE_SUGGEST_URL,
                                        static_cast<int>(start)));
  } else if (parameter == kGoogleOriginalQueryForSuggestionParameter) {
    replacements->push_back(Replacement(GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
                                        static_cast<int>(start)));
  } else if (parameter == kGoogleRLZParameter) {
    replacements->push_back(Replacement(GOOGLE_RLZ, static_cast<int>(start)));
  } else if (parameter == kGoogleUnescapedSearchTermsParameter) {
    replacements->push_back(Replacement(GOOGLE_UNESCAPED_SEARCH_TERMS,
                                        static_cast<int>(start)));
  } else {
    // It can be some garbage but can also be a javascript block. Put it back.
    url->insert(start, full_parameter);
    return false;
  }
  return true;
}

std::wstring TemplateURLRef::ParseURL(const std::wstring& url,
                                      Replacements* replacements,
                                      bool* valid) const {
  *valid = false;
  std::wstring parsed_url = url;
  for (size_t last = 0; last != std::string::npos; ) {
    last = parsed_url.find(kStartParameter, last);
    if (last != std::string::npos) {
      size_t template_end = parsed_url.find(kEndParameter, last);
      if (template_end != std::string::npos) {
        // Since we allow Javascript in the URL, {} pairs could be nested. Match
        // only leaf pairs with supported parameters.
        size_t next_template_start = parsed_url.find(kStartParameter, last + 1);
        if (next_template_start == std::string::npos ||
            next_template_start > template_end) {
          // If successful, ParseParameter erases from the string as such no
          // need to update |last|. If failed, move |last| to the end of pair.
          if (!ParseParameter(last, template_end, &parsed_url, replacements)) {
            // |template_end| + 1 may be beyond the end of the string.
            last = template_end;
          }
        } else {
          last = next_template_start;
        }
      } else {
        // Open brace without a closing brace, return.
        return std::wstring();
      }
    }
  }
  *valid = true;
  return parsed_url;
}

void TemplateURLRef::ParseIfNecessary() const {
  if (!parsed_) {
    parsed_ = true;
    parsed_url_ = ParseURL(url_, &replacements_, &valid_);
    supports_replacements_ = false;
    if (valid_) {
      bool has_only_one_search_term = false;
      for (Replacements::const_iterator i = replacements_.begin();
           i != replacements_.end(); ++i) {
        if ((i->type == SEARCH_TERMS) ||
            (i->type == GOOGLE_UNESCAPED_SEARCH_TERMS)) {
          if (has_only_one_search_term) {
            has_only_one_search_term = false;
            break;
          }
          has_only_one_search_term = true;
          supports_replacements_ = true;
        }
      }
      // Only parse the host/key if there is one search term. Technically there
      // could be more than one term, but it's uncommon; so we punt.
      if (has_only_one_search_term)
        ParseHostAndSearchTermKey();
    }
  }
}

void TemplateURLRef::ParseHostAndSearchTermKey() const {
  string16 url_string = WideToUTF16Hack(url_);
  ReplaceSubstringsAfterOffset(&url_string, 0,
                               ASCIIToUTF16(kGoogleBaseURLParameterFull),
                               WideToUTF16Hack(GoogleBaseURLValue()));
  ReplaceSubstringsAfterOffset(&url_string, 0,
                               ASCIIToUTF16(kGoogleBaseSuggestURLParameterFull),
                               WideToUTF16Hack(GoogleBaseSuggestURLValue()));

  GURL url(UTF16ToUTF8(url_string));
  if (!url.is_valid())
    return;

  std::string query_string = url.query();
  if (query_string.empty())
    return;

  url_parse::Component query, key, value;
  query.len = static_cast<int>(query_string.size());
  while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key,
                                         &value)) {
    if (key.is_nonempty() && value.is_nonempty()) {
      std::string value_string = query_string.substr(value.begin, value.len);
      if (value_string.find(kSearchTermsParameterFull, 0) !=
          std::string::npos ||
          value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
          std::string::npos) {
        search_term_key_ = query_string.substr(key.begin, key.len);
        host_ = url.host();
        path_ = url.path();
        break;
      }
    }
  }
}

std::wstring TemplateURLRef::ReplaceSearchTerms(
    const TemplateURL& host,
    const std::wstring& terms,
    int accepted_suggestion,
    const std::wstring& original_query_for_suggestion) const {
  ParseIfNecessary();
  if (!valid_)
    return std::wstring();

  if (replacements_.empty())
    return parsed_url_;

  // Determine if the search terms are in the query or before. We're escaping
  // space as '+' in the former case and as '%20' in the latter case.
  bool is_in_query = true;
  for (Replacements::iterator i = replacements_.begin();
       i != replacements_.end(); ++i) {
    if (i->type == SEARCH_TERMS) {
      std::wstring::size_type query_start = parsed_url_.find(L'?');
      is_in_query = query_start != std::wstring::npos &&
          (static_cast<std::wstring::size_type>(i->index) > query_start);
      break;
    }
  }

  string16 encoded_terms;
  string16 encoded_original_query;
  std::wstring input_encoding;
  // If the search terms are in query - escape them respecting the encoding.
  if (is_in_query) {
    // Encode the search terms so that we know the encoding.
    const std::vector<std::string>& encodings = host.input_encodings();
    for (size_t i = 0; i < encodings.size(); ++i) {
      if (EscapeQueryParamValue(WideToUTF16Hack(terms),
                                encodings[i].c_str(), true,
                                &encoded_terms)) {
        if (!original_query_for_suggestion.empty()) {
          EscapeQueryParamValue(WideToUTF16Hack(original_query_for_suggestion),
                                encodings[i].c_str(),
                                true,
                                &encoded_original_query);
        }
        input_encoding = ASCIIToWide(encodings[i]);
        break;
      }
    }
    if (input_encoding.empty()) {
      encoded_terms = WideToUTF16Hack(
          EscapeQueryParamValueUTF8(terms, true));
      if (!original_query_for_suggestion.empty()) {
        encoded_original_query =
            WideToUTF16Hack(EscapeQueryParamValueUTF8(
            original_query_for_suggestion, true));
      }
      input_encoding = L"UTF-8";
    }
  } else {
    encoded_terms = WideToUTF16Hack(UTF8ToWide(EscapePath(WideToUTF8(terms))));
    input_encoding = L"UTF-8";
  }

  std::wstring url = parsed_url_;

  // replacements_ is ordered in ascending order, as such we need to iterate
  // from the back.
  for (Replacements::reverse_iterator i = replacements_.rbegin();
       i != replacements_.rend(); ++i) {
    switch (i->type) {
      case ENCODING:
        url.insert(i->index, input_encoding);
        break;

      case GOOGLE_ACCEPTED_SUGGESTION:
        if (accepted_suggestion == NO_SUGGESTION_CHOSEN)
          url.insert(i->index, L"aq=f&");
        else if (accepted_suggestion != NO_SUGGESTIONS_AVAILABLE)
          url.insert(i->index, StringPrintf(L"aq=%d&", accepted_suggestion));
        break;

      case GOOGLE_BASE_URL:
        url.insert(i->index, GoogleBaseURLValue());
        break;

      case GOOGLE_BASE_SUGGEST_URL:
        url.insert(i->index, GoogleBaseSuggestURLValue());
        break;

      case GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION:
        if (accepted_suggestion >= 0)
          url.insert(i->index, L"oq=" +
                     UTF16ToWideHack(encoded_original_query) + L"&");
        break;

      case GOOGLE_RLZ: {
        // On platforms that don't have RLZ, we still want this branch
        // to happen so that we replace the RLZ template with the
        // empty string.  (If we don't handle this case, we hit a
        // NOTREACHED below.)
#if defined(OS_WIN)
        std::wstring rlz_string;
        RLZTracker::GetAccessPointRlz(RLZTracker::CHROME_OMNIBOX, &rlz_string);
        if (!rlz_string.empty()) {
          rlz_string = L"rlz=" + rlz_string + L"&";
          url.insert(i->index, rlz_string);
        }
#endif
        break;
      }

      case GOOGLE_UNESCAPED_SEARCH_TERMS: {
        std::string unescaped_terms;
        base::WideToCodepage(terms, WideToASCII(input_encoding).c_str(),
                             base::OnStringConversionError::SKIP,
                             &unescaped_terms);
        url.insert(i->index, std::wstring(unescaped_terms.begin(),
                                          unescaped_terms.end()));
        break;
      }

      case LANGUAGE:
        url.insert(i->index,
                   ASCIIToWide(g_browser_process->GetApplicationLocale()));
        break;

      case SEARCH_TERMS:
        url.insert(i->index, UTF16ToWideHack(encoded_terms));
        break;

      default:
        NOTREACHED();
        break;
    }
  }

  return url;
}

bool TemplateURLRef::SupportsReplacement() const {
  ParseIfNecessary();
  return valid_ && supports_replacements_;
}

bool TemplateURLRef::IsValid() const {
  ParseIfNecessary();
  return valid_;
}

std::wstring TemplateURLRef::DisplayURL() const {
  ParseIfNecessary();
  if (!valid_)
    return url_;  // If we're not valid, don't escape anything.

  if (replacements_.empty())
    return url_;  // Nothing to replace, return the url.

  string16 result = WideToUTF16Hack(url_);
  ReplaceSubstringsAfterOffset(&result, 0,
                               ASCIIToUTF16(kSearchTermsParameterFull),
                               ASCIIToUTF16(kDisplaySearchTerms));

  ReplaceSubstringsAfterOffset(
      &result, 0,
      ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull),
      ASCIIToUTF16(kDisplayUnescapedSearchTerms));

  return UTF16ToWideHack(result);
}

// static
std::wstring TemplateURLRef::DisplayURLToURLRef(
    const std::wstring& display_url) {
  string16 result = WideToUTF16Hack(display_url);
  ReplaceSubstringsAfterOffset(&result, 0, ASCIIToUTF16(kDisplaySearchTerms),
                               ASCIIToUTF16(kSearchTermsParameterFull));
  ReplaceSubstringsAfterOffset(
      &result, 0,
      ASCIIToUTF16(kDisplayUnescapedSearchTerms),
      ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull));
  return UTF16ToWideHack(result);
}

const std::string& TemplateURLRef::GetHost() const {
  ParseIfNecessary();
  return host_;
}

const std::string& TemplateURLRef::GetPath() const {
  ParseIfNecessary();
  return path_;
}

const std::string& TemplateURLRef::GetSearchTermKey() const {
  ParseIfNecessary();
  return search_term_key_;
}

std::wstring TemplateURLRef::SearchTermToWide(const TemplateURL& host,
                                              const std::string& term) const {
  const std::vector<std::string>& encodings = host.input_encodings();
  std::wstring result;

  std::string unescaped =
      UnescapeURLComponent(term, UnescapeRule::REPLACE_PLUS_WITH_SPACE |
                                 UnescapeRule::URL_SPECIAL_CHARS);
  for (size_t i = 0; i < encodings.size(); ++i) {
    if (base::CodepageToWide(unescaped, encodings[i].c_str(),
                             base::OnStringConversionError::FAIL, &result))
      return result;
  }

  // Always fall back on UTF-8 if it works.
  if (base::CodepageToWide(unescaped, base::kCodepageUTF8,
                           base::OnStringConversionError::FAIL, &result))
    return result;

  // When nothing worked, just use the escaped text. We have no idea what the
  // encoding is. We need to substitute spaces for pluses ourselves since we're
  // not sending it through an unescaper.
  result = UTF8ToWide(term);
  std::replace(result.begin(), result.end(), '+', ' ');
  return result;
}

bool TemplateURLRef::HasGoogleBaseURLs() const {
  ParseIfNecessary();
  for (size_t i = 0; i < replacements_.size(); ++i) {
    if ((replacements_[i].type == GOOGLE_BASE_URL) ||
        (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
      return true;
  }
  return false;
}

void TemplateURLRef::InvalidateCachedValues() const {
  supports_replacements_ = valid_ = parsed_ = false;
  host_.clear();
  path_.clear();
  search_term_key_.clear();
  replacements_.clear();
}

// Returns the value to use for replacements of type GOOGLE_BASE_URL.
// static
std::wstring TemplateURLRef::GoogleBaseURLValue() {
  return google_base_url_ ?
    (*google_base_url_) : UTF8ToWide(GoogleURLTracker::GoogleURL().spec());
}

// Returns the value to use for replacements of type GOOGLE_BASE_SUGGEST_URL.
// static
std::wstring TemplateURLRef::GoogleBaseSuggestURLValue() {
  // The suggest base URL we want at the end is something like
  // "http://clients1.google.TLD/complete/".  The key bit we want from the
  // original Google base URL is the TLD.

  // Start with the Google base URL.
  const GURL base_url(google_base_url_ ?
      GURL(WideToUTF8(*google_base_url_)) : GoogleURLTracker::GoogleURL());
  DCHECK(base_url.is_valid());

  // Change "www." to "clients1." in the hostname.  If no "www." was found, just
  // prepend "clients1.".
  const std::string base_host(base_url.host());
  GURL::Replacements repl;
  const std::string suggest_host("clients1." +
      (base_host.compare(0, 4, "www.") ? base_host : base_host.substr(4)));
  repl.SetHostStr(suggest_host);

  // Replace any existing path with "/complete/".
  static const std::string suggest_path("/complete/");
  repl.SetPathStr(suggest_path);

  // Clear the query and ref.
  repl.ClearQuery();
  repl.ClearRef();
  return UTF8ToWide(base_url.ReplaceComponents(repl).spec());
}

// TemplateURL ----------------------------------------------------------------

// static
GURL TemplateURL::GenerateFaviconURL(const GURL& url) {
  DCHECK(url.is_valid());
  GURL::Replacements rep;

  const char favicon_path[] = "/favicon.ico";
  int favicon_path_len = arraysize(favicon_path) - 1;

  rep.SetPath(favicon_path, url_parse::Component(0, favicon_path_len));
  rep.ClearUsername();
  rep.ClearPassword();
  rep.ClearQuery();
  rep.ClearRef();
  return url.ReplaceComponents(rep);
}

// static
bool TemplateURL::SupportsReplacement(const TemplateURL* turl) {
  return turl && turl->url() && turl->url()->SupportsReplacement();
}

std::wstring TemplateURL::AdjustedShortNameForLocaleDirection() const {
  std::wstring bidi_safe_short_name;
  if (base::i18n::AdjustStringForLocaleDirection(short_name_,
                                                 &bidi_safe_short_name))
    return bidi_safe_short_name;
  return short_name_;
}

void TemplateURL::SetSuggestionsURL(const std::wstring& suggestions_url,
                                    int index_offset,
                                    int page_offset) {
  suggestions_url_.Set(suggestions_url, index_offset, page_offset);
}

void TemplateURL::SetURL(const std::wstring& url,
                         int index_offset,
                         int page_offset) {
  url_.Set(url, index_offset, page_offset);
}

void TemplateURL::set_keyword(const std::wstring& keyword) {
  // Case sensitive keyword matching is confusing. As such, we force all
  // keywords to be lower case.
  keyword_ = l10n_util::ToLower(keyword);
  autogenerate_keyword_ = false;
}

const std::wstring& TemplateURL::keyword() const {
  if (autogenerate_keyword_ && keyword_.empty()) {
    // Generate a keyword and cache it.
    keyword_ = TemplateURLModel::GenerateKeyword(
        TemplateURLModel::GenerateSearchURL(this).GetWithEmptyPath(), true);
  }
  return keyword_;
}

bool TemplateURL::ShowInDefaultList() const {
  return show_in_default_list() && url() && url()->SupportsReplacement();
}

void TemplateURL::SetFavIconURL(const GURL& url) {
  for (std::vector<ImageRef>::iterator i = image_refs_.begin();
       i != image_refs_.end(); ++i) {
    if (i->type == L"image/x-icon" &&
        i->width == kFavIconSize && i->height == kFavIconSize) {
      if (!url.is_valid())
        image_refs_.erase(i);
      else
        i->url = url;
      return;
    }
  }
  // Don't have one yet, add it.
  if (url.is_valid()) {
    add_image_ref(
        TemplateURL::ImageRef(L"image/x-icon", kFavIconSize, kFavIconSize,
                              url));
  }
}

GURL TemplateURL::GetFavIconURL() const {
  for (std::vector<ImageRef>::const_iterator i = image_refs_.begin();
       i != image_refs_.end(); ++i) {
    if ((i->type == L"image/x-icon" || i->type == L"image/vnd.microsoft.icon")
        && i->width == kFavIconSize && i->height == kFavIconSize) {
      return i->url;
    }
  }
  return GURL();
}

void TemplateURL::InvalidateCachedValues() const {
  url_.InvalidateCachedValues();
  suggestions_url_.InvalidateCachedValues();
  if (autogenerate_keyword_)
    keyword_.clear();
}

Generated by  Doxygen 1.6.0   Back to index