Why I switched my search API from Bing to Google

Googlelogo
Image by Mark Knol

Going through the Techstars program, a few of my mentors worried about how much I was revealing through my blog. Fundamentally it isn't a calculation; I love what I'm doing and I love talking about it. But I just ran into yet another situation where being open paid off.

Joehtweet

Joe Heitzeberg dropped me that note in reply to my last blog post on switching to Bing from BOSS, and it was gold dust. I was aware of the Ajax API from a couple of years ago, but when I last looked into Google's offerings they were extremely restrictive about what you could do with the interface. Checking out their documentation, I saw they talk about more than client-side apps: they offer a REST interface and even have some PHP examples! The terms of service don't prohibit non-client use, though they do specify that your application must be freely available to users.

After a bit of experimentation I was able to get it up and running, and it made me extremely happy. In the test case I'm running, Google finds 44 Facebook profile pages for Susan Fogg, Bing finds 6 and BOSS only finds 1. That makes a massive difference to the usefulness of the friend suggestion part of Mailana.

There are a few wrinkles to the API. By default it only returns 4 results per call, and I had to add &rsz=large to get 8. Since I'm getting 50 at a time from the other providers, I then had to loop, adding &start=0, &start=8, etc. to pull in multiple pages. Google also doesn't include possible duplicate results by default, but adding &filter=0 fixed that.

Updated code included inline below, or you can download the complete source here

<?php

// You'll need to get your own API keys for these services. See

// http://developer.yahoo.com/wsregapp/

// http://www.bing.com/developers/createapp.aspx

// http://code.google.com/apis/ajaxsearch/signup.html

// Credentials for the three search providers. They ship empty on purpose:
// paste in the keys issued to you by each service before running the script.
const BING_API_KEY = '';
const YAHOO_API_KEY = '';
const GOOGLE_API_KEY = '';

/**
 * Performs an HTTP GET request with cURL and returns the raw response body.
 *
 * Every entry of $params is appended to $url as a `key=value` query pair.
 * Values are URL-encoded (array values are first joined with commas); keys
 * are used verbatim.
 *
 * @param string $url    Target URL; may already carry a query string.
 * @param array  $params Extra query parameters (name => scalar or array of scalars).
 *
 * @return string|false The response body, or false when the transfer fails.
 */
function pete_curl_get($url, $params)
{
    $pairs = array();
    // Iterate by value: a by-reference loop variable would stay bound to the
    // last element after the loop and buys nothing here.
    foreach ($params as $key => $val) {
        if (is_array($val)) {
            $val = implode(',', $val);
        }
        $pairs[] = $key.'='.urlencode($val);
    }
    $query = implode('&', $pairs);

    // Only add a separator when there is something to append, and pick the
    // one that fits the URL. Unconditionally appending "?" used to leave a
    // stray "?" glued to the last value of an already-complete query string.
    if ($query === '') {
        $fullurl = $url;
    } elseif (strpos($url, '?') === false) {
        $fullurl = $url.'?'.$query;
    } else {
        $lastchar = substr($url, -1);
        $joiner = ($lastchar === '?' || $lastchar === '&') ? '' : '&';
        $fullurl = $url.$joiner.$query;
    }

    $ch = curl_init();
    // Keep TLS certificate and host-name checks switched on so an https
    // endpoint cannot be impersonated. Plain-http requests are unaffected.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_URL, $fullurl);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mailana (curl)');
    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}

/**
 * Runs a web search against the Yahoo BOSS endpoint, asking for up to 50
 * results in JSON.
 *
 * @param string $termstring The search query.
 *
 * @return array List of results, each an array with the keys 'url', 'title'
 *               and 'abstract'. Empty when the request fails, the response
 *               cannot be decoded, or there are no hits.
 */
function perform_boss_web_search($termstring)
{
    // The query travels as the final path segment of the endpoint URL.
    $searchurl = 'http://boss.yahooapis.com/ysearch/web/v1/'.urlencode($termstring);
    $searchparams = array(
        'appid' => YAHOO_API_KEY,
        'format' => 'json',
        'count' => '50',
    );

    $response = pete_curl_get($searchurl, $searchparams);
    // pete_curl_get() yields false on a failed transfer; do not try to decode that.
    $responseobject = is_string($response) ? json_decode($response, true) : null;

    // empty() covers a missing/undecodable response as well as a zero hit count.
    if (empty($responseobject['ysearchresponse']['totalhits'])) {
        return array();
    }
    if (empty($responseobject['ysearchresponse']['resultset_web'])
        || !is_array($responseobject['ysearchresponse']['resultset_web'])) {
        return array();
    }

    $result = array();
    foreach ($responseobject['ysearchresponse']['resultset_web'] as $responseresult) {
        $result[] = array(
            'url' => $responseresult['url'],
            'title' => $responseresult['title'],
            'abstract' => $responseresult['abstract'],
        );
    }

    return $result;
}

/**
 * Runs a web search against the Bing JSON API, asking for the first 50 web
 * results.
 *
 * @param string $termstring The search query.
 *
 * @return array List of results, each an array with the keys 'url', 'title'
 *               and 'abstract'. Empty when the request fails, the response
 *               cannot be decoded, or there are no hits.
 */
function perform_bing_web_search($termstring)
{
    // Hand the parameters to pete_curl_get() so it builds and encodes the
    // query string. The space in Web.Options is encoded as "+", which is the
    // separator the API expects between option names.
    $searchparams = array(
        'AppId' => BING_API_KEY,
        'Query' => $termstring,
        'Sources' => 'Web',
        'Web.Count' => '50',
        'Web.Offset' => '0',
        'Web.Options' => 'DisableHostCollapsing DisableQueryAlterations',
        'JsonType' => 'raw',
    );

    $response = pete_curl_get('http://api.bing.net/json.aspx', $searchparams);
    // pete_curl_get() yields false on a failed transfer; do not try to decode that.
    $responseobject = is_string($response) ? json_decode($response, true) : null;

    // empty() covers a missing/undecodable response as well as a zero total.
    if (empty($responseobject['SearchResponse']['Web']['Total'])) {
        return array();
    }
    if (empty($responseobject['SearchResponse']['Web']['Results'])
        || !is_array($responseobject['SearchResponse']['Web']['Results'])) {
        return array();
    }

    $result = array();
    foreach ($responseobject['SearchResponse']['Web']['Results'] as $responseresult) {
        $result[] = array(
            'url' => $responseresult['Url'],
            'title' => $responseresult['Title'],
            'abstract' => $responseresult['Description'],
        );
    }

    return $result;
}

/**
 * Runs a web search against the Google AJAX Search REST endpoint.
 *
 * The request asks for the "large" result size and pages through the results
 * with start offsets 0, 8, 16, ... 48, stopping early as soon as a page comes
 * back without results. filter=0 is sent so possible duplicates are kept.
 *
 * @param string $termstring The search query.
 *
 * @return array List of results, each an array with the keys 'url', 'title'
 *               and 'abstract'. Empty when nothing could be fetched.
 */
function perform_google_web_search($termstring)
{
    $result = array();

    for ($start = 0; $start < 50; $start += 8) {
        // Hand the parameters to pete_curl_get() so it builds and encodes the
        // query string for us.
        $searchparams = array(
            'v' => '1.0',
            'key' => GOOGLE_API_KEY,
            'start' => (string) $start,
            'rsz' => 'large',
            'filter' => '0',
            'q' => $termstring,
        );

        $response = pete_curl_get('http://ajax.googleapis.com/ajax/services/search/web', $searchparams);
        // pete_curl_get() yields false on a failed transfer; do not try to decode that.
        $responseobject = is_string($response) ? json_decode($response, true) : null;

        // Stop on a failed request, an undecodable body, or a page with no
        // results. empty() is used because count() on a missing or null value
        // raises a TypeError on PHP 8.
        if (empty($responseobject['responseData']['results'])
            || !is_array($responseobject['responseData']['results'])) {
            break;
        }

        foreach ($responseobject['responseData']['results'] as $responseresult) {
            $result[] = array(
                'url' => $responseresult['url'],
                'title' => $responseresult['title'],
                'abstract' => $responseresult['content'],
            );
        }
    }

    return $result;
}

// Read the search query from the request. PHP has already URL-decoded the
// request superglobals, so the value is used as-is: decoding it a second time
// would turn a literal "+" into a space and mangle any "%xx" the user typed.
// Anything that is not a plain string (e.g. q[]=a) is treated as "no query".
if (isset($_REQUEST['q']) && is_string($_REQUEST['q'])) {
    $termstring = $_REQUEST['q'];
} else {
    $termstring = '';
}

?>

<html>

<head>

<title>Test page for Google, BOSS and Bing search apis</title>

</head>

<body>

<div style="padding:20px;">

<center>

<?php /* The query is echoed back into the input; escape it so quotes or markup in it cannot break out of the attribute. */ ?>
<form method="GET" action="searchexample.php">

Search terms: <input type="text" size="40" name="q" value='<?=htmlspecialchars($termstring, ENT_QUOTES, 'UTF-8')?>'/>

</form>

</center>

</div>

<?php

// Run the query against all three providers, then print one section per
// provider: a heading with the result count followed by each hit.
if ($termstring !== '') {
    $googleresults = perform_google_web_search($termstring);
    $bingresults = perform_bing_web_search($termstring);
    $bossresults = perform_boss_web_search($termstring);

    // Section label => results, in display order.
    $resultsections = array(
        'Google' => $googleresults,
        'Bing' => $bingresults,
        'BOSS' => $bossresults,
    );

    foreach ($resultsections as $label => $sectionresults) {
        print '<br/><br/><h2>'.$label.' search results ('.count($sectionresults).')</h2><br/>';

        foreach ($sectionresults as $result) {
            // The URL lands inside an attribute, so it must be escaped.
            $href = htmlspecialchars((string) $result['url'], ENT_QUOTES, 'UTF-8');

            // NOTE(review): title and abstract are printed exactly as the
            // provider returned them; presumably they carry highlight markup
            // that is meant to render. Confirm, and sanitize if the page is
            // ever exposed beyond testing.
            print '<a href="'.$href.'">'.$result['title'].'</a><br/>';
            print '<span style="font-size:80%">'.$result['abstract'].'</span><br/><hr/>';
        }
    }
}

?>

Why I switched my search API from BOSS to Bing

Bing

I'm a massive fan of Yahoo's developer tools. I think they're massively underrated by geekdom, and I'm still heavily reliant on their geo-coding services like Placemaker. It makes me pretty sad to admit I've recently switched from Yahoo BOSS to Bing's search API, so I thought I'd share my reasons, together with some PHP sample code.

In a nutshell, BOSS wasn't finding enough results for the sort of work I'm doing. Here's an example search, looking for people called Susan Fogg with public Facebook profiles:

http://www.bing.com/search?q=site%3Awww.facebook.com%2Fpeople+intitle%3A%22Susan+Fogg%22
6 results

http://search.yahoo.com/search?p=site%3Awww.facebook.com%2Fpeople+intitle%3A%22Susan+Fogg%22
1 result

http://www.google.com/search?q=site%3Awww.facebook.com%2Fpeople+intitle%3A%22Susan+Fogg%22&filter=0
15 results

This is not a scientific survey by any means, but Bing seems to index a lot more of the obscure pages on social networks than Yahoo. If only Google offered an API, they would be even better, but switching to Bing still offers a big improvement for my application.

I was nervous that Bing would be crippled by usage terms, but luckily they are effectively unrestricted and can be used for non-user-facing applications like mine.

Here's the code I'm using, as a download or included inline below:

<?php

// You'll need to get your own API keys for these services. See
// http://developer.yahoo.com/wsregapp/
// http://www.bing.com/developers/createapp.aspx
// Credentials for the two search providers. They ship empty on purpose:
// paste in the keys issued to you by each service before running the script.
const BING_API_KEY = '';
const YAHOO_API_KEY = '';

/**
 * Performs an HTTP GET request with cURL and returns the raw response body.
 *
 * Every entry of $params is appended to $url as a `key=value` query pair.
 * Values are URL-encoded (array values are first joined with commas); keys
 * are used verbatim.
 *
 * @param string $url    Target URL; may already carry a query string.
 * @param array  $params Extra query parameters (name => scalar or array of scalars).
 *
 * @return string|false The response body, or false when the transfer fails.
 */
function pete_curl_get($url, $params)
{
    $pairs = array();
    // Iterate by value: a by-reference loop variable would stay bound to the
    // last element after the loop and buys nothing here.
    foreach ($params as $key => $val) {
        if (is_array($val)) {
            $val = implode(',', $val);
        }
        $pairs[] = $key.'='.urlencode($val);
    }
    $query = implode('&', $pairs);

    // Only add a separator when there is something to append, and pick the
    // one that fits the URL. Unconditionally appending "?" used to leave a
    // stray "?" glued to the last value of an already-complete query string.
    if ($query === '') {
        $fullurl = $url;
    } elseif (strpos($url, '?') === false) {
        $fullurl = $url.'?'.$query;
    } else {
        $lastchar = substr($url, -1);
        $joiner = ($lastchar === '?' || $lastchar === '&') ? '' : '&';
        $fullurl = $url.$joiner.$query;
    }

    $ch = curl_init();
    // Keep TLS certificate and host-name checks switched on so an https
    // endpoint cannot be impersonated. Plain-http requests are unaffected.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_URL, $fullurl);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mailana (curl)');
    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}

/**
 * Runs a web search against the Yahoo BOSS endpoint, asking for up to 50
 * results in JSON.
 *
 * @param array $terms Search terms; they are joined with single spaces to
 *                     form the query.
 *
 * @return array List of results, each an array with the keys 'url', 'title'
 *               and 'abstract'. Empty when the request fails, the response
 *               cannot be decoded, or there are no hits.
 */
function perform_boss_web_search($terms)
{
    // The query travels as the final path segment of the endpoint URL.
    $searchurl = 'http://boss.yahooapis.com/ysearch/web/v1/'.urlencode(implode(' ', $terms));
    $searchparams = array(
        'appid' => YAHOO_API_KEY,
        'format' => 'json',
        'count' => '50',
    );

    $response = pete_curl_get($searchurl, $searchparams);
    // pete_curl_get() yields false on a failed transfer; do not try to decode that.
    $responseobject = is_string($response) ? json_decode($response, true) : null;

    // empty() covers a missing/undecodable response as well as a zero hit count.
    if (empty($responseobject['ysearchresponse']['totalhits'])) {
        return array();
    }
    if (empty($responseobject['ysearchresponse']['resultset_web'])
        || !is_array($responseobject['ysearchresponse']['resultset_web'])) {
        return array();
    }

    $result = array();
    foreach ($responseobject['ysearchresponse']['resultset_web'] as $responseresult) {
        $result[] = array(
            'url' => $responseresult['url'],
            'title' => $responseresult['title'],
            'abstract' => $responseresult['abstract'],
        );
    }

    return $result;
}

/**
 * Runs a web search against the Bing JSON API, asking for the first 50 web
 * results.
 *
 * @param array $terms Search terms; they are joined with single spaces to
 *                     form the query.
 *
 * @return array List of results, each an array with the keys 'url', 'title'
 *               and 'abstract'. Empty when the request fails, the response
 *               cannot be decoded, or there are no hits.
 */
function perform_bing_web_search($terms)
{
    // Hand the parameters to pete_curl_get() so it builds and encodes the
    // query string. The space in Web.Options is encoded as "+", which is the
    // separator the API expects between option names.
    $searchparams = array(
        'AppId' => BING_API_KEY,
        'Query' => implode(' ', $terms),
        'Sources' => 'Web',
        'Web.Count' => '50',
        'Web.Offset' => '0',
        'Web.Options' => 'DisableHostCollapsing DisableQueryAlterations',
        'JsonType' => 'raw',
    );

    $response = pete_curl_get('http://api.bing.net/json.aspx', $searchparams);
    // pete_curl_get() yields false on a failed transfer; do not try to decode that.
    $responseobject = is_string($response) ? json_decode($response, true) : null;

    // empty() covers a missing/undecodable response as well as a zero total.
    if (empty($responseobject['SearchResponse']['Web']['Total'])) {
        return array();
    }
    if (empty($responseobject['SearchResponse']['Web']['Results'])
        || !is_array($responseobject['SearchResponse']['Web']['Results'])) {
        return array();
    }

    $result = array();
    foreach ($responseobject['SearchResponse']['Web']['Results'] as $responseresult) {
        $result[] = array(
            'url' => $responseresult['Url'],
            'title' => $responseresult['Title'],
            'abstract' => $responseresult['Description'],
        );
    }

    return $result;
}

// Read the search query from the request and split it into terms. PHP has
// already URL-decoded the request superglobals, so the value is used as-is:
// decoding it a second time would turn a literal "+" into a space and mangle
// any "%xx" the user typed. Anything that is not a plain string (e.g. q[]=a)
// is treated as "no query".
if (isset($_REQUEST['q']) && is_string($_REQUEST['q'])) {
    // explode() on an empty string yields array(''), and repeated spaces
    // yield empty pieces; drop those so a blank query produces no terms
    // (and therefore no searches).
    $terms = array_values(array_filter(explode(' ', $_REQUEST['q']), 'strlen'));
} else {
    $terms = array();
}

$termstring = implode(' ', $terms);
?>
<html>
<head>
<title>Test page for BOSS and Bing search apis</title>
</head>
<body>
<?php /* The query is echoed back into the input; escape it so quotes or markup in it cannot break out of the attribute. */ ?>
<div style="padding:20px;">
<center>
<form method="GET" action="index.php">
Search terms: <input type="text" size="40" name="q" value="<?=htmlspecialchars($termstring, ENT_QUOTES, 'UTF-8')?>"/>
</form>
</center>
</div>
<?php
// Run the query against both providers, then print one section per provider:
// a heading with the result count followed by each hit.
if (count($terms) > 0) {
    $bingresults = perform_bing_web_search($terms);
    $bossresults = perform_boss_web_search($terms);

    // Section label => results, in display order. Each section must use its
    // own provider's results (the BOSS section used to repeat the Bing ones).
    $resultsections = array(
        'Bing' => $bingresults,
        'BOSS' => $bossresults,
    );

    foreach ($resultsections as $label => $sectionresults) {
        print '<br/><br/><h2>'.$label.' search results ('.count($sectionresults).')</h2><br/>';

        foreach ($sectionresults as $result) {
            // The URL lands inside an attribute, so it must be escaped.
            $href = htmlspecialchars((string) $result['url'], ENT_QUOTES, 'UTF-8');

            // NOTE(review): title and abstract are printed exactly as the
            // provider returned them; presumably they carry highlight markup
            // that is meant to render. Confirm, and sanitize if the page is
            // ever exposed beyond testing.
            print '<a href="'.$href.'">'.$result['title'].'</a><br/>';
            print '<span style="font-size:80%">'.$result['abstract'].'</span><br/><hr/>';
        }
    }
}

?>