User:ArmoryProfileBot/Source

This bot was written in PHP and uses the PECL HTTP extension (pecl_http). It currently runs from a DreamHost shell account using PHP 5.2.3 (cli).

<?php

// No promises on when these TODOs will get handled. Don't hold your breath.

// APB 2.0 features:
//  * HttpRequestPool?
//  * subst:-chained templates (significantly reduce display-time load)
//  * round-the-clock updates with burst-resistance queueing?
//  * per-character templates includable from any page
//
// APB 2.x features:
//  * find characters using API "embeddedin" and "templates" (abolish U:APB/Users, U:*/APB Config, and U:*/APB Table)
//  * stats tailored to build (e.g. no Str for mages, +heal for resto druids, etc.)

//
// Reference data
//

// Base URL of the Armory for each supported region code.
$url_armory = array(
    'US' => "http://www.wowarmory.com/",
    'EU' => "http://eu.wowarmory.com/",
);

// Entry point used for every wiki request (reads and edits).
$url_wiki = "http://www.wowwiki.com/index.php";

// Exception codes
define('APB_EX_FATAL', 1); // Abort the whole program.
define('APB_EX_SKIP', 2);  // Skip the current item.

// Profile fields are accessed as XPaths for sanity. Add new data fields here; this array is foreached.
// Outer key: Armory page name (fetched as "<key>.xml"). Inner key: field name stored on the character.
$files = array();

$files['character-sheet'] = array(
    'name'     => '//character/@name',
    'realm'    => '//character/@realm',
    'race'     => '//character/@race',
    'gender'   => '//character/@gender',
    'class'    => '//character/@class',
    'guild'    => '//character/@guildName',
    'level'    => '//character/@level',
    'talent1'  => '//talentSpec/@treeOne',
    'talent2'  => '//talentSpec/@treeTwo',
    'talent3'  => '//talentSpec/@treeThree',
    'prof1'    => '//professions/skill[1]/@name',
    'prof1val' => '//professions/skill[1]/@value',
    'prof2'    => '//professions/skill[2]/@name',
    'prof2val' => '//professions/skill[2]/@value',
    'health'   => '//health/@effective',
    'mana'     => '//secondBar[@type="m"]/@effective',
    'str'      => '//strength/@effective',
    'strbase'  => '//strength/@base',
    'agi'      => '//agility/@effective',
    'agibase'  => '//agility/@base',
    'sta'      => '//stamina/@effective',
    'stabase'  => '//stamina/@base',
    'int'      => '//intellect/@effective',
    'intbase'  => '//intellect/@base',
    'spi'      => '//spirit/@effective',
    'spibase'  => '//spirit/@base',
    'arm'      => '//armor/@effective',
    'armbase'  => '//armor/@base',
);

$files['character-skills'] = array(
    'cooking'  => '//skill[@key="cooking"]/@value',
    'firstaid' => '//skill[@key="firstaid"]/@value',
    'fishing'  => '//skill[@key="fishing"]/@value',
    'riding'   => '//skill[@key="riding"]/@value',
);

// // Classes //

// This only exists so that we can tell our own exceptions apart.
// The exception code carries one of the APB_EX_* constants (fatal vs. skip).
class APBException extends Exception {}

// // Functions //

/**
 * Fetches one XML document for a character from the Armory.
 *
 * @param string $region Key into the global $url_armory map (e.g. 'US', 'EU').
 * @param string $file   Armory page name without the ".xml" suffix (e.g. 'character-sheet').
 * @param string $query  Query string appended after "?"; it is used verbatim, so the caller must
 *                       have URL-encoded it already.
 * @return string Body of the HTTP response.
 * @throws APBException With code APB_EX_SKIP when the region is unknown, the transfer fails, the
 *                      response code is not 200, or the response is empty.
 */
function armory_get($region, $file, $query) {
    global $url_armory;

    // Fail with our own exception type instead of an undefined-index notice and a bogus URL.
    if (!isset($url_armory[$region]))
        throw new APBException("Failed to retrieve profile, error \"unknown region $region\"", APB_EX_SKIP);

    $url = $url_armory[$region] . "$file.xml?$query";

    // A browser User-Agent is sent on purpose.
    // NOTE(review): presumably the Armory only serves raw XML to browsers it recognizes -- confirm.
    $options = array(
        'redirect'  => 10,
        'useragent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2) Gecko/20070219 Firefox/2.0.0.2',
    );

    $msg = http_get($url, $options, $info);

    // Second attempt on DNS errors
    if (!empty($info['error']) && $info['error'] == "name lookup timed out")
        $msg = http_get($url, $options, $info);

    if (!empty($info['error']))
        throw new APBException("Failed to retrieve profile, error \"$info[error]\"", APB_EX_SKIP);
    if (isset($info['response_code']) && $info['response_code'] != 200)
        throw new APBException("Failed to retrieve profile, HTTP code $info[response_code]", APB_EX_SKIP);
    if (empty($msg))
        throw new APBException("Failed to retrieve profile, unknown error", APB_EX_SKIP);

    return http_parse_message($msg)->body;
}

// Wiki pages can contain a mix of UTF-8 and HTML entities, so only the entities themselves are
// decoded and the rest of the string is left untouched. Also, entities will be double encoded
// (e.g. &amp;ouml;). This function accounts for both conditions.
//
// Decoding is repeated until a pass leaves the string unchanged, so "&amp;ouml;" becomes "&ouml;"
// and then the UTF-8 character. Stopping on "no change" rather than on "no match" means an entity
// that cannot be decoded (e.g. "&bogus;") is left in place instead of looping forever.
//
// @param  string $str Text that may contain named, decimal or hexadecimal HTML entities.
// @return string      The text with every decodable entity replaced by its UTF-8 character.
function toUTF8($str) {
    $pattern = '/&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));/';

    do {
        $before = $str;
        $after = preg_replace_callback($pattern, '_toUTF8_decode_entity', $before);
        if ($after === null)
            return $before; // PCRE failure: keep what has been decoded so far.
        $str = $after;
    } while ($str !== $before);

    return $str;
}

// Callback for toUTF8(): decodes a single matched entity, or returns it unchanged if it is unknown.
function _toUTF8_decode_entity($match) {
    return html_entity_decode($match[0], ENT_QUOTES, 'UTF-8');
}

// wiki_get($title): requests "$url_wiki?action=raw&title=<urlencoded title>" and, on success, reads the
// body of the parsed HTTP message. Throws APBException with code APB_EX_SKIP on a transport error, a
// response code other than 200, or an empty response.
//
// NOTE(review): this part of the listing appears to have been damaged when it was copied out of the wiki
// and will not parse as-is -- confirm against the original before editing:
//  * statements share a line with the "//" comment that precedes them, so they are commented out;
//  * empty parentheses seem to be missing ("array" for "array()", "->send" for "->send()", ...);
//  * the line starting "$body = http_parse_message" breaks off inside a preg_match() pattern and resumes
//    inside an array literal of login fields, so the end of wiki_get() and the opening of the function
//    that follows (presumably wiki_put(), which the main loop calls) are missing.
function wiki_get($title) { global $url_wiki;

// The "raw" action appears to be similar to Special:Export, but without the data we don't care about. $msg = http_get("$url_wiki?action=raw&title=" . urlencode($title), array, $info);

// Second attempt on DNS errors if (isset($info) && !empty($info['error']) && $info['error'] == "name lookup timed out") $msg = http_get("$url_wiki?action=raw&title=" . urlencode($title), array, $info);

if (isset($info) && !empty($info['error'])) throw new APBException("Failed to retrieve wiki page, error \"$info[error]\"", APB_EX_SKIP); if (isset($info) && isset($info['response_code']) && $info['response_code'] != 200) throw new APBException("Failed to retrieve wiki page, HTTP code $info[response_code]", APB_EX_SKIP); if (empty($msg)) throw new APBException("Failed to retrieve wiki page, unknown error", APB_EX_SKIP);

$body = http_parse_message($msg)->body; if (preg_match('/ 'Special:Userlogin',                'action' => 'submitlogin',                'wpName' => 'ArmoryProfileBot',                'wpPassword' => ' yeah, like I'd leave it in ',                'wpLoginattempt' => 'Log in',        ), $edit_tokens = array(                'wpEditToken',                'wpEdittime',                'wpStarttime',        );

// Remainder (presumably the body of wiki_put -- TODO confirm): logs in on the first call of the session
// and requires the "wowwikiUserName" cookie, loads the edit form with a GET request to collect the edit
// tokens listed in $edit_tokens, then submits the page text when $post is set, or echoes it when
// $verbose is set instead. Login failures throw APB_EX_FATAL; edit failures throw APB_EX_SKIP.
// Authenticate if this is the first submit of the session if (!isset($req)) { // HACK: Lighttpd 1.4 (which WoWWiki uses) does not support "continue" // requests. It seems that the only way to prevent HttpRequest from // using that header is to force it to make HTTP/1.0 requests. $req = new HttpRequest($url_wiki, HTTP_METH_POST, array('protocol' => HTTP_VERSION_1_0)); $req->enableCookies; $req->setPostFields($req_fields); $req->send;

$code = $req->getResponseCode; if ($code != 200 && $code != 302) throw new APBException("Login failed, HTTP code $code.", APB_EX_FATAL);

$success = false; foreach ($req->getResponseCookies as $obj) foreach ($obj->cookies as $name => $val) if ($name == "wowwikiUserName") { $success = true; break; }               if (!$success) throw new APBException("Login failed, login cookies not set.", APB_EX_FATAL); }

// Get the necessary edit tokens //     $req->setPostFields(array('title' => $title, 'action' => 'edit')); //     $req->send; // HACK: wpStarttime and wpEdittime are empty for POST requests. $req->setMethod(HTTP_METH_GET); $req->setUrl("$url_wiki?action=edit&title=$title"); $req->send; $req->setMethod(HTTP_METH_POST); $req->setUrl($url_wiki);

$code = $req->getResponseCode; if ($code != 200 && $code != 302) throw new APBException("Post failed, HTTP code $code.", APB_EX_SKIP);

// Save the message body so that the edit tokens can be extracted after resetting the POST fields. $body = $req->getResponseBody;

$submit_fields = array(               'action' => 'submit',                'title' => $title,                'wpSave' => 'Save page',                'wpSection' => '',                'wpSummary' => $comment,                'wpTextbox1' => $text,        );

if ($minor) $submit_fields['wpMinoredit'] = 1;

// Each token is looked up in the edit form twice, once for each attribute order (value before name,
// then name before value); a token found in neither order aborts this edit.
// NOTE(review): the code reads $match['name'] and $match['value'], so the "(? " groups below were
// presumably named groups ("(?<value>...)" / "(?<name>...)") before extraction -- confirm.
foreach ($edit_tokens as $token) { if (preg_match('/value\s*=\s*"(? [^"]*)"\s[^<>]*name\s*=\s*"(? ' . $token . ')"/', $body, $match)) {                       $submit_fields[$match['name']] = $match['value'];                        continue;                }

if (preg_match('/name\s*=\s*"(? ' . $token . ')"\s[^<>]*value\s*=\s*"(? [^"]*)"/', $body, $match)) {                       $submit_fields[$match['name']] = $match['value'];                        continue;                }

throw new APBException("Post failed, could not find required edit token \"$token\".", APB_EX_SKIP); }

if ($post) { $req->setPostFields($submit_fields); $req->send;

$code = $req->getResponseCode; if ($code != 200 && $code != 302) throw new APBException("Post failed, HTTP code $code.", APB_EX_SKIP); } else if ($verbose) { echo "\nTest run. Would have posted to \"$title\":\n\n$text\n"; } }

// Currently, the two types of wiki pages supported by this bot (user list and
// character lists) use the same format, which allows non-word characters (e.g.
// list formatting) in front of each line for display purposes and ignores
// whitespace-only lines and any lines which contain "=" (headings), "{"
// (templates), "<" or "[".
//
// @param  string $text Raw page text; lines may end in "\n", "\r" or "\r\n".
// @return array        List of entries in page order, each trimmed and with any leading run of
//                      non-word characters removed.
function wiki_list($text) {
    // Turning every "\r" into "\n" makes "\r\n" produce an extra empty line, which is skipped below.
    $lines = explode("\n", str_replace("\r", "\n", $text));

    $items = array();
    foreach ($lines as $line) {
        if (false !== strpos($line, '=')) continue;
        if (false !== strpos($line, '{')) continue;
        if (false !== strpos($line, '<')) continue;
        if (false !== strpos($line, '[')) continue;
        if (preg_match('/^\s*$/', $line)) continue;

        $items[] = preg_replace('/^\s*(?:\W+\s*)*(.+)\s*$/', '$1', trim($line));
    }

    return $items;
}

// // "main" //

// Users already handled in this run, keyed by normalized user name.
$done_users = array();
// Character data already fetched in this run, indexed [region][realm][name].
$done_chars = array();

// User names given on the command line; when none are given the list is read from the wiki instead.
$users = array();

// Passing a command-line parameter of "--quiet" prevents status messages and non-fatal errors from being displayed.
// Passing "--verbose" causes status messages to be displayed on stdout.
if (isset($argv)) {
    if (in_array("--quiet", $argv)) {
        error_reporting(E_ERROR | E_USER_ERROR);
        $verbose = false;
    } else if (in_array("--verbose", $argv)) {
        $verbose = true;
    } else {
        $verbose = false;
    }

    if (in_array("--post", $argv)) {
        $post = true;
    } else {
        trigger_error("Updates will not be posted to WoWWiki unless this script is run with the \"--post\" option.", E_USER_WARNING);
        $post = false;
    }

    // Every argument after the script name that does not start with "-" is a user name.
    // A plain loop is used because create_function() and the "$arg{0}" syntax no longer exist in PHP 8.
    foreach (array_slice($argv, 1) as $arg) {
        if (substr($arg, 0, 1) != "-")
            $users[] = $arg;
    }
} else {
    trigger_error("This script does not appear to have been run from the command line. Updates will not be posted to WoWWiki unless this script is run from the command-line with the \"--post\" option.", E_USER_WARNING);

    $post = false;
    $verbose = false;
}

// No users named on the command line: read the list of users from the wiki.
// empty() is used instead of count() so that an unset $users is treated the same as an empty list.
if (empty($users)) {
    try {
        if ($verbose) echo "Retrieving list of APB users...\n";

        $users = wiki_get("User:ArmoryProfileBot/Users");
        if (!$users)
            trigger_error("User list is empty?", E_USER_ERROR);

        $users = wiki_list($users);
    } catch (APBException $ex) {
        // All errors are fatal here; no user list, no work.
        trigger_error((string) $ex, E_USER_ERROR);
    } catch (Exception $ex) {
        trigger_error("Unexpected exception retrieving user list: $ex", E_USER_ERROR);
    }
}

// For every user: read their character list from the wiki, fetch each character's Armory profile,
// derive the display fields, and post the resulting table to the user's APB_Table page.
//
// NOTE(review): every bare "\n" appended to $table below (and the "\n" passed to preg_replace) looks
// like the remains of a wiki template call that was stripped when this source was copied out of the
// wiki; the $msg assignments are kept because those templates presumably used them. Restore the
// template text from the original before relying on the posted output.
foreach ($users as $user) {
    // Page-title form of the user name: spaces become underscores, everything is URL-encoded, and
    // slashes are put back as literal "/".
    $user = str_replace(array("%2f", "%2F"), "/", urlencode(str_replace(' ', '_', $user)));

    // Don't process a user more than once.
    if (isset($done_users[$user]) && $done_users[$user]) continue;
    $done_users[$user] = true;

    try {
        if ($verbose) echo "Retrieving list of characters for \"$user\"...\n";

        $names = wiki_get("User:$user/APB_Config");
        if (!$names) {
            trigger_error("User config empty for \"$user\".", E_USER_WARNING);
            continue;
        }
    } catch (APBException $ex) {
        if ($ex->getCode() == APB_EX_FATAL) trigger_error((string) $ex, E_USER_ERROR);
        trigger_error((string) $ex, E_USER_WARNING);
        continue;
    } catch (Exception $ex) {
        trigger_error("Unexpected exception getting config for \"$user\": $ex", E_USER_WARNING);
        continue;
    }

    $names = wiki_list($names);
    if (count($names) > 20) {
        trigger_error($msg = "More than 20 rows. Due to the complexity of the APB templates, putting more than 20 rows on a single page causes WoWWiki to choke.  Only the first 20 rows will be processed.", E_USER_WARNING);
        $table = "\n";

        $skipped = array_slice($names, 20);
        $names = array_slice($names, 0, 20);
    } else {
        $table = "";
        $skipped = array();
    }

    $table .= "\n";

    // An index loop is used instead of foreach so that names appended to $names while the loop runs
    // (a skipped row taking the place of a failed one) are actually visited; foreach over a by-value
    // array never sees them.
    $names = array_values($names);
    for ($i = 0; $i < count($names); $i++) {
        $rawname = $names[$i];

        // encode before splitting so that it only has to be encoded once
        // (urlencode turns the separating spaces into "+", which is what the splits below look for)
        $name = urlencode(toUTF8($rawname));

        // strpos() reports "not found" as false, never as -1.
        if (false === ($pos = strpos($name, "+"))) {
            trigger_error($msg = "Incorrectly formatted line \"$rawname\".", E_USER_WARNING);
            $table .= "\n";
            if (count($skipped)) array_push($names, array_shift($skipped));
            continue;
        }

        // Everything before the first "+" is the region.
        $region = strtoupper(substr($name, 0, $pos));
        $name = substr($name, $pos + 1);

        if (!isset($url_armory[$region])) {
            trigger_error($msg = "Unrecognized region: \"$region\".", E_USER_WARNING);
            $table .= "\n";
            if (count($skipped)) array_push($names, array_shift($skipped));
            continue;
        }

        // Everything after the last "+" is the character name; the realm (which may itself contain
        // "+") is what lies in between.
        if (false === ($pos = strrpos($name, "+"))) {
            trigger_error($msg = "Incorrectly formatted line \"$rawname\".", E_USER_WARNING);
            $table .= "\n";
            if (count($skipped)) array_push($names, array_shift($skipped));
            continue;
        }

        $realm = substr($name, 0, $pos);
        $name = ucfirst(strtolower(substr($name, $pos + 1)));

        if (!isset($done_chars[$region])) $done_chars[$region] = array();
        if (!isset($done_chars[$region][$realm])) $done_chars[$region][$realm] = array();

        if (isset($done_chars[$region][$realm][$name])) {
            // Already fetched for another user in this run; reuse it.
            $char = $done_chars[$region][$realm][$name];
        } else {
            $char = array("region" => $region);

            // NOTE(review): the substring that marks an Armory error response was lost from this
            // source (the original needle is now an empty string). While it is empty the check below
            // is skipped, because an empty needle matches every string on PHP 8. Restore the marker.
            $armory_error_marker = "";

            foreach ($files as $file => $fields) {
                try {
                    if ($verbose) echo "Retrieving profile for \"$region $realm $name\"...\n";

                    $xml = armory_get($region, $file, "r=$realm&n=$name");
                    if (!$xml) {
                        trigger_error($msg = "Could not read profile for character \"$name\" on server \"$realm\" in region \"$region\".", E_USER_WARNING);
                        $table .= "\n";
                        if (count($skipped)) array_push($names, array_shift($skipped));
                        continue 2;
                    }
                } catch (APBException $ex) {
                    if ($ex->getCode() == APB_EX_FATAL) trigger_error((string) $ex, E_USER_ERROR);
                    trigger_error((string) $ex, E_USER_WARNING);
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                } catch (Exception $ex) {
                    // Don't expose unknown exceptions to the Wiki results.
                    trigger_error(($msg = "Unexpected exception retrieving profile data for character \"$name\" on server \"$realm\" in region \"$region\".") . ": $ex", E_USER_WARNING);
                    $table .= "\n";
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                }

                if ($armory_error_marker !== "" && false !== strpos($xml, $armory_error_marker)) {
                    trigger_error($msg = "No character named \"$name\" on server \"$realm\" in region \"$region\" or other Armory error.", E_USER_WARNING);
                    $table .= "\n";
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                }

                try {
                    $dom = new DOMDocument;
                    $dom->loadXML($xml);
                    $xpath = new DOMXPath($dom);
                } catch (Exception $ex) {
                    // Don't expose unknown exceptions to the Wiki results.
                    trigger_error(($msg = "Error parsing profile data for character \"$name\" on server \"$realm\" in region \"$region\".") . ": $ex", E_USER_WARNING);
                    $table .= "\n";
                    if (count($skipped)) array_push($names, array_shift($skipped));
                    continue 2;
                }

                // Each XPath selects an attribute; take the first match or fall back to "".
                foreach ($fields as $key => $path) {
                    $nodes = $xpath->query($path);
                    if ($nodes && $nodes->length)
                        $char[$key] = $nodes->item(0)->value;
                    else
                        $char[$key] = "";
                }
            }

            // The class name is required to even set the row template; without it, give up.
            if (!$char["class"]) {
                trigger_error($msg = "Could not determine class for \"$region $realm $name\".", E_USER_WARNING);
                $table .= "\n";
                if (count($skipped)) array_push($names, array_shift($skipped));
                continue;
            }

            // "guild" should be the only field which can contain <, >, &, or | (all of which are dangerous)
            if ($char["guild"])
                $char["guild"] = str_replace("|", "&#124;", htmlspecialchars($char["guild"], ENT_NOQUOTES));

            // For each stat, record the bonus over the base value; when there is no bonus both the
            // base and the bonus are blanked.
            foreach (array("str", "agi", "sta", "int", "spi", "arm") as $key) {
                $base_key = $key . "base";
                $plus_key = $key . "plus";

                if ($char[$key] == $char[$base_key]) {
                    $char[$base_key] = "";
                    $char[$plus_key] = "";
                } else {
                    $char[$plus_key] = $char[$key] - $char[$base_key];
                }
            }

            if (is_numeric($char["talent1"])) {
                // $spec becomes the zero-based tree indexes ordered by points spent, highest first.
                $spec = array($char["talent1"], $char["talent2"], $char["talent3"]);
                arsort($spec);
                $spec = array_keys($spec);

                if ($char["talent" . ($spec[0] + 1)] == 0) {
                    $char["spec1"] = "None";
                    $char["spec2"] = "";
                } else {
                    // A tree counts as significant when it holds at least half the points of the top tree.
                    $lim = $char["talent" . ($spec[0] + 1)] / 2;

                    if ($char["talent" . ($spec[2] + 1)] >= $lim) {
                        $char["spec1"] = "Hybrid";
                        $char["spec2"] = "";
                    } else if ($char["talent" . ($spec[1] + 1)] >= $lim) {
                        $char["spec1"] = $spec[0] + 1;
                        $char["spec2"] = $spec[1] + 1;
                    } else {
                        $char["spec1"] = $spec[0] + 1;
                        $char["spec2"] = "";
                    }
                }

                $char["talents"] = "$char[talent1]/$char[talent2]/$char[talent3]";
            } else {
                $char["spec1"] = "";
                $char["spec2"] = "";
                $char["talents"] = "";
            }

            $done_chars[$region][$realm][$name] = $char;
        }

        // Collapses a run of trailing "|" before "}}".
        // NOTE(review): the row template this was applied to is missing from this source.
        $table .= preg_replace('/\|+}}/', '}}', "\n");
    }

    // One row per name that was over the limit and never processed.
    foreach ($skipped as $row)
        $table .= "\n";

    try {
        if ($verbose) echo "Posting character table for \"$user\"...\n";
        wiki_put("User:$user/APB_Table", "$table|}");
    } catch (APBException $ex) {
        if ($ex->getCode() == APB_EX_FATAL) trigger_error((string) $ex, E_USER_ERROR);
        trigger_error((string) $ex, E_USER_WARNING);
        continue;
    } catch (Exception $ex) {
        trigger_error("Unexpected exception posting charcter table for \"$user\": $ex", E_USER_WARNING);
        continue;
    }
}