bboschecker 0.2

This commit is contained in:
tengel 2024-03-20 09:29:18 -05:00
parent aca861840a
commit 1bcaaf7f18
5 changed files with 579 additions and 33 deletions

15
ChangeLog Normal file
View file

@ -0,0 +1,15 @@
Version 0.2 (2008-05-11)
===========
- Implement tableExtractor.class.php to more smartly parse out the carrier
information, allowing for network and region based lookups
- Add form elements to support the new network/region functionality
- Add user-controlled verbosity setting
- Add output of links needed to check manually
- Add quick BBFAQ debug script to spit out the table array
- Minor bugfix to PHP_SELF coding error
- Add CREDITS and ChangeLog files
Version 0.1 (2007-11-07)
===========
- Initial release

2
README
View file

@ -1,5 +1,5 @@
bboschecker - check for new BB device OS releases
Copyright (C) 2007 troyengel
Copyright (C) 2008 troyengel
About
=====

View file

@ -1,7 +1,7 @@
<?php
/*
bboschecker 0.1 - check for new BB device OS releases
Copyright (C) 2007 troyengel
bboschecker 0.2 - check for new BB device OS releases
Copyright (C) 2008 troyengel
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -16,9 +16,19 @@
http://www.gnu.org/licenses/gpl-2.0.txt
*/
// This class right here is worth its weight in gold - give the author
// some kudos. See the debug script for basic usage.
//
// http://jacksleight.com/blog/2008/01/14/really-shiny/scripts/table-extractor.txt
include 'tableExtractor.class.php';
// where is the list of carriers?
$BBF_LIST = "http://blackberryfaq.com/index.php/BlackBerry_Operating_System_Downloads";
// what text immediately precedes our target table? (not IN the table!)
// hopefully nobody changes this, but hey - it's the interweb tubes
$BBF_ANCHOR = 'desktop and device software download page';
// we will delay this many seconds when retrieving pages from the
// RIM server, we don't want to be a bad netizen.
$RIM_SLEEP = 1;
@ -31,6 +41,9 @@
// where to store the session cookiejar
$SCRIPT_CJ = "/tmp/bbos_cookies";
// default verbosity, can be changed by user
$VERBOSE = false;
?>
<html>
@ -54,6 +67,14 @@ curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($curl_handle, CURLOPT_COOKIEFILE, $SCRIPT_CJ);
curl_setopt($curl_handle, CURLOPT_COOKIEJAR, $SCRIPT_CJ);
// Emit $text and flush it to the client right away, but only when the
// global $VERBOSE switch is on; otherwise do nothing at all.
function chatty($text) {
    global $VERBOSE;
    // quiet mode: nothing to print, nothing to flush
    if (!$VERBOSE) {
        return;
    }
    echo $text;
    flush();
}
function getPage($url, $method="GET", $postfields="") {
global $curl_handle;
if (!is_string($url)) {
@ -85,40 +106,84 @@ function getPage($url, $method="GET", $postfields="") {
}
}
if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
if (isset($_REQUEST['BB_DEVICE']) && is_numeric($_REQUEST['BB_DEVICE'])) {
global $VERBOSE;
$rimDevice = $_REQUEST['BB_DEVICE'];
$rimNetwork = $_REQUEST['BB_NETWORK'];
$rimRegion = $_REQUEST['BB_REGION'];
if (isset($_REQUEST['BB_VERBOSE']) && $_REQUEST['BB_VERBOSE']) {
$VERBOSE = true;
}
$device = $_REQUEST['device'];
// IE hack - it buffers and won't display unless 256b
print(str_repeat(" ", 300) . "\n");
echo "Searching for BlackBerry OS releases for:<br>\n".
"Device: <b>$rimDevice</b><br>\n".
"Network: <b>$rimNetwork</b><br>\n".
"Region: <b>$rimRegion</b><br>\n".
"<br>\n";
flush();
echo "Downloading carrier list from BlackBerryFAQ... ";
flush();
$carriers = getPage($BBF_LIST);
echo "<b>done</b>.<br>\n";
chatty("\n<br>&nbsp;&nbsp;URL:".$BBF_LIST."<br>\n");
$tx = new tableExtractor;
$tx->source = file_get_contents($BBF_LIST);
$tx->anchor = $BBF_ANCHOR;
$tx->anchorWithin = false;
$carrierArray = $tx->extractTable();
echo "<b>done</b>.<br><br>\n";
flush();
echo "Parsing carrier list for links... ";
flush();
chatty("<br>\n");
reset($carrierArray);
$link_array = array();
$cnt_array = explode("\n", $carriers);
foreach ($cnt_array as $line) {
if (stristr($line, "www.blackberry.com")) {
// http://www.the-art-of-web.com/php/parse-links/
$regexp = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";
if (preg_match_all("/$regexp/siU", $line, $matches, PREG_SET_ORDER)) {
foreach ($matches as $match) {
# $match[2] = link address
# $match[3] = link text
$link_array[$match[2]] = $match[3];
}
}
}
}
$reject_array = array();
foreach($carrierArray as $carrier) {
if (($carrier['Network'] == $rimNetwork) ||
($rimNetwork == "All")) {
if (($carrier['Region'] == $rimRegion) ||
($rimRegion == "All")) {
if (stristr($carrier['Carrier'], "www.blackberry.com")) {
// http://www.the-art-of-web.com/php/parse-links/
$regexp = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";
if (preg_match_all("/$regexp/siU", $carrier['Carrier'], $matches,
PREG_SET_ORDER)) {
foreach ($matches as $match) {
# $match[2] = link address
# $match[3] = link text
chatty("\n&nbsp;&nbsp;<b>Adding:</b> " . $match[3] .
" (Network: " . $rimNetwork . ", Region: " .
$rimRegion . ")<br>\n");
$link_array[$match[2]] = $match[3];
} // foreach link
} // rexexp match
} else {
chatty("\n&nbsp;&nbsp;<b>Rejecting:</b> " . $carrier['Carrier'] .
" (not hosted on RIM server)<br>\n");
$reject_array[] = $carrier['Carrier'];
} // blackberry.com match
} // region match
} // network match
} // foreach carrier
echo "<b>done</b>.<br><br>\n";
flush();
echo "Searching for OS releases for the BlackBerry <b>$device</b><br><br>\n";
flush();
// output the list of sites to check by hand
if (count($reject_array) > 0) {
echo "These downloads are not hosted by RIM, check manually:<br>\n";
flush();
reset($reject_array);
foreach($reject_array as $reject) {
echo "&nbsp;&nbsp;" . $reject . "<br>\n";
}
echo "<br>\n";
}
reset($link_array);
while (list($link, $title) = each($link_array)) {
echo "Checking <a href=\"$link\"><b>$title</b></a>...<br>\n";
@ -131,7 +196,7 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
$code = "";
foreach ($rim_array as $line) {
// get the value
if (stristr($line, $device)) {
if (stristr($line, $rimDevice)) {
$regexp = "<option\svalue=\"([^\"]*)\">(.*)<\/option>";
if (preg_match("/$regexp/siU", $line, $match)) {
$got_v = 1;
@ -148,9 +213,10 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
}
}
if ($got_v && $got_c) {
echo "&nbsp;&nbsp;Found one, retrieving product page...<br>\n";
flush();
chatty("&nbsp;&nbsp;Found one, retrieving product page...<br>\n");
$postvars = "productName=$value&code=$code";
$os_url = $RIM_DLURL . ", POST, " . $postvars;
chatty("&nbsp;&nbsp;URL Data: ".$os_url."<br>\n");
$os_page = getPage($RIM_DLURL, "POST", $postvars);
$os_array = explode("\n", $os_page);
foreach ($os_array as $osline) {
@ -168,19 +234,47 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
} else {
?>
Enter the model number ONLY of the device.<br>
(7290, 8100, 8320, 8800, etc.)<br>
<form name="deviceselect" method="GET" action="<?php $_PHP_SELF; ?>">
<br>
<input type="text" size="10" name="device">
<input type="submit" value="Search Carriers">
<form name="deviceselect" method="POST" action="<?php echo $_SERVER['PHP_SELF']; ?>">
<b>Device:</b>&nbsp;
<input type="text" size="15" name="BB_DEVICE">
<br>
Enter the model number ONLY of the device.<br>
(7290, 8100, 8320, 8703, 8800, etc.)
<br><br>
<b>Verbose Output:</b>
<input type="checkbox" name="BB_VERBOSE" value="true">
<br><br>
<b>Network:</b>
<br>
<select name="BB_NETWORK">
<option>All</option>
<option>CDMA</option>
<option>GSM</option>
<option>iDEN</option>
<option>Mobitex</option>
</select>
<br><br>
<b>Region:</b>
<br>
<select name="BB_REGION">
<option>All</option>
<option>Africa</option>
<option>Asia Pacific</option>
<option>Europe</option>
<option>Latin America / South America</option>
<option>Middle East</option>
<option>North America</option>
</select>
<br><br>
<input type="submit" value="Search Carriers">
</form>
<br>
<b>Note</b>: This tool will only search carriers who<br>
host their downloads on www.blackberry.com<br>
<?php
} // device
} // BB_DEVICE
curl_close($curl_handle);
?>

54
debugbbfarray.php Normal file
View file

@ -0,0 +1,54 @@
<html>
<head><title>BBOS Carrier Array Check</title></head>
<body>
<?php
/*
bboschecker 0.2 - check for new BB device OS releases
Copyright (C) 2008 rivviepop (blackberryforums.com)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
http://www.gnu.org/licenses/gpl-2.0.txt
*/
// this script is used to just debug the BlackBerryFAQ OS table, in case
// the HTML formatting or anchor text has changed. Mmmm, debug - it's what's
// for dinner.
// This class right here is worth its weight in gold - give the author
// some kudos.
//
// http://jacksleight.com/blog/2008/01/14/really-shiny/scripts/table-extractor.txt
// fail loudly if the extractor class is missing instead of dying later
// on "class not found"
require_once 'tableExtractor.class.php';
// BBFAQ URL
$BBF_LIST = "http://blackberryfaq.com/index.php/BlackBerry_Operating_System_Downloads";
// file_get_contents() returns false when the page cannot be fetched;
// check for that before handing anything to the extractor
$bbfSource = file_get_contents($BBF_LIST);
if ($bbfSource === false) {
    print("<p><b>Error:</b> could not download " . htmlspecialchars($BBF_LIST) . "</p>\n");
} else {
    $tx = new tableExtractor;
    $tx->source = $bbfSource;
    // hopefully nobody will change this text right above the table
    $tx->anchor = 'desktop and device software download page';
    $tx->anchorWithin = false;
    $tableArray = $tx->extractTable();
    // The cells hold raw HTML taken from a remote page. Escape the dump so
    // the markup is shown as text (which is what we want to debug) rather
    // than being rendered or executed by the browser.
    print("<pre>\n");
    print(htmlspecialchars(print_r($tableArray, true)));
    print("</pre>\n");
}
?>
</body>
</html>

383
tableExtractor.class.php Normal file
View file

@ -0,0 +1,383 @@
<?php
/*----------------------------------------------------------------------
Table Extractor
===============
Table extractor is a php class that can extract almost any table
from any html document/page, and then convert that html table into
a php array.
Version 1.3
Compatibility: PHP 4.4.1 +
Copyright Jack Sleight - www.reallyshiny.com
This script is licensed under the Creative Commons License.
----------------------------------------------------------------------*/
// Extracts one HTML table from a document and converts it to a PHP array.
//
// Usage: set $source (the HTML) and $anchor (text that appears before the
// table, or inside it when $anchorWithin is true), optionally tune the
// other settings, then call extractTable().
class tableExtractor {
    // --- input settings ---
    var $source = NULL;         // HTML document to search (string)
    var $anchor = NULL;         // text used to locate the table; empty = start of document
    var $anchorWithin = false;  // true: the anchor text is inside the wanted table
    var $headerRow = true;      // raw row number used for column names (true == row 1), false for none
    var $startRow = 1;          // first data row to return (1-based, counted after header/drop removal)
    var $maxRows = 0;           // maximum number of rows to return, 0 = no limit
    var $startCol = 1;          // first column to return (1-based)
    var $maxCols = 0;           // maximum number of columns to return, 0 = no limit
    var $stripTags = false;     // true: run strip_tags() on every returned cell
    var $extraCols = array();   // list of array('column' => n, 'regex' => pattern, 'names' => name or list of names)
    var $dropRows = NULL;       // comma separated raw row numbers to discard
    // --- state filled in while extracting ---
    var $rowCount = 0;          // number of data rows found by prepareArray()
    var $extraColsCount = 0;    // scratch counter used while filling extra columns
    var $cleanHTML = NULL;      // normalised table markup produced by cleanHTML()
    var $rawArray = NULL;       // row => column => cell array produced by prepareArray()
    var $finalArray = NULL;     // result of the last createArray() call

    /*--------------------------------------------------
    Run the whole pipeline and return the table as an
    array of rows (1-based); each row maps column name
    (or column number) to the cell content. Returns an
    empty array when no table could be found.
    --------------------------------------------------*/
    function extractTable() {
        $this->cleanHTML();
        $this->prepareArray();
        return $this->createArray();
    }

    /*--------------------------------------------------
    Locate the table in $source and store a normalised
    copy of its markup in $this->cleanHTML ('' when the
    document contains no table after the search start).
    --------------------------------------------------*/
    function cleanHTML() {
        // php 4 compatibility functions
        if (!function_exists('stripos')) {
            function stripos($haystack, $needle, $offset = 0) {
                return(strpos(strtolower($haystack), strtolower($needle), $offset));
            }
        }
        if (!function_exists('lcase_tags')) {
            function lcase_tags($input) {
                return strtolower($input[0]);
            }
        }

        // NULL (never set) or false (failed download) are treated as an
        // empty document / no anchor instead of being fed to string functions
        $source = is_scalar($this->source) ? (string) $this->source : '';
        $anchor = is_scalar($this->anchor) ? (string) $this->anchor : '';

        // Work out where to start looking for "<table". When there is no
        // anchor, or the anchor cannot be found, fall back to the start of
        // the document rather than doing arithmetic on a false position.
        $startSearch = 0;
        if ($anchor !== '') {
            $anchorPos = stripos($source, $anchor);
            if ($anchorPos !== false) {
                if ($this->anchorWithin) {
                    /*------------------------------------------------------------
                    With thanks to Khary Sharp for suggesting and writing
                    the anchor within functionality.
                    ------------------------------------------------------------*/
                    // search backwards from the end of the anchor for the
                    // closest preceding "<table" by reversing both strings
                    $sourceSnippet = strrev(substr($source, 0, $anchorPos + strlen($anchor)));
                    $tablePos = stripos($sourceSnippet, strrev('<table'));
                    if ($tablePos !== false) {
                        $startSearch = strlen($sourceSnippet) - ($tablePos + 6);
                    }
                } else {
                    $startSearch = $anchorPos;
                }
            }
        }

        // extract table
        $startTable = stripos($source, '<table', $startSearch);
        if ($startTable === false) {
            // nothing to extract; the later stages turn this into an empty result
            $this->cleanHTML = '';
            return;
        }
        $closePos = stripos($source, '</table>', $startTable);
        if ($closePos === false) {
            // unterminated table: take everything up to the end of the document
            $table = substr($source, $startTable);
        } else {
            $table = substr($source, $startTable, $closePos + 8 - $startTable);
        }

        // lowercase all table related tags
        $table = preg_replace_callback('/<(\/?)(table|tr|th|td)/is', 'lcase_tags', $table);
        // remove all thead and tbody tags
        $table = preg_replace('/<\/?(thead|tbody).*?>/is', '', $table);
        // replace th tags with td tags
        $table = preg_replace('/<(\/?)th(.*?)>/is', '<$1td$2>', $table);
        // clean string
        $table = trim($table);
        $table = str_replace("\r\n", "", $table);
        $this->cleanHTML = $table;
    }

    /*--------------------------------------------------
    Split $this->cleanHTML into rows and cells, expanding
    colspan/rowspan, and store the result in
    $this->rawArray (row number => column number => cell).
    Also sets $this->rowCount.
    --------------------------------------------------*/
    function prepareArray() {
        // split table into individual elements
        $pattern = '/(<\/?(?:tr|td).*?>)/is';
        $table = preg_split($pattern, (string) $this->cleanHTML, -1, PREG_SPLIT_DELIM_CAPTURE);
        if (!is_array($table)) {
            $table = array();
        }
        // define array for new table
        $tableCleaned = array();
        // define variables for looping through table
        $rowCount = 0;
        $colCount = 1;
        $trOpen = false;
        $tdOpen = false;
        $tdTag = '';
        // loop through table
        foreach ($table as $item) {
            // trim item
            $item = str_replace('&nbsp;', '', $item);
            $item = trim($item);
            // save the item
            $itemUnedited = $item;
            // clean if tag
            $item = preg_replace('/<(\/?)(table|tr|td).*?>/is', '<$1$2>', $item);
            // pick item type
            switch ($item) {
                case '<tr>':
                    // start a new row
                    $rowCount++;
                    $colCount = 1;
                    $trOpen = true;
                    break;
                case '<td>':
                    // save the td tag for later use
                    $tdTag = $itemUnedited;
                    $tdOpen = true;
                    break;
                case '</td>':
                    $tdOpen = false;
                    break;
                case '</tr>':
                    $trOpen = false;
                    break;
                default:
                    // anything else is cell content, but only while a td is open
                    if ($tdOpen) {
                        // check if td tag contained colspan; a value below 1
                        // (e.g. colspan="0") is treated as a single column so
                        // the cell is still stored and the row exists for ksort
                        if (preg_match('/<td [^>]*colspan\s*=\s*(?:\'|")?\s*([0-9]+)[^>]*>/is', $tdTag, $matches))
                            $colspan = max(1, (int) $matches[1]);
                        else
                            $colspan = 1;
                        // check if td tag contained rowspan
                        if (preg_match('/<td [^>]*rowspan\s*=\s*(?:\'|")?\s*([0-9]+)[^>]*>/is', $tdTag, $matches))
                            $rowspan = (int) $matches[1];
                        else
                            $rowspan = 0;
                        // NOTE(review): the span placement below is kept exactly
                        // as originally written; it shifts by one column when a
                        // slot is already taken and may misplace cells when
                        // several spanned columns are adjacent - confirm before
                        // relying on it for complex tables.
                        // loop over the colspans
                        for ($c = 0; $c < $colspan; $c++) {
                            // if the item data has not already been defined by a rowspan loop, set it
                            if (!isset($tableCleaned[$rowCount][$colCount]))
                                $tableCleaned[$rowCount][$colCount] = $item;
                            else
                                $tableCleaned[$rowCount][$colCount + 1] = $item;
                            // create new rowCount variable for looping through rowspans
                            $futureRows = $rowCount;
                            // loop through row spans
                            for ($r = 1; $r < $rowspan; $r++) {
                                $futureRows++;
                                if ($colspan > 1)
                                    $tableCleaned[$futureRows][$colCount + 1] = $item;
                                else
                                    $tableCleaned[$futureRows][$colCount] = $item;
                            }
                            // increase column count
                            $colCount++;
                        }
                        // sort the row array by the column keys (as inserting rowspans screws up the order)
                        ksort($tableCleaned[$rowCount]);
                    }
                    break;
            }
        }
        // set row count (never negative, even for an empty table)
        if ($this->headerRow)
            $this->rowCount = max(0, count($tableCleaned) - 1);
        else
            $this->rowCount = count($tableCleaned);
        $this->rawArray = $tableCleaned;
    }

    /*--------------------------------------------------
    Build the final result from $this->rawArray: apply
    the header row, dropRows, the row/column windows,
    extraCols and stripTags. Stores the result in
    $this->finalArray and returns it.
    --------------------------------------------------*/
    function createArray() {
        // define array to store table data
        $tableData = array();
        if (!is_array($this->rawArray)) {
            $this->rawArray = array();
        }
        // get column headers
        $columnNames = array();
        if ($this->headerRow) {
            // true selects raw row 1, a number selects that raw row
            $headerKey = (int) $this->headerRow;
            // an empty table has no header row to read
            if (isset($this->rawArray[$headerKey]) && is_array($this->rawArray[$headerKey])) {
                $uniqueNames = array();
                // loop over column names
                $colCount = 0;
                foreach ($this->rawArray[$headerKey] as $cell) {
                    $colCount++;
                    $cell = strip_tags($cell);
                    $cell = trim($cell);
                    // save name if there is one, otherwise save index
                    if ($cell) {
                        // repeated names get a " (2)", " (3)", ... suffix
                        if (isset($uniqueNames[$cell])) {
                            $uniqueNames[$cell]++;
                            $cell .= ' ('.($uniqueNames[$cell] + 1).')';
                        }
                        else {
                            $uniqueNames[$cell] = 0;
                        }
                        $columnNames[$colCount] = $cell;
                    }
                    else
                        $columnNames[$colCount] = $colCount;
                }
            }
            // remove the headers row from the table
            unset($this->rawArray[$headerKey]);
        }
        // remove rows to drop (skipped entirely when none were requested,
        // so NULL is never passed to explode)
        if ($this->dropRows !== NULL && $this->dropRows !== '') {
            foreach (explode(',', (string) $this->dropRows) as $value) {
                unset($this->rawArray[trim($value)]);
            }
        }
        // set the end row
        if ($this->maxRows)
            $endRow = $this->startRow + $this->maxRows - 1;
        else
            $endRow = count($this->rawArray);
        // loop over row array
        $rowCount = 0;
        $newRowCount = 0;
        foreach ($this->rawArray as $row) {
            $rowCount++;
            // skip rows outside the requested window
            if ($rowCount < $this->startRow || $rowCount > $endRow) {
                continue;
            }
            $newRowCount++;
            // create new array to store data
            $tableData[$newRowCount] = array();
            // set the end column
            if ($this->maxCols)
                $endCol = $this->startCol + $this->maxCols - 1;
            else
                $endCol = count($row);
            // loop over cell array
            $colCount = 0;
            $newColCount = 0;
            foreach ($row as $cell) {
                $colCount++;
                // skip columns outside the requested window
                if ($colCount < $this->startCol || $colCount > $endCol) {
                    continue;
                }
                $newColCount++;
                // extra columns are taken from the raw cell (before strip_tags)
                if ($this->extraCols) {
                    foreach ($this->extraCols as $extraColumn) {
                        if ($extraColumn['column'] != $colCount) {
                            continue;
                        }
                        $matches = array();
                        $matched = preg_match($extraColumn['regex'], $cell, $matches);
                        $this->extraColsCount = 0;
                        if (is_array($extraColumn['names'])) {
                            // the n-th name receives the n-th capture group, or ''
                            // when the regex did not match or has no such group
                            foreach ($extraColumn['names'] as $extraColumnSub) {
                                $this->extraColsCount++;
                                if ($matched && isset($matches[$this->extraColsCount]))
                                    $tableData[$newRowCount][$extraColumnSub] = $matches[$this->extraColsCount];
                                else
                                    $tableData[$newRowCount][$extraColumnSub] = '';
                            }
                        } else {
                            if ($matched && isset($matches[1]))
                                $tableData[$newRowCount][$extraColumn['names']] = $matches[1];
                            else
                                $tableData[$newRowCount][$extraColumn['names']] = '';
                        }
                    }
                }
                if ($this->stripTags)
                    $cell = strip_tags($cell);
                // set the column key as the column number
                $colKey = $newColCount;
                // if there is a table header, use the column name as the key
                if ($this->headerRow && isset($columnNames[$colCount]))
                    $colKey = $columnNames[$colCount];
                // add the data to the array
                $tableData[$newRowCount][$colKey] = $cell;
            }
        }
        $this->finalArray = $tableData;
        return $tableData;
    }
}
?>