bboschecker 0.2
This commit is contained in:
parent
aca861840a
commit
1bcaaf7f18
5 changed files with 579 additions and 33 deletions
15
ChangeLog
Normal file
15
ChangeLog
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
Version 0.2 (2008-05-11)
|
||||||
|
===========
|
||||||
|
- Implement tableExtractor.class.php to more smartly parse out the carrier
|
||||||
|
information, allowing for network and region based lookups
|
||||||
|
- Add form elements to support the new network/region functionality
|
||||||
|
- Add user-controlled verbosity setting
|
||||||
|
- Add output of links needed to check manually
|
||||||
|
- Add quick BBFAQ debug script to spit out the table array
|
||||||
|
- Minor bugfix to PHP_SELF coding error
|
||||||
|
- Add CREDITS and ChangeLog files
|
||||||
|
|
||||||
|
Version 0.1 (2007-11-07)
|
||||||
|
===========
|
||||||
|
- Initial release
|
||||||
|
|
||||||
2
README
2
README
|
|
@ -1,5 +1,5 @@
|
||||||
bboschecker - check for new BB device OS releases
|
bboschecker - check for new BB device OS releases
|
||||||
Copyright (C) 2007 troyengel
|
Copyright (C) 2008 troyengel
|
||||||
|
|
||||||
About
|
About
|
||||||
=====
|
=====
|
||||||
|
|
|
||||||
158
bboschecker.php
158
bboschecker.php
|
|
@ -1,7 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
/*
|
/*
|
||||||
bboschecker 0.1 - check for new BB device OS releases
|
bboschecker 0.2 - check for new BB device OS releases
|
||||||
Copyright (C) 2007 troyengel
|
Copyright (C) 2008 troyengel
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
|
@ -16,9 +16,19 @@
|
||||||
http://www.gnu.org/licenses/gpl-2.0.txt
|
http://www.gnu.org/licenses/gpl-2.0.txt
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// This class right here is worth its weight in gold - give the author
|
||||||
|
// some kudos. See the debug script for basic usage.
|
||||||
|
//
|
||||||
|
// http://jacksleight.com/blog/2008/01/14/really-shiny/scripts/table-extractor.txt
|
||||||
|
include 'tableExtractor.class.php';
|
||||||
|
|
||||||
// where is the list of carriers?
|
// where is the list of carriers?
|
||||||
$BBF_LIST = "http://blackberryfaq.com/index.php/BlackBerry_Operating_System_Downloads";
|
$BBF_LIST = "http://blackberryfaq.com/index.php/BlackBerry_Operating_System_Downloads";
|
||||||
|
|
||||||
|
// what text immediately precedes our target table? (not IN the table!)
|
||||||
|
// hopefully nobody changes this, but hey - it's the interweb tubes
|
||||||
|
$BBF_ANCHOR = 'desktop and device software download page';
|
||||||
|
|
||||||
// we will delay this many seconds when retrieving pages from the
|
// we will delay this many seconds when retrieving pages from the
|
||||||
// RIM server, we don't want to be a bad netizen.
|
// RIM server, we don't want to be a bad netizen.
|
||||||
$RIM_SLEEP = 1;
|
$RIM_SLEEP = 1;
|
||||||
|
|
@ -31,6 +41,9 @@
|
||||||
|
|
||||||
// where to store the session cookiejar
|
// where to store the session cookiejar
|
||||||
$SCRIPT_CJ = "/tmp/bbos_cookies";
|
$SCRIPT_CJ = "/tmp/bbos_cookies";
|
||||||
|
|
||||||
|
// default verbosity, can be changed by user
|
||||||
|
$VERBOSE = false;
|
||||||
?>
|
?>
|
||||||
|
|
||||||
<html>
|
<html>
|
||||||
|
|
@ -54,6 +67,14 @@ curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, TRUE);
|
||||||
curl_setopt($curl_handle, CURLOPT_COOKIEFILE, $SCRIPT_CJ);
|
curl_setopt($curl_handle, CURLOPT_COOKIEFILE, $SCRIPT_CJ);
|
||||||
curl_setopt($curl_handle, CURLOPT_COOKIEJAR, $SCRIPT_CJ);
|
curl_setopt($curl_handle, CURLOPT_COOKIEJAR, $SCRIPT_CJ);
|
||||||
|
|
||||||
|
/**
 * Print $text and flush output, but only when the global $VERBOSE
 * switch is truthy; otherwise do nothing.
 *
 * @param string $text  text (may contain HTML) to emit verbatim
 */
function chatty($text) {
    global $VERBOSE;

    // quiet mode: nothing to say
    if (!$VERBOSE) {
        return;
    }

    echo $text;
    flush();
}
|
||||||
|
|
||||||
function getPage($url, $method="GET", $postfields="") {
|
function getPage($url, $method="GET", $postfields="") {
|
||||||
global $curl_handle;
|
global $curl_handle;
|
||||||
if (!is_string($url)) {
|
if (!is_string($url)) {
|
||||||
|
|
@ -85,40 +106,84 @@ function getPage($url, $method="GET", $postfields="") {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
|
if (isset($_REQUEST['BB_DEVICE']) && is_numeric($_REQUEST['BB_DEVICE'])) {
|
||||||
|
|
||||||
|
global $VERBOSE;
|
||||||
|
$rimDevice = $_REQUEST['BB_DEVICE'];
|
||||||
|
$rimNetwork = $_REQUEST['BB_NETWORK'];
|
||||||
|
$rimRegion = $_REQUEST['BB_REGION'];
|
||||||
|
if (isset($_REQUEST['BB_VERBOSE']) && $_REQUEST['BB_VERBOSE']) {
|
||||||
|
$VERBOSE = true;
|
||||||
|
}
|
||||||
|
|
||||||
$device = $_REQUEST['device'];
|
|
||||||
// IE hack - it buffers and won't display unless 256b
|
// IE hack - it buffers and won't display unless 256b
|
||||||
print(str_repeat(" ", 300) . "\n");
|
print(str_repeat(" ", 300) . "\n");
|
||||||
|
|
||||||
|
echo "Searching for BlackBerry OS releases for:<br>\n".
|
||||||
|
"Device: <b>$rimDevice</b><br>\n".
|
||||||
|
"Network: <b>$rimNetwork</b><br>\n".
|
||||||
|
"Region: <b>$rimRegion</b><br>\n".
|
||||||
|
"<br>\n";
|
||||||
|
flush();
|
||||||
|
|
||||||
echo "Downloading carrier list from BlackBerryFAQ... ";
|
echo "Downloading carrier list from BlackBerryFAQ... ";
|
||||||
flush();
|
flush();
|
||||||
$carriers = getPage($BBF_LIST);
|
chatty("\n<br> URL:".$BBF_LIST."<br>\n");
|
||||||
echo "<b>done</b>.<br>\n";
|
$tx = new tableExtractor;
|
||||||
|
$tx->source = file_get_contents($BBF_LIST);
|
||||||
|
$tx->anchor = $BBF_ANCHOR;
|
||||||
|
$tx->anchorWithin = false;
|
||||||
|
$carrierArray = $tx->extractTable();
|
||||||
|
echo "<b>done</b>.<br><br>\n";
|
||||||
flush();
|
flush();
|
||||||
|
|
||||||
echo "Parsing carrier list for links... ";
|
echo "Parsing carrier list for links... ";
|
||||||
flush();
|
flush();
|
||||||
|
chatty("<br>\n");
|
||||||
|
reset($carrierArray);
|
||||||
$link_array = array();
|
$link_array = array();
|
||||||
$cnt_array = explode("\n", $carriers);
|
$reject_array = array();
|
||||||
foreach ($cnt_array as $line) {
|
foreach($carrierArray as $carrier) {
|
||||||
if (stristr($line, "www.blackberry.com")) {
|
if (($carrier['Network'] == $rimNetwork) ||
|
||||||
// http://www.the-art-of-web.com/php/parse-links/
|
($rimNetwork == "All")) {
|
||||||
$regexp = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";
|
if (($carrier['Region'] == $rimRegion) ||
|
||||||
if (preg_match_all("/$regexp/siU", $line, $matches, PREG_SET_ORDER)) {
|
($rimRegion == "All")) {
|
||||||
foreach ($matches as $match) {
|
if (stristr($carrier['Carrier'], "www.blackberry.com")) {
|
||||||
# $match[2] = link address
|
// http://www.the-art-of-web.com/php/parse-links/
|
||||||
# $match[3] = link text
|
$regexp = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";
|
||||||
$link_array[$match[2]] = $match[3];
|
if (preg_match_all("/$regexp/siU", $carrier['Carrier'], $matches,
|
||||||
}
|
PREG_SET_ORDER)) {
|
||||||
}
|
foreach ($matches as $match) {
|
||||||
}
|
# $match[2] = link address
|
||||||
}
|
# $match[3] = link text
|
||||||
|
chatty("\n <b>Adding:</b> " . $match[3] .
|
||||||
|
" (Network: " . $rimNetwork . ", Region: " .
|
||||||
|
$rimRegion . ")<br>\n");
|
||||||
|
$link_array[$match[2]] = $match[3];
|
||||||
|
} // foreach link
|
||||||
|
} // rexexp match
|
||||||
|
} else {
|
||||||
|
chatty("\n <b>Rejecting:</b> " . $carrier['Carrier'] .
|
||||||
|
" (not hosted on RIM server)<br>\n");
|
||||||
|
$reject_array[] = $carrier['Carrier'];
|
||||||
|
} // blackberry.com match
|
||||||
|
} // region match
|
||||||
|
} // network match
|
||||||
|
} // foreach carrier
|
||||||
echo "<b>done</b>.<br><br>\n";
|
echo "<b>done</b>.<br><br>\n";
|
||||||
flush();
|
flush();
|
||||||
|
|
||||||
echo "Searching for OS releases for the BlackBerry <b>$device</b><br><br>\n";
|
// output the list of sites to check by hand
|
||||||
flush();
|
if (count($reject_array) > 0) {
|
||||||
|
echo "These downloads are not hosted by RIM, check manually:<br>\n";
|
||||||
|
flush();
|
||||||
|
reset($reject_array);
|
||||||
|
foreach($reject_array as $reject) {
|
||||||
|
echo " " . $reject . "<br>\n";
|
||||||
|
}
|
||||||
|
echo "<br>\n";
|
||||||
|
}
|
||||||
|
|
||||||
reset($link_array);
|
reset($link_array);
|
||||||
while (list($link, $title) = each($link_array)) {
|
while (list($link, $title) = each($link_array)) {
|
||||||
echo "Checking <a href=\"$link\"><b>$title</b></a>...<br>\n";
|
echo "Checking <a href=\"$link\"><b>$title</b></a>...<br>\n";
|
||||||
|
|
@ -131,7 +196,7 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
|
||||||
$code = "";
|
$code = "";
|
||||||
foreach ($rim_array as $line) {
|
foreach ($rim_array as $line) {
|
||||||
// get the value
|
// get the value
|
||||||
if (stristr($line, $device)) {
|
if (stristr($line, $rimDevice)) {
|
||||||
$regexp = "<option\svalue=\"([^\"]*)\">(.*)<\/option>";
|
$regexp = "<option\svalue=\"([^\"]*)\">(.*)<\/option>";
|
||||||
if (preg_match("/$regexp/siU", $line, $match)) {
|
if (preg_match("/$regexp/siU", $line, $match)) {
|
||||||
$got_v = 1;
|
$got_v = 1;
|
||||||
|
|
@ -148,9 +213,10 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($got_v && $got_c) {
|
if ($got_v && $got_c) {
|
||||||
echo " Found one, retrieving product page...<br>\n";
|
chatty(" Found one, retrieving product page...<br>\n");
|
||||||
flush();
|
|
||||||
$postvars = "productName=$value&code=$code";
|
$postvars = "productName=$value&code=$code";
|
||||||
|
$os_url = $RIM_DLURL . ", POST, " . $postvars;
|
||||||
|
chatty(" URL Data: ".$os_url."<br>\n");
|
||||||
$os_page = getPage($RIM_DLURL, "POST", $postvars);
|
$os_page = getPage($RIM_DLURL, "POST", $postvars);
|
||||||
$os_array = explode("\n", $os_page);
|
$os_array = explode("\n", $os_page);
|
||||||
foreach ($os_array as $osline) {
|
foreach ($os_array as $osline) {
|
||||||
|
|
@ -168,19 +234,47 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) {
|
||||||
} else {
|
} else {
|
||||||
?>
|
?>
|
||||||
|
|
||||||
Enter the model number ONLY of the device.<br>
|
<form name="deviceselect" method="POST" action="<?php echo $_SERVER['PHP_SELF']; ?>">
|
||||||
(7290, 8100, 8320, 8800, etc.)<br>
|
<b>Device:</b>
|
||||||
<form name="deviceselect" method="GET" action="<?php $_PHP_SELF; ?>">
|
<input type="text" size="15" name="BB_DEVICE">
|
||||||
<br>
|
<br>
|
||||||
<input type="text" size="10" name="device">
|
Enter the model number ONLY of the device.<br>
|
||||||
<input type="submit" value="Search Carriers">
|
(7290, 8100, 8320, 8703, 8800, etc.)
|
||||||
|
<br><br>
|
||||||
|
<b>Verbose Output:</b>
|
||||||
|
<input type="checkbox" name="BB_VERBOSE" value="true">
|
||||||
|
<br><br>
|
||||||
|
<b>Network:</b>
|
||||||
|
<br>
|
||||||
|
<select name="BB_NETWORK">
|
||||||
|
<option>All</option>
|
||||||
|
<option>CDMA</option>
|
||||||
|
<option>GSM</option>
|
||||||
|
<option>iDEN</option>
|
||||||
|
<option>Mobitex</option>
|
||||||
|
</select>
|
||||||
|
<br><br>
|
||||||
|
<b>Region:</b>
|
||||||
|
<br>
|
||||||
|
<select name="BB_REGION">
|
||||||
|
<option>All</option>
|
||||||
|
<option>Africa</option>
|
||||||
|
<option>Asia Pacific</option>
|
||||||
|
<option>Europe</option>
|
||||||
|
<option>Latin America / South America</option>
|
||||||
|
<option>Middle East</option>
|
||||||
|
<option>North America</option>
|
||||||
|
</select>
|
||||||
|
<br><br>
|
||||||
|
<input type="submit" value="Search Carriers">
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
<br>
|
||||||
<b>Note</b>: This tool will only search carriers who<br>
|
<b>Note</b>: This tool will only search carriers who<br>
|
||||||
host their downloads on www.blackberry.com<br>
|
host their downloads on www.blackberry.com<br>
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
} // device
|
} // BB_DEVICE
|
||||||
curl_close($curl_handle);
|
curl_close($curl_handle);
|
||||||
?>
|
?>
|
||||||
|
|
||||||
|
|
|
||||||
54
debugbbfarray.php
Normal file
54
debugbbfarray.php
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
<html>
<head><title>BBOS Carrier Array Check</title></head>
<body>

<?php

/*
bboschecker 0.2 - check for new BB device OS releases
Copyright (C) 2008 rivviepop (blackberryforums.com)

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

http://www.gnu.org/licenses/gpl-2.0.txt
*/

// this script is used to just debug the BlackBerryFAQ OS table, in case
// the HTML formatting or anchor text has changed. Mmmm, debug - it's what's
// for dinner.

// This class right here is worth its weight in gold - give the author
// some kudos.
//
// http://jacksleight.com/blog/2008/01/14/really-shiny/scripts/table-extractor.txt
include 'tableExtractor.class.php';

// BBFAQ URL
$BBF_LIST = "http://blackberryfaq.com/index.php/BlackBerry_Operating_System_Downloads";

// fetch the page ourselves so a failed download is reported instead of
// silently handing FALSE to the extractor
$bbfSource = file_get_contents($BBF_LIST);

if ($bbfSource === false) {
    print("<p><b>Error:</b> unable to download " . htmlspecialchars($BBF_LIST) . "</p>\n");
} else {
    $tx = new tableExtractor;
    $tx->source = $bbfSource;

    // hopefully nobody will change this text right above the table
    $tx->anchor = 'desktop and device software download page';
    $tx->anchorWithin = false;

    $tableArray = $tx->extractTable();

    // the cells hold raw HTML from a third-party page: escape the dump so
    // the markup is shown (which is what we want to debug) rather than
    // rendered by the browser
    print("<pre>\n");
    print(htmlspecialchars(print_r($tableArray, true)));
    print("</pre>\n");
}

?>

</body>
</html>
|
||||||
|
|
||||||
383
tableExtractor.class.php
Normal file
383
tableExtractor.class.php
Normal file
|
|
@ -0,0 +1,383 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------------
|
||||||
|
Table Extractor
|
||||||
|
===============
|
||||||
|
Table extractor is a php class that can extract almost any table
|
||||||
|
from any html document/page, and then convert that html table into
|
||||||
|
a php array.
|
||||||
|
|
||||||
|
Version 1.3
|
||||||
|
Compatibility: PHP 4.4.1 +
|
||||||
|
Copyright Jack Sleight - www.reallyshiny.com
|
||||||
|
This script is licensed under the Creative Commons License.
|
||||||
|
----------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/**
 * Locates one <table> in an HTML string and converts it into a PHP array.
 *
 * Usage: set $source (and optionally $anchor and the other options below),
 * then call extractTable(). The result is a 1-based list of rows; each row
 * is keyed by the header cell text when $headerRow is set, otherwise by
 * 1-based column number.
 */
class tableExtractor {

    // ---- options set by the caller ----
    var $source = NULL;         // HTML text to search
    var $anchor = NULL;         // text searched for (case-insensitively) to position the table search
    var $anchorWithin = false;  // true: the anchor text is inside the wanted table
    var $headerRow = true;      // raw row number supplying column names (true is used as row 1); false for none
    var $startRow = 1;          // first data row to return (1-based, counted after header/dropped rows are removed)
    var $maxRows = 0;           // maximum rows to return, 0 = no limit
    var $startCol = 1;          // first column to return (1-based)
    var $maxCols = 0;           // maximum columns to return, 0 = no limit
    var $stripTags = false;     // true: run strip_tags() on every returned cell
    var $extraCols = array();   // list of array('column' => n, 'regex' => pattern, 'names' => string|array)
    var $rowCount = 0;          // set by prepareArray(): rows found, not counting the header row
    var $dropRows = NULL;       // comma separated raw row numbers to discard

    // ---- intermediate and final results ----
    var $cleanHTML = NULL;      // normalised table markup (set by cleanHTML())
    var $rawArray = NULL;       // [row][col] cell strings (set by prepareArray())
    var $finalArray = NULL;     // same value extractTable() returned last
    var $extraColsCount = 0;    // scratch counter used while filling extraCols (declared so it is not a dynamic property)

    /**
     * Run the whole pipeline: cleanHTML(), prepareArray(), createArray().
     *
     * @return array extracted rows; empty array when no table was found
     */
    function extractTable() {

        $this->cleanHTML();
        $this->prepareArray();

        return $this->createArray();

    }

    /**
     * Cut the wanted table out of $source and normalise its markup into
     * $this->cleanHTML: table tags lower-cased, thead/tbody removed, th
     * turned into td, CRLF pairs removed.
     *
     * If the anchor is empty or cannot be found the search starts at the
     * beginning of the document. If no "<table" is found at all,
     * $this->cleanHTML becomes an empty string.
     */
    function cleanHTML() {

        // php 4 compatibility function
        if (!function_exists('stripos')) {
            function stripos($haystack, $needle, $offset = 0) {
                return strpos(strtolower($haystack), strtolower($needle), $offset);
            }
        }

        // callback for preg_replace_callback below
        if (!function_exists('lcase_tags')) {
            function lcase_tags($input) {
                return strtolower($input[0]);
            }
        }

        $source = (string) $this->source;
        $anchor = (string) $this->anchor;

        // work out where to start looking for "<table"
        $startSearch = 0;
        $anchorPos = ($anchor !== '') ? stripos($source, $anchor) : false;

        if ($anchorPos !== false) {
            if ($this->anchorWithin) {
                /*------------------------------------------------------------
                    With thanks to Khary Sharp for suggesting and writing
                    the anchor within functionality.
                ------------------------------------------------------------*/
                // reverse everything up to the end of the anchor so that the
                // first reversed "<table" is the last one before the anchor
                $sourceSnippet = strrev(substr($source, 0, $anchorPos + strlen($anchor)));
                $tablePos = stripos($sourceSnippet, strrev('<table'));
                if ($tablePos !== false) {
                    $startSearch = strlen($sourceSnippet) - ($tablePos + 6);
                }
            } else {
                $startSearch = $anchorPos;
            }
        }

        // extract table
        $startTable = stripos($source, '<table', $startSearch);
        if ($startTable === false) {
            // nothing to extract; leave an empty table for the later stages
            $this->cleanHTML = '';
            return;
        }

        $endTable = stripos($source, '</table>', $startTable);
        if ($endTable === false) {
            // unterminated table: take everything up to the end of the document
            $table = substr($source, $startTable);
        } else {
            $table = substr($source, $startTable, $endTable + 8 - $startTable);
        }

        // lowercase all table related tags
        $table = preg_replace_callback('/<(\/?)(table|tr|th|td)/is', 'lcase_tags', $table);

        // remove all thead and tbody tags
        $table = preg_replace('/<\/?(thead|tbody).*?>/is', '', $table);

        // replace th tags with td tags
        $table = preg_replace('/<(\/?)th(.*?)>/is', '<$1td$2>', $table);

        // clean string
        $table = trim($table);
        $table = str_replace("\r\n", "", $table);

        $this->cleanHTML = $table;

    }

    /**
     * Split $this->cleanHTML on its tr/td tags and build $this->rawArray,
     * a [row][column] grid (both 1-based) of cell contents. Cells carrying
     * colspan/rowspan are copied into the positions they span. Also sets
     * $this->rowCount.
     */
    function prepareArray() {

        // split table into individual elements
        $pattern = '/(<\/?(?:tr|td).*?>)/is';
        $table = preg_split($pattern, (string) $this->cleanHTML, -1, PREG_SPLIT_DELIM_CAPTURE);

        // define array for new table
        $tableCleaned = array();

        // define variables for looping through table
        $rowCount = 0;
        $colCount = 1;
        $trOpen = false;
        $tdOpen = false;
        $tdTag = '';

        // loop through table
        foreach ($table as $item) {

            // drop non-breaking spaces (entity or UTF-8 character) and trim.
            // Ordinary spaces must survive: the colspan/rowspan patterns
            // below need "<td " and cell text such as "North America" has
            // to come out unchanged.
            $item = str_replace(array('&nbsp;', "\xC2\xA0"), '', $item);
            $item = trim($item);

            // save the item
            $itemUnedited = $item;

            // clean if tag
            $item = preg_replace('/<(\/?)(table|tr|td).*?>/is', '<$1$2>', $item);

            // pick item type
            switch ($item) {

                case '<tr>':
                    // start a new row
                    $rowCount++;
                    $colCount = 1;
                    $trOpen = true;
                    break;

                case '<td>':
                    // save the td tag for later use
                    $tdTag = $itemUnedited;
                    $tdOpen = true;
                    break;

                case '</td>':
                    $tdOpen = false;
                    break;

                case '</tr>':
                    $trOpen = false;
                    break;

                default :

                    // anything outside an open TD is ignored
                    if ($tdOpen) {

                        // check if td tag contained colspan
                        if (preg_match('/<td [^>]*colspan\s*=\s*(?:\'|")?\s*([0-9]+)[^>]*>/is', $tdTag, $matches))
                            $colspan = (int) $matches[1];
                        else
                            $colspan = 1;

                        // check if td tag contained rowspan
                        if (preg_match('/<td [^>]*rowspan\s*=\s*(?:\'|")?\s*([0-9]+)[^>]*>/is', $tdTag, $matches))
                            $rowspan = (int) $matches[1];
                        else
                            $rowspan = 0;

                        // loop over the colspans
                        for ($c = 0; $c < $colspan; $c++) {

                            // if the item data has not already been defined by a rowspan loop, set it
                            if (!isset($tableCleaned[$rowCount][$colCount]))
                                $tableCleaned[$rowCount][$colCount] = $item;
                            else
                                $tableCleaned[$rowCount][$colCount + 1] = $item;

                            // create new rowCount variable for looping through rowspans
                            $futureRows = $rowCount;

                            // loop through row spans
                            for ($r = 1; $r < $rowspan; $r++) {
                                $futureRows++;
                                if ($colspan > 1)
                                    $tableCleaned[$futureRows][$colCount + 1] = $item;
                                else
                                    $tableCleaned[$futureRows][$colCount] = $item;
                            }

                            // increase column count
                            $colCount++;

                        }

                        // sort the row array by the column keys (as inserting rowspans screws up the order)
                        ksort($tableCleaned[$rowCount]);

                    }
                    break;

            }

        }

        // set row count (never negative, even for an empty table)
        if ($this->headerRow)
            $this->rowCount = max(0, count($tableCleaned) - 1);
        else
            $this->rowCount = count($tableCleaned);

        $this->rawArray = $tableCleaned;

    }

    /**
     * Turn $this->rawArray into the final result: remove the header row
     * (using its cells as column keys), remove $dropRows, apply the
     * start/max row and column windows, add $extraCols and optionally
     * strip tags. Stores the result in $this->finalArray.
     *
     * @return array rows numbered from 1, each an array of column key => cell
     */
    function createArray() {

        // define array to store table data
        $tableData = array();
        $columnNames = array();

        if (!is_array($this->rawArray))
            $this->rawArray = array();

        // get column headers
        if ($this->headerRow) {

            // a boolean true addresses row 1, exactly as an array key would
            $headerKey = (int) $this->headerRow;

            if (isset($this->rawArray[$headerKey]) && is_array($this->rawArray[$headerKey])) {

                $uniqueNames = array();

                // loop over column names
                $colCount = 0;
                foreach ($this->rawArray[$headerKey] as $cell) {

                    $colCount++;

                    $cell = strip_tags($cell);
                    $cell = trim($cell);

                    // save name if there is one, otherwise save index
                    if ($cell) {

                        // repeated names get a " (2)", " (3)", ... suffix
                        if (isset($uniqueNames[$cell])) {
                            $uniqueNames[$cell]++;
                            $cell .= ' ('.($uniqueNames[$cell] + 1).')';
                        }
                        else {
                            $uniqueNames[$cell] = 0;
                        }

                        $columnNames[$colCount] = $cell;

                    }
                    else
                        $columnNames[$colCount] = $colCount;

                }

            }

            // remove the headers row from the table
            unset($this->rawArray[$headerKey]);

        }

        // remove rows to drop
        if ($this->dropRows !== NULL && $this->dropRows !== '') {
            foreach (explode(',', (string) $this->dropRows) as $dropRow) {
                unset($this->rawArray[trim($dropRow)]);
            }
        }

        // set the end row
        if ($this->maxRows)
            $endRow = $this->startRow + $this->maxRows - 1;
        else
            $endRow = count($this->rawArray);

        // loop over row array
        $rowCount = 0;
        $newRowCount = 0;
        foreach ($this->rawArray as $row) {

            $rowCount++;

            // only rows inside the requested window are returned
            if ($rowCount < $this->startRow || $rowCount > $endRow)
                continue;

            $newRowCount++;

            // create new array to store data
            $tableData[$newRowCount] = array();

            // set the end column
            if ($this->maxCols)
                $endCol = $this->startCol + $this->maxCols - 1;
            else
                $endCol = count($row);

            // loop over cell array
            $colCount = 0;
            $newColCount = 0;
            foreach ($row as $cell) {

                $colCount++;

                // only columns inside the requested window are returned
                if ($colCount < $this->startCol || $colCount > $endCol)
                    continue;

                $newColCount++;

                // extra columns: values captured from this cell by a regex;
                // they are stored before the cell's own entry
                if ($this->extraCols) {
                    foreach ($this->extraCols as $extraColumn) {

                        if ($extraColumn['column'] != $colCount)
                            continue;

                        $matches = array();
                        $matched = preg_match($extraColumn['regex'], $cell, $matches);
                        if (!$matched)
                            $this->extraColsCount = 0;

                        if (is_array($extraColumn['names'])) {
                            // name N receives capture group N, or '' when
                            // the pattern did not match / has no such group
                            $this->extraColsCount = 0;
                            foreach ($extraColumn['names'] as $extraColumnSub) {
                                $this->extraColsCount++;
                                if ($matched && isset($matches[$this->extraColsCount]))
                                    $tableData[$newRowCount][$extraColumnSub] = $matches[$this->extraColsCount];
                                else
                                    $tableData[$newRowCount][$extraColumnSub] = '';
                            }
                        } else {
                            if ($matched && isset($matches[1]))
                                $tableData[$newRowCount][$extraColumn['names']] = $matches[1];
                            else
                                $tableData[$newRowCount][$extraColumn['names']] = '';
                        }

                    }
                }

                if ($this->stripTags)
                    $cell = strip_tags($cell);

                // set the column key as the column number
                $colKey = $newColCount;

                // if there is a table header, use the column name as the key
                if ($this->headerRow && isset($columnNames[$colCount]))
                    $colKey = $columnNames[$colCount];

                // add the data to the array
                $tableData[$newRowCount][$colKey] = $cell;

            }

        }

        $this->finalArray = $tableData;

        return $tableData;

    }

}
|
||||||
|
|
||||||
|
?>
|
||||||
Reference in a new issue