| <?php |
| |
| require_once "/home/data/httpd/eclipse-php-classes/system/dbconnection_dashboard_rw.class.php"; |
| require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/news.class.php"); |
| require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/post.class.php"); |
| require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/config.php"); |
| require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/utils.php"); |
| require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/insiders.class.php"); |
| require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/hostname.php"); |
| require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/common/projects-info.class.php"); |
| |
# Open the HTML report and note when the run started
echo "<html>\n<body>\n", "<b>Starting at: </b>", date("H:i:s"), "<br>\n";

# Newsgroup server: connect first, then log in
$news = new News();
$news->connect(NEWS_SERVER, NEWS_PORT, NEWS_TIMEOUT);
$news->authenticate(NEWS_USER, NEWS_PASS);

# Database server: $dbh is the handle later passed to the update* helpers
$dbc = new DBConnectionDashboard();
$dbh = $dbc->connect();

echo "<br>\nConnected to DB<br>\n";

# Project list, newsgroup info holder (GInfo lives in news.class.php)
# and the insider lookup object
$projects = new ProjectsInfo();
$ginfo    = new GInfo();
$ins      = new Insider();

# Date under which today's stats rows are stored
$mysql_date = date('Y-m-d');

# DAYS is a comma separated list of "how many days back" windows
$days     = explode(",", DAYS);
$num_days = count($days);

# flush_print() calls ob_flush(), so make sure a buffer is active
if (ob_get_level() < 1) {
    ob_start();
}
| |
# For every project available: cache the last two months of posts of all its
# newsgroups, derive the mean/std of reply times from that cache, and then
# compute and store the stats for every window listed in DAYS.
$project_count = $projects->hasProjects();
echo "Total number of projects to process: ".$project_count."<br>\n";
for($pindex=0;$pindex<$project_count;$pindex++){
    $project = $projects->getProject($pindex);
    if ($project->exclude_from_dashboard())     // Don't process projects with
        continue;                               // this XML tag
    flush_print("<b>Processing project: ".$project->getName()."</b>");

    # Initialize all variables for this project
    $text = "";                         # New log for this group
    $num_valid_newsgroups = -1;
    $all_messages = array();
    $all_cachedposts = array();
    $messages = null;
    $cachedposts = null;
    # newsgroup name => first article number of the two month window.
    # Article numbers are per newsgroup, so each group needs its own entry.
    $two_month_posts = array();
    $two_month_date = getDateBefore(DAYS_IN_TWO_MONTHS);

    # Does this project have newsgroups? If so cache the data for all newsgroups
    # for the last two months if available
    if ($project->hasNewsgroups() > 0){         # At least one to process
        for($nindex=0;$nindex<$project->hasNewsgroups();$nindex++){
            $newsgroup = $project->getNewsgroup($nindex);
            flush_print(" Getting information for newsgroup $newsgroup<br>\n");

            # Select group and get info
            $ginfo = $news->selectGroup($newsgroup);
            if ($ginfo->getNewsgroupName() == -1){   // Non-existent newsgroup
                $text .= "Non-existant newsgroup $newsgroup\r\n";
                continue;
            }
            if ($ginfo->getNumberPosts() == 0){      // No posts in newsgroup
                $text .= "No posts found for newsgroup $newsgroup\r\n";
                continue;
            }
            $num_valid_newsgroups++;                 // At least one valid newsgroup
            $two_month_post = $news->getFirstArticleFromDate($ginfo->getFirstPost(),
                                                             $ginfo->getLastPost(),
                                                             $two_month_date);
            if ($two_month_post == -1)               // No posts from 2 months ago
                continue;
            $two_month_posts[$newsgroup] = $two_month_post;

            # Cache all message IDs & Timestamps for later search from 2 months ago
            $messages = $news->cacheMessageID($two_month_post,$ginfo->getLastPost());
            $all_messages = $all_messages + $messages; // Message IDs are unique (RFC977)
            $cachedposts = $news->cachePosts($two_month_post,$ginfo->getLastPost());
            # The post caches are 0-based lists (the helpers read them as
            # $cached_posts[$i]); "+" would keep only the first group's entry
            # for every shared index, so append instead.
            $all_cachedposts = array_merge($all_cachedposts,$cachedposts);
        }
    }

    # Get standard deviation and mean from last two months posts time
    $extra_time = get_times($all_cachedposts);
    list($mean,$std) = get_stats($all_cachedposts);

    for($ptr=0;$ptr<$num_days;$ptr++){          # For every expected date
        $day = $days[$ptr];
        flush_print("Info for the last $day days");
        $text .= "\r\n\r\nINFORMATION FOR THE LAST ".$day. " DAYS\r\n\r\n";

        if ($num_valid_newsgroups == -1){       // No valid newsgroups
            echo "No newsgroups found for project ".$project->getName()."<br>\n";
            $text = "No newsgroups found for project ".$project->getName()."\r\n";
            updateLog($dbh,$project->getProjectID(),$text);
            updateNewsStats($dbh,$project->getProjectID(),$mysql_date,$day,-1,-1,-1,-1,-1);
            continue;
        }

        # Get the date for $day days ago
        $date = getDateBefore($day);

        # Initialize variables
        $unanswered = 0;
        $avg_time = 0;
        $insidercount = 0;
        $all_cposts = array();      // Cached posts from all newsgroups
        $group_numposts = 0;        // Total number of posts for all newsgroups
        $total = 0;
        $total_numposts = 0;        // Total number of posts since date
        $gen_num = -1;

        # Get info for all newsgroups for this project
        for($nindex=0;$nindex<$project->hasNewsgroups();$nindex++){
            $newsgroup = $project->getNewsgroup($nindex);
            $ginfo = $news->selectGroup($newsgroup);  // Select the newsgroup to read
            if ($ginfo->getNumberPosts() == 0)        // No posts in newsgroup
                continue;

            // Get first article from date. Start looking from this group's
            // first post of the two month window when one was found above;
            // otherwise search the whole group.
            if (isset($two_month_posts[$newsgroup]))
                $search_start = $two_month_posts[$newsgroup];
            else
                $search_start = $ginfo->getFirstPost();
            $num = $news->getFirstArticleFromDate($search_start,
                                                  $ginfo->getLastPost(),
                                                  $date);
            $group_numposts += $ginfo->getNumberPosts();
            // No posts since date
            if ($num == -1)
                continue;
            $gen_num++;             // At least one post in group or since date

            // Get total number of posts since date
            $numposts = $news->numPostsSinceDate($num,$ginfo->getLastPost());
            $text .= "Number of posts for newsgroup $newsgroup: $numposts\r\n";
            $total_numposts += $numposts;             // Total for all newsgroups

            // Cache new posts for processing (appended, see the note on
            // array_merge above)
            $cposts = $news->cachePosts($num,$ginfo->getLastPost());
            $all_cposts = array_merge($all_cposts,$cposts);
            $total += count($cposts);

            // Insiders vs. Outsiders (Committers?)
            $insidercount += get_insiders($text,$ins,$cposts,$project);
        }

        if ($gen_num == -1 || $group_numposts == 0){
            echo "No posts found since date or no posts in group for all newsgroups<br>\n";
            $text .= "No posts found since date or no posts in group for all newsgroups\r\n";
            updateLog($dbh,$project->getProjectID(),$text);
            updateNewsStats($dbh,$project->getProjectID(),$mysql_date,$day,0,0,-1,0,0);
            continue;               // Next DAY
        }

        // Average number of posts
        $average = round(($total_numposts / $day),1);
        $text .= "Average number of posts for all newsgroups: $average\r\n";

        // Insider count as a fraction of the cached posts. An empty cache
        // would otherwise divide by zero and put a non-number into the SQL.
        $insidercount = ($total > 0) ? ($insidercount / $total) : 0;

        // Answered vs. unanswered posts
        list($avg_time,$unanswered) = answered_and_unanswered($text, $all_cposts,
                                                              $extra_time,
                                                              $mean, $std);

        flush_print("DAY ".$day.": ".$total_numposts."|".$average."|".$avg_time."|".$unanswered."|".$insidercount);

        updateNewsStats($dbh,$project->getProjectID(),$mysql_date,$day,
                        $total_numposts,$average,$avg_time,$unanswered,
                        $insidercount);
    }
    if ($num_valid_newsgroups != -1)                    // The no-newsgroup case
        updateLog($dbh,$project->getProjectID(),$text); // already wrote its log
}
| |
# Close the HTML report with the finishing time
echo "<b>Ending at: </b>", date("H:i:s"), "<br>\n";
echo "</body>\n</html>\n";

# Drop the database handle and connection object, then leave the news server
$dbh = null;
$dbc = null;
$news->disconnect();
$news = null;
| |
// function updateLog($dbh,$project,$text)
//
// Update the DB log for users to retrieve how processing was done.
// The log will be downloaded by
//     $_SERVER['DOCUMENT_ROOT'] . "projects/dashboard/index.php"
// and
//     $_SERVER['DOCUMENT_ROOT'] . "projects/dashboard/dashboard_detail.php"
//
// $dbh     - MySQL link used for the queries
// $project - project id whose row in log_table() is written
// $text    - log text; it carries author names and message IDs taken from
//            newsgroup posts, so it is escaped before it reaches the query
//
// Returns nothing. The row is updated when one already exists for the
// project and inserted otherwise. A failed write is reported with
// error_log() and does not stop the run.
function updateLog($dbh,$project,$text){
    // Stored logs have always had double quotes turned into single quotes;
    // keep that so existing readers see the same text.
    $text = str_replace("\"","'",$text);

    // Escape everything else (backslashes, quotes, NUL, ...) for the link's
    // character set.
    $safe_project = mysql_real_escape_string($project,$dbh);
    $safe_text = mysql_real_escape_string($text,$dbh);

    $query = "SELECT * FROM ".log_table()." WHERE project_id = \"".$safe_project."\"";
    $result = mysql_query($query,$dbh);
    if ($result && mysql_num_rows($result)){
        $query = "UPDATE ".log_table()." SET news_text = \"".$safe_text."\"";
        $query .= " WHERE project_id = \"".$safe_project."\"";
    }else{
        $query = "INSERT INTO ".log_table()." (project_id, news_text) ";
        $query .= "VALUES(\"".$safe_project."\",\"".$safe_text."\")";
    }

    if (!mysql_query($query,$dbh)){
        error_log("updateLog: could not store news log for project ".$project.": ".mysql_error($dbh));
    }
}
| |
// function updateNewsStats($dbh,$project,$mysql_date,$day,$numposts,
//                          $average,$avg_time,$unanswered,$insidercount)
//
// Store the newsgroup stats of one project for one day window. Only one
// stats row is kept per project per date, and that row may already have
// been created by another stats program (mail, bugs, articles, etc), so
// look for it first: update it when present, insert it otherwise.
// The script dies with the MySQL error when the lookup itself fails.
function updateNewsStats($dbh,$project,$mysql_date,$day,$numposts,
                         $average,$avg_time,$unanswered,$insidercount){
    $lookup  = "SELECT * FROM ".stats_table();
    $lookup .= " WHERE project_id = \"".$project."\"";
    $lookup .= " AND stats_date = \"".$mysql_date."\"";

    $rows = mysql_query($lookup,$dbh);
    if (!$rows){
        die(mysql_errno($dbh).": ".mysql_error($dbh));
    }

    if (mysql_num_rows($rows) > 0){
        updateNews($dbh,$project,$mysql_date,$day,$numposts,$average,
                   $avg_time,$unanswered,$insidercount);
        return;
    }

    insertNews($dbh,$project,$mysql_date,$day,$numposts,$average,
               $avg_time,$unanswered,$insidercount);
}
| |
// function updateNews($dbh,$project,$mysql_date,$day,$numposts,$average,
//                     $avg_time,$unanswered,$insidercount)
//
// Write the five news_<day>_* columns of the stats row that already exists
// for this project and date. The outcome of the query is not checked.
function updateNews($dbh,$project,$mysql_date,$day,$numposts,$average,
                    $avg_time,$unanswered,$insidercount){
    // Every column of this window shares the "news_<day>_" prefix
    $prefix = "news_".$day."_";
    $assignments = array(
        $prefix."answer_average_time = ".$avg_time,
        $prefix."unanswered_number = ".$unanswered,
        $prefix."insider_posts = ".$insidercount,
        $prefix."number_posts = ".$numposts,
        $prefix."average_posts = ".$average
    );

    $sql  = "UPDATE ".stats_table()." SET ".implode(", ",$assignments);
    $sql .= " WHERE project_id = \"".$project."\"";
    $sql .= " AND stats_date = \"".$mysql_date."\"";

    mysql_query($sql,$dbh);
}
| |
// function insertNews($dbh,$project,$mysql_date,$day,$numposts,$average,
//                     $avg_time,$unanswered,$insidercount)
//
// Create the stats row for this project and date, filling the five
// news_<day>_* columns. The outcome of the query is not checked.
function insertNews($dbh,$project,$mysql_date,$day,$numposts,$average,
                    $avg_time,$unanswered,$insidercount){
    $prefix = "news_".$day."_";

    // Columns and values are listed in the same order
    $stat_columns = array(
        $prefix."answer_average_time",
        $prefix."unanswered_number",
        $prefix."insider_posts",
        $prefix."number_posts",
        $prefix."average_posts"
    );
    $stat_values = array($avg_time,$unanswered,$insidercount,$numposts,$average);

    $sql  = "INSERT INTO ".stats_table();
    $sql .= " (project_id,stats_date,".implode(", ",$stat_columns).")";
    $sql .= " VALUES(\"".$project."\",\"".$mysql_date."\",";
    $sql .= implode(",",$stat_values).")";

    mysql_query($sql,$dbh);
}
| |
// function get_insiders(&$text,$ins,$cached_posts,$project)
//
// Count the posts written by project insiders, as decided by
// $ins->isInsider(author, project). This function should be extended in
// the future to obtain the information from the Eclipse Foundation DB.
//
// $text         - log, passed by reference; the author of every insider
//                 post is appended to it
// $ins          - object providing isInsider($author,$project)
// $cached_posts - list of post objects providing getAuthor()
// $project      - project handed through to isInsider()
//
// Returns the number of insider posts (0 when there are none).
function get_insiders(&$text,$ins,$cached_posts,$project){
    $insidercount = 0;      // must start at 0 so "no insiders" returns a number

    $text .= "Project insiders:\r\n\r\n";
    foreach ($cached_posts as $post){
        $author = $post->getAuthor();
        if ($ins->isInsider($author,$project)){
            $text .= $author."\r\n";
            $insidercount++;
        }
    }
    $text .= "\r\n";

    return $insidercount;
}
| |
// function get_times($cached_posts)
//
// Build a map from message ID to the value getUnixTimestamp() returns for
// the post's date and time, for every post passed in.
function get_times($cached_posts){
    $stamps = array();

    foreach ($cached_posts as $entry){
        $posted = getUnixTimestamp($entry->getDate(),$entry->getTime());
        $stamps[$entry->getMessageID()] = $posted;
    }

    return $stamps;
}
| |
// function get_timediffs($cached_posts)
//
// Compute reply delays inside the list of posts passed in. A reply is only
// matched to its parent when the parent appears earlier in the list. Every
// matched reply contributes one entry: reply time minus parent time.
//
// Returns array($timediffs,$unanswered) where $unanswered counts the
// top-level posts (empty reference) that got no matched reply.
function get_timediffs($cached_posts){
    $replies = array();     // message ID => "replyID,replyID,..." (trailing comma)
    $posted_at = array();   // message ID => timestamp
    $starters = array();    // message IDs of top-level posts
    $diffs = array();
    $unanswered = 0;

    # Populate data structures
    foreach ($cached_posts as $post){
        $id = $post->getMessageID();
        $parent = $post->getReference();

        if (!isset($replies[$id]))      # First time
            $replies[$id] = "";

        if ($parent == ""){
            $starters[$id] = 1;         # Top-level post
        }elseif (isset($replies[$parent])){
            $replies[$parent] .= $id.",";   # Only for found posts
        }

        $posted_at[$id] = getUnixTimestamp($post->getDate(),$post->getTime());
    }

    # Get time differences
    foreach ($replies as $id => $reply_list){
        if ($reply_list == ""){         # No replies
            if (isset($starters[$id]))
                $unanswered++;
            continue;
        }

        $asked_at = $posted_at[$id];
        $reply_ids = explode(",",$reply_list);
        array_pop($reply_ids);          # Drop the empty piece after the last comma
        foreach ($reply_ids as $reply_id)
            $diffs[] = $posted_at[$reply_id] - $asked_at;
    }

    return array($diffs,$unanswered);
}
| |
// function get_answers(&$text,$cached_posts,$extra_times)
//
// Determine which posts have answers, whether the answered post is in the
// original time window ($cached_posts) or only in the extended (2 month)
// window ($extra_times), and compute the reply delays.
//
// $text         - log, passed by reference; every decision is appended to it
// $cached_posts - list of post objects for the window being examined
// $extra_times  - message ID => timestamp for the extended window
//
// Returns array($timediff,$unanswered,$old_unanswered):
//   $timediff       - one "reply time - original post time" per reply whose
//                     original post was found in either window
//   $unanswered     - number of top-level posts without a reply
//   $old_unanswered - 0 when is_before_today() is true for at least one
//                     unanswered top-level post, -1 otherwise (Bug # 115497)
function get_answers(&$text,$cached_posts,$extra_times){
    $answers = array();     // message ID => "replyID,replyID,..." (trailing comma)
    $times = array();       // message ID => timestamp, posts of this window only
    $timediff = array();
    $top_level = array();
    $unanswered = 0;
    $old_unanswered = -1;   // Bug # 115497

    $text .= "Getting top-level and reply posts\r\n\r\n";
    # Populate data structures
    foreach ($cached_posts as $post){
        $id = $post->getMessageID();
        $reference = $post->getReference();

        if (!isset($answers[$id]))              # First time
            $answers[$id] = "";
        if ($reference != ""){                  # Not for top-level
            $text .= "Reply post: ".$id." to post: ";
            $text .= $reference."\r\n";
            // The referenced post may be outside this window (it is looked
            // up in $extra_times below), so create its entry before
            // appending instead of appending to an undefined key.
            if (!isset($answers[$reference]))
                $answers[$reference] = "";
            $answers[$reference] .= $id.",";
        }else{
            $text .= "Top level post: ".$id."\r\n";
            $top_level[$id] = 1;
        }
        $times[$id] = getUnixTimestamp($post->getDate(),$post->getTime());
    }

    $text .= "\r\nGetting time differences\r\n\r\n";
    # Get time differences
    foreach ($answers as $key => $replies){
        if ($replies == ""){                    # No replies
            if (isset($top_level[$key])){
                $text .= "Unanswered top level post: ".$key."\r\n";
                $unanswered++;
                if (is_before_today($times[$key])){
                    $old_unanswered = 0;
                }
            }
            continue;
        }

        $ans = substr($replies,0,-1);           # Reply list without trailing comma
        if (isset($times[$key])){
            $text .= "Answer ".$ans." found on main search window\r\n";
            $original_post_time = $times[$key];
        }elseif (isset($extra_times[$key])){
            $text .= "Answer ".$ans." found on extended search ";
            $text .= "window\r\n";
            $original_post_time = $extra_times[$key];
        }else{
            $text .= "Answer ".$ans." for post $key not found on ";
            $text .= "allowed window\r\n";
            continue;                           # Not found in the extra time window
        }

        foreach (explode(",",$ans) as $reply_id)
            $timediff[] = $times[$reply_id] - $original_post_time;
    }
    $text .= "\r\n";

    return array($timediff,$unanswered,$old_unanswered);
}
| |
// function is_before_today($date)
//
// Tell whether the timestamp passed is older than today: returns 1 when
// $date plus ONE_DAY still lies before the coming midnight, 0 otherwise.
function is_before_today($date){
    // Midnight at the end of the current day
    $coming_midnight = mktime(0, 0, 0, date("m"), date("d")+1, date("Y"));

    return (($date + ONE_DAY) < $coming_midnight) ? 1 : 0;
}
| |
// function get_stats($cached_posts)
//
// Obtain the mean and the standard deviation of the reply delays that
// get_timediffs() finds in the posts cache. The mean is rounded to a whole
// number before the deviation is computed, and the deviation divides by
// (n - 1) whenever there is more than one delay.
//
// Returns array($mean,$std); both are 0 when there are no delays.
function get_stats($cached_posts){
    list($diffs,$ignored) = get_timediffs($cached_posts);

    $n = count($diffs);
    if ($n == 0)
        return array(0,0);

    $mean = round(array_sum($diffs) / $n);

    $squares = 0;
    foreach ($diffs as $diff)
        $squares += pow(($diff - $mean),2);

    $divisor = ($n > 1) ? ($n - 1) : $n;
    $std = round(sqrt($squares / $divisor));

    return array($mean,$std);
}
| |
// function answered_and_unanswered(&$text,$cached_posts,$times,$mean,$std_dev)
//
// Compute the average time to reply using only the reply delays that fall
// inside mean +/- (TIMES_STD * standard deviation); the lower bound is
// clamped to 0.
//
// $text         - log, passed by reference; the range and the number of
//                 delays outside of it are appended
// $cached_posts - list of post objects for the window being examined
// $times        - message ID => timestamp for the extended window, handed
//                 to get_answers()
// $mean         - mean reply delay
// $std_dev      - standard deviation of the reply delay
//
// Returns array($average_ttr,$unanswered). When no delay is inside the
// range, $average_ttr is the third value returned by get_answers()
// (Bug 115497).
function answered_and_unanswered(&$text,$cached_posts,$times,$mean,$std_dev){
    $average_ttr = 0;
    $skip_count = 0;        // delays outside the comparison range
    $count = 0;
    $sum = 0;

    # Get ranges
    $low = $mean - (TIMES_STD * $std_dev);
    $low = ($low < 0)?0:$low;
    $high = $mean + (TIMES_STD * $std_dev);
    $text .= "Comparison ranges: mean +/- (".TIMES_STD." * standard deviation)";
    $text .= "\r\n";
    $text .= "low: ".$low."\r\n";
    $text .= "high: ".$high."\r\n\r\n";

    list($timediffs,$unanswered,$old) = get_answers($text,$cached_posts,$times);
    foreach ($timediffs as $diff){
        if ($diff >= $low && $diff <= $high){
            $sum += $diff;
            $count++;
        }else{
            $skip_count++;
        }
    }
    $text .= $skip_count." articles not within comparison range\r\n\r\n";

    if ($count != 0)
        $average_ttr = round($sum / $count);
    else                    // Bug 115497
        $average_ttr = $old;

    return array($average_ttr,$unanswered);
}
| |
// function flush_print($text)
//
// Print $text followed by an HTML line break, then push the output
// buffer and the system write buffers out so the browser gets the line
// without waiting for more output.
function flush_print($text){
    print($text."<br>\n");

    ob_flush();
    flush();
}
| |
| ?> |