article.php 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  <?php
  /*
   * Article view: takes a Google News article URL (?a=) and an edition code
   * (?loc=), downloads the page, reduces it to its readable content with
   * Readability, and leaves the results in variables for the template below:
   *
   *   $loc              edition code, upper-cased (defaults to "US")
   *   $article_url      the validated article URL
   *   $readability      the Readability instance (title / images are read from it)
   *   $readable_article simplified article HTML, "" when fetching or parsing failed
   *   $error_text       HTML-formatted error lines, "" when everything worked
   */
  require_once('vendor/autoload.php');

  use andreskrey\Readability\Configuration;
  use andreskrey\Readability\ParseException;
  use andreskrey\Readability\Readability;

  $article_url = "";
  $article_html = "";
  $error_text = "";
  // Defined up front so the template can echo it even when fetching or parsing fails.
  $readable_article = "";
  $loc = "US";

  // is_string() rejects array-valued parameters such as ?loc[]=x, which the
  // string functions below cannot handle.
  if (isset($_GET['loc']) && is_string($_GET['loc'])) {
    $loc = strtoupper($_GET['loc']);
  }

  if (isset($_GET['a']) && is_string($_GET['a'])) {
    $article_url = $_GET['a'];
  } else {
    echo "What do you think you're doing... >:(";
    exit();
  }

  // Compare the parsed scheme and host rather than a string prefix: a prefix test
  // also accepts "https://news.google.com.evil.tld/" and
  // "https://news.google.com@evil.tld/", which would make this server fetch
  // arbitrary hosts.
  $url_parts = parse_url($article_url);
  if (
    !is_array($url_parts)
    || !isset($url_parts['scheme'], $url_parts['host'])
    || strtolower($url_parts['scheme']) !== 'https'
    || strtolower($url_parts['host']) !== 'news.google.com'
  ) {
    echo("That's not news :(");
    die();
  }

  $configuration = new Configuration();
  $configuration->setArticleByLine(false);
  $readability = new Readability($configuration);

  $article_html = file_get_contents($article_url);
  if ($article_html === false || $article_html === '') {
    // Nothing to parse; report the failure and skip the parser entirely.
    $article_html = "";
    $error_text .= "Failed to get the article :( <br>";
  } else {
    try {
      $readability->parse($article_html);
      // Keep only simple structural / inline tags.
      $readable_article = strip_tags($readability->getContent(), '<ol><ul><li><br><p><small><font><b><strong><i><em><blockquote><h1><h2><h3><h4><h5><h6>');
      // Change <strong> to <b> and <em> to <i>. Matching on the tag name (with a
      // word boundary) also rewrites opening tags that carry attributes, so the
      // opening and closing tags always stay paired.
      $readable_article = preg_replace('~<(/?)strong\b~i', '<$1b', $readable_article);
      $readable_article = preg_replace('~<(/?)em\b~i', '<$1i', $readable_article);
      $readable_article = clean_str($readable_article);
    } catch (ParseException $e) {
      // $error_text is printed as HTML, so the exception message must be escaped.
      $error_text .= 'Sorry! ' . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8') . '<br>';
    }
  }
  /**
   * Replace typographic punctuation that old machines probably can't handle
   * with plain ASCII equivalents.
   *
   * Curly single quotes become ', curly double quotes become ", and the en dash
   * becomes a hyphen. Everything else is returned unchanged.
   *
   * @param string $str Text (or HTML) to clean.
   * @return string The text with the substitutions applied.
   */
  function clean_str($str) {
    // Typographic character => ASCII stand-in; applied in this order.
    $ascii_for = array(
      "‘" => "'",
      "’" => "'",
      "“" => '"',
      "”" => '"',
      "–" => '-',
    );

    return str_replace(array_keys($ascii_for), array_values($ascii_for), $str);
  }
  // ---- Page template -------------------------------------------------------
  // $loc and $article_url come from the query string, and the title and image
  // URLs come from the fetched page, so each is escaped for the HTML context it
  // is printed into before any markup is emitted.
  $loc_html = htmlspecialchars($loc, ENT_QUOTES, 'UTF-8');
  $index_href = htmlspecialchars('/index.php?loc=' . urlencode($loc), ENT_QUOTES, 'UTF-8');
  $article_href = htmlspecialchars($article_url, ENT_QUOTES, 'UTF-8');
  // The (string) cast covers a missing title (e.g. when parsing failed).
  $title_html = htmlspecialchars(clean_str((string) $readability->getTitle()), ENT_NOQUOTES, 'UTF-8');
  ?>
  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 2.0//EN">
  <html>
  <head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <link rel="stylesheet" type="text/css" href="/main.css">
  <title>
  <?php echo $title_html; ?>
  </title>
  </head>
  <body>
  <small><a href="<?php echo $index_href ?>">
  &lt; Back to
  <span class="newsfeed">NewsFeed</span>
  <?php echo $loc_html ?> front page
  </a></small>
  <h1 class="newsfeed"><?php echo $title_html; ?></h1>
  <p><small><a href="<?php echo $article_href ?>" target="_blank">Original source</a> (on modern site) <?php
  $img_num = 0;
  $imgline_html = "| Article images:";
  foreach ($readability->getImages() as $image_url) {
    // we can only do png and jpg
    // strpos() returns an integer offset or false, never true, so each test
    // has to compare against false.
    $is_supported = strpos($image_url, ".jpg") !== false
      || strpos($image_url, ".jpeg") !== false
      || strpos($image_url, ".png") !== false;
    if ($is_supported) {
      $img_num++;
      // urlencode() keeps '&', '#' and similar characters in the image URL from
      // being read as part of image.php's own query string.
      $image_href = htmlspecialchars(
        'image.php?loc=' . urlencode($loc) . '&i=' . urlencode($image_url),
        ENT_QUOTES,
        'UTF-8'
      );
      $imgline_html .= " <a href='" . $image_href . "'>[$img_num]</a> ";
    }
  }
  if ($img_num > 0) {
    echo $imgline_html;
  }
  ?></small></p>
  <?php if ($error_text) { echo "<p><font color='red'>" . $error_text . "</font></p>"; } ?>
  <?php /* $readable_article is tag-filtered HTML, so it is printed as-is; it may be unset if parsing failed. */ ?>
  <p><?php echo isset($readable_article) ? $readable_article : ''; ?></p>
  <small><a href="<?php echo $index_href ?>">&lt; Back to <span class="brand">NewsFeed</span> <?php echo $loc_html ?> front page</a></small>
  </body>
  </html>