Skip to content
Snippets Groups Projects

New schema plus metadata

2 files
+ 204
499
Compare changes
  • Side-by-side
  • Inline
Files
2
cron.php deleted 100644 → 0
+ 0
499
<?php
// run this script once per day
// set time limit to unlimited, as it can last long to get extensions, including pauses to reduce load on Mozilla servers
set_time_limit(0);
// display all errors (for debug)
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);
// connect to DB
$db = new PDO('mysql:host=localhost;dbname=mozzarella', "root", "root");
// update the licenses if new are added
// $licenses = file_get_contents("licenses.json");
$licenses = file_get_contents("https://raw.githubusercontent.com/spdx/license-list-data/master/json/licenses.json");
$licensesArr = json_decode($licenses, true);
foreach($licensesArr["licenses"] as $larr) {
$stmt = $db->prepare("SELECT * FROM license WHERE slug = :slug");
$stmt->bindValue(":slug", $larr["licenseId"]);
$stmt->execute();
$found = $stmt->fetch(PDO::FETCH_ASSOC);
if(!$found) {
$stmt = $db->prepare("INSERT INTO license (slug, url, name, fsfApproved) VALUES (:slug, :url, :name, :fsfApproved)");
$stmt->bindValue(":slug", $larr["licenseId"]);
$stmt->bindValue(":url", $larr["reference"]);
$stmt->bindValue(":name", $larr["name"]);
$stmt->bindValue(":fsfApproved", isset($larr["isFsfLibre"]) && $larr["isFsfLibre"] ? 1 : 0);
$stmt->execute();
}
}
// 09/21/2022 NOTE: the API link passed without "category" parameter returns max 25000 entries (but says ~28000 are available).
// To get all the 28000 extensions, the first run will fetch category per category.
// Then, unless 25000 extensions are added in a day, the API can be called without specifying the category, to limit requests.
// 1. Check if if's the first run (already has 25000+ extensions in database)
$stmt = $db->prepare("SELECT count(*) c FROM extensions");
$stmt->execute();
$count = $stmt->fetch(PDO::FETCH_ASSOC)["c"];
// if($count < 25000)
$firstRun = false;
if($firstRun)
{
// 2. If never runned, loop extensions categories and fetch everything
// the condition has to be improved, but works at the moment
$categories =
[
"alerts-updates",
"appearance",
"bookmarks",
"download-management",
"feeds-news-blogging",
"games-entertainment",
"language-support",
"other",
"photos-music-videos",
"privacy-security",
"search-tools",
"shopping",
"social-communication",
"tabs",
"web-development"
];
// todo prevent doublon (check if ID already inserted), as an extension can be in multiple categories.
foreach($categories as $category)
{
// for each category, loop every page of extensions and add to db
// pause between each fetch to limit load on Mozilla severs (possible ban ?)
$url = "https://addons.mozilla.org/api/v5/addons/search/?app=firefox&sort=created&type=extension&page_size=50&category=".$category;
do
{
$results = json_decode(file_get_contents($url), true);
foreach($results["results"] as $k => $result)
{
$stmt = $db->prepare("SELECT * FROM extensions WHERE original_id = :id");
$stmt->bindValue(":id", $result["id"], PDO::PARAM_INT);
$stmt->execute();
if($result["has_eula"]) {
// add an entry for has eula reason. can have multiple reason.
// this code is temporary
saveRejected("eula", $result["id"], $result["current_version"]["license"]["slug"] ?? null);
}
if($result["has_privacy_policy"]) {
// add an entry for has eula reason. can have multiple reason.
// this code is temporary
saveRejected("pp", $result["id"], $result["current_version"]["license"]["slug"] ?? null);
}
// not found in db, add (prevent doublons, necessary as an extension can have multiple categories)
// avoid extensions that have eula
if(!$stmt->fetch() && !$result["has_eula"] && !$result["has_privacy_policy"])
addToDb($result);
}
if($results["next"])
$url = $results["next"];
// sleep(1);
} while($results["next"]);
// sleep(1);
// die();
}
}
else {
// 3. If already runned, fetch from all categories at once, sorted by newly added, until found an entry in DB
$url = "https://addons.mozilla.org/api/v5/addons/search/?app=firefox&sort=created&type=extension&page_size=50";
// die();
do
{
sleep(1);
$results = json_decode(file_get_contents($url), true);
if($results)
foreach ($results["results"] as $result) {
$stmt = $db->prepare("SELECT * FROM extensions WHERE original_id = :id");
$stmt->bindValue(":id", $result["id"], PDO::PARAM_INT);
$stmt->execute();
$found = $stmt->fetch() <> null;
if(!$found && !$result["has_eula"] && !$result["has_privacy_policy"])
{
addToDb($result);
}
}
$url = $results["next"];
} while($url && !$found);
}
// END 1.
// header('Content-Type: application/json; charset=utf-8');
// cron task to get new extensions (any category, sorting by recently updated)
// this script replaces scraper.php, which runs once only and get all extensions
// die();
// do this while debug
// $db->query("DELETE FROM extensions WHERE 1");
// $db->query("DELETE FROM locale WHERE 1");
function addToDb($d)
{
global $db;
$stmt = $db->prepare("INSERT INTO extensions (download_link, original_id, license, icon_url) VALUES (:download_link, :original_id, :license, :icon_url)");
$license = null;
if(!$d["current_version"]["license"]["is_custom"] && $d["current_version"]["license"]["slug"]) {
// die($d["id"]);
// not a custom license, license should already exist in DB
// search the license id with the slug
$stmt1 = $db->prepare("SELECT id, fsfApproved, slug FROM license WHERE slug = :slug");
$stmt1->bindValue(":slug", $d["current_version"]["license"]["slug"]);
$stmt1->execute();
$l = $stmt1->fetch(PDO::FETCH_ASSOC);
if($l && $l["fsfApproved"]) {
// die("ok");
// license found
$license = $l["id"];
}
else {
// skip this extension, no license or not fsf approved
saveRejected("licenseError", $d["id"], $d["current_version"]["license"]["slug"] ?? null);
return;
}
}
else {
saveRejected("licenseError", $d["id"], $d["current_version"]["license"]["slug"] ?? null);
// no license, skip ?
return;
}
$stmt->bindValue(":download_link", $d["current_version"]["file"]["url"]);
$stmt->bindValue(":original_id", $d["id"]);
if($d["icon_url"]) {
// todo, detect the file extension instead of hardcode png
// uncomment to save the icon
$image = file_get_contents($d["icon_url"]);
if($image) {
$stmt->bindValue(":icon_url", $d["id"].".png");
file_put_contents("icons/".$d["id"].".png", $image);
}
else {
$stmt->bindValue(":icon_url", null);
}
}
$stmt->bindValue(":icon_url", $d["id"].".png");
$stmt->bindValue(":license", $license);
// do not add the extension, while debug
$stmt->execute();
$extId = $db->lastInsertId();
// build an array with locale as key
// description and summary can be null (do not exist in the current locale)
// ex.:
/*
[
"fr" => [
"name" => "téléchargeur youtube",
"description" => "extension pour télécharger des vidéos youtube",
"summary" => "télécharger des vidéos youtube en 1 clic"
]
"en-US" => [
"name" => "youtube downloader",
"description" => NULL,
"summary" => "Download youtube video in 1 click"
]
]
*/
$locales = [];
// extension added, add the names...
foreach($d["name"] as $locale => $name) {
// add the array the the locale (ex. "fr")
if(!isset($locales[$locale])) $locales[$locale] = [
"name" => NULL,
"description" => NULL,
"summary" => NULL,
"isDefaultLocale" => $locale == $d["default_locale"] ? 1: 0
];
$locales[$locale]["name"] = $name;
// print_r($locale);
// find if locale exists, if it does not, add it
// $stmt = $db->prepare("SELECT * FROM locale WHERE locale = :locale");
// $stmt->bindValue(":locale", $locale);
// $stmt->execute();
// $localeDb = $stmt->fetch(PDO::FETCH_ASSOC);
// if(!$localeDb) {
// $stmt = $db->prepare("INSERT INTO locale (locale) VALUES (:locale)");
// $stmt->bindValue(":locale", $locale);
// $stmt->execute();
// $localeId = $db->lastInsertId();
// }
// else
// $localeId = $localeDb["id"];
// $stmt = $db->prepare("INSERT INTO extension_locale_name (extension, locale, name, isDefaultLocale) VALUES (:extension, :locale, :name, :isDefaultLocale)");
// $stmt->bindValue(":extension", $extId, PDO::PARAM_INT);
// $stmt->bindValue(":locale", $localeId, PDO::PARAM_INT);
// $stmt->bindValue(":name", $name);
// // the current locale is the default one
// $stmt->bindValue(":isDefaultLocale", $locale == $d["default_locale"] ? 1: 0);
// $stmt->execute();
}
// ... and descriptions (if exist)
if($d["description"]) {
// description can be null
foreach($d["description"] as $locale => $description) {
// add the array the the locale (ex. "fr")
if(!isset($locales[$locale])) $locales[$locale] = [
"name" => NULL,
"description" => NULL,
"summary" => NULL,
"isDefaultLocale" => $locale == $d["default_locale"] ? 1: 0
];
$locales[$locale]["description"] = $description;
// the locale should be the same as name
// find if locale exists, if it does not, add it
// $stmt = $db->prepare("SELECT * FROM locale WHERE locale = :locale");
// $stmt->bindValue(":locale", $locale);
// $stmt->execute();
// $localeDb = $stmt->fetch(PDO::FETCH_ASSOC);
// if(!$localeDb) {
// $stmt = $db->prepare("INSERT INTO locale (locale) VALUES (:locale)");
// $stmt->bindValue(":locale", $locale);
// $stmt->execute();
// $localeId = $db->lastInsertId();
// }
// else
// $localeId = $localeDb["id"];
// $stmt = $db->prepare("INSERT INTO extension_locale_description (extension, locale, content, isDefaultLocale) VALUES (:extension, :locale, :content, :isDefaultLocale)");
// $stmt->bindValue(":extension", $extId, PDO::PARAM_INT);
// $stmt->bindValue(":locale", $localeId, PDO::PARAM_INT);
// $stmt->bindValue(":content", $description);
// // the current locale is the default one
// $stmt->bindValue(":isDefaultLocale", $locale == $d["default_locale"] ? 1: 0);
// $stmt->execute();
}
}
// add locale summary
foreach($d["summary"] as $locale => $summary) {
// add the array the the locale (ex. "fr")
if(!isset($locales[$locale])) $locales[$locale] = [
"name" => NULL,
"description" => NULL,
"summary" => NULL,
"isDefaultLocale" => $locale == $d["default_locale"] ? 1: 0
];
$locales[$locale]["summary"] = $summary;
// print_r($locale);
// find if locale exists, if it does not, add it
// $stmt = $db->prepare("SELECT * FROM locale WHERE locale = :locale");
// $stmt->bindValue(":locale", $locale);
// $stmt->execute();
// $localeDb = $stmt->fetch(PDO::FETCH_ASSOC);
// if(!$localeDb) {
// $stmt = $db->prepare("INSERT INTO locale (locale) VALUES (:locale)");
// $stmt->bindValue(":locale", $locale);
// $stmt->execute();
// $localeId = $db->lastInsertId();
// }
// else
// $localeId = $localeDb["id"];
// $stmt = $db->prepare("INSERT INTO extension_locale_summary (extension, locale, content, isDefaultLocale) VALUES (:extension, :locale, :content, :isDefaultLocale)");
// $stmt->bindValue(":extension", $extId, PDO::PARAM_INT);
// $stmt->bindValue(":locale", $localeId, PDO::PARAM_INT);
// $stmt->bindValue(":content", $summary);
// // the current locale is the default one
// $stmt->bindValue(":isDefaultLocale", $locale == $d["default_locale"] ? 1: 0);
// $stmt->execute();
}
// print_r($locales);
foreach($locales as $key => $locale) {
// loop the locales, add a line per locale (en-US, fr...), grouping name, description and summary
// check if the locale exists in db, if not, add it
$stmt = $db->prepare("SELECT * FROM locale WHERE locale = :locale");
$stmt->bindValue(":locale", $key);
$stmt->execute();
$localeDb = $stmt->fetch(PDO::FETCH_ASSOC);
if($localeDb) $localeDb = $localeDb["id"];
else {
$stmt = $db->prepare("INSERT INTO locale (locale) VALUES (:locale)");
$stmt->bindValue(":locale", $key);
$stmt->execute();
$localeDb = $db->lastInsertId();
}
// add the line
$stmt = $db->prepare("INSERT INTO extension_locale (extension, locale, name, description, summary, isDefaultLocale)
VALUES (:extension, :locale, :name, :description, :summary, :isDefaultLocale)
");
$stmt->bindValue(":extension", $extId, PDO::PARAM_INT);
$stmt->bindValue(":locale", $localeDb, PDO::PARAM_INT);
$stmt->bindValue(":name", $locale["name"]);
$stmt->bindValue(":description", $locale["description"]);
$stmt->bindValue(":summary", $locale["summary"]);
$stmt->bindValue(":isDefaultLocale", $locale["isDefaultLocale"], PDO::PARAM_INT);
$stmt->execute();
}
// stop here for debug
// die();
// return;
// add categories for the extension (can belong to multiple categories)
foreach($d["categories"]["firefox"] as $ckey => $catName)
{
$stmt = $db->prepare("SELECT * FROM categories WHERE name = :name");
$stmt->bindValue(":name", $catName);
$stmt->execute();
$found = $stmt->fetch(PDO::FETCH_ASSOC);
if(!$found)
{
$stmt = $db->prepare("INSERT INTO categories (name) VALUES (:name)");
$stmt->bindValue(":name", $catName);
$stmt->execute();
$stmt->fetch(PDO::FETCH_ASSOC);
$catId = $db->lastInsertId();
}
else {
$catId = $found["id"];
}
$stmt = $db->prepare("INSERT INTO ext_cat (ext_id, cat_id) VALUES (:ext_id, :cat_id)");
$stmt->bindValue(":ext_id", $extId, PDO::PARAM_INT);
$stmt->bindValue(":cat_id", $catId, PDO::PARAM_INT);
$stmt->execute();
}
// die();
}
function saveRejected($reason, $id, $license) {
global $db;
// save rejected extensions with the reason (has eula, privacy policy, no license, non fsf license)
$stmt = $db->prepare("INSERT INTO rejected (original_id, reason, license) VALUES (:original_id, :reason, :license)");
$stmt->bindValue(":original_id", $id, PDO::PARAM_INT);
$stmt->bindValue(":reason", $reason);
$stmt->bindValue(":license", $license);
$stmt->execute();
}
?>
\ No newline at end of file
Loading