-
-
Notifications
You must be signed in to change notification settings - Fork 48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Semi-automate identifying matching snaps/flatpaks #31
Comments
Here's a script which
It aggressively caches the data (because fetching six thousand things takes a little while), but it tells you what it's cached, and you can remove that; in particular, it caches all the fetched results in one big file.

Script first:

#!/usr/bin/env python3
import requests_cache
import subprocess
import json
import os
import socket
from urllib3.connection import HTTPConnection
from urllib3.connectionpool import HTTPConnectionPool
from requests.adapters import HTTPAdapter
import logging
# Root logger: only warnings and above from everything by default.
logging.basicConfig(level='WARNING')
# Exception: lower the requests_cache logger to DEBUG so its own debug
# messages are emitted while the script runs.
logging.getLogger('requests_cache').setLevel('DEBUG')
# from https://stackoverflow.com/a/59594889
class SnapdConnection(HTTPConnection):
    """HTTP connection that talks to snapd over its Unix domain socket."""

    def __init__(self):
        # The host name is only a placeholder for the base class; connect()
        # below never uses it.
        super().__init__("localhost")

    def connect(self):
        """Attach a stream socket connected to ``/run/snapd.socket``."""
        unix_sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM)
        self.sock = unix_sock
        unix_sock.connect("/run/snapd.socket")
class SnapdConnectionPool(HTTPConnectionPool):
    """Connection pool whose connections are all ``SnapdConnection`` objects."""

    def __init__(self):
        # Placeholder host, mirroring SnapdConnection.
        super().__init__(host="localhost")

    def _new_conn(self):
        """Create the next pooled connection to the snapd socket."""
        connection = SnapdConnection()
        return connection
class SnapdAdapter(HTTPAdapter):
    """Transport adapter that sends every request through the snapd socket.

    Mount it on a session prefix (the script uses ``http://snapd/``) so that
    requests to that prefix use ``SnapdConnectionPool`` instead of TCP.
    """

    def get_connection(self, url, proxies=None):
        """Return a snapd pool; the hook older requests releases call."""
        return SnapdConnectionPool()

    def get_connection_with_tls_context(self, request, verify,
                                        proxies=None, cert=None):
        """Return a snapd pool; the hook requests >= 2.32.2 calls from send().

        Newer requests releases no longer call ``get_connection`` from
        ``send()``, so without this override the request would go out over
        ordinary TCP instead of the Unix socket. TLS arguments are ignored
        because the socket is local and unencrypted.
        """
        return SnapdConnectionPool()
def make_cache():
    """Build the object holding every cache this script uses.

    Returns:
        A namespace with these attributes:

        * ``cache_folder`` - the ``cache`` directory next to this script,
          created if it does not exist yet;
        * ``flatpak`` - a ``requests_cache.CachedSession`` for Flathub calls;
        * ``snap`` - a ``requests_cache.CachedSession`` with ``SnapdAdapter``
          mounted on ``http://snapd/``;
        * ``file(name)`` - returns the path of ``name`` inside the folder.
    """
    # Local import so this block does not depend on a file-level import.
    from types import SimpleNamespace

    cache_folder = os.path.join(os.path.dirname(__file__), "cache")
    os.makedirs(cache_folder, exist_ok=True)

    def cache_file(name):
        """Return the path of ``name`` inside the cache folder."""
        return os.path.join(cache_folder, name)

    # 404 is listed alongside 200 so "not found" answers are cached as well.
    flatpak_session = requests_cache.CachedSession(
        cache_name=cache_file("flatpak"),
        allowable_codes=(200, 404))
    snap_session = requests_cache.CachedSession(
        cache_name=cache_file("snap"),
        allowable_codes=(200, 404))
    snap_session.mount("http://snapd/", SnapdAdapter())

    return SimpleNamespace(
        cache_folder=cache_folder,
        flatpak=flatpak_session,
        snap=snap_session,
        file=cache_file,
    )
def get_flatpak_names(cache):
    """Return the application IDs reported by ``flatpak search``.

    The list is read from ``flatpak-names.json`` in the cache folder when
    that file exists and parses; otherwise ``flatpak search .`` is run, the
    result is filtered and written to that file.

    Args:
        cache: object whose ``file(name)`` returns a path in the cache folder.

    Returns:
        A list of application ID strings, excluding entries containing
        ``Gtk3theme`` and entries ending in ``.Sdk``.

    Raises:
        subprocess.CalledProcessError: if ``flatpak`` exits with an error, so
            that a failed run is not cached as an empty list.
        FileNotFoundError: if the ``flatpak`` executable is not installed.
    """
    cache_fn = cache.file("flatpak-names.json")
    try:
        with open(cache_fn, encoding="utf-8") as fp:
            cached = json.load(fp)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt cache file: rebuild it below.
        pass
    else:
        print(f"(using cached flatpak name data: rm {cache_fn} to clear)")
        return cached

    proc = subprocess.run(
        ["flatpak", "search", ".", "--columns=application"],
        capture_output=True, encoding="utf-8", check=True)
    # Strip first so trailing whitespace cannot defeat the ".Sdk" suffix test.
    stripped = (line.strip() for line in proc.stdout.split("\n"))
    fns = [name for name in stripped
           if name and "Gtk3theme" not in name and not name.endswith(".Sdk")]
    with open(cache_fn, encoding="utf-8", mode="w") as fp:
        json.dump(fns, fp, indent=2)
    return fns
def get_snap_names(cache, names_path="/var/cache/snapd/names"):
    """Return the snap names listed in snapd's local names file.

    Args:
        cache: unused; accepted so the signature matches the other loaders.
        names_path: file with one snap name per line. Defaults to the path
            snapd maintains on the local system.

    Returns:
        A list of names with surrounding whitespace removed and blank lines
        dropped.

    Raises:
        OSError: if ``names_path`` cannot be opened.
    """
    with open(names_path, encoding="utf-8") as fp:
        stripped = (line.strip() for line in fp)
        return [name for name in stripped if name]
def populate_flatpak_data(fns, cache):
    """Fetch Flathub details for each flatpak name.

    The result is read from ``flatpak-data.json`` in the cache folder when
    that file exists and parses; otherwise every name is requested through
    ``cache.flatpak`` and the collected data is written to that file.

    Args:
        fns: iterable of flatpak application IDs (must support ``len``).
        cache: object providing ``file(name)`` and a ``flatpak`` session with
            ``get(url)``.

    Returns:
        A dict mapping application ID to the decoded JSON response. Names
        answered with 404, or whose body is not valid JSON, are omitted.
    """
    cache_fn = cache.file("flatpak-data.json")
    try:
        with open(cache_fn, encoding="utf-8") as fp:
            cached = json.load(fp)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt cache file: refetch below.
        pass
    else:
        print(f"(using cached flatpak detail data: rm {cache_fn} to clear)")
        return cached

    print(f"Fetching flatpak data from API (which may be cached; rm {cache.flatpak._cache_name} to clear)")
    flatpak_data = {}
    count = len(fns)
    for idx, flatpak_name in enumerate(fns):
        if idx % 100 == 0:
            print(f" flatpak data {idx}/{count}")
        url = f"https://flathub.org/api/v1/apps/{flatpak_name}"
        resp = cache.flatpak.get(url)
        if resp.status_code == 404:
            continue
        try:
            flatpak_data[flatpak_name] = resp.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses. The original
            # message lacked the f prefix and printed the placeholders.
            print(f"Unexpected flathub data error from {url}, {resp}")
    with open(cache_fn, encoding="utf-8", mode="w") as fp:
        json.dump(flatpak_data, fp, indent=2)
    return flatpak_data
def populate_snap_data(sns, cache):
    """Fetch snapd ``find`` results for each snap name.

    The result is read from ``snap-data.json`` in the cache folder when that
    file exists and parses; otherwise every name is requested through
    ``cache.snap`` and the collected data is written to that file.

    Args:
        sns: iterable of snap names (must support ``len``).
        cache: object providing ``file(name)`` and a ``snap`` session with
            ``get(url)``.

    Returns:
        A dict mapping snap name to the decoded JSON response. Names whose
        body is not valid JSON are omitted.
    """
    cache_fn = cache.file("snap-data.json")
    try:
        with open(cache_fn, encoding="utf-8") as fp:
            cached = json.load(fp)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt cache file: refetch below.
        pass
    else:
        print(f"(using cached snap detail data: rm {cache_fn} to clear)")
        return cached

    # Name the snap session's cache here; the original pointed at the
    # flatpak one, so following the hint cleared the wrong cache.
    print(f"Fetching snap data from API (which may be cached; rm {cache.snap._cache_name} to clear)")
    snap_data = {}
    count = len(sns)
    for idx, snap_name in enumerate(sns):
        if idx % 100 == 0:
            print(f" snap data {idx}/{count}")
        url = f"http://snapd/v2/find?name={snap_name}"
        resp = cache.snap.get(url)
        try:
            snap_data[snap_name] = resp.json()
        except ValueError:
            # Same handling as the flatpak fetcher: report and keep going.
            print(f"Unexpected snapd data error from {url}, {resp}")
    with open(cache_fn, encoding="utf-8", mode="w") as fp:
        json.dump(snap_data, fp, indent=2)
    return snap_data
def get_pairs(flatpaks, snaps):
    """Pair snaps and flatpaks that advertise the same website.

    Args:
        flatpaks: dict of flatpak name -> detail dict. The website is
            ``homepageUrl``, falling back to ``bugtrackerUrl`` when the
            homepage is missing, null or empty.
        snaps: dict of snap name -> response dict. Only responses with
            ``status-code`` 200 are used; the website is the ``website``
            field of the first entry in ``result``.

    Returns:
        A sorted list of ``(snap_name, flatpak_name)`` tuples, one per shared
        website. When several packages share a website, the last one seen in
        iteration order represents it.
    """
    flatpaks_by_website = {}
    for name, details in flatpaks.items():
        # "or", not a dict.get default: the key may be present but null/empty.
        website = details.get("homepageUrl") or details.get("bugtrackerUrl")
        if website:
            flatpaks_by_website[website] = name

    snaps_by_website = {}
    for name, response in snaps.items():
        if response.get("status-code") != 200:
            continue
        results = response.get("result")
        # Guard against an empty or non-list result instead of raising.
        first = results[0] if isinstance(results, list) and results else {}
        website = first.get("website")
        if website:
            snaps_by_website[website] = name

    shared = snaps_by_website.keys() & flatpaks_by_website.keys()
    return sorted(
        (snaps_by_website[site], flatpaks_by_website[site]) for site in shared
    )
def main():
    """Run the whole pipeline and print the suggested snap/flatpak pairs."""
    cache = make_cache()

    # Gather names first, then the per-package details.
    flatpak_names = get_flatpak_names(cache)
    snap_names = get_snap_names(cache)
    flatpak_details = populate_flatpak_data(flatpak_names, cache)
    snap_details = populate_snap_data(snap_names, cache)

    matched = get_pairs(flatpak_details, snap_details)
    print("I suggest that the following are snap/flatpak pairs:")
    report_lines = ["{} = {}".format(snap, flatpak) for snap, flatpak in matched]
    print("\n".join(report_lines))
    print(f"({len(matched)} potential matches)")
if __name__ == "__main__":
    main()

Secondly, the results. This is not a PR against the CSV file because I have not checked most of these, so I don't know whether the decisions it makes are good. But here's the list so someone else can do that.
|
It's kinda time consuming and potentially error-prone to match the various snaps with their equivalent flatpaks. So far I've just been browsing the stores to look for matches, but I think we can do better.
The list of published snaps is easily found on a system running snapd, it's in
/var/cache/snapd/names
. There are (currently) 4671 snaps in that list. The list of flatpaks is easy to get too, with a simple
flatpak search . --columns=application
. There are 2338 in the list. We can filter out some which aren't likely to match with `flatpak search . --columns=application | grep -v Gtk3theme | grep -v org.freedesktop.Sdk`
which gets us down to 1950 flatpaks. It feels like it might be possible to iterate through the snaps and find flatpaks from the list with some fuzzy matching. We can then verify and submit them as pull requests to
applist.csv
. Volunteers for this sought! :D I've attached the snap names file and flatpak list in case anyone wants to play with the data.
flatpak_2022-04-11.txt
snap_2022-04-11.txt
The text was updated successfully, but these errors were encountered: