Merge pull request #1083 from jjlin/global-domains

Add a script to auto-generate the global equivalent domains JSON file
This commit is contained in:
Daniel García 2020-08-08 16:19:30 +02:00 committed by GitHub
commit 83dff9ae6e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 110 additions and 41 deletions

View File

@ -39,8 +39,7 @@
"Type": 1, "Type": 1,
"Domains": [ "Domains": [
"apple.com", "apple.com",
"icloud.com", "icloud.com"
"tv.apple.com"
], ],
"Excluded": false "Excluded": false
}, },
@ -106,6 +105,7 @@
"passport.net", "passport.net",
"windows.com", "windows.com",
"microsoftonline.com", "microsoftonline.com",
"office.com",
"office365.com", "office365.com",
"microsoftstore.com", "microsoftstore.com",
"xbox.com", "xbox.com",
@ -193,7 +193,12 @@
"amazon.it", "amazon.it",
"amazon.com.au", "amazon.com.au",
"amazon.co.nz", "amazon.co.nz",
"amazon.in" "amazon.in",
"amazon.com.mx",
"amazon.nl",
"amazon.sg",
"amazon.com.tr",
"amazon.ae"
], ],
"Excluded": false "Excluded": false
}, },
@ -386,8 +391,7 @@
"alibaba.com", "alibaba.com",
"aliexpress.com", "aliexpress.com",
"aliyun.com", "aliyun.com",
"net.cn", "net.cn"
"www.net.cn"
], ],
"Excluded": false "Excluded": false
}, },
@ -717,41 +721,27 @@
"eventbrite.ca", "eventbrite.ca",
"eventbrite.ch", "eventbrite.ch",
"eventbrite.cl", "eventbrite.cl",
"eventbrite.co.id", "eventbrite.co",
"eventbrite.co.in",
"eventbrite.co.kr",
"eventbrite.co.nz", "eventbrite.co.nz",
"eventbrite.co.uk", "eventbrite.co.uk",
"eventbrite.co.ve",
"eventbrite.com", "eventbrite.com",
"eventbrite.com.ar",
"eventbrite.com.au", "eventbrite.com.au",
"eventbrite.com.bo",
"eventbrite.com.br", "eventbrite.com.br",
"eventbrite.com.co", "eventbrite.com.mx",
"eventbrite.com.hk",
"eventbrite.com.hn",
"eventbrite.com.pe", "eventbrite.com.pe",
"eventbrite.com.sg",
"eventbrite.com.tr",
"eventbrite.com.tw",
"eventbrite.cz",
"eventbrite.de", "eventbrite.de",
"eventbrite.dk", "eventbrite.dk",
"eventbrite.es",
"eventbrite.fi", "eventbrite.fi",
"eventbrite.fr", "eventbrite.fr",
"eventbrite.gy", "eventbrite.hk",
"eventbrite.hu",
"eventbrite.ie", "eventbrite.ie",
"eventbrite.is",
"eventbrite.it", "eventbrite.it",
"eventbrite.jp",
"eventbrite.mx",
"eventbrite.nl", "eventbrite.nl",
"eventbrite.no",
"eventbrite.pl",
"eventbrite.pt", "eventbrite.pt",
"eventbrite.ru", "eventbrite.se",
"eventbrite.se" "eventbrite.sg"
], ],
"Excluded": false "Excluded": false
}, },
@ -769,15 +759,6 @@
}, },
{ {
"Type": 75, "Type": 75,
"Domains": [
"netcup.de",
"netcup.eu",
"customercontrolpanel.de"
],
"Excluded": false
},
{
"Type": 76,
"Domains": [ "Domains": [
"docusign.com", "docusign.com",
"docusign.net" "docusign.net"
@ -785,7 +766,7 @@
"Excluded": false "Excluded": false
}, },
{ {
"Type": 77, "Type": 76,
"Domains": [ "Domains": [
"envato.com", "envato.com",
"themeforest.net", "themeforest.net",
@ -799,7 +780,7 @@
"Excluded": false "Excluded": false
}, },
{ {
"Type": 78, "Type": 77,
"Domains": [ "Domains": [
"x10hosting.com", "x10hosting.com",
"x10premium.com" "x10premium.com"
@ -807,7 +788,7 @@
"Excluded": false "Excluded": false
}, },
{ {
"Type": 79, "Type": 78,
"Domains": [ "Domains": [
"dnsomatic.com", "dnsomatic.com",
"opendns.com", "opendns.com",
@ -816,7 +797,7 @@
"Excluded": false "Excluded": false
}, },
{ {
"Type": 80, "Type": 79,
"Domains": [ "Domains": [
"cagreatamerica.com", "cagreatamerica.com",
"canadaswonderland.com", "canadaswonderland.com",
@ -835,11 +816,19 @@
"Excluded": false "Excluded": false
}, },
{ {
"Type": 81, "Type": 80,
"Domains": [ "Domains": [
"ubnt.com", "ubnt.com",
"ui.com" "ui.com"
], ],
"Excluded": false "Excluded": false
},
{
"Type": 81,
"Domains": [
"discordapp.com",
"discord.com"
],
"Excluded": false
} }
] ]

80
tools/global_domains.py Executable file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env python3
#
# This script generates a global equivalent domains JSON file from
# the upstream Bitwarden source repo.
#
import json
import re
import sys
import urllib.request
from collections import OrderedDict
# Exactly one command-line argument is expected: the path of the JSON file
# to write. Anything else is a usage error.
if len(sys.argv) != 2:
    # Usage text is a diagnostic for a failed invocation (exit status 1), so
    # it goes to stderr rather than stdout.
    print("usage: %s <OUTPUT-FILE>" % sys.argv[0], file=sys.stderr)
    print(file=sys.stderr)
    print("This script generates a global equivalent domains JSON file from",
          file=sys.stderr)
    print("the upstream Bitwarden source repo.", file=sys.stderr)
    sys.exit(1)

# Destination path of the generated JSON file.
OUTPUT_FILE = sys.argv[1]
# Root under which the upstream Bitwarden server sources are fetched
# (raw file view of the master branch).
BASE_URL = 'https://github.com/bitwarden/server/raw/master'

# Source file the enum names and values are read from.
ENUMS_URL = BASE_URL + '/src/Core/Enums/GlobalEquivalentDomainsType.cs'

# Source file the per-enum domain lists are read from.
DOMAIN_LISTS_URL = BASE_URL + '/src/Core/Utilities/StaticStore.cs'
# Matches a single enum member line, for example:
#
#     EnumName0 = 0,
#     EnumName1 = 1,
#
# Pattern pieces, in order:
#
#     \s*               leading whitespace (optional)
#     ([_0-9a-zA-Z]+)   enum name (capture group 1)
#     \s*=\s*           '=' with optional surrounding whitespace
#     ([0-9]+)          enum value (capture group 2)
#
ENUM_RE = re.compile(r'\s*([_0-9a-zA-Z]+)\s*=\s*([0-9]+)')
# Matches a single line that registers a domain list, for example:
#
#     GlobalDomains.Add(GlobalEquivalentDomainsType.EnumName, new List<string> { "x.com", "y.com" });
#
# Capture group 1 is the enum name; capture group 2 is the raw text between
# the braces (the comma-separated, still-quoted domains). Leading whitespace
# and the whitespace around ',' and before '{' are optional.
#
DOMAIN_LIST_RE = re.compile(
    r'\s*GlobalDomains\.Add\('
    r'GlobalEquivalentDomainsType\.([_0-9a-zA-Z]+)'
    r'\s*,\s*new List<string>\s*{([^}]+)}\);'
)
def fetch_text(url):
    """Download *url* and return the response body decoded as UTF-8."""
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def parse_enums(source, enum_re):
    """Return a dict mapping enum names to their integer values.

    source:  text to scan, one candidate enum member per line.
    enum_re: compiled pattern applied with ``match`` to every line; group 1
             must capture the name and group 2 the decimal value.

    Lines that do not match are ignored. If a name occurs more than once,
    the last occurrence wins.
    """
    enums = {}
    for ln in source.split('\n'):
        m = enum_re.match(ln)
        if m:
            enums[m.group(1)] = int(m.group(2))
    return enums


def parse_domain_lists(source, domain_list_re):
    """Return an OrderedDict mapping enum names to lists of domains.

    source:         text to scan, one candidate domain list per line.
    domain_list_re: compiled pattern applied with ``match`` to every line;
                    group 1 must capture the enum name and group 2 the raw
                    comma-separated list of quoted domains.

    Entries keep the order in which they appear in *source*.
    """
    domain_lists = OrderedDict()
    for ln in source.split('\n'):
        m = domain_list_re.match(ln)
        if m:
            # Strip double quotes and extraneous spaces in each domain.
            domains = [d.strip(' "') for d in m.group(2).split(',')]
            # A trailing comma inside the braces would otherwise leave an
            # empty string behind as a bogus domain; drop such entries.
            domain_lists[m.group(1)] = [d for d in domains if d]
    return domain_lists


def build_global_domains(enums, domain_lists):
    """Build the list of entries that is serialized to the JSON file.

    enums:        mapping of enum name -> integer value.
    domain_lists: mapping of enum name -> list of domains; its iteration
                  order determines the order of the returned entries.

    Each entry is an OrderedDict with the keys "Type", "Domains" and
    "Excluded" (always False), in that order.

    Raises KeyError if a domain list refers to a name missing from *enums*.
    """
    global_domains = []
    for name, domain_list in domain_lists.items():
        try:
            type_value = enums[name]
        except KeyError:
            # Same exception type as a plain lookup, but say what went wrong.
            raise KeyError(
                "domain list %r has no matching enum value" % name
            ) from None
        entry = OrderedDict()
        entry["Type"] = type_value
        entry["Domains"] = domain_list
        entry["Excluded"] = False
        global_domains.append(entry)
    return global_domains


def main():
    """Fetch the upstream sources and write the global domains JSON file.

    Uses the module-level ENUMS_URL, DOMAIN_LISTS_URL, ENUM_RE,
    DOMAIN_LIST_RE and OUTPUT_FILE.
    """
    # Read in the enum names and values.
    enums = parse_enums(fetch_text(ENUMS_URL), ENUM_RE)

    # Read in the domain lists.
    domain_lists = parse_domain_lists(
        fetch_text(DOMAIN_LISTS_URL), DOMAIN_LIST_RE)

    # Build the global domains data structure.
    global_domains = build_global_domains(enums, domain_lists)

    # Write out the global domains JSON file.
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(global_domains, f, indent=2)


if __name__ == '__main__':
    main()