# -*- coding: utf-8 -*- # parser.py - Module for parsing whois response data # Copyright (c) 2008 Andrey Petrov # # This module is part of pywhois and is released under # the MIT license: http://www.opensource.org/licenses/mit-license.php from __future__ import absolute_import from __future__ import unicode_literals from __future__ import print_function from __future__ import division from future import standard_library import re from datetime import datetime import json from past.builtins import basestring from builtins import str from builtins import * standard_library.install_aliases() try: import dateutil.parser as dp from .time_zones import tz_data DATEUTIL = True except ImportError: DATEUTIL = False EMAIL_REGEX = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?" KNOWN_FORMATS = [ '%d-%b-%Y', # 02-jan-2000 '%d-%B-%Y', # 11-February-2000 '%d-%m-%Y', # 20-10-2000 '%Y-%m-%d', # 2000-01-02 '%d.%m.%Y', # 2.1.2000 '%Y.%m.%d', # 2000.01.02 '%Y/%m/%d', # 2000/01/02 '%Y%m%d', # 20170209 '%d/%m/%Y', # 02/01/2013 '%Y. %m. %d.', # 2000. 01. 02. '%Y.%m.%d %H:%M:%S', # 2014.03.08 10:28:24 '%d-%b-%Y %H:%M:%S %Z', # 24-Jul-2009 13:20:03 UTC '%a %b %d %H:%M:%S %Z %Y', # Tue Jun 21 23:59:59 GMT 2011 '%Y-%m-%dT%H:%M:%SZ', # 2007-01-26T19:10:31Z '%Y-%m-%dT%H:%M:%S.%fZ', # 2018-12-01T16:17:30.568Z '%Y-%m-%dT%H:%M:%S%z', # 2013-12-06T08:17:22-0800 '%Y-%m-%d %H:%M:%SZ', # 2000-08-22 18:55:20Z '%Y-%m-%d %H:%M:%S', # 2000-08-22 18:55:20 '%d %b %Y %H:%M:%S', # 08 Apr 2013 05:44:00 '%d/%m/%Y %H:%M:%S', # 23/04/2015 12:00:07 EEST '%d/%m/%Y %H:%M:%S %Z', # 23/04/2015 12:00:07 EEST '%d/%m/%Y %H:%M:%S.%f %Z', # 23/04/2015 12:00:07.619546 EEST '%B %d %Y', # August 14 2017 '%d.%m.%Y %H:%M:%S', # 08.03.2014 10:28:24 ] class PywhoisError(Exception): pass def datetime_parse(s): for known_format in KNOWN_FORMATS: try: s = datetime.strptime(s, known_format) break except ValueError as e: pass # Wrong format, keep trying return s def cast_date(s, dayfirst=False, yearfirst=False): """Convert any date string found in WHOIS to a datetime object. """ if DATEUTIL: try: return dp.parse( s, tzinfos=tz_data, dayfirst=dayfirst, yearfirst=yearfirst ).replace(tzinfo=None) except Exception: return datetime_parse(s) else: return datetime_parse(s) class WhoisEntry(dict): """Base class for parsing a Whois entries. """ # regular expressions to extract domain data from whois profile # child classes will override this _regex = { 'domain_name': 'Domain Name: *(.+)', 'registrar': 'Registrar: *(.+)', 'whois_server': 'Whois Server: *(.+)', 'referral_url': 'Referral URL: *(.+)', # http url of whois_server 'updated_date': 'Updated Date: *(.+)', 'creation_date': 'Creation Date: *(.+)', 'expiration_date': 'Expir\w+ Date: *(.+)', 'name_servers': 'Name Server: *(.+)', # list of name servers 'status': 'Status: *(.+)', # list of statuses 'emails': EMAIL_REGEX, # list of email s 'dnssec': 'dnssec: *([\S]+)', 'name': 'Registrant Name: *(.+)', 'org': 'Registrant\s*Organization: *(.+)', 'address': 'Registrant Street: *(.+)', 'city': 'Registrant City: *(.+)', 'state': 'Registrant State/Province: *(.+)', 'zipcode': 'Registrant Postal Code: *(.+)', 'country': 'Registrant Country: *(.+)', } dayfirst = False yearfirst = False def __init__(self, domain, text, regex=None): if 'This TLD has no whois server, but you can access the whois database at' in text: raise PywhoisError(text) else: self.domain = domain self.text = text if regex is not None: self._regex = regex self.parse() def parse(self): """The first time an attribute is called it will be calculated here. The attribute is then set to be accessed directly by subsequent calls. """ for attr, regex in list(self._regex.items()): if regex: values = [] for data in re.findall(regex, self.text, re.IGNORECASE | re.M): matches = data if isinstance(data, tuple) else [data] for value in matches: value = self._preprocess(attr, value) if value and value not in values: # avoid duplicates values.append(value) if values and attr in ('registrar', 'whois_server', 'referral_url'): values = values[-1] # ignore junk if len(values) == 1: values = values[0] elif not values: values = None self[attr] = values def _preprocess(self, attr, value): value = value.strip() if value and isinstance(value, basestring) and not value.isdigit() and '_date' in attr: # try casting to date format value = cast_date( value, dayfirst=self.dayfirst, yearfirst=self.yearfirst) return value def __setitem__(self, name, value): super(WhoisEntry, self).__setitem__(name, value) def __getattr__(self, name): return self.get(name) def __str__(self): def handler(e): return str(e) return json.dumps(self, indent=2, default=handler) def __getstate__(self): return self.__dict__ def __setstate__(self, state): self.__dict__ = state @staticmethod def load(domain, text): """Given whois output in ``text``, return an instance of ``WhoisEntry`` that represents its parsed contents. """ if text.strip() == 'No whois server is known for this kind of object.': raise PywhoisError(text) if domain.endswith('.com'): return WhoisCom(domain, text) elif domain.endswith('.net'): return WhoisNet(domain, text) elif domain.endswith('.org'): return WhoisOrg(domain, text) elif domain.endswith('.name'): return WhoisName(domain, text) elif domain.endswith('.me'): return WhoisMe(domain, text) elif domain.endswith('ae'): return WhoisAe(domain, text) elif domain.endswith('.au'): return WhoisAU(domain, text) elif domain.endswith('.ru'): return WhoisRu(domain, text) elif domain.endswith('.us'): return WhoisUs(domain, text) elif domain.endswith('.uk'): return WhoisUk(domain, text) elif domain.endswith('.fr'): return WhoisFr(domain, text) elif domain.endswith('.nl'): return WhoisNl(domain, text) elif domain.endswith('.fi'): return WhoisFi(domain, text) elif domain.endswith('.hr'): return WhoisHr(domain, text) elif domain.endswith('.hn'): return WhoisHn(domain, text) elif domain.endswith('.hk'): return WhoisHk(domain, text) elif domain.endswith('.jp'): return WhoisJp(domain, text) elif domain.endswith('.pl'): return WhoisPl(domain, text) elif domain.endswith('.br'): return WhoisBr(domain, text) elif domain.endswith('.eu'): return WhoisEu(domain, text) elif domain.endswith('.ee'): return WhoisEe(domain, text) elif domain.endswith('.kr'): return WhoisKr(domain, text) elif domain.endswith('.pt'): return WhoisPt(domain, text) elif domain.endswith('.bg'): return WhoisBg(domain, text) elif domain.endswith('.de'): return WhoisDe(domain, text) elif domain.endswith('.at'): return WhoisAt(domain, text) elif domain.endswith('.ca'): return WhoisCa(domain, text) elif domain.endswith('.be'): return WhoisBe(domain, text) elif domain.endswith('.рф'): return WhoisRf(domain, text) elif domain.endswith('.info'): return WhoisInfo(domain, text) elif domain.endswith('.su'): return WhoisSu(domain, text) elif domain.endswith('si'): return WhoisSi(domain, text) elif domain.endswith('.kg'): return WhoisKg(domain, text) elif domain.endswith('.io'): return WhoisIo(domain, text) elif domain.endswith('.biz'): return WhoisBiz(domain, text) elif domain.endswith('.mobi'): return WhoisMobi(domain, text) elif domain.endswith('.ch'): return WhoisChLi(domain, text) elif domain.endswith('.li'): return WhoisChLi(domain, text) elif domain.endswith('.id'): return WhoisID(domain, text) elif domain.endswith('.sk'): return WhoisSK(domain, text) elif domain.endswith('.se'): return WhoisSe(domain, text) elif domain.endswith('no'): return WhoisNo(domain, text) elif domain.endswith('.nu'): return WhoisSe(domain, text) elif domain.endswith('.is'): return WhoisIs(domain, text) elif domain.endswith('.dk'): return WhoisDk(domain, text) elif domain.endswith('.it'): return WhoisIt(domain, text) elif domain.endswith('.mx'): return WhoisMx(domain, text) elif domain.endswith('.ai'): return WhoisAi(domain, text) elif domain.endswith('.il'): return WhoisIl(domain, text) elif domain.endswith('.in'): return WhoisIn(domain, text) elif domain.endswith('.cat'): return WhoisCat(domain, text) elif domain.endswith('.ie'): return WhoisIe(domain, text) elif domain.endswith('.nz'): return WhoisNz(domain, text) elif domain.endswith('.space'): return WhoisSpace(domain, text) elif domain.endswith('.lu'): return WhoisLu(domain, text) elif domain.endswith('.cz'): return WhoisCz(domain, text) elif domain.endswith('.online'): return WhoisOnline(domain, text) elif domain.endswith('.cn'): return WhoisCn(domain, text) elif domain.endswith('.app'): return WhoisApp(domain, text) elif domain.endswith('.money'): return WhoisMoney(domain, text) elif domain.endswith('.cl'): return WhoisCl(domain, text) elif domain.endswith('.ar'): return WhoisAr(domain, text) elif domain.endswith('.by'): return WhoisBy(domain, text) elif domain.endswith('.cr'): return WhoisCr(domain, text) elif domain.endswith('.do'): return WhoisDo(domain, text) elif domain.endswith('.jobs'): return WhoisJobs(domain, text) elif domain.endswith('.lat'): return WhoisLat(domain, text) elif domain.endswith('.pe'): return WhoisPe(domain, text) elif domain.endswith('.ro'): return WhoisRo(domain, text) elif domain.endswith('.sa'): return WhoisSa(domain, text) elif domain.endswith('.tw'): return WhoisTw(domain, text) elif domain.endswith('.tr'): return WhoisTr(domain, text) elif domain.endswith('.ve'): return WhoisVe(domain, text) elif domain.endswith('.ua'): return WhoisUA(domain, text) elif domain.endswith('.kz'): return WhoisKZ(domain, text) else: return WhoisEntry(domain, text) class WhoisCl(WhoisEntry): """Whois parser for .cl domains.""" regex = { 'domain_name': 'Domain name: *(.+)', 'registrant_name': 'Registrant name: *(.+)', 'registrant_organization': 'Registrant organisation: *(.+)', 'registrar': 'registrar name: *(.+)', 'registrar_url': 'Registrar URL: *(.+)', 'creation_date': 'Creation date: *(.+)', 'expiration_date': 'Expiration date: *(.+)', 'name_servers': 'Name server: *(.+)', # list of name servers } def __init__(self, domain, text): if 'No match for "' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text, self.regex) class WhoisPe(WhoisEntry): """Whois parser for .pe domains.""" regex = { 'domain_name': 'Domain name: *(.+)', 'status': 'Domain Status: *(.+)', 'whois_server': 'WHOIS Server: *(.+)', 'registrant_name': 'Registrant name: *(.+)', 'registrar': 'Sponsoring Registrar: *(.+)', 'admin': 'Admin Name: *(.+)', 'admin_email': 'Admin Email: *(.+)', 'dnssec': 'DNSSEC: *(.+)', 'name_servers': 'Name server: *(.+)', # list of name servers } def __init__(self, domain, text): if 'No match for "' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text, self.regex) class WhoisSpace(WhoisEntry): """Whois parser for .space domains """ def __init__(self, domain, text): if 'No match for "' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text) class WhoisCom(WhoisEntry): """Whois parser for .com domains """ def __init__(self, domain, text): if 'No match for "' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text) class WhoisNet(WhoisEntry): """Whois parser for .net domains """ def __init__(self, domain, text): if 'No match for "' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text) class WhoisOrg(WhoisEntry): """Whois parser for .org domains """ regex = { 'domain_name': 'Domain Name: *(.+)', 'registrar': 'Registrar: *(.+)', 'whois_server': 'Whois Server: *(.+)', # empty usually 'referral_url': 'Referral URL: *(.+)', # http url of whois_server: empty usually 'updated_date': 'Updated Date: *(.+)', 'creation_date': 'Creation Date: *(.+)', 'expiration_date': 'Registry Expiry Date: *(.+)', 'name_servers': 'Name Server: *(.+)', # list of name servers 'status': 'Status: *(.+)', # list of statuses 'emails': EMAIL_REGEX, # list of email addresses } def __init__(self, domain, text): if text.strip() == 'NOT FOUND': raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text) class WhoisRo(WhoisEntry): """Whois parser for .ro domains """ regex = { 'domain_name': 'Domain Name: *(.+)', 'domain_status': 'Domain Status: *(.+)', 'registrar': 'Registrar: *(.+)', 'referral_url': 'Referral URL: *(.+)', # http url of whois_server: empty usually 'creation_date': 'Registered On: *(.+)', 'expiration_date': 'Expires On: *(.+)', 'name_servers': 'Nameserver: *(.+)', # list of name servers 'status': 'Status: *(.+)', # list of statuses 'dnssec': 'DNSSEC: *(.+)', } def __init__(self, domain, text): if text.strip() == 'NOT FOUND': raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text, self.regex) class WhoisRu(WhoisEntry): """Whois parser for .ru domains """ regex = { 'domain_name': 'domain: *(.+)', 'registrar': 'registrar: *(.+)', 'creation_date': 'created: *(.+)', 'expiration_date': 'paid-till: *(.+)', 'updated_date': None, 'name_servers': 'nserver: *(.+)', # list of name servers 'status': 'state: *(.+)', # list of statuses 'emails': EMAIL_REGEX, # list of email addresses 'org': 'org: *(.+)' } def __init__(self, domain, text): if 'No entries found' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text, self.regex) class WhoisNl(WhoisEntry): """Whois parser for .nl domains """ regex = { 'domain_name': 'Domain Name: *(.+)', 'expiration_date': None, 'updated_date': None, 'creation_date': None, 'status': 'Status: *(.+)', # list of statuses 'name': None, 'registrar': 'Registrar:\s*(.*\n)', 'registrar_address': 'Registrar:\s*(?:.*\n){1}\s*(.*)', 'registrar_zip_code': 'Registrar:\s*(?:.*\n){2}\s*(\S*)\s(?:.*)', 'registrar_city': 'Registrar:\s*(?:.*\n){2}\s*(?:\S*)\s(.*)', 'registrar_country': 'Registrar:\s*(?:.*\n){3}\s*(.*)', 'dnssec': 'DNSSEC: *(.+)', } def __init__(self, domain, text): if text.endswith('is free'): raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text, self.regex) match = re.compile('Domain nameservers:(.*?)Record maintained by', re.DOTALL).search(text) if match: duplicate_nameservers_with_ip = [line.strip() for line in match.groups()[0].strip().splitlines()] duplicate_nameservers_without_ip = [nameserver.split(' ')[0] for nameserver in duplicate_nameservers_with_ip] self['name_servers'] = sorted(list(set(duplicate_nameservers_without_ip))) class WhoisName(WhoisEntry): """Whois parser for .name domains """ regex = { 'domain_name_id': 'Domain Name ID: *(.+)', 'domain_name': 'Domain Name: *(.+)', 'registrar_id': 'Sponsoring Registrar ID: *(.+)', 'registrar': 'Sponsoring Registrar: *(.+)', 'registrant_id': 'Registrant ID: *(.+)', 'admin_id': 'Admin ID: *(.+)', 'technical_id': 'Tech ID: *(.+)', 'billing_id': 'Billing ID: *(.+)', 'creation_date': 'Created On: *(.+)', 'expiration_date': 'Expires On: *(.+)', 'updated_date': 'Updated On: *(.+)', 'name_server_ids': 'Name Server ID: *(.+)', # list of name server ids 'name_servers': 'Name Server: *(.+)', # list of name servers 'status': 'Domain Status: *(.+)', # list of statuses } def __init__(self, domain, text): if 'No match for ' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text, self.regex) class WhoisUs(WhoisEntry): """Whois parser for .us domains """ regex = { 'domain_name': 'Domain Name: *(.+)', 'domain__id': 'Domain ID: *(.+)', 'whois_server': 'Registrar WHOIS Server: *(.+)', 'registrar': 'Registrar: *(.+)', 'registrar_id': 'Registrar IANA ID: *(.+)', 'registrar_url': 'Registrar URL: *(.+)', 'registrar_email': 'Registrar Abuse Contact Email: *(.+)', 'registrar_phone': 'Registrar Abuse Contact Phone: *(.+)', 'status': 'Domain Status: *(.+)', # list of statuses 'registrant_id': 'Registry Registrant ID: *(.+)', 'registrant_name': 'Registrant Name: *(.+)', 'registrant_organization': 'Registrant Organization: *(.+)', 'registrant_street': 'Registrant Street: *(.+)', 'registrant_city': 'Registrant City: *(.+)', 'registrant_state_province': 'Registrant State/Province: *(.+)', 'registrant_postal_code': 'Registrant Postal Code: *(.+)', 'registrant_country': 'Registrant Country: *(.+)', 'registrant_phone': 'Registrant Phone: *(.+)', 'registrant_email': 'Registrant Email: *(.+)', 'registrant_fax': 'Registrant Fax: *(.+)', 'registrant_application_purpose': 'Registrant Application Purpose: *(.+)', 'registrant_nexus_category': 'Registrant Nexus Category: *(.+)', 'admin_id': 'Registry Admin ID: *(.+)', 'admin': 'Admin Name: *(.+)', 'admin_organization': 'Admin Organization: *(.+)', 'admin_street': 'Admin Street: *(.+)', 'admin_city': 'Admin City: *(.+)', 'admin_state_province': 'Admin State/Province: *(.+)', 'admin_postal_code': 'Admin Postal Code: *(.+)', 'admin_country': 'Admin Country: *(.+)', 'admin_phone': 'Admin Phone: *(.+)', 'admin_email': 'Admin Email: *(.+)', 'admin_fax': 'Admin Fax: *(.+)', 'admin_application_purpose': 'Admin Application Purpose: *(.+)', 'admin_nexus_category': 'Admin Nexus Category: *(.+)', 'tech_id': 'Registry Tech ID: *(.+)', 'tech_name': 'Tech Name: *(.+)', 'tech_organization': 'Tech Organization: *(.+)', 'tech_street': 'Tech Street: *(.+)', 'tech_city': 'Tech City: *(.+)', 'tech_state_province': 'Tech State/Province: *(.+)', 'tech_postal_code': 'Tech Postal Code: *(.+)', 'tech_country': 'Tech Country: *(.+)', 'tech_phone': 'Tech Phone: *(.+)', 'tech_email': 'Tech Email: *(.+)', 'tech_fax': 'Tech Fax: *(.+)', 'tech_application_purpose': 'Tech Application Purpose: *(.+)', 'tech_nexus_category': 'Tech Nexus Category: *(.+)', 'name_servers': 'Name Server: *(.+)', # list of name servers 'creation_date': 'Creation Date: *(.+)', 'expiration_date': 'Registry Expiry Date: *(.+)', 'updated_date': 'Updated Date: *(.+)', } def __init__(self, domain, text): if 'Not found:' in text: raise PywhoisError(text) else: WhoisEntry.__init__(self, domain, text, self.regex) class WhoisPl(WhoisEntry): """Whois parser for .pl domains """ regex = { 'domain_name': 'DOMAIN NAME: *(.+)\n', 'name_servers': 'nameservers:((?:\s+.+\n+)*)', 'registrar': 'REGISTRAR:\s*(.+)', 'registrar_url': 'URL: *(.+)', # not available 'status': 'Registration status:\n\s*(.+)', # not available 'registrant_name': 'Registrant:\n\s*(.+)', # not available 'creation_date': '(?