morristech
6/1/2019 - 11:15 PM

Extract Onion URLs from Files

Extract Onion URLs from Files

#!/usr/bin/env python
# extract_onions.py

"""
author  : k4m4 (Nikolaos Kamarinakis)
email   : nikolaskam{at}gmail{dot}com
twitter : @NikolasKama (https://twitter.com/nikolaskama/)
"""

import re, os, sys

# Module-level list. parseArgs() below builds and returns its own local list
# of the same name, and no function visible in this file reads this global.
files = []

def parseArgs():
    """Collect the input file paths named on the command line.

    Each argument in ``sys.argv[1:]`` that is a directory is expanded, one
    level deep (no recursion), to the regular files it contains. Any other
    argument is passed through unchanged, whether or not it exists.

    Returns:
        list: file paths in command-line order; the entries of an expanded
        directory are sorted by name so the result is reproducible.
    """
    files = []
    for arg in sys.argv[1:]:
        if not os.path.isdir(arg):
            files.append(arg)
            continue
        # os.listdir() returns entries in arbitrary order; sort them so
        # repeated runs produce the same sequence.
        for entry in sorted(os.listdir(arg)):
            path = os.path.join(arg, entry)
            # Sub-directories and other non-regular entries cannot be read
            # as text files, so they are left out of the result.
            if os.path.isfile(path):
                files.append(path)

    return files

def main():
    """Extract .onion addresses from the input files into ``onions.txt``.

    The input paths come from ``parseArgs()``. Every readable file is
    scanned with a regular expression, and each hit longer than the bare
    ``.onion`` suffix is written to ``onions.txt`` (created or truncated in
    the current working directory), one per line. ``<`` and ``>`` are
    stripped from a hit, and ``http://`` is prepended unless the hit already
    starts with ``http://`` or ``https://``.

    Input files that cannot be opened or decoded are reported on stdout and
    skipped, so one bad input does not abort the whole run.
    """
    # Compiled once, outside the per-file loop.
    # NOTE(review): the optional ``www`` group does not consume the dot that
    # follows it, so ``www.x.onion`` is captured as ``.x.onion`` -- confirm
    # whether ``www\.`` was intended before changing the pattern.
    onionPattern = re.compile(r'(?:https?://)?(?:www)?(\S*?\.onion)\b')
    suffixLength = len('.onion')

    # The with-block guarantees onions.txt is flushed and closed even if
    # something below raises.
    with open('onions.txt', 'w+') as myNewFile:
        print('[!] Commencing onion extraction')
        for path in parseArgs():
            try:
                with open(path, 'r') as myFile:
                    text = myFile.read()
            except (IOError, OSError, UnicodeDecodeError) as err:
                # Missing paths, directories, permission problems and
                # undecodable (binary) files all land here.
                print('[-] Skipping \'' + str(path) + '\': ' + str(err))
                continue

            for onion in onionPattern.findall(text):
                onion = onion.replace('<', '').replace('>', '')
                # A hit that is nothing more than ".onion" carries no host.
                if len(onion) <= suffixLength:
                    continue
                if not onion.startswith(('http://', 'https://')):
                    onion = 'http://' + onion
                myNewFile.write(onion + '\n')

    print('[+] Onions successfully extracted to --> onions.txt')

if __name__ == '__main__':
    # Script entry point: with no file/directory argument there is nothing
    # to scan, so print the usage line instead of running the extraction.
    if not sys.argv[1:]:
        print('Usage: python extract_onions.py {file/directory}')
    else:
        main()