CREATE DATABASE pw
WITH
OWNER = XXXXXXXXXXXXXX
ENCODING = 'UTF8'
LC_COLLATE = 'en_US.UTF-8'
LC_CTYPE = 'en_US.UTF-8'
TABLESPACE = pg_default
CONNECTION LIMIT = -1;
CREATE TABLE public.passwords
(
password_id bigint NOT NULL DEFAULT nextval('passwords_password_id_seq'::regclass),
password_username text COLLATE pg_catalog."C.UTF-8",
password_password text COLLATE pg_catalog."default",
CONSTRAINT passwords_pkey PRIMARY KEY (password_id)
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
ALTER TABLE public.passwords
OWNER to XXXXXXXXXXXXXX;
-- Index: i_password
-- DROP INDEX public.i_password;
CREATE INDEX i_password
ON public.passwords USING btree
(password_password COLLATE pg_catalog."default" varchar_ops)
TABLESPACE pg_default;
-- Index: i_username
-- DROP INDEX public.i_username;
CREATE INDEX i_username
ON public.passwords USING btree
(password_username COLLATE pg_catalog."C.UTF-8" bpchar_pattern_ops)
TABLESPACE pg_default;
-- Index: u_up
-- DROP INDEX public.u_up;
CREATE UNIQUE INDEX u_up
ON public.passwords USING btree
(password_username COLLATE pg_catalog."C.UTF-8", password_password COLLATE pg_catalog."default")
TABLESPACE pg_default;
#!/usr/bin/env python3
# by Gunni @ github.com 2017-12-13
import glob
import sys
import os.path
import pathlib
import psycopg2
from random import shuffle
from psycopg2 import sql
files = glob.glob('data/*/*', recursive=True)
connect_str = "dbname='pw' user='xxxxxxxxxx' host='xxxxxxxxxx' password='xxxxxxxxxx'"
conn = psycopg2.connect(connect_str)
cursor = conn.cursor()
def mark(c):
sys.stdout.write(c)
sys.stdout.flush()
shuffle(files)
for pwfile in files:
print('{} > '.format(pwfile), end = '')
if os.path.isdir(os.path.join('done', pwfile)):
print('file is in done')
continue
n = 0
b = 0
with open(pwfile, 'r', encoding='utf-8', errors='surrogateescape') as f:
try:
for line in f:
up = line.encode('utf-8', errors='surrogateescape').decode('utf8', 'replace').split(':')
user = up[0]
if len(up) != 2:
passw = ''
else:
passw = ':'.join(up[1:])
try:
cursor.execute('insert into passwords (password_username, password_password) values (%s, %s)', [user, passw])
n += 1
if n % 1000 == 0:
conn.commit()
mark('.')
except psycopg2.IntegrityError as e:
conn.rollback()
b += 1
if b % 1000 == 0:
mark('!')
except ValueError as e:
conn.rollback()
passw = '\\0'
mark('0')
except UnicodeDecodeError as e:
print(e)
raise e
print()
pathlib.Path(os.path.join('done', pwfile)).mkdir(parents=True, exist_ok=True)
print('all done...')