Stackoverflow - Load CSV import
// Create the Vendors so we can just match on them - 404 ms, 783 on 2.2
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/vendors.csv" AS row
WITH row
MERGE (v:Vendor {name:row.vendors});
// Create the abilities - 164ms, 164
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/ability.csv" AS row
WITH row
MERGE (a:Ability {name:row.ability});
// Create the topics - 154 ms, 468 after a huge gap
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/topics.csv" AS row
WITH row
MERGE (t:Topic {name:row.topics});
// Load the templates - 554 ms, 543
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/templates.csv" AS row
WITH row
MATCH (r:Vendor {name:row.vendor})
WITH row, r
MERGE (p:Template {name:row.template_clean})
MERGE (v:Version {version:row.template_ver})
MERGE (p)-[:FROM_VERSION]->(v)
MERGE (p)-[:CREATED_BY]->(r);
// Associate templates with topics - 239 ms, 233
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/template_topic.csv" AS row
WITH row
MATCH (p:Template {name:row.template_clean})
MATCH (t:Topic {name:row.topic})
WITH row, p, t
MERGE (p)-[:HAS_TOPIC]->(t)
// Create the students - 16434 ms, 35964 (77962 students)
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/recs.csv" AS row
WITH row
MERGE (s:Student {pidm:toInt(row.pidm),
hash_pidm:toInt(row.hash_pidm),
sid:row.id,
race:row.ethn_code,
sex:row.sex,
major:row.majr_code,
rsta:row.rsta_code,
sanc:row.sanc_score,
sahc:row.sahc_score,
hs_state: row.schl_state_code});
// Associate the students with vendors and abilities - 16408 ms, 80404 ms
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/recs.csv" AS row
WITH row
MATCH (s:Student {pidm: toInt(row.pidm)} )
MATCH (v:Vendor {name: row.vendor} )
MATCH (a:Ability {name: row.ability} )
WITH row, s, v, a
MERGE (s)-[:PURCHASED_FROM]->(v)
MERGE (s)-[:HAS_ABILITY]->(a);
// Create the contacts - 15782 ms
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/cont.csv" AS row
WITH row
MATCH (s:Student {pidm:toInt(row.pidm)} )
WITH row, s
MERGE (c:Contact {code:row.contcode,
cid:row.cid,
category:row.contcat,
seqnum:toInt(row.seqnum),
timestamp:toInt(row.timestamp)} )
MERGE (s)-[:HAS_CONTACT]->(c);
// First and last contacts - 15959 ms
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/cont.csv" AS row
WITH row
MATCH (s:Student {pidm:toInt(row.pidm)})
WITH row, s, toInt(row.seqnum) as seq
WHERE seq = 1
MATCH (f:Contact {cid:row.first_cont})
MATCH (l:Contact {cid:row.last_cont})
WITH s, f, l
MERGE (s)-[:FIRST]->(f)
MERGE (s)-[:LAST]->(l);
// Create the contact chain - 1351 ms
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/cont.csv" AS row
WITH row, toInt(row.seqnum) as seq
WHERE seq > 1
MATCH (c:Contact {cid:row.cid})
MATCH (p:Contact {cid:row.prev_contact})
WITH c, p
MERGE (p)-[:NEXT]->(c);
// BRM email data
// About 5 minutes for a load of all data from scratch
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/brm.csv" AS row
WITH row
MATCH (s:Student {pidm:toInt(row.pidm)})
WITH row, s
MERGE (e:Email {refdesc:row.refdesc,
subject:row.subject,
msgid:row.msgid,
commdate:row.commdate,
seqnum:toInt(row.seqnum),
deliv: toInt(row.delivflag),
optout: toInt(row.optoutflag),
open_ind: toInt(row.open_ind),
opens: toInt(row.opens),
click_ind: toInt(row.click_ind),
clicks: toInt(row.clicks)} )
MERGE (s)-[:WAS_SENT]->(e);
// First and last emails
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/email_first_last.csv" AS row
WITH row
MATCH (s:Student {pidm:toInt(row.pidm)})
MATCH (f:Email {msgid:row.first_email})
MATCH (l:Email {msgid:row.last_email})
WITH s, f, l
MERGE (s)-[:FIRST]->(f)
MERGE (s)-[:LAST]->(l);
// Create the email chain
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/email_chain.csv" AS row
WITH row
MATCH (c:Email {cid:row.msgid})
MATCH (p:Email {cid:row.last_email})
WITH c, p
MERGE (p)-[:NEXT]->(c);
// Apply the Constraints
CREATE CONSTRAINT ON (s:Student) ASSERT s.pidm IS UNIQUE;
CREATE CONSTRAINT ON (v:Vendor) ASSERT v.name IS UNIQUE;
CREATE CONSTRAINT ON (t:Topic) ASSERT t.name IS UNIQUE;
CREATE CONSTRAINT ON (e:Email) ASSERT e.msgid IS UNIQUE;
CREATE CONSTRAINT ON (a:Ability) ASSERT a.name IS UNIQUE;
CREATE CONSTRAINT ON (c:Contact) ASSERT c.cid IS UNIQUE;
CREATE CONSTRAINT ON (p:Template) ASSERT p.name IS UNIQUE;
CREATE CONSTRAINT ON (v:Version) ASSERT v.version IS UNIQUE;