steps
-
sample dataset
- ml-latest-small.zip about 900KB
-
unzip the archive and serve the CSV files
$ cd /ml-latest-small
-
line counts of the data files
$ wc -l *
    9126 links.csv
      48 load.cql.sq
    9126 movies.csv
  100005 ratings.csv
    1297 tags.csv
  119749 total
-
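serve the data files over HTTP from this directory (the import statements below read them from http://localhost:8000)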
# Python 2's built-in static file server; it serves the current directory on port 8000 by default.
# On Python 3 the equivalent command is: python3 -m http.server 8000
$ python -m SimpleHTTPServer
-
-
# Start a Neo4j container named gneo4j. --rm removes the container when it
# stops; ports 7474 and 7687 are published to the host, and the data and log
# directories are kept on the host under $HOME/neo4j.
# NOTE(review): inside the container "localhost" is the container itself. If
# LOAD CSV cannot reach http://localhost:8000, point the URLs at the host's
# address instead -- confirm for your Docker setup.
$ docker run --rm \
    --publish=7474:7474 \
    --publish=7687:7687 \
    --volume=$HOME/neo4j/data:/data \
    --volume=$HOME/neo4j/logs:/logs \
    --name gneo4j neo4j
-
import csv data
-
create uniqueness constraints (on the keys the import statements look up)
// Declare the keys that the import statements MATCH/MERGE on as unique:
// Movie.id, User.id and Genre.name. Run these before loading any CSV file.
CREATE CONSTRAINT ON (movie:Movie) ASSERT movie.id IS UNIQUE;
CREATE CONSTRAINT ON (user:User) ASSERT user.id IS UNIQUE;
CREATE CONSTRAINT ON (genre:Genre) ASSERT genre.name IS UNIQUE;
-
import
movies.csv
//////////////////
// movies.csv 9126
// movieId,title,genres
//
// Creates one :Movie node per CSV row, then splits the pipe-separated genres
// column and links the movie to one :Genre node per entry. Genre names are
// upper-cased and MERGEd so each genre exists only once; `position` records
// the 1-based order of the genre within the row.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "http://localhost:8000/movies.csv" AS line
WITH line, SPLIT(line.genres, "|") AS Genres
CREATE (m:Movie { id: TOINTEGER(line.`movieId`), title: line.`title` })
// m must be carried through this WITH. Without it m is out of scope below and
// CREATE (m)-[...]->(g) attaches each genre to a brand-new unlabeled node
// instead of the movie that was just created.
WITH m, Genres
UNWIND RANGE(0, SIZE(Genres)-1) AS i
MERGE (g:Genre {name: UPPER(Genres[i])})
CREATE (m)-[:GENRE {position:i+1}]->(g);
// if you import from neo4j browser
// (figures recorded with the earlier statement that used `WITH Genres` only;
//  note 9251 nodes vs 9145 labels -- 106 unlabeled nodes, one per relationship.
//  Expect different counts after the fix.)
// Added 9145 labels, created 9251 nodes, set 18376 properties, created 106 relationships, statement completed in 8739 ms.
-
import
tags.csv
////////////////
// tags.csv 1297
// userId,movieId,tag,timestamp
//
// For every row whose movie already exists, make sure the tagging user exists
// (MERGE) and add a :TAG relationship from that user to the movie carrying the
// tag text. Rows whose movieId matches no :Movie node produce nothing.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "http://localhost:8000/tags.csv" AS row
MATCH (movie:Movie { id: TOINTEGER(row.movieId) })
MERGE (tagger:User { id: TOINTEGER(row.userId) })
CREATE (tagger)-[:TAG { tag: row.tag }]->(movie);
// if you import from neo4j browser
// Added 61 labels, created 61 nodes, set 1357 properties, created 1296 relationships, statement completed in 1789 ms.
-
import
ratings.csv
/////////////////////
// ratings.csv 100005
// userId,movieId,rating,timestamp
//
// For every row whose movie already exists, make sure the rating user exists
// and add a :RATING relationship from that user to the movie, with the rating
// stored as a float.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "http://localhost:8000/ratings.csv" AS line
WITH line
MATCH (m:Movie { id: TOINTEGER(line.`movieId`) })
// MERGE, not MATCH: before this import the only :User nodes are the ones
// created by the tags.csv import, so a MATCH silently drops every rating
// from a user who never tagged a movie.
MERGE (u:User { id: TOINTEGER(line.`userId`) })
CREATE (u)-[:RATING {rating: TOFLOAT(line.`rating`)}]->(m);
// if you import from neo4j browser
// (figures recorded with the earlier MATCH-based user lookup, which kept only
//  ratings from users that tags.csv had already created; expect more
//  relationships, plus new :User nodes, after the fix.)
// Set 19072 properties, created 19072 relationships, statement completed in 11461 ms.
-
-
run cypher query against this dataset
-
find user 23
// Look up the single :User node whose id is 23.
MATCH (u:User)
WHERE u.id = 23
RETURN u
-
find the movies rated by user 23
// Movies that user 23 has rated (first 10 rows), returned together with the user node.
MATCH (u:User {id: 23})-[:RATING]->(m:Movie)
RETURN u, m
LIMIT 10
-
find the movies rated by user 23 and those movies' genres
// Movies that user 23 has rated, together with each movie's genres (first 10 rows).
// The relationship type must be GENRE: that is what the movies.csv import
// creates. No CATEGORY relationships exist in this dataset, so matching on
// :CATEGORY returns no rows.
MATCH (u:User {id: 23})
MATCH (u)-[:RATING]->(m:Movie)
MATCH (m)-[:GENRE]->(g)
RETURN u, m, g
LIMIT 10
-