.
Recommender Systems 推薦系統
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# coding: utf-8
from __future__ import division

from collections import Counter

print("---有興趣的有哪些---")

# One list of interest names per user; a user's id is its index in this list.
users_interests = [
    ["Hadoop", "Big Data", "HBase", "Java", "Spark", "Storm", "Cassandra"],
    ["NoSQL", "MongoDB", "Cassandra", "HBase", "Postgres"],
    ["Python", "scikit-learn", "scipy", "numpy", "statsmodels", "pandas"],
    ["R", "Python", "statistics", "regression", "probability"],
    ["machine learning", "regression", "decision trees", "libsvm"],
    ["Python", "R", "Java", "C++", "Haskell", "programming languages"],
    ["statistics", "probability", "mathematics", "theory"],
    ["machine learning", "scikit-learn", "Mahout", "neural networks"],
    ["neural networks", "deep learning", "Big Data", "artificial intelligence"],
    ["Hadoop", "Java", "MapReduce", "Big Data"],
    ["statistics", "R", "statsmodels"],
    ["C++", "deep learning", "artificial intelligence", "probability"],
    ["pandas", "R", "Python"],
    ["databases", "HBase", "Postgres", "MySQL", "MongoDB"],
    ["libsvm", "regression", "support vector machines"],
]

# (interest, count) pairs over all users, ordered from most to least frequent.
popular_interests = Counter(
    interest
    for user_interests in users_interests
    for interest in user_interests
).most_common()

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("Popular Interests")
print(popular_interests)
1 2 3 4 5 6 7 8 9 10 11 12 |
print("---用大眾資料推薦你適合甚麼---")


def most_popular_new_interests(user_interests, max_results=5):
    """Suggest the most popular interests that the user does not have yet.

    Args:
        user_interests: interests the user already has; names are compared
            case-sensitively against the entries of ``popular_interests``.
        max_results: maximum number of suggestions to return.

    Returns:
        A list of at most ``max_results`` ``(interest, frequency)`` pairs,
        kept in the order in which they appear in the module-level
        ``popular_interests`` list.
    """
    suggestions = [(interest, frequency)
                   for interest, frequency in popular_interests
                   if interest not in user_interests]
    return suggestions[:max_results]


print("Most Popular New Interests")
already_liked = ["NoSQL", "MongoDB", "Cassandra", "HBase", "Postgres"]
# A single pre-formatted argument keeps the output identical on Python 2 and 3.
print("already like: %s" % (already_liked,))
print(most_popular_new_interests(already_liked))
print("Me like Python Java Hbase")
# The lookup is case-sensitive, so the data set's exact spelling "HBase" is
# required; "Hbase" would not be recognised as an existing interest.
print(most_popular_new_interests(["Python", "Java", "HBase"]))
用相似的人與你喜歡進行分析,算出哪個跟你最相似
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
print("---與誰最相似---")
from linear_algebra import dot
import math

# Every distinct interest name in alphabetical order; the position of a name
# in this list is its interest id.
unique_interests = sorted({interest
                           for user_interests in users_interests
                           for interest in user_interests})


def make_user_interest_vector(user_interests):
    """Encode a list of interests as a 0/1 vector over ``unique_interests``.

    Element ``i`` of the result is 1 if ``unique_interests[i]`` is in
    ``user_interests`` and 0 otherwise.
    """
    return [1 if interest in user_interests else 0
            for interest in unique_interests]


def cosine_similarity(v, w):
    """Return the cosine similarity of vectors ``v`` and ``w``.

    ``dot`` comes from the project's ``linear_algebra`` module and is assumed
    to be the ordinary dot product -- TODO confirm.

    Returns 0.0 when either vector has zero magnitude, instead of raising
    ZeroDivisionError.
    """
    norm_product = dot(v, v) * dot(w, w)
    if norm_product == 0:
        return 0.0
    return dot(v, w) / math.sqrt(norm_product)


# Compare every pair of users to find who is closest to whom.
# Built as a real list (not ``map``): the matrix is iterated many times below,
# and on Python 3 ``map`` returns an iterator that can be consumed only once.
user_interest_matrix = [make_user_interest_vector(user_interests)
                        for user_interests in users_interests]

# user_similarities[i][j] is the similarity between user i and user j.
user_similarities = [[cosine_similarity(interest_vector_i, interest_vector_j)
                      for interest_vector_j in user_interest_matrix]
                     for interest_vector_i in user_interest_matrix]


def most_similar_users_to(user_id):
    """Return ``(other_user_id, similarity)`` pairs for users similar to ``user_id``.

    The user itself and users with a similarity that is not greater than zero
    are left out; the result is sorted from most to least similar.
    """
    pairs = [(other_user_id, similarity)
             for other_user_id, similarity
             in enumerate(user_similarities[user_id])
             if user_id != other_user_id and similarity > 0]
    # Index into the pair rather than unpacking it in the lambda signature,
    # which Python 3 no longer allows.
    return sorted(pairs, key=lambda pair: pair[1], reverse=True)


print("User based similarity")
print("most similar to 11 %s" % (most_similar_users_to(11),))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
print("---以誰為基準進行推薦---")
from collections import defaultdict


def user_based_suggestions(user_id, include_current_interests=False):
    """Suggest interests for ``user_id`` based on what similar users like.

    Each interest of every user returned by ``most_similar_users_to(user_id)``
    is credited with that user's similarity; the credits are summed per
    interest.

    Args:
        user_id: index of the user in ``users_interests``.
        include_current_interests: when true, interests the user already has
            are kept in the result; by default they are filtered out.

    Returns:
        A list of ``(interest, weight)`` pairs sorted by weight, highest first.
    """
    # Sum up the similarities per interest.
    suggestions = defaultdict(float)
    for other_user_id, similarity in most_similar_users_to(user_id):
        for interest in users_interests[other_user_id]:
            suggestions[interest] += similarity

    # Convert to a list sorted by weight; the pair is indexed instead of being
    # unpacked in the lambda signature, which Python 3 no longer allows.
    suggestions = sorted(suggestions.items(),
                         key=lambda pair: pair[1],
                         reverse=True)

    # Optionally drop interests the user already has.
    if include_current_interests:
        return suggestions
    return [(suggestion, weight)
            for suggestion, weight in suggestions
            if suggestion not in users_interests[user_id]]


print("Suggestions for 10")
print(user_based_suggestions(10))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
print("---以項目為基準進行興趣分析---")

# Transpose of user_interest_matrix: row j holds, for every user, whether that
# user has unique_interests[j] (1) or not (0).
interest_user_matrix = [[user_interest_vector[j]
                         for user_interest_vector in user_interest_matrix]
                        for j, _ in enumerate(unique_interests)]

# interest_similarities[i][j] is the similarity between interests i and j,
# computed from the sets of users that have each of them.
interest_similarities = [[cosine_similarity(user_vector_i, user_vector_j)
                          for user_vector_j in interest_user_matrix]
                         for user_vector_i in interest_user_matrix]


def most_similar_interests_to(interest_id):
    """Return ``(interest_name, similarity)`` pairs similar to ``interest_id``.

    ``interest_id`` is an index into ``unique_interests``. The interest itself
    and interests with a similarity that is not greater than zero are left
    out; the result is sorted from most to least similar.
    """
    similarities = interest_similarities[interest_id]
    pairs = [(unique_interests[other_interest_id], similarity)
             for other_interest_id, similarity in enumerate(similarities)
             if interest_id != other_interest_id and similarity > 0]
    # Index into the pair rather than unpacking it in the lambda signature,
    # which Python 3 no longer allows.
    return sorted(pairs, key=lambda pair: pair[1], reverse=True)


print("Item based similarity")
print("most similar to 'Big Data'")
# Interest id 0 is "Big Data": it sorts first among the interest names.
print(most_similar_interests_to(0))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
print("---以項目為基準進行推薦分析---")


def item_based_suggestions(user_id, include_current_interests=False):
    """Suggest interests for ``user_id`` based on interest-to-interest similarity.

    For every interest the user has, the similarities of the interests
    returned by ``most_similar_interests_to`` are added up per interest name.

    Args:
        user_id: index of the user in ``user_interest_matrix`` and
            ``users_interests``.
        include_current_interests: when true, interests the user already has
            are kept in the result; by default they are filtered out.

    Returns:
        A list of ``(interest, weight)`` pairs sorted by weight, highest first.
    """
    # Add up the similarities of everything similar to the user's interests.
    suggestions = defaultdict(float)
    user_interest_vector = user_interest_matrix[user_id]
    for interest_id, is_interested in enumerate(user_interest_vector):
        if is_interested == 1:
            similar_interests = most_similar_interests_to(interest_id)
            for interest, similarity in similar_interests:
                suggestions[interest] += similarity

    # Sort by weight; the pair is indexed instead of being unpacked in the
    # lambda signature, which Python 3 no longer allows.
    suggestions = sorted(suggestions.items(),
                         key=lambda pair: pair[1],
                         reverse=True)

    # Optionally drop interests the user already has.
    if include_current_interests:
        return suggestions
    return [(suggestion, weight)
            for suggestion, weight in suggestions
            if suggestion not in users_interests[user_id]]


print("suggestions for user 0")
print(item_based_suggestions(0))
深度學習
以類神經為基礎
DNN:Deep Neural Network(深度神經網路)
深度不固定
高速公路網路
ReLU:f(x)=max(0,x),取 0 與 x 之中較大的值
Sigmoid:不管輸入值是什麼,輸出都會被壓縮到 0~1 之間(tanh 則是壓縮到 -1~1)
Maxout 只取大的點
CNN:Convolutional Neural Network(卷積神經網路)
減少像素圖片的權重訓練
RNN
把類神經計算前一個時間與後一個時間
深度學習套件
CAFFE 快,python
THEANO 結合GPU,python
TENSORFLOW 多GPU,分布式計算
LASAGNE 簡化THEANO
KERAS 注重CNN,單GPU
MXNET 多GPU
SKLEARN-THEANO 把CNN當作特徵器
NOLEARN
DIGITS web介面
PYLEARN2 較少改版
DEEPLEARNING4J 用JAVA開發
ENSEMBLE 集成學習
建立好幾個模型,做分析預測,看哪個模型訓練結果最高當作輸出
Bagging, boosting, randomforest
MORVAN
pip install keras 先裝好Keras
先跑一次 regressor-example,之後會產生一個 Keras 設定檔(通常是 ~/.keras/keras.json)
再去修改裡面的json
把tensorflow改成theano
classifier 結果
備註:2017/06/08 計算方法分析與設計 課堂筆記