Monday, June 27, 2016

[Algorithm] Design Twitter

Design a simple twitter


Problem statement:


Design a simplified version of Twitter where users can post tweets, follow/unfollow another user and is able to see the 10 most recent tweets in the user's news feed. Your design should support the following methods:
  1. postTweet(userId, tweetId): Compose a new tweet.
  2. getNewsFeed(userId): Retrieve the 10 most recent tweet ids in the user's news feed. Each item in the news feed must be posted by users who the user followed or by the user herself. Tweets must be ordered from most recent to least recent.
  3. follow(followerId, followeeId): Follower follows a followee.
  4. unfollow(followerId, followeeId): Follower unfollows a followee.


Solution 1: Tweet-oriented approach


Time complexity:
follow: O(log(m))
unfollow: O(1)
post: O(log(m))
getNewsFeed: best: O(log(N)), worst O(N)

This solution focuses on more on tweets. All tweets are managed together in a multiset sorted by time created (system timestamp) from earliest to latest. All the users will only keep 3 pieces of information: the users they are following, the users following them, a recent activity timestamp which keeps the timestamp of the tweets they post or the tweets they followed post whichever is latest.

When getting the news feed a binary search will be performed to find the lower bound of the relevant tweet given the recent activity timestamp of the user.

When posting a tweet, the user has to notify all their followers to update the recent activity timestamp.

This approach works well in scenarios where the user activity is very frequent thus rendering us no major gap between user's own recent activity and activities of  those they followed. The worst time complexity could be O(N). The other downside is the "post" will notify all followers which can be as worse as O(m).

Implementation is as follows:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
class Twitter {
public:
    /** Initialize your data structure here. */
    Twitter() {
        systime = 0;
    }
    
    /** Compose a new tweet. */
    void postTweet(int userId, int tweetId) {
        if (users.count(userId) == 0) {
            users[userId] = User(userId);
        }
        users[userId].tweet(systime, users);
        tweets.insert(Tweet(systime, userId, tweetId));
        systime++;
    }
    
    /** Retrieve the 10 most recent tweet ids in the user's news feed. Each item in the news feed must be posted by users who the user followed or by the user herself. Tweets must be ordered from most recent to least recent. */
    vector<int> getNewsFeed(int userId) {
        vector<int> result;
        if (users.count(userId) == 0) return result;
        
        User& user = users[userId];
        Tweet tmp = Tweet(user.getRecentTime(), 0, 0);
        auto lower = tweets.lower_bound(tmp);
        
        for (int i=0; i<10 && lower != tweets.end(); lower++) {
            if (lower->userId == userId || user.isFollowing(lower->userId)) {
                result.push_back(lower->tweetId);
                i++;
            }
        }
        
        systime++;
        return result;
    }
    
    /** Follower follows a followee. If the operation is invalid, it should be a no-op. */
    void follow(int followerId, int followeeId) {
        if (users.count(followerId) == 0) {
            users[followerId] = User(followerId);
        }
        if (users.count(followeeId) == 0) {
            users[followeeId] = User(followeeId);
        }
        
        users[followerId].follow(users[followeeId]);
        systime++;
    }
    
    /** Follower unfollows a followee. If the operation is invalid, it should be a no-op. */
    void unfollow(int followerId, int followeeId) {
        if (users.count(followerId) == 0) return;
        if (users.count(followeeId) == 0) return;
        
        users[followerId].unfollow(users[followeeId]);
        systime++;
    }
    
private:
   class User;
   class Tweet;
   
   class User {
   public:
       inline User() {
           m_recent = 0;
           m_prev = 0;
           userId = -1;
       }
       
       inline User(int uid) {
           userId = uid;
           m_recent = 0;
           m_prev = 0;
       }
   
       inline void follow(User& leader) {
           if (m_following.count(leader.userId) == 1) return;
           m_following.insert(leader.userId);
           m_history.insert(leader.m_recent);
           
           leader.m_follower.insert(userId);
       }
       
       inline void unfollow(User& leader) {
           if (m_following.count(leader.userId) == 0) return;
           
           m_following.erase(leader.userId);
           m_history.erase(leader.m_recent);
           
           leader.m_follower.erase(userId);
       }
       
       inline void tweet(int timestamp, unordered_map<int, User>& users) {
           m_prev = m_recent;
           m_recent = timestamp;
           for (int userId : m_follower) {
               User& user = users[userId];
               user.m_history.erase(m_prev);
               user.m_history.insert(m_recent);
           }
       }
       
       inline int getRecentTime() {
           int tmp = m_history.empty() ? m_recent : (*m_history.rbegin());
           return max(tmp, m_recent);
       }
       
       inline bool isFollowing(int userId) {
           return m_following.count(userId) == 1;
       }
       
       int userId;
       
   private:
     unordered_set<int> m_following;
     unordered_set<int> m_follower;
     multiset<int> m_history;
     int m_recent;
     int m_prev;
   };
   
   class Tweet {
   public:
     Tweet(int t, int uid, int tid) : timestamp(t), userId(uid), tweetId(tid) {};
     bool operator<(const Tweet& rhs) const {
         return (timestamp > rhs.timestamp);
     }
     
     int timestamp;
     int userId;
     int tweetId;
   };
   
   unordered_map<int, User> users;
   multiset<Tweet> tweets;
   int systime;
};


Solution 2: User-oriented design


Time complexity:
follow: O(1)
unfollow: O(1)
post: O(1)
getNewsFeed: O(mlog(n/m))

This solution seems to be a more natural way of design. This design is user oriented which means the system keeps track of all users and their tweets are managed be the user themselves.

System will keep a map of all users and another map which has <key, value> as <userid, userids they are following> to represent the following relationship.

The follow/unfollow is quite straightforward simply set the user following map with user ids.

The post is also very simple, but keep in mind to incorporate a system timestamp into the tweet and make sure the tweets are ordered by timestamp in descending order.

The getNewsFeed is then a k-way merging of tweets from the user him/herself and all his/her followers. The trick here is to user the iterator of the tweets instead of copying the whole set into the merging process. I introduced a TweetRange class to represent the two iterators of the users' tweet. This is just to make it look more straightforward at first glance. You can also survive with simple an std::pair.

The k-way merging we are using is not strictly a merging because we don't actually merge them. 

We start off with having a prority queue of TweetRanges from user him/herself and his/her followees. Then we start to get the latest among them and move forward the iterator from the TweetRange we pick until we either hit the 10 or there's no more left. And of course once one TweetRange is exhausted we just remove that from the queue. Every time we trying to pick one tweet out of the queue we simply pop out that TweetRange we are looking at, pick up the tweet and push it back in so the next TweetRange we look at will have the latest.

This approach is better than solution 1 in terms of time complexity and also more scalable in architecture in a way that we can partition the users without much hiccups. (Partitioning with solution 1 is not going to be easy since the tweets and users are managed separately)

Implementation as follows:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
class Twitter {
public:
    /** Initialize your data structure here. */
    Twitter() {
        systemTime = 0;
    }
    
    /** Compose a new tweet. */
    void postTweet(int userId, int tweetId) {
        users[userId].insert(Tweet(systemTime++, tweetId));
    }
    
    /** Retrieve the 10 most recent tweet ids in the user's news feed. Each item in the news feed must be posted by users who the user followed or by the user herself. Tweets must be ordered from most recent to least recent. */
    vector<int> getNewsFeed(int userId) {
        multiset<TweetRange> data;
        
        set<Tweet>& self = users[userId];
        if (!self.empty()) {
            data.insert(TweetRange(self.begin(), self.end()));
        }
        
        unordered_set<int>& followees = follows[userId];
        for (int f : followees) {
            set<Tweet>& ft = users[f];
            if (!ft.empty()) {
                data.insert(TweetRange(ft.begin(), ft.end()));
            }
        }
        
        vector<int> feeds;
        while (feeds.size() < 10 && !data.empty()) {
            auto it = data.begin();
            TweetRange first = *it;
            data.erase(it);
            feeds.push_back(first.front->tweetId);
            if ((++first.front) != first.end) {
                data.insert(first);
            }
        }
        
        return feeds;
    }
    
    /** Follower follows a followee. If the operation is invalid, it should be a no-op. */
    void follow(int followerId, int followeeId) {
        if (followerId == followeeId) return;
        follows[followerId].insert(followeeId);
    }
    
    /** Follower unfollows a followee. If the operation is invalid, it should be a no-op. */
    void unfollow(int followerId, int followeeId) {
        if (follows.count(followerId) == 0) return;
        if (follows[followerId].count(followeeId) == 0) return;
        follows[followerId].erase(followeeId);
    }
    
private:
    class Tweet {
    public:
        Tweet(int t, int tid) : timestamp(t), tweetId(tid) {}
        int timestamp;
        int tweetId;
        
        bool operator<(const Tweet& rhs) const {
            return (timestamp > rhs.timestamp);
        }
    };
    
    class TweetRange {
    public:
        TweetRange(set<Tweet>::iterator f, set<Tweet>::iterator e) : front(f), end(e) {};
        bool operator<(const TweetRange& rhs) const {
            return (front->timestamp > rhs.front->timestamp);
        }
        set<Tweet>::iterator front;
        set<Tweet>::iterator end;
    };
    
private:
    unordered_map<int, unordered_set<int> > follows;
    unordered_map<int, set<Tweet> > users;
    int systemTime;
};





No comments:

Post a Comment