"""Map-reduce job computing pairwise restaurant similarities from ratings."""
from itertools import combinations, permutations

import numpy as np
from mrjob.job import MRJob
from mrjob.step import MRStep
from scipy.stats import pearsonr


class RestaurantSimilarities(MRJob):
    """Two-step MRJob yielding a Pearson similarity for each restaurant pair.

    Each input line is split on commas into exactly five fields:
    ``user_id,business_id,stars,business_avg,user_avg``.

    Step 1 groups every rating under the user who gave it.  Step 2 emits,
    for every pair of businesses rated by the same user, that user's two
    ratings, and then correlates the ratings collected for each pair.
    """

    def steps(self):
        """Return the two map-reduce steps of the job, in execution order."""
        return [
            MRStep(mapper=self.line_mapper,
                   reducer=self.users_items_collector),
            MRStep(mapper=self.pair_items_mapper,
                   reducer=self.calc_sim_collector),
        ]

    def line_mapper(self, _, line):
        """Split one comma-separated input line and key it by user.

        Yields ``user_id, (business_id, stars, business_avg, user_avg)``.
        The fields are passed along as the strings produced by ``split``.
        """
        user_id, business_id, stars, business_avg, user_avg = line.split(',')
        yield user_id, (business_id, stars, business_avg, user_avg)

    def users_items_collector(self, user_id, values):
        """Collect all ratings made by one user into a single list.

        ``values`` iterates over the 4-field records emitted by
        ``line_mapper``.  Yields ``user_id, ratings`` where each entry of
        ``ratings`` is ``(business_id, (stars, user_avg))``; the business
        average is not needed downstream and is dropped here.
        """
        ratings = [
            (business_id, (stars, user_avg))
            for business_id, stars, _business_avg, user_avg in values
        ]
        yield user_id, ratings

    def pair_items_mapper(self, user_id, values):
        """Emit one record per pair of businesses rated by the same user.

        ``user_id`` is ignored.  ``values`` is the rating list built by
        ``users_items_collector``.  Yields
        ``(business_a, business_b), (rating_a, rating_b)`` with each rating
        being that business's ``(stars, user_avg)`` entry.
        """
        for (biz1, rating1), (biz2, rating2) in combinations(values, 2):
            # Order the ids so that the same two businesses always produce
            # the same key, whichever order this user rated them in.
            if biz1 <= biz2:
                yield (biz1, biz2), (rating1, rating2)
            else:
                yield (biz2, biz1), (rating2, rating1)

    def calc_sim_collector(self, key, values):
        """Compute the Pearson correlation for one pair of restaurants.

        ``key`` is the ``(rest1, rest2)`` pair and ``values`` iterates over
        ``((stars1, user_avg1), (stars2, user_avg2))`` records, one per user
        who rated both.  The correlation is taken over each rating minus
        the rater's average.

        Yields ``(rest1, rest2), (rho, n_common)`` where ``n_common`` is the
        number of records seen.  ``rho`` is 0.0 when there are fewer than
        two records or when the correlation is undefined (NaN).
        """
        (rest1, rest2), common_ratings = key, values
        diffs1 = []
        diffs2 = []
        for (stars1, user_avg1), (stars2, user_avg2) in common_ratings:
            # Fields are still strings from the input split; convert here.
            diffs1.append(float(stars1) - float(user_avg1))
            diffs2.append(float(stars2) - float(user_avg2))
        n_common = len(diffs1)

        rho = 0.0
        # pearsonr needs at least two points to be meaningful.
        if n_common >= 2:
            # Cast to a plain float so the value serializes cleanly.
            rho = float(pearsonr(diffs1, diffs2)[0])
            # Constant input gives NaN; report that as "no similarity".
            if np.isnan(rho):
                rho = 0.0
        yield (rest1, rest2), (rho, n_common)


#Below MUST be there for things to work
if __name__ == '__main__':
    RestaurantSimilarities.run()