From 011ad73470907b5f784c813cd4ef9e5a701e1fd9 Mon Sep 17 00:00:00 2001 From: John Connor Date: Wed, 14 Feb 2024 11:36:56 -0600 Subject: [PATCH 1/3] global search fix --- api/blog/serializers.py | 7 +++-- api/common/reqs.py | 44 ++++++++++++++---------------- api/past/serializers_READONLY.py | 11 ++++++++ api/voyage/serializers_READONLY.py | 7 +++++ 4 files changed, 43 insertions(+), 26 deletions(-) diff --git a/api/blog/serializers.py b/api/blog/serializers.py index a8f8bff..be44f9b 100644 --- a/api/blog/serializers.py +++ b/api/blog/serializers.py @@ -126,7 +126,8 @@ class PostListRequestSerializer(serializers.Serializer): page_size=serializers.IntegerField(required=False,allow_null=True) filter=PostFilterItemSerializer(many=True,allow_null=True,required=False) order_by=serializers.ListField(child=serializers.CharField(allow_null=True),required=False,allow_null=True) - + global_search=serializers.CharField(allow_null=True,required=False) + class PostListResponseSerializer(serializers.Serializer): page=serializers.IntegerField() page_size=serializers.IntegerField() @@ -161,7 +162,7 @@ class AuthorListRequestSerializer(serializers.Serializer): page_size=serializers.IntegerField(required=False,allow_null=True) filter=AuthorFilterItemSerializer(many=True,allow_null=True,required=False) order_by=serializers.ListField(child=serializers.CharField(allow_null=True),required=False,allow_null=True) - + class AuthorListResponseSerializer(serializers.Serializer): page=serializers.IntegerField() page_size=serializers.IntegerField() @@ -196,7 +197,7 @@ class InstitutionListRequestSerializer(serializers.Serializer): page_size=serializers.IntegerField(required=False,allow_null=True) filter=InstitutionFilterItemSerializer(many=True,allow_null=True,required=False) order_by=serializers.ListField(child=serializers.CharField(allow_null=True),required=False,allow_null=True) - + class InstitutionListResponseSerializer(serializers.Serializer): page=serializers.IntegerField() page_size=serializers.IntegerField() diff --git a/api/common/reqs.py b/api/common/reqs.py index 48493e2..1faa803 100644 --- a/api/common/reqs.py +++ b/api/common/reqs.py @@ -124,7 +124,7 @@ def post_req(queryset,s,r,options_dict,auto_prefetch=True): always_commit=True, timeout=10 ) - search_string=params['global_search'][0] + search_string=params['global_search'] search_string=re.sub("\s+"," ",search_string) search_string=search_string.strip() searchstringcomponents=[''.join(filter(str.isalnum,s)) for s in search_string.split(' ')] @@ -132,28 +132,26 @@ def post_req(queryset,s,r,options_dict,auto_prefetch=True): results=solr.search('text:%s' %finalsearchstring,**{'rows':10000000,'fl':'id'}) ids=[doc['id'] for doc in results.docs] queryset=queryset.filter(id__in=ids) - - #used by dataframes calls to prefetch specific columns - - kwargs={} - for item in filter_obj: - op=item['op'] - searchTerm=item["searchTerm"] - varName=item["varName"] - if varName in all_fields and op in ['lte','gte','exact','in','icontains']: - django_filter_term='__'.join([varName,op]) - kwargs[django_filter_term]=searchTerm - elif varName in all_fields and op =='btw' and type(searchTerm)==list and len(searchTerm)==2: - searchTerm.sort() - min,max=searchTerm - kwargs['{0}__{1}'.format(varName, 'lte')]=max - kwargs['{0}__{1}'.format(varName, 'gte')]=min - else: - if varName not in all_fields: - errormessages.append("var %s not in model" %varName) - if op not in ['lte','gte','exact','in','icontains']: - errormessages.append("%s is not a valid django search operation" %op) - queryset=queryset.filter(**kwargs) + else: + kwargs={} + for item in filter_obj: + op=item['op'] + searchTerm=item["searchTerm"] + varName=item["varName"] + if varName in all_fields and op in ['lte','gte','exact','in','icontains']: + django_filter_term='__'.join([varName,op]) + kwargs[django_filter_term]=searchTerm + elif varName in all_fields and op =='btw' and type(searchTerm)==list and len(searchTerm)==2: + searchTerm.sort() + min,max=searchTerm + kwargs['{0}__{1}'.format(varName, 'lte')]=max + kwargs['{0}__{1}'.format(varName, 'gte')]=min + else: + if varName not in all_fields: + errormessages.append("var %s not in model" %varName) + if op not in ['lte','gte','exact','in','icontains']: + errormessages.append("%s is not a valid django search operation" %op) + queryset=queryset.filter(**kwargs) results_count=queryset.count() if DEBUG: print("resultset size:",results_count) diff --git a/api/past/serializers_READONLY.py b/api/past/serializers_READONLY.py index d4bfa4b..66e0fa7 100644 --- a/api/past/serializers_READONLY.py +++ b/api/past/serializers_READONLY.py @@ -362,6 +362,7 @@ class EnslavementRelationListRequestSerializer(serializers.Serializer): page_size=serializers.IntegerField() filter=EnslavementRelationFilterItemSerializer(many=True,required=False,allow_null=True) order_by=serializers.ListField(child=serializers.CharField(allow_null=True),required=False,allow_null=True) + global_search=serializers.CharField(allow_null=True,required=False) ########### PAGINATED ENSLAVED LISTS @extend_schema_serializer( @@ -390,6 +391,7 @@ class EnslavedListRequestSerializer(serializers.Serializer): page_size=serializers.IntegerField() filter=EnslavedFilterItemSerializer(many=True,required=False,allow_null=True) order_by=serializers.ListField(child=serializers.CharField(allow_null=True),required=False,allow_null=True) + global_search=serializers.CharField(allow_null=True,required=False) class EnslavedListResponseSerializer(serializers.Serializer): page=serializers.IntegerField() @@ -429,6 +431,7 @@ class EnslaverListRequestSerializer(serializers.Serializer): page_size=serializers.IntegerField(required=False,allow_null=True) filter=EnslaverFilterItemSerializer(many=True,required=False,allow_null=True) order_by=serializers.ListField(child=serializers.CharField(allow_null=True),required=False,allow_null=True) + global_search=serializers.CharField(allow_null=True,required=False) class EnslaverListResponseSerializer(serializers.Serializer): page=serializers.IntegerField() @@ -470,6 +473,7 @@ class EnslaverAutoCompleteRequestSerializer(serializers.Serializer): offset=serializers.IntegerField() limit=serializers.IntegerField() filter=EnslaverFilterItemSerializer(many=True,required=False,allow_null=True) + global_search=serializers.CharField(allow_null=True,required=False) class EnslaverAutoCompletekvSerializer(serializers.Serializer): value=serializers.CharField() @@ -510,6 +514,7 @@ class EnslavedAutoCompleteRequestSerializer(serializers.Serializer): offset=serializers.IntegerField() limit=serializers.IntegerField() filter=EnslavedFilterItemSerializer(many=True,required=False,allow_null=True) + global_search=serializers.CharField(allow_null=True,required=False) class EnslavedAutoCompletekvSerializer(serializers.Serializer): value=serializers.CharField() @@ -616,6 +621,7 @@ class EnslavedDataframesRequestSerializer(serializers.Serializer): ]) ) filter=EnslavedFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) @extend_schema_serializer( examples=[ @@ -646,6 +652,7 @@ class EnslaverDataframesRequestSerializer(serializers.Serializer): ]) ) filter=EnslaverFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) @extend_schema_serializer( @@ -678,6 +685,7 @@ class EnslavementRelationDataframesRequestSerializer(serializers.Serializer): ]) ) filter=EnslavementRelationFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) ############ GEOTREE REQUESTS @extend_schema_serializer( @@ -714,6 +722,7 @@ class EnslavedGeoTreeFilterRequestSerializer(serializers.Serializer): ) ) filter=EnslavedFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) ############ GEOTREE REQUESTS @@ -751,6 +760,7 @@ class EnslaverGeoTreeFilterRequestSerializer(serializers.Serializer): ) ) filter=EnslaverFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) ############ PAST AGGREGATION ROUTE MAPS @@ -777,6 +787,7 @@ class EnslaverGeoTreeFilterRequestSerializer(serializers.Serializer): class EnslavedAggRoutesRequestSerializer(serializers.Serializer): zoomlevel=serializers.CharField() filter=EnslavedFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) class EnslavedAggRoutesEdgesSerializer(serializers.Serializer): source=serializers.CharField() diff --git a/api/voyage/serializers_READONLY.py b/api/voyage/serializers_READONLY.py index 4a48009..d8e4d8f 100644 --- a/api/voyage/serializers_READONLY.py +++ b/api/voyage/serializers_READONLY.py @@ -334,12 +334,14 @@ class VoyageListRequestSerializer(serializers.Serializer): page_size=serializers.IntegerField(required=False,allow_null=True) filter=VoyageFilterItemSerializer(many=True,allow_null=True,required=False) order_by=serializers.ListField(child=serializers.CharField(),allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) class VoyageListResponseSerializer(serializers.Serializer): page=serializers.IntegerField() page_size=serializers.IntegerField() count=serializers.IntegerField() results=VoyageSerializer(many=True,read_only=True) + ############ BAR, SCATTER, AND PIE CHARTS @extend_schema_serializer( @@ -383,6 +385,7 @@ class VoyageGroupByRequestSerializer(serializers.Serializer): 'voyage_bar_and_donut_charts' ] filter=VoyageFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) # class VoyageGroupByResponseSerializer(serializers.Serializer): # data=serializers.JSONField() @@ -418,6 +421,7 @@ class VoyageDataframesRequestSerializer(serializers.Serializer): ]) ) filter=VoyageFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) # class VoyageDataframesResponseSerializer(serializers.Serializer): # data=serializers.JSONField() @@ -458,6 +462,7 @@ class VoyageGeoTreeFilterRequestSerializer(serializers.Serializer): ) ) filter=VoyageFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) ############ VOYAGE AGGREGATION ROUTE MAPS @extend_schema_serializer( @@ -488,6 +493,7 @@ class VoyageGeoTreeFilterRequestSerializer(serializers.Serializer): class VoyageAggRoutesRequestSerializer(serializers.Serializer): zoomlevel=serializers.CharField() filter=VoyageFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) class VoyageAggRoutesEdgesSerializer(serializers.Serializer): source=serializers.CharField() @@ -668,6 +674,7 @@ class VoyageAutoCompleteRequestSerializer(serializers.Serializer): offset=serializers.IntegerField() limit=serializers.IntegerField() filter=VoyageFilterItemSerializer(many=True,allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) class VoyageAutoCompletekvSerializer(serializers.Serializer): value=serializers.CharField() From fc5d049f49811922eaaa17d4b78cbca31207adee Mon Sep 17 00:00:00 2001 From: John Connor Date: Wed, 14 Feb 2024 12:40:09 -0600 Subject: [PATCH 2/3] timeline endpoint ready for testing --- api/assessment/serializers.py | 38 ++++++++++++++++++++++++-- api/assessment/urls.py | 9 ++++--- api/assessment/views.py | 51 ++++++++++++++++++++++++++++++++--- stats/app.py | 35 ++++++++++++++++++++---- 4 files changed, 119 insertions(+), 14 deletions(-) diff --git a/api/assessment/serializers.py b/api/assessment/serializers.py index 2f51b88..3fae8ad 100644 --- a/api/assessment/serializers.py +++ b/api/assessment/serializers.py @@ -112,14 +112,48 @@ class EstimateDataframesRequestSerializer(serializers.Serializer): ) filter=EstimateFilterItemSerializer(many=True,allow_null=True,required=False) +############ TIMELINE SERIALIZERS + +@extend_schema_serializer( + examples=[ + OpenApiExample( + 'Filtered for histogram w 2 series', + summary='Filtered 2-series histogram', + description='Here we request a histogram for numbers of people embarked and disembarked between 1775 and 1820', + value={ + "filter": [ + { + "varName":"year", + "op":"btw", + "searchTerm":[1775,1820] + } + ] + } + ) + ] +) +class EstimateTimelineRequestSerializer(serializers.Serializer): + filter=EstimateFilterItemSerializer(many=True,allow_null=True,required=False) + +class EstimateTimelineResponseSerializer(serializers.Serializer): + disembarked_slaves=serializers.ListField( + child=serializers.IntegerField() + ) + embarked_slaves=serializers.ListField( + child=serializers.IntegerField() + ) + year=serializers.ListField( + child=serializers.IntegerField() + ) + ############ CROSSTAB SERIALIZERS @extend_schema_serializer( examples=[ OpenApiExample( - 'Paginated request for binned years & embarkation geo vars', + 'Request for binned years & embarkation geo vars', summary='Multi-level, 20-year bins', - description='Here, we request cross-tabs on the geographic locations where enslaved people were embarked in 20-year periods. We also request that our columns be grouped in a multi-level way, from broad region to region and place. The cell value we wish to calculate is the number of people embarked, and we aggregate these as a sum. We are requesting the first 5 rows of these cross-tab results.', + description='Here, we request cross-tabs on the geographic locations where enslaved people were embarked in 20-year periods. We also request that our columns be grouped in a multi-level way, from broad region to region and place. The cell value we wish to calculate is the number of people embarked, and we aggregate these as a sum.', value={ "cols": [ "embarkation_region__export_area__name", diff --git a/api/assessment/urls.py b/api/assessment/urls.py index 78c24c7..e718ed2 100644 --- a/api/assessment/urls.py +++ b/api/assessment/urls.py @@ -3,7 +3,8 @@ from . import views urlpatterns = [ - path('', views.AssessmentList.as_view()), - path('dataframes/',views.EstimateDataFrames.as_view()), - path('crosstabs/',views.EstimateCrossTabs.as_view()) - ] \ No newline at end of file + path('', views.AssessmentList.as_view()), + path('dataframes/',views.EstimateDataFrames.as_view()), + path('crosstabs/',views.EstimateCrossTabs.as_view()), + path('timelines/',views.EstimateTimeline.as_view()) +] \ No newline at end of file diff --git a/api/assessment/views.py b/api/assessment/views.py index 8c7e5c7..cd9fa2e 100644 --- a/api/assessment/views.py +++ b/api/assessment/views.py @@ -74,11 +74,57 @@ def post(self,request): return JsonResponse(output_dicts,safe=False) -class EstimateCrossTabs(generics.GenericAPIView): + +class EstimateTimeline(generics.GenericAPIView): authentication_classes=[TokenAuthentication] permission_classes=[IsAuthenticated] @extend_schema( description="Paginated crosstabs endpoint, with Pandas as the back-end.", + request=EstimateTimelineRequestSerializer, + responses=EstimateTimelineResponseSerializer + ) + def post(self,request): + st=time.time() + print("ESTIMATE TIMELINE+++++++\nusername:",request.auth.user) + + #VALIDATE THE REQUEST + serialized_req = EstimateTimelineRequestSerializer(data=request.data) + if not serialized_req.is_valid(): + return JsonResponse(serialized_req.errors,status=400) + + #FILTER THE VOYAGES BASED ON THE REQUEST'S FILTER OBJECT + queryset=Estimate.objects.all() + queryset,results_count=post_req( + queryset, + self, + request, + Estimate_options, + auto_prefetch=True + ) + + #MAKE THE CROSSTABS REQUEST TO VOYAGES-STATS + ids=[i[0] for i in queryset.values_list('id')] + u2=STATS_BASE_URL+'estimates_timeline/' + params=dict(request.data) + stats_req_data=params + stats_req_data['ids']=ids + r=requests.post(url=u2,data=json.dumps(stats_req_data),headers={"Content-type":"application/json"}) + + #VALIDATE THE RESPONSE + if r.ok: + j=json.loads(r.text) + serialized_resp=EstimateTimelineResponseSerializer(data=j) + print("Internal Response Time:",time.time()-st,"\n+++++++") + if not serialized_resp.is_valid(): + return JsonResponse(serialized_resp.errors,status=400) + else: + return JsonResponse(serialized_resp.data,safe=False) + +class EstimateCrossTabs(generics.GenericAPIView): + authentication_classes=[TokenAuthentication] + permission_classes=[IsAuthenticated] + @extend_schema( + description="HTML dump crosstabs endpoint, with Pandas as the back-end.", request=EstimateCrossTabRequestSerializer, responses=EstimateCrossTabResponseSerializer ) @@ -103,11 +149,10 @@ def post(self,request): #MAKE THE CROSSTABS REQUEST TO VOYAGES-STATS ids=[i[0] for i in queryset.values_list('id')] - u2=STATS_BASE_URL+'pivot/' + u2=STATS_BASE_URL+'estimates_pivot/' params=dict(request.data) stats_req_data=params stats_req_data['ids']=ids - stats_req_data['cachename']='estimate_pivot_tables' r=requests.post(url=u2,data=json.dumps(stats_req_data),headers={"Content-type":"application/json"}) #VALIDATE THE RESPONSE diff --git a/stats/app.py b/stats/app.py index f609596..43a9b8f 100644 --- a/stats/app.py +++ b/stats/app.py @@ -120,7 +120,9 @@ def is_nan(v): return np.isnan(v) def __new__(self, a): - return {k: v for k, v in a if not self.is_nan(v) and v!={}} + return { + k: v for k, v in a if not self.is_nan(v) and v!={} + } def interval_to_str(s): @@ -134,8 +136,8 @@ def makestr(s): return str(s) -@app.route('/pivot/',methods=['POST']) -def pivot(): +@app.route('/estimates_pivot/',methods=['POST']) +def estimates_pivot(): ''' We cannot implement multi-level rows in AG Grid @@ -145,7 +147,6 @@ def pivot(): ''' st=time.time() rdata=request.json - dfname=rdata['cachename'] ids=rdata['ids'] rows=rdata['rows'] cols=rdata['cols'] @@ -153,7 +154,7 @@ def pivot(): binsize=rdata.get('binsize') mode=rdata['mode'] - df=eval(dfname)['df'] + df=eval('estimate_pivot_tables')['df'] #filter down on the pk's pv=df[df['id'].isin(ids)] @@ -232,6 +233,9 @@ def pivot(): pv=pv.fillna(0) html=pv.to_html(index_names=False) html=re.sub('\\n\s+','',html) + html=re.sub("disembarked_slaves","Disembarked",html) + html=re.sub("embarked_slaves","Embarked",html) + html=re.sub("\.0","",html) return json.dumps( { "data":html @@ -239,6 +243,27 @@ def pivot(): ) +@app.route('/estimates_timeline/',methods=['POST']) +def estimates_timeline(): + + ''' + Implements the pandas groupby function and returns the sparse summary. + Excellent for bar & pie charts. + ''' + st=time.time() + rdata=request.json + ids=rdata['ids'] + df=eval('estimate_pivot_tables')['df'] + cols=['disembarked_slaves','embarked_slaves'] + df2=df[df['id'].isin(ids)] + ct=df2.groupby('year',group_keys=True)[cols].agg('sum') + ct=ct.fillna(0) + resp={'year':list(ct.index)} + for col in cols: + resp[col]=list(ct[col]) + return json.dumps(resp) + + @app.route('/crosstabs/',methods=['POST']) def crosstabs(): From 7e7ab09fa2fee8a3760fa4904704f0cf015516cd Mon Sep 17 00:00:00 2001 From: John Connor Date: Wed, 14 Feb 2024 13:10:19 -0600 Subject: [PATCH 3/3] turned other plots back on, duh --- api/voyage/serializers_READONLY.py | 1 + stats/app.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/api/voyage/serializers_READONLY.py b/api/voyage/serializers_READONLY.py index d8e4d8f..000eb11 100644 --- a/api/voyage/serializers_READONLY.py +++ b/api/voyage/serializers_READONLY.py @@ -629,6 +629,7 @@ class VoyageCrossTabRequestSerializer(serializers.Serializer): offset=serializers.IntegerField() limit=serializers.IntegerField() order_by=serializers.ListField(child=serializers.CharField(),allow_null=True,required=False) + global_search=serializers.CharField(allow_null=True,required=False) class VoyageCrossTabResponseSerializer(serializers.Serializer): tablestructure=serializers.JSONField() diff --git a/stats/app.py b/stats/app.py index 43a9b8f..f3523b0 100644 --- a/stats/app.py +++ b/stats/app.py @@ -33,10 +33,10 @@ def load_long_df(endpoint,variables,options): return(df) registered_caches=[ -# voyage_bar_and_donut_charts, -# voyage_summary_statistics, -# voyage_pivot_tables, -# voyage_xyscatter, + voyage_bar_and_donut_charts, + voyage_summary_statistics, + voyage_pivot_tables, + voyage_xyscatter, estimate_pivot_tables ]