@@ -72,7 +72,7 @@ Next, let's extract the data from the SciFact dataset and add it to our Vector S
7272
7373``` python
7474ids = [f " { id_} " for id_ in corpus[" train" ][" doc_id" ]]
75- texts = [text[ 0 ] for text in corpus[" train" ][" abstract" ]]
75+ texts = [' ' .join( text) for text in corpus[" train" ][" abstract" ]]
7676metadata = [{" title" : title} for title in corpus[" train" ][" title" ]]
7777```
7878
@@ -97,17 +97,17 @@ def preprocess_scifact(claims_dataset, dataset_type="train"):
9797 claims_dict = {}
9898
9999 for item in claims_dataset[dataset_type]:
100- claim = item[' claim' ]
101- relevance = (item[' evidence_doc_id' ], 1 ) # 1 indicates that the evidence is relevant to the question
100+ claim = item[' claim' ] # Assuming 'claim' is the field for the question
101+ relevance = item[' cited_doc_ids' ] # Assuming 'cited_doc_ids' is the field for relevance
102+ relevance = [(str (r), 1 ) for r in relevance]
102103
103104 # Check for non-empty relevance
104- if relevance[0 ] != " " :
105- if claim not in claims_dict:
106- claims_dict[claim] = [relevance]
107- else :
108- # If the does not exist in the dictionary, append the new relevance
109- if relevance not in claims_dict[claim]:
110- claims_dict[claim].append(relevance)
105+ if claim not in claims_dict:
106+ claims_dict[claim] = relevance
107+ else :
108+ # If the does not exist in the dictionary, append the new relevance
109+ if relevance not in claims_dict[claim]:
110+ claims_dict[claim].extend(relevance)
111111
112112 # Split the dictionary into two lists: claims and relevances
113113 claims = list (claims_dict.keys())
@@ -144,16 +144,16 @@ relevances[:10]
144144```
145145
146146```
147- [[('13734012', 1)],
147+ [[('31715818', 1)],
148+ [('13734012', 1)],
149+ [('22942787', 1)],
150+ [('2613775', 1)],
148151 [('44265107', 1)],
149- [('33409100', 1)],
150- [('6490571', 1)],
151- [('12670680', 1)],
152- [('24341590', 1)],
153- [('12428497', 1)],
154- [('11705328', 1)],
155- [('13497630', 1)],
156- [('13497630', 1)]]
152+ [('32587939', 1)],
153+ [('32587939', 1)],
154+ [('33409100', 1), ('33409100', 1)],
155+ [('641786', 1)],
156+ [('22080671', 1)]]
157157```
158158
159159### Running the Deep Memory Training <a href =" #running-the-deep-memory-training " id =" running-the-deep-memory-training " ></a >
@@ -200,27 +200,23 @@ validation_claims, validation_relevances = preprocess_scifact(claims_dataset, da
200200)
201201</code ></pre >
202202
203- We observe that the recall has improved by up to 30%, depending on the ` k ` value.
204-
205- ``` python
206- recalls
207- ```
203+ We observe that the recall has improved by up to 16%, depending on the ` k ` value.
208204
209205```
210- ---- Evaluating without model ----
211- Recall@1: 29.5 %
212- Recall@3: 45.0 %
213- Recall@5: 51.8 %
214- Recall@10: 58.1 %
215- Recall@50: 77.4 %
216- Recall@100: 84 .9%
217- ---- Evaluating with model ----
218- Recall@1: 55.1 %
219- Recall@3: 68.2 %
206+ ---- Evaluating without Deep Memory ----
207+ Recall@1: 44.2 %
208+ Recall@3: 56.9 %
209+ Recall@5: 61.3 %
210+ Recall@10: 67.3 %
211+ Recall@50: 77.2 %
212+ Recall@100: 79 .9%
213+ ---- Evaluating with Deep Memory ----
214+ Recall@1: 60.4 %
215+ Recall@3: 67.6 %
220216Recall@5: 71.7%
221- Recall@10: 77.9 %
222- Recall@50: 90 .1%
223- Recall@100: 92.6 %
217+ Recall@10: 75.4 %
218+ Recall@50: 79 .1%
219+ Recall@100: 80.2 %
224220```
225221
226222### Using Deep Memory in your Application <a href =" #using-deep-memory-in-your-application " id =" using-deep-memory-in-your-application " ></a >
0 commit comments