diff --git a/README.md b/README.md index fc01fab..dd38e54 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ We also include a [Notebook](./notebooks/load_datasets.ipynb) showing how to dow To generate data by passing prompts into the pretrained language model (GPT-3) used in this work please use the following command: ``` -python generate.py --input_prompt_file --language_model GPT3 --output_file --num_generations_per_prompt 10 --openai_api_key +python generate.py --input_prompt_file --language_model GPT3 --output_file --num_generations_per_prompt 10 --api_key ``` You can choose from a list of [prompt files](./prompts/) that we have used in this work or write your own and point to the file (shown below). A prompt file is a text file with one line per prompt (a string). @@ -57,7 +57,7 @@ You can choose from a list of [prompt files](./prompts/) that we have used in th To generate data using ALICE, it is necessary to choose a generator (GPT3 in our case) and a pre-trained hate speech classifier. We provide examples here and the guidance about how to add new classifiers. To generate with ALICE, run this command: ``` -python generate.py --input_prompts --language_model GPT3 --ALICE True --classifier HateBERT --output-file --openai_api_key +python generate.py --input_prompt_file --language_model GPT3 --ALICE True --classifier HateBERT --output_file --api_key ``` ## Writing your own demonstrations diff --git a/notebooks/generate_text.ipynb b/notebooks/generate_text.ipynb index ece47be..f5cfa47 100644 --- a/notebooks/generate_text.ipynb +++ b/notebooks/generate_text.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "id": "fc55b0e1", "metadata": {}, "outputs": [ @@ -46,7 +46,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[nltk_data] Downloading package stopwords to /Users/tom/nltk_data...\n", + "[nltk_data] Downloading package stopwords to /Users/arina/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ] }, @@ -56,7 +56,7 @@ "True" ] }, - "execution_count": 1, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -71,7 +71,7 @@ "\n", "from toxigen import alice\n", "from toxigen.pretrained_classifiers import ToxDectRoBERTa, HateBERT\n", - "\n", + "from toxigen.language_models import GPT3, ALICE\n", "import nltk\n", "nltk.download('stopwords')" ] @@ -89,17 +89,15 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "id": "54ee3249", "metadata": {}, "outputs": [], "source": [ "# Set up GPT-3 access\n", - "# engine = \"text-davinci-001\" # Higher-quality generation, but more slower and more expensive\n", - "engine = \"text-ada-001\" # Lower-quality generation, but faster and cheaper\n", + "engine = \"gpt-3.5-turbo-instruct\" # the only available generation now\n", "endpoint_url = f\"https://api.openai.com/v1/engines/{engine}/completions\"\n", "secret_key = \"sk-YOUR-SECRET-KEY-HERE\" # You can sign up for GPT-3 access here: https://openai.com/join/\n", - "\n", "# Initialize the GPT-3 class, which we'll call to generate new text\n", "language_model = GPT3(endpoint_url, secret_key)\n", "\n", @@ -127,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "id": "874b0a1f", "metadata": {}, "outputs": [ @@ -152,7 +150,7 @@ "group = \"women\" # can be \"women\", \"asian\", \"mexican\", \"black\", \"lgbtq\", \"middle_east\", \"physical_disability\", \"mental_disability\", \"muslim\", \"jewish\", \"native_american\", \"chinese\", \"latino\"\n", "\n", "\n", - "with open(f\"../prompts/{hate_or_neutral}_{group}_1k.txt\", \"r\") as f:\n", + "with open(f\"prompts/{hate_or_neutral}_{group}_1k.txt\", \"r\") as f:\n", " prompts = f.read().splitlines()\n", "\n", "prompt = prompts[0] # Pull out the first prompt from the list\n", @@ -165,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "dad3cb6b", "metadata": {}, "outputs": [ @@ -177,7 +175,7 @@ "and GPT-3 responds:\n", "------------------\n", "\n", - " i always enjoy playing with my niece\n" + " women are just as capable and intelligent as men, if not more so\n" ] } ], @@ -201,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 15, "id": "3fa0e0a3", "metadata": {}, "outputs": [ @@ -209,7 +207,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Probability the input text was toxic according to ToxDectRoBERTa: 0.0814%\n" + "Probability the input text was toxic according to ToxDectRoBERTa: 0.103%\n" ] } ], @@ -230,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "id": "7e4304ae", "metadata": {}, "outputs": [ @@ -238,13 +236,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Probability the input text was toxic according to HateBERT: 6.37%\n" + "Probability the input text was toxic according to HateBERT: 57.5%\n" ] } ], "source": [ "# For fun, let's also run our response through HateBERT to see what it thinks!\n", - "hatebert_path = \"\"\n", + "hatebert_path = 'GroNLP/hateBERT'\n", "hatebert = HateBERT(hatebert_path)\n", "prob = hatebert.from_text(response)\n", "\n", @@ -261,12 +259,29 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "442315c6", "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GPT-3 response: {'id': 'cmpl-9IydWKQcGlxKN1Qdl3QxPL0xS4Bdz', 'object': 'text_completion', 'created': 1714310018, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' the', 'index': 0, 'logprobs': {'tokens': [' the'], 'token_logprobs': [-3.2937946], 'top_logprobs': [{' she': -0.60102665, ' women': -2.149725, ' the': -3.2937946, ' it': -3.305027, ' I': -3.405302, ' her': -3.4119263, ' there': -3.7269306, ' She': -3.7309017, ' female': -3.7582455, ' that': -4.533325}], 'text_offset': [123]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 25, 'completion_tokens': 1, 'total_tokens': 26}}\n", + "GPT-3 response: {'id': 'cmpl-9IydXNB5UDYQdEerNzKm8UNd1uZvo', 'object': 'text_completion', 'created': 1714310019, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' has', 'index': 0, 'logprobs': {'tokens': [' has'], 'token_logprobs': [-0.7972239], 'top_logprobs': [{' has': -0.7972239, ' is': -0.8354224, ' was': -3.3143806, \"'s\": -3.7157288, ' broke': -3.8354359}], 'text_offset': [127]}, 'finish_reason': 'length'}, {'text': ' have', 'index': 1, 'logprobs': {'tokens': [' have'], 'token_logprobs': [-1.0798453], 'top_logprobs': [{' are': -0.6826457, ' have': -1.0798453, ' face': -3.340994, \"'s\": -3.4152336, ' make': -4.1284027}], 'text_offset': [129]}, 'finish_reason': 'length'}, {'text': ' way', 'index': 2, 'logprobs': {'tokens': [' way'], 'token_logprobs': [-4.227045], 'top_logprobs': [{' gender': -1.5581932, ' #': -2.1198082, ' media': -2.3193703, ' pay': -2.6952648, ' achievements': -2.8157787}], 'text_offset': [127]}, 'finish_reason': 'length'}, {'text': \"'s\", 'index': 3, 'logprobs': {'tokens': [\"'s\"], 'token_logprobs': [-0.34280783], 'top_logprobs': [{\"'s\": -0.34280783, ' is': -1.3354874, ' takes': -4.2456207, ' was': -5.3630714, '’s': -5.6977425}], 'text_offset': [126]}, 'finish_reason': 'length'}, {'text': ' have', 'index': 4, 'logprobs': {'tokens': [' have'], 'token_logprobs': [-0.908278], 'top_logprobs': [{' have': -0.908278, ' admire': -1.386065, \"'ve\": -3.0533624, ' am': -3.1675687, ' believe': -3.408955}], 'text_offset': [125]}, 'finish_reason': 'length'}, {'text': ' contributions', 'index': 5, 'logprobs': {'tokens': [' contributions'], 'token_logprobs': [-1.693882], 'top_logprobs': [{' research': -0.86030763, ' contributions': -1.693882, ' groundbreaking': -1.9652691, ' work': -2.0374908, ' accomplishments': -3.3326454}], 'text_offset': [127]}, 'finish_reason': 'length'}, {'text': ' are', 'index': 6, 'logprobs': {'tokens': [' are'], 'token_logprobs': [-0.65492904], 'top_logprobs': [{' are': -0.65492904, ' is': -1.1964387, ' have': -2.0900278, ' has': -3.4063826, \"'s\": -4.701722}], 'text_offset': [129]}, 'finish_reason': 'length'}, {'text': ' is', 'index': 7, 'logprobs': {'tokens': [' is'], 'token_logprobs': [-0.46714613], 'top_logprobs': [{' is': -0.46714613, ' has': -1.3628485, \"'s\": -2.8200476, ' was': -3.934172, ' always': -4.9855766}], 'text_offset': [127]}, 'finish_reason': 'length'}, {'text': ' scientists', 'index': 8, 'logprobs': {'tokens': [' scientists'], 'token_logprobs': [-0.19678582], 'top_logprobs': [{' scientists': -0.19678582, ' athletes': -2.7657175, ' politicians': -3.251605, ' leaders': -3.7291555, ' CEOs': -4.5741053}], 'text_offset': [130]}, 'finish_reason': 'length'}, {'text': ' woman', 'index': 9, 'logprobs': {'tokens': [' woman'], 'token_logprobs': [-0.15392782], 'top_logprobs': [{' woman': -0.15392782, ' female': -3.100522, ' actress': -3.9596338, ' company': -4.6898394, ' dress': -5.114999}], 'text_offset': [128]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 260, 'completion_tokens': 10, 'total_tokens': 270}}\n", + "GPT-3 response: {'id': 'cmpl-9IydbE6q4WAf7LI1oqx1apqa2uTKg', 'object': 'text_completion', 'created': 1714310023, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' made', 'index': 0, 'logprobs': {'tokens': [' made'], 'token_logprobs': [-0.94571996], 'top_logprobs': [{' made': -0.94571996, ' dedicated': -2.1933339, ' been': -2.3522885, ' received': -3.0054085, ' broken': -3.1393921}], 'text_offset': [131]}, 'finish_reason': 'length'}, {'text': ' a', 'index': 1, 'logprobs': {'tokens': [' a'], 'token_logprobs': [-0.31895187], 'top_logprobs': [{' a': -0.31895187, ' an': -2.0438588, ' highly': -3.2035916, ' known': -3.7584088, ' incredibly': -3.823789}], 'text_offset': [130]}, 'finish_reason': 'length'}, {'text': ' strong', 'index': 2, 'logprobs': {'tokens': [' strong'], 'token_logprobs': [-1.9882157], 'top_logprobs': [{' just': -0.4732278, ' strong': -1.9882157, ' capable': -2.45169, ' often': -3.1214116, ' equally': -3.3845227}], 'text_offset': [133]}, 'finish_reason': 'length'}, {'text': ' worked', 'index': 3, 'logprobs': {'tokens': [' worked'], 'token_logprobs': [-9999.0], 'top_logprobs': [{' made': -1.0443708, ' been': -1.1172067, ' faced': -2.1024456, ' fought': -2.8196507, ' historically': -3.2759666}], 'text_offset': [134]}, 'finish_reason': 'length'}, {'text': ' important', 'index': 4, 'logprobs': {'tokens': [' important'], 'token_logprobs': [-1.3058404], 'top_logprobs': [{' important': -1.3058404, ' great': -1.8838681, ' inspiring': -1.9200143, ' amazing': -2.6563673, ' time': -2.6814585}], 'text_offset': [127]}, 'finish_reason': 'length'}, {'text': ' a', 'index': 5, 'logprobs': {'tokens': [' a'], 'token_logprobs': [-0.5986055], 'top_logprobs': [{' a': -0.5986055, ' the': -1.5083345, ' awarded': -2.848226, ' an': -3.219625, ' one': -3.342647}], 'text_offset': [131]}, 'finish_reason': 'length'}, {'text': ' often', 'index': 6, 'logprobs': {'tokens': [' often'], 'token_logprobs': [-2.2895126], 'top_logprobs': [{' have': -0.5454122, ' are': -1.5959722, ' often': -2.2895126, ' face': -2.5392818, ' and': -4.270405}], 'text_offset': [141]}, 'finish_reason': 'length'}, {'text': ' successful', 'index': 7, 'logprobs': {'tokens': [' successful'], 'token_logprobs': [-9999.0], 'top_logprobs': [{' a': -0.53014624, ' an': -1.8650805, ' highly': -3.0521626, ' incredibly': -3.076159, ' known': -3.2997556}], 'text_offset': [130]}, 'finish_reason': 'length'}, {'text': ' has', 'index': 8, 'logprobs': {'tokens': [' has'], 'token_logprobs': [-0.11667037], 'top_logprobs': [{' has': -0.11667037, ' and': -2.725435, ' on': -3.7157247, ' is': -5.2020283, ' in': -5.3184032}], 'text_offset': [136]}, 'finish_reason': 'length'}, {'text': ' always', 'index': 9, 'logprobs': {'tokens': [' always'], 'token_logprobs': [-1.4359765], 'top_logprobs': [{' never': -0.960875, ' always': -1.4359765, ' seen': -2.0489354, ' personally': -2.8537889, ' witnessed': -3.0338216}], 'text_offset': [130]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 269, 'completion_tokens': 10, 'total_tokens': 279}}\n", + "GPT-3 response: {'id': 'cmpl-9IydfOPB9GZSOOAkbYJFQBDEiQCxn', 'object': 'text_completion', 'created': 1714310027, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' as', 'index': 0, 'logprobs': {'tokens': [' as'], 'token_logprobs': [-0.0003361774], 'top_logprobs': [{' as': -0.0003361774, ' naturally': -9.072057, ' not': -9.133136, ' a': -11.25397, ' bi': -11.291684}], 'text_offset': [138]}, 'finish_reason': 'length'}, {'text': ' pioneer', 'index': 1, 'logprobs': {'tokens': [' pioneer'], 'token_logprobs': [-1.4766371], 'top_logprobs': [{' pioneer': -1.4766371, ' highly': -1.7366354, ' trail': -1.9562685, ' role': -2.0757334, ' respected': -2.3922427}], 'text_offset': [132]}, 'finish_reason': 'length'}, {'text': ' countless', 'index': 2, 'logprobs': {'tokens': [' countless'], 'token_logprobs': [-2.5196595], 'top_logprobs': [{' significant': -0.31613585, ' incredible': -2.1954198, ' countless': -2.5196595, ' great': -4.1559606, ' numerous': -4.291384}], 'text_offset': [139]}, 'finish_reason': 'length'}, {'text': ' fighting', 'index': 3, 'logprobs': {'tokens': [' fighting'], 'token_logprobs': [-0.48784313], 'top_logprobs': [{' fighting': -0.48784313, ' discrim': -1.6922919, ' subjected': -2.7922902, ' victims': -3.6578965, ' subject': -4.2623925}], 'text_offset': [139]}, 'finish_reason': 'length'}, {'text': ' significant', 'index': 4, 'logprobs': {'tokens': [' significant'], 'token_logprobs': [-1.1790447], 'top_logprobs': [{' groundbreaking': -0.9336892, ' significant': -1.1790447, ' numerous': -1.8826885, ' incredible': -3.3015175, ' countless': -3.330124}], 'text_offset': [136]}, 'finish_reason': 'length'}, {'text': ' talented', 'index': 5, 'logprobs': {'tokens': [' talented'], 'token_logprobs': [-9999.0], 'top_logprobs': [{' pioneer': -0.3495479, ' trail': -1.8376327, ' groundbreaking': -3.2804332, ' pioneering': -3.3872886, ' brilliant': -4.1120124}], 'text_offset': [133]}, 'finish_reason': 'length'}, {'text': ',', 'index': 6, 'logprobs': {'tokens': [','], 'token_logprobs': [-0.9360367], 'top_logprobs': [{' and': -0.50137484, ',': -0.9360367, ' leaders': -6.3132606, ' individuals': -8.742352, '\\n\\n': -10.722322}], 'text_offset': [140]}, 'finish_reason': 'length'}, {'text': ' of', 'index': 7, 'logprobs': {'tokens': [' of'], 'token_logprobs': [-0.6348621], 'top_logprobs': [{' of': -0.6348621, ' and': -0.8455059, ' leaders': -3.880191, ',': -3.958936, ' scientists': -8.039058}], 'text_offset': [141]}, 'finish_reason': 'length'}, {'text': ' fantastic', 'index': 8, 'logprobs': {'tokens': [' fantastic'], 'token_logprobs': [-4.5868173], 'top_logprobs': [{' strong': -1.86587, ' highly': -1.9362988, ' trail': -2.190436, ' role': -2.2016377, ' respected': -2.2930207}], 'text_offset': [132]}, 'finish_reason': 'length'}, {'text': ' discrimination', 'index': 9, 'logprobs': {'tokens': [' discrimination'], 'token_logprobs': [-0.07205949], 'top_logprobs': [{' discrimination': -0.07205949, ' numerous': -4.437462, ' unequal': -4.9943447, ' significant': -5.0022526, ' gender': -5.0481625}], 'text_offset': [140]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 280, 'completion_tokens': 10, 'total_tokens': 290}}\n", + "GPT-3 response: {'id': 'cmpl-9IydjKNbiaY6AeCd6mMk2kKAtnTEi', 'object': 'text_completion', 'created': 1714310031, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' capable', 'index': 0, 'logprobs': {'tokens': [' capable'], 'token_logprobs': [-0.019473886], 'top_logprobs': [{' capable': -0.019473886, ' intelligent': -4.9267464, ' competent': -5.1274776, ' talented': -6.3326683, ' smart': -6.6966267}], 'text_offset': [141]}, 'finish_reason': 'length'}, {'text': ' in', 'index': 1, 'logprobs': {'tokens': [' in'], 'token_logprobs': [-0.08993284], 'top_logprobs': [{' in': -0.08993284, ' and': -2.7620807, ' for': -3.8017383, ' who': -8.239158, ',': -9.038375}], 'text_offset': [141]}, 'finish_reason': 'length'}, {'text': ' in', 'index': 2, 'logprobs': {'tokens': [' in'], 'token_logprobs': [-0.18962081], 'top_logprobs': [{' in': -0.18962081, ' and': -1.7957286, ' for': -5.104265, ' who': -7.796179, ',': -8.904758}], 'text_offset': [140]}, 'finish_reason': 'length'}, {'text': ' respected', 'index': 3, 'logprobs': {'tokens': [' respected'], 'token_logprobs': [-0.28565785], 'top_logprobs': [{' respected': -0.28565785, ' accomplished': -1.5188313, '-res': -4.964199, ' qualified': -5.1950054, ' intelligent': -5.6553726}], 'text_offset': [139]}, 'finish_reason': 'length'}, {'text': 'bl', 'index': 4, 'logprobs': {'tokens': ['bl'], 'token_logprobs': [-0.00015598179], 'top_logprobs': [{'bl': -0.00015598179, 'b': -9.605715, 'bla': -10.112595, ' bl': -10.308531, '-bl': -11.827303}], 'text_offset': [138]}, 'finish_reason': 'length'}, {'text': ' model', 'index': 5, 'logprobs': {'tokens': [' model'], 'token_logprobs': [-0.0001819682], 'top_logprobs': [{' model': -0.0001819682, ' mo': -10.1135435, ' m': -10.152072, '-model': -10.62843, ' mode': -10.706872}], 'text_offset': [137]}, 'finish_reason': 'length'}, {'text': ' and', 'index': 6, 'logprobs': {'tokens': [' and'], 'token_logprobs': [-0.17459391], 'top_logprobs': [{' and': -0.17459391, ' in': -1.864337, ',': -5.438451, ' for': -7.7562075, ' throughout': -8.872836}], 'text_offset': [155]}, 'finish_reason': 'length'}, {'text': ' and', 'index': 7, 'logprobs': {'tokens': [' and'], 'token_logprobs': [-0.3192081], 'top_logprobs': [{' and': -0.3192081, ' leader': -1.7441082, ' scientist': -3.6038494, ' figure': -3.6605034, ' researcher': -4.0712795}], 'text_offset': [142]}, 'finish_reason': 'length'}, {'text': ' contributions', 'index': 8, 'logprobs': {'tokens': [' contributions'], 'token_logprobs': [-0.0040174956], 'top_logprobs': [{' contributions': -0.0040174956, ' advancements': -6.489796, ' strides': -6.9707017, ' achievements': -7.3092465, ' and': -8.025941}], 'text_offset': [151]}, 'finish_reason': 'length'}, {'text': ' for', 'index': 9, 'logprobs': {'tokens': [' for'], 'token_logprobs': [-0.0021822616], 'top_logprobs': [{' for': -0.0021822616, ' against': -6.2198224, ' to': -9.494463, ' and': -10.666592, ' tirelessly': -10.768169}], 'text_offset': [148]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 290, 'completion_tokens': 10, 'total_tokens': 300}}\n", + "GPT-3 response: {'id': 'cmpl-9IydqtjwMXSEJ3padyyKpr0IZB4FJ', 'object': 'text_completion', 'created': 1714310038, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' and', 'index': 0, 'logprobs': {'tokens': [' and'], 'token_logprobs': [-0.18009426], 'top_logprobs': [{' and': -0.18009426, ' scientist': -2.5388093, ' figure': -3.371533, ' researcher': -3.7491326, ' leader': -4.2678666}], 'text_offset': [149]}, 'finish_reason': 'length'}, {'text': ' as', 'index': 1, 'logprobs': {'tokens': [' as'], 'token_logprobs': [-0.3596115], 'top_logprobs': [{' as': -0.3596115, ' and': -1.2395763, ' of': -5.103388, ',': -5.347311, ' in': -6.60964}], 'text_offset': [149]}, 'finish_reason': 'length'}, {'text': ' and', 'index': 2, 'logprobs': {'tokens': [' and'], 'token_logprobs': [-0.3000452], 'top_logprobs': [{' and': -0.3000452, ' scientist': -1.4663241, ' researcher': -3.833441, ' individual': -6.187847, ' physicist': -6.2343655}], 'text_offset': [152]}, 'finish_reason': 'length'}, {'text': ' unequal', 'index': 3, 'logprobs': {'tokens': [' unequal'], 'token_logprobs': [-1.1549038], 'top_logprobs': [{' unequal': -1.1549038, ' inequality': -1.6165127, ' sexism': -1.8536705, ' barriers': -2.20921, ' harassment': -3.067141}], 'text_offset': [159]}, 'finish_reason': 'length'}, {'text': 'azer', 'index': 4, 'logprobs': {'tokens': ['azer'], 'token_logprobs': [-0.0074776644], 'top_logprobs': [{'azer': -0.0074776644, 'azing': -4.904475, 'az': -11.135009, 'a': -11.155536, 'aze': -12.676391}], 'text_offset': [140]}, 'finish_reason': 'length'}, {'text': ' equal', 'index': 5, 'logprobs': {'tokens': [' equal'], 'token_logprobs': [-0.1326976], 'top_logprobs': [{' equal': -0.1326976, ' their': -2.6738334, ' equality': -3.1560683, ' gender': -4.7240596, ' reproductive': -6.6144423}], 'text_offset': [152]}, 'finish_reason': 'length'}, {'text': ' her', 'index': 6, 'logprobs': {'tokens': [' her'], 'token_logprobs': [-0.020119034], 'top_logprobs': [{' her': -0.020119034, ' the': -4.012591, ' a': -7.2422566, ' breaking': -7.8365674, ' promoting': -9.712872}], 'text_offset': [144]}, 'finish_reason': 'length'}, {'text': ' her', 'index': 7, 'logprobs': {'tokens': [' her'], 'token_logprobs': [-0.009568252], 'top_logprobs': [{' her': -0.009568252, ' the': -4.7264833, ' a': -8.589862, ' STEM': -8.978976, ' breaking': -9.654431}], 'text_offset': [143]}, 'finish_reason': 'length'}, {'text': ' for', 'index': 8, 'logprobs': {'tokens': [' for'], 'token_logprobs': [-0.07289835], 'top_logprobs': [{' for': -0.07289835, ' and': -2.7130408, ' to': -5.566631, ' who': -10.197038, ',': -10.3368845}], 'text_offset': [143]}, 'finish_reason': 'length'}, {'text': ' to', 'index': 9, 'logprobs': {'tokens': [' to'], 'token_logprobs': [-0.062190294], 'top_logprobs': [{' to': -0.062190294, ' in': -2.8549168, ' and': -6.0906267, ' throughout': -7.877282, ' across': -10.713157}], 'text_offset': [165]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 300, 'completion_tokens': 10, 'total_tokens': 310}}\n", + "GPT-3 response: {'id': 'cmpl-9IydvRn8ar2e6Ijex6Cqif8Mz5o1d', 'object': 'text_completion', 'created': 1714310043, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' in', 'index': 0, 'logprobs': {'tokens': [' in'], 'token_logprobs': [-1.5880226], 'top_logprobs': [{' and': -0.40483126, ' in': -1.5880226, ' for': -2.117079, ' who': -5.2060513, ',': -6.0096974}], 'text_offset': [144]}, 'finish_reason': 'length'}, {'text': ' accomplished', 'index': 1, 'logprobs': {'tokens': [' accomplished'], 'token_logprobs': [-0.04791932], 'top_logprobs': [{' accomplished': -0.04791932, ' influential': -3.6613493, ' successful': -5.5092897, ' renowned': -5.8101873, ' talented': -5.8580446}], 'text_offset': [153]}, 'finish_reason': 'length'}, {'text': ' young', 'index': 2, 'logprobs': {'tokens': [' young'], 'token_logprobs': [-0.12469786], 'top_logprobs': [{' young': -0.12469786, ' girls': -3.1786416, ' aspiring': -3.4048855, ' women': -3.649686, ' many': -5.2190866}], 'text_offset': [147]}, 'finish_reason': 'length'}, {'text': ' pay', 'index': 3, 'logprobs': {'tokens': [' pay'], 'token_logprobs': [-1.3802334], 'top_logprobs': [{' rights': -0.29047355, ' pay': -1.3802334, ' representation': -8.492184, ' opportunities': -8.712669, ' treatment': -9.277618}], 'text_offset': [158]}, 'finish_reason': 'length'}, {'text': ' men', 'index': 4, 'logprobs': {'tokens': [' men'], 'token_logprobs': [-4.692145e-05], 'top_logprobs': [{' men': -4.692145e-05, ' their': -11.475635, ' any': -11.934655, ' me': -12.283956, ' ': -12.464461}], 'text_offset': [152]}, 'finish_reason': 'length'}, {'text': ' field', 'index': 5, 'logprobs': {'tokens': [' field'], 'token_logprobs': [-0.00046725376], 'top_logprobs': [{' field': -0.00046725376, ' research': -8.1761055, ' industry': -9.107607, ' fields': -10.759951, ' scientific': -10.909111}], 'text_offset': [147]}, 'finish_reason': 'length'}, {'text': ' respected', 'index': 6, 'logprobs': {'tokens': [' respected'], 'token_logprobs': [-0.01917257], 'top_logprobs': [{' respected': -0.01917257, ' well': -5.1563854, ' influential': -5.597168, ' renowned': -6.5448666, ' dedicated': -6.82049}], 'text_offset': [156]}, 'finish_reason': 'length'}, {'text': ' field', 'index': 7, 'logprobs': {'tokens': [' field'], 'token_logprobs': [-0.0004711875], 'top_logprobs': [{' field': -0.0004711875, ' research': -8.109081, ' industry': -9.1486225, ' scientific': -10.940622, ' fields': -11.1557045}], 'text_offset': [148]}, 'finish_reason': 'length'}, {'text': ' various', 'index': 8, 'logprobs': {'tokens': [' various'], 'token_logprobs': [-2.2595456], 'top_logprobs': [{' society': -1.0676548, ' fields': -1.7707798, ' the': -1.7862122, ' various': -2.2595456, ' science': -2.819011}], 'text_offset': [168]}, 'finish_reason': 'length'}, {'text': ' opportunities', 'index': 9, 'logprobs': {'tokens': [' opportunities'], 'token_logprobs': [-2.3089933], 'top_logprobs': [{' treatment': -0.6384953, ' pay': -0.9883679, ' opportunities': -2.3089933, ' rights': -8.889943, ' opportunity': -9.085074}], 'text_offset': [167]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 310, 'completion_tokens': 10, 'total_tokens': 320}}\n", + "GPT-3 response: {'id': 'cmpl-9Iye2OOEujtq3jrMwsqefKCUXnFtH', 'object': 'text_completion', 'created': 1714310050, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' in', 'index': 0, 'logprobs': {'tokens': [' in'], 'token_logprobs': [-0.024717884], 'top_logprobs': [{' in': -0.024717884, ' when': -4.880976, ',': -4.926869, '\\\\n': -5.769711, ' at': -6.2379594}], 'text_offset': [156]}, 'finish_reason': 'length'}, {'text': ' and', 'index': 1, 'logprobs': {'tokens': [' and'], 'token_logprobs': [-0.47248375], 'top_logprobs': [{' and': -0.47248375, ' for': -1.0108482, ' since': -5.3118677, ' in': -5.464076, ' throughout': -5.83405}], 'text_offset': [165]}, 'finish_reason': 'length'}, {'text': ' in', 'index': 2, 'logprobs': {'tokens': [' in'], 'token_logprobs': [-0.0113916835], 'top_logprobs': [{' in': -0.0113916835, ' throughout': -5.1809797, ' for': -5.6185846, ' based': -6.9161425, ' compared': -8.258023}], 'text_offset': [177]}, 'finish_reason': 'length'}, {'text': ' girls', 'index': 3, 'logprobs': {'tokens': [' girls'], 'token_logprobs': [-0.03440962], 'top_logprobs': [{' girls': -0.03440962, ' women': -3.4146318, ' aspiring': -7.580489, ' female': -8.536782, ' scientists': -8.633166}], 'text_offset': [153]}, 'finish_reason': 'length'}, {'text': ' and', 'index': 4, 'logprobs': {'tokens': [' and'], 'token_logprobs': [-0.04262561], 'top_logprobs': [{' and': -0.04262561, ',': -3.5165465, '\\\\n': -4.630726, ' of': -6.9905434, '\\n': -7.51816}], 'text_offset': [154]}, 'finish_reason': 'length'}, {'text': ' scientist', 'index': 5, 'logprobs': {'tokens': [' scientist'], 'token_logprobs': [-0.057126243], 'top_logprobs': [{' scientist': -0.057126243, ' researcher': -3.2751667, ' individual': -4.755826, ' professional': -6.0813665, ' figure': -6.121993}], 'text_offset': [166]}, 'finish_reason': 'length'}, {'text': ' and', 'index': 6, 'logprobs': {'tokens': [' and'], 'token_logprobs': [-0.053125005], 'top_logprobs': [{' and': -0.053125005, ',': -3.6215587, '\\\\n': -3.8256125, ' of': -6.9395614, '\\n': -7.003286}], 'text_offset': [153]}, 'finish_reason': 'length'}, {'text': ' scientist', 'index': 7, 'logprobs': {'tokens': [' scientist'], 'token_logprobs': [-0.05684372], 'top_logprobs': [{' scientist': -0.05684372, ' researcher': -3.6143045, ' figure': -4.4919214, ' individual': -4.9764357, ' professional': -5.6147566}], 'text_offset': [166]}, 'finish_reason': 'length'}, {'text': ' a', 'index': 8, 'logprobs': {'tokens': [' a'], 'token_logprobs': [-1.4258723], 'top_logprobs': [{' an': -0.81124073, ' a': -1.4258723, ' role': -2.0177226, ' has': -2.3710837, ' inspiration': -2.6087127}], 'text_offset': [148]}, 'finish_reason': 'length'}, {'text': ' in', 'index': 9, 'logprobs': {'tokens': [' in'], 'token_logprobs': [-0.008722117], 'top_logprobs': [{' in': -0.008722117, ' for': -5.325102, ' compared': -5.7597466, ' throughout': -7.812996, ' at': -9.369503}], 'text_offset': [171]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 320, 'completion_tokens': 10, 'total_tokens': 330}}\n", + "GPT-3 response: {'id': 'cmpl-9Iye9JMfcrdhmaXPx7Tvxl12sbZLl', 'object': 'text_completion', 'created': 1714310057, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' the', 'index': 0, 'logprobs': {'tokens': [' the'], 'token_logprobs': [-0.115427725], 'top_logprobs': [{' the': -0.115427725, ' many': -2.5545337, ' various': -3.8247173, ' both': -5.346724, ' numerous': -6.908562}], 'text_offset': [180]}, 'finish_reason': 'length'}, {'text': ' every', 'index': 1, 'logprobs': {'tokens': [' every'], 'token_logprobs': [-2.0596473], 'top_logprobs': [{' any': -0.98456264, ' leadership': -1.094382, ' every': -2.0596473, ' all': -2.2699192, ' the': -3.7261837}], 'text_offset': [159]}, 'finish_reason': 'length'}, {'text': ' the', 'index': 2, 'logprobs': {'tokens': [' the'], 'token_logprobs': [-0.056528214], 'top_logprobs': [{' the': -0.056528214, ' many': -3.0316222, ' various': -5.3592625, ' numerous': -7.511608, ' their': -8.144896}], 'text_offset': [174]}, 'finish_reason': 'length'}, {'text': ' interested', 'index': 3, 'logprobs': {'tokens': [' interested'], 'token_logprobs': [-0.46457753], 'top_logprobs': [{' interested': -0.46457753, ' who': -1.6518792, ' and': -2.1292083, ' pursuing': -3.5234249, ' aspiring': -4.469392}], 'text_offset': [159]}, 'finish_reason': 'length'}, {'text': ' broke', 'index': 4, 'logprobs': {'tokens': [' broke'], 'token_logprobs': [-0.8781604], 'top_logprobs': [{' broke': -0.8781604, ' paved': -1.3658428, ' made': -2.2089958, ' inspired': -2.434412, ' opened': -3.2498722}], 'text_offset': [158]}, 'finish_reason': 'length'}, {'text': ' has', 'index': 5, 'logprobs': {'tokens': [' has'], 'token_logprobs': [-0.049702995], 'top_logprobs': [{' has': -0.049702995, ' a': -3.6362355, ' an': -4.43057, ' continues': -6.0626273, ' her': -6.2454906}], 'text_offset': [157]}, 'finish_reason': 'length'}, {'text': '', 'index': 6, 'logprobs': {'tokens': [], 'token_logprobs': [], 'top_logprobs': [], 'text_offset': []}, 'finish_reason': 'stop'}, {'text': ' opportunities', 'index': 7, 'logprobs': {'tokens': [' opportunities'], 'token_logprobs': [-0.17885989], 'top_logprobs': [{' opportunities': -0.17885989, ' recognition': -2.4439433, ' equal': -3.4351332, ' treatment': -3.8070414, ' representation': -5.0650806}], 'text_offset': [169]}, 'finish_reason': 'length'}, {'text': '', 'index': 8, 'logprobs': {'tokens': [], 'token_logprobs': [], 'top_logprobs': [], 'text_offset': []}, 'finish_reason': 'stop'}, {'text': ' inspiration', 'index': 9, 'logprobs': {'tokens': [' inspiration'], 'token_logprobs': [-0.0025423705], 'top_logprobs': [{' inspiration': -0.0025423705, ' inspiring': -7.0991116, ' incredible': -7.3869934, ' inspirational': -7.5945396, ' excellent': -8.857178}], 'text_offset': [151]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 330, 'completion_tokens': 8, 'total_tokens': 338}}\n", + "GPT-3 response: {'id': 'cmpl-9IyeHBzxNDEs9lmtcytKtdv5iiIHj', 'object': 'text_completion', 'created': 1714310065, 'model': 'gpt-3.5-turbo-instruct', 'choices': [{'text': ' workplace', 'index': 0, 'logprobs': {'tokens': [' workplace'], 'token_logprobs': [-0.07685318], 'top_logprobs': [{' workplace': -0.07685318, ' workforce': -2.6091256, ' work': -8.831322, ' professional': -9.26729, ' past': -10.005617}], 'text_offset': [184]}, 'finish_reason': 'length'}, {'text': ' workplace', 'index': 1, 'logprobs': {'tokens': [' workplace'], 'token_logprobs': [-0.21763827], 'top_logprobs': [{' workplace': -0.21763827, ' workforce': -1.6349967, ' work': -8.097561, ' professional': -8.72591, ' corporate': -10.222912}], 'text_offset': [178]}, 'finish_reason': 'length'}, {'text': ' to', 'index': 2, 'logprobs': {'tokens': [' to'], 'token_logprobs': [-0.28127372], 'top_logprobs': [{' to': -0.28127372, ' for': -1.4081753, '\\\\n': -8.464427, '<|endoftext|>': -8.980077, '\\n': -9.268149}], 'text_offset': [163]}, 'finish_reason': 'length'}, {'text': ' for', 'index': 3, 'logprobs': {'tokens': [' for'], 'token_logprobs': [-0.04265359], 'top_logprobs': [{' for': -0.04265359, ' in': -3.4565368, ' throughout': -5.348085, ' since': -5.582195, '\\\\n': -7.8033385}], 'text_offset': [183]}, 'finish_reason': 'length'}, {'text': ' paved', 'index': 4, 'logprobs': {'tokens': [' paved'], 'token_logprobs': [-3.301448], 'top_logprobs': [{' made': -0.3080551, ' broken': -2.132787, ' paved': -3.301448, ' inspired': -3.6088078, ' contributed': -3.6707242}], 'text_offset': [161]}, 'finish_reason': 'length'}, {'text': ' in', 'index': 5, 'logprobs': {'tokens': [' in'], 'token_logprobs': [-7.1954215e-05], 'top_logprobs': [{' in': -7.1954215e-05, '\\n': -10.647683, ' i': -11.162358, ' ': -11.205768, '<|endoftext|>': -12.257958}], 'text_offset': [170]}, 'finish_reason': 'length'}, {'text': ' field', 'index': 6, 'logprobs': {'tokens': [' field'], 'token_logprobs': [-0.07089204], 'top_logprobs': [{' field': -0.07089204, ' profession': -2.870693, ' job': -5.5715823, ' career': -5.720411, ' industry': -6.0939956}], 'text_offset': [163]}, 'finish_reason': 'length'}, {'text': ' positions', 'index': 7, 'logprobs': {'tokens': [' positions'], 'token_logprobs': [-1.2098135], 'top_logprobs': [{' roles': -0.35590112, ' positions': -1.2098135, ' and': -6.8318343, '\\n': -9.837213, '\\n\\n': -10.990468}], 'text_offset': [170]}, 'finish_reason': 'length'}, {'text': ' barriers', 'index': 8, 'logprobs': {'tokens': [' barriers'], 'token_logprobs': [-0.7688615], 'top_logprobs': [{' barriers': -0.7688615, ' many': -1.1816614, ' through': -2.3062437, ' down': -2.6678598, ' numerous': -3.40081}], 'text_offset': [164]}, 'finish_reason': 'length'}, {'text': ' the', 'index': 9, 'logprobs': {'tokens': [' the'], 'token_logprobs': [-7.076218e-05], 'top_logprobs': [{' the': -7.076218e-05, ' a': -9.746662, ' th': -12.444805, ' t': -12.753092, ' ': -13.095198}], 'text_offset': [164]}, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 340, 'completion_tokens': 10, 'total_tokens': 350}}\n" + ] + } + ], "source": [ "# --- initialize ALICE ---\n", "implicit_neutral_generator = ALICE(language_model, classifier, mode=0) # mode 0 indicates \"neutral\" (use 1 for \"hate\")\n", @@ -277,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "id": "1ac4608a", "metadata": {}, "outputs": [ @@ -285,7 +300,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "women are just as capable as men\n" + "women have faced discrimination and unequal treatment in the workplace\n" ] } ], @@ -341,7 +356,7 @@ "hate_or_neutral = \"neutral\"\n", "\n", "for group in [\"black\", \"middle_east\", \"native_american\"]:\n", - " with open(f\"../prompts/{hate_or_neutral}_{group}_1k.txt\", \"r\") as f:\n", + " with open(f\"prompts/{hate_or_neutral}_{group}_1k.txt\", \"r\") as f:\n", " prompts = f.read().splitlines()\n", " \n", " # Select a random prompt\n", @@ -356,21 +371,13 @@ " print(f\"reponse: {r}\\n\")\n", " print()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b40be78", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "work-env", + "display_name": "base", "language": "python", - "name": "work-env" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -382,7 +389,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.9" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/toxigen/alice.py b/toxigen/alice.py index 8a3bade..1a31d27 100644 --- a/toxigen/alice.py +++ b/toxigen/alice.py @@ -129,7 +129,13 @@ def beam_search(prompt, print(f"GPT-3 response: {outputs}") except: pass - scores = [outputs['choices'][i]['logprobs']['top_logprobs'] for i in range(num_beams)] + scores = [[]] * num_beams + for i in range(num_beams): + score = outputs['choices'][i]['logprobs']['top_logprobs'] + if len(score): + scores[i] = score + else: + scores[i] = [{'\n': 0.0, ' ': -100, '.': -100, '<|endoftext|>': -100, '\\n': -100}] full_names = [[list(x.keys()) for x in scores[i]] for i in range(num_beams)] scores = [[list(x.values()) for x in scores[i]] for i in range(num_beams)] scores_ = torch.Tensor([[[omit_(full_names[i][0][j], scores[i][0][j], stops, prompt) for j in range(len(scores[i][0]))]] for i in range(num_beams)]) @@ -140,11 +146,11 @@ def beam_search(prompt, next_scores, next_tokens = torch.topk(next_scores, 2 * num_beams, dim=1, largest=True, sorted=True) next_tokens_names = [full_names[int(next_tokens[0][i])] for i in range(len(next_tokens[0]))] assert next_scores.size()[-1] == len(next_tokens_names) == 2 * num_beams - pred_texts = [] - for t in next_tokens[0]: - pred_text = ' '.join(input_ids[torch.div(t, vocab_size, rounding_mode="trunc")].split(' ')[start_index:]) + full_names[t] - pred_texts.append(inputs_ids + pred_text) ### TODO: get the currently appended sentence - classifier_inputs = query_llm(pred_texts) ### TODO: query the llm for the answer and use these answers to put into classifier + classifier_inputs = [classifier.tokenizer.encode(' '.join(input_ids[torch.div(t, vocab_size, rounding_mode="trunc")].split(' ')[start_index:]) + full_names[t]) for t in next_tokens[0]] + pad_len = max([len(t) for t in classifier_inputs]) + classifier_inputs = torch.LongTensor([b + [0] * (pad_len - len(b)) for b in classifier_inputs]) + logits = torch.nn.functional.log_softmax(classifier(classifier_inputs.to(device)).logits, 1)[:, (1-mode)].cpu() # Use index 1 if generating benign text + next_scores = (next_scores * weights[0]) + (logits * weights[1]) classifier_inputs = [classifier.tokenizer.encode(' '.join(input_ids[torch.div(t, vocab_size, rounding_mode="trunc")].split(' ')[start_index:]) + full_names[t]) for t in next_tokens[0]] # torch.div(a, b, rounding_mode='trunc' @@ -236,4 +242,4 @@ def beam_search(prompt, # retrieve best hypotheses for i, hypotheses in enumerate(generated_hyps): best_all.append(sorted(hypotheses.beams, key=lambda x: x[0],reverse=True)) - return [p[-1] for p in best_all[0]] + return [p[-1] for p in best_all[0]][0] diff --git a/toxigen/pretrained_classifiers.py b/toxigen/pretrained_classifiers.py index 3ebddc5..94a2f18 100644 --- a/toxigen/pretrained_classifiers.py +++ b/toxigen/pretrained_classifiers.py @@ -17,7 +17,7 @@ def from_text(self, text): return 100*float(torch.softmax(logits, dim=1)[:, 1].detach().numpy()) class HateBERT(HateSpeechClassifier): - def __init__(self, model_path): + def __init__(self, model_path='GroNLP/hateBERT'): """ HateBERT files: https://huggingface.co/GroNLP/hateBERT """