import tensorflow as tf
import tensorflow_hub as hub
# TFOpenAIGPTLMHeadModel is the GPT transformer with an additional
# language-modeling head on top for easy text generation
from transformers.modeling_tf_openai import TFOpenAIGPTLMHeadModel
from transformers.tokenization_openai import OpenAIGPTTokenizer
import simpletransformers
model = TFOpenAIGPTLMHeadModel.from_pretrained("openai-gpt")
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
All model checkpoint weights were used when initializing TFOpenAIGPTLMHeadModel.
All the weights of TFOpenAIGPTLMHeadModel were initialized from the model checkpoint at openai-gpt.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFOpenAIGPTLMHeadModel for predictions without further training.
ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.
prompt_text = "I could not think of a clever prompt because"
encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors='tf')
encoded_prompt
num_sequences = 1  # the number of different sequences the generator will create from the prompt
length = 15  # set this to 1 to follow the prompt strictly; raise it to see how creative GPT can be
generated_sequences = model.generate(
    input_ids=encoded_prompt,
    do_sample=True,
    max_length=length + len(encoded_prompt[0]),  # prompt length plus the tokens to generate
    temperature=1.0,
    top_k=5,
    top_p=0.9,
    repetition_penalty=1.0,
    num_return_sequences=num_sequences,
)
for sequence in generated_sequences:
    text = tokenizer.decode(sequence, clean_up_tokenization_spaces=True)
    print(text)
i could not think of a clever prompt because i was so exhausted. i fell asleep. i dreamt about the fire,
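The sampling arguments above are what make the output vary from run to run. As a minimal sketch (this call is illustrative, not part of the original run), turning sampling off gives near-deterministic greedy decoding for comparison:

greedy_sequences = model.generate(
    input_ids=encoded_prompt,
    do_sample=False,  # greedy decoding: always pick the most probable next token
    max_length=length + len(encoded_prompt[0]),
)
print(tokenizer.decode(greedy_sequences[0], clean_up_tokenization_spaces=True))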
from transformers import pipeline

generator = pipeline("text-generation")
text_1 = generator("Text generation is cool because", max_length=50)
text_1[0]['generated_text']  # the pipeline returns a list of dicts, one per generated sequence
/opt/venv/lib/python3.7/site-packages/transformers/modeling_tf_auto.py:694: FutureWarning: The class `TFAutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `TFAutoModelForCausalLM` for causal language models, `TFAutoModelForMaskedLM` for masked language models and `TFAutoModelForSeq2SeqLM` for encoder-decoder models.
  FutureWarning,
All model checkpoint weights were used when initializing TFGPT2LMHeadModel.
All the weights of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
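The warning appears because the bare pipeline("text-generation") call falls back to the default GPT-2 checkpoint. A sketch of a more explicit call (the model name and the num_return_sequences value here are illustrative):

generator = pipeline("text-generation", model="gpt2")
candidates = generator("Text generation is cool because",
                       max_length=50,
                       num_return_sequences=3)  # ask for three alternative continuations
for candidate in candidates:
    print(candidate['generated_text'])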
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
INFO:absl:Using /tmp/tfhub_modules to cache modules.
INFO:absl:Downloading TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.
INFO:absl:Downloaded https://tfhub.dev/google/universal-sentence-encoder/4, Total size: 987.47MB
INFO:absl:Downloaded TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.
embeddings = embed([
    "the circus is filled with a variety of exotic animals",
    "the ringleader had the lion jump through the hoop",
    "excel is a popular tool in many business environments"])
embeddings
from scipy import spatial

result_1 = 1 - spatial.distance.cosine(embeddings[0], embeddings[1])  # circus vs. ringleader
result_2 = 1 - spatial.distance.cosine(embeddings[0], embeddings[2])  # circus vs. excel
result_3 = 1 - spatial.distance.cosine(embeddings[1], embeddings[2])  # ringleader vs. excel
print(result_1, result_2, result_3)
0.424555242061615 0.024492371827363968 0.2151227593421936
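The two circus sentences score noticeably higher against each other than either does against the Excel sentence, which is exactly what we want from a sentence encoder. Since the Universal Sentence Encoder returns approximately unit-length vectors, a compact alternative (a sketch, not part of the original code) is to compute every pairwise similarity at once with an inner product:

import numpy as np

similarity_matrix = np.inner(embeddings, embeddings)  # (3, 3): entry [i, j] is the similarity of sentences i and j
print(similarity_matrix)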
def UniversalEmbedding(x):
    # hub.load returns a TF2 SavedModel that is called directly;
    # the TF1-style signature="default"/as_dict=True arguments no longer apply
    return embed(tf.squeeze(tf.cast(x, tf.string)))
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import LSTM, Dropout, Dense

embed_size = 512  # Universal Sentence Encoder v4 output dimension

def make_model(vocab_size):
    model = Sequential()
    # embed each raw input string with the Universal Sentence Encoder
    model.add(layers.Lambda(UniversalEmbedding, output_shape=(embed_size,)))
    # treat the single 512-dim sentence embedding as a length-1 sequence
    # so the LSTM layers receive the 3-D input they expect
    model.add(layers.Reshape((1, embed_size)))
    model.add(LSTM(100, return_sequences=True))
    model.add(LSTM(100))
    model.add(Dropout(0.2))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(vocab_size, activation='softmax'))
    return model
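To put make_model to work, you would compile and train it like any other Keras classifier. A minimal sketch, where the vocabulary size and the training arrays are hypothetical placeholders:

model = make_model(vocab_size=10000)  # hypothetical vocabulary size
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# X_train would be an array of raw strings and y_train one-hot labels
# of shape (num_examples, vocab_size):
# model.fit(X_train, y_train, epochs=10, batch_size=32)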