Meeting summarization agent

# Settings to fill in before running the rest of the notebook.
my_config = {
    # Service account key info, passed as-is to
    # service_account.Credentials.from_service_account_info().
    "SERVICE_ACCOUNT_CREDENTIALS": {
        "type": "service_account",
        # ...
    },
    # API key handed to the Gemini client for the summarization step.
    "GEMINI_API_KEY": "",
}

# Google Cloud Storage bucket
bucket_name = ""

# Upload your recording in the folder of your notebook, under the
# "meeting_recording.mp3" name.
source_file_name = "meeting_recording.mp3"
# The object stored in the bucket keeps the same name as the local file.
destination_blob_name = source_file_name

# Upload the local recording to Google Cloud Storage and remember its gs:// URI.
from google.oauth2 import service_account  # google-auth
from google.cloud import storage  # google-cloud-storage

# bucket_name defaults to an empty string; fail fast with a clear message
# rather than letting the upload fail later with a less obvious error.
if not bucket_name:
    raise ValueError(
        "bucket_name is empty: set it to the name of your Google Cloud Storage bucket."
    )

credentials = service_account.Credentials.from_service_account_info(
    my_config["SERVICE_ACCOUNT_CREDENTIALS"]
)
storage_client = storage.Client(credentials=credentials)
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)

# URI of the uploaded object, in the gs://<bucket>/<object> form.
gcs_uri = f"gs://{bucket_name}/{destination_blob_name}"
print(f"File {source_file_name} uploaded to {gcs_uri}.")

# We use Speech to Text V1, which supports identification of speakers
from google.cloud import speech  # google-cloud-speech

speechclient = speech.SpeechClient(credentials=credentials)

# Transcribe the speech recording stored in Google Storage
# Reference: https://cloud.google.com/speech-to-text/docs/samples/speech-transcribe-diarization-gcs-beta?hl=en#speech_transcribe_diarization_gcs_beta-python
# Examples of language codes
# https://cloud.google.com/speech-to-text/docs/speech-to-text-supported-languages
diarization_config = speech.SpeakerDiarizationConfig(
    enable_speaker_diarization=True,
    min_speaker_count=1,
    max_speaker_count=10,
)
recognition_config = speech.RecognitionConfig(
    # Reference regarding audio encoding:
    # https://cloud.google.com/speech-to-text/docs/encoding
    encoding=speech.RecognitionConfig.AudioEncoding.MP3,
    # You can find the sample rate in the file metadata on your laptop
    sample_rate_hertz=44100,
    language_code="en-US",
    diarization_config=diarization_config,
    enable_automatic_punctuation=True,
)

# Set the remote path for the audio file
audio = speech.RecognitionAudio(uri=gcs_uri)

print("Transcribing, this will take at least 15 minutes...")
# Start the long-running recognition operation, then block on its result.
# The 30 * 60 timeout is presumably in seconds (30 minutes) - confirm against
# the google-api-core Operation.result() documentation.
operation = speechclient.long_running_recognize(config=recognition_config, audio=audio)
response = operation.result(timeout=30 * 60)

# The transcript within each result is separate and sequential per result.
# However, the words list within an alternative includes all the words
# from all the results thus far.
# Thus, to get all the words with speaker tags, you only have to take the
# words list from the last result.
def _format_diarized_transcript(words_info):
    """Render diarized words as text, one paragraph per speaker turn.

    Args:
        words_info: Iterable of objects exposing ``word`` and ``speaker_tag``
            attributes, in spoken order.

    Returns:
        A string in which every change of ``speaker_tag`` starts a new
        paragraph of the form ``"\\n\\n<Speaker{tag}> "``, and every word is
        followed by a single space. An empty iterable yields ``""``.
    """
    pieces = []
    current_speaker = None  # No speaker seen yet, so the first word opens a turn.
    for word_info in words_info:
        if word_info.speaker_tag != current_speaker:
            current_speaker = word_info.speaker_tag
            pieces.append(f"\n\n<Speaker{current_speaker}> ")
        pieces.append(f"{word_info.word} ")
    # Join once at the end instead of growing a string inside the loop.
    return "".join(pieces)


# Stays empty when the recognition response carries no results.
transcript = ""
if response and response.results:
    words_info = response.results[-1].alternatives[0].words
    transcript = _format_diarized_transcript(words_info)
print(transcript)

# Summarize
# https://ai.google.dev/gemini-api/docs
from google import genai  # google-genai

llm_client = genai.Client(api_key=my_config["GEMINI_API_KEY"])

# The prompt embeds the diarized transcript between <text> tags and shows the
# expected Markdown layout between <example> tags.
prompt = f"""Summarize the following text provided between the <text> tags, by generating a Markdown summary in the format provided between the <example> tags.
The Markdown summary consists of two bullet points, 'summary' and 'next steps', each with less than 10 sub-bullet points.
Under summary, please make sure to list the main agreements and decisions reached.
Under next steps, please make sure the list the agreed actions and next steps.
Bullet points must start with a star character.
Sub bullet points must be indented with 4 spaces followed by a * character.
Please do not include any other text in your response, other than the list of bullet points.

<text>
{transcript}
</text>

Here is an illustrative example of the output:
<example>
* Summary
    * The meeting participants agreed to pursue business relationships
* Next steps
    * The meeting participants agreed to meet again in two weeks.
    * They will revert back with names of potential team members within 1 week.
</example>
"""

# Single generation request; the text of the reply is the meeting summary.
response = llm_client.models.generate_content(
    model="gemini-2.0-flash",
    contents=prompt,
)
meeting_summary = response.text
print(meeting_summary)